diff options
author | Marcel Ziswiler <marcel.ziswiler@toradex.com> | 2020-02-04 11:31:11 +0100 |
---|---|---|
committer | Marcel Ziswiler <marcel.ziswiler@toradex.com> | 2020-02-07 13:50:20 +0100 |
commit | 500f9a04cd76f504285ad99b15e390e20ab17e0c (patch) | |
tree | e1c5ff3a78e75f0847b47fbac63d279dd411e7e5 /net | |
parent | 0f549d8c4d5edd68a2a492afd48228458d3bca4a (diff) | |
parent | 6d0c334a400db31751c787c411e7187ab59a3f1d (diff) |
Merge tag 'v4.14.164' into 4.14-2.3.x-imx
This is the 4.14.164 stable release
Conflicts:
arch/arm/Kconfig.debug
arch/arm/boot/dts/imx7s.dtsi
arch/arm/mach-imx/cpuidle-imx6q.c
arch/arm/mach-imx/cpuidle-imx6sx.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kvm/hyp/tlb.c
drivers/crypto/caam/caamalg.c
drivers/crypto/mxs-dcp.c
drivers/dma/imx-sdma.c
drivers/gpio/gpio-vf610.c
drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
drivers/input/keyboard/imx_keypad.c
drivers/input/keyboard/snvs_pwrkey.c
drivers/mmc/core/block.c
drivers/mmc/core/queue.h
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/net/can/flexcan.c
drivers/net/can/rx-offload.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/wireless/ath/ath10k/pci.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/pci/dwc/pci-imx6.c
drivers/spi/spi-fsl-lpspi.c
drivers/usb/dwc3/gadget.c
include/net/tcp.h
sound/soc/fsl/Kconfig
sound/soc/fsl/fsl_esai.c
Diffstat (limited to 'net')
342 files changed, 4172 insertions, 2780 deletions
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 0e7afdf86127..235bed825e3a 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -110,6 +110,7 @@ int vlan_check_real_dev(struct net_device *real_dev, void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); +void vlan_dev_uninit(struct net_device *dev); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 236452ebbd9e..ed3717dc2d20 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -366,10 +366,12 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ifrr.ifr_ifru = ifr->ifr_ifru; switch (cmd) { + case SIOCSHWTSTAMP: + if (!net_eq(dev_net(dev), &init_net)) + break; case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - case SIOCSHWTSTAMP: case SIOCGHWTSTAMP: if (netif_device_present(real_dev) && ops->ndo_do_ioctl) err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd); @@ -608,7 +610,8 @@ static int vlan_dev_init(struct net_device *dev) return 0; } -static void vlan_dev_uninit(struct net_device *dev) +/* Note: this function might be called multiple times for the same device. */ +void vlan_dev_uninit(struct net_device *dev) { struct vlan_priority_tci_mapping *pm; struct vlan_dev_priv *vlan = vlan_dev_priv(dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 5e831de3103e..fdf39dd5e755 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -95,11 +95,13 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], struct ifla_vlan_flags *flags; struct ifla_vlan_qos_mapping *m; struct nlattr *attr; - int rem; + int rem, err; if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - vlan_dev_change_flags(dev, flags->flags, flags->mask); + err = vlan_dev_change_flags(dev, flags->flags, flags->mask); + if (err) + return err; } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { @@ -110,7 +112,9 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { m = nla_data(attr); - vlan_dev_set_egress_priority(dev, m->from, m->to); + err = vlan_dev_set_egress_priority(dev, m->from, m->to); + if (err) + return err; } } return 0; @@ -157,10 +161,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; err = vlan_changelink(dev, tb, data, extack); - if (err < 0) - return err; - - return register_vlan_dev(dev); + if (!err) + err = register_vlan_dev(dev); + if (err) + vlan_dev_uninit(dev); + return err; } static inline size_t vlan_qos_map_size(unsigned int n) diff --git a/net/9p/client.c b/net/9p/client.c index ef0f8fe3ac08..6a6b290574a1 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1082,7 +1082,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) p9_debug(P9_DEBUG_ERROR, "Please specify a msize of at least 4k\n"); err = -EINVAL; - goto free_client; + goto close_trans; } err = p9_client_version(clnt); diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 9743837aebc6..1885403c9a3e 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -570,9 +570,10 @@ int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) if (ret) { p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); + return ret; } - return ret; + return fake_pdu.offset; } EXPORT_SYMBOL(p9stat_read); @@ -621,13 +622,19 @@ int p9dirent_read(struct p9_client *clnt, char *buf, int len, if (ret) { p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); - goto out; + return ret; } - strcpy(dirent->d_name, nameptr); + ret = strscpy(dirent->d_name, nameptr, sizeof(dirent->d_name)); + if (ret < 0) { + p9_debug(P9_DEBUG_ERROR, + "On the wire dirent name too long: %s\n", + nameptr); + kfree(nameptr); + return ret; + } kfree(nameptr); -out: return fake_pdu.offset; } EXPORT_SYMBOL(p9dirent_read); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 38aa6345bdfa..9c0c894b56f8 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -14,6 +14,7 @@ #include <linux/mm.h> #include <linux/module.h> +#include "trans_common.h" /** * p9_release_req_pages - Release pages after the transaction. diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index f58467a49090..16a4a31f16e0 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -276,8 +276,7 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DISCONNECTED: if (rdma) rdma->state = P9_RDMA_CLOSED; - if (c) - c->status = Disconnected; + c->status = Disconnected; break; case RDMA_CM_EVENT_TIMEWAIT_EXIT: @@ -476,7 +475,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) err = post_recv(client, rpl_context); if (err) { - p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); + p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err); goto recv_error; } /* remove posted receive buffer from request structure */ @@ -545,7 +544,7 @@ dont_need_post_recv: recv_error: kfree(rpl_context); spin_lock_irqsave(&rdma->req_lock, flags); - if (rdma->state < P9_RDMA_CLOSING) { + if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) { rdma->state = P9_RDMA_CLOSING; spin_unlock_irqrestore(&rdma->req_lock, flags); rdma_disconnect(rdma->cm_id); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e73fd647065a..f88911cffa1a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -764,10 +764,16 @@ static struct p9_trans_module p9_virtio_trans = { /* The standard init function */ static int __init p9_virtio_init(void) { + int rc; + INIT_LIST_HEAD(&virtio_chan_list); v9fs_register_trans(&p9_virtio_trans); - return register_virtio_driver(&p9_virtio_drv); + rc = register_virtio_driver(&p9_virtio_drv); + if (rc) + v9fs_unregister_trans(&p9_virtio_trans); + + return rc; } static void __exit p9_virtio_cleanup(void) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index c10bdf63eae7..ea9f1773abc8 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -392,8 +392,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, unsigned int max_rings, max_ring_order, len = 0; versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len); - if (!len) - return -EINVAL; + if (IS_ERR(versions)) + return PTR_ERR(versions); if (strcmp(versions, "1")) { kfree(versions); return -EINVAL; @@ -530,13 +530,19 @@ static struct xenbus_driver xen_9pfs_front_driver = { static int p9_trans_xen_init(void) { + int rc; + if (!xen_domain()) return -ENODEV; pr_info("Initialising Xen transport for 9pfs\n"); v9fs_register_trans(&p9_xen_trans); - return xenbus_register_frontend(&xen_9pfs_front_driver); + rc = xenbus_register_frontend(&xen_9pfs_front_driver); + if (rc) + v9fs_unregister_trans(&p9_xen_trans); + + return rc; } module_init(p9_trans_xen_init); diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 8ad3ec2610b6..b9e85a4751a6 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = { static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 }; -void __init aarp_proto_init(void) +int __init aarp_proto_init(void) { + int rc; + aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); - if (!aarp_dl) + if (!aarp_dl) { printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); + return -ENOMEM; + } setup_timer(&aarp_timer, aarp_expire_timeout, 0); aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; add_timer(&aarp_timer); - register_netdevice_notifier(&aarp_notifier); + rc = register_netdevice_notifier(&aarp_notifier); + if (rc) { + del_timer_sync(&aarp_timer); + unregister_snap_client(aarp_dl); + } + return rc; } /* Remove the AARP entries associated with a device. */ diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index af46bc49e1e9..b5f84f428aa6 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -293,7 +293,7 @@ out_interface: goto out; } -void __exit atalk_proc_exit(void) +void atalk_proc_exit(void) { remove_proc_entry("interface", atalk_proc_dir); remove_proc_entry("route", atalk_proc_dir); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 5d035c1f1156..b4268bd2e557 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1029,6 +1029,11 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol, */ if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) goto out; + + rc = -EPERM; + if (sock->type == SOCK_RAW && !kern && !capable(CAP_NET_RAW)) + goto out; + rc = -ENOMEM; sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern); if (!sk) @@ -1906,31 +1911,61 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B }; EXPORT_SYMBOL(atrtr_get_dev); EXPORT_SYMBOL(atalk_find_dev_addr); -static const char atalk_err_snap[] __initconst = - KERN_CRIT "Unable to register DDP with SNAP.\n"; - /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { - int rc = proto_register(&ddp_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&ddp_proto, 0); + if (rc) goto out; - (void)sock_register(&atalk_family_ops); + rc = sock_register(&atalk_family_ops); + if (rc) + goto out_proto; + ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); - if (!ddp_dl) - printk(atalk_err_snap); + if (!ddp_dl) { + pr_crit("Unable to register DDP with SNAP.\n"); + rc = -ENOMEM; + goto out_sock; + } dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); - register_netdevice_notifier(&ddp_notifier); - aarp_proto_init(); - atalk_proc_init(); - atalk_register_sysctl(); + rc = register_netdevice_notifier(&ddp_notifier); + if (rc) + goto out_snap; + + rc = aarp_proto_init(); + if (rc) + goto out_dev; + + rc = atalk_proc_init(); + if (rc) + goto out_aarp; + + rc = atalk_register_sysctl(); + if (rc) + goto out_proc; out: return rc; +out_proc: + atalk_proc_exit(); +out_aarp: + aarp_cleanup_module(); +out_dev: + unregister_netdevice_notifier(&ddp_notifier); +out_snap: + dev_remove_pack(&ppptalk_packet_type); + dev_remove_pack(<alk_packet_type); + unregister_snap_client(ddp_dl); +out_sock: + sock_unregister(PF_APPLETALK); +out_proto: + proto_unregister(&ddp_proto); + goto out; } module_init(atalk_init); diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index c744a853fa5f..d945b7c0176d 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -45,9 +45,12 @@ static struct ctl_table atalk_table[] = { static struct ctl_table_header *atalk_table_header; -void atalk_register_sysctl(void) +int __init atalk_register_sysctl(void) { atalk_table_header = register_net_sysctl(&init_net, "net/appletalk", atalk_table); + if (!atalk_table_header) + return -ENOMEM; + return 0; } void atalk_unregister_sysctl(void) diff --git a/net/atm/common.c b/net/atm/common.c index 9e812c782a37..0fd2d26d4c6e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -667,7 +667,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* writable? */ diff --git a/net/atm/lec.c b/net/atm/lec.c index 9f2365694ad4..85ce89c8a35c 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -710,7 +710,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { - if (arg < 0 || arg >= MAX_LEC_ITF || !dev_lec[arg]) + if (arg < 0 || arg >= MAX_LEC_ITF) + return -EINVAL; + arg = array_index_nospec(arg, MAX_LEC_ITF); + if (!dev_lec[arg]) return -EINVAL; vcc->proto_data = dev_lec[arg]; return lec_mcast_make(netdev_priv(dev_lec[arg]), vcc); @@ -728,6 +731,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) i = arg; if (arg >= MAX_LEC_ITF) return -EINVAL; + i = array_index_nospec(arg, MAX_LEC_ITF); if (!dev_lec[i]) { int size; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d783d90c20f1..63fa6ea2341e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -859,6 +859,8 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol, break; case SOCK_RAW: + if (!capable(CAP_NET_RAW)) + return -EPERM; break; default: return -ESOCKTNOSUPPORT; diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 183b1c583d56..dd526c91363d 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -114,6 +114,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) dst = (ax25_address *)(bp + 1); src = (ax25_address *)(bp + 8); + ax25_route_lock_use(); route = ax25_get_route(dst, NULL); if (route) { digipeat = route->digipeat; @@ -206,9 +207,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) ax25_queue_xmit(skb, dev); put: - if (route) - ax25_put_route(route); + ax25_route_lock_unuse(); return NETDEV_TX_OK; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 0446b892618a..b8e1a5e6a9d3 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -40,7 +40,7 @@ #include <linux/export.h> static ax25_route *ax25_route_list; -static DEFINE_RWLOCK(ax25_route_lock); +DEFINE_RWLOCK(ax25_route_lock); void ax25_rt_device_down(struct net_device *dev) { @@ -349,6 +349,7 @@ const struct file_operations ax25_route_fops = { * Find AX.25 route * * Only routes with a reference count of zero can be destroyed. + * Must be called with ax25_route_lock read locked. */ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) { @@ -356,7 +357,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) ax25_route *ax25_def_rt = NULL; ax25_route *ax25_rt; - read_lock(&ax25_route_lock); /* * Bind to the physical interface we heard them on, or the default * route if none is found; @@ -379,11 +379,6 @@ ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev) if (ax25_spe_rt != NULL) ax25_rt = ax25_spe_rt; - if (ax25_rt != NULL) - ax25_hold_route(ax25_rt); - - read_unlock(&ax25_route_lock); - return ax25_rt; } @@ -414,9 +409,12 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) ax25_route *ax25_rt; int err = 0; - if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL) + ax25_route_lock_use(); + ax25_rt = ax25_get_route(addr, NULL); + if (!ax25_rt) { + ax25_route_lock_unuse(); return -EHOSTUNREACH; - + } if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) { err = -EHOSTUNREACH; goto put; @@ -445,14 +443,15 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) } if (ax25->sk != NULL) { + local_bh_disable(); bh_lock_sock(ax25->sk); sock_reset_flag(ax25->sk, SOCK_ZAPPED); bh_unlock_sock(ax25->sk); + local_bh_enable(); } put: - ax25_put_route(ax25_rt); - + ax25_route_lock_unuse(); return err; } diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 5bd9b389f8c9..8b3f9441b3a0 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -450,17 +450,23 @@ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv) * batadv_iv_ogm_aggr_packet - checks if there is another OGM attached * @buff_pos: current position in the skb * @packet_len: total length of the skb - * @tvlv_len: tvlv length of the previously considered OGM + * @ogm_packet: potential OGM in buffer * * Return: true if there is enough space for another OGM, false otherwise. */ -static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, - __be16 tvlv_len) +static bool +batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, + const struct batadv_ogm_packet *ogm_packet) { int next_buff_pos = 0; - next_buff_pos += buff_pos + BATADV_OGM_HLEN; - next_buff_pos += ntohs(tvlv_len); + /* check if there is enough space for the header */ + next_buff_pos += buff_pos + sizeof(*ogm_packet); + if (next_buff_pos > packet_len) + return false; + + /* check if there is enough space for the optional TVLV */ + next_buff_pos += ntohs(ogm_packet->tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); @@ -488,7 +494,7 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, - batadv_ogm_packet->tvlv_len)) { + batadv_ogm_packet)) { /* we might have aggregated direct link packets with an * ordinary base packet */ @@ -1838,7 +1844,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, /* unpack the aggregated packets and process them one by one */ while (batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb), - ogm_packet->tvlv_len)) { + ogm_packet)) { batadv_iv_ogm_process(skb, ogm_offset, if_incoming); ogm_offset += BATADV_OGM_HLEN; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 8be61734fc43..e07f636160b6 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -642,17 +642,23 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, * batadv_v_ogm_aggr_packet - checks if there is another OGM aggregated * @buff_pos: current position in the skb * @packet_len: total length of the skb - * @tvlv_len: tvlv length of the previously considered OGM + * @ogm2_packet: potential OGM2 in buffer * * Return: true if there is enough space for another OGM, false otherwise. */ -static bool batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, - __be16 tvlv_len) +static bool +batadv_v_ogm_aggr_packet(int buff_pos, int packet_len, + const struct batadv_ogm2_packet *ogm2_packet) { int next_buff_pos = 0; - next_buff_pos += buff_pos + BATADV_OGM2_HLEN; - next_buff_pos += ntohs(tvlv_len); + /* check if there is enough space for the header */ + next_buff_pos += buff_pos + sizeof(*ogm2_packet); + if (next_buff_pos > packet_len) + return false; + + /* check if there is enough space for the optional TVLV */ + next_buff_pos += ntohs(ogm2_packet->tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); @@ -829,7 +835,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, ogm_packet = (struct batadv_ogm2_packet *)skb->data; while (batadv_v_ogm_aggr_packet(ogm_offset, skb_headlen(skb), - ogm_packet->tvlv_len)) { + ogm_packet)) { batadv_v_ogm_process(skb, ogm_offset, if_incoming); ogm_offset += BATADV_OGM2_HLEN; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index c3c848f64fdd..c761c0c233e4 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -803,6 +803,8 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, const u8 *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; + struct batadv_bla_claim *claim_removed_entry; + struct hlist_node *claim_removed_node; ether_addr_copy(search_claim.addr, mac); search_claim.vid = vid; @@ -813,10 +815,18 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "%s(): %pM, vid %d\n", __func__, mac, batadv_print_vid(vid)); - batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, - batadv_choose_claim, claim); - batadv_claim_put(claim); /* reference from the hash is gone */ + claim_removed_node = batadv_hash_remove(bat_priv->bla.claim_hash, + batadv_compare_claim, + batadv_choose_claim, claim); + if (!claim_removed_node) + goto free_claim; + /* reference from the hash is gone */ + claim_removed_entry = hlist_entry(claim_removed_node, + struct batadv_bla_claim, hash_entry); + batadv_claim_put(claim_removed_entry); + +free_claim: /* don't need the reference from hash_find() anymore */ batadv_claim_put(claim); } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 4f0111bc6621..8d1d0fdb157e 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1240,7 +1240,6 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, hw_src, &ip_src, hw_dst, &ip_dst, dat_entry->mac_addr, &dat_entry->ip); dropped = true; - goto out; } /* Update our internal cache with both the IP addresses the node got @@ -1249,6 +1248,9 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid); batadv_dat_entry_add(bat_priv, ip_dst, hw_dst, vid); + if (dropped) + goto out; + /* If BLA is enabled, only forward ARP replies if we have claimed the * source of the ARP reply or if no one else of the same backbone has * already claimed that client. This prevents that different gateways diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e348f76ea8c1..2e1a084b0bd2 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -19,7 +19,6 @@ #include "main.h" #include <linux/atomic.h> -#include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/fs.h> @@ -172,8 +171,10 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) parent_dev = __dev_get_by_index((struct net *)parent_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + } if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index fb381fb26a66..5762e52f1d1f 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -153,6 +153,7 @@ int batadv_mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->tt.commit_lock); spin_lock_init(&bat_priv->gw.list_lock); #ifdef CONFIG_BATMAN_ADV_MCAST + spin_lock_init(&bat_priv->mcast.mla_lock); spin_lock_init(&bat_priv->mcast.want_lists_lock); #endif spin_lock_init(&bat_priv->tvlv.container_list_lock); diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index fa02fb73367c..d47865e0e697 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -269,8 +269,6 @@ static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list) * translation table except the ones listed in the given mcast_list. * * If mcast_list is NULL then all are retracted. - * - * Do not call outside of the mcast worker! (or cancel mcast worker first) */ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) @@ -278,8 +276,6 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; - WARN_ON(delayed_work_pending(&bat_priv->mcast.work)); - hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list, list) { if (mcast_list && @@ -303,8 +299,6 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv, * * Adds multicast listener announcements from the given mcast_list to the * translation table if they have not been added yet. - * - * Do not call outside of the mcast worker! (or cancel mcast worker first) */ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct hlist_head *mcast_list) @@ -312,8 +306,6 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv, struct batadv_hw_addr *mcast_entry; struct hlist_node *tmp; - WARN_ON(delayed_work_pending(&bat_priv->mcast.work)); - if (!mcast_list) return; @@ -600,7 +592,10 @@ static void batadv_mcast_mla_update(struct work_struct *work) priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work); bat_priv = container_of(priv_mcast, struct batadv_priv, mcast); + spin_lock(&bat_priv->mcast.mla_lock); __batadv_mcast_mla_update(bat_priv); + spin_unlock(&bat_priv->mcast.mla_lock); + batadv_mcast_start_timer(bat_priv); } diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index ab13b4d58733..edb35bcc046d 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -110,7 +110,7 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) { struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); - return attr ? nla_get_u32(attr) : 0; + return (attr && nla_len(attr) == sizeof(u32)) ? nla_get_u32(attr) : 0; } /** diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 8cedb5db1ab3..7c883420485b 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -213,10 +213,14 @@ static int batadv_interface_tx(struct sk_buff *skb, netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, sizeof(*vhdr))) + goto dropped; vhdr = vlan_eth_hdr(skb); /* drop batman-in-batman packets to prevent loops */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 9da3455847ff..2c2670b85fa9 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -614,14 +614,26 @@ static void batadv_tt_global_free(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global, const char *message) { + struct batadv_tt_global_entry *tt_removed_entry; + struct hlist_node *tt_removed_node; + batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(tt_global->common.vid), message); - batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, - batadv_choose_tt, &tt_global->common); - batadv_tt_global_entry_put(tt_global); + tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_global->common); + if (!tt_removed_node) + return; + + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_global_entry, + common.hash_entry); + batadv_tt_global_entry_put(tt_removed_entry); } /** @@ -1313,9 +1325,10 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, bool roaming) { + struct batadv_tt_local_entry *tt_removed_entry; struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; - void *tt_entry_exists; + struct hlist_node *tt_removed_node; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1344,15 +1357,18 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local_entry->common); - if (!tt_entry_exists) + if (!tt_removed_node) goto out; - /* extra call to free the local tt entry */ - batadv_tt_local_entry_put(tt_local_entry); + /* drop reference of remove hash entry */ + tt_removed_entry = hlist_entry(tt_removed_node, + struct batadv_tt_local_entry, + common.hash_entry); + batadv_tt_local_entry_put(tt_removed_entry); out: if (tt_local_entry) @@ -3734,6 +3750,8 @@ static void batadv_tt_purge(struct work_struct *work) void batadv_tt_free(struct batadv_priv *bat_priv) { + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_ROAM, 1); + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_TT, 1); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a62795868794..d5e3968619b8 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -798,6 +798,7 @@ struct batadv_mcast_querier_state { * @flags: the flags we have last sent in our mcast tvlv * @enabled: whether the multicast tvlv is currently enabled * @bridged: whether the soft interface has a bridge on top + * @mla_lock: a lock protecting mla_list and mla_flags * @num_disabled: number of nodes that have no mcast tvlv * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic * @num_want_all_ipv4: counter for items in want_all_ipv4_list @@ -816,6 +817,7 @@ struct batadv_priv_mcast { u8 flags; bool enabled; bool bridged; + spinlock_t mla_lock; atomic_t num_disabled; atomic_t num_want_all_unsnoopables; atomic_t num_want_all_ipv4; diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 4e2576fc0c59..357475cceec6 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -187,10 +187,16 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev, } if (!rt) { - nexthop = &lowpan_cb(skb)->gw; - - if (ipv6_addr_any(nexthop)) - return NULL; + if (ipv6_addr_any(&lowpan_cb(skb)->gw)) { + /* There is neither route nor gateway, + * probably the destination is a direct peer. + */ + nexthop = daddr; + } else { + /* There is a known gateway + */ + nexthop = &lowpan_cb(skb)->gw; + } } else { nexthop = rt6_nexthop(rt, daddr); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 583951e82cee..b48d54783e5d 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -154,15 +154,25 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) } EXPORT_SYMBOL(bt_sock_unlink); -void bt_accept_enqueue(struct sock *parent, struct sock *sk) +void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { BT_DBG("parent %p, sk %p", parent, sk); sock_hold(sk); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + + if (bh) + bh_lock_sock_nested(sk); + else + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; - release_sock(sk); + + if (bh) + bh_unlock_sock(sk); + else + release_sock(sk); + parent->sk_ack_backlog++; } EXPORT_SYMBOL(bt_accept_enqueue); @@ -450,7 +460,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -460,7 +470,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (sk->sk_state == BT_CLOSED) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index cc061495f653..1d085eed72d0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1054,8 +1054,10 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, if (!conn) return ERR_PTR(-ENOMEM); - if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) + if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) { + hci_conn_del(conn); return ERR_PTR(-EBUSY); + } conn->state = BT_CONNECT; set_bit(HCI_CONN_SCANNING, &conn->flags); @@ -1281,8 +1283,16 @@ auth: return 0; encrypt: - if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) + if (test_bit(HCI_CONN_ENCRYPT, &conn->flags)) { + /* Ensure that the encryption key size has been read, + * otherwise stall the upper layer responses. + */ + if (!conn->enc_key_size) + return 0; + + /* Nothing else needed, all requirements are met */ return 1; + } hci_conn_encrypt(conn); return 0; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6bc679cd3481..ff80a9d41ce1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -802,8 +802,8 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { struct hci_cp_le_write_def_data_len cp; - cp.tx_len = hdev->le_max_tx_len; - cp.tx_time = hdev->le_max_tx_time; + cp.tx_len = cpu_to_le16(hdev->le_max_tx_len); + cp.tx_time = cpu_to_le16(hdev->le_max_tx_time); hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp); } @@ -4215,7 +4215,14 @@ static void hci_rx_work(struct work_struct *work) hci_send_to_sock(hdev, skb); } - if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { + /* If the device has been opened in HCI_USER_CHANNEL, + * the userspace has exclusive access to device. + * When device is HCI_INIT, we still need to process + * the data packets to the driver in order + * to complete its setup(). + */ + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + !test_bit(HCI_INIT, &hdev->flags)) { kfree_skb(skb); continue; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 01f211e31f47..363dc85bbc5c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5212,6 +5212,12 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return true; } + /* Check if request ended in Command Status - no way to retreive + * any extra parameters in this case. + */ + if (hdr->evt == HCI_EV_CMD_STATUS) + return false; + if (hdr->evt != HCI_EV_CMD_COMPLETE) { BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); return false; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b73ac149de34..759329bec399 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1095,6 +1095,14 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) instance_flags = get_adv_instance_flags(hdev, instance); + /* If instance already has the flags set skip adding it once + * again. + */ + if (adv_instance && eir_get_data(adv_instance->adv_data, + adv_instance->adv_data_len, EIR_FLAGS, + NULL)) + goto skip_flags; + /* The Add Advertising command allows userspace to set both the general * and limited discoverable flags. */ @@ -1127,6 +1135,7 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) } } +skip_flags: if (adv_instance) { memcpy(ptr, adv_instance->adv_data, adv_instance->adv_data_len); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 65d734c165bd..4a05235929b9 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -826,8 +826,6 @@ static int hci_sock_release(struct socket *sock) if (!sk) return 0; - hdev = hci_pi(sk)->hdev; - switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); @@ -849,6 +847,7 @@ static int hci_sock_release(struct socket *sock) bt_sock_unlink(&hci_sk_list, sk); + hdev = hci_pi(sk)->hdev; if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { /* When releasing a user channel exclusive access, diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 008ba439bd62..cc80c76177b6 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -76,6 +76,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long sockfd_put(csock); return err; } + ca.name[sizeof(ca.name)-1] = 0; err = hidp_connection_add(&ca, csock, isock); if (!err && copy_to_user(argp, &ca, sizeof(ca))) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9b7907ebfa01..ebdf1b0e576a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1340,6 +1340,21 @@ static void l2cap_request_info(struct l2cap_conn *conn) sizeof(req), &req); } +static bool l2cap_check_enc_key_size(struct hci_conn *hcon) +{ + /* The minimum encryption key size needs to be enforced by the + * host stack before establishing any L2CAP connections. The + * specification in theory allows a minimum of 1, but to align + * BR/EDR and LE transports, a minimum of 7 is chosen. + * + * This check might also be called for unencrypted connections + * that have no key size requirements. Ensure that the link is + * actually encrypted before enforcing a key size. + */ + return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) || + hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE); +} + static void l2cap_do_start(struct l2cap_chan *chan) { struct l2cap_conn *conn = chan->conn; @@ -1357,9 +1372,14 @@ static void l2cap_do_start(struct l2cap_chan *chan) if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) return; - if (l2cap_chan_check_security(chan, true) && - __l2cap_no_conn_pending(chan)) + if (!l2cap_chan_check_security(chan, true) || + !__l2cap_no_conn_pending(chan)) + return; + + if (l2cap_check_enc_key_size(conn->hcon)) l2cap_start_connection(chan); + else + __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); } static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask) @@ -1438,7 +1458,10 @@ static void l2cap_conn_start(struct l2cap_conn *conn) continue; } - l2cap_start_connection(chan); + if (l2cap_check_enc_key_size(conn->hcon)) + l2cap_start_connection(chan); + else + l2cap_chan_close(chan, ECONNREFUSED); } else if (chan->state == BT_CONNECT2) { struct l2cap_conn_rsp rsp; @@ -3336,16 +3359,22 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&req, &type, &olen, &val); + if (len < 0) + break; hint = type & L2CAP_CONF_HINT; type &= L2CAP_CONF_MASK; switch (type) { case L2CAP_CONF_MTU: + if (olen != 2) + break; mtu = val; break; case L2CAP_CONF_FLUSH_TO: + if (olen != 2) + break; chan->flush_to = val; break; @@ -3353,26 +3382,30 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data break; case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *) val, olen); + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *) val, olen); break; case L2CAP_CONF_FCS: + if (olen != 1) + break; if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, &chan->conf_state); break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) { - remote_efs = 1; - memcpy(&efs, (void *) val, olen); - } + if (olen != sizeof(efs)) + break; + remote_efs = 1; + memcpy(&efs, (void *) val, olen); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; if (!(chan->conn->local_fixed_chan & L2CAP_FC_A2MP)) return -ECONNREFUSED; - set_bit(FLAG_EXT_CTRL, &chan->flags); set_bit(CONF_EWS_RECV, &chan->conf_state); chan->tx_win_max = L2CAP_DEFAULT_EXT_WINDOW; @@ -3382,7 +3415,6 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data default: if (hint) break; - result = L2CAP_CONF_UNKNOWN; *((u8 *) ptr++) = type; break; @@ -3547,58 +3579,65 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + if (len < 0) + break; switch (type) { case L2CAP_CONF_MTU: + if (olen != 2) + break; if (val < L2CAP_DEFAULT_MIN_MTU) { *result = L2CAP_CONF_UNACCEPT; chan->imtu = L2CAP_DEFAULT_MIN_MTU; } else chan->imtu = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, + endptr - ptr); break; case L2CAP_CONF_FLUSH_TO: + if (olen != 2) + break; chan->flush_to = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, - 2, chan->flush_to, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, + chan->flush_to, endptr - ptr); break; case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); - + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *)val, olen); if (test_bit(CONF_STATE2_DEVICE, &chan->conf_state) && rfc.mode != chan->mode) return -ECONNREFUSED; - chan->fcs = 0; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc, endptr - ptr); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), + (unsigned long) &rfc, endptr - ptr); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, chan->tx_win, endptr - ptr); break; case L2CAP_CONF_EFS: - if (olen == sizeof(efs)) { - memcpy(&efs, (void *)val, olen); - - if (chan->local_stype != L2CAP_SERV_NOTRAFIC && - efs.stype != L2CAP_SERV_NOTRAFIC && - efs.stype != chan->local_stype) - return -ECONNREFUSED; - - l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs, endptr - ptr); - } + if (olen != sizeof(efs)) + break; + memcpy(&efs, (void *)val, olen); + if (chan->local_stype != L2CAP_SERV_NOTRAFIC && + efs.stype != L2CAP_SERV_NOTRAFIC && + efs.stype != chan->local_stype) + return -ECONNREFUSED; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), + (unsigned long) &efs, endptr - ptr); break; case L2CAP_CONF_FCS: + if (olen != 1) + break; if (*result == L2CAP_CONF_PENDING) if (val == L2CAP_FCS_NONE) set_bit(CONF_RECV_NO_FCS, @@ -3727,13 +3766,18 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) while (len >= L2CAP_CONF_OPT_SIZE) { len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + if (len < 0) + break; switch (type) { case L2CAP_CONF_RFC: - if (olen == sizeof(rfc)) - memcpy(&rfc, (void *)val, olen); + if (olen != sizeof(rfc)) + break; + memcpy(&rfc, (void *)val, olen); break; case L2CAP_CONF_EWS: + if (olen != 2) + break; txwin_ext = val; break; } @@ -4340,6 +4384,12 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, l2cap_chan_lock(chan); + if (chan->state != BT_DISCONN) { + l2cap_chan_unlock(chan); + mutex_unlock(&conn->chan_lock); + return 0; + } + l2cap_chan_hold(chan); l2cap_chan_del(chan, 0); @@ -4868,10 +4918,8 @@ void __l2cap_physical_cfm(struct l2cap_chan *chan, int result) BT_DBG("chan %p, result %d, local_amp_id %d, remote_amp_id %d", chan, result, local_amp_id, remote_amp_id); - if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) { - l2cap_chan_unlock(chan); + if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) return; - } if (chan->state != BT_CONNECTED) { l2cap_do_create(chan, result, local_amp_id, remote_amp_id); @@ -6769,6 +6817,16 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) chan->sdu_len = sdu_len; chan->sdu_last_frag = skb; + /* Detect if remote is not able to use the selected MPS */ + if (skb->len + L2CAP_SDULEN_SIZE < chan->mps) { + u16 mps_len = skb->len + L2CAP_SDULEN_SIZE; + + /* Adjust the number of credits */ + BT_DBG("chan->mps %u -> %u", chan->mps, mps_len); + chan->mps = mps_len; + l2cap_chan_le_send_credits(chan); + } + return 0; } @@ -7434,7 +7492,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) } if (chan->state == BT_CONNECT) { - if (!status) + if (!status && l2cap_check_enc_key_size(hcon)) l2cap_start_connection(chan); else __set_chan_timer(chan, L2CAP_DISC_TIMEOUT); @@ -7443,7 +7501,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) struct l2cap_conn_rsp rsp; __u16 res, stat; - if (!status) { + if (!status && l2cap_check_enc_key_size(hcon)) { if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) { res = L2CAP_CR_PEND; stat = L2CAP_CS_AUTHOR_PEND; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a4189cb1661f..49bee4fa7636 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1256,7 +1256,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan) l2cap_sock_init(sk, parent); - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, false); release_sock(parent); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 1aaccf637479..8fcd9130439d 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -988,7 +988,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * rfcomm_pi(sk)->channel = channel; sk->sk_state = BT_CONFIG; - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, true); /* Accept connection and return socket DLC */ *d = rfcomm_pi(sk)->dlc; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 81fe3949c158..2d23b29ce00d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -193,7 +193,7 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, conn->sk = sk; if (parent) - bt_accept_enqueue(parent, sk); + bt_accept_enqueue(parent, sk, true); } static int sco_chan_add(struct sco_conn *conn, struct sock *sk, diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index dbcc439fc78b..6e9a247b15d7 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2571,6 +2571,19 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, goto distribute; } + /* Drop IRK if peer is using identity address during pairing but is + * providing different address as identity information. + * + * Microsoft Surface Precision Mouse is known to have this bug. + */ + if (hci_is_identity_address(&hcon->dst, hcon->dst_type) && + (bacmp(&info->bdaddr, &hcon->dst) || + info->addr_type != hcon->dst_type)) { + bt_dev_err(hcon->hdev, + "ignoring IRK with invalid identity address"); + goto distribute; + } + bacpy(&smp->id_addr, &info->bdaddr); smp->id_addr_type = info->addr_type; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f6b6a92f1c48..b7cc322acdc8 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -217,6 +217,12 @@ static int br_set_mac_address(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + /* dev_set_mac_addr() can be called by a master device on bridge's + * NETDEV_UNREGISTER, but since it's being destroyed do nothing + */ + if (dev->reg_state != NETREG_REGISTERED) + return -EBUSY; + spin_lock_bh(&br->lock); if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { /* Mac address will be changed in br_stp_change_bridge_id(). */ diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 55a73ef388bf..d9156eabddb8 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -518,13 +518,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) call_netdevice_notifiers(NETDEV_JOIN, dev); err = dev_set_allmulti(dev, 1); - if (err) - goto put_back; + if (err) { + kfree(p); /* kobject not yet init'd, manually free */ + goto err1; + } err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj), SYSFS_BRIDGE_PORT_ATTR); if (err) - goto err1; + goto err2; err = br_sysfs_addif(p); if (err) @@ -607,12 +609,9 @@ err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: kobject_put(&p->kobj); - p = NULL; /* kobject_put frees */ -err1: dev_set_allmulti(dev, -1); -put_back: +err1: dev_put(dev); - kfree(p); return err; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7637f58c1226..10fa84056cb5 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -236,13 +236,10 @@ static void __br_handle_local_finish(struct sk_buff *skb) /* note: already called with rcu_read_lock */ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net_bridge_port *p = br_port_get_rcu(skb->dev); - __br_handle_local_finish(skb); - BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; - br_pass_frame_up(skb); - return 0; + /* return 1 to signal the okfn() was called so it's ok to use the skb */ + return 1; } /* @@ -318,10 +315,18 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) goto forward; } - /* Deliver packet to local host only */ - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(skb->dev), - NULL, skb, skb->dev, NULL, br_handle_local_finish); - return RX_HANDLER_CONSUMED; + /* The else clause should be hit when nf_hook(): + * - returns < 0 (drop/error) + * - returns = 0 (stolen/nf_queue) + * Thus return 1 from the okfn() to signal the skb is ok to pass + */ + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + br_handle_local_finish) == 1) { + return RX_HANDLER_PASS; + } else { + return RX_HANDLER_CONSUMED; + } } forward: diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 31ddff22563e..a7e98e52b122 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -373,7 +373,7 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb, struct nlmsghdr *nlh; struct nlattr *nest; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0); if (!nlh) return -EMSGSIZE; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a813dfe2dc2c..b24782d53474 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1120,6 +1120,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; + u16 nsrcs; ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); @@ -1133,8 +1134,9 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, grec = (void *)(skb->data + len - sizeof(*grec)); group = grec->grec_mca; type = grec->grec_type; + nsrcs = ntohs(grec->grec_nsrcs); - len += ntohs(grec->grec_nsrcs) * 4; + len += nsrcs * 4; if (!pskb_may_pull(skb, len)) return -EINVAL; @@ -1155,7 +1157,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, src = eth_hdr(skb)->h_source; if ((type == IGMPV3_CHANGE_TO_INCLUDE || type == IGMPV3_MODE_IS_INCLUDE) && - ntohs(grec->grec_nsrcs) == 0) { + nsrcs == 0) { br_ip4_multicast_leave_group(br, port, group, vid, src); } else { err = br_ip4_multicast_add_group(br, port, group, vid, @@ -1190,23 +1192,26 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, len = skb_transport_offset(skb) + sizeof(*icmp6h); for (i = 0; i < num; i++) { - __be16 *nsrcs, _nsrcs; - - nsrcs = skb_header_pointer(skb, - len + offsetof(struct mld2_grec, - grec_nsrcs), - sizeof(_nsrcs), &_nsrcs); - if (!nsrcs) + __be16 *_nsrcs, __nsrcs; + u16 nsrcs; + + _nsrcs = skb_header_pointer(skb, + len + offsetof(struct mld2_grec, + grec_nsrcs), + sizeof(__nsrcs), &__nsrcs); + if (!_nsrcs) return -EINVAL; + nsrcs = ntohs(*_nsrcs); + if (!pskb_may_pull(skb, len + sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs))) + sizeof(struct in6_addr) * nsrcs)) return -EINVAL; grec = (struct mld2_grec *)(skb->data + len); len += sizeof(*grec) + - sizeof(struct in6_addr) * ntohs(*nsrcs); + sizeof(struct in6_addr) * nsrcs; /* We treat these as MLDv1 reports for now. */ switch (grec->grec_type) { @@ -1225,7 +1230,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, src = eth_hdr(skb)->h_source; if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE || grec->grec_type == MLD2_MODE_IS_INCLUDE) && - ntohs(*nsrcs) == 0) { + nsrcs == 0) { br_ip6_multicast_leave_group(br, port, &grec->grec_mca, vid, src); } else { @@ -1390,14 +1395,7 @@ static void br_multicast_query_received(struct net_bridge *br, return; br_multicast_update_query_timer(br, query, max_delay); - - /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, - * the arrival port for IGMP Queries where the source address - * is 0.0.0.0 should not be added to router port list. - */ - if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || - saddr->proto == htons(ETH_P_IPV6)) - br_multicast_mark_router(br, port); + br_multicast_mark_router(br, port); } static int br_ip4_multicast_query(struct net_bridge *br, @@ -1484,7 +1482,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, struct sk_buff *skb, u16 vid) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct mld_msg *mld; struct net_bridge_mdb_entry *mp; struct mld2_query *mld2q; @@ -1528,7 +1525,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (is_general_query) { saddr.proto = htons(ETH_P_IPV6); - saddr.u.ip6 = ip6h->saddr; + saddr.u.ip6 = ipv6_hdr(skb)->saddr; br_multicast_query_received(br, port, &br->ip6_other_query, &saddr, max_delay); @@ -1596,6 +1593,9 @@ br_multicast_leave_group(struct net_bridge *br, if (!br_port_group_equal(p, port, src)) continue; + if (p->flags & MDB_PG_FLAGS_PERMANENT) + break; + rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); @@ -2126,7 +2126,8 @@ static void br_multicast_start_querier(struct net_bridge *br, __br_multicast_open(br, query); - list_for_each_entry(port, &br->port_list, list) { + rcu_read_lock(); + list_for_each_entry_rcu(port, &br->port_list, list) { if (port->state == BR_STATE_DISABLED || port->state == BR_STATE_BLOCKING) continue; @@ -2138,6 +2139,7 @@ static void br_multicast_start_querier(struct net_bridge *br, br_multicast_enable(&port->ip6_own_query); #endif } + rcu_read_unlock(); } int br_multicast_toggle(struct net_bridge *br, unsigned long val) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 3f3859b8d49f..6feab2279143 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -512,6 +512,7 @@ static unsigned int br_nf_pre_routing(void *priv, nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IP); + skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, @@ -642,6 +643,9 @@ static unsigned int br_nf_forward_arp(void *priv, nf_bridge_pull_encap_header(skb); } + if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) + return NF_DROP; + if (arp_hdr(skb)->ar_pln != 4) { if (IS_VLAN_ARP(skb)) nf_bridge_push_encap_header(skb); @@ -881,11 +885,6 @@ static const struct nf_br_ops br_ops = { .br_dev_xmit_hook = br_nf_dev_xmit, }; -void br_netfilter_enable(void) -{ -} -EXPORT_SYMBOL_GPL(br_netfilter_enable); - /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ static const struct nf_hook_ops br_nf_ops[] = { diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 5811208863b7..09d5e0c7b3ba 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -235,6 +235,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv, nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IPV6); + skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 20cbb727df4d..c217276bd76a 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -26,7 +26,8 @@ #endif static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 1b75d6bf12bd..37ddcea3fc96 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -147,7 +147,6 @@ void br_send_tcn_bpdu(struct net_bridge_port *p) void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_device *dev) { - const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p; struct net_bridge *br; const unsigned char *buf; @@ -176,7 +175,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (p->state == BR_STATE_DISABLED) goto out; - if (!ether_addr_equal(dest, br->group_addr)) + if (!ether_addr_equal(eth_hdr(skb)->h_dest, br->group_addr)) goto out; if (p->flags & BR_BPDU_GUARD) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 9b8a53568b0f..e24a74884768 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -636,6 +636,11 @@ void br_vlan_flush(struct net_bridge *br) ASSERT_RTNL(); + /* delete auto-added default pvid local fdb before flushing vlans + * otherwise it will be leaked on bridge device init failure + */ + br_fdb_delete_by_port(br, NULL, 0, 1); + vg = br_vlan_group(br); __vlan_flush(vg); RCU_INIT_POINTER(br->vlgrp, NULL); diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index dfc86a0199da..1d8c834d9018 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -19,7 +19,6 @@ static unsigned int ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - struct net_device *dev; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -32,10 +31,22 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) else skb->pkt_type = PACKET_MULTICAST; } else { - if (xt_hooknum(par) != NF_BR_BROUTING) - dev = br_port_get_rcu(xt_in(par))->br->dev; - else + const struct net_device *dev; + + switch (xt_hooknum(par)) { + case NF_BR_BROUTING: dev = xt_in(par); + break; + case NF_BR_PRE_ROUTING: + dev = br_port_get_rcu(xt_in(par))->br->dev; + break; + default: + dev = NULL; + break; + } + + if (!dev) /* NF_BR_LOCAL_OUT */ + return info->target; if (ether_addr_equal(info->mac, dev->dev_addr)) skb->pkt_type = PACKET_HOST; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 22e4c15a1fc3..35a670ec9077 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -31,10 +31,6 @@ /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" -#define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ - "report to author: "format, ## args) -/* #define BUGPRINT(format, args...) */ - /* Each cpu has its own set of counters, so there is no need for write_lock in * the softirq * For reading or updating the counters, the user context needs to @@ -453,8 +449,6 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, /* we make userspace set this right, * so there is no misunderstanding */ - BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set " - "in distinguisher\n"); return -EINVAL; } if (i != NF_BR_NUMHOOKS) @@ -472,18 +466,14 @@ static int ebt_verify_pointers(const struct ebt_replace *repl, offset += e->next_offset; } } - if (offset != limit) { - BUGPRINT("entries_size too small\n"); + if (offset != limit) return -EINVAL; - } /* check if all valid hooks have a chain */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (!newinfo->hook_entry[i] && - (valid_hooks & (1 << i))) { - BUGPRINT("Valid hook without chain\n"); + (valid_hooks & (1 << i))) return -EINVAL; - } } return 0; } @@ -510,26 +500,20 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, /* this checks if the previous chain has as many entries * as it said it has */ - if (*n != *cnt) { - BUGPRINT("nentries does not equal the nr of entries " - "in the chain\n"); + if (*n != *cnt) return -EINVAL; - } + if (((struct ebt_entries *)e)->policy != EBT_DROP && ((struct ebt_entries *)e)->policy != EBT_ACCEPT) { /* only RETURN from udc */ if (i != NF_BR_NUMHOOKS || - ((struct ebt_entries *)e)->policy != EBT_RETURN) { - BUGPRINT("bad policy\n"); + ((struct ebt_entries *)e)->policy != EBT_RETURN) return -EINVAL; - } } if (i == NF_BR_NUMHOOKS) /* it's a user defined chain */ (*udc_cnt)++; - if (((struct ebt_entries *)e)->counter_offset != *totalcnt) { - BUGPRINT("counter_offset != totalcnt"); + if (((struct ebt_entries *)e)->counter_offset != *totalcnt) return -EINVAL; - } *n = ((struct ebt_entries *)e)->nentries; *cnt = 0; return 0; @@ -537,15 +521,13 @@ ebt_check_entry_size_and_hooks(const struct ebt_entry *e, /* a plain old entry, heh */ if (sizeof(struct ebt_entry) > e->watchers_offset || e->watchers_offset > e->target_offset || - e->target_offset >= e->next_offset) { - BUGPRINT("entry offsets not in right order\n"); + e->target_offset >= e->next_offset) return -EINVAL; - } + /* this is not checked anywhere else */ - if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) { - BUGPRINT("target size too small\n"); + if (e->next_offset - e->target_offset < sizeof(struct ebt_entry_target)) return -EINVAL; - } + (*cnt)++; (*totalcnt)++; return 0; @@ -665,18 +647,15 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, if (e->bitmask == 0) return 0; - if (e->bitmask & ~EBT_F_MASK) { - BUGPRINT("Unknown flag for bitmask\n"); + if (e->bitmask & ~EBT_F_MASK) return -EINVAL; - } - if (e->invflags & ~EBT_INV_MASK) { - BUGPRINT("Unknown flag for inv bitmask\n"); + + if (e->invflags & ~EBT_INV_MASK) return -EINVAL; - } - if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) { - BUGPRINT("NOPROTO & 802_3 not allowed\n"); + + if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) return -EINVAL; - } + /* what hook do we belong to? */ for (i = 0; i < NF_BR_NUMHOOKS; i++) { if (!newinfo->hook_entry[i]) @@ -735,13 +714,11 @@ ebt_check_entry(struct ebt_entry *e, struct net *net, t->u.target = target; if (t->u.target == &ebt_standard_target) { if (gap < sizeof(struct ebt_standard_target)) { - BUGPRINT("Standard target size too big\n"); ret = -EFAULT; goto cleanup_watchers; } if (((struct ebt_standard_target *)t)->verdict < -NUM_STANDARD_TARGETS) { - BUGPRINT("Invalid standard target\n"); ret = -EFAULT; goto cleanup_watchers; } @@ -801,10 +778,9 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack if (strcmp(t->u.name, EBT_STANDARD_TARGET)) goto letscontinue; if (e->target_offset + sizeof(struct ebt_standard_target) > - e->next_offset) { - BUGPRINT("Standard target size too big\n"); + e->next_offset) return -1; - } + verdict = ((struct ebt_standard_target *)t)->verdict; if (verdict >= 0) { /* jump to another chain */ struct ebt_entries *hlp2 = @@ -813,14 +789,12 @@ static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack if (hlp2 == cl_s[i].cs.chaininfo) break; /* bad destination or loop */ - if (i == udc_cnt) { - BUGPRINT("bad destination\n"); + if (i == udc_cnt) return -1; - } - if (cl_s[i].cs.n) { - BUGPRINT("loop\n"); + + if (cl_s[i].cs.n) return -1; - } + if (cl_s[i].hookmask & (1 << hooknr)) goto letscontinue; /* this can't be 0, so the loop test is correct */ @@ -853,24 +827,21 @@ static int translate_table(struct net *net, const char *name, i = 0; while (i < NF_BR_NUMHOOKS && !newinfo->hook_entry[i]) i++; - if (i == NF_BR_NUMHOOKS) { - BUGPRINT("No valid hooks specified\n"); + if (i == NF_BR_NUMHOOKS) return -EINVAL; - } - if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) { - BUGPRINT("Chains don't start at beginning\n"); + + if (newinfo->hook_entry[i] != (struct ebt_entries *)newinfo->entries) return -EINVAL; - } + /* make sure chains are ordered after each other in same order * as their corresponding hooks */ for (j = i + 1; j < NF_BR_NUMHOOKS; j++) { if (!newinfo->hook_entry[j]) continue; - if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) { - BUGPRINT("Hook order must be followed\n"); + if (newinfo->hook_entry[j] <= newinfo->hook_entry[i]) return -EINVAL; - } + i = j; } @@ -888,15 +859,11 @@ static int translate_table(struct net *net, const char *name, if (ret != 0) return ret; - if (i != j) { - BUGPRINT("nentries does not equal the nr of entries in the " - "(last) chain\n"); + if (i != j) return -EINVAL; - } - if (k != newinfo->nentries) { - BUGPRINT("Total nentries is wrong\n"); + + if (k != newinfo->nentries) return -EINVAL; - } /* get the location of the udc, put them in an array * while we're at it, allocate the chainstack @@ -929,7 +896,6 @@ static int translate_table(struct net *net, const char *name, ebt_get_udc_positions, newinfo, &i, cl_s); /* sanity check */ if (i != udc_cnt) { - BUGPRINT("i != udc_cnt\n"); vfree(cl_s); return -EFAULT; } @@ -1030,7 +996,6 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_unlock; if (repl->num_counters && repl->num_counters != t->private->nentries) { - BUGPRINT("Wrong nr. of counters requested\n"); ret = -EINVAL; goto free_unlock; } @@ -1115,15 +1080,12 @@ static int do_replace(struct net *net, const void __user *user, if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) return -EFAULT; - if (len != sizeof(tmp) + tmp.entries_size) { - BUGPRINT("Wrong len argument\n"); + if (len != sizeof(tmp) + tmp.entries_size) return -EINVAL; - } - if (tmp.entries_size == 0) { - BUGPRINT("Entries_size never zero\n"); + if (tmp.entries_size == 0) return -EINVAL; - } + /* overflow check */ if (tmp.nentries >= ((INT_MAX - sizeof(struct ebt_table_info)) / NR_CPUS - SMP_CACHE_BYTES) / sizeof(struct ebt_counter)) @@ -1150,7 +1112,6 @@ static int do_replace(struct net *net, const void __user *user, } if (copy_from_user( newinfo->entries, tmp.entries, tmp.entries_size) != 0) { - BUGPRINT("Couldn't copy entries from userspace\n"); ret = -EFAULT; goto free_entries; } @@ -1197,10 +1158,8 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, if (input_table == NULL || (repl = input_table->table) == NULL || repl->entries == NULL || repl->entries_size == 0 || - repl->counters != NULL || input_table->private != NULL) { - BUGPRINT("Bad table data for ebt_register_table!!!\n"); + repl->counters != NULL || input_table->private != NULL) return -EINVAL; - } /* Don't add one table to multiple lists. */ table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); @@ -1238,13 +1197,10 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, ((char *)repl->hook_entry[i] - repl->entries); } ret = translate_table(net, repl->name, newinfo); - if (ret != 0) { - BUGPRINT("Translate_table failed\n"); + if (ret != 0) goto free_chainstack; - } if (table->check && table->check(newinfo, table->valid_hooks)) { - BUGPRINT("The table doesn't like its own initial data, lol\n"); ret = -EINVAL; goto free_chainstack; } @@ -1255,7 +1211,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; - BUGPRINT("Table name already exists\n"); goto free_unlock; } } @@ -1327,7 +1282,6 @@ static int do_update_counters(struct net *net, const char *name, goto free_tmp; if (num_counters != t->private->nentries) { - BUGPRINT("Wrong nr of counters\n"); ret = -EINVAL; goto unlock_mutex; } @@ -1452,10 +1406,8 @@ static int copy_counters_to_user(struct ebt_table *t, if (num_counters == 0) return 0; - if (num_counters != nentries) { - BUGPRINT("Num_counters wrong\n"); + if (num_counters != nentries) return -EINVAL; - } counterstmp = vmalloc(nentries * sizeof(*counterstmp)); if (!counterstmp) @@ -1501,15 +1453,11 @@ static int copy_everything_to_user(struct ebt_table *t, void __user *user, (tmp.num_counters ? nentries * sizeof(struct ebt_counter) : 0)) return -EINVAL; - if (tmp.nentries != nentries) { - BUGPRINT("Nentries wrong\n"); + if (tmp.nentries != nentries) return -EINVAL; - } - if (tmp.entries_size != entries_size) { - BUGPRINT("Wrong size\n"); + if (tmp.entries_size != entries_size) return -EINVAL; - } ret = copy_counters_to_user(t, oldcounters, tmp.counters, tmp.num_counters, nentries); @@ -1581,7 +1529,6 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } mutex_unlock(&ebt_mutex); if (copy_to_user(user, &tmp, *len) != 0) { - BUGPRINT("c2u Didn't work\n"); ret = -EFAULT; break; } @@ -1832,20 +1779,28 @@ static int compat_calc_entry(const struct ebt_entry *e, return 0; } +static int ebt_compat_init_offsets(unsigned int number) +{ + if (number > INT_MAX) + return -EINVAL; + + /* also count the base chain policies */ + number += NF_BR_NUMHOOKS; + + return xt_compat_init_offsets(NFPROTO_BRIDGE, number); +} static int compat_table_info(const struct ebt_table_info *info, struct compat_ebt_replace *newinfo) { unsigned int size = info->entries_size; const void *entries = info->entries; + int ret; newinfo->entries_size = size; - if (info->nentries) { - int ret = xt_compat_init_offsets(NFPROTO_BRIDGE, - info->nentries); - if (ret) - return ret; - } + ret = ebt_compat_init_offsets(info->nentries); + if (ret) + return ret; return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, entries, newinfo); @@ -1921,7 +1876,7 @@ static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) } static int ebt_buf_add(struct ebt_entries_buf_state *state, - void *data, unsigned int sz) + const void *data, unsigned int sz) { if (state->buf_kern_start == NULL) goto count_only; @@ -1955,7 +1910,7 @@ enum compat_mwt { EBT_COMPAT_TARGET, }; -static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, +static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, enum compat_mwt compat_mwt, struct ebt_entries_buf_state *state, const unsigned char *base) @@ -2031,22 +1986,23 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, /* return size of all matches, watchers or target, including necessary * alignment and padding. */ -static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, +static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32, unsigned int size_left, enum compat_mwt type, struct ebt_entries_buf_state *state, const void *base) { + const char *buf = (const char *)match32; int growth = 0; - char *buf; if (size_left == 0) return 0; - buf = (char *) match32; - - while (size_left >= sizeof(*match32)) { + do { struct ebt_entry_match *match_kern; int ret; + if (size_left < sizeof(*match32)) + return -EINVAL; + match_kern = (struct ebt_entry_match *) state->buf_kern_start; if (match_kern) { char *tmp; @@ -2083,21 +2039,18 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - if (WARN_ON(type == EBT_COMPAT_TARGET && size_left)) - return -EINVAL; - match32 = (struct compat_ebt_entry_mwt *) buf; - } + } while (size_left); return growth; } /* called for all ebt_entry structures. */ -static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, +static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base, unsigned int *total, struct ebt_entries_buf_state *state) { - unsigned int i, j, startoff, new_offset = 0; + unsigned int i, j, startoff, next_expected_off, new_offset = 0; /* stores match/watchers/targets & offset of next struct ebt_entry: */ unsigned int offsets[4]; unsigned int *offsets_update = NULL; @@ -2184,11 +2137,13 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, return ret; } - startoff = state->buf_user_offset - startoff; + next_expected_off = state->buf_user_offset - startoff; + if (next_expected_off != entry->next_offset) + return -EINVAL; - if (WARN_ON(*total < startoff)) + if (*total < entry->next_offset) return -EINVAL; - *total -= startoff; + *total -= entry->next_offset; return 0; } @@ -2292,9 +2247,10 @@ static int compat_do_replace(struct net *net, void __user *user, xt_compat_lock(NFPROTO_BRIDGE); - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); + ret = ebt_compat_init_offsets(tmp.nentries); if (ret < 0) goto out_unlock; + ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); if (ret < 0) goto out_unlock; @@ -2316,8 +2272,10 @@ static int compat_do_replace(struct net *net, void __user *user, state.buf_kern_len = size64; ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); - if (WARN_ON(ret < 0)) + if (WARN_ON(ret < 0)) { + vfree(entries_tmp); goto out_unlock; + } vfree(entries_tmp); tmp.entries_size = size64; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 632d5a416d97..df936d2f58bd 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -953,7 +953,7 @@ static unsigned int caif_poll(struct file *file, mask |= POLLRDHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= POLLIN | POLLRDNORM; diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index f5afda1abc76..4dc82e9a855d 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -352,15 +352,14 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) u8 cmdrsp; u8 cmd; int ret = -1; - u16 tmp16; u8 len; u8 param[255]; - u8 linkid; + u8 linkid = 0; struct cfctrl *cfctrl = container_obj(layer); struct cfctrl_request_info rsp, *req; - cfpkt_extr_head(pkt, &cmdrsp, 1); + cmdrsp = cfpkt_extr_head_u8(pkt); cmd = cmdrsp & CFCTRL_CMD_MASK; if (cmd != CFCTRL_CMD_LINK_ERR && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp) @@ -378,13 +377,12 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) u8 physlinkid; u8 prio; u8 tmp; - u32 tmp32; u8 *cp; int i; struct cfctrl_link_param linkparam; memset(&linkparam, 0, sizeof(linkparam)); - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); serv = tmp & CFCTRL_SRV_MASK; linkparam.linktype = serv; @@ -392,13 +390,13 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) servtype = tmp >> 4; linkparam.chtype = servtype; - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); physlinkid = tmp & 0x07; prio = tmp >> 3; linkparam.priority = prio; linkparam.phyid = physlinkid; - cfpkt_extr_head(pkt, &endpoint, 1); + endpoint = cfpkt_extr_head_u8(pkt); linkparam.endpoint = endpoint & 0x03; switch (serv) { @@ -407,45 +405,43 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_VIDEO: - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); linkparam.u.video.connid = tmp; if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_DATAGRAM: - cfpkt_extr_head(pkt, &tmp32, 4); linkparam.u.datagram.connid = - le32_to_cpu(tmp32); + cfpkt_extr_head_u32(pkt); if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_RFM: /* Construct a frame, convert * DatagramConnectionID * to network format long and copy it out... */ - cfpkt_extr_head(pkt, &tmp32, 4); linkparam.u.rfm.connid = - le32_to_cpu(tmp32); + cfpkt_extr_head_u32(pkt); cp = (u8 *) linkparam.u.rfm.volume; - for (cfpkt_extr_head(pkt, &tmp, 1); + for (tmp = cfpkt_extr_head_u8(pkt); cfpkt_more(pkt) && tmp != '\0'; - cfpkt_extr_head(pkt, &tmp, 1)) + tmp = cfpkt_extr_head_u8(pkt)) *cp++ = tmp; *cp = '\0'; if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); break; case CFCTRL_SRV_UTIL: @@ -454,13 +450,11 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) * to network format long and copy it out... */ /* Fifosize KB */ - cfpkt_extr_head(pkt, &tmp16, 2); linkparam.u.utility.fifosize_kb = - le16_to_cpu(tmp16); + cfpkt_extr_head_u16(pkt); /* Fifosize bufs */ - cfpkt_extr_head(pkt, &tmp16, 2); linkparam.u.utility.fifosize_bufs = - le16_to_cpu(tmp16); + cfpkt_extr_head_u16(pkt); /* name */ cp = (u8 *) linkparam.u.utility.name; caif_assert(sizeof(linkparam.u.utility.name) @@ -468,24 +462,24 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) for (i = 0; i < UTILITY_NAME_LENGTH && cfpkt_more(pkt); i++) { - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); *cp++ = tmp; } /* Length */ - cfpkt_extr_head(pkt, &len, 1); + len = cfpkt_extr_head_u8(pkt); linkparam.u.utility.paramlen = len; /* Param Data */ cp = linkparam.u.utility.params; while (cfpkt_more(pkt) && len--) { - cfpkt_extr_head(pkt, &tmp, 1); + tmp = cfpkt_extr_head_u8(pkt); *cp++ = tmp; } if (CFCTRL_ERR_BIT & cmdrsp) break; /* Link ID */ - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); /* Length */ - cfpkt_extr_head(pkt, &len, 1); + len = cfpkt_extr_head_u8(pkt); /* Param Data */ cfpkt_extr_head(pkt, ¶m, len); break; @@ -522,7 +516,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt) } break; case CFCTRL_CMD_LINK_DESTROY: - cfpkt_extr_head(pkt, &linkid, 1); + linkid = cfpkt_extr_head_u8(pkt); cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); break; case CFCTRL_CMD_LINK_ERR: diff --git a/net/can/af_can.c b/net/can/af_can.c index e3626e8500c2..46c85731d16f 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -105,6 +105,7 @@ EXPORT_SYMBOL(can_ioctl); static void can_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&sk->sk_error_queue); } static const struct can_proto *can_get_proto(int protocol) @@ -958,6 +959,8 @@ static struct pernet_operations can_pernet_ops __read_mostly = { static __init int can_init(void) { + int err; + /* check for correct padding to be able to use the structs similarly */ BUILD_BUG_ON(offsetof(struct can_frame, can_dlc) != offsetof(struct canfd_frame, len) || @@ -971,15 +974,31 @@ static __init int can_init(void) if (!rcv_cache) return -ENOMEM; - register_pernet_subsys(&can_pernet_ops); + err = register_pernet_subsys(&can_pernet_ops); + if (err) + goto out_pernet; /* protocol register */ - sock_register(&can_family_ops); - register_netdevice_notifier(&can_netdev_notifier); + err = sock_register(&can_family_ops); + if (err) + goto out_sock; + err = register_netdevice_notifier(&can_netdev_notifier); + if (err) + goto out_notifier; + dev_add_pack(&can_packet); dev_add_pack(&canfd_packet); return 0; + +out_notifier: + sock_unregister(PF_CAN); +out_sock: + unregister_pernet_subsys(&can_pernet_ops); +out_pernet: + kmem_cache_destroy(rcv_cache); + + return err; } static __exit void can_exit(void) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index cdb5b693a135..1001377ae428 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -720,7 +720,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) } EXPORT_SYMBOL(__ceph_open_session); - int ceph_open_session(struct ceph_client *client) { int ret; @@ -736,6 +735,23 @@ int ceph_open_session(struct ceph_client *client) } EXPORT_SYMBOL(ceph_open_session); +int ceph_wait_for_latest_osdmap(struct ceph_client *client, + unsigned long timeout) +{ + u64 newest_epoch; + int ret; + + ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); + if (ret) + return ret; + + if (client->osdc.osdmap->epoch >= newest_epoch) + return 0; + + ceph_osdc_maybe_request_map(&client->osdc); + return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout); +} +EXPORT_SYMBOL(ceph_wait_for_latest_osdmap); static int __init init_ceph_lib(void) { diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index f864807284d4..081a41c75341 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2057,6 +2057,8 @@ static int process_connect(struct ceph_connection *con) dout("process_connect on %p tag %d\n", con, (int)con->in_tag); if (con->auth) { + int len = le32_to_cpu(con->in_reply.authorizer_len); + /* * Any connection that defines ->get_authorizer() * should also define ->add_authorizer_challenge() and @@ -2066,8 +2068,7 @@ static int process_connect(struct ceph_connection *con) */ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { ret = con->ops->add_authorizer_challenge( - con, con->auth->authorizer_reply_buf, - le32_to_cpu(con->in_reply.authorizer_len)); + con, con->auth->authorizer_reply_buf, len); if (ret < 0) return ret; @@ -2077,10 +2078,12 @@ static int process_connect(struct ceph_connection *con) return 0; } - ret = con->ops->verify_authorizer_reply(con); - if (ret < 0) { - con->error_msg = "bad authorize reply"; - return ret; + if (len) { + ret = con->ops->verify_authorizer_reply(con); + if (ret < 0) { + con->error_msg = "bad authorize reply"; + return ret; + } } } @@ -3210,9 +3213,10 @@ void ceph_con_keepalive(struct ceph_connection *con) dout("con_keepalive %p\n", con); mutex_lock(&con->mutex); clear_standby(con); + con_flag_set(con, CON_FLAG_KEEPALIVE_PENDING); mutex_unlock(&con->mutex); - if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 && - con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) + + if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index f14498a7eaec..daca0af59942 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, mutex_unlock(&monc->mutex); ret = wait_generic_request(req); + if (!ret) + /* + * Make sure we have the osdmap that includes the blacklist + * entry. This is needed to ensure that the OSDs pick up the + * new blacklist before processing any future requests from + * this client. + */ + ret = ceph_wait_for_latest_osdmap(monc->client, 0); + out: put_generic_request(req); return ret; diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 53ea2d48896c..92b2641ab93b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1330,7 +1330,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, struct ceph_osds up, acting; bool force_resend = false; bool unpaused = false; - bool legacy_change; + bool legacy_change = false; bool split = false; bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); bool recovery_deletes = ceph_osdmap_flag(osdc, @@ -1426,15 +1426,14 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, t->osd = acting.primary; } - if (unpaused || legacy_change || force_resend || - (split && con && CEPH_HAVE_FEATURE(con->peer_features, - RESEND_ON_SPLIT))) + if (unpaused || legacy_change || force_resend || split) ct_res = CALC_TARGET_NEED_RESEND; else ct_res = CALC_TARGET_NO_ACTION; out: - dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd); + dout("%s t %p -> %d%d%d%d ct_res %d osd%d\n", __func__, t, unpaused, + legacy_change, force_resend, split, ct_res, t->osd); return ct_res; } diff --git a/net/core/datagram.c b/net/core/datagram.c index d8a0774f7608..85fcca395fad 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, if (error) goto out_err; - if (sk->sk_receive_queue.prev != skb) + if (READ_ONCE(sk->sk_receive_queue.prev) != skb) goto out; /* Socket shut down? */ @@ -281,7 +281,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(&sk->sk_receive_queue)); + } while (READ_ONCE(sk->sk_receive_queue.prev) != *last); error = -EAGAIN; @@ -844,7 +844,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -854,7 +854,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ diff --git a/net/core/dev.c b/net/core/dev.c index 54ba5b5bc55c..f9f05b3df460 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3029,7 +3029,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de } skb = next; - if (netif_xmit_stopped(txq) && skb) { + if (netif_tx_queue_stopped(txq) && skb) { rc = NETDEV_TX_BUSY; break; } @@ -5067,7 +5067,6 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) skb_reset_mac_header(skb); skb_gro_reset_offset(skb); - eth = skb_gro_header_fast(skb, 0); if (unlikely(skb_gro_header_hard(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0); if (unlikely(!eth)) { @@ -5077,6 +5076,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) return NULL; } } else { + eth = (const struct ethhdr *)skb->data; gro_pull_from_frag0(skb, hlen); NAPI_GRO_CB(skb)->frag0 += hlen; NAPI_GRO_CB(skb)->frag0_len -= hlen; @@ -5308,11 +5308,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done) if (work_done) timeout = n->dev->gro_flush_timeout; + /* When the NAPI instance uses a timeout and keeps postponing + * it, we need to bound somehow the time packets are kept in + * the GRO layer + */ + napi_gro_flush(n, !!timeout); if (timeout) hrtimer_start(&n->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); - else - napi_gro_flush(n, false); } if (unlikely(!list_empty(&n->poll_list))) { /* If n->poll_list is not empty, we need to mask irqs */ @@ -6873,7 +6876,8 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu) if (ops->ndo_change_mtu) return ops->ndo_change_mtu(dev, new_mtu); - dev->mtu = new_mtu; + /* Pairs with all the lockless reads of dev->mtu in the stack */ + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL(__dev_set_mtu); @@ -7260,7 +7264,7 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(upper->wanted_features & feature) && (features & feature)) { @@ -7280,7 +7284,7 @@ static void netdev_sync_lower_features(struct net_device *upper, netdev_features_t feature; int feature_bit; - for_each_netdev_feature(&upper_disables, feature_bit) { + for_each_netdev_feature(upper_disables, feature_bit) { feature = __NETIF_F_BIT(feature_bit); if (!(features & feature) && (lower->features & feature)) { netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", @@ -7695,6 +7699,8 @@ int register_netdevice(struct net_device *dev) ret = notifier_to_errno(ret); if (ret) { rollback_registered(dev); + rcu_barrier(); + dev->reg_state = NETREG_UNREGISTERED; } /* @@ -7849,7 +7855,7 @@ static void netdev_wait_allrefs(struct net_device *dev) refcnt = netdev_refcnt_read(dev); - if (time_after(jiffies, warning_time + 10 * HZ)) { + if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", dev->name, refcnt); warning_time = jiffies; @@ -8649,6 +8655,8 @@ static void __net_exit default_device_exit(struct net *net) /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); + if (__dev_get_by_name(&init_net, fb_name)) + snprintf(fb_name, IFNAMSIZ, "dev%%d"); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { pr_emerg("%s: failed to move %s to init_net: %d\n", diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3469f5053c79..7822defa5a5d 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -890,8 +890,13 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, if (rc >= 0) info.n_priv_flags = rc; } - if (ops->get_regs_len) - info.regdump_len = ops->get_regs_len(dev); + if (ops->get_regs_len) { + int ret = ops->get_regs_len(dev); + + if (ret > 0) + info.regdump_len = ret; + } + if (ops->get_eeprom_len) info.eedump_len = ops->get_eeprom_len(dev); @@ -1392,6 +1397,9 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) return -EFAULT; reglen = ops->get_regs_len(dev); + if (reglen <= 0) + return reglen; + if (regs.len > reglen) regs.len = reglen; @@ -1402,13 +1410,16 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) return -ENOMEM; } + if (regs.len < reglen) + reglen = regs.len; + ops->get_regs(dev, ®s, regbuf); ret = -EFAULT; if (copy_to_user(useraddr, ®s, sizeof(regs))) goto out; useraddr += offsetof(struct ethtool_regs, data); - if (regbuf && copy_to_user(useraddr, regbuf, regs.len)) + if (copy_to_user(useraddr, regbuf, reglen)) goto out; ret = 0; @@ -1439,11 +1450,13 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { - struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + struct ethtool_wolinfo wol; if (!dev->ethtool_ops->get_wol) return -EOPNOTSUPP; + memset(&wol, 0, sizeof(struct ethtool_wolinfo)); + wol.cmd = ETHTOOL_GWOL; dev->ethtool_ops->get_wol(dev, &wol); if (copy_to_user(useraddr, &wol, sizeof(wol))) @@ -1815,11 +1828,15 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) WARN_ON_ONCE(!ret); gstrings.len = ret; - data = vzalloc(gstrings.len * ETH_GSTRING_LEN); - if (gstrings.len && !data) - return -ENOMEM; + if (gstrings.len) { + data = vzalloc(gstrings.len * ETH_GSTRING_LEN); + if (!data) + return -ENOMEM; - __ethtool_get_strings(dev, gstrings.string_set, data); + __ethtool_get_strings(dev, gstrings.string_set, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) @@ -1915,11 +1932,14 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(n_stats * sizeof(u64)); - if (n_stats && !data) - return -ENOMEM; - - ops->get_ethtool_stats(dev, &stats, data); + if (n_stats) { + data = vzalloc(n_stats * sizeof(u64)); + if (!data) + return -ENOMEM; + ops->get_ethtool_stats(dev, &stats, data); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) @@ -1955,13 +1975,17 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) return -EFAULT; stats.n_stats = n_stats; - data = vzalloc(n_stats * sizeof(u64)); - if (n_stats && !data) - return -ENOMEM; + if (n_stats) { + data = vzalloc(n_stats * sizeof(u64)); + if (!data) + return -ENOMEM; - mutex_lock(&phydev->lock); - phydev->drv->get_stats(phydev, &stats, data); - mutex_unlock(&phydev->lock); + mutex_lock(&phydev->lock); + phydev->drv->get_stats(phydev, &stats, data); + mutex_unlock(&phydev->lock); + } else { + data = NULL; + } ret = -EFAULT; if (copy_to_user(useraddr, &stats, sizeof(stats))) diff --git a/net/core/filter.c b/net/core/filter.c index f7605ce484d4..a3646230fbee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3081,10 +3081,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; @@ -3102,7 +3104,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_rcvlowat = val ? : 1; break; case SO_MARK: - sk->sk_mark = val; + if (sk->sk_mark != val) { + sk->sk_mark = val; + sk_dst_reset(sk); + } break; default: ret = -EINVAL; @@ -3129,7 +3134,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, /* Only some options are supported */ switch (optname) { case TCP_BPF_IW: - if (val <= 0 || tp->data_segs_out > 0) + if (val <= 0 || tp->data_segs_out > tp->syn_data) ret = -EINVAL; else tp->snd_cwnd = val; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e2e716003ede..071de3013364 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -450,9 +450,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb, nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); #if IS_ENABLED(CONFIG_NET_DSA) - if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) { + if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && + proto == htons(ETH_P_XDSA))) { const struct dsa_device_ops *ops; - int offset; + int offset = 0; ops = skb->dev->dsa_ptr->tag_ops; if (ops->flow_dissect && @@ -889,45 +890,34 @@ out_bad: } EXPORT_SYMBOL(__skb_flow_dissect); -static u32 hashrnd __read_mostly; +static siphash_key_t hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_words(const u32 *words, u32 length, - u32 keyval) +static const void *flow_keys_hash_start(const struct flow_keys *flow) { - return jhash2(words, length, keyval); -} - -static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow) -{ - const void *p = flow; - - BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); - return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET); + BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); + return &flow->FLOW_KEYS_HASH_START_FIELD; } static inline size_t flow_keys_hash_length(const struct flow_keys *flow) { - size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); - BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); - BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != - sizeof(*flow) - sizeof(flow->addrs)); + size_t len = offsetof(typeof(*flow), addrs) - FLOW_KEYS_HASH_OFFSET; switch (flow->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - diff -= sizeof(flow->addrs.v4addrs); + len += sizeof(flow->addrs.v4addrs); break; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - diff -= sizeof(flow->addrs.v6addrs); + len += sizeof(flow->addrs.v6addrs); break; case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - diff -= sizeof(flow->addrs.tipcaddrs); + len += sizeof(flow->addrs.tipcaddrs); break; } - return (sizeof(*flow) - diff) / sizeof(u32); + return len; } __be32 flow_get_u32_src(const struct flow_keys *flow) @@ -993,14 +983,15 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) } } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, + const siphash_key_t *keyval) { u32 hash; __flow_hash_consistentify(keys); - hash = __flow_hash_words(flow_keys_hash_start(keys), - flow_keys_hash_length(keys), keyval); + hash = siphash(flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; @@ -1010,12 +1001,13 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 flow_hash_from_keys(struct flow_keys *keys) { __flow_hash_secret_init(); - return __flow_hash_from_keys(keys, hashrnd); + return __flow_hash_from_keys(keys, &hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, - struct flow_keys *keys, u32 keyval) + struct flow_keys *keys, + const siphash_key_t *keyval) { skb_flow_dissect_flow_keys(skb, keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -1063,7 +1055,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); - return __flow_hash_from_keys(&keys, hashrnd); + return __flow_hash_from_keys(&keys, &hashrnd); } EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); @@ -1083,13 +1075,14 @@ void __skb_get_hash(struct sk_buff *skb) __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, hashrnd); + hash = ___skb_get_hash(skb, &keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash); -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb) { struct flow_keys keys; diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index acf45ddbe924..e095fb871d91 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -13,22 +13,36 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct gro_cell *cell; + int res; - if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) - return netif_rx(skb); + rcu_read_lock(); + if (unlikely(!(dev->flags & IFF_UP))) + goto drop; + + if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) { + res = netif_rx(skb); + goto unlock; + } cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { +drop: atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); - return NET_RX_DROP; + res = NET_RX_DROP; + goto unlock; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); - return NET_RX_SUCCESS; + + res = NET_RX_SUCCESS; + +unlock: + rcu_read_unlock(); + return res; } EXPORT_SYMBOL(gro_cells_receive); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index dcb89cbc2730..16ac50b1b9a7 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> +#include <linux/kmemleak.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/module.h> @@ -30,6 +31,7 @@ #include <linux/times.h> #include <net/net_namespace.h> #include <net/neighbour.h> +#include <net/arp.h> #include <net/dst.h> #include <net/sock.h> #include <net/netevent.h> @@ -360,12 +362,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { buckets = kzalloc(size, GFP_ATOMIC); - else + } else { buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); + kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); + } if (!buckets) { kfree(ret); return NULL; @@ -385,10 +389,12 @@ static void neigh_hash_free_rcu(struct rcu_head *head) size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { kfree(buckets); - else + } else { + kmemleak_free(buckets); free_pages((unsigned long)buckets, get_order(size)); + } kfree(nht); } @@ -1017,6 +1023,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh_del_timer(neigh); neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), @@ -1033,6 +1040,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) } } else if (neigh->nud_state & NUD_STALE) { neigh_dbg(2, "neigh %p is delayed\n", neigh); + neigh_del_timer(neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_add_timer(neigh, jiffies + @@ -1086,7 +1094,7 @@ static void neigh_update_hhs(struct neighbour *neigh) if (update) { hh = &neigh->hh; - if (hh->hh_len) { + if (READ_ONCE(hh->hh_len)) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); @@ -1347,7 +1355,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) struct net_device *dev = neigh->dev; unsigned int seq; - if (dev->header_ops->cache && !neigh->hh.hh_len) + if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len)) neigh_hh_init(neigh); do { @@ -2528,7 +2536,13 @@ int neigh_xmit(int index, struct net_device *dev, if (!tbl) goto out; rcu_read_lock_bh(); - neigh = __neigh_lookup_noref(tbl, addr, dev); + if (index == NEIGH_ARP_TABLE) { + u32 key = *((u32 *)addr); + + neigh = __ipv4_neigh_lookup_noref(dev, key); + } else { + neigh = __neigh_lookup_noref(tbl, addr, dev); + } if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); @@ -2736,6 +2750,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) } void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) + __acquires(tbl->lock) __acquires(rcu_bh) { struct neigh_seq_state *state = seq->private; @@ -2746,6 +2761,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl rcu_read_lock_bh(); state->nht = rcu_dereference_bh(tbl->nht); + read_lock(&tbl->lock); return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } @@ -2779,8 +2795,13 @@ out: EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) + __releases(tbl->lock) __releases(rcu_bh) { + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + read_unlock(&tbl->lock); rcu_read_unlock_bh(); } EXPORT_SYMBOL(neigh_seq_stop); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8f17724a173c..dee57c5ff738 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -917,6 +917,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) { @@ -926,7 +928,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) } kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return error; } @@ -1327,6 +1328,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); if (error) { @@ -1336,7 +1339,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) #endif kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return 0; } @@ -1402,6 +1404,9 @@ static int register_queue_kobjects(struct net_device *dev) error: netdev_queue_update_kobjects(dev, txq, 0); net_rx_queue_update_kobjects(dev, rxq, 0); +#ifdef CONFIG_SYSFS + kset_unregister(dev->queues_kset); +#endif return error; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 0dd6359e5924..1af25d53f63c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -285,6 +285,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) atomic_set(&net->count, 1); refcount_set(&net->passive, 1); + get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); @@ -853,7 +854,8 @@ static int __init net_ns_init(void) mutex_unlock(&net_mutex); - register_pernet_subsys(&net_ns_ops); + if (register_pernet_subsys(&net_ns_ops)) + panic("Could not register network namespace subsystems"); rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, RTNL_FLAG_DOIT_UNLOCKED); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 912731bed7b7..abab3753a9e0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -122,7 +122,7 @@ static void queue_process(struct work_struct *work) txq = netdev_get_tx_queue(dev, q_index); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || - netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { + !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) { skb_queue_head(&npinfo->txq, skb); HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); @@ -357,7 +357,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, HARD_TX_UNLOCK(dev, txq); - if (status == NETDEV_TX_OK) + if (dev_xmit_complete(status)) break; } @@ -374,7 +374,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } - if (status != NETDEV_TX_OK) { + if (!dev_xmit_complete(status)) { skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6e1e10ff433a..884afb8e9fc4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3149,7 +3149,13 @@ static int pktgen_wait_thread_run(struct pktgen_thread *t) { while (thread_is_running(t)) { + /* note: 't' will still be around even after the unlock/lock + * cycle because pktgen_thread threads are only cleared at + * net exit + */ + mutex_unlock(&pktgen_thread_lock); msleep_interruptible(100); + mutex_lock(&pktgen_thread_lock); if (signal_pending(current)) goto signal; @@ -3164,6 +3170,10 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn) struct pktgen_thread *t; int sig = 1; + /* prevent from racing with rmmod */ + if (!try_module_get(THIS_MODULE)) + return sig; + mutex_lock(&pktgen_thread_lock); list_for_each_entry(t, &pn->pktgen_threads, th_list) { @@ -3177,6 +3187,7 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn) t->control |= (T_STOP); mutex_unlock(&pktgen_thread_lock); + module_put(THIS_MODULE); return sig; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 925af6b43017..b598e9909fec 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1767,6 +1767,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_MAC]) { struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); + if (ivm->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_mac) err = ops->ndo_set_vf_mac(dev, ivm->vf, @@ -1778,6 +1780,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_VLAN]) { struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + if (ivv->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, @@ -1810,6 +1814,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (len == 0) return -EINVAL; + if (ivvl[0]->vf >= INT_MAX) + return -EINVAL; err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) @@ -1820,6 +1826,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); struct ifla_vf_info ivf; + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_get_vf_config) err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); @@ -1838,6 +1846,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_RATE]) { struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_rate) err = ops->ndo_set_vf_rate(dev, ivt->vf, @@ -1850,6 +1860,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_SPOOFCHK]) { struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + if (ivs->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_spoofchk) err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, @@ -1861,6 +1873,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_LINK_STATE]) { struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + if (ivl->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_link_state) err = ops->ndo_set_vf_link_state(dev, ivl->vf, @@ -1874,6 +1888,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ivrssq_en->vf >= INT_MAX) + return -EINVAL; if (ops->ndo_set_vf_rss_query_en) err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, ivrssq_en->setting); @@ -1884,6 +1900,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_TRUST]) { struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_trust) err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); @@ -1894,15 +1912,18 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_IB_NODE_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; - return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); } if (tb[IFLA_VF_IB_PORT_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 873032d1a083..b1933e1f3aef 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -353,6 +353,8 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) */ void *netdev_alloc_frag(unsigned int fragsz) { + fragsz = SKB_DATA_ALIGN(fragsz); + return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); } EXPORT_SYMBOL(netdev_alloc_frag); @@ -366,6 +368,8 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) void *napi_alloc_frag(unsigned int fragsz) { + fragsz = SKB_DATA_ALIGN(fragsz); + return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); } EXPORT_SYMBOL(napi_alloc_frag); @@ -2295,6 +2299,7 @@ do_frag_list: kv.iov_base = skb->data + offset; kv.iov_len = slen; memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen); if (ret <= 0) @@ -3509,6 +3514,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int pos; int dummy; + if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && + (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { + /* gso_size is untrusted, and we have a frag_list with a linear + * non head_frag head. + * + * (we assume checking the first list_skb member suffices; + * i.e if either of the list_skb members have non head_frag + * head, then the first one has too). + * + * If head_skb's headlen does not fit requested gso_size, it + * means that the frag_list members do NOT terminate on exact + * gso_size boundaries. Hence we cannot perform skb_frag_t page + * sharing. Therefore we must fallback to copying the frag_list + * skbs; we do so by disabling SG. + */ + if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) + features &= ~NETIF_F_SG; + } + __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) @@ -3804,7 +3828,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *lp, *p = *head; unsigned int delta_truesize; - if (unlikely(p->len + len >= 65536)) + if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush)) return -E2BIG; lp = NAPI_GRO_CB(p)->last; @@ -4930,37 +4954,74 @@ unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) EXPORT_SYMBOL_GPL(skb_gso_transport_seglen); /** - * skb_gso_validate_mtu - Return in case such skb fits a given MTU + * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS * - * @skb: GSO skb - * @mtu: MTU to validate against + * There are a couple of instances where we have a GSO skb, and we + * want to determine what size it would be after it is segmented. * - * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU - * once split. + * We might want to check: + * - L3+L4+payload size (e.g. IP forwarding) + * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) + * + * This is a helper to do that correctly considering GSO_BY_FRAGS. + * + * @seg_len: The segmented length (from skb_gso_*_seglen). In the + * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. + * + * @max_len: The maximum permissible length. + * + * Returns true if the segmented length <= max length. */ -bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu) -{ +static inline bool skb_gso_size_check(const struct sk_buff *skb, + unsigned int seg_len, + unsigned int max_len) { const struct skb_shared_info *shinfo = skb_shinfo(skb); const struct sk_buff *iter; - unsigned int hlen; - - hlen = skb_gso_network_seglen(skb); if (shinfo->gso_size != GSO_BY_FRAGS) - return hlen <= mtu; + return seg_len <= max_len; /* Undo this so we can re-use header sizes */ - hlen -= GSO_BY_FRAGS; + seg_len -= GSO_BY_FRAGS; skb_walk_frags(skb, iter) { - if (hlen + skb_headlen(iter) > mtu) + if (seg_len + skb_headlen(iter) > max_len) return false; } return true; } + +/** + * skb_gso_validate_mtu - Return in case such skb fits a given MTU + * + * @skb: GSO skb + * @mtu: MTU to validate against + * + * skb_gso_validate_mtu validates if a given skb will fit a wanted MTU + * once split. + */ +bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); +} EXPORT_SYMBOL_GPL(skb_gso_validate_mtu); +/** + * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? + * + * @skb: GSO skb + * @len: length to validate against + * + * skb_gso_validate_mac_len validates if a given skb will fit a wanted + * length once split, including L2, L3 and L4 headers and the payload. + */ +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) +{ + return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); +} +EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); + static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { int mac_len; diff --git a/net/core/sock.c b/net/core/sock.c index a88579589946..90ccbbf9e6b0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1039,7 +1039,7 @@ set_rcvbuf: break; case SO_INCOMING_CPU: - sk->sk_incoming_cpu = val; + WRITE_ONCE(sk->sk_incoming_cpu, val); break; case SO_CNX_ADVICE: @@ -1351,16 +1351,13 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_INCOMING_CPU: - v.val = sk->sk_incoming_cpu; + v.val = READ_ONCE(sk->sk_incoming_cpu); break; case SO_MEMINFO: { u32 meminfo[SK_MEMINFO_VARS]; - if (get_user(len, optlen)) - return -EFAULT; - sk_get_meminfo(sk, meminfo); len = min_t(unsigned int, len, sizeof(meminfo)); @@ -1564,8 +1561,6 @@ static void __sk_destruct(struct rcu_head *head) sk_filter_uncharge(sk, filter); RCU_INIT_POINTER(sk->sk_filter, NULL); } - if (rcu_access_pointer(sk->sk_reuseport_cb)) - reuseport_detach_sock(sk); sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); @@ -1588,7 +1583,14 @@ static void __sk_destruct(struct rcu_head *head) void sk_destruct(struct sock *sk) { - if (sock_flag(sk, SOCK_RCU_FREE)) + bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); + + if (rcu_access_pointer(sk->sk_reuseport_cb)) { + reuseport_detach_sock(sk); + use_call_rcu = true; + } + + if (use_call_rcu) call_rcu(&sk->sk_rcu, __sk_destruct); else __sk_destruct(&sk->sk_rcu); @@ -2355,7 +2357,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) } if (sk_has_memory_pressure(sk)) { - int alloc; + u64 alloc; if (!sk_under_memory_pressure(sk)) return 1; @@ -3379,7 +3381,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return !skb_queue_empty(&sk->sk_receive_queue) || + return !skb_queue_empty_lockless(&sk->sk_receive_queue) || sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); diff --git a/net/core/stream.c b/net/core/stream.c index 1cff9c6270c6..31839fb06d88 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -120,7 +120,6 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) int err = 0; long vm_wait = 0; long current_timeo = *timeo_p; - bool noblock = (*timeo_p ? false : true); DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk_stream_memory_free(sk)) @@ -133,11 +132,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; - if (!*timeo_p) { - if (noblock) - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - goto do_nonblock; - } + if (!*timeo_p) + goto do_eagain; if (signal_pending(current)) goto do_interrupted; sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -169,7 +165,13 @@ out: do_error: err = -EPIPE; goto out; -do_nonblock: +do_eagain: + /* Make sure that whenever EAGAIN is returned, EPOLLOUT event can + * be generated later. + * When TCP receives ACK packets that make room, tcp_check_space() + * only calls tcp_new_space() if SOCK_NOSPACE is set. + */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; goto out; do_interrupted: diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a47ad6cd41c0..069e3c4fcc44 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -25,9 +25,12 @@ static int zero = 0; static int one = 1; +static int two __maybe_unused = 2; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; +static long long_one __maybe_unused = 1; +static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -250,6 +253,52 @@ static int proc_do_rss_key(struct ctl_table *table, int write, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } +#ifdef CONFIG_BPF_JIT +static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret, jit_enable = *(int *)table->data; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &jit_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + *(int *)table->data = jit_enable; + if (jit_enable == 2) + pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); + } + return ret; +} + +# ifdef CONFIG_HAVE_EBPF_JIT +static int +proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_dointvec_minmax(table, write, buffer, lenp, ppos); +} +# endif /* CONFIG_HAVE_EBPF_JIT */ + +static int +proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); +} +#endif + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -325,13 +374,14 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_enable, .maxlen = sizeof(int), .mode = 0644, -#ifndef CONFIG_BPF_JIT_ALWAYS_ON - .proc_handler = proc_dointvec -#else - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_bpf_enable, +# ifdef CONFIG_BPF_JIT_ALWAYS_ON .extra1 = &one, .extra2 = &one, -#endif +# else + .extra1 = &zero, + .extra2 = &two, +# endif }, # ifdef CONFIG_HAVE_EBPF_JIT { @@ -339,16 +389,29 @@ static struct ctl_table net_core_table[] = { .data = &bpf_jit_harden, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &zero, + .extra2 = &two, }, { .procname = "bpf_jit_kallsyms", .data = &bpf_jit_kallsyms, .maxlen = sizeof(int), .mode = 0600, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax_bpf_restricted, + .extra1 = &zero, + .extra2 = &one, }, # endif + { + .procname = "bpf_jit_limit", + .data = &bpf_jit_limit, + .maxlen = sizeof(long), + .mode = 0600, + .proc_handler = proc_dolongvec_minmax_bpf_restricted, + .extra1 = &long_one, + .extra2 = &long_max, + }, #endif { .procname = "netdev_tstamp_prequeue", diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 6eb837a47b5c..baaaeb2b2c42 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -202,7 +202,7 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len); } @@ -214,7 +214,7 @@ static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, u8 pkt, u8 opt, u8 *val, u8 len) { - if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL) + if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options) return 0; return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8e08cea6f178..176bddacc16e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -121,7 +121,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, inet->inet_dport); - inet->inet_id = dp->dccps_iss ^ jiffies; + inet->inet_id = prandom_u32(); err = dccp_connect(sk); rt = NULL; @@ -417,7 +417,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->inet_id = jiffies; + newinet->inet_id = prandom_u32(); if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) goto put_and_exit; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6344f1b18a6a..58a401e9cf09 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -433,8 +433,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index df042b6d80b8..22876a197ebe 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -56,7 +56,7 @@ #include <net/dn_neigh.h> #include <net/dn_fib.h> -#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn)) +#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn)) static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00}; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 0bd3afd01dd2..ccc189bc3617 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -118,7 +118,8 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb , u32 mtu); + struct sk_buff *skb , u32 mtu, + bool confirm_neigh); static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, @@ -259,7 +260,8 @@ static int dn_dst_gc(struct dst_ops *ops) * advertise to the other end). */ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 03c58b0eb082..8c8f756f3cc8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -300,15 +300,22 @@ static int __init dsa_init_module(void) rc = dsa_slave_register_notifier(); if (rc) - return rc; + goto register_notifier_fail; rc = dsa_legacy_register(); if (rc) - return rc; + goto legacy_register_fail; dev_add_pack(&dsa_pack_type); return 0; + +legacy_register_fail: + dsa_slave_unregister_notifier(); +register_notifier_fail: + destroy_workqueue(dsa_owq); + + return rc; } module_init(dsa_init_module); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 045d8a176279..0796355e74c1 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -62,7 +62,7 @@ static struct dsa_switch_tree *dsa_add_dst(u32 tree) return NULL; dst->tree = tree; INIT_LIST_HEAD(&dst->list); - list_add_tail(&dsa_switch_trees, &dst->list); + list_add_tail(&dst->list, &dsa_switch_trees); kref_init(&dst->refcount); return dst; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 242e74b9d454..b14d530a32b1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -156,10 +156,14 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = dsa_master_netdev(p); - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + } } static void dsa_slave_set_rx_mode(struct net_device *dev) diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index eaeba9b99a73..7e0e5f2706ba 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -239,7 +239,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 eth->h_proto = type; memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, neigh->ha, ETH_ALEN); - hh->hh_len = ETH_HLEN; + + /* Pairs with READ_ONCE() in neigh_resolve_output(), + * neigh_hh_output() and neigh_update_hhs(). + */ + smp_store_release(&hh->hh_len, ETH_HLEN); + return 0; } EXPORT_SYMBOL(eth_header_cache); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 172d8309f89e..cfe20f15f618 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -94,9 +94,8 @@ static void hsr_check_announce(struct net_device *hsr_dev, && (old_operstate != IF_OPER_UP)) { /* Went up */ hsr->announce_count = 0; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, + jiffies + msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL)); } if ((hsr_dev->operstate != IF_OPER_UP) && (old_operstate == IF_OPER_UP)) @@ -332,6 +331,7 @@ static void hsr_announce(unsigned long data) { struct hsr_priv *hsr; struct hsr_port *master; + unsigned long interval; hsr = (struct hsr_priv *) data; @@ -343,18 +343,16 @@ static void hsr_announce(unsigned long data) hsr->protVersion); hsr->announce_count++; - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); + interval = msecs_to_jiffies(HSR_ANNOUNCE_INTERVAL); } else { send_hsr_supervision_frame(master, HSR_TLV_LIFE_CHECK, hsr->protVersion); - hsr->announce_timer.expires = jiffies + - msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); + interval = msecs_to_jiffies(HSR_LIFE_CHECK_INTERVAL); } if (is_admin_up(master->dev)) - add_timer(&hsr->announce_timer); + mod_timer(&hsr->announce_timer, jiffies + interval); rcu_read_unlock(); } @@ -487,7 +485,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER); if (res) - return res; + goto err_add_port; res = register_netdevice(hsr_dev); if (res) @@ -507,6 +505,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], fail: hsr_for_each_port(hsr, port) hsr_del_port(port); +err_add_port: + hsr_del_node(&hsr->self_node_db); return res; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 284a9b820df8..6705420b3111 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -124,6 +124,18 @@ int hsr_create_self_node(struct list_head *self_node_db, return 0; } +void hsr_del_node(struct list_head *self_node_db) +{ + struct hsr_node *node; + + rcu_read_lock(); + node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list); + rcu_read_unlock(); + if (node) { + list_del_rcu(&node->mac_list); + kfree(node); + } +} /* Allocate an hsr_node and add it to node_db. 'addr' is the node's AddressA; * seq_out is used to initialize filtering of outgoing duplicate frames diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 4e04f0e868e9..43958a338095 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -16,6 +16,7 @@ struct hsr_node; +void hsr_del_node(struct list_head *self_node_db); struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[], u16 seq_out); struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb, diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 2cc224106b69..ec7a5da56129 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -25,7 +25,7 @@ #include <net/ieee802154_netdev.h> #include <net/6lowpan.h> -#include <net/ipv6.h> +#include <net/ipv6_frag.h> #include <net/inet_frag.h> #include "6lowpan_i.h" diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a60658c85a9a..e95004b507d3 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1001,6 +1001,9 @@ static int ieee802154_create(struct net *net, struct socket *sock, switch (sock->type) { case SOCK_RAW: + rc = -EPERM; + if (!capable(CAP_NET_RAW)) + goto out; proto = &ieee802154_raw_prot; ops = &ieee802154_raw_ops; break; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 777fa3b7fb13..f0165c5f376b 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -667,7 +667,8 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) case CIPSO_V4_MAP_PASS: return 0; case CIPSO_V4_MAP_TRANS: - if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) + if ((level < doi_def->map.std->lvl.cipso_size) && + (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL)) return 0; break; } @@ -1735,13 +1736,26 @@ validate_return: */ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; + /* + * We might be called above the IP layer, + * so we can not use icmp_send and IPCB here. + */ + + memset(opt, 0, sizeof(struct ip_options)); + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + return; + if (gateway) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); else - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); } /** diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f915abff1350..d3eddfd13875 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -75,7 +75,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); - inet->inet_id = jiffies; + inet->inet_id = prandom_u32(); sk_dst_set(sk, &rt->dst); err = 0; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index bffa88ecc534..5f020c051af9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -66,6 +66,11 @@ #include <net/net_namespace.h> #include <net/addrconf.h> +#define IPV6ONLY_FLAGS \ + (IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \ + IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \ + IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY) + static struct ipv4_devconf ipv4_devconf = { .data = { [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1, @@ -455,6 +460,9 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, ifa->ifa_flags &= ~IFA_F_SECONDARY; last_primary = &in_dev->ifa_list; + /* Don't set IPv6 only flags to IPv4 addresses */ + ifa->ifa_flags &= ~IPV6ONLY_FLAGS; + for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL; ifap = &ifa1->ifa_next) { if (!(ifa1->ifa_flags & IFA_F_SECONDARY) && @@ -1418,11 +1426,6 @@ skip: } } -static bool inetdev_valid_mtu(unsigned int mtu) -{ - return mtu >= IPV4_MIN_MTU; -} - static void inetdev_send_gratuitous_arp(struct net_device *dev, struct in_device *in_dev) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index d57aa64fa7c7..6ade9fdd0933 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -205,7 +205,7 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto) tail[plen - 1] = proto; } -static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) +static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) { int encap_type; struct udphdr *uh; @@ -213,6 +213,7 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru __be16 sport, dport; struct xfrm_encap_tmpl *encap = x->encap; struct ip_esp_hdr *esph = esp->esph; + unsigned int len; spin_lock_bh(&x->lock); sport = encap->encap_sport; @@ -220,11 +221,14 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru encap_type = encap->encap_type; spin_unlock_bh(&x->lock); + len = skb->len + esp->tailen - skb_transport_offset(skb); + if (len + sizeof(struct iphdr) >= IP_MAX_MTU) + return -EMSGSIZE; + uh = (struct udphdr *)esph; uh->source = sport; uh->dest = dport; - uh->len = htons(skb->len + esp->tailen - - skb_transport_offset(skb)); + uh->len = htons(len); uh->check = 0; switch (encap_type) { @@ -241,6 +245,8 @@ static void esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, stru *skb_mac_header(skb) = IPPROTO_UDP; esp->esph = esph; + + return 0; } int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp) @@ -254,8 +260,12 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * int tailen = esp->tailen; /* this is non-NULL only with UDP Encapsulation */ - if (x->encap) - esp_output_udp_encap(x, skb, esp); + if (x->encap) { + int err = esp_output_udp_encap(x, skb, esp); + + if (err < 0) + return err; + } if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { @@ -307,7 +317,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb->len += tailen; skb->data_len += tailen; skb->truesize += tailen; - if (sk) + if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); goto out; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b5317b2b191d..ff499000f6cd 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -675,6 +675,10 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, case RTA_GATEWAY: cfg->fc_gw = nla_get_be32(attr); break; + case RTA_VIA: + NL_SET_ERR_MSG(extack, "IPv4 does not support RTA_VIA attribute"); + err = -EINVAL; + goto errout; case RTA_PRIORITY: cfg->fc_priority = nla_get_u32(attr); break; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e76b8a7bb891..eff703cb13b6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1471,8 +1471,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; + int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); - int tb_id = l3mdev_fib_table(dev); struct fib_info *fi; if (!fib_info_laddrhash || local == 0) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index c9ec1603666b..665f11d7388e 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -120,6 +120,7 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) struct guehdr *guehdr; void *data; u16 doffset = 0; + u8 proto_ctype; if (!fou) return 1; @@ -211,13 +212,14 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (unlikely(guehdr->control)) return gue_control_message(skb, guehdr); + proto_ctype = guehdr->proto_ctype; __skb_pull(skb, sizeof(struct udphdr) + hdrlen); skb_reset_transport_header(skb); if (iptunnel_pull_offloads(skb)) goto drop; - return -guehdr->proto_ctype; + return -proto_ctype; drop: kfree_skb(skb); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index b798862b6be5..7efe740c06eb 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -86,13 +86,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, options = (__be32 *)(greh + 1); if (greh->flags & GRE_CSUM) { - if (skb_checksum_simple_validate(skb)) { + if (!skb_checksum_simple_validate(skb)) { + skb_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); + } else if (csum_err) { *csum_err = true; return -EINVAL; } - skb_checksum_try_convert(skb, IPPROTO_GRE, 0, - null_compute_pseudo); options++; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3c1570d3e22f..995ef3d23368 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -254,10 +254,11 @@ bool icmp_global_allow(void) bool rc = false; /* Check if token bucket is empty and cannot be refilled - * without taking the spinlock. + * without taking the spinlock. The READ_ONCE() are paired + * with the following WRITE_ONCE() in this same function. */ - if (!icmp_global.credit) { - delta = min_t(u32, now - icmp_global.stamp, HZ); + if (!READ_ONCE(icmp_global.credit)) { + delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ); if (delta < HZ / 50) return false; } @@ -267,14 +268,14 @@ bool icmp_global_allow(void) if (delta >= HZ / 50) { incr = sysctl_icmp_msgs_per_sec * delta / HZ ; if (incr) - icmp_global.stamp = now; + WRITE_ONCE(icmp_global.stamp, now); } credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); if (credit) { credit--; rc = true; } - icmp_global.credit = credit; + WRITE_ONCE(icmp_global.credit, credit); spin_unlock(&icmp_global.lock); return rc; } @@ -573,7 +574,8 @@ relookup_failed: * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt) { struct iphdr *iph; int room; @@ -694,7 +696,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in)) + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt)) goto out_unlock; @@ -747,7 +749,7 @@ out_bh_enable: local_bh_enable(); out:; } -EXPORT_SYMBOL(icmp_send); +EXPORT_SYMBOL(__icmp_send); static void icmp_socket_deliver(struct sk_buff *skb, u32 info) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 502aae3e3ab8..b6f0ee01f2e0 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -190,6 +190,17 @@ static void ip_ma_put(struct ip_mc_list *im) pmc != NULL; \ pmc = rtnl_dereference(pmc->next_rcu)) +static void ip_sf_list_clear_all(struct ip_sf_list *psf) +{ + struct ip_sf_list *next; + + while (psf) { + next = psf->sf_next; + kfree(psf); + psf = next; + } +} + #ifdef CONFIG_IP_MULTICAST /* @@ -635,6 +646,13 @@ static void igmpv3_clear_zeros(struct ip_sf_list **ppsf) } } +static void kfree_pmc(struct ip_mc_list *pmc) +{ + ip_sf_list_clear_all(pmc->sources); + ip_sf_list_clear_all(pmc->tomb); + kfree(pmc); +} + static void igmpv3_send_cr(struct in_device *in_dev) { struct ip_mc_list *pmc, *pmc_prev, *pmc_next; @@ -671,7 +689,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) else in_dev->mc_tomb = pmc_next; in_dev_put(pmc->interface); - kfree(pmc); + kfree_pmc(pmc); } else pmc_prev = pmc; } @@ -1202,13 +1220,13 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) im->interface = pmc->interface; im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; if (im->sfmode == MCAST_INCLUDE) { - im->tomb = pmc->tomb; - im->sources = pmc->sources; + swap(im->tomb, pmc->tomb); + swap(im->sources, pmc->sources); for (psf = im->sources; psf; psf = psf->sf_next) psf->sf_crcount = im->crcount; } in_dev_put(pmc->interface); - kfree(pmc); + kfree_pmc(pmc); } spin_unlock_bh(&im->lock); } @@ -1229,21 +1247,18 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) nextpmc = pmc->next; ip_mc_clear_src(pmc); in_dev_put(pmc->interface); - kfree(pmc); + kfree_pmc(pmc); } /* clear dead sources, too */ rcu_read_lock(); for_each_pmc_rcu(in_dev, pmc) { - struct ip_sf_list *psf, *psf_next; + struct ip_sf_list *psf; spin_lock_bh(&pmc->lock); psf = pmc->tomb; pmc->tomb = NULL; spin_unlock_bh(&pmc->lock); - for (; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); - } + ip_sf_list_clear_all(psf); } rcu_read_unlock(); } @@ -2107,7 +2122,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, static void ip_mc_clear_src(struct ip_mc_list *pmc) { - struct ip_sf_list *psf, *nextpsf, *tomb, *sources; + struct ip_sf_list *tomb, *sources; spin_lock_bh(&pmc->lock); tomb = pmc->tomb; @@ -2119,14 +2134,8 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc) pmc->sfcount[MCAST_EXCLUDE] = 1; spin_unlock_bh(&pmc->lock); - for (psf = tomb; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); - } - for (psf = sources; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); - } + ip_sf_list_clear_all(tomb); + ip_sf_list_clear_all(sources); } /* Join a multicast group diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9d6b172caf6c..f7224c4fc30f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1088,7 +1088,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 857ec3dbb742..eb158badebc4 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -109,6 +109,7 @@ static size_t inet_sk_attr_size(struct sock *sk, + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -288,12 +289,19 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) { + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { u32 classid = 0; #ifdef CONFIG_SOCK_CGROUP_DATA classid = sock_cgroup_classid(&sk->sk_cgrp_data); #endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; @@ -903,11 +911,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; num = 0; ilb = &hashinfo->listening_hash[i]; spin_lock(&ilb->lock); - sk_for_each(sk, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->nulls_head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 6ffee9d2b0e5..481cded81b2d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,62 @@ #include <net/sock.h> #include <net/inet_frag.h> #include <net/inet_ecn.h> +#include <net/ip.h> +#include <net/ipv6.h> + +/* Use skb->cb to track consecutive/adjacent fragments coming at + * the end of the queue. Nodes in the rb-tree queue will + * contain "runs" of one or more adjacent fragments. + * + * Invariants: + * - next_frag is NULL at the tail of a "run"; + * - the head of a "run" has the sum of all fragment lengths in frag_run_len. + */ +struct ipfrag_skb_cb { + union { + struct inet_skb_parm h4; + struct inet6_skb_parm h6; + }; + struct sk_buff *next_frag; + int frag_run_len; +}; + +#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) + +static void fragcb_clear(struct sk_buff *skb) +{ + RB_CLEAR_NODE(&skb->rbnode); + FRAG_CB(skb)->next_frag = NULL; + FRAG_CB(skb)->frag_run_len = skb->len; +} + +/* Append skb to the last "run". */ +static void fragrun_append_to_last(struct inet_frag_queue *q, + struct sk_buff *skb) +{ + fragcb_clear(skb); + + FRAG_CB(q->last_run_head)->frag_run_len += skb->len; + FRAG_CB(q->fragments_tail)->next_frag = skb; + q->fragments_tail = skb; +} + +/* Create a new "run" with the skb. */ +static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); + fragcb_clear(skb); + + if (q->last_run_head) + rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, + &q->last_run_head->rbnode.rb_right); + else + rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); + rb_insert_color(&skb->rbnode, &q->rb_fragments); + + q->fragments_tail = skb; + q->last_run_head = skb; +} /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. @@ -122,6 +178,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) kmem_cache_free(f->frags_cachep, q); } +unsigned int inet_frag_rbtree_purge(struct rb_root *root) +{ + struct rb_node *p = rb_first(root); + unsigned int sum = 0; + + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); + + p = rb_next(p); + rb_erase(&skb->rbnode, root); + while (skb) { + struct sk_buff *next = FRAG_CB(skb)->next_frag; + + sum += skb->truesize; + kfree_skb(skb); + skb = next; + } + } + return sum; +} +EXPORT_SYMBOL(inet_frag_rbtree_purge); + void inet_frag_destroy(struct inet_frag_queue *q) { struct sk_buff *fp; @@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) return fq; } EXPORT_SYMBOL(inet_frag_find); + +int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, + int offset, int end) +{ + struct sk_buff *last = q->fragments_tail; + + /* RFC5722, Section 4, amended by Errata ID : 3089 + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments) MUST be silently discarded. + * + * Duplicates, however, should be ignored (i.e. skb dropped, but the + * queue/fragments kept for later reassembly). + */ + if (!last) + fragrun_create(q, skb); /* First fragment. */ + else if (last->ip_defrag_offset + last->len < end) { + /* This is the common case: skb goes to the end. */ + /* Detect and discard overlaps. */ + if (offset < last->ip_defrag_offset + last->len) + return IPFRAG_OVERLAP; + if (offset == last->ip_defrag_offset + last->len) + fragrun_append_to_last(q, skb); + else + fragrun_create(q, skb); + } else { + /* Binary search. Note that skb can become the first fragment, + * but not the last (covered above). + */ + struct rb_node **rbn, *parent; + + rbn = &q->rb_fragments.rb_node; + do { + struct sk_buff *curr; + int curr_run_end; + + parent = *rbn; + curr = rb_to_skb(parent); + curr_run_end = curr->ip_defrag_offset + + FRAG_CB(curr)->frag_run_len; + if (end <= curr->ip_defrag_offset) + rbn = &parent->rb_left; + else if (offset >= curr_run_end) + rbn = &parent->rb_right; + else if (offset >= curr->ip_defrag_offset && + end <= curr_run_end) + return IPFRAG_DUP; + else + return IPFRAG_OVERLAP; + } while (*rbn); + /* Here we have parent properly set, and rbn pointing to + * one of its NULL left/right children. Insert skb. + */ + fragcb_clear(skb); + rb_link_node(&skb->rbnode, parent, rbn); + rb_insert_color(&skb->rbnode, &q->rb_fragments); + } + + skb->ip_defrag_offset = offset; + + return IPFRAG_OK; +} +EXPORT_SYMBOL(inet_frag_queue_insert); + +void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, + struct sk_buff *parent) +{ + struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); + struct sk_buff **nextp; + int delta; + + if (head != skb) { + fp = skb_clone(skb, GFP_ATOMIC); + if (!fp) + return NULL; + FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; + if (RB_EMPTY_NODE(&skb->rbnode)) + FRAG_CB(parent)->next_frag = fp; + else + rb_replace_node(&skb->rbnode, &fp->rbnode, + &q->rb_fragments); + if (q->fragments_tail == skb) + q->fragments_tail = fp; + skb_morph(skb, head); + FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; + rb_replace_node(&head->rbnode, &skb->rbnode, + &q->rb_fragments); + consume_skb(head); + head = skb; + } + WARN_ON(head->ip_defrag_offset != 0); + + delta = -head->truesize; + + /* Head of list must not be cloned. */ + if (skb_unclone(head, GFP_ATOMIC)) + return NULL; + + delta += head->truesize; + if (delta) + add_frag_mem_limit(q->net, delta); + + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. + */ + if (skb_has_frag_list(head)) { + struct sk_buff *clone; + int i, plen = 0; + + clone = alloc_skb(0, GFP_ATOMIC); + if (!clone) + return NULL; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); + clone->data_len = head->data_len - plen; + clone->len = clone->data_len; + head->truesize += clone->truesize; + clone->csum = 0; + clone->ip_summed = head->ip_summed; + add_frag_mem_limit(q->net, clone->truesize); + skb_shinfo(head)->frag_list = clone; + nextp = &clone->next; + } else { + nextp = &skb_shinfo(head)->frag_list; + } + + return nextp; +} +EXPORT_SYMBOL(inet_frag_reasm_prepare); + +void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, + void *reasm_data) +{ + struct sk_buff **nextp = (struct sk_buff **)reasm_data; + struct rb_node *rbn; + struct sk_buff *fp; + + skb_push(head, head->data - skb_network_header(head)); + + /* Traverse the tree in order, to build frag_list. */ + fp = FRAG_CB(head)->next_frag; + rbn = rb_next(&head->rbnode); + rb_erase(&head->rbnode, &q->rb_fragments); + while (rbn || fp) { + /* fp points to the next sk_buff in the current run; + * rbn points to the next run. + */ + /* Go through the current run. */ + while (fp) { + *nextp = fp; + nextp = &fp->next; + fp->prev = NULL; + memset(&fp->rbnode, 0, sizeof(fp->rbnode)); + fp->sk = NULL; + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) + head->ip_summed = CHECKSUM_NONE; + else if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; + fp = FRAG_CB(fp)->next_frag; + } + /* Move to the next run. */ + if (rbn) { + struct rb_node *rbnext = rb_next(rbn); + + fp = rb_to_skb(rbn); + rb_erase(rbn, &q->rb_fragments); + rbn = rbnext; + } + } + sub_frag_mem_limit(q->net, head->truesize); + + *nextp = NULL; + head->next = NULL; + head->prev = NULL; + head->tstamp = q->stamp; +} +EXPORT_SYMBOL(inet_frag_reasm_finish); + +struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) +{ + struct sk_buff *head; + + if (q->fragments) { + head = q->fragments; + q->fragments = head->next; + } else { + struct sk_buff *skb; + + head = skb_rb_first(&q->rb_fragments); + if (!head) + return NULL; + skb = FRAG_CB(head)->next_frag; + if (skb) + rb_replace_node(&head->rbnode, &skb->rbnode, + &q->rb_fragments); + else + rb_erase(&head->rbnode, &q->rb_fragments); + memset(&head->rbnode, 0, sizeof(head->rbnode)); + barrier(); + } + if (head == q->fragments_tail) + q->fragments_tail = NULL; + + sub_frag_mem_limit(q->net, head->truesize); + + return head; +} +EXPORT_SYMBOL(inet_frag_pull_head); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 24b066c32e06..0af13f5bdc9a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -193,7 +193,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (sk->sk_bound_dev_if) score += 4; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; @@ -219,9 +219,10 @@ struct sock *__inet_lookup_listener(struct net *net, int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet_exact_dif_match(net, skb); struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; u32 phash = 0; - sk_for_each_rcu(sk, &ilb->head) { + sk_nulls_for_each_rcu(sk, node, &ilb->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { @@ -442,10 +443,11 @@ static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; + const struct hlist_nulls_node *node; struct sock *sk2; kuid_t uid = sock_i_uid(sk); - sk_for_each_rcu(sk2, &ilb->head) { + sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && @@ -480,9 +482,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) } if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) - hlist_add_tail_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head); else - hlist_add_head_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: @@ -525,10 +527,7 @@ void inet_unhash(struct sock *sk) spin_lock_bh(lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - if (listener) - done = __sk_del_node_init(sk); - else - done = __sk_nulls_del_node_init_rcu(sk); + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); @@ -664,7 +663,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_HEAD(&h->listening_hash[i].head); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head, + i + LISTENING_NULLS_BASE); } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 64007ce87273..f94881412d5b 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -159,7 +159,12 @@ static void inet_peer_gc(struct inet_peer_base *base, base->total / inet_peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; - delta = (__u32)jiffies - p->dtime; + + /* The READ_ONCE() pairs with the WRITE_ONCE() + * in inet_putpeer() + */ + delta = (__u32)jiffies - READ_ONCE(p->dtime); + if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) gc_stack[i] = NULL; } @@ -215,6 +220,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; + p->n_redirects = 0; /* 60*HZ is arbitrary, but chosen enough high so that the first * calculation of tokens is at its maximum. */ @@ -235,7 +241,10 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { - p->dtime = (__u32)jiffies; + /* The WRITE_ONCE() pairs with itself (we run lockless) + * and the READ_ONCE() in inet_peer_gc() + */ + WRITE_ONCE(p->dtime, (__u32)jiffies); if (refcount_dec_and_test(&p->refcnt)) call_rcu(&p->rcu, inetpeer_free_rcu); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index d95b32af4a0e..5a1d39e32196 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -57,57 +57,6 @@ */ static const char ip_frag_cache_name[] = "ip4-frags"; -/* Use skb->cb to track consecutive/adjacent fragments coming at - * the end of the queue. Nodes in the rb-tree queue will - * contain "runs" of one or more adjacent fragments. - * - * Invariants: - * - next_frag is NULL at the tail of a "run"; - * - the head of a "run" has the sum of all fragment lengths in frag_run_len. - */ -struct ipfrag_skb_cb { - struct inet_skb_parm h; - struct sk_buff *next_frag; - int frag_run_len; -}; - -#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) - -static void ip4_frag_init_run(struct sk_buff *skb) -{ - BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); - - FRAG_CB(skb)->next_frag = NULL; - FRAG_CB(skb)->frag_run_len = skb->len; -} - -/* Append skb to the last "run". */ -static void ip4_frag_append_to_last_run(struct inet_frag_queue *q, - struct sk_buff *skb) -{ - RB_CLEAR_NODE(&skb->rbnode); - FRAG_CB(skb)->next_frag = NULL; - - FRAG_CB(q->last_run_head)->frag_run_len += skb->len; - FRAG_CB(q->fragments_tail)->next_frag = skb; - q->fragments_tail = skb; -} - -/* Create a new "run" with the skb. */ -static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb) -{ - if (q->last_run_head) - rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, - &q->last_run_head->rbnode.rb_right); - else - rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); - rb_insert_color(&skb->rbnode, &q->rb_fragments); - - ip4_frag_init_run(skb); - q->fragments_tail = skb; - q->last_run_head = skb; -} - /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { struct inet_frag_queue q; @@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t) * pull the head out of the tree in order to be able to * deal with head->dev. */ - if (qp->q.fragments) { - head = qp->q.fragments; - qp->q.fragments = head->next; - } else { - head = skb_rb_first(&qp->q.rb_fragments); - if (!head) - goto out; - if (FRAG_CB(head)->next_frag) - rb_replace_node(&head->rbnode, - &FRAG_CB(head)->next_frag->rbnode, - &qp->q.rb_fragments); - else - rb_erase(&head->rbnode, &qp->q.rb_fragments); - memset(&head->rbnode, 0, sizeof(head->rbnode)); - barrier(); - } - if (head == qp->q.fragments_tail) - qp->q.fragments_tail = NULL; - - sub_frag_mem_limit(qp->q.net, head->truesize); - + head = inet_frag_pull_head(&qp->q); + if (!head) + goto out; head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out; @@ -345,12 +276,10 @@ static int ip_frag_reinit(struct ipq *qp) static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); - struct rb_node **rbn, *parent; - struct sk_buff *skb1, *prev_tail; - int ihl, end, skb1_run_end; + int ihl, end, flags, offset; + struct sk_buff *prev_tail; struct net_device *dev; unsigned int fragsize; - int flags, offset; int err = -ENOENT; u8 ecn; @@ -382,7 +311,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) */ if (end < qp->q.len || ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len)) - goto err; + goto discard_qp; qp->q.flags |= INET_FRAG_LAST_IN; qp->q.len = end; } else { @@ -394,82 +323,33 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (end > qp->q.len) { /* Some bits beyond end -> corruption. */ if (qp->q.flags & INET_FRAG_LAST_IN) - goto err; + goto discard_qp; qp->q.len = end; } } if (end == offset) - goto err; + goto discard_qp; err = -ENOMEM; if (!pskb_pull(skb, skb_network_offset(skb) + ihl)) - goto err; + goto discard_qp; err = pskb_trim_rcsum(skb, end - offset); if (err) - goto err; + goto discard_qp; /* Note : skb->rbnode and skb->dev share the same location. */ dev = skb->dev; /* Makes sure compiler wont do silly aliasing games */ barrier(); - /* RFC5722, Section 4, amended by Errata ID : 3089 - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments) MUST be silently discarded. - * - * We do the same here for IPv4 (and increment an snmp counter) but - * we do not want to drop the whole queue in response to a duplicate - * fragment. - */ - - err = -EINVAL; - /* Find out where to put this fragment. */ prev_tail = qp->q.fragments_tail; - if (!prev_tail) - ip4_frag_create_run(&qp->q, skb); /* First fragment. */ - else if (prev_tail->ip_defrag_offset + prev_tail->len < end) { - /* This is the common case: skb goes to the end. */ - /* Detect and discard overlaps. */ - if (offset < prev_tail->ip_defrag_offset + prev_tail->len) - goto discard_qp; - if (offset == prev_tail->ip_defrag_offset + prev_tail->len) - ip4_frag_append_to_last_run(&qp->q, skb); - else - ip4_frag_create_run(&qp->q, skb); - } else { - /* Binary search. Note that skb can become the first fragment, - * but not the last (covered above). - */ - rbn = &qp->q.rb_fragments.rb_node; - do { - parent = *rbn; - skb1 = rb_to_skb(parent); - skb1_run_end = skb1->ip_defrag_offset + - FRAG_CB(skb1)->frag_run_len; - if (end <= skb1->ip_defrag_offset) - rbn = &parent->rb_left; - else if (offset >= skb1_run_end) - rbn = &parent->rb_right; - else if (offset >= skb1->ip_defrag_offset && - end <= skb1_run_end) - goto err; /* No new data, potential duplicate */ - else - goto discard_qp; /* Found an overlap */ - } while (*rbn); - /* Here we have parent properly set, and rbn pointing to - * one of its NULL left/right children. Insert skb. - */ - ip4_frag_init_run(skb); - rb_link_node(&skb->rbnode, parent, rbn); - rb_insert_color(&skb->rbnode, &qp->q.rb_fragments); - } + err = inet_frag_queue_insert(&qp->q, skb, offset, end); + if (err) + goto insert_error; if (dev) qp->iif = dev->ifindex; - skb->ip_defrag_offset = offset; qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; @@ -494,15 +374,24 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) skb->_skb_refdst = 0UL; err = ip_frag_reasm(qp, skb, prev_tail, dev); skb->_skb_refdst = orefdst; + if (err) + inet_frag_kill(&qp->q); return err; } skb_dst_drop(skb); return -EINPROGRESS; +insert_error: + if (err == IPFRAG_DUP) { + kfree_skb(skb); + return -EINVAL; + } + err = -EINVAL; + __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); discard_qp: inet_frag_kill(&qp->q); - __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS); + __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); err: kfree_skb(skb); return err; @@ -514,13 +403,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, { struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct iphdr *iph; - struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments); - struct sk_buff **nextp; /* To build frag_list. */ - struct rb_node *rbn; - int len; - int ihlen; - int delta; - int err; + void *reasm_data; + int len, err; u8 ecn; ipq_kill(qp); @@ -530,117 +414,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, err = -EINVAL; goto out_fail; } - /* Make the one we just received the head. */ - if (head != skb) { - fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - goto out_nomem; - FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; - if (RB_EMPTY_NODE(&skb->rbnode)) - FRAG_CB(prev_tail)->next_frag = fp; - else - rb_replace_node(&skb->rbnode, &fp->rbnode, - &qp->q.rb_fragments); - if (qp->q.fragments_tail == skb) - qp->q.fragments_tail = fp; - skb_morph(skb, head); - FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; - rb_replace_node(&head->rbnode, &skb->rbnode, - &qp->q.rb_fragments); - consume_skb(head); - head = skb; - } - WARN_ON(head->ip_defrag_offset != 0); - - /* Allocate a new buffer for the datagram. */ - ihlen = ip_hdrlen(head); - len = ihlen + qp->q.len; + /* Make the one we just received the head. */ + reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail); + if (!reasm_data) + goto out_nomem; + len = ip_hdrlen(skb) + qp->q.len; err = -E2BIG; if (len > 65535) goto out_oversize; - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - goto out_nomem; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(qp->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_nomem; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->truesize += clone->truesize; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - add_frag_mem_limit(qp->q.net, clone->truesize); - skb_shinfo(head)->frag_list = clone; - nextp = &clone->next; - } else { - nextp = &skb_shinfo(head)->frag_list; - } + inet_frag_reasm_finish(&qp->q, skb, reasm_data); - skb_push(head, head->data - skb_network_header(head)); + skb->dev = dev; + IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size); - /* Traverse the tree in order, to build frag_list. */ - fp = FRAG_CB(head)->next_frag; - rbn = rb_next(&head->rbnode); - rb_erase(&head->rbnode, &qp->q.rb_fragments); - while (rbn || fp) { - /* fp points to the next sk_buff in the current run; - * rbn points to the next run. - */ - /* Go through the current run. */ - while (fp) { - *nextp = fp; - nextp = &fp->next; - fp->prev = NULL; - memset(&fp->rbnode, 0, sizeof(fp->rbnode)); - fp->sk = NULL; - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp = FRAG_CB(fp)->next_frag; - } - /* Move to the next run. */ - if (rbn) { - struct rb_node *rbnext = rb_next(rbn); - - fp = rb_to_skb(rbn); - rb_erase(rbn, &qp->q.rb_fragments); - rbn = rbnext; - } - } - sub_frag_mem_limit(qp->q.net, head->truesize); - - *nextp = NULL; - head->next = NULL; - head->prev = NULL; - head->dev = dev; - head->tstamp = qp->q.stamp; - IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size); - - iph = ip_hdr(head); + iph = ip_hdr(skb); iph->tot_len = htons(len); iph->tos |= ecn; @@ -653,7 +443,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, * from one very small df-fragment and one large non-df frag. */ if (qp->max_df_size == qp->q.max_size) { - IPCB(head)->flags |= IPSKB_FRAG_PMTU; + IPCB(skb)->flags |= IPSKB_FRAG_PMTU; iph->frag_off = htons(IP_DF); } else { iph->frag_off = 0; @@ -751,28 +541,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) } EXPORT_SYMBOL(ip_check_defrag); -unsigned int inet_frag_rbtree_purge(struct rb_root *root) -{ - struct rb_node *p = rb_first(root); - unsigned int sum = 0; - - while (p) { - struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); - - p = rb_next(p); - rb_erase(&skb->rbnode, root); - while (skb) { - struct sk_buff *next = FRAG_CB(skb)->next_frag; - - sum += skb->truesize; - kfree_skb(skb); - skb = next; - } - } - return sum; -} -EXPORT_SYMBOL(inet_frag_rbtree_purge); - #ifdef CONFIG_SYSCTL static int dist_min; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index dd3bcf22fe8b..9940a59306b5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -230,13 +230,10 @@ static void gre_err(struct sk_buff *skb, u32 info) const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; - bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), - iph->ihl * 4) < 0) { - if (!csum_err) /* ignore csum errors. */ - return; - } + if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), + iph->ihl * 4) < 0) + return; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, @@ -592,6 +589,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, truncate = true; } + if (tun_info->options_len < sizeof(*md)) + goto err_free_rt; + md = ip_tunnel_info_opts(tun_info); if (!md) goto err_free_rt; @@ -689,7 +689,7 @@ static void erspan_build_header(struct sk_buff *skb, __be32 id, u32 index, bool truncate) { struct iphdr *iphdr = ip_hdr(skb); - struct ethhdr *eth = eth_hdr(skb); + struct ethhdr *eth = (struct ethhdr *)skb->data; enum erspan_encap_type enc_type; struct erspanhdr *ershdr; struct qtag_prefix { @@ -1424,6 +1424,7 @@ nla_put_failure: static void erspan_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = 0; dev->netdev_ops = &erspan_netdev_ops; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 1b160378ea9c..6fc45d3a1f8a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -259,11 +259,10 @@ int ip_local_deliver(struct sk_buff *skb) ip_local_deliver_finish); } -static inline bool ip_rcv_options(struct sk_buff *skb) +static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt; const struct iphdr *iph; - struct net_device *dev = skb->dev; /* It looks as overkill, because not all IP options require packet mangling. @@ -299,7 +298,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb) } } - if (ip_options_rcv_srr(skb)) + if (ip_options_rcv_srr(skb, dev)) goto drop; } @@ -362,7 +361,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) } #endif - if (iph->ihl > 5 && ip_rcv_options(skb)) + if (iph->ihl > 5 && ip_rcv_options(skb, dev)) goto drop; rt = skb_rtable(skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index ed194d46c00e..3db31bb9df50 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb) * If opt == NULL, then skb->data should point to IP header. */ -int ip_options_compile(struct net *net, - struct ip_options *opt, struct sk_buff *skb) +int __ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb, + __be32 *info) { __be32 spec_dst = htonl(INADDR_ANY); unsigned char *pp_ptr = NULL; @@ -468,11 +469,22 @@ eol: return 0; error: - if (skb) { - icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24)); - } + if (info) + *info = htonl((pp_ptr-iph)<<24); return -EINVAL; } + +int ip_options_compile(struct net *net, + struct ip_options *opt, struct sk_buff *skb) +{ + int ret; + __be32 info; + + ret = __ip_options_compile(net, opt, skb, &info); + if (ret != 0 && skb) + icmp_send(skb, ICMP_PARAMETERPROB, 0, info); + return ret; +} EXPORT_SYMBOL(ip_options_compile); /* @@ -600,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb) } } -int ip_options_rcv_srr(struct sk_buff *skb) +int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev) { struct ip_options *opt = &(IPCB(skb)->opt); int srrspace, srrptr; @@ -635,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) orefdst = skb->_skb_refdst; skb_dst_set(skb, NULL); - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); + err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev); rt2 = skb_rtable(skb); if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { skb_dst_drop(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e2dd325bed9b..73cd64c7692f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -518,6 +518,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; + to->skb_iif = from->skb_iif; skb_dst_drop(to); skb_dst_copy(to, from); to->dev = from->dev; @@ -1122,13 +1123,17 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, rt = *rtp; if (unlikely(!rt)) return -EFAULT; - /* - * We steal reference to this route, caller should not release it - */ - *rtp = NULL; + cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : rt->dst.dev->mtu; + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + cork->dst = &rt->dst; + /* We stole this route, caller should not release it. */ + *rtp = NULL; + cork->length = 0; cork->ttl = ipc->ttl; cork->tos = ipc->tos; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fabc299cb875..f1784162acc2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -521,7 +521,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && @@ -661,13 +661,19 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ + struct ip_tunnel_info *tun_info; if (!skb_dst(skb)) { dev->stats.tx_fifo_errors++; goto tx_error; } - if (skb->protocol == htons(ETH_P_IP)) { + tun_info = skb_tunnel_info(skb); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) && + ip_tunnel_info_af(tun_info) == AF_INET && + tun_info->key.u.ipv4.dst) + dst = tun_info->key.u.ipv4.dst; + else if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); dst = rt_nexthop(rt, inner_iph->daddr); } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 423091727e15..2aaf7f8a3a96 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -89,9 +89,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out(net, sk, skb); - if (unlikely(net_xmit_eval(err))) - pkt_len = 0; - iptunnel_xmit_stats(dev, pkt_len); + + if (dev) { + if (unlikely(net_xmit_eval(err))) + pkt_len = 0; + iptunnel_xmit_stats(dev, pkt_len); + } } EXPORT_SYMBOL_GPL(iptunnel_xmit); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 00d4371d4573..08c15dd42d93 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -74,6 +74,33 @@ drop: return 0; } +static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) +{ + struct ip_tunnel *tunnel; + const struct iphdr *iph = ip_hdr(skb); + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); + + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; + + skb->dev = tunnel->dev; + + return xfrm_input(skb, nexthdr, spi, encap_type); + } + + return -EINVAL; +drop: + kfree_skb(skb); + return 0; +} + static int vti_rcv(struct sk_buff *skb) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; @@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb) return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); } +static int vti_rcv_ipip(struct sk_buff *skb) +{ + XFRM_SPI_SKB_CB(skb)->family = AF_INET; + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +} + static int vti_rcv_cb(struct sk_buff *skb, int err) { unsigned short family; @@ -209,7 +244,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); @@ -439,6 +474,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { .priority = 100, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = vti_rcv_ipip, + .err_handler = vti4_err, + .priority = 0, +}; + static int __net_init vti_init_net(struct net *net) { int err; @@ -607,6 +648,13 @@ static int __init vti_init(void) if (err < 0) goto xfrm_proto_comp_failed; + msg = "ipip tunnel"; + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: cant't register tunnel\n",__func__); + goto xfrm_tunnel_failed; + } + msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); if (err < 0) @@ -615,6 +663,8 @@ static int __init vti_init(void) return err; rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); @@ -630,6 +680,7 @@ pernet_dev_failed: static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 37fb9552e858..341d1bd637af 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -96,6 +96,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; + flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par)); return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index 0c366aad89cb..b531fe204323 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -105,12 +105,26 @@ static int masq_device_event(struct notifier_block *this, return NOTIFY_DONE; } +static int inet_cmp(struct nf_conn *ct, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct nf_conntrack_tuple *tuple; + + if (!device_cmp(ct, (void *)(long)dev->ifindex)) + return 0; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + return ifa->ifa_address == tuple->dst.u3.ip; +} + static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; - struct netdev_notifier_info info; + struct net *net = dev_net(idev->dev); /* The masq_dev_notifier will catch the case of the device going * down. So if the inetdev is dead and being destroyed we have @@ -120,8 +134,10 @@ static int masq_inet_event(struct notifier_block *this, if (idev->dead) return NOTIFY_DONE; - netdev_notifier_info_init(&info, idev->dev); - return masq_device_event(this, event, &info); + if (event == NETDEV_DOWN) + nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0); + + return NOTIFY_DONE; } static struct notifier_block masq_dev_notifier = { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 125c1eab3eaa..53a11894f9e4 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -174,6 +174,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { int sdif = inet_sdif(skb); + int dif = inet_iif(skb); struct sock *sk; struct hlist_head *head; int delivered = 0; @@ -186,8 +187,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) net = dev_net(skb->dev); sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, - iph->saddr, iph->daddr, - skb->dev->ifindex, sdif); + iph->saddr, iph->daddr, dif, sdif); while (sk) { delivered = 1; @@ -202,7 +202,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) } sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex, sdif); + dif, sdif); } out: read_unlock(&raw_v4_hashinfo.lock); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index c200065ef9a5..6367ecdf76c4 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -23,9 +23,6 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r) return &raw_v6_hashinfo; #endif } else { - pr_warn_once("Unexpected inet family %d\n", - r->sdiag_family); - WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7afa8d2463d8..8b855d3eec9e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -145,7 +145,8 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void ipv4_dst_destroy(struct dst_entry *dst); @@ -517,15 +518,17 @@ EXPORT_SYMBOL(ip_idents_reserve); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { - static u32 ip_idents_hashrnd __read_mostly; u32 hash, id; - net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); - hash = jhash_3words((__force u32)iph->daddr, + hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr, - iph->protocol ^ net_hash_mix(net), - ip_idents_hashrnd); + iph->protocol, + &net->ipv4.ip_id_key); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } @@ -904,13 +907,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ - if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; + peer->n_redirects = 0; + } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ - if (peer->rate_tokens >= ip_rt_redirect_number) { + if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } @@ -921,15 +926,15 @@ void ip_rt_send_redirect(struct sk_buff *skb) if (peer->rate_tokens == 0 || time_after(jiffies, (peer->rate_last + - (ip_rt_redirect_load << peer->rate_tokens)))) { + (ip_rt_redirect_load << peer->n_redirects)))) { __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; - ++peer->rate_tokens; + ++peer->n_redirects; #ifdef CONFIG_IP_ROUTE_VERBOSE if (log_martians && - peer->rate_tokens == ip_rt_redirect_number) + peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); @@ -1038,7 +1043,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct rtable *rt = (struct rtable *) dst; struct flowi4 fl4; @@ -1189,11 +1195,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) return dst; } +static void ipv4_send_dest_unreach(struct sk_buff *skb) +{ + struct ip_options opt; + int res; + + /* Recompile ip options since IPCB may not be valid anymore. + * Also check we have a reasonable ipv4 header. + */ + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || + ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) + return; + + memset(&opt, 0, sizeof(opt)); + if (ip_hdr(skb)->ihl > 5) { + if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) + return; + opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); + + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + rcu_read_unlock(); + + if (res) + return; + } + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); +} + static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + ipv4_send_dest_unreach(skb); rt = skb_rtable(skb); if (rt) @@ -1312,6 +1346,10 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) if (fnhe->fnhe_daddr == daddr) { rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + /* set fnhe_daddr to 0 to ensure it won't bind with + * new dsts in rt_bind_exception(). + */ + fnhe->fnhe_daddr = 0; fnhe_flush_routes(fnhe); kfree_rcu(fnhe, rcu); break; @@ -2120,12 +2158,13 @@ int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, int our = 0; int err = -EINVAL; - if (in_dev) - our = ip_check_mc_rcu(in_dev, daddr, saddr, - ip_hdr(skb)->protocol); + if (!in_dev) + return err; + our = ip_check_mc_rcu(in_dev, daddr, saddr, + ip_hdr(skb)->protocol); /* check l3 master if no match yet */ - if ((!in_dev || !our) && netif_is_l3_slave(dev)) { + if (!our && netif_is_l3_slave(dev)) { struct in_device *l3_in_dev; l3_in_dev = __in_dev_get_rcu(skb->dev); @@ -2314,14 +2353,17 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, int orig_oif = fl4->flowi4_oif; unsigned int flags = 0; struct rtable *rth; - int err = -ENETUNREACH; + int err; if (fl4->saddr) { - rth = ERR_PTR(-EINVAL); if (ipv4_is_multicast(fl4->saddr) || ipv4_is_lbcast(fl4->saddr) || - ipv4_is_zeronet(fl4->saddr)) + ipv4_is_zeronet(fl4->saddr)) { + rth = ERR_PTR(-EINVAL); goto out; + } + + rth = ERR_PTR(-ENETUNREACH); /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: @@ -2489,7 +2531,8 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d1a31473bcde..78771272f613 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -47,6 +47,7 @@ static int tcp_syn_retries_min = 1; static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; +static int one_day_secs = 24 * 3600; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -554,7 +555,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_min_rtt_wlen, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one_day_secs }, { .procname = "tcp_low_latency", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541686f3f835..8f07655718f3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -574,7 +574,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR; return mask; @@ -914,6 +914,22 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } +/* In some cases, both sendpage() and sendmsg() could have added + * an skb to the write queue, but failed adding payload on it. + * We need to remove it to consume less memory, but more + * importantly be able to generate EPOLLOUT for Edge Trigger epoll() + * users. + */ +static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) +{ + if (skb && !skb->len && + TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { + tcp_unlink_write_queue(skb, sk); + tcp_check_send_head(sk, skb); + sk_wmem_free_skb(sk, skb); + } +} + ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -1034,6 +1050,7 @@ out: return copied; do_error: + tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk)); if (copied) goto out; out_err: @@ -1412,17 +1429,11 @@ out_nopush: sock_zerocopy_put(uarg); return copied + copied_syn; +do_error: + skb = tcp_write_queue_tail(sk); do_fault: - if (!skb->len) { - tcp_unlink_write_queue(skb, sk); - /* It is the one place in all of TCP, except connection - * reset, where we can be unlinking the send_head. - */ - tcp_check_send_head(sk, skb); - sk_wmem_free_skb(sk, skb); - } + tcp_remove_empty_skb(sk, skb); -do_error: if (copied + copied_syn) goto out; out_err: @@ -1776,7 +1787,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); - if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) sk_busy_loop(sk, nonblock); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8e080f3b75bd..d2b1c39c4223 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -247,7 +247,7 @@ static void tcp_ecn_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp) { - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; + tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; } static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) @@ -1750,8 +1750,11 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } /* Ignore very old stuff early */ - if (!after(sp[used_sacks].end_seq, prior_snd_una)) + if (!after(sp[used_sacks].end_seq, prior_snd_una)) { + if (i == 0) + first_sack_index = -1; continue; + } used_sacks++; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0569718e3656..b4f0fc34b0ed 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -245,7 +245,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr); } - inet->inet_id = tp->write_seq ^ jiffies; + inet->inet_id = prandom_u32(); if (tcp_fastopen_defer_connect(sk, &err)) return err; @@ -1368,7 +1368,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; - newinet->inet_id = newtp->write_seq ^ jiffies; + newinet->inet_id = prandom_u32(); if (!dst) { dst = inet_csk_route_child_sock(sk, newsk, req); @@ -1936,13 +1936,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; struct sock *sk = cur; if (!sk) { get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock(&ilb->lock); - sk = sk_head(&ilb->head); + sk = sk_nulls_head(&ilb->nulls_head); st->offset = 0; goto get_sk; } @@ -1950,9 +1951,9 @@ get_head: ++st->num; ++st->offset; - sk = sk_next(sk); + sk = sk_nulls_next(sk); get_sk: - sk_for_each_from(sk) { + sk_nulls_for_each_from(sk, node) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == st->family) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a99086bf26ea..e1eb56e21dd5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -708,8 +708,9 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb min_t(unsigned int, eff_sacks, (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK); - size += TCPOLEN_SACK_BASE_ALIGNED + - opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; + if (likely(opts->num_sack_blocks)) + size += TCPOLEN_SACK_BASE_ALIGNED + + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; } return size; @@ -2025,7 +2026,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor)) + if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb)) return false; len -= skb->len; @@ -2148,6 +2149,7 @@ static int tcp_mtu_probe(struct sock *sk) * we need to propagate it to the new skb. */ TCP_SKB_CB(nskb)->eor = TCP_SKB_CB(skb)->eor; + tcp_skb_collapse_tstamp(nskb, skb); tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); } else { @@ -2378,6 +2380,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (tcp_small_queue_check(sk, skb, 0)) break; + /* Argh, we hit an empty skb(), presumably a thread + * is sleeping in sendmsg()/sk_stream_wait_memory(). + * We do not want to send a pure-ack packet and have + * a strange looking rtx queue with empty packet(s). + */ + if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) + break; + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -2931,7 +2941,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; } else if (err != -EBUSY) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); } return err; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 592d6e9967a9..895129b0928c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -358,7 +358,7 @@ static void tcp_probe_timer(struct sock *sk) return; } - if (icsk->icsk_probes_out > max_probes) { + if (icsk->icsk_probes_out >= max_probes) { abort: tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ @@ -413,6 +413,7 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *skb; if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && @@ -423,10 +424,13 @@ void tcp_retransmit_timer(struct sock *sk) */ return; } + if (!tp->packets_out) - goto out; + return; - WARN_ON(tcp_write_queue_empty(sk)); + skb = tcp_rtx_queue_head(sk); + if (WARN_ON_ONCE(!skb)) + return; tp->tlp_high_seq = 0; @@ -459,16 +463,17 @@ void tcp_retransmit_timer(struct sock *sk) goto out; } tcp_enter_loss(sk); - tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); + tcp_retransmit_skb(sk, skb, 1); __sk_dst_reset(sk); goto out_reset_timer; } + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); if (tcp_write_timeout(sk)) goto out; if (icsk->icsk_retransmits == 0) { - int mib_idx; + int mib_idx = 0; if (icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) @@ -483,10 +488,9 @@ void tcp_retransmit_timer(struct sock *sk) mib_idx = LINUX_MIB_TCPSACKFAILURES; else mib_idx = LINUX_MIB_TCPRENOFAILURES; - } else { - mib_idx = LINUX_MIB_TCPTIMEOUTS; } - __NET_INC_STATS(sock_net(sk), mib_idx); + if (mib_idx) + __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b89920c0f226..e33258d69246 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -419,7 +419,7 @@ static int compute_score(struct sock *sk, struct net *net, score += 4; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; } @@ -563,7 +563,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) { - return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table); + const struct iphdr *iph = ip_hdr(skb); + + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + inet_sdif(skb), &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb); @@ -1191,6 +1195,20 @@ static void udp_set_dev_scratch(struct sk_buff *skb) scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } +static void udp_skb_csum_unnecessary_set(struct sk_buff *skb) +{ + /* We come here after udp_lib_checksum_complete() returned 0. + * This means that __skb_checksum_complete() might have + * set skb->csum_valid to 1. + * On 64bit platforms, we can set csum_unnecessary + * to true, but only if the skb is not shared. + */ +#if BITS_PER_LONG == 64 + if (!skb_shared(skb)) + udp_skb_scratch(skb)->csum_unnecessary = true; +#endif +} + static int udp_skb_truesize(struct sk_buff *skb) { return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; @@ -1320,7 +1338,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) * queue contains some other skb */ rmem = atomic_add_return(size, &sk->sk_rmem_alloc); - if (rmem > (size + sk->sk_rcvbuf)) + if (rmem > (size + (unsigned int)sk->sk_rcvbuf)) goto uncharge_drop; spin_lock(&list->lock); @@ -1426,10 +1444,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk, *total += skb->truesize; kfree_skb(skb); } else { - /* the csum related bits could be changed, refresh - * the scratch area - */ - udp_set_dev_scratch(skb); + udp_skb_csum_unnecessary_set(skb); break; } } @@ -1453,7 +1468,7 @@ static int first_packet_length(struct sock *sk) spin_lock_bh(&rcvq->lock); skb = __first_packet_length(sk, rcvq, &total); - if (!skb && !skb_queue_empty(sk_queue)) { + if (!skb && !skb_queue_empty_lockless(sk_queue)) { spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, rcvq); spin_unlock(&sk_queue->lock); @@ -1528,7 +1543,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, return skb; } - if (skb_queue_empty(sk_queue)) { + if (skb_queue_empty_lockless(sk_queue)) { spin_unlock_bh(&queue->lock); goto busy_check; } @@ -1555,7 +1570,7 @@ busy_check: break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(sk_queue)); + } while (!skb_queue_empty_lockless(sk_queue)); /* sk_queue is empty, reader_queue may contain peeked packets */ } while (timeo && @@ -2535,7 +2550,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) + if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) mask |= POLLIN | POLLRDNORM; sock_rps_record_flow(sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 4b586e7d5637..08f00225ed1b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -111,7 +111,8 @@ static void _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) { const struct iphdr *iph = ip_hdr(skb); - u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + int ihl = iph->ihl; + u8 *xprth = skb_network_header(skb) + ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; @@ -122,6 +123,11 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_mark = skb->mark; fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; + if (!ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_UDP: @@ -133,7 +139,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ports; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; @@ -146,7 +152,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 2 - skb->data)) { u8 *icmp; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; icmp = xprth; fl4->fl4_icmp_type = icmp[0]; @@ -159,7 +165,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be32 *ehdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; @@ -171,7 +177,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 8 - skb->data)) { __be32 *ah_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; @@ -183,7 +189,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) pskb_may_pull(skb, xprth + 4 - skb->data)) { __be16 *ipcomp_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); @@ -196,7 +202,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) __be16 *greflags; __be32 *gre_hdr; - xprth = skb_network_header(skb) + iph->ihl * 4; + xprth = skb_network_header(skb) + ihl * 4; greflags = (__be16 *)xprth; gre_hdr = (__be32 *)xprth; @@ -213,19 +219,16 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; } } - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9ac6f6232294..a81201dd3a1a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1124,7 +1124,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) list_for_each_entry(ifa, &idev->addr_list, if_list) { if (ifa == ifp) continue; - if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + if (ifa->prefix_len != ifp->prefix_len || + !ipv6_prefix_equal(&ifa->addr, &ifp->addr, ifp->prefix_len)) continue; if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) @@ -5546,13 +5547,20 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: /* - * If the address was optimistic - * we inserted the route at the start of - * our DAD process, so we don't need - * to do it again + * If the address was optimistic we inserted the route at the + * start of our DAD process, so we don't need to do it again. + * If the device was taken down in the middle of the DAD + * cycle there is a race where we could get here without a + * host route, so nothing to insert. That will be fixed when + * the device is brought up. */ - if (!rcu_access_pointer(ifp->rt->rt6i_node)) + if (ifp->rt && !rcu_access_pointer(ifp->rt->rt6i_node)) { ip6_ins_rt(ifp->rt); + } else if (!ifp->rt && (ifp->idev->dev->flags & IFF_UP)) { + pr_warn("BUG: Address %pI6c on device %s is missing its host route.\n", + &ifp->addr, ifp->idev->dev->name); + } + if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index aed6a4f0fa2f..081c3f427f02 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -275,7 +275,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb->len += tailen; skb->data_len += tailen; skb->truesize += tailen; - if (sk) + if (sk && sk_fullsock(sk)) refcount_add(tailen, &sk->sk_wmem_alloc); goto out; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 9a31d13bf180..890adadcda16 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -150,7 +150,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = inet6_csk_route_socket(sk, &fl6); return IS_ERR(dst) ? NULL : dst; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 6dc93ac28261..24a21979d7df 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -118,7 +118,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (sk->sk_bound_dev_if) score++; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; @@ -137,9 +137,10 @@ struct sock *inet6_lookup_listener(struct net *net, int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet6_exact_dif_match(net, skb); struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; u32 phash = 0; - sk_for_each(sk, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 15535ee327c5..c7458a606e2e 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -94,15 +94,21 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) return fl; } +static void fl_free_rcu(struct rcu_head *head) +{ + struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); + + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); + kfree(fl->opt); + kfree(fl); +} + static void fl_free(struct ip6_flowlabel *fl) { - if (fl) { - if (fl->share == IPV6_FL_S_PROCESS) - put_pid(fl->owner.pid); - kfree(fl->opt); - kfree_rcu(fl, rcu); - } + if (fl) + call_rcu(&fl->rcu, fl_free_rcu); } static void fl_release(struct ip6_flowlabel *fl) @@ -248,9 +254,9 @@ struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) rcu_read_lock_bh(); for_each_sk_fl_rcu(np, sfl) { struct ip6_flowlabel *fl = sfl->fl; - if (fl->label == label) { + + if (fl->label == label && atomic_inc_not_zero(&fl->users)) { fl->lastuse = jiffies; - atomic_inc(&fl->users); rcu_read_unlock_bh(); return fl; } @@ -617,7 +623,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) goto done; } fl1 = sfl->fl; - atomic_inc(&fl1->users); + if (!atomic_inc_not_zero(&fl1->users)) + fl1 = NULL; break; } } @@ -634,9 +641,9 @@ recheck: if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || ((fl1->share == IPV6_FL_S_PROCESS) && - (fl1->owner.pid == fl->owner.pid)) || + (fl1->owner.pid != fl->owner.pid)) || ((fl1->share == IPV6_FL_S_USER) && - uid_eq(fl1->owner.uid, fl->owner.uid))) + !uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -ENOMEM; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4228f3b2f347..726ba41133a3 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -527,7 +527,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, /* TooBig packet may have updated dst->dev's mtu */ if (dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 9ee208a348f5..e41070fb4fc0 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -173,6 +173,16 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (ipv6_addr_is_multicast(&hdr->saddr)) goto err; + /* While RFC4291 is not explicit about v4mapped addresses + * in IPv6 headers, it seems clear linux dual-stack + * model can not deal properly with these. + * Security models could be fooled by ::ffff:127.0.0.1 for example. + * + * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02 + */ + if (ipv6_addr_v4mapped(&hdr->saddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7ca8264cbdf9..2af849ba33c9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -611,7 +611,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; - unsigned int mtu, hlen, left, len; + unsigned int mtu, hlen, left, len, nexthdr_offset; int hroom, troom; __be32 frag_id; int ptr, offset = 0, err = 0; @@ -622,6 +622,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, goto fail; hlen = err; nexthdr = *prevhdr; + nexthdr_offset = prevhdr - skb_network_header(skb); mtu = ip6_skb_dst_mtu(skb); @@ -656,6 +657,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, (err = skb_checksum_help(skb))) goto fail; + prevhdr = skb_network_header(skb) + nexthdr_offset; hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { unsigned int first_len = skb_pagelen(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 1812c2a748ff..5bc2788e6ba4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -633,7 +633,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt) || - rt->dst.dev->type != ARPHRD_TUNNEL) { + rt->dst.dev->type != ARPHRD_TUNNEL6) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; @@ -643,7 +643,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) + skb_dst(skb2)->dev->type != ARPHRD_TUNNEL6) goto out; } @@ -652,7 +652,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst_update_pmtu(skb2, rel_info); + skb_dst_update_pmtu_no_confirm(skb2, rel_info); } if (rel_type == ICMP_REDIRECT) skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); @@ -1138,7 +1138,7 @@ route_lookup: mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? IPV6_MIN_MTU : IPV4_MIN_MTU); - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; @@ -1280,12 +1280,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); - skb_set_inner_ipproto(skb, IPPROTO_IPIP); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, @@ -1371,12 +1370,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); - skb_set_inner_ipproto(skb, IPPROTO_IPV6); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 6b2416b4a53e..557fe3880a3f 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -483,7 +483,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b2fdb3fdd217..459f282d90e1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2002,10 +2002,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTFORWDATAGRAMS); - __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTOCTETS, skb->len); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_OUTOCTETS, skb->len); return dst_output(net, sk, skb); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bd269e78272a..611dc5d55fa0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -772,12 +772,13 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) im->idev = pmc->idev; im->mca_crcount = idev->mc_qrv; if (im->mca_sfmode == MCAST_INCLUDE) { - im->mca_tomb = pmc->mca_tomb; - im->mca_sources = pmc->mca_sources; + swap(im->mca_tomb, pmc->mca_tomb); + swap(im->mca_sources, pmc->mca_sources); for (psf = im->mca_sources; psf; psf = psf->sf_next) psf->sf_crcount = im->mca_crcount; } in6_dev_put(pmc->idev); + ip6_mc_clear_src(pmc); kfree(pmc); } spin_unlock_bh(&im->mca_lock); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 1f8b1a433b5d..a776fbc3b2a9 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,9 +24,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) struct sock *sk = sk_to_full_sk(skb->sk); unsigned int hh_len; struct dst_entry *dst; + int strict = (ipv6_addr_type(&iph->daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct flowi6 fl6 = { .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if : - rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0, + strict ? skb_dst(skb)->dev->ifindex : 0, .flowi6_mark = skb->mark, .flowi6_uid = sock_net_uid(net, sk), .daddr = iph->daddr, diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 40eb16bd9786..d535768bea0f 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -58,7 +58,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, if (rpfilter_addr_linklocal(&iph->saddr)) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6.flowi6_oif = dev->ifindex; - } else if ((flags & XT_RPFILTER_LOOSE) == 0) + /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */ + } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) || + (flags & XT_RPFILTER_LOOSE) == 0) fl6.flowi6_oif = dev->ifindex; rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); @@ -73,7 +75,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, goto out; } - if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) + if (rt->rt6i_idev->dev == dev || + l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex || + (flags & XT_RPFILTER_LOOSE)) ret = true; out: ip6_rt_put(rt); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 237fb04c6716..35d5a76867d0 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -33,9 +33,8 @@ #include <net/sock.h> #include <net/snmp.h> -#include <net/inet_frag.h> +#include <net/ipv6_frag.h> -#include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/rawv6.h> @@ -52,14 +51,6 @@ static const char nf_frags_cache_name[] = "nf-frags"; -struct nf_ct_frag6_skb_cb -{ - struct inet6_skb_parm h; - int offset; -}; - -#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) - static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL @@ -145,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) } #endif +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); + static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); @@ -159,7 +153,7 @@ static void nf_ct_frag6_expire(struct timer_list *t) fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, nf_frag.frags); - ip6_expire_frag_queue(net, fq); + ip6frag_expire_frag_queue(net, fq); } /* Creation primitives. */ @@ -186,9 +180,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { - struct sk_buff *prev, *next; unsigned int payload_len; - int offset, end; + struct net_device *dev; + struct sk_buff *prev; + int offset, end, err; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) { @@ -263,55 +258,25 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, goto err; } - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; + /* Makes sure compiler wont do silly aliasing games */ + barrier(); + prev = fq->q.fragments_tail; - if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (NFCT_FRAG6_CB(next)->offset >= offset) - break; /* bingo! */ - prev = next; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) { + if (err == IPFRAG_DUP) { + /* No error for duplicates, pretend they got queued. */ + kfree_skb(skb); + return -EINPROGRESS; + } + goto insert_error; } -found: - /* RFC5722, Section 4: - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments, including those not yet received) MUST be silently - * discarded. - */ + if (dev) + fq->iif = dev->ifindex; - /* Check for overlap with preceding fragment. */ - if (prev && - (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset) - goto discard_fq; - - /* Look for overlap with succeeding segment. */ - if (next && NFCT_FRAG6_CB(next)->offset < end) - goto discard_fq; - - NFCT_FRAG6_CB(skb)->offset = offset; - - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; - - if (skb->dev) { - fq->iif = skb->dev->ifindex; - skb->dev = NULL; - } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; @@ -327,11 +292,27 @@ found: fq->q.flags |= INET_FRAG_FIRST_IN; } - return 0; + if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + err = nf_ct_frag6_reasm(fq, skb, prev, dev); + skb->_skb_refdst = orefdst; -discard_fq: + /* After queue has assumed skb ownership, only 0 or + * -EINPROGRESS must be returned. + */ + return err ? -EINPROGRESS : 0; + } + + skb_dst_drop(skb); + return -EINPROGRESS; + +insert_error: inet_frag_kill(&fq->q); err: + skb_dst_drop(skb); return -EINVAL; } @@ -341,147 +322,67 @@ err: * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. - * - * returns true if *prev skb has been transformed into the reassembled - * skb, false otherwise. */ -static bool -nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) +static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len, delta; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); - WARN_ON(head == NULL); - WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); - ecn = ip_frag_ecn_table[fq->ecn]; if (unlikely(ecn == 0xff)) - return false; + goto err; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto err; + + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n", payload_len); - return false; - } - - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - return false; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(fq->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (clone == NULL) - return false; - - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - - add_frag_mem_limit(fq->q.net, clone->truesize); - } - - /* morph head into last received skb: prev. - * - * This allows callers of ipv6 conntrack defrag to continue - * to use the last skb(frag) passed into the reasm engine. - * The last skb frag 'silently' turns into the full reassembled skb. - * - * Since prev is also part of q->fragments we have to clone it first. - */ - if (head != prev) { - struct sk_buff *iter; - - fp = skb_clone(prev, GFP_ATOMIC); - if (!fp) - return false; - - fp->next = prev->next; - - iter = head; - while (iter) { - if (iter->next == prev) { - iter->next = fp; - break; - } - iter = iter->next; - } - - skb_morph(prev, head); - prev->next = head->next; - consume_skb(head); - head = prev; + goto err; } /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ - skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); - - skb_shinfo(head)->frag_list = head->next; - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); - - for (fp = head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - fp->sk = NULL; - } - sub_frag_mem_limit(fq->q.net, head->truesize); + skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(skb); - head->ignore_df = 1; - head->next = NULL; - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; + inet_frag_reasm_finish(&fq->q, skb, reasm_data); + + skb->ignore_df = 1; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_partial(skb_network_header(head), - skb_network_header_len(head), - head->csum); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_partial(skb_network_header(skb), + skb_network_header_len(skb), + skb->csum); fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; + + return 0; - return true; +err: + inet_frag_kill(&fq->q); + return -EINVAL; } /* @@ -550,7 +451,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { u16 savethdr = skb->transport_header; - struct net_device *dev = skb->dev; int fhoff, nhoff, ret; struct frag_hdr *fhdr; struct frag_queue *fq; @@ -573,10 +473,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - return -EINVAL; - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); @@ -588,24 +484,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) spin_lock_bh(&fq->q.lock); ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff); - if (ret < 0) { - if (ret == -EPROTO) { - skb->transport_header = savethdr; - ret = 0; - } - goto out_unlock; - } - - /* after queue has assumed skb ownership, only 0 or -EINPROGRESS - * must be returned. - */ - ret = -EINPROGRESS; - if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len && - nf_ct_frag6_reasm(fq, skb, dev)) + if (ret == -EPROTO) { + skb->transport_header = savethdr; ret = 0; + } -out_unlock: spin_unlock_bh(&fq->q.lock); inet_frag_put(&fq->q); return ret; @@ -641,16 +524,24 @@ static struct pernet_operations nf_ct_net_ops = { .exit = nf_ct_net_exit, }; +static const struct rhashtable_params nfct_rhash_params = { + .head_offset = offsetof(struct inet_frag_queue, node), + .hashfn = ip6frag_key_hashfn, + .obj_hashfn = ip6frag_obj_hashfn, + .obj_cmpfn = ip6frag_obj_cmpfn, + .automatic_shrinking = true, +}; + int nf_ct_frag6_init(void) { int ret = 0; - nf_frags.constructor = ip6_frag_init; + nf_frags.constructor = ip6frag_init; nf_frags.destructor = NULL; nf_frags.qsize = sizeof(struct frag_queue); nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.frags_cache_name = nf_frags_cache_name; - nf_frags.rhash_params = ip6_rhash_params; + nf_frags.rhash_params = nfct_rhash_params; ret = inet_frags_init(&nf_frags); if (ret) goto out; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index b326da59257f..123bfb13a5d1 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -14,8 +14,7 @@ #include <linux/skbuff.h> #include <linux/icmp.h> #include <linux/sysctl.h> -#include <net/ipv6.h> -#include <net/inet_frag.h> +#include <net/ipv6_frag.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_bridge.h> diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 98f61fcb9108..b0f3745d1bee 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -88,18 +88,30 @@ static struct notifier_block masq_dev_notifier = { struct masq_dev_work { struct work_struct work; struct net *net; + struct in6_addr addr; int ifindex; }; +static int inet_cmp(struct nf_conn *ct, void *work) +{ + struct masq_dev_work *w = (struct masq_dev_work *)work; + struct nf_conntrack_tuple *tuple; + + if (!device_cmp(ct, (void *)(long)w->ifindex)) + return 0; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); +} + static void iterate_cleanup_work(struct work_struct *work) { struct masq_dev_work *w; - long index; w = container_of(work, struct masq_dev_work, work); - index = w->ifindex; - nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0); + nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0); put_net(w->net); kfree(w); @@ -148,6 +160,7 @@ static int masq_inet_event(struct notifier_block *this, INIT_WORK(&w->work, iterate_cleanup_work); w->ifindex = dev->ifindex; w->net = net; + w->addr = ifa->addr; schedule_work(&w->work); return NOTIFY_DONE; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 4fe7c90962dd..868ae23dbae1 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -10,15 +10,25 @@ #include <net/secure_seq.h> #include <linux/netfilter.h> -static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, +static u32 __ipv6_select_ident(struct net *net, const struct in6_addr *dst, const struct in6_addr *src) { + const struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + .src = *src, + }; u32 hash, id; - hash = __ipv6_addr_jhash(dst, hashrnd); - hash = __ipv6_addr_jhash(src, hash); - hash ^= net_hash_mix(net); + /* Note the following code is not safe, but this is okay. */ + if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) + get_random_bytes(&net->ipv4.ip_id_key, + sizeof(net->ipv4.ip_id_key)); + + hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -41,7 +51,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, */ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { - static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; struct in6_addr *addrs; u32 id; @@ -53,11 +62,7 @@ __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) if (!addrs) return 0; - net_get_random_once(&ip6_proxy_idents_hashrnd, - sizeof(ip6_proxy_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, - &addrs[1], &addrs[0]); + id = __ipv6_select_ident(net, &addrs[1], &addrs[0]); return htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); @@ -66,12 +71,9 @@ __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) { - static u32 ip6_idents_hashrnd __read_mostly; u32 id; - net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr); + id = __ipv6_select_ident(net, daddr, saddr); return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ac826dd338ff..d5cdba8213a4 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -233,7 +233,7 @@ static int __net_init ping_v6_proc_init_net(struct net *net) return ping_proc_register(net, &ping_v6_seq_afinfo); } -static void __net_init ping_v6_proc_exit_net(struct net *net) +static void __net_exit ping_v6_proc_exit_net(struct net *net) { return ping_proc_unregister(net, &ping_v6_seq_afinfo); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index f08cc6527339..ac428311965f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -288,7 +288,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) goto out_unlock; + } + if (sk->sk_bound_dev_if) { err = -ENODEV; dev = dev_get_by_index_rcu(sock_net(sk), sk->sk_bound_dev_if); @@ -780,6 +782,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct sockcm_cookie sockc; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; + int hdrincl; u16 proto; int err; @@ -793,6 +796,13 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + /* hdrincl should be READ_ONCE(inet->hdrincl) + * but READ_ONCE() doesn't work with bit fields. + * Doing this indirectly yields the same result. + */ + hdrincl = inet->hdrincl; + hdrincl = READ_ONCE(hdrincl); + /* * Get and verify the address. */ @@ -887,11 +897,14 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; - rfv.msg = msg; - rfv.hlen = 0; - err = rawv6_probe_proto_opt(&rfv, &fl6); - if (err) - goto out; + + if (!hdrincl) { + rfv.msg = msg; + rfv.hlen = 0; + err = rawv6_probe_proto_opt(&rfv, &fl6); + if (err) + goto out; + } if (!ipv6_addr_any(daddr)) fl6.daddr = *daddr; @@ -908,7 +921,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - if (inet->hdrincl) + if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; if (ipc6.tclass < 0) @@ -931,7 +944,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto do_confirm; back_from_confirm: - if (inet->hdrincl) + if (hdrincl) err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags); else { ipc6.opt = opt; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2a8c680b67cd..fe797b29ca89 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -57,18 +57,11 @@ #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> -#include <net/inet_frag.h> +#include <net/ipv6_frag.h> #include <net/inet_ecn.h> static const char ip6_frag_cache_name[] = "ip6-frags"; -struct ip6frag_skb_cb { - struct inet6_skb_parm h; - int offset; -}; - -#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) - static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); @@ -76,63 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) static struct inet_frags ip6_frags; -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev); - -void ip6_frag_init(struct inet_frag_queue *q, const void *a) -{ - struct frag_queue *fq = container_of(q, struct frag_queue, q); - const struct frag_v6_compare_key *key = a; - - q->key.v6 = *key; - fq->ecn = 0; -} -EXPORT_SYMBOL(ip6_frag_init); - -void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) -{ - struct net_device *dev = NULL; - struct sk_buff *head; - - rcu_read_lock(); - spin_lock(&fq->q.lock); - - if (fq->q.flags & INET_FRAG_COMPLETE) - goto out; - - inet_frag_kill(&fq->q); - - dev = dev_get_by_index_rcu(net, fq->iif); - if (!dev) - goto out; - - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); - - /* Don't send error if the first segment did not arrive. */ - head = fq->q.fragments; - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) - goto out; - - /* But use as source device on which LAST ARRIVED - * segment was received. And do not use fq->dev - * pointer directly, device might already disappeared. - */ - head->dev = dev; - skb_get(head); - spin_unlock(&fq->q.lock); - - icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); - kfree_skb(head); - goto out_rcu_unlock; - -out: - spin_unlock(&fq->q.lock); -out_rcu_unlock: - rcu_read_unlock(); - inet_frag_put(&fq->q); -} -EXPORT_SYMBOL(ip6_expire_frag_queue); +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev); static void ip6_frag_expire(struct timer_list *t) { @@ -143,7 +81,7 @@ static void ip6_frag_expire(struct timer_list *t) fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, ipv6.frags); - ip6_expire_frag_queue(net, fq); + ip6frag_expire_frag_queue(net, fq); } static struct frag_queue * @@ -170,27 +108,29 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) } static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, - struct frag_hdr *fhdr, int nhoff) + struct frag_hdr *fhdr, int nhoff, + u32 *prob_offset) { - struct sk_buff *prev, *next; - struct net_device *dev; - int offset, end, fragsize; struct net *net = dev_net(skb_dst(skb)->dev); + int offset, end, fragsize; + struct sk_buff *prev_tail; + struct net_device *dev; + int err = -ENOENT; u8 ecn; if (fq->q.flags & INET_FRAG_COMPLETE) goto err; + err = -EINVAL; offset = ntohs(fhdr->frag_off) & ~0x7; end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - ((u8 *)&fhdr->frag_off - - skb_network_header(skb))); + *prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb); + /* note that if prob_offset is set, the skb is freed elsewhere, + * we do not free it here. + */ return -1; } @@ -210,7 +150,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, */ if (end < fq->q.len || ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) - goto err; + goto discard_fq; fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { @@ -221,79 +161,42 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - offsetof(struct ipv6hdr, payload_len)); + *prob_offset = offsetof(struct ipv6hdr, payload_len); return -1; } if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ if (fq->q.flags & INET_FRAG_LAST_IN) - goto err; + goto discard_fq; fq->q.len = end; } } if (end == offset) - goto err; + goto discard_fq; + err = -ENOMEM; /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) - goto err; - - if (pskb_trim_rcsum(skb, end - offset)) - goto err; - - /* Find out which fragments are in front and at the back of us - * in the chain of fragments so far. We must know where to put - * this fragment, right? - */ - prev = fq->q.fragments_tail; - if (!prev || FRAG6_CB(prev)->offset < offset) { - next = NULL; - goto found; - } - prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { - if (FRAG6_CB(next)->offset >= offset) - break; /* bingo! */ - prev = next; - } - -found: - /* RFC5722, Section 4, amended by Errata ID : 3089 - * When reassembling an IPv6 datagram, if - * one or more its constituent fragments is determined to be an - * overlapping fragment, the entire datagram (and any constituent - * fragments) MUST be silently discarded. - */ - - /* Check for overlap with preceding fragment. */ - if (prev && - (FRAG6_CB(prev)->offset + prev->len) > offset) goto discard_fq; - /* Look for overlap with succeeding segment. */ - if (next && FRAG6_CB(next)->offset < end) + err = pskb_trim_rcsum(skb, end - offset); + if (err) goto discard_fq; - FRAG6_CB(skb)->offset = offset; + /* Note : skb->rbnode and skb->dev share the same location. */ + dev = skb->dev; + /* Makes sure compiler wont do silly aliasing games */ + barrier(); - /* Insert this fragment in the chain of fragments. */ - skb->next = next; - if (!next) - fq->q.fragments_tail = skb; - if (prev) - prev->next = skb; - else - fq->q.fragments = skb; + prev_tail = fq->q.fragments_tail; + err = inet_frag_queue_insert(&fq->q, skb, offset, end); + if (err) + goto insert_error; - dev = skb->dev; - if (dev) { + if (dev) fq->iif = dev->ifindex; - skb->dev = NULL; - } + fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; @@ -313,44 +216,48 @@ found: if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { - int res; unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = ip6_frag_reasm(fq, prev, dev); + err = ip6_frag_reasm(fq, skb, prev_tail, dev); skb->_skb_refdst = orefdst; - return res; + return err; } skb_dst_drop(skb); - return -1; + return -EINPROGRESS; +insert_error: + if (err == IPFRAG_DUP) { + kfree_skb(skb); + return -EINVAL; + } + err = -EINVAL; + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASM_OVERLAPS); discard_fq: inet_frag_kill(&fq->q); -err: __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); +err: kfree_skb(skb); - return -1; + return err; } /* * Check if this packet is complete. - * Returns NULL on failure by any reason, and pointer - * to current nexthdr field in reassembled frame. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev) +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, + struct sk_buff *prev_tail, struct net_device *dev) { struct net *net = container_of(fq->q.net, struct net, ipv6.frags); - struct sk_buff *fp, *head = fq->q.fragments; - int payload_len, delta; unsigned int nhoff; - int sum_truesize; + void *reasm_data; + int payload_len; u8 ecn; inet_frag_kill(&fq->q); @@ -359,120 +266,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, if (unlikely(ecn == 0xff)) goto out_fail; - /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); - - if (!fp) - goto out_oom; - - fp->next = head->next; - if (!fp->next) - fq->q.fragments_tail = fp; - prev->next = fp; - - skb_morph(head, fq->q.fragments); - head->next = fq->q.fragments->next; - - consume_skb(fq->q.fragments); - fq->q.fragments = head; - } - - WARN_ON(head == NULL); - WARN_ON(FRAG6_CB(head)->offset != 0); + reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); + if (!reasm_data) + goto out_oom; - /* Unfragmented part is taken from the first segment. */ - payload_len = ((head->data - skb_network_header(head)) - + payload_len = ((skb->data - skb_network_header(skb)) - sizeof(struct ipv6hdr) + fq->q.len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; - delta = - head->truesize; - - /* Head of list must not be cloned. */ - if (skb_unclone(head, GFP_ATOMIC)) - goto out_oom; - - delta += head->truesize; - if (delta) - add_frag_mem_limit(fq->q.net, delta); - - /* If the first fragment is fragmented itself, we split - * it to two chunks: the first with data and paged part - * and the second, holding only fragments. */ - if (skb_has_frag_list(head)) { - struct sk_buff *clone; - int i, plen = 0; - - clone = alloc_skb(0, GFP_ATOMIC); - if (!clone) - goto out_oom; - clone->next = head->next; - head->next = clone; - skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_frag_list_init(head); - for (i = 0; i < skb_shinfo(head)->nr_frags; i++) - plen += skb_frag_size(&skb_shinfo(head)->frags[i]); - clone->len = clone->data_len = head->data_len - plen; - head->data_len -= clone->len; - head->len -= clone->len; - clone->csum = 0; - clone->ip_summed = head->ip_summed; - add_frag_mem_limit(fq->q.net, clone->truesize); - } - /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ nhoff = fq->nhoffset; - skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, - (head->data - head->head) - sizeof(struct frag_hdr)); - if (skb_mac_header_was_set(head)) - head->mac_header += sizeof(struct frag_hdr); - head->network_header += sizeof(struct frag_hdr); - - skb_reset_transport_header(head); - skb_push(head, head->data - skb_network_header(head)); - - sum_truesize = head->truesize; - for (fp = head->next; fp;) { - bool headstolen; - int delta; - struct sk_buff *next = fp->next; - - sum_truesize += fp->truesize; - if (head->ip_summed != fp->ip_summed) - head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) - head->csum = csum_add(head->csum, fp->csum); - - if (skb_try_coalesce(head, fp, &headstolen, &delta)) { - kfree_skb_partial(fp, headstolen); - } else { - if (!skb_shinfo(head)->frag_list) - skb_shinfo(head)->frag_list = fp; - head->data_len += fp->len; - head->len += fp->len; - head->truesize += fp->truesize; - } - fp = next; - } - sub_frag_mem_limit(fq->q.net, sum_truesize); + skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0]; + memmove(skb->head + sizeof(struct frag_hdr), skb->head, + (skb->data - skb->head) - sizeof(struct frag_hdr)); + if (skb_mac_header_was_set(skb)) + skb->mac_header += sizeof(struct frag_hdr); + skb->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(skb); + + inet_frag_reasm_finish(&fq->q, skb, reasm_data); - head->next = NULL; - head->dev = dev; - head->tstamp = fq->q.stamp; - ipv6_hdr(head)->payload_len = htons(payload_len); - ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); - IP6CB(head)->nhoff = nhoff; - IP6CB(head)->flags |= IP6SKB_FRAGMENTED; - IP6CB(head)->frag_max_size = fq->q.max_size; + skb->dev = dev; + ipv6_hdr(skb)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn); + IP6CB(skb)->nhoff = nhoff; + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; + IP6CB(skb)->frag_max_size = fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ - skb_postpush_rcsum(head, skb_network_header(head), - skb_network_header_len(head)); + skb_postpush_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); @@ -480,6 +307,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, fq->q.fragments = NULL; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; + fq->q.last_run_head = NULL; return 1; out_oversize: @@ -491,6 +319,7 @@ out_fail: rcu_read_lock(); __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); + inet_frag_kill(&fq->q); return -1; } @@ -529,22 +358,26 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU && - fhdr->frag_off & htons(IP6_MF)) - goto fail_hdr; - iif = skb->dev ? skb->dev->ifindex : 0; fq = fq_find(net, fhdr->identification, hdr, iif); if (fq) { + u32 prob_offset = 0; int ret; spin_lock(&fq->q.lock); fq->iif = iif; - ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff, + &prob_offset); spin_unlock(&fq->q.lock); inet_frag_put(&fq->q); + if (prob_offset) { + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + /* icmpv6_param_prob() calls kfree_skb(skb) */ + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset); + } return ret; } @@ -712,42 +545,19 @@ static struct pernet_operations ip6_frags_ops = { .exit = ipv6_frags_exit_net, }; -static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) -{ - return jhash2(data, - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); -} - -static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) -{ - const struct inet_frag_queue *fq = data; - - return jhash2((const u32 *)&fq->key.v6, - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); -} - -static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) -{ - const struct frag_v6_compare_key *key = arg->key; - const struct inet_frag_queue *fq = ptr; - - return !!memcmp(&fq->key, key, sizeof(*key)); -} - -const struct rhashtable_params ip6_rhash_params = { +static const struct rhashtable_params ip6_rhash_params = { .head_offset = offsetof(struct inet_frag_queue, node), - .hashfn = ip6_key_hashfn, - .obj_hashfn = ip6_obj_hashfn, - .obj_cmpfn = ip6_obj_cmpfn, + .hashfn = ip6frag_key_hashfn, + .obj_hashfn = ip6frag_obj_hashfn, + .obj_cmpfn = ip6frag_obj_cmpfn, .automatic_shrinking = true, }; -EXPORT_SYMBOL(ip6_rhash_params); int __init ipv6_frag_init(void) { int ret; - ip6_frags.constructor = ip6_frag_init; + ip6_frags.constructor = ip6frag_init; ip6_frags.destructor = NULL; ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.frag_expire = ip6_frag_expire; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 74dd35d6567c..b81522bcf223 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -93,7 +93,8 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void rt6_dst_from_metrics_check(struct rt6_info *rt); @@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } @@ -1471,7 +1473,8 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, - const struct ipv6hdr *iph, u32 mtu) + const struct ipv6hdr *iph, u32 mtu, + bool confirm_neigh) { const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; @@ -1489,7 +1492,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, daddr = NULL; saddr = NULL; } - dst_confirm_neigh(dst, daddr); + + if (confirm_neigh) + dst_confirm_neigh(dst, daddr); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) return; @@ -1518,9 +1524,11 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, } static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { - __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, + confirm_neigh); } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, @@ -1540,7 +1548,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -3024,6 +3032,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; } + if (tb[RTA_VIA]) { + NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute"); + goto errout; + } if (tb[RTA_DST]) { int plen = (rtm->rtm_dst_len + 7) >> 3; @@ -3491,7 +3503,7 @@ static int rt6_fill_node(struct net *net, table = rt->rt6i_table->tb6_id; else table = RT6_TABLE_UNSPEC; - rtm->rtm_table = table; + rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; if (rt->rt6i_flags & RTF_REJECT) { diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index c81407770956..fdeb90dd1c82 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -220,9 +220,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(msg, hdr); - genlmsg_reply(msg, info); - - return 0; + return genlmsg_reply(msg, info); nla_put_failure: rcu_read_unlock(); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5d00a38cd1cb..fb3f917db57a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -540,7 +540,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info) } err = 0; - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) + if (__in6_dev_get(skb->dev) && + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) goto out; if (t->parms.iph.daddr == 0) @@ -660,6 +661,10 @@ static int ipip6_rcv(struct sk_buff *skb) !net_eq(tunnel->net, dev_net(tunnel->dev)))) goto out; + /* skb can be uncloned in iptunnel_pull_header, so + * old iph is no longer valid + */ + iph = (const struct iphdr *)skb_mac_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -766,8 +771,9 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, pbw0 = tunnel->ip6rd.prefixlen >> 5; pbi0 = tunnel->ip6rd.prefixlen & 0x1f; - d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> - tunnel->ip6rd.relay_prefixlen; + d = tunnel->ip6rd.relay_prefixlen < 32 ? + (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> + tunnel->ip6rd.relay_prefixlen : 0; pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; if (pbi1 > 0) @@ -926,7 +932,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr) - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -1064,7 +1070,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); - if (tdev) { + if (tdev && !netif_is_l3_master(tdev)) { int t_hlen = tunnel->hlen + sizeof(struct iphdr); dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -1855,6 +1861,7 @@ static int __net_init sit_init_net(struct net *net) err_reg_dev: ipip6_dev_free(sitn->fb_tunnel_dev); + free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7b4ce3f9e2f4..5ec73cf386df 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -718,6 +718,7 @@ static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) { + bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = inet6_sk(sk_listener); @@ -725,7 +726,7 @@ static void tcp_v6_init_req(struct request_sock *req, ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ - if (!sk_listener->sk_bound_dev_if && + if ((!sk_listener->sk_bound_dev_if || l3_slave) && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8d185a0fc5af..a2ba7356fa65 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -170,7 +170,7 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; @@ -308,7 +308,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), &udp_table, skb); + inet6_sdif(skb), &udp_table, NULL); } EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); @@ -506,7 +506,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), 0, udptable, skb); + inet6_iif(skb), 0, udptable, NULL); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d6b012295b45..b0d80cef7c2b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -219,12 +219,13 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 4e438bc7ee87..b51368ebd1e6 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -144,6 +144,9 @@ static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; + + if (spi == XFRM6_TUNNEL_SPI_MAX) + break; } for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) { index = __xfrm6_tunnel_spi_check(net, spi); @@ -388,6 +391,10 @@ static void __exit xfrm6_tunnel_fini(void) xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6); unregister_pernet_subsys(&xfrm6_tunnel_net_ops); + /* Someone maybe has gotten the xfrm6_tunnel_spi. + * So need to wait it. + */ + rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 9bf997404918..7b4f3f865861 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -2059,14 +2059,14 @@ static int __init kcm_init(void) if (err) goto fail; - err = sock_register(&kcm_family_ops); - if (err) - goto sock_register_fail; - err = register_pernet_device(&kcm_net_ops); if (err) goto net_ops_fail; + err = sock_register(&kcm_family_ops); + if (err) + goto sock_register_fail; + err = kcm_proc_init(); if (err) goto proc_init_fail; @@ -2074,12 +2074,12 @@ static int __init kcm_init(void) return 0; proc_init_fail: - unregister_pernet_device(&kcm_net_ops); - -net_ops_fail: sock_unregister(PF_KCM); sock_register_fail: + unregister_pernet_device(&kcm_net_ops); + +net_ops_fail: proto_unregister(&kcm_proto); fail: @@ -2095,8 +2095,8 @@ fail: static void __exit kcm_exit(void) { kcm_proc_exit(); - unregister_pernet_device(&kcm_net_ops); sock_unregister(PF_KCM); + unregister_pernet_device(&kcm_net_ops); proto_unregister(&kcm_proto); destroy_workqueue(kcm_wq); diff --git a/net/key/af_key.c b/net/key/af_key.c index 3b209cbfe1df..f8f7065f7b62 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -196,30 +196,22 @@ static int pfkey_release(struct socket *sock) return 0; } -static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, - gfp_t allocation, struct sock *sk) +static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation, + struct sock *sk) { int err = -ENOBUFS; - sock_hold(sk); - if (*skb2 == NULL) { - if (refcount_read(&skb->users) != 1) { - *skb2 = skb_clone(skb, allocation); - } else { - *skb2 = skb; - refcount_inc(&skb->users); - } - } - if (*skb2 != NULL) { - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) { - skb_set_owner_r(*skb2, sk); - skb_queue_tail(&sk->sk_receive_queue, *skb2); - sk->sk_data_ready(sk); - *skb2 = NULL; - err = 0; - } + if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) + return err; + + skb = skb_clone(skb, allocation); + + if (skb) { + skb_set_owner_r(skb, sk); + skb_queue_tail(&sk->sk_receive_queue, skb); + sk->sk_data_ready(sk); + err = 0; } - sock_put(sk); return err; } @@ -234,7 +226,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; - struct sk_buff *skb2 = NULL; int err = -ESRCH; /* XXX Do we need something like netlink_overrun? I think @@ -253,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, * socket. */ if (pfk->promisc) - pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); + pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* the exact target will be processed later */ if (sk == one_sk) @@ -268,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, continue; } - err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk); + err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk); /* Error is cleared after successful sending to at least one * registered KM */ @@ -278,9 +269,8 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, rcu_read_unlock(); if (one_sk != NULL) - err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); + err = pfkey_broadcast_one(skb, allocation, one_sk); - kfree_skb(skb2); kfree_skb(skb); return err; } @@ -1961,8 +1951,10 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; + if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) + return -EINVAL; - t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */ + t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; @@ -2448,8 +2440,10 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc goto out; } err = pfkey_xfrm_policy2msg(out_skb, xp, dir); - if (err < 0) + if (err < 0) { + kfree_skb(out_skb); goto out; + } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; @@ -2700,8 +2694,10 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); - if (err < 0) + if (err < 0) { + kfree_skb(out_skb); return err; + } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 3c77507601c7..bec13226ce4f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -684,9 +684,6 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*lsa); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); @@ -716,6 +713,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = inet6_iif(skb); + *addr_len = sizeof(*lsa); } if (np->rxopt.all) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 8bef35aa8786..a7fcf48e9087 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1793,6 +1793,9 @@ static const struct proto_ops pppol2tp_ops = { .recvmsg = pppol2tp_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pppox_compat_ioctl, +#endif }; static const struct pppox_proto pppol2tp_proto = { diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index e15314e3b464..299de0c0508e 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -182,6 +182,7 @@ int lapb_unregister(struct net_device *dev) lapb = __lapb_devtostruct(dev); if (!lapb) goto out; + lapb_put(lapb); lapb_stop_t1timer(lapb); lapb_stop_t2timer(lapb); diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 4b60f68cb492..8354ae40ec85 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -372,6 +372,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { + skb_get(skb); llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -389,7 +390,8 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { - rc = llc_conn_send_pdu(sk, skb); + skb_get(skb); + llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } return rc; @@ -406,6 +408,7 @@ int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { + skb_get(skb); llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -916,7 +919,8 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk, llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { - rc = llc_conn_send_pdu(sk, skb); + skb_get(skb); + llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } return rc; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 56c3fb5cc805..444c13e752a0 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -30,7 +30,7 @@ #endif static int llc_find_offset(int state, int ev_type); -static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb); +static void llc_conn_send_pdus(struct sock *sk); static int llc_conn_service(struct sock *sk, struct sk_buff *skb); static int llc_exec_conn_trans_actions(struct sock *sk, struct llc_conn_state_trans *trans, @@ -193,11 +193,11 @@ out_skb_put: return rc; } -int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) +void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) { /* queue PDU to send to MAC layer */ skb_queue_tail(&sk->sk_write_queue, skb); - return llc_conn_send_pdus(sk, skb); + llc_conn_send_pdus(sk); } /** @@ -255,7 +255,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit) if (howmany_resend > 0) llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO; /* any PDUs to re-send are queued up; start sending to MAC */ - llc_conn_send_pdus(sk, NULL); + llc_conn_send_pdus(sk); out:; } @@ -296,7 +296,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit) if (howmany_resend > 0) llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO; /* any PDUs to re-send are queued up; start sending to MAC */ - llc_conn_send_pdus(sk, NULL); + llc_conn_send_pdus(sk); out:; } @@ -340,16 +340,12 @@ out: /** * llc_conn_send_pdus - Sends queued PDUs * @sk: active connection - * @hold_skb: the skb held by caller, or NULL if does not care * - * Sends queued pdus to MAC layer for transmission. When @hold_skb is - * NULL, always return 0. Otherwise, return 0 if @hold_skb is sent - * successfully, or 1 for failure. + * Sends queued pdus to MAC layer for transmission. */ -static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb) +static void llc_conn_send_pdus(struct sock *sk) { struct sk_buff *skb; - int ret = 0; while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); @@ -361,20 +357,10 @@ static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb) skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb); if (!skb2) break; - dev_queue_xmit(skb2); - } else { - bool is_target = skb == hold_skb; - int rc; - - if (is_target) - skb_get(skb); - rc = dev_queue_xmit(skb); - if (is_target) - ret = rc; + skb = skb2; } + dev_queue_xmit(skb); } - - return ret; } /** diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 260b3dc1b4a2..64d4bef04e73 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -127,9 +127,7 @@ void llc_sap_close(struct llc_sap *sap) list_del_rcu(&sap->node); spin_unlock_bh(&llc_sap_list_lock); - synchronize_rcu(); - - kfree(sap); + kfree_rcu(sap, rcu); } static struct packet_type llc_packet_type __read_mostly = { diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c index 94425e421213..9e4b6bcf6920 100644 --- a/net/llc/llc_output.c +++ b/net/llc/llc_output.c @@ -72,6 +72,8 @@ int llc_build_and_send_ui_pkt(struct llc_sap *sap, struct sk_buff *skb, rc = llc_mac_hdr_init(skb, skb->dev->dev_addr, dmac); if (likely(!rc)) rc = dev_queue_xmit(skb); + else + kfree_skb(skb); return rc; } diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index a94bd56bcac6..7ae4cc684d3a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -58,8 +58,10 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } @@ -81,8 +83,10 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } @@ -135,8 +139,10 @@ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d90928f50226..a7534950e60a 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -197,29 +197,22 @@ out: * After executing actions of the event, upper layer will be indicated * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. + * + * This function always consumes a reference to the skb. */ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - /* - * We have to hold the skb, because llc_sap_next_state - * will kfree it in the sending path and we need to - * look at the skb->cb, where we encode llc_sap_state_ev. - */ - skb_get(skb); ev->ind_cfm_flag = 0; llc_sap_next_state(sap, skb); - if (ev->ind_cfm_flag == LLC_IND) { - if (skb->sk->sk_state == TCP_LISTEN) - kfree_skb(skb); - else { - llc_save_primitive(skb->sk, skb, ev->prim); - /* queue skb to the user. */ - if (sock_queue_rcv_skb(skb->sk, skb)) - kfree_skb(skb); - } + if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { + llc_save_primitive(skb->sk, skb, ev->prim); + + /* queue skb to the user. */ + if (sock_queue_rcv_skb(skb->sk, skb) == 0) + return; } kfree_skb(skb); } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 204a8351efff..c29170e767a8 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -32,7 +32,7 @@ static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID && - !pdu->dsap ? 0 : 1; /* NULL DSAP value */ + !pdu->dsap; /* NULL DSAP value */ } static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) @@ -42,7 +42,7 @@ static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST && - !pdu->dsap ? 0 : 1; /* NULL DSAP */ + !pdu->dsap; /* NULL DSAP */ } static int llc_station_ac_send_xid_r(struct sk_buff *skb) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 197947a07f83..ed57db9b6086 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -361,6 +361,8 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state); + ieee80211_agg_stop_txq(sta, tid); + spin_unlock_bh(&sta->lock); ht_dbg(sta->sdata, "Tx BA session stop requested for %pM tid %u\n", diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 63558335e41e..8168c667d91d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -884,6 +884,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER; int err; + int prev_beacon_int; old = sdata_dereference(sdata->u.ap.beacon, sdata); if (old) @@ -906,6 +907,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->needed_rx_chains = sdata->local->rx_chains; + prev_beacon_int = sdata->vif.bss_conf.beacon_int; sdata->vif.bss_conf.beacon_int = params->beacon_interval; mutex_lock(&local->mtx); @@ -914,8 +916,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (!err) ieee80211_vif_copy_chanctx_to_vlans(sdata, false); mutex_unlock(&local->mtx); - if (err) + if (err) { + sdata->vif.bss_conf.beacon_int = prev_beacon_int; return err; + } /* * Apply control port protocol, this allows us to @@ -1455,6 +1459,11 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (is_multicast_ether_addr(mac)) return -EINVAL; + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && + sdata->vif.type == NL80211_IFTYPE_STATION && + !sdata->u.mgd.associated) + return -EINVAL; + sta = sta_info_alloc(sdata, mac, GFP_KERNEL); if (!sta) return -ENOMEM; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index c813207bb123..928b6b0464b8 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -490,9 +490,14 @@ static ssize_t ieee80211_if_fmt_aqm( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { struct ieee80211_local *local = sdata->local; - struct txq_info *txqi = to_txq_info(sdata->vif.txq); + struct txq_info *txqi; int len; + if (!sdata->vif.txq) + return 0; + + txqi = to_txq_info(sdata->vif.txq); + spin_lock_bh(&local->fq.lock); rcu_read_lock(); @@ -659,7 +664,9 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz); DEBUGFS_ADD(hw_queues); - if (sdata->local->ops->wake_tx_queue) + if (sdata->local->ops->wake_tx_queue && + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) DEBUGFS_ADD(aqm); } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index bb886e7db47f..f783d1377d9a 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -169,11 +169,16 @@ int drv_conf_tx(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return -EIO; - if (WARN_ONCE(params->cw_min == 0 || - params->cw_min > params->cw_max, - "%s: invalid CW_min/CW_max: %d/%d\n", - sdata->name, params->cw_min, params->cw_max)) + if (params->cw_min == 0 || params->cw_min > params->cw_max) { + /* + * If we can't configure hardware anyway, don't warn. We may + * never have initialized the CW parameters. + */ + WARN_ONCE(local->ops->conf_tx, + "%s: invalid CW_min/CW_max: %d/%d\n", + sdata->name, params->cw_min, params->cw_max); return -EINVAL; + } trace_drv_conf_tx(local, sdata, ac, params); if (local->ops->conf_tx) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4d82fe7d627c..284276b3e0b4 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1164,6 +1164,9 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + if (local->in_reconfig) + return; + if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 894937bcd479..0e209a88d88a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1405,7 +1405,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (WARN_ON(!chanctx_conf)) { + if (WARN_ON_ONCE(!chanctx_conf)) { rcu_read_unlock(); return NULL; } @@ -2150,6 +2150,9 @@ void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy, const u8 *addr); void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata); void ieee80211_tdls_chsw_work(struct work_struct *wk); +void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata, + const u8 *peer, u16 reason); +const char *ieee80211_get_reason_code_string(u16 reason_code); extern const struct ethtool_ops ieee80211_ethtool_ops; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 222c063244f5..6ce13e976b7a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1924,6 +1924,9 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); + if (sdata->vif.txq) + ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + synchronize_rcu(); if (sdata->dev) { diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 96e57d7c2872..c6edae051e9b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -922,6 +922,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) /* flush STAs and mpaths on this iface */ sta_info_flush(sdata); + ieee80211_free_keys(sdata, true); mesh_path_flush_by_iface(sdata); /* stop the beacon */ @@ -1209,7 +1210,8 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata) ifmsh->chsw_ttl = 0; /* Remove the CSA and MCSP elements from the beacon */ - tmp_csa_settings = rcu_dereference(ifmsh->csa); + tmp_csa_settings = rcu_dereference_protected(ifmsh->csa, + lockdep_is_held(&sdata->wdev.mtx)); RCU_INIT_POINTER(ifmsh->csa, NULL); if (tmp_csa_settings) kfree_rcu(tmp_csa_settings, rcu_head); @@ -1231,6 +1233,8 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, struct mesh_csa_settings *tmp_csa_settings; int ret = 0; + lockdep_assert_held(&sdata->wdev.mtx); + tmp_csa_settings = kmalloc(sizeof(*tmp_csa_settings), GFP_ATOMIC); if (!tmp_csa_settings) diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 97269caafecd..130022091205 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -23,7 +23,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ - return jhash_1word(*(u32 *)(addr+2), seed); + return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); } static const struct rhashtable_params mesh_rht_params = { @@ -448,17 +448,15 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, } while (unlikely(ret == -EEXIST && !mpath)); - if (ret && ret != -EEXIST) - return ERR_PTR(ret); - - /* At this point either new_mpath was added, or we found a - * matching entry already in the table; in the latter case - * free the unnecessary new entry. - */ - if (ret == -EEXIST) { + if (ret) { kfree(new_mpath); + + if (ret != -EEXIST) + return ERR_PTR(ret); + new_mpath = mpath; } + sdata->u.mesh.mesh_paths_generation++; return new_mpath; } @@ -488,6 +486,9 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata, &new_mpath->rhash, mesh_rht_params); + if (ret) + kfree(new_mpath); + sdata->u.mesh.mpp_paths_generation++; return ret; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4c59b5507e7a..36bd59ff49c4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1071,9 +1071,6 @@ static void ieee80211_chswitch_work(struct work_struct *work) goto out; } - /* XXX: shouldn't really modify cfg80211-owned data! */ - ifmgd->associated->channel = sdata->csa_chandef.chan; - ifmgd->csa_waiting_bcn = true; ieee80211_sta_reset_beacon_monitor(sdata); @@ -1870,6 +1867,16 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, } } + /* WMM specification requires all 4 ACIs. */ + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { + if (params[ac].cw_min == 0) { + sdata_info(sdata, + "AP has invalid WMM params (missing AC %d), using defaults\n", + ac); + return false; + } + } + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { mlme_dbg(sdata, "WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n", @@ -2423,7 +2430,8 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, rcu_read_lock(); ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID); - if (WARN_ON_ONCE(ssid == NULL)) + if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN, + "invalid SSID element (len=%d)", ssid ? ssid[1] : -1)) ssid_len = 0; else ssid_len = ssid[1]; @@ -2747,7 +2755,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, #define case_WLAN(type) \ case WLAN_REASON_##type: return #type -static const char *ieee80211_get_reason_code_string(u16 reason_code) +const char *ieee80211_get_reason_code_string(u16 reason_code) { switch (reason_code) { case_WLAN(UNSPECIFIED); @@ -2812,6 +2820,11 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, if (len < 24 + 2) return; + if (!ether_addr_equal(mgmt->bssid, mgmt->sa)) { + ieee80211_tdls_handle_disconnect(sdata, mgmt->sa, reason_code); + return; + } + if (ifmgd->associated && ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) { const u8 *bssid = ifmgd->associated->bssid; @@ -2861,6 +2874,11 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); + if (!ether_addr_equal(mgmt->bssid, mgmt->sa)) { + ieee80211_tdls_handle_disconnect(sdata, mgmt->sa, reason_code); + return; + } + sdata_info(sdata, "disassociated from %pM (Reason: %u=%s)\n", mgmt->sa, reason_code, ieee80211_get_reason_code_string(reason_code)); @@ -4739,7 +4757,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); - if (!ssidie) { + if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) { rcu_read_unlock(); kfree(assoc_data); return -EINVAL; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 4a5bdad9f303..e57811e4b91f 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -129,7 +129,7 @@ #define CCK_GROUP \ [MINSTREL_CCK_GROUP] = { \ - .streams = 0, \ + .streams = 1, \ .flags = 0, \ .duration = { \ CCK_DURATION_LIST(false), \ @@ -282,7 +282,8 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, break; /* short preamble */ - if (!(mi->supported[group] & BIT(idx))) + if ((mi->supported[group] & BIT(idx + 4)) && + (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)) idx += 4; } return &mi->groups[group].rates[idx]; @@ -1077,18 +1078,23 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, return; sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; + sample_idx %= MCS_GROUP_RATES; + + if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] && + (sample_idx >= 4) != txrc->short_preamble) + return; + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; rate->count = 1; - if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); rate->idx = mp->cck_rates[idx]; } else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(rate, sample_idx % MCS_GROUP_RATES, sample_group->streams); } else { - rate->idx = sample_idx % MCS_GROUP_RATES + - (sample_group->streams - 1) * 8; + rate->idx = sample_idx + (sample_group->streams - 1) * 8; } rate->flags = sample_group->flags; @@ -1132,7 +1138,6 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; u16 sta_cap = sta->ht_cap.cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap; - struct sta_info *sinfo = container_of(sta, struct sta_info, sta); int use_vht; int n_supported = 0; int ack_dur; @@ -1258,8 +1263,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, if (!n_supported) goto use_legacy; - if (test_sta_flag(sinfo, WLAN_STA_SHORT_PREAMBLE)) - mi->cck_supported_short |= mi->cck_supported_short << 4; + mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4; /* create an initial rate table with the lowest supported rates */ minstrel_ht_update_stats(mp, mi); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9e19ddbcb06e..4a6b3c7b35e3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -141,6 +141,9 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, /* allocate extra bitmaps */ if (status->chains) len += 4 * hweight8(status->chains); + /* vendor presence bitmap */ + if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) + len += 4; if (ieee80211_have_rx_timestamp(status)) { len = ALIGN(len, 8); @@ -182,8 +185,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) { struct ieee80211_vendor_radiotap *rtap = (void *)skb->data; - /* vendor presence bitmap */ - len += 4; /* alignment for fixed 6-byte vendor data header */ len = ALIGN(len, 2); /* vendor data header */ @@ -205,7 +206,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr_3addr hdr; u8 category; u8 action_code; - } __packed action; + } __packed __aligned(2) action; if (!sdata) return; @@ -2532,7 +2533,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) skb_set_queue_mapping(skb, q); if (!--mesh_hdr->ttl) { - IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl); + if (!is_multicast_ether_addr(hdr->addr1)) + IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, + dropped_frames_ttl); goto out; } @@ -3586,6 +3589,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) case NL80211_IFTYPE_STATION: if (!bssid && !sdata->u.mgd.use_4addr) return false; + if (ieee80211_is_robust_mgmt_frame(skb) && !rx->sta) + return false; if (multicast) return true; return ether_addr_equal(sdata->vif.addr, hdr->addr1); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f1b496222bda..1a86974b02e3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2313,7 +2313,8 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); - if (time_after(stats->last_rx, sta->status_stats.last_ack)) + if (!sta->status_stats.last_ack || + time_after(stats->last_rx, sta->status_stats.last_ack)) return stats->last_rx; return sta->status_stats.last_ack; } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b18466cf466c..fbe7354aeac7 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -856,7 +856,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, I802_DEBUG_INC(local->dot11FailedCount); } - if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && + if ((ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && + ieee80211_has_pm(fc) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && !(info->flags & IEEE80211_TX_CTL_INJECTED) && local->ps_sdata && !(local->scanning)) { diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 6e7aa65cf345..7a32b6820397 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -1988,3 +1988,26 @@ void ieee80211_tdls_chsw_work(struct work_struct *wk) } rtnl_unlock(); } + +void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata, + const u8 *peer, u16 reason) +{ + struct ieee80211_sta *sta; + + rcu_read_lock(); + sta = ieee80211_find_sta(&sdata->vif, peer); + if (!sta || !sta->tdls) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + tdls_dbg(sdata, "disconnected from TDLS peer %pM (Reason: %u=%s)\n", + peer, reason, + ieee80211_get_reason_code_string(reason)); + + ieee80211_tdls_oper_request(&sdata->vif, peer, + NL80211_TDLS_TEARDOWN, + WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE, + GFP_ATOMIC); +} diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 366b9e6f043e..40141df09f25 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright (C) 2019 Intel Corporation + */ + #ifdef CONFIG_MAC80211_MESSAGE_TRACING #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -11,7 +16,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM mac80211_msg -#define MAX_MSG_LEN 100 +#define MAX_MSG_LEN 120 DECLARE_EVENT_CLASS(mac80211_msg_event, TP_PROTO(struct va_format *vaf), diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6b9bf9c027a2..09c7aa519ca8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1856,9 +1856,16 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0; - if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@ -1866,8 +1873,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata, if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@ -3119,6 +3125,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; int max_amsdu_len = sta->sta.max_amsdu_len; + int orig_truesize; __be16 len; void *data; bool ret = false; @@ -3152,6 +3159,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!head) goto out; + orig_truesize = head->truesize; orig_len = head->len; if (skb->len + head->len > max_amsdu_len) @@ -3206,6 +3214,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, *frag_tail = skb; out_recalc: + fq->memory_usage += head->truesize - orig_truesize; if (head->len != orig_len) { flow->backlog += head->len - orig_len; tin->backlog_bytes += head->len - orig_len; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 0d722ea98a1b..e3bb69ba6887 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -1169,7 +1169,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_key *key = rx->key; struct ieee80211_mmie_16 *mmie; - u8 aad[GMAC_AAD_LEN], mic[GMAC_MIC_LEN], ipn[6], nonce[GMAC_NONCE_LEN]; + u8 aad[GMAC_AAD_LEN], *mic, ipn[6], nonce[GMAC_NONCE_LEN]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (!ieee80211_is_mgmt(hdr->frame_control)) @@ -1200,13 +1200,18 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) memcpy(nonce, hdr->addr2, ETH_ALEN); memcpy(nonce + ETH_ALEN, ipn, 6); + mic = kmalloc(GMAC_MIC_LEN, GFP_ATOMIC); + if (!mic) + return RX_DROP_UNUSABLE; if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, skb->data + 24, skb->len - 24, mic) < 0 || crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) { key->u.aes_gmac.icverrors++; + kfree(mic); return RX_DROP_UNUSABLE; } + kfree(mic); } memcpy(key->u.aes_gmac.rx_pn, ipn, 6); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index aee385eb72e7..9a153f64b8d7 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1787,6 +1787,9 @@ static int rtm_to_route_config(struct sk_buff *skb, goto errout; break; } + case RTA_GATEWAY: + NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute"); + goto errout; case RTA_VIA: { if (nla_get_via(nla, &cfg->rc_via_alen, diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a3f1dc7cf538..94d74ec61f42 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1128,7 +1128,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl, return -ENOENT; write_lock_bh(&ip_set_ref_lock); - if (set->ref != 0) { + if (set->ref != 0 || set->ref_netlink != 0) { ret = -IPSET_ERR_REFERENCED; goto out; } @@ -1950,8 +1950,9 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_version->version = IPSET_PROTOCOL; - ret = copy_to_user(user, req_version, - sizeof(struct ip_set_req_version)); + if (copy_to_user(user, req_version, + sizeof(struct ip_set_req_version))) + ret = -EFAULT; goto done; } case IP_SET_OP_GET_BYNAME: { @@ -2008,7 +2009,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } /* end of switch(op) */ copy: - ret = copy_to_user(user, data, copylen); + if (copy_to_user(user, data, copylen)) + ret = -EFAULT; done: vfree(data); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index dfd268166e42..42d9cd22447e 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -624,7 +624,7 @@ retry: goto cleanup; } m->size = AHASH_INIT_SIZE; - extsize = ext_size(AHASH_INIT_SIZE, dsize); + extsize += ext_size(AHASH_INIT_SIZE, dsize); RCU_INIT_POINTER(hbucket(t, key), m); } else if (m->pos >= m->size) { struct hbucket *ht; diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index b32fb0dbe237..3f8e490d1133 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -29,6 +29,7 @@ config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 select IP6_NF_IPTABLES + select NF_DEFRAG_IPV6 ---help--- Add IPv6 support to IPVS. diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 299edc6add5a..363475b246f6 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -198,21 +198,29 @@ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app * mutex_lock(&__ip_vs_app_mutex); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) { + err = -ENOENT; + goto out_unlock; + } + list_for_each_entry(a, &ipvs->app_list, a_list) { if (!strcmp(app->name, a->name)) { err = -EEXIST; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } } a = kmemdup(app, sizeof(*app), GFP_KERNEL); if (!a) { err = -ENOMEM; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } INIT_LIST_HEAD(&a->incs_list); list_add(&a->a_list, &ipvs->app_list); - /* increase the module use count */ - ip_vs_use_count_inc(); out_unlock: mutex_unlock(&__ip_vs_app_mutex); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1bd53b1e7672..2156571455db 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1524,14 +1524,12 @@ ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, iph->off, "ip_vs_in: packet continues traversal as normal"); - if (iph->fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + + /* Fragment couldn't be mapped to a conn entry */ + if (iph->fragoffs) IP_VS_DBG_PKT(7, af, pp, skb, iph->off, "unhandled fragment"); - } + *verdict = NF_ACCEPT; return 0; } @@ -1637,7 +1635,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (!cp) { int v; - if (!sysctl_schedule_icmp(ipvs)) + if (ipip || !sysctl_schedule_icmp(ipvs)) return NF_ACCEPT; if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) @@ -2208,7 +2206,6 @@ static const struct nf_hook_ops ip_vs_ops[] = { static int __net_init __ip_vs_init(struct net *net) { struct netns_ipvs *ipvs; - int ret; ipvs = net_generic(net, ip_vs_net_id); if (ipvs == NULL) @@ -2240,17 +2237,11 @@ static int __net_init __ip_vs_init(struct net *net) if (ip_vs_sync_net_init(ipvs) < 0) goto sync_fail; - ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); - if (ret < 0) - goto hook_fail; - return 0; /* * Error handling */ -hook_fail: - ip_vs_sync_net_cleanup(ipvs); sync_fail: ip_vs_conn_net_cleanup(ipvs); conn_fail: @@ -2270,7 +2261,6 @@ static void __net_exit __ip_vs_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ ip_vs_conn_net_cleanup(ipvs); ip_vs_app_net_cleanup(ipvs); @@ -2281,10 +2271,24 @@ static void __net_exit __ip_vs_cleanup(struct net *net) net->ipvs = NULL; } +static int __net_init __ip_vs_dev_init(struct net *net) +{ + int ret; + + ret = nf_register_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + if (ret < 0) + goto hook_fail; + return 0; + +hook_fail: + return ret; +} + static void __net_exit __ip_vs_dev_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); EnterFunction(2); + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); ip_vs_sync_net_cleanup(ipvs); @@ -2299,6 +2303,7 @@ static struct pernet_operations ipvs_core_ops = { }; static struct pernet_operations ipvs_core_dev_ops = { + .init = __ip_vs_dev_init, .exit = __ip_vs_dev_cleanup, }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2f45c3ce77ef..5ec80818ace2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -43,6 +43,7 @@ #ifdef CONFIG_IP_VS_IPV6 #include <net/ipv6.h> #include <net/ip6_route.h> +#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif #include <net/route.h> #include <net/sock.h> @@ -97,7 +98,6 @@ static bool __ip_vs_addr_is_local_v6(struct net *net, static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; - static int old_secure_tcp = 0; int availmem; int nomem; int to_change = -1; @@ -178,35 +178,35 @@ static void update_defense_level(struct netns_ipvs *ipvs) spin_lock(&ipvs->securetcp_lock); switch (ipvs->sysctl_secure_tcp) { case 0: - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; break; case 1: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; ipvs->sysctl_secure_tcp = 2; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; } break; case 2: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; ipvs->sysctl_secure_tcp = 1; } break; case 3: - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; break; } - old_secure_tcp = ipvs->sysctl_secure_tcp; + ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(ipvs, ipvs->sysctl_secure_tcp > 1); @@ -893,11 +893,17 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, #ifdef CONFIG_IP_VS_IPV6 if (udest->af == AF_INET6) { + int ret; + atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; + + ret = nf_defrag_ipv6_enable(svc->ipvs->net); + if (ret) + return ret; } else #endif { @@ -1190,7 +1196,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service *svc = NULL; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; /* Lookup the scheduler by 'u->sched_name' */ if (strcmp(u->sched_name, "none")) { @@ -1221,6 +1228,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ret = -EINVAL; goto out_err; } + + ret = nf_defrag_ipv6_enable(ipvs->net); + if (ret) + goto out_err; } #endif @@ -2253,6 +2264,18 @@ static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user u->udp_timeout); #ifdef CONFIG_IP_VS_PROTO_TCP + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { + return -EINVAL; + } +#endif + +#ifdef CONFIG_IP_VS_PROTO_UDP + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) + return -EINVAL; +#endif + +#ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] @@ -2372,9 +2395,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (copy_from_user(arg, user, len) != 0) return -EFAULT; - /* increase the module use count */ - ip_vs_use_count_inc(); - /* Handle daemons since they have another lock */ if (cmd == IP_VS_SO_SET_STARTDAEMON || cmd == IP_VS_SO_SET_STOPDAEMON) { @@ -2387,15 +2407,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = -EINVAL; if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)) <= 0) - goto out_dec; + return ret; cfg.syncid = dm->syncid; ret = start_sync_thread(ipvs, &cfg, dm->state); } else { - mutex_lock(&ipvs->sync_mutex); ret = stop_sync_thread(ipvs, dm->state); - mutex_unlock(&ipvs->sync_mutex); } - goto out_dec; + return ret; } mutex_lock(&__ip_vs_mutex); @@ -2490,10 +2508,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) out_unlock: mutex_unlock(&__ip_vs_mutex); - out_dec: - /* decrease the module use count */ - ip_vs_use_count_dec(); - return ret; } @@ -3501,10 +3515,8 @@ static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; - mutex_lock(&ipvs->sync_mutex); ret = stop_sync_thread(ipvs, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); - mutex_unlock(&ipvs->sync_mutex); return ret; } diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 0df17caa8af6..714e7e05c102 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -67,7 +67,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) struct ip_vs_pe *tmp; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index a2ff7d746ebf..3bd0ff36dc41 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -184,7 +184,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_sched_mutex); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 5e07f7a6794e..b373e053ff9a 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -195,6 +195,7 @@ union ip_vs_sync_conn { #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct ip_vs_sync_thread_data { + struct task_struct *task; struct netns_ipvs *ipvs; struct socket *sock; char *buf; @@ -374,8 +375,11 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs, max(IPVS_SYNC_SEND_DELAY, 1)); ms->sync_queue_len++; list_add_tail(&sb->list, &ms->sync_queue); - if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) - wake_up_process(ms->master_thread); + if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE) { + int id = (int)(ms - ipvs->ms); + + wake_up_process(ipvs->master_tinfo[id].task); + } } else ip_vs_sync_buff_release(sb); spin_unlock(&ipvs->sync_lock); @@ -1645,8 +1649,10 @@ static void master_wakeup_work_handler(struct work_struct *work) spin_lock_bh(&ipvs->sync_lock); if (ms->sync_queue_len && ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) { + int id = (int)(ms - ipvs->ms); + ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE; - wake_up_process(ms->master_thread); + wake_up_process(ipvs->master_tinfo[id].task); } spin_unlock_bh(&ipvs->sync_lock); } @@ -1712,10 +1718,6 @@ done: if (sb) ip_vs_sync_buff_release(sb); - /* release the sending multicast socket */ - sock_release(tinfo->sock); - kfree(tinfo); - return 0; } @@ -1749,11 +1751,6 @@ static int sync_thread_backup(void *data) } } - /* release the sending multicast socket */ - sock_release(tinfo->sock); - kfree(tinfo->buf); - kfree(tinfo); - return 0; } @@ -1761,8 +1758,8 @@ static int sync_thread_backup(void *data) int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { - struct ip_vs_sync_thread_data *tinfo = NULL; - struct task_struct **array = NULL, *task; + struct ip_vs_sync_thread_data *ti = NULL, *tinfo; + struct task_struct *task; struct net_device *dev; char *name; int (*threadfn)(void *data); @@ -1774,6 +1771,10 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; + /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); @@ -1831,7 +1832,7 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, threadfn = sync_thread_master; } else if (state == IP_VS_STATE_BACKUP) { result = -EEXIST; - if (ipvs->backup_threads) + if (ipvs->backup_tinfo) goto out_early; ipvs->bcfg = *c; @@ -1858,28 +1859,22 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, master_wakeup_work_handler); ms->ipvs = ipvs; } - } else { - array = kcalloc(count, sizeof(struct task_struct *), - GFP_KERNEL); - result = -ENOMEM; - if (!array) - goto out; } + result = -ENOMEM; + ti = kcalloc(count, sizeof(struct ip_vs_sync_thread_data), + GFP_KERNEL); + if (!ti) + goto out; for (id = 0; id < count; id++) { - result = -ENOMEM; - tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); - if (!tinfo) - goto out; + tinfo = &ti[id]; tinfo->ipvs = ipvs; - tinfo->sock = NULL; if (state == IP_VS_STATE_BACKUP) { + result = -ENOMEM; tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, GFP_KERNEL); if (!tinfo->buf) goto out; - } else { - tinfo->buf = NULL; } tinfo->id = id; if (state == IP_VS_STATE_MASTER) @@ -1894,17 +1889,15 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, result = PTR_ERR(task); goto out; } - tinfo = NULL; - if (state == IP_VS_STATE_MASTER) - ipvs->ms[id].master_thread = task; - else - array[id] = task; + tinfo->task = task; } /* mark as active */ - if (state == IP_VS_STATE_BACKUP) - ipvs->backup_threads = array; + if (state == IP_VS_STATE_MASTER) + ipvs->master_tinfo = ti; + else + ipvs->backup_tinfo = ti; spin_lock_bh(&ipvs->sync_buff_lock); ipvs->sync_state |= state; spin_unlock_bh(&ipvs->sync_buff_lock); @@ -1912,56 +1905,64 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); - /* increase the module use count */ - ip_vs_use_count_inc(); - return 0; out: /* We do not need RTNL lock anymore, release it here so that - * sock_release below and in the kthreads can use rtnl_lock - * to leave the mcast group. + * sock_release below can use rtnl_lock to leave the mcast group. */ rtnl_unlock(); - count = id; - while (count-- > 0) { - if (state == IP_VS_STATE_MASTER) - kthread_stop(ipvs->ms[count].master_thread); - else - kthread_stop(array[count]); + id = min(id, count - 1); + if (ti) { + for (tinfo = ti + id; tinfo >= ti; tinfo--) { + if (tinfo->task) + kthread_stop(tinfo->task); + } } if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { kfree(ipvs->ms); ipvs->ms = NULL; } mutex_unlock(&ipvs->sync_mutex); - if (tinfo) { - if (tinfo->sock) - sock_release(tinfo->sock); - kfree(tinfo->buf); - kfree(tinfo); + + /* No more mutexes, release socks */ + if (ti) { + for (tinfo = ti + id; tinfo >= ti; tinfo--) { + if (tinfo->sock) + sock_release(tinfo->sock); + kfree(tinfo->buf); + } + kfree(ti); } - kfree(array); + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; } int stop_sync_thread(struct netns_ipvs *ipvs, int state) { - struct task_struct **array; + struct ip_vs_sync_thread_data *ti, *tinfo; int id; int retc = -EINVAL; IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); + mutex_lock(&ipvs->sync_mutex); if (state == IP_VS_STATE_MASTER) { + retc = -ESRCH; if (!ipvs->ms) - return -ESRCH; + goto err; + ti = ipvs->master_tinfo; /* * The lock synchronizes with sb_queue_tail(), so that we don't @@ -1980,38 +1981,56 @@ int stop_sync_thread(struct netns_ipvs *ipvs, int state) struct ipvs_master_sync_state *ms = &ipvs->ms[id]; int ret; + tinfo = &ti[id]; pr_info("stopping master sync thread %d ...\n", - task_pid_nr(ms->master_thread)); + task_pid_nr(tinfo->task)); cancel_delayed_work_sync(&ms->master_wakeup_work); - ret = kthread_stop(ms->master_thread); + ret = kthread_stop(tinfo->task); if (retc >= 0) retc = ret; } kfree(ipvs->ms); ipvs->ms = NULL; + ipvs->master_tinfo = NULL; } else if (state == IP_VS_STATE_BACKUP) { - if (!ipvs->backup_threads) - return -ESRCH; + retc = -ESRCH; + if (!ipvs->backup_tinfo) + goto err; + ti = ipvs->backup_tinfo; ipvs->sync_state &= ~IP_VS_STATE_BACKUP; - array = ipvs->backup_threads; retc = 0; for (id = ipvs->threads_mask; id >= 0; id--) { int ret; + tinfo = &ti[id]; pr_info("stopping backup sync thread %d ...\n", - task_pid_nr(array[id])); - ret = kthread_stop(array[id]); + task_pid_nr(tinfo->task)); + ret = kthread_stop(tinfo->task); if (retc >= 0) retc = ret; } - kfree(array); - ipvs->backup_threads = NULL; + ipvs->backup_tinfo = NULL; + } else { + goto err; } + id = ipvs->threads_mask; + mutex_unlock(&ipvs->sync_mutex); + + /* No more mutexes, release socks */ + for (tinfo = ti + id; tinfo >= ti; tinfo--) { + if (tinfo->sock) + sock_release(tinfo->sock); + kfree(tinfo->buf); + } + kfree(ti); /* decrease the module use count */ ip_vs_use_count_dec(); + return retc; +err: + mutex_unlock(&ipvs->sync_mutex); return retc; } @@ -2030,7 +2049,6 @@ void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs) { int retc; - mutex_lock(&ipvs->sync_mutex); retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); @@ -2038,5 +2056,4 @@ void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs) retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); - mutex_unlock(&ipvs->sync_mutex); } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 4527921b1c3a..97d411033f8a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) struct rtable *ort = skb_rtable(skb); if (!skb->dev && sk && sk_fullsock(sk)) - ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true); } static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b793b55d1488..2e65271bed01 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -25,6 +25,7 @@ #include <linux/slab.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/err.h> #include <linux/percpu.h> #include <linux/moduleparam.h> @@ -300,6 +301,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, } EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); +/* Generate a almost-unique pseudo-id for a given conntrack. + * + * intentionally doesn't re-use any of the seeds used for hash + * table location, we assume id gets exposed to userspace. + * + * Following nf_conn items do not change throughout lifetime + * of the nf_conn: + * + * 1. nf_conn address + * 2. nf_conn->master address (normally NULL) + * 3. the associated net namespace + * 4. the original direction tuple + */ +u32 nf_ct_get_id(const struct nf_conn *ct) +{ + static __read_mostly siphash_key_t ct_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&ct_id_seed, sizeof(ct_id_seed)); + + a = (unsigned long)ct; + b = (unsigned long)ct->master; + c = (unsigned long)nf_ct_net(ct); + d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple), + &ct_id_seed); +#ifdef CONFIG_64BIT + return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed); +#else + return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed); +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_get_id); + static void clean_from_lists(struct nf_conn *ct) { @@ -763,10 +798,18 @@ __nf_conntrack_confirm(struct sk_buff *skb) * REJECT will give spurious warnings here. */ - /* No external references means no one else could have - * confirmed us. + /* Another skb with the same unconfirmed conntrack may + * win the race. This may happen for bridge(br_flood) + * or broadcast/multicast packets do skb_clone with + * unconfirmed conntrack. */ - WARN_ON(nf_ct_is_confirmed(ct)); + if (unlikely(nf_ct_is_confirmed(ct))) { + WARN_ON_ONCE(1); + nf_conntrack_double_unlock(hash, reply_hash); + local_bh_enable(); + return NF_DROP; + } + pr_debug("Confirming conntrack %p\n", ct); /* We have to check the DYING flag after unlink to prevent * a race against nf_ct_get_next_corpse() possibly called from @@ -869,6 +912,22 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, } if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index f0e9a7511e1a..c236c7d1655d 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -323,7 +323,7 @@ static int find_pattern(const char *data, size_t dlen, i++; } - pr_debug("Skipped up to `%c'!\n", skip); + pr_debug("Skipped up to 0x%hhx delimiter!\n", skip); *numoff = i; *numlen = getnum(data + i, dlen - i, cmd, term, numoff); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 48dab1403b2c..39a32edaa92c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -29,6 +29,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/siphash.h> #include <linux/netfilter.h> #include <net/netlink.h> @@ -445,7 +446,9 @@ err: static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct) { - if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct))) + __be32 id = (__force __be32)nf_ct_get_id(ct); + + if (nla_put_be32(skb, CTA_ID, id)) goto nla_put_failure; return 0; @@ -1179,8 +1182,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, ct = nf_ct_tuplehash_to_ctrack(h); if (cda[CTA_ID]) { - u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID])); - if (id != (u32)(unsigned long)ct) { + __be32 id = nla_get_be32(cda[CTA_ID]); + + if (id != (__force __be32)nf_ct_get_id(ct)) { nf_ct_put(ct); return -ENOENT; } @@ -2521,6 +2525,25 @@ nla_put_failure: static const union nf_inet_addr any_addr; +static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp) +{ + static __read_mostly siphash_key_t exp_id_seed; + unsigned long a, b, c, d; + + net_get_random_once(&exp_id_seed, sizeof(exp_id_seed)); + + a = (unsigned long)exp; + b = (unsigned long)exp->helper; + c = (unsigned long)exp->master; + d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed); + +#ifdef CONFIG_64BIT + return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed); +#else + return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed); +#endif +} + static int ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) @@ -2568,7 +2591,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, } #endif if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) || - nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) || + nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) || nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) || nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class))) goto nla_put_failure; @@ -2873,7 +2896,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, if (cda[CTA_EXPECT_ID]) { __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]); - if (ntohl(id) != (u32)(unsigned long)exp) { + + if (id != nf_expect_get_id(exp)) { nf_ct_expect_put(exp); return -ENOENT; } @@ -3398,6 +3422,9 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) list_for_each_entry(net, net_exit_list, exit_list) ctnetlink_net_exit(net); + + /* wait for other cpus until they are done with ctnl_notifiers */ + synchronize_rcu(); } static struct pernet_operations ctnetlink_net_ops = { diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index f7e21953b1de..b06ef4c62522 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -138,6 +138,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, goto err; } + if (skb_dst(skb) && !skb_dst_force(skb)) { + status = -ENETDOWN; + goto err; + } + *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, @@ -146,7 +151,6 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, }; nf_queue_entry_get_refs(entry); - skb_dst_force(skb); afinfo->saveroute(skb, entry); status = qh->outfn(entry, queuenum); @@ -193,6 +197,7 @@ static unsigned int nf_iterate(struct sk_buff *skb, repeat: verdict = nf_hook_entry_hookfn(hook, skb, state); if (verdict != NF_ACCEPT) { + *index = i; if (verdict != NF_REPEAT) return verdict; goto repeat; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 623ec29ade26..91490446ebb4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -220,6 +220,18 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } +/* either expr ops provide both activate/deactivate, or neither */ +static bool nft_expr_check_ops(const struct nft_expr_ops *ops) +{ + if (!ops) + return true; + + if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) + return false; + + return true; +} + static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { @@ -304,6 +316,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) int err; list_for_each_entry(rule, &ctx->chain->rules, list) { + if (!nft_is_active_next(ctx->net, rule)) + continue; + err = nft_delrule(ctx, rule); if (err < 0) return err; @@ -1721,6 +1736,9 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, */ int nft_register_expr(struct nft_expr_type *type) { + if (!nft_expr_check_ops(type->ops)) + return -EINVAL; + nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@ -1870,6 +1888,10 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx, err = PTR_ERR(ops); goto err1; } + if (!nft_expr_check_ops(ops)) { + err = -EINVAL; + goto err1; + } } else ops = type->ops; @@ -3109,8 +3131,11 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, NFT_SET_OBJECT)) return -EINVAL; /* Only one of these operations is supported */ - if ((flags & (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) == - (NFT_SET_MAP | NFT_SET_EVAL | NFT_SET_OBJECT)) + if ((flags & (NFT_SET_MAP | NFT_SET_OBJECT)) == + (NFT_SET_MAP | NFT_SET_OBJECT)) + return -EOPNOTSUPP; + if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) == + (NFT_SET_EVAL | NFT_SET_OBJECT)) return -EOPNOTSUPP; } @@ -3892,14 +3917,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_DATA] == NULL && !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; - if (nla[NFTA_SET_ELEM_DATA] != NULL && - flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) return -EINVAL; } + if ((flags & NFT_SET_ELEM_INTERVAL_END) && + (nla[NFTA_SET_ELEM_DATA] || + nla[NFTA_SET_ELEM_OBJREF] || + nla[NFTA_SET_ELEM_TIMEOUT] || + nla[NFTA_SET_ELEM_EXPIRATION] || + nla[NFTA_SET_ELEM_USERDATA] || + nla[NFTA_SET_ELEM_EXPR])) + return -EINVAL; + timeout = 0; if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) @@ -4046,6 +4077,8 @@ err6: err5: kfree(trans); err4: + if (obj) + obj->use--; kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 733d3e4a30d8..2cee032af46d 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -530,7 +530,7 @@ static int nfnetlink_bind(struct net *net, int group) ss = nfnetlink_get_subsys(type << 8); rcu_read_unlock(); if (!ss) - request_module("nfnetlink-subsys-%d", type); + request_module_nowait("nfnetlink-subsys-%d", type); return 0; } #endif diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7533c2fd6b76..8281656808ae 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -277,6 +277,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct xt_target *target = expr->ops->data; void *info = nft_expr_priv(expr); + struct module *me = target->me; struct xt_tgdtor_param par; par.net = ctx->net; @@ -287,7 +288,25 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) par.target->destroy(&par); if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) - module_put(target->me); + module_put(me); +} + +static int nft_extension_dump_info(struct sk_buff *skb, int attr, + const void *info, + unsigned int size, unsigned int user_size) +{ + unsigned int info_size, aligned_size = XT_ALIGN(size); + struct nlattr *nla; + + nla = nla_reserve(skb, attr, aligned_size); + if (!nla) + return -1; + + info_size = user_size ? : size; + memcpy(nla_data(nla), info, info_size); + memset(nla_data(nla) + info_size, 0, aligned_size - info_size); + + return 0; } static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -297,7 +316,8 @@ static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || - nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(target->targetsize), info)) + nft_extension_dump_info(skb, NFTA_TARGET_INFO, info, + target->targetsize, target->usersize)) goto nla_put_failure; return 0; @@ -533,7 +553,8 @@ static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || - nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(match->matchsize), info)) + nft_extension_dump_info(skb, NFTA_MATCH_INFO, info, + match->matchsize, match->usersize)) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 24f2f7567ddb..010a565b4000 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -131,7 +131,7 @@ static int nft_symhash_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); - if (priv->modulus <= 1) + if (priv->modulus < 1) return -ERANGE; if (priv->offset + priv->modulus - 1 < priv->offset) diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 475570e89ede..44015a151ad6 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -76,9 +76,6 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_EVAL) - return -EOPNOTSUPP; - priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); err = nft_validate_register_load(priv->sreg, set->klen); if (err < 0) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index d83a4ec5900d..6f3205de887f 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -224,10 +224,6 @@ static void *nft_rbtree_deactivate(const struct net *net, else if (d > 0) parent = parent->rb_right; else { - if (!nft_set_elem_active(&rbe->ext, genmask)) { - parent = parent->rb_left; - continue; - } if (nft_rbtree_interval_end(rbe) && !nft_rbtree_interval_end(this)) { parent = parent->rb_left; @@ -236,6 +232,9 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; + } else if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; } nft_rbtree_flush(net, set, rbe); return rbe; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8e054c63b54e..d42211a0b5e3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1798,7 +1798,7 @@ static int __init xt_init(void) seqcount_init(&per_cpu(xt_recseq, i)); } - xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); + xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL); if (!xt) return -ENOMEM; diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 891f4e7e8ea7..db18c0177b0f 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -66,6 +66,38 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) return 0; } +static int cgroup_mt_check_v2(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info_v2 *info = par->matchinfo; + struct cgroup *cgrp; + + if ((info->invert_path & ~1) || (info->invert_classid & ~1)) + return -EINVAL; + + if (!info->has_path && !info->has_classid) { + pr_info("xt_cgroup: no path or classid specified\n"); + return -EINVAL; + } + + if (info->has_path && info->has_classid) { + pr_info_ratelimited("path and classid specified\n"); + return -EINVAL; + } + + info->priv = NULL; + if (info->has_path) { + cgrp = cgroup_get_from_path(info->path); + if (IS_ERR(cgrp)) { + pr_info_ratelimited("invalid path, errno=%ld\n", + PTR_ERR(cgrp)); + return -EINVAL; + } + info->priv = cgrp; + } + + return 0; +} + static bool cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { @@ -95,6 +127,24 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) info->invert_classid; } +static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info_v2 *info = par->matchinfo; + struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data; + struct cgroup *ancestor = info->priv; + struct sock *sk = skb->sk; + + if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk))) + return false; + + if (ancestor) + return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^ + info->invert_path; + else + return (info->classid == sock_cgroup_classid(skcd)) ^ + info->invert_classid; +} + static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) { struct xt_cgroup_info_v1 *info = par->matchinfo; @@ -103,6 +153,14 @@ static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) cgroup_put(info->priv); } +static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par) +{ + struct xt_cgroup_info_v2 *info = par->matchinfo; + + if (info->priv) + cgroup_put(info->priv); +} + static struct xt_match cgroup_mt_reg[] __read_mostly = { { .name = "cgroup", @@ -130,6 +188,20 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = { (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), }, + { + .name = "cgroup", + .revision = 2, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v2, + .match = cgroup_mt_v2, + .matchsize = sizeof(struct xt_cgroup_info_v2), + .usersize = offsetof(struct xt_cgroup_info_v2, priv), + .destroy = cgroup_mt_destroy_v2, + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 6f92d25590a8..ea447b437f12 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -55,25 +55,39 @@ nfacct_mt_destroy(const struct xt_mtdtor_param *par) nfnl_acct_put(info->nfacct); } -static struct xt_match nfacct_mt_reg __read_mostly = { - .name = "nfacct", - .family = NFPROTO_UNSPEC, - .checkentry = nfacct_mt_checkentry, - .match = nfacct_mt, - .destroy = nfacct_mt_destroy, - .matchsize = sizeof(struct xt_nfacct_match_info), - .usersize = offsetof(struct xt_nfacct_match_info, nfacct), - .me = THIS_MODULE, +static struct xt_match nfacct_mt_reg[] __read_mostly = { + { + .name = "nfacct", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info), + .usersize = offsetof(struct xt_nfacct_match_info, nfacct), + .me = THIS_MODULE, + }, + { + .name = "nfacct", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info_v1), + .usersize = offsetof(struct xt_nfacct_match_info_v1, nfacct), + .me = THIS_MODULE, + }, }; static int __init nfacct_mt_init(void) { - return xt_register_match(&nfacct_mt_reg); + return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)); } static void __exit nfacct_mt_exit(void) { - xt_unregister_match(&nfacct_mt_reg); + xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)); } module_init(nfacct_mt_init); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index bb33598e4530..ec247d8370e8 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -96,8 +96,7 @@ match_outdev: static int physdev_mt_check(const struct xt_mtchk_param *par) { const struct xt_physdev_info *info = par->matchinfo; - - br_netfilter_enable(); + static bool brnf_probed __read_mostly; if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) @@ -113,6 +112,12 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } + + if (!brnf_probed) { + brnf_probed = true; + request_module("br_netfilter"); + } + return 0; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index ea7c67050792..ee3e5b6471a6 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -903,7 +903,8 @@ int netlbl_bitmap_walk(const unsigned char *bitmap, u32 bitmap_len, (state == 0 && (byte & bitmask) == 0)) return bit_spot; - bit_spot++; + if (++bit_spot >= bitmap_len) + return -1; bitmask >>= 1; if (bitmask == 0) { byte = bitmap[++byte_offset]; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index b9ce82c9440f..e9b8b0b0ac43 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -365,7 +365,7 @@ int genl_register_family(struct genl_family *family) start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_locked; + goto errout_free; } err = genl_validate_assign_mc_groups(family); @@ -384,6 +384,7 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); +errout_free: kfree(family->attrbuf); errout_locked: genl_unlock_all(); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index fc876b0c3e06..aebc804c10aa 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -871,7 +871,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) unsigned short frametype, flags, window, timeout; int ret; - skb->sk = NULL; /* Initially we don't know who it's for */ + skb_orphan(skb); /* * skb->data points to the netrom frame start @@ -969,7 +969,9 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev) window = skb->data[20]; + sock_hold(make); skb->sk = make; + skb->destructor = sock_efree; make->sk_state = TCP_ESTABLISHED; /* Fill in his circuit details */ diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index 6a196e438b6c..d1fc019e932e 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -419,6 +419,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) sock->service_name, sock->service_name_len, &service_name_tlv_length); + if (!service_name_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += service_name_tlv_length; } @@ -429,9 +433,17 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); + if (!miux_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + if (!rw_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -484,9 +496,17 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); + if (!miux_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += miux_tlv_length; rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + if (!rw_tlv) { + err = -ENOMEM; + goto error_tlv; + } size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 02eef5cf3cce..7e619ff8a653 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -532,10 +532,10 @@ static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local) static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { - u8 *gb_cur, *version_tlv, version, version_length; - u8 *lto_tlv, lto_length; - u8 *wks_tlv, wks_length; - u8 *miux_tlv, miux_length; + u8 *gb_cur, version, version_length; + u8 lto_length, wks_length, miux_length; + u8 *version_tlv = NULL, *lto_tlv = NULL, + *wks_tlv = NULL, *miux_tlv = NULL; __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; int ret = 0; @@ -543,17 +543,33 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) version = LLCP_VERSION_11; version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version, 1, &version_length); + if (!version_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += version_length; lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, <o_length); + if (!lto_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); + if (!wks_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += wks_length; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_length); + if (!miux_tlv) { + ret = -ENOMEM; + goto out; + } gb_len += miux_length; gb_len += ARRAY_SIZE(llcp_magic); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index fb7afcaa3004..694a43c05eb9 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -119,9 +119,14 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) llcp_sock->service_name = kmemdup(llcp_addr.service_name, llcp_sock->service_name_len, GFP_KERNEL); - + if (!llcp_sock->service_name) { + ret = -ENOMEM; + goto put_dev; + } llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock); if (llcp_sock->ssap == LLCP_SAP_MAX) { + kfree(llcp_sock->service_name); + llcp_sock->service_name = NULL; ret = -EADDRINUSE; goto put_dev; } @@ -562,11 +567,11 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (sk->sk_state == LLCP_CLOSED) @@ -1012,10 +1017,13 @@ static int llcp_sock_create(struct net *net, struct socket *sock, sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; - if (sock->type == SOCK_RAW) + if (sock->type == SOCK_RAW) { + if (!capable(CAP_NET_RAW)) + return -EPERM; sock->ops = &llcp_rawsock_ops; - else + } else { sock->ops = &llcp_sock_ops; + } sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern); if (sk == NULL) diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 908f25e3773e..5405d073804c 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -119,7 +119,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); if (!conn_info) { rc = -EPROTO; - goto free_exit; + goto exit; } __skb_queue_head_init(&frags_q); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ddfc52ac1f9b..c0d323b58e73 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -312,6 +312,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, create_info = (struct nci_hci_create_pipe_resp *)skb->data; dest_gate = create_info->dest_gate; new_pipe = create_info->pipe; + if (new_pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -336,6 +340,10 @@ static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, goto exit; } delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NCI_HCI_MAX_PIPES) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } ndev->hci_dev->pipes[delete_info->pipe].gate = NCI_HCI_INVALID_GATE; diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 8d104c1db628..6f5addb5225c 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -348,7 +348,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, nu->rx_packet_len = -1; nu->rx_skb = nci_skb_alloc(nu->ndev, NCI_MAX_PACKET_SIZE, - GFP_KERNEL); + GFP_ATOMIC); if (!nu->rx_skb) return -ENOMEM; } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 08ed6abe4aae..7b8d4d235a3a 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -970,7 +970,8 @@ static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) int rc; u32 idx; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_TARGET_INDEX]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); @@ -1019,7 +1020,8 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg = NULL; u32 idx; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); @@ -1098,7 +1100,6 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } @@ -1158,7 +1159,6 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index a54a556fcdb5..5601e2c6ac05 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -174,8 +174,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, if (skb->ip_summed == CHECKSUM_COMPLETE) { __be16 diff[] = { ~(hdr->h_proto), ethertype }; - skb->csum = ~csum_partial((char *)diff, sizeof(diff), - ~skb->csum); + skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); } hdr->h_proto = ethertype; @@ -267,8 +266,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, if (skb->ip_summed == CHECKSUM_COMPLETE) { __be32 diff[] = { ~(stack->label_stack_entry), lse }; - skb->csum = ~csum_partial((char *)diff, sizeof(diff), - ~skb->csum); + skb->csum = csum_partial((char *)diff, sizeof(diff), skb->csum); } stack->label_stack_entry = lse; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 285f8797c26a..737e37b28d93 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -23,6 +23,7 @@ #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <net/ipv6_frag.h> #ifdef CONFIG_NF_NAT_NEEDED #include <linux/netfilter/nf_nat.h> @@ -878,6 +879,17 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); + if (err == NF_ACCEPT && + ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, + maniptype); + } + /* Mark NAT done if successful and update the flow key. */ if (err == NF_ACCEPT) ovs_nat_update_key(key, skb, maniptype); @@ -1082,7 +1094,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, &info->labels.mask); if (err) return err; - } else if (labels_nonzero(&info->labels.mask)) { + } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + labels_nonzero(&info->labels.mask)) { err = ovs_ct_set_labels(ct, key, &info->labels.value, &info->labels.mask); if (err) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 363dd904733d..3248cf04d0b3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -724,9 +724,13 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, { size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - /* OVS_FLOW_ATTR_UFID */ + /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback + * see ovs_nla_put_identifier() + */ if (sfid && ovs_identifier_is_ufid(sfid)) len += nla_total_size(sfid->ufid_len); + else + len += nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_KEY */ if (!sfid || should_fill_key(sfid, ufid_flags)) @@ -902,7 +906,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, cmd, ufid_flags); - BUG_ON(retval < 0); + if (WARN_ON_ONCE(retval < 0)) { + kfree_skb(skb); + skb = ERR_PTR(retval); + } return skb; } @@ -1365,7 +1372,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) OVS_FLOW_CMD_DEL, ufid_flags); rcu_read_unlock(); - BUG_ON(err < 0); + if (WARN_ON_ONCE(err < 0)) { + kfree_skb(reply); + goto out_free; + } ovs_notify(&dp_flow_genl_family, reply, info); } else { @@ -1373,6 +1383,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } } +out_free: ovs_flow_free(flow, true); return 0; unlock: @@ -2240,7 +2251,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) }, [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, - [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index e687b89dafe6..f5deae2ccb79 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1967,14 +1967,14 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, struct sw_flow_actions *acts; int new_acts_size; - int req_size = NLA_ALIGN(attr_len); + size_t req_size = NLA_ALIGN(attr_len); int next_offset = offsetof(struct sw_flow_actions, actions) + (*sfa)->actions_len; if (req_size <= (ksize(*sfa) - next_offset)) goto out; - new_acts_size = ksize(*sfa) * 2; + new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2); if (new_acts_size > MAX_ACTIONS_BUFSIZE) { if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) { diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 04a3128adcf0..1083b5e90134 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -44,7 +44,8 @@ static struct internal_dev *internal_dev_priv(struct net_device *netdev) } /* Called with rcu_read_lock_bh. */ -static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t +internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { int len, err; @@ -63,7 +64,7 @@ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) } else { netdev->stats.tx_errors++; } - return 0; + return NETDEV_TX_OK; } static int internal_dev_open(struct net_device *netdev) @@ -156,7 +157,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_PHONY_HEADROOM | IFF_NO_QUEUE; netdev->needs_free_netdev = true; - netdev->priv_destructor = internal_dev_destructor; + netdev->priv_destructor = NULL; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; @@ -176,6 +177,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) { struct vport *vport; struct internal_dev *internal_dev; + struct net_device *dev; int err; vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms); @@ -184,8 +186,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) goto error; } - vport->dev = alloc_netdev(sizeof(struct internal_dev), - parms->name, NET_NAME_USER, do_setup); + dev = alloc_netdev(sizeof(struct internal_dev), + parms->name, NET_NAME_USER, do_setup); + vport->dev = dev; if (!vport->dev) { err = -ENOMEM; goto error_free_vport; @@ -209,6 +212,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) err = register_netdevice(vport->dev); if (err) goto error_unlock; + vport->dev->priv_destructor = internal_dev_destructor; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -218,9 +222,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) error_unlock: rtnl_unlock(); - free_percpu(vport->dev->tstats); + free_percpu(dev->tstats); error_free_netdev: - free_netdev(vport->dev); + free_netdev(dev); error_free_vport: ovs_vport_free(vport); error: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 44a093c75567..4e1058159b08 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -593,7 +593,8 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po, msec = 1; div = ecmd.base.speed / 1000; } - } + } else + return DEFAULT_PRB_RETIRE_TOV; mbits = (blk_size_in_bytes * 8) / (1024 * 1024); @@ -2438,6 +2439,9 @@ static void tpacket_destruct_skb(struct sk_buff *skb) ts = __packet_set_timestamp(po, ph, skb); __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); + + if (!packet_read_pending(&po->tx_ring)) + complete(&po->skb_completion); } sock_wfree(skb); @@ -2632,7 +2636,7 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame, static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct net_device *dev; struct virtio_net_hdr *vnet_hdr = NULL; struct sockcm_cookie sockc; @@ -2641,19 +2645,26 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); + unsigned char *addr = NULL; int tp_len, size_max; - unsigned char *addr; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; int hlen, tlen, copylen = 0; + long timeo = 0; mutex_lock(&po->pg_vec_lock); + /* packet_sendmsg() check on tx_ring.pg_vec was lockless, + * we need to confirm it under protection of pg_vec_lock. + */ + if (unlikely(!po->tx_ring.pg_vec)) { + err = -EBUSY; + goto out; + } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2663,10 +2674,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_put; + if (po->sk.sk_socket->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_put; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -2691,12 +2705,21 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr) size_max = dev->mtu + reserve + VLAN_HLEN; + reinit_completion(&po->skb_completion); + do { ph = packet_current_frame(po, &po->tx_ring, TP_STATUS_SEND_REQUEST); if (unlikely(ph == NULL)) { - if (need_wait && need_resched()) - schedule(); + if (need_wait && skb) { + timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); + timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); + if (timeo <= 0) { + err = !timeo ? -ETIMEDOUT : -ERESTARTSYS; + goto out_put; + } + } + /* check for additional frames */ continue; } @@ -2838,7 +2861,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct sk_buff *skb; struct net_device *dev; __be16 proto; - unsigned char *addr; + unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2855,7 +2878,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2863,10 +2885,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_unlock; + if (sock->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_unlock; + addr = saddr->sll_addr; + } } err = -ENXIO; @@ -3248,6 +3273,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); po = pkt_sk(sk); + init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; @@ -3281,7 +3307,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, } mutex_lock(&net->packet.sklist_lock); - sk_add_node_rcu(sk, &net->packet.sklist); + sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); preempt_disable(); @@ -4232,7 +4258,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) struct pgv *pg_vec; int i; - pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); + pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; @@ -4313,7 +4339,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size / req->tp_frame_size; if (unlikely(rb->frames_per_block == 0)) goto out; - if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) @@ -4336,7 +4362,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, req3->tp_sizeof_priv || req3->tp_feature_req_word) { err = -EINVAL; - goto out; + goto out_free_pg_vec; } } break; @@ -4400,6 +4426,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, prb_shutdown_retire_blk_timer(po, rb_queue); } +out_free_pg_vec: if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: @@ -4625,14 +4652,29 @@ static void __exit packet_exit(void) static int __init packet_init(void) { - int rc = proto_register(&packet_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&packet_proto, 0); + if (rc) goto out; + rc = sock_register(&packet_family_ops); + if (rc) + goto out_proto; + rc = register_pernet_subsys(&packet_net_ops); + if (rc) + goto out_sock; + rc = register_netdevice_notifier(&packet_netdev_notifier); + if (rc) + goto out_pernet; - sock_register(&packet_family_ops); - register_pernet_subsys(&packet_net_ops); - register_netdevice_notifier(&packet_netdev_notifier); + return 0; + +out_pernet: + unregister_pernet_subsys(&packet_net_ops); +out_sock: + sock_unregister(PF_PACKET); +out_proto: + proto_unregister(&packet_proto); out: return rc; } diff --git a/net/packet/internal.h b/net/packet/internal.h index 3bb7c5fb3bff..c70a2794456f 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -128,6 +128,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_tstamp; + struct completion skb_completion; struct net_device __rcu *cached_dev; int (*xmit)(struct sk_buff *skb); struct packet_type prot_hook ____cacheline_aligned_in_smp; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e81537991ddf..bffcef58ebf5 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -132,7 +132,7 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code, ph->utid = 0; ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = code; + ph->error_code = code; return pn_skb_send(sk, skb, NULL); } @@ -153,7 +153,7 @@ static int pipe_handler_request(struct sock *sk, u8 id, u8 code, ph->utid = id; /* whatever */ ph->message_id = id; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = code; + ph->error_code = code; return pn_skb_send(sk, skb, NULL); } @@ -208,7 +208,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, struct pnpipehdr *ph; struct sockaddr_pn dst; u8 data[4] = { - oph->data[0], /* PEP type */ + oph->pep_type, /* PEP type */ code, /* error code, at an unusual offset */ PAD, PAD, }; @@ -221,7 +221,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, ph->utid = oph->utid; ph->message_id = PNS_PEP_CTRL_RESP; ph->pipe_handle = oph->pipe_handle; - ph->data[0] = oph->data[1]; /* CTRL id */ + ph->data0 = oph->data[0]; /* CTRL id */ pn_skb_get_src_sockaddr(oskb, &dst); return pn_skb_send(sk, skb, &dst); @@ -272,17 +272,17 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) return -EINVAL; hdr = pnp_hdr(skb); - if (hdr->data[0] != PN_PEP_TYPE_COMMON) { + if (hdr->pep_type != PN_PEP_TYPE_COMMON) { net_dbg_ratelimited("Phonet unknown PEP type: %u\n", - (unsigned int)hdr->data[0]); + (unsigned int)hdr->pep_type); return -EOPNOTSUPP; } - switch (hdr->data[1]) { + switch (hdr->data[0]) { case PN_PEP_IND_FLOW_CONTROL: switch (pn->tx_fc) { case PN_LEGACY_FLOW_CONTROL: - switch (hdr->data[4]) { + switch (hdr->data[3]) { case PEP_IND_BUSY: atomic_set(&pn->tx_credits, 0); break; @@ -292,7 +292,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) } break; case PN_ONE_CREDIT_FLOW_CONTROL: - if (hdr->data[4] == PEP_IND_READY) + if (hdr->data[3] == PEP_IND_READY) atomic_set(&pn->tx_credits, wake = 1); break; } @@ -301,12 +301,12 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) break; - atomic_add(wake = hdr->data[4], &pn->tx_credits); + atomic_add(wake = hdr->data[3], &pn->tx_credits); break; default: net_dbg_ratelimited("Phonet unknown PEP indication: %u\n", - (unsigned int)hdr->data[1]); + (unsigned int)hdr->data[0]); return -EOPNOTSUPP; } if (wake) @@ -318,7 +318,7 @@ static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr = pnp_hdr(skb); - u8 n_sb = hdr->data[0]; + u8 n_sb = hdr->data0; pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; __skb_pull(skb, sizeof(*hdr)); @@ -506,7 +506,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) return -ECONNREFUSED; /* Parse sub-blocks */ - n_sb = hdr->data[4]; + n_sb = hdr->data[3]; while (n_sb > 0) { u8 type, buf[6], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); @@ -739,7 +739,7 @@ static int pipe_do_remove(struct sock *sk) ph->utid = 0; ph->message_id = PNS_PIPE_REMOVE_REQ; ph->pipe_handle = pn->pipe_handle; - ph->data[0] = PAD; + ph->data0 = PAD; return pn_skb_send(sk, skb, NULL); } @@ -817,7 +817,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, peer_type = hdr->other_pep_type << 8; /* Parse sub-blocks (options) */ - n_sb = hdr->data[4]; + n_sb = hdr->data[3]; while (n_sb > 0) { u8 type, buf[1], len = sizeof(buf); const u8 *data = pep_get_sb(skb, &type, &len, buf); @@ -1109,7 +1109,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) ph->utid = 0; if (pn->aligned) { ph->message_id = PNS_PIPE_ALIGNED_DATA; - ph->data[0] = 0; /* padding */ + ph->data0 = 0; /* padding */ } else ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 1b050dd17393..a1df36f3bb6e 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -352,9 +352,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, if (sk->sk_state == TCP_CLOSE) return POLLERR; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; - if (!skb_queue_empty(&pn->ctrlreq_queue)) + if (!skb_queue_empty_lockless(&pn->ctrlreq_queue)) mask |= POLLPRI; if (!mask && sk->sk_state == TCP_CLOSE_WAIT) return POLLHUP; diff --git a/net/psample/psample.c b/net/psample/psample.c index 64f95624f219..30e8239bd774 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -156,7 +156,7 @@ static void psample_group_destroy(struct psample_group *group) { psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP); list_del(&group->list); - kfree(group); + kfree_rcu(group, rcu); } static struct psample_group * @@ -223,7 +223,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN - NLA_ALIGNTO; - nl_skb = genlmsg_new(meta_len + data_len, GFP_ATOMIC); + nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC); if (unlikely(!nl_skb)) return; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 084adea6a818..8d9a244f4534 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -129,6 +129,7 @@ static void __qrtr_node_release(struct kref *kref) list_del(&node->item); mutex_unlock(&qrtr_node_lock); + cancel_work_sync(&node->work); skb_queue_purge(&node->rx_queue); kfree(node); } diff --git a/net/rds/bind.c b/net/rds/bind.c index 48257d3a4201..4f1427c3452d 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -62,10 +62,10 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port) rcu_read_lock(); rs = rhashtable_lookup(&bind_hash_table, &key, ht_parms); - if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) - rds_sock_addref(rs); - else + if (rs && (sock_flag(rds_rs_to_sk(rs), SOCK_DEAD) || + !refcount_inc_not_zero(&rds_rs_to_sk(rs)->sk_refcnt))) rs = NULL; + rcu_read_unlock(); rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, diff --git a/net/rds/ib.c b/net/rds/ib.c index c21eb4850b9d..e723146cec29 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -140,6 +140,9 @@ static void rds_ib_add_one(struct ib_device *device) refcount_set(&rds_ibdev->refcount, 1); INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); + INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); + INIT_LIST_HEAD(&rds_ibdev->conn_list); + rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); @@ -199,9 +202,6 @@ static void rds_ib_add_one(struct ib_device *device) device->name, rds_ibdev->use_fastreg ? "FRMR" : "FMR"); - INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); - INIT_LIST_HEAD(&rds_ibdev->conn_list); - down_write(&rds_ib_devices_lock); list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); up_write(&rds_ib_devices_lock); diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 86ef907067bb..353b59d3bd44 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages) else pool = rds_ibdev->mr_1m_pool; + if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) + queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); + + /* Switch pools if one of the pool is reaching upper limit */ + if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { + if (pool->pool_type == RDS_IB_MR_8K_POOL) + pool = rds_ibdev->mr_1m_pool; + else + pool = rds_ibdev->mr_8k_pool; + } + ibmr = rds_ib_try_reuse_ibmr(pool); if (ibmr) return ibmr; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 9a3c54e659e9..2c78266ea205 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -416,12 +416,14 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, wait_clean_list_grace(); list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail); - if (ibmr_ret) + if (ibmr_ret) { *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); - + clean_nodes = clean_nodes->next; + } /* more than one entry in llist nodes */ - if (clean_nodes->next) - llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list); + if (clean_nodes) + llist_add_batch(clean_nodes, clean_tail, + &pool->clean_list); } @@ -442,9 +444,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) struct rds_ib_mr *ibmr = NULL; int iter = 0; - if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10) - queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); - while (1) { ibmr = rds_ib_reuse_mr(pool); if (ibmr) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 2064c3a35ef8..e31b4288f32c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -998,10 +998,13 @@ static void rfkill_sync_work(struct work_struct *work) int __must_check rfkill_register(struct rfkill *rfkill) { static unsigned long rfkill_no; - struct device *dev = &rfkill->dev; + struct device *dev; int error; - BUG_ON(!rfkill); + if (!rfkill) + return -EINVAL; + + dev = &rfkill->dev; mutex_lock(&rfkill_global_mutex); @@ -1312,10 +1315,12 @@ static const struct file_operations rfkill_fops = { .llseek = no_llseek, }; +#define RFKILL_NAME "rfkill" + static struct miscdevice rfkill_miscdev = { - .name = "rfkill", .fops = &rfkill_fops, - .minor = MISC_DYNAMIC_MINOR, + .name = RFKILL_NAME, + .minor = RFKILL_MINOR, }; static int __init rfkill_init(void) @@ -1367,3 +1372,6 @@ static void __exit rfkill_exit(void) class_unregister(&rfkill_class); } module_exit(rfkill_exit); + +MODULE_ALIAS_MISCDEV(RFKILL_MINOR); +MODULE_ALIAS("devname:" RFKILL_NAME); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4a9729257023..6a5c4992cf61 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *); /* * Handler for deferred kills. */ -static void rose_destroy_timer(unsigned long data) +static void rose_destroy_timer(struct timer_list *t) { - rose_destroy_socket((struct sock *)data); + struct sock *sk = from_timer(sk, t, sk_timer); + + rose_destroy_socket(sk); } /* @@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ - setup_timer(&sk->sk_timer, rose_destroy_timer, - (unsigned long)sk); + timer_setup(&sk->sk_timer, rose_destroy_timer, 0); sk->sk_timer.expires = jiffies + 10 * HZ; add_timer(&sk->sk_timer); } else @@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol, sock->ops = &rose_proto_ops; sk->sk_protocol = protocol; - init_timer(&rose->timer); - init_timer(&rose->idletimer); + timer_setup(&rose->timer, NULL, 0); + timer_setup(&rose->idletimer, NULL, 0); rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout); rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout); @@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk) sk->sk_state = TCP_ESTABLISHED; sock_copy_flags(sk, osk); - init_timer(&rose->timer); - init_timer(&rose->idletimer); + timer_setup(&rose->timer, NULL, 0); + timer_setup(&rose->idletimer, NULL, 0); orose = rose_sk(osk); rose->t1 = orose->t1; diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index c76638cc2cd5..cda4c6678ef1 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -27,8 +27,8 @@ #include <linux/interrupt.h> #include <net/rose.h> -static void rose_ftimer_expiry(unsigned long); -static void rose_t0timer_expiry(unsigned long); +static void rose_ftimer_expiry(struct timer_list *); +static void rose_t0timer_expiry(struct timer_list *); static void rose_transmit_restart_confirmation(struct rose_neigh *neigh); static void rose_transmit_restart_request(struct rose_neigh *neigh); @@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh) { del_timer(&neigh->ftimer); - neigh->ftimer.data = (unsigned long)neigh; - neigh->ftimer.function = &rose_ftimer_expiry; + neigh->ftimer.function = (TIMER_FUNC_TYPE)rose_ftimer_expiry; neigh->ftimer.expires = jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout); @@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh) { del_timer(&neigh->t0timer); - neigh->t0timer.data = (unsigned long)neigh; - neigh->t0timer.function = &rose_t0timer_expiry; + neigh->t0timer.function = (TIMER_FUNC_TYPE)rose_t0timer_expiry; neigh->t0timer.expires = jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout); @@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh) return timer_pending(&neigh->t0timer); } -static void rose_ftimer_expiry(unsigned long param) +static void rose_ftimer_expiry(struct timer_list *t) { } -static void rose_t0timer_expiry(unsigned long param) +static void rose_t0timer_expiry(struct timer_list *t) { - struct rose_neigh *neigh = (struct rose_neigh *)param; + struct rose_neigh *neigh = from_timer(neigh, t, t0timer); rose_transmit_restart_request(neigh); diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 344456206b70..094a6621f8e8 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -16,15 +16,17 @@ #include <linux/init.h> static struct sk_buff_head loopback_queue; +#define ROSE_LOOPBACK_LIMIT 1000 static struct timer_list loopback_timer; static void rose_set_loopback_timer(void); +static void rose_loopback_timer(struct timer_list *unused); void rose_loopback_init(void) { skb_queue_head_init(&loopback_queue); - init_timer(&loopback_timer); + timer_setup(&loopback_timer, rose_loopback_timer, 0); } static int rose_loopback_running(void) @@ -34,36 +36,30 @@ static int rose_loopback_running(void) int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) { - struct sk_buff *skbn; + struct sk_buff *skbn = NULL; - skbn = skb_clone(skb, GFP_ATOMIC); + if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT) + skbn = skb_clone(skb, GFP_ATOMIC); - kfree_skb(skb); - - if (skbn != NULL) { + if (skbn) { + consume_skb(skb); skb_queue_tail(&loopback_queue, skbn); if (!rose_loopback_running()) rose_set_loopback_timer(); + } else { + kfree_skb(skb); } return 1; } -static void rose_loopback_timer(unsigned long); - static void rose_set_loopback_timer(void) { - del_timer(&loopback_timer); - - loopback_timer.data = 0; - loopback_timer.function = &rose_loopback_timer; - loopback_timer.expires = jiffies + 10; - - add_timer(&loopback_timer); + mod_timer(&loopback_timer, jiffies + 10); } -static void rose_loopback_timer(unsigned long param) +static void rose_loopback_timer(struct timer_list *unused) { struct sk_buff *skb; struct net_device *dev; @@ -71,8 +67,12 @@ static void rose_loopback_timer(unsigned long param) struct sock *sk; unsigned short frametype; unsigned int lci_i, lci_o; + int count; - while ((skb = skb_dequeue(&loopback_queue)) != NULL) { + for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) { + skb = skb_dequeue(&loopback_queue); + if (!skb) + return; if (skb->len < ROSE_MIN_LEN) { kfree_skb(skb); continue; @@ -109,6 +109,8 @@ static void rose_loopback_timer(unsigned long param) kfree_skb(skb); } } + if (!skb_queue_empty(&loopback_queue)) + mod_timer(&loopback_timer, jiffies + 1); } void __exit rose_loopback_clear(void) diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 2741abec7ee7..d94d6110bb1c 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, skb_queue_head_init(&rose_neigh->queue); - init_timer(&rose_neigh->ftimer); - init_timer(&rose_neigh->t0timer); + timer_setup(&rose_neigh->ftimer, NULL, 0); + timer_setup(&rose_neigh->t0timer, NULL, 0); if (rose_route->ndigis != 0) { rose_neigh->digipeat = @@ -390,8 +390,8 @@ void rose_add_loopback_neigh(void) skb_queue_head_init(&sn->queue); - init_timer(&sn->ftimer); - init_timer(&sn->t0timer); + timer_setup(&sn->ftimer, NULL, 0); + timer_setup(&sn->t0timer, NULL, 0); spin_lock_bh(&rose_neigh_list_lock); sn->next = rose_neigh_list; diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7ca57741b2fb..7849f286bb93 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype) struct sk_buff *skb; unsigned char *dptr; unsigned char lci1, lci2; - char buffer[100]; - int len, faclen = 0; + int maxfaclen = 0; + int len, faclen; + int reserve; - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1; + reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1; + len = ROSE_MIN_LEN; switch (frametype) { case ROSE_CALL_REQUEST: len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN; - faclen = rose_create_facilities(buffer, rose); - len += faclen; + maxfaclen = 256; break; case ROSE_CALL_ACCEPTED: case ROSE_CLEAR_REQUEST: @@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype) break; } - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC); + if (!skb) return; /* * Space for AX.25 header and PID. */ - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1); + skb_reserve(skb, reserve); - dptr = skb_put(skb, skb_tailroom(skb)); + dptr = skb_put(skb, len); lci1 = (rose->lci >> 8) & 0x0F; lci2 = (rose->lci >> 0) & 0xFF; @@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype) dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; - memcpy(dptr, buffer, faclen); + faclen = rose_create_facilities(dptr, rose); + skb_put(skb, faclen); dptr += faclen; break; diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index bc5469d6d9cb..3b89d66f15bb 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -29,8 +29,8 @@ #include <net/rose.h> static void rose_heartbeat_expiry(unsigned long); -static void rose_timer_expiry(unsigned long); -static void rose_idletimer_expiry(unsigned long); +static void rose_timer_expiry(struct timer_list *); +static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { @@ -49,8 +49,7 @@ void rose_start_t1timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; add_timer(&rose->timer); @@ -62,8 +61,7 @@ void rose_start_t2timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; add_timer(&rose->timer); @@ -75,8 +73,7 @@ void rose_start_t3timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; add_timer(&rose->timer); @@ -88,8 +85,7 @@ void rose_start_hbtimer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; add_timer(&rose->timer); @@ -102,8 +98,7 @@ void rose_start_idletimer(struct sock *sk) del_timer(&rose->idletimer); if (rose->idle > 0) { - rose->idletimer.data = (unsigned long)sk; - rose->idletimer.function = &rose_idletimer_expiry; + rose->idletimer.function = (TIMER_FUNC_TYPE)rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; add_timer(&rose->idletimer); @@ -163,10 +158,10 @@ static void rose_heartbeat_expiry(unsigned long param) bh_unlock_sock(sk); } -static void rose_timer_expiry(unsigned long param) +static void rose_timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct rose_sock *rose = rose_sk(sk); + struct rose_sock *rose = from_timer(rose, t, timer); + struct sock *sk = &rose->sock; bh_lock_sock(sk); switch (rose->state) { @@ -192,9 +187,10 @@ static void rose_timer_expiry(unsigned long param) bh_unlock_sock(sk); } -static void rose_idletimer_expiry(unsigned long param) +static void rose_idletimer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct rose_sock *rose = from_timer(rose, t, idletimer); + struct sock *sk = &rose->sock; bh_lock_sock(sk); rose_clear_queues(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 2a32f60652d8..296b9efe6641 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -504,6 +504,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) switch (rx->sk.sk_state) { case RXRPC_UNBOUND: + case RXRPC_CLIENT_UNBOUND: rx->srx.srx_family = AF_RXRPC; rx->srx.srx_service = 0; rx->srx.transport_type = SOCK_DGRAM; @@ -528,10 +529,9 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) } rx->local = local; - rx->sk.sk_state = RXRPC_CLIENT_UNBOUND; + rx->sk.sk_state = RXRPC_CLIENT_BOUND; /* Fall through */ - case RXRPC_CLIENT_UNBOUND: case RXRPC_CLIENT_BOUND: if (!m->msg_name && test_bit(RXRPC_SOCK_CONNECTED, &rx->flags)) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8a5a42e8ec23..ddaa471a2607 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -684,27 +684,27 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); - if (list_empty(&rxnet->calls)) - return; + if (!list_empty(&rxnet->calls)) { + write_lock(&rxnet->call_lock); - write_lock(&rxnet->call_lock); + while (!list_empty(&rxnet->calls)) { + call = list_entry(rxnet->calls.next, + struct rxrpc_call, link); + _debug("Zapping call %p", call); - while (!list_empty(&rxnet->calls)) { - call = list_entry(rxnet->calls.next, struct rxrpc_call, link); - _debug("Zapping call %p", call); + rxrpc_see_call(call); + list_del_init(&call->link); - rxrpc_see_call(call); - list_del_init(&call->link); + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + rxrpc_call_states[call->state], + call->flags, call->events); - pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - rxrpc_call_states[call->state], - call->flags, call->events); + write_unlock(&rxnet->call_lock); + cond_resched(); + write_lock(&rxnet->call_lock); + } write_unlock(&rxnet->call_lock); - cond_resched(); - write_lock(&rxnet->call_lock); } - - write_unlock(&rxnet->call_lock); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 78a154173d90..0aa4bf09fb9c 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -351,7 +351,7 @@ static int rxrpc_get_client_conn(struct rxrpc_call *call, * normally have to take channel_lock but we do this before anyone else * can see the connection. */ - list_add_tail(&call->chan_wait_link, &candidate->waiting_calls); + list_add(&call->chan_wait_link, &candidate->waiting_calls); if (cp->exclusive) { call->conn = candidate; @@ -430,7 +430,7 @@ found_extant_conn: call->conn = conn; call->security_ix = conn->security_ix; call->service_id = conn->service_id; - list_add(&call->chan_wait_link, &conn->waiting_calls); + list_add_tail(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); _leave(" = 0 [extant %d]", conn->debug_id); return 0; diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7f749505e699..7d73e8ce6660 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -150,6 +150,9 @@ void rxrpc_error_report(struct sock *sk) struct rxrpc_peer *peer; struct sk_buff *skb; + if (unlikely(!local)) + return; + _enter("%p{%d}", sk, local->debug_id); skb = sock_dequeue_err_skb(sk); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index abcf48026d99..b74cde2fd214 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -588,6 +588,7 @@ error_requeue_call: } error_no_call: release_sock(&rx->sk); +error_trace: trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; @@ -596,7 +597,7 @@ wait_interrupted: wait_error: finish_wait(sk_sleep(&rx->sk), &wait); call = NULL; - goto error_no_call; + goto error_trace; } /** diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 016e293681b8..a980b49d7a4f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -586,6 +586,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: case RXRPC_CALL_SERVER_ACCEPTING: + rxrpc_put_call(call, rxrpc_call_put); ret = -EBUSY; goto error_release_sock; default: diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4444d7e755e6..8ae0addb7657 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1072,10 +1072,16 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr) { - int ret = 0; + int loop, ret; LIST_HEAD(actions); - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions); + for (loop = 0; loop < 10; loop++) { + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, + &actions); + if (ret != -EAGAIN) + break; + } + if (ret) return ret; @@ -1122,10 +1128,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, */ if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; -replay: ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); - if (ret == -EAGAIN) - goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 364a878e51cb..bdc8885c0448 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -402,7 +402,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops); + return tc_action_net_init(net, tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 10b7a8855a6c..de0cd73a5a5d 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -206,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops); + return tc_action_net_init(net, tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index d836f998117b..a449594553d0 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -632,7 +632,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops); + return tc_action_net_init(net, tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index a0ac42b3ed06..69512d3d0818 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -232,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops); + return tc_action_net_init(net, tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 31de26c99023..aea8ee40e76b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -459,6 +459,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, int ret = 0; int err; + if (!nla) + return -EINVAL; + err = nla_parse_nested(tb, TCA_IFE_MAX, nla, ife_policy, NULL); if (err < 0) return err; @@ -834,7 +837,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops); + return tc_action_net_init(net, tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 18b2fd2ba7d7..a2687dd95a3d 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -65,12 +65,13 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, return 0; } -static void ipt_destroy_target(struct xt_entry_target *t) +static void ipt_destroy_target(struct xt_entry_target *t, struct net *net) { struct xt_tgdtor_param par = { .target = t->u.kernel.target, .targinfo = t->data, .family = NFPROTO_IPV4, + .net = net, }; if (par.target->destroy != NULL) par.target->destroy(&par); @@ -82,7 +83,7 @@ static void tcf_ipt_release(struct tc_action *a, int bind) struct tcf_ipt *ipt = to_ipt(a); if (ipt->tcfi_t) { - ipt_destroy_target(ipt->tcfi_t); + ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net); kfree(ipt->tcfi_t); } kfree(ipt->tcfi_tname); @@ -172,7 +173,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, spin_lock_bh(&ipt->tcf_lock); if (ret != ACT_P_CREATED) { - ipt_destroy_target(ipt->tcfi_t); + ipt_destroy_target(ipt->tcfi_t, net); kfree(ipt->tcfi_tname); kfree(ipt->tcfi_t); } @@ -337,7 +338,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops); + return tc_action_net_init(net, tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -387,7 +388,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops); + return tc_action_net_init(net, tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6ce8de373f83..529bb064c4a4 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -343,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops); + return tc_action_net_init(net, tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index c365d01b99c8..5a136943af27 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -307,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops); + return tc_action_net_init(net, tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 656b6ada9221..fb0caa500ac8 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -46,7 +46,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, int err = -EINVAL; int rem; - if (!nla || !n) + if (!nla) return NULL; keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL); @@ -163,6 +163,9 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(pattr); + if (!parm->nkeys) + return -EINVAL; + ksize = parm->nkeys * sizeof(struct tc_pedit_key); if (nla_len(pattr) < sizeof(*parm) + ksize) return -EINVAL; @@ -172,8 +175,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return PTR_ERR(keys_ex); if (!tcf_idr_check(tn, parm->index, a, bind)) { - if (!parm->nkeys) - return -EINVAL; ret = tcf_idr_create(tn, parm->index, est, a, &act_pedit_ops, bind, false); if (ret) @@ -458,7 +459,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops); + return tc_action_net_init(net, tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index c16127109f21..a7fcc591c241 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -331,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops); + return tc_action_net_init(net, tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index a859b55d7899..9d92eac01958 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -45,6 +45,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct tc_sample *parm; struct tcf_sample *s; bool exists = false; + u32 rate; int ret; if (!nla) @@ -73,10 +74,17 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (!ovr) return -EEXIST; } - s = to_sample(*a); + rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + if (!rate) { + tcf_idr_release(*a, bind); + return -EINVAL; + } + + s = to_sample(*a); s->tcf_action = parm->action; s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); + s->rate = rate; s->psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, s->psample_group_num); if (!psample_group) { @@ -84,13 +92,16 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -ENOMEM; } - RCU_INIT_POINTER(s->psample_group, psample_group); + rcu_swap_protected(s->psample_group, psample_group, + lockdep_is_held(&s->tcf_lock)); if (tb[TCA_SAMPLE_TRUNC_SIZE]) { s->truncate = true; s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); } + if (psample_group) + psample_group_put(psample_group); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; @@ -121,6 +132,7 @@ static bool tcf_sample_dev_ok_push(struct net_device *dev) case ARPHRD_TUNNEL6: case ARPHRD_SIT: case ARPHRD_IPGRE: + case ARPHRD_IP6GRE: case ARPHRD_VOID: case ARPHRD_NONE: return false; @@ -241,7 +253,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops); + return tc_action_net_init(net, tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index f3ed63aa4111..86d8b66b9928 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -198,7 +198,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops); + return tc_action_net_init(net, tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 6e749497009e..1a8a49e33320 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -239,7 +239,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops); + return tc_action_net_init(net, tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d227599f7e73..20ea9d11821b 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -267,7 +267,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops); + return tc_action_net_init(net, tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index cd51f2ed55fa..62e22738022d 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -324,7 +324,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops); + return tc_action_net_init(net, tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 5c10a0fce35b..c9a3eeb351fa 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -271,7 +271,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops); + return tc_action_net_init(net, tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 6499aecfbfc4..d8fd152779c8 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -125,6 +125,11 @@ static void mall_destroy(struct tcf_proto *tp) static void *mall_get(struct tcf_proto *tp, u32 handle) { + struct cls_mall_head *head = rtnl_dereference(tp->root); + + if (head && head->handle == handle) + return head; + return NULL; } diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 52829fdc280b..75c7c7cc7499 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -322,9 +322,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr *est, bool ovr) { struct tcindex_filter_result new_filter_result, *old_r = r; - struct tcindex_filter_result cr; struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; @@ -365,11 +365,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result); if (err < 0) goto errout1; - err = tcindex_filter_result_init(&cr); - if (err < 0) - goto errout1; if (old_r) - cr.res = r->res; + cr = r->res; if (tb[TCA_TCINDEX_HASH]) cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -460,8 +457,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (tb[TCA_TCINDEX_CLASSID]) { - cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); - tcf_bind_filter(tp, &cr.res, base); + cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); + tcf_bind_filter(tp, &cr, base); } if (old_r && old_r != r) { @@ -473,7 +470,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } oldp = p; - r->res = cr.res; + r->res = cr; tcf_exts_change(&r->exts, &e); rcu_assign_pointer(tp->root, cp); @@ -492,6 +489,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, ; /* nothing */ rcu_assign_pointer(*fp, f); + } else { + tcf_exts_destroy(&new_filter_result.exts); } if (oldp) @@ -504,7 +503,6 @@ errout_alloc: else if (balloc == 2) kfree(cp->h); errout1: - tcf_exts_destroy(&cr.exts); tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7b4270987ac1..296e95f72eb1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1217,7 +1217,8 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) */ const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { - [TCA_KIND] = { .type = NLA_STRING }, + [TCA_KIND] = { .type = NLA_NUL_STRING, + .len = IFNAMSIZ - 1 }, [TCA_RATE] = { .type = NLA_BINARY, .len = sizeof(struct tc_estimator) }, [TCA_STAB] = { .type = NLA_NESTED }, @@ -1694,6 +1695,8 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, cl = cops->find(q, portid); if (!cl) return; + if (!cops->tcf_block) + return; block = cops->tcf_block(q, cl); if (!block) return; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index aeffa320429d..40fd1ee0095c 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1131,6 +1131,26 @@ static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, }; +static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], struct nlattr *opt) +{ + int err; + + if (!opt) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); + if (err < 0) + return err; + + if (tb[TCA_CBQ_WRROPT]) { + const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]); + + if (wrr->priority > TC_CBQ_MAXPRIO) + err = -EINVAL; + } + return err; +} + static int cbq_init(struct Qdisc *sch, struct nlattr *opt) { struct cbq_sched_data *q = qdisc_priv(sch); @@ -1142,10 +1162,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); q->delay_timer.function = cbq_undelay; - if (!opt) - return -EINVAL; - - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); + err = cbq_opt_parse(tb, opt); if (err < 0) return err; @@ -1459,10 +1476,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t struct cbq_class *parent; struct qdisc_rate_table *rtab = NULL; - if (opt == NULL) - return -EINVAL; - - err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); + err = cbq_opt_parse(tb, opt); if (err < 0) return err; diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index c518a1efcb9d..b22e5cde6059 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -71,10 +71,10 @@ static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) struct Qdisc *sch = ctx; struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); - if (skb) + if (skb) { sch->qstats.backlog -= qdisc_pkt_len(skb); - - prefetch(&skb->end); /* we'll need skb_shinfo() */ + prefetch(&skb->end); /* we'll need skb_shinfo() */ + } return skb; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 2836c80c7aa5..b507a72d5813 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -353,6 +353,8 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) goto errout; err = -EINVAL; + if (!tb[TCA_DSMARK_INDICES]) + goto errout; indices = nla_get_u16(tb[TCA_DSMARK_INDICES]); if (hweight32(indices) != 1) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f50eb87cfe79..7a944f508cae 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -734,7 +734,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_QUANTUM]) { u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); - if (quantum > 0) + if (quantum > 0 && quantum <= (1 << 20)) q->quantum = quantum; else err = -EINVAL; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 79549baf5804..21b981abbacb 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -703,7 +703,11 @@ static void qdisc_rcu_free(struct rcu_head *head) void qdisc_destroy(struct Qdisc *qdisc) { - const struct Qdisc_ops *ops = qdisc->ops; + const struct Qdisc_ops *ops; + + if (!qdisc) + return; + ops = qdisc->ops; if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_test(&qdisc->refcnt)) diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 73a53c08091b..c73475c3a464 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -4,11 +4,11 @@ * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com> */ -#include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> +#include <linux/siphash.h> #include <net/pkt_sched.h> #include <net/sock.h> @@ -125,7 +125,7 @@ struct wdrr_bucket { struct hhf_sched_data { struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; - u32 perturbation; /* hash perturbation */ + siphash_key_t perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ u32 drop_overlimit; /* number of times max qdisc packet * limit was hit @@ -263,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) } /* Get hashed flow-id of the skb. */ - hash = skb_get_hash_perturb(skb, q->perturbation); + hash = skb_get_hash_perturb(skb, &q->perturbation); /* Check if this packet belongs to an already established HH flow. */ flow_pos = hash & HHF_BIT_MASK; @@ -528,7 +528,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt) new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]); non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; - if (non_hh_quantum > INT_MAX) + if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX) return -EINVAL; sch_tree_lock(sch); @@ -578,7 +578,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = 1000; q->quantum = psched_mtu(qdisc_dev(sch)); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); INIT_LIST_HEAD(&q->new_buckets); INIT_LIST_HEAD(&q->old_buckets); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3a3e507422b..442ac9c3f16f 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -191,7 +191,8 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats, + &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 6bcdfe6e7b63..bb8d3fbc13bb 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -366,8 +366,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, + sch->cpu_bstats, &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index ff4fc3e0facd..65aa03d46857 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -340,7 +340,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3f4f0b946798..6266121a03f9 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -435,6 +435,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, int nb = 0; int count = 1; int rc = NET_XMIT_SUCCESS; + int rc_drop = NET_XMIT_DROP; /* Do not fool qdisc_drop_all() */ skb->prev = NULL; @@ -468,12 +469,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = qdisc_root(sch); + struct Qdisc *rootq = qdisc_root_bh(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; rootq->enqueue(skb2, rootq, to_free); q->duplicate = dupsave; + rc_drop = NET_XMIT_SUCCESS; } /* @@ -486,7 +488,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb)) { segs = netem_segment(skb, sch, to_free); if (!segs) - return NET_XMIT_DROP; + return rc_drop; } else { segs = skb; } @@ -509,8 +511,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(sch->q.qlen >= sch->limit)) - return qdisc_drop_all(skb, sch, to_free); + if (unlikely(sch->q.qlen >= sch->limit)) { + qdisc_drop_all(skb, sch, to_free); + return rc_drop; + } qdisc_qstats_backlog_inc(sch, skb); @@ -704,7 +708,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) struct disttable *d; int i; - if (n > NETEM_DIST_MAX) + if (!n || n > NETEM_DIST_MAX) return -EINVAL; d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2dd6c68ae91e..ff6bc7cf6cbd 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -244,8 +244,14 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; - if (new == NULL) - new = &noop_qdisc; + if (!new) { + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, arg)); + if (!new) + new = &noop_qdisc; + else + qdisc_hash_add(new, true); + } *old = qdisc_replace(sch, new, &q->queues[band]); return 0; @@ -298,7 +304,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index cc39e170b4aa..04f15e0aeaa8 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -22,7 +22,7 @@ #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/random.h> -#include <linux/jhash.h> +#include <linux/siphash.h> #include <net/ip.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> @@ -49,7 +49,7 @@ struct sfb_bucket { * (Section 4.4 of SFB reference : moving hash functions) */ struct sfb_bins { - u32 perturbation; /* jhash perturbation */ + siphash_key_t perturbation; /* siphash key */ struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; }; @@ -221,7 +221,8 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) { - q->bins[slot].perturbation = prandom_u32(); + get_random_bytes(&q->bins[slot].perturbation, + sizeof(q->bins[slot].perturbation)); } static void sfb_swap_slot(struct sfb_sched_data *q) @@ -317,9 +318,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* If using external classifiers, get result and record it. */ if (!sfb_classify(skb, fl, &ret, &salt)) goto other_drop; - sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation); } else { - sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation); + sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation); } @@ -355,7 +356,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Inelastic flow */ if (q->double_buffering) { sfbhash = skb_get_hash_perturb(skb, - q->bins[slot].perturbation); + &q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3fbf20126045..cbc54ddfe076 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -18,7 +18,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/skbuff.h> -#include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/netlink.h> @@ -121,7 +121,7 @@ struct sfq_sched_data { u8 headdrop; u8 maxdepth; /* limit of packets per flow */ - u32 perturbation; + siphash_key_t perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ @@ -160,7 +160,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index static unsigned int sfq_hash(const struct sfq_sched_data *q, const struct sk_buff *skb) { - return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1); + return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -609,9 +609,11 @@ static void sfq_perturbation(unsigned long arg) struct Qdisc *sch = (struct Qdisc *)arg; struct sfq_sched_data *q = qdisc_priv(sch); spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + siphash_key_t nkey; + get_random_bytes(&nkey, sizeof(nkey)); spin_lock(root_lock); - q->perturbation = prandom_u32(); + q->perturbation = nkey; if (!q->filter_list && q->tail) sfq_rehash(sch); spin_unlock(root_lock); @@ -690,7 +692,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) del_timer(&q->perturb_timer); if (q->perturb_period) { mod_timer(&q->perturb_timer, jiffies + q->perturb_period); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); } sch_tree_unlock(sch); kfree(p); @@ -746,7 +748,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); if (opt) { int err = sfq_change(sch, opt); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index b36ecb58aa6e..107cc76b6e24 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -142,16 +142,6 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, return len; } -/* - * Return length of individual segments of a gso packet, - * including all headers (MAC, IP, TCP/UDP) - */ -static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - return hdr_len + skb_gso_transport_seglen(skb); -} - /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 23fec3817e0c..dd1a3bd80be5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -80,6 +80,7 @@ static struct sctp_association *sctp_association_init( /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; asoc->base.sk = (struct sock *)sk; + asoc->base.net = sock_net(sk); sctp_endpoint_hold(asoc->ep); sock_hold(asoc->base.sk); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index ee1e601a0b11..c71b4191df1e 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -126,10 +126,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); - /* Remember who we are attached to. */ - ep->base.sk = sk; - sock_hold(ep->base.sk); - /* Create the lists of associations. */ INIT_LIST_HEAD(&ep->asocs); @@ -167,6 +163,11 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, ep->prsctp_enable = net->sctp.prsctp_enable; ep->reconf_enable = net->sctp.reconf_enable; + /* Remember who we are attached to. */ + ep->base.sk = sk; + ep->base.net = sock_net(sk); + sock_hold(ep->base.sk); + return ep; nomem_hmacs: diff --git a/net/sctp/input.c b/net/sctp/input.c index 0247cc432e02..3c0affecf272 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -813,7 +813,7 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, if (!sctp_transport_hold(t)) return err; - if (!net_eq(sock_net(t->asoc->base.sk), x->net)) + if (!net_eq(t->asoc->base.net, x->net)) goto out; if (x->lport != htons(t->asoc->base.bind_addr.port)) goto out; @@ -828,7 +828,7 @@ static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; const union sctp_addr *paddr = &t->ipaddr; - const struct net *net = sock_net(t->asoc->base.sk); + const struct net *net = t->asoc->base.net; __be16 lport = htons(t->asoc->base.bind_addr.port); __u32 addr; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7eb06fa75730..53a66ee1331f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -974,7 +974,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 35bc7106d182..055e1ab1e963 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; skb->csum_not_inet = 0; + gso_reset_checksum(skb, ~0); return sctp_compute_cksum(skb, skb_transport_offset(skb)); } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index cbb04d66f564..bf39f317953a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -253,6 +253,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, sa->sin_port = sh->dest; sa->sin_addr.s_addr = ip_hdr(skb)->daddr; } + memset(sa->sin_zero, 0, sizeof(sa->sin_zero)); } /* Initialize an sctp_addr from a socket. */ @@ -261,6 +262,7 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) addr->v4.sin_family = AF_INET; addr->v4.sin_port = 0; addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ @@ -283,6 +285,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr, addr->v4.sin_family = AF_INET; addr->v4.sin_port = port; addr->v4.sin_addr.s_addr = param->v4.addr.s_addr; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Initialize an address parameter from a sctp_addr and return the length @@ -307,6 +310,7 @@ static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct flowi4 *fl4, saddr->v4.sin_family = AF_INET; saddr->v4.sin_port = port; saddr->v4.sin_addr.s_addr = fl4->saddr; + memset(saddr->v4.sin_zero, 0, sizeof(saddr->v4.sin_zero)); } /* Compare two addresses exactly. */ @@ -329,6 +333,7 @@ static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port) addr->v4.sin_family = AF_INET; addr->v4.sin_addr.s_addr = htonl(INADDR_ANY); addr->v4.sin_port = port; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Is this a wildcard address? */ @@ -605,6 +610,7 @@ out: static int sctp_v4_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr) { /* No address mapping for V4 sockets */ + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); return sizeof(struct sockaddr_in); } @@ -1018,7 +1024,7 @@ static const struct proto_ops inet_seqpacket_ops = { .owner = THIS_MODULE, .release = inet_release, /* Needs to be wrapped... */ .bind = inet_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, /* Semantics are different. */ @@ -1343,7 +1349,7 @@ static int __net_init sctp_ctrlsock_init(struct net *net) return status; } -static void __net_init sctp_ctrlsock_exit(struct net *net) +static void __net_exit sctp_ctrlsock_exit(struct net *net) { /* Free the control endpoint. */ inet_ctl_sock_destroy(net->sctp.ctl_sock); diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index a72a7d925d46..75274a60b77a 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -225,6 +225,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e4a400f88168..f67df16bd340 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2318,7 +2318,6 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, union sctp_addr addr; struct sctp_af *af; int src_match = 0; - char *cookie; /* We must include the address that the INIT packet came from. * This is the only address that matters for an INIT packet. @@ -2422,14 +2421,6 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, /* Peer Rwnd : Current calculated value of the peer's rwnd. */ asoc->peer.rwnd = asoc->peer.i.a_rwnd; - /* Copy cookie in case we need to resend COOKIE-ECHO. */ - cookie = asoc->peer.cookie; - if (cookie) { - asoc->peer.cookie = kmemdup(cookie, asoc->peer.cookie_len, gfp); - if (!asoc->peer.cookie) - goto clean_up; - } - /* RFC 2960 7.2.1 The initial value of ssthresh MAY be arbitrarily * high (for example, implementations MAY use the size of the receiver * advertised window). @@ -2595,7 +2586,11 @@ do_addr_param: case SCTP_PARAM_STATE_COOKIE: asoc->peer.cookie_len = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); - asoc->peer.cookie = param.cookie->body; + if (asoc->peer.cookie) + kfree(asoc->peer.cookie); + asoc->peer.cookie = kmemdup(param.cookie->body, asoc->peer.cookie_len, gfp); + if (!asoc->peer.cookie) + retval = 0; break; case SCTP_PARAM_HEARTBEAT_INFO: @@ -2657,6 +2652,8 @@ do_addr_param: goto fall_through; /* Save peer's random parameter */ + if (asoc->peer.peer_random) + kfree(asoc->peer.peer_random); asoc->peer.peer_random = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_random) { @@ -2670,6 +2667,8 @@ do_addr_param: goto fall_through; /* Save peer's HMAC list */ + if (asoc->peer.peer_hmacs) + kfree(asoc->peer.peer_hmacs); asoc->peer.peer_hmacs = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_hmacs) { @@ -2685,6 +2684,8 @@ do_addr_param: if (!ep->auth_enable) goto fall_through; + if (asoc->peer.peer_chunks) + kfree(asoc->peer.peer_chunks); asoc->peer.peer_chunks = kmemdup(param.p, ntohs(param.p->length), gfp); if (!asoc->peer.peer_chunks) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e2d9a4b49c9c..482bb0a5d4d3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -541,8 +541,8 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, */ if (net->sctp.pf_enable && (transport->state == SCTP_ACTIVE) && - (asoc->pf_retrans < transport->pathmaxrxt) && - (transport->error_count > asoc->pf_retrans)) { + (transport->error_count < transport->pathmaxrxt) && + (transport->error_count > transport->pf_retrans)) { sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_PF, @@ -878,6 +878,11 @@ static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds, asoc->rto_initial; } + if (sctp_state(asoc, ESTABLISHED)) { + kfree(asoc->peer.cookie); + asoc->peer.cookie = NULL; + } + if (sctp_state(asoc, ESTABLISHED) || sctp_state(asoc, CLOSED) || sctp_state(asoc, SHUTDOWN_RECEIVED)) { @@ -1092,32 +1097,6 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, } -/* Sent the next ASCONF packet currently stored in the association. - * This happens after the ASCONF_ACK was succeffully processed. - */ -static void sctp_cmd_send_asconf(struct sctp_association *asoc) -{ - struct net *net = sock_net(asoc->base.sk); - - /* Send the next asconf chunk from the addip chunk - * queue. - */ - if (!list_empty(&asoc->addip_chunk_list)) { - struct list_head *entry = asoc->addip_chunk_list.next; - struct sctp_chunk *asconf = list_entry(entry, - struct sctp_chunk, list); - list_del_init(entry); - - /* Hold the chunk until an ASCONF_ACK is received. */ - sctp_chunk_hold(asconf); - if (sctp_primitive_ASCONF(net, asoc, asconf)) - sctp_chunk_free(asconf); - else - asoc->addip_last_asconf = asconf; - } -} - - /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1380,8 +1359,10 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, /* Generate an INIT ACK chunk. */ new_obj = sctp_make_init_ack(asoc, chunk, GFP_ATOMIC, 0); - if (!new_obj) - goto nomem; + if (!new_obj) { + error = -ENOMEM; + break; + } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); @@ -1403,7 +1384,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, if (!new_obj) { if (cmd->obj.chunk) sctp_chunk_free(cmd->obj.chunk); - goto nomem; + error = -ENOMEM; + break; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); @@ -1450,8 +1432,10 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, /* Generate a SHUTDOWN chunk. */ new_obj = sctp_make_shutdown(asoc, chunk); - if (!new_obj) - goto nomem; + if (!new_obj) { + error = -ENOMEM; + break; + } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); break; @@ -1763,9 +1747,6 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, } sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; - case SCTP_CMD_SEND_NEXT_ASCONF: - sctp_cmd_send_asconf(asoc); - break; case SCTP_CMD_PURGE_ASCONF_QUEUE: sctp_asconf_queue_teardown(asoc); break; @@ -1784,11 +1765,17 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; } - if (error) + if (error) { + cmd = sctp_next_cmd(commands); + while (cmd) { + if (cmd->verb == SCTP_CMD_REPLY) + sctp_chunk_free(cmd->obj.chunk); + cmd = sctp_next_cmd(commands); + } break; + } } -out: /* If this is in response to a received chunk, wait until * we are done with the packet to open the queue so that we don't * send multiple packets in response to a single request. @@ -1803,8 +1790,5 @@ out: sp->data_ready_signalled = 0; return error; -nomem: - error = -ENOMEM; - goto out; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 01b078172306..a2e058127ef7 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3756,6 +3756,29 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return SCTP_DISPOSITION_CONSUME; } +static enum sctp_disposition sctp_send_next_asconf( + struct net *net, + const struct sctp_endpoint *ep, + struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *asconf; + struct list_head *entry; + + if (list_empty(&asoc->addip_chunk_list)) + return SCTP_DISPOSITION_CONSUME; + + entry = asoc->addip_chunk_list.next; + asconf = list_entry(entry, struct sctp_chunk, list); + + list_del_init(entry); + sctp_chunk_hold(asconf); + asoc->addip_last_asconf = asconf; + + return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands); +} + /* * ADDIP Section 4.3 General rules for address manipulation * When building TLV parameters for the ASCONF Chunk that will add or @@ -3847,14 +3870,10 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) { - /* Successfully processed ASCONF_ACK. We can - * release the next asconf if we have one. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, - SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; - } + asconf_ack)) + return sctp_send_next_asconf(net, ep, + (struct sctp_association *)asoc, + type, commands); abort = sctp_make_abort(asoc, asconf_ack, sizeof(struct sctp_errhdr)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6a2532370545..09cda66d0567 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1076,7 +1076,7 @@ out: */ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, - int addrs_size, + int addrs_size, int flags, sctp_assoc_t *assoc_id) { struct net *net = sock_net(sk); @@ -1094,7 +1094,6 @@ static int __sctp_connect(struct sock *sk, union sctp_addr *sa_addr = NULL; void *addr_buf; unsigned short port; - unsigned int f_flags = 0; sp = sctp_sk(sk); ep = sp->ep; @@ -1244,13 +1243,7 @@ static int __sctp_connect(struct sock *sk, sp->pf->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; - /* in-kernel sockets don't generally have a file allocated to them - * if all they do is call sock_create_kern(). - */ - if (sk->sk_socket->file) - f_flags = sk->sk_socket->file->f_flags; - - timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if (assoc_id) *assoc_id = asoc->assoc_id; @@ -1345,7 +1338,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, { struct sockaddr *kaddrs; gfp_t gfp = GFP_KERNEL; - int err = 0; + int err = 0, flags = 0; pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, addrs, addrs_size); @@ -1365,11 +1358,18 @@ static int __sctp_setsockopt_connectx(struct sock *sk, return -ENOMEM; if (__copy_from_user(kaddrs, addrs, addrs_size)) { - err = -EFAULT; - } else { - err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); + kfree(kaddrs); + return -EFAULT; } + /* in-kernel sockets don't generally have a file allocated to them + * if all they do is call sock_create_kern(). + */ + if (sk->sk_socket->file) + flags = sk->sk_socket->file->f_flags; + + err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); + kfree(kaddrs); return err; @@ -4166,31 +4166,36 @@ out_nounlock: * len: the size of the address. */ static int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) + int addr_len, int flags) { - int err = 0; struct sctp_af *af; + int err = -EINVAL; lock_sock(sk); - pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); - if (!af || addr_len < af->sockaddr_len) { - err = -EINVAL; - } else { - /* Pass correct addr len to common routine (so it knows there - * is only one address being passed. - */ - err = __sctp_connect(sk, addr, af->sockaddr_len, NULL); - } + if (af && addr_len >= af->sockaddr_len) + err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL); release_sock(sk); return err; } +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return -EOPNOTSUPP; + + return sctp_connect(sock->sk, uaddr, addr_len, flags); +} + /* FIXME: Write comments. */ static int sctp_disconnect(struct sock *sk, int flags) { @@ -7366,7 +7371,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) mask = 0; /* Is there any exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -7375,7 +7380,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= POLLHUP; /* Is it readable? Reconsider this code with TCP-style support. */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* The association is either gone or not ready. */ @@ -7711,7 +7716,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk_can_busy_loop(sk)) { sk_busy_loop(sk, noblock); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) continue; } @@ -8131,7 +8136,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->inet_id = asoc->next_tsn ^ jiffies; + newinet->inet_id = prandom_u32(); newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; @@ -8298,7 +8303,6 @@ struct proto sctp_prot = { .name = "SCTP", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, @@ -8313,7 +8317,7 @@ struct proto sctp_prot = { .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp_sock), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, @@ -8337,7 +8341,6 @@ struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, @@ -8352,7 +8355,7 @@ struct proto sctpv6_prot = { .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp6_sock), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 43105cf04bc4..4c55b759a58e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -210,7 +210,8 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport) /* When a data chunk is sent, reset the heartbeat interval. */ expires = jiffies + sctp_transport_timeout(transport); - if (time_before(transport->hb_timer.expires, expires) && + if ((time_before(transport->hb_timer.expires, expires) || + !timer_pending(&transport->hb_timer)) && !mod_timer(&transport->hb_timer, expires + prandom_u32_max(transport->rto))) sctp_transport_hold(transport); @@ -271,7 +272,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) pf->af->from_sk(&addr, sk); pf->to_sk_daddr(&t->ipaddr, sk); - dst->ops->update_pmtu(dst, sk, NULL, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); pf->to_sk_daddr(&addr, sk); dst = sctp_transport_dst_check(t); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f04a037dc967..0de788fa43e9 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -103,6 +103,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) struct smc_link_group *lgr = conn->lgr; int reduced = 0; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { reduced = 1; @@ -431,6 +433,8 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 710ab3fbf607..ec9397ec2def 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -70,13 +70,11 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) DEFINE_WAIT_FUNC(wait, woken_wake_function); struct smc_connection *conn = &smc->conn; struct sock *sk = &smc->sk; - bool noblock; long timeo; int rc = 0; /* similar to sk_stream_wait_memory */ timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - noblock = timeo ? false : true; add_wait_queue(sk_sleep(sk), &wait); while (1) { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -91,8 +89,8 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) break; } if (!timeo) { - if (noblock) - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + /* ensure EPOLLOUT is subsequently generated */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); rc = -EAGAIN; break; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index de4537f66832..ed6736a1a112 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -223,12 +223,14 @@ int smc_wr_tx_put_slot(struct smc_link *link, pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv); if (pend->idx < link->wr_tx_cnt) { + u32 idx = pend->idx; + /* clear the full struct smc_wr_tx_pend including .priv */ memset(&link->wr_tx_pends[pend->idx], 0, sizeof(link->wr_tx_pends[pend->idx])); memset(&link->wr_tx_bufs[pend->idx], 0, sizeof(link->wr_tx_bufs[pend->idx])); - test_and_clear_bit(pend->idx, link->wr_tx_mask); + test_and_clear_bit(idx, link->wr_tx_mask); return 1; } diff --git a/net/socket.c b/net/socket.c index a401578f3f28..5b134a6b6216 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,6 +600,7 @@ static void __sock_release(struct socket *sock, struct inode *inode) if (inode) inode_lock(inode); sock->ops->release(sock); + sock->sk = NULL; if (inode) inode_unlock(inode); sock->ops = NULL; @@ -890,7 +891,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) .msg_iocb = iocb}; ssize_t res; - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT)) msg.msg_flags = MSG_DONTWAIT; if (iocb->ki_pos != 0) @@ -915,7 +916,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_pos != 0) return -ESPIPE; - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT)) msg.msg_flags = MSG_DONTWAIT; if (sock->type == SOCK_SEQPACKET) @@ -2655,15 +2656,6 @@ out_fs: core_initcall(sock_init); /* early initcall */ -static int __init jit_init(void) -{ -#ifdef CONFIG_BPF_JIT_ALWAYS_ON - bpf_jit_enable = 1; -#endif - return 0; -} -pure_initcall(jit_init); - #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 1d74d653e6c0..ad0dcb69395d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -63,6 +63,7 @@ #include <linux/sunrpc/gss_krb5.h> #include <linux/random.h> #include <linux/crypto.h> +#include <linux/atomic.h> #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index c8b9082f4a9d..2d2ed6772fe4 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -44,7 +44,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, unsigned char *cksum, unsigned char *buf) { struct crypto_skcipher *cipher; - unsigned char plain[8]; + unsigned char *plain; s32 code; dprintk("RPC: %s:\n", __func__); @@ -53,6 +53,10 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, if (IS_ERR(cipher)) return PTR_ERR(cipher); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; + plain[0] = (unsigned char) ((seqnum >> 24) & 0xff); plain[1] = (unsigned char) ((seqnum >> 16) & 0xff); plain[2] = (unsigned char) ((seqnum >> 8) & 0xff); @@ -69,6 +73,7 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum, code = krb5_encrypt(cipher, cksum, plain, buf, 8); out: crypto_free_skcipher(cipher); + kfree(plain); return code; } s32 @@ -78,12 +83,17 @@ krb5_make_seq_num(struct krb5_ctx *kctx, u32 seqnum, unsigned char *cksum, unsigned char *buf) { - unsigned char plain[8]; + unsigned char *plain; + s32 code; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) return krb5_make_rc4_seq_num(kctx, direction, seqnum, cksum, buf); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; + plain[0] = (unsigned char) (seqnum & 0xff); plain[1] = (unsigned char) ((seqnum >> 8) & 0xff); plain[2] = (unsigned char) ((seqnum >> 16) & 0xff); @@ -94,7 +104,9 @@ krb5_make_seq_num(struct krb5_ctx *kctx, plain[6] = direction; plain[7] = direction; - return krb5_encrypt(key, cksum, plain, buf, 8); + code = krb5_encrypt(key, cksum, plain, buf, 8); + kfree(plain); + return code; } static s32 @@ -102,7 +114,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, unsigned char *buf, int *direction, s32 *seqnum) { struct crypto_skcipher *cipher; - unsigned char plain[8]; + unsigned char *plain; s32 code; dprintk("RPC: %s:\n", __func__); @@ -115,20 +127,28 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum, if (code) goto out; + plain = kmalloc(8, GFP_NOFS); + if (!plain) { + code = -ENOMEM; + goto out; + } + code = krb5_decrypt(cipher, cksum, buf, plain, 8); if (code) - goto out; + goto out_plain; if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || (plain[4] != plain[7])) { code = (s32)KG_BAD_SEQ; - goto out; + goto out_plain; } *direction = plain[4]; *seqnum = ((plain[0] << 24) | (plain[1] << 16) | (plain[2] << 8) | (plain[3])); +out_plain: + kfree(plain); out: crypto_free_skcipher(cipher); return code; @@ -141,26 +161,33 @@ krb5_get_seq_num(struct krb5_ctx *kctx, int *direction, u32 *seqnum) { s32 code; - unsigned char plain[8]; struct crypto_skcipher *key = kctx->seq; + unsigned char *plain; dprintk("RPC: krb5_get_seq_num:\n"); if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) return krb5_get_rc4_seq_num(kctx, cksum, buf, direction, seqnum); + plain = kmalloc(8, GFP_NOFS); + if (!plain) + return -ENOMEM; if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) - return code; + goto out; if ((plain[4] != plain[5]) || (plain[4] != plain[6]) || - (plain[4] != plain[7])) - return (s32)KG_BAD_SEQ; + (plain[4] != plain[7])) { + code = (s32)KG_BAD_SEQ; + goto out; + } *direction = plain[4]; *seqnum = ((plain[0]) | (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); - return 0; +out: + kfree(plain); + return code; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f2cf4edf219b..556989b0b5fc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,8 +54,6 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } -static void cache_fresh_locked(struct cache_head *head, time_t expiry, - struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail); @@ -100,7 +98,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init(&tmp->cache_list); detail->entries --; - cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6d118357d9dc..9259529e0412 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2706,6 +2706,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, xprt = xprt_iter_xprt(&clnt->cl_xpi); if (xps == NULL || xprt == NULL) { rcu_read_unlock(); + xprt_switch_put(xps); return -EAGAIN; } resvport = xprt->resvport; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f9db5fe52d36..aff76fb43430 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -99,65 +99,79 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } -static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue) -{ - struct list_head *q = &queue->tasks[queue->priority]; - struct rpc_task *task; - - if (!list_empty(q)) { - task = list_first_entry(q, struct rpc_task, u.tk_wait.list); - if (task->tk_owner == queue->owner) - list_move_tail(&task->u.tk_wait.list, q); - } -} - static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { if (queue->priority != priority) { - /* Fairness: rotate the list when changing priority */ - rpc_rotate_queue_owner(queue); queue->priority = priority; + queue->nr = 1U << priority; } } -static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) -{ - queue->owner = pid; - queue->nr = RPC_BATCH_COUNT; -} - static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) { rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_owner(queue, 0); } /* - * Add new request to a priority queue. + * Add a request to a queue list */ -static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, - struct rpc_task *task, - unsigned char queue_priority) +static void +__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task) { - struct list_head *q; struct rpc_task *t; - INIT_LIST_HEAD(&task->u.tk_wait.links); - if (unlikely(queue_priority > queue->maxpriority)) - queue_priority = queue->maxpriority; - if (queue_priority > queue->priority) - rpc_set_waitqueue_priority(queue, queue_priority); - q = &queue->tasks[queue_priority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { - list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); + list_add_tail(&task->u.tk_wait.links, + &t->u.tk_wait.links); + /* Cache the queue head in task->u.tk_wait.list */ + task->u.tk_wait.list.next = q; + task->u.tk_wait.list.prev = NULL; return; } } + INIT_LIST_HEAD(&task->u.tk_wait.links); list_add_tail(&task->u.tk_wait.list, q); } /* + * Remove request from a queue list + */ +static void +__rpc_list_dequeue_task(struct rpc_task *task) +{ + struct list_head *q; + struct rpc_task *t; + + if (task->u.tk_wait.list.prev == NULL) { + list_del(&task->u.tk_wait.links); + return; + } + if (!list_empty(&task->u.tk_wait.links)) { + t = list_first_entry(&task->u.tk_wait.links, + struct rpc_task, + u.tk_wait.links); + /* Assume __rpc_list_enqueue_task() cached the queue head */ + q = t->u.tk_wait.list.next; + list_add_tail(&t->u.tk_wait.list, q); + list_del(&task->u.tk_wait.links); + } + list_del(&task->u.tk_wait.list); +} + +/* + * Add new request to a priority queue. + */ +static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned char queue_priority) +{ + if (unlikely(queue_priority > queue->maxpriority)) + queue_priority = queue->maxpriority; + __rpc_list_enqueue_task(&queue->tasks[queue_priority], task); +} + +/* * Add new request to wait queue. * * Swapper tasks always get inserted at the head of the queue. @@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, */ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) { - struct rpc_task *t; - - if (!list_empty(&task->u.tk_wait.links)) { - t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); - list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); - list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); - } + __rpc_list_dequeue_task(task); } /* @@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - list_del(&task->u.tk_wait.list); + else + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -481,17 +490,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q * Service a batch of tasks from a single owner. */ q = &queue->tasks[queue->priority]; - if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, u.tk_wait.list); - if (queue->owner == task->tk_owner) { - if (--queue->nr) - goto out; - list_move_tail(&task->u.tk_wait.list, q); - } - /* - * Check if we need to switch queues. - */ - goto new_owner; + if (!list_empty(q) && --queue->nr) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + goto out; } /* @@ -503,7 +504,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q else q = q - 1; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, u.tk_wait.list); + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); goto new_queue; } } while (q != &queue->tasks[queue->priority]); @@ -513,8 +514,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q new_queue: rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0])); -new_owner: - rpc_set_waitqueue_owner(queue, task->tk_owner); out: return task; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d0282cc88b14..b852c34bb637 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -795,17 +795,11 @@ void xprt_connect(struct rpc_task *task) static void xprt_connect_status(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - - if (task->tk_status == 0) { - xprt->stat.connect_count++; - xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; + switch (task->tk_status) { + case 0: dprintk("RPC: %5u xprt_connect_status: connection established\n", task->tk_pid); - return; - } - - switch (task->tk_status) { + break; case -ECONNREFUSED: case -ECONNRESET: case -ECONNABORTED: @@ -822,7 +816,7 @@ static void xprt_connect_status(struct rpc_task *task) default: dprintk("RPC: %5u xprt_connect_status: error %d connecting to " "server %s\n", task->tk_pid, -task->tk_status, - xprt->servername); + task->tk_rqstp->rq_xprt->servername); task->tk_status = -EIO; } } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5caf8e722a11..51e6cf2dc277 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -524,9 +524,14 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, /* Save client advertised inbound read limit for use later in accept. */ newxprt->sc_ord = param->initiator_depth; - /* Set the local and remote addresses in the transport */ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + /* The remote port is arbitrary and not under the control of the + * client ULP. Set it to a fixed value so that the DRC continues + * to be effective after a reconnect. + */ + rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0); + sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8cf5ccfe180d..b1b40a1be8c5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -238,8 +238,12 @@ rpcrdma_connect_worker(struct work_struct *work) if (++xprt->connect_cookie == 0) /* maintain a reserved value */ ++xprt->connect_cookie; if (ep->rep_connected > 0) { - if (!xprt_test_and_set_connected(xprt)) + if (!xprt_test_and_set_connected(xprt)) { + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_wake_pending_tasks(xprt, 0); + } } else { if (xprt_test_and_clear_connected(xprt)) xprt_wake_pending_tasks(xprt, -ENOTCONN); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 05a58cc1b0cd..f75b5b7c1fc2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -127,7 +127,7 @@ static struct ctl_table xs_tunables_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &xprt_min_resvport_limit, - .extra2 = &xprt_max_resvport + .extra2 = &xprt_max_resvport_limit }, { .procname = "max_resvport", @@ -135,7 +135,7 @@ static struct ctl_table xs_tunables_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &xprt_min_resvport, + .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, { @@ -1592,6 +1592,9 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); xprt_clear_connecting(xprt); + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_wake_pending_tasks(xprt, -EAGAIN); } spin_unlock(&xprt->transport_lock); @@ -1751,11 +1754,17 @@ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } -static unsigned short xs_get_random_port(void) +static int xs_get_random_port(void) { - unsigned short range = xprt_max_resvport - xprt_min_resvport + 1; - unsigned short rand = (unsigned short) prandom_u32() % range; - return rand + xprt_min_resvport; + unsigned short min = xprt_min_resvport, max = xprt_max_resvport; + unsigned short range; + unsigned short rand; + + if (max < min) + return -EADDRINUSE; + range = max - min + 1; + rand = (unsigned short) prandom_u32() % range; + return rand + min; } /** @@ -1812,9 +1821,9 @@ static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) transport->srcport = xs_sock_getport(sock); } -static unsigned short xs_get_srcport(struct sock_xprt *transport) +static int xs_get_srcport(struct sock_xprt *transport) { - unsigned short port = transport->srcport; + int port = transport->srcport; if (port == 0 && transport->xprt.resvport) port = xs_get_random_port(); @@ -1835,7 +1844,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_storage myaddr; int err, nloop = 0; - unsigned short port = xs_get_srcport(transport); + int port = xs_get_srcport(transport); unsigned short last; /* @@ -1853,8 +1862,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) * transport->xprt.resvport == 1) xs_get_srcport above will * ensure that port is non-zero and we will bind as needed. */ - if (port == 0) - return 0; + if (port <= 0) + return port; memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); do { @@ -2008,8 +2017,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, } /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); } @@ -2041,6 +2048,9 @@ static int xs_local_setup_socket(struct sock_xprt *transport) case 0: dprintk("RPC: xprt %p connected to %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_set_connected(xprt); case -ENOBUFS: break; @@ -2361,8 +2371,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_set_memalloc(xprt); /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { @@ -3284,12 +3292,8 @@ static int param_set_uint_minmax(const char *val, static int param_set_portnr(const char *val, const struct kernel_param *kp) { - if (kp->arg == &xprt_min_resvport) - return param_set_uint_minmax(val, kp, - RPC_MIN_RESVPORT, - xprt_max_resvport); return param_set_uint_minmax(val, kp, - xprt_min_resvport, + RPC_MIN_RESVPORT, RPC_MAX_RESVPORT); } diff --git a/net/tipc/core.c b/net/tipc/core.c index 0b982d048fb9..35f162ece2b7 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -71,9 +71,6 @@ static int __net_init tipc_init_net(struct net *net) goto out_nametbl; INIT_LIST_HEAD(&tn->dist_queue); - err = tipc_topsrv_start(net); - if (err) - goto out_subscr; err = tipc_bcast_init(net); if (err) @@ -82,8 +79,6 @@ static int __net_init tipc_init_net(struct net *net) return 0; out_bclink: - tipc_bcast_stop(net); -out_subscr: tipc_nametbl_stop(net); out_nametbl: tipc_sk_rht_destroy(net); @@ -93,7 +88,6 @@ out_sk_rht: static void __net_exit tipc_exit_net(struct net *net) { - tipc_topsrv_stop(net); tipc_net_stop(net); tipc_bcast_stop(net); tipc_nametbl_stop(net); @@ -107,6 +101,11 @@ static struct pernet_operations tipc_net_ops = { .size = sizeof(struct tipc_net), }; +static struct pernet_operations tipc_topsrv_net_ops = { + .init = tipc_topsrv_init_net, + .exit = tipc_topsrv_exit_net, +}; + static int __init tipc_init(void) { int err; @@ -117,54 +116,62 @@ static int __init tipc_init(void) sysctl_tipc_rmem[1] = RCVBUF_DEF; sysctl_tipc_rmem[2] = RCVBUF_MAX; - err = tipc_netlink_start(); + err = tipc_register_sysctl(); if (err) - goto out_netlink; + goto out_sysctl; - err = tipc_netlink_compat_start(); + err = register_pernet_device(&tipc_net_ops); if (err) - goto out_netlink_compat; + goto out_pernet; err = tipc_socket_init(); if (err) goto out_socket; - err = tipc_register_sysctl(); + err = register_pernet_device(&tipc_topsrv_net_ops); if (err) - goto out_sysctl; - - err = register_pernet_subsys(&tipc_net_ops); - if (err) - goto out_pernet; + goto out_pernet_topsrv; err = tipc_bearer_setup(); if (err) goto out_bearer; + err = tipc_netlink_start(); + if (err) + goto out_netlink; + + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + pr_info("Started in single node mode\n"); return 0; + +out_netlink_compat: + tipc_netlink_stop(); +out_netlink: + tipc_bearer_cleanup(); out_bearer: - unregister_pernet_subsys(&tipc_net_ops); + unregister_pernet_device(&tipc_topsrv_net_ops); +out_pernet_topsrv: + tipc_socket_stop(); +out_socket: + unregister_pernet_device(&tipc_net_ops); out_pernet: tipc_unregister_sysctl(); out_sysctl: - tipc_socket_stop(); -out_socket: - tipc_netlink_compat_stop(); -out_netlink_compat: - tipc_netlink_stop(); -out_netlink: pr_err("Unable to start in single node mode\n"); return err; } static void __exit tipc_exit(void) { - tipc_bearer_cleanup(); - unregister_pernet_subsys(&tipc_net_ops); - tipc_netlink_stop(); tipc_netlink_compat_stop(); + tipc_netlink_stop(); + tipc_bearer_cleanup(); + unregister_pernet_device(&tipc_topsrv_net_ops); tipc_socket_stop(); + unregister_pernet_device(&tipc_net_ops); tipc_unregister_sysctl(); pr_info("Deactivated\n"); diff --git a/net/tipc/link.c b/net/tipc/link.c index ac0144f532aa..da749916faac 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -157,6 +157,7 @@ struct tipc_link { struct { u16 len; u16 limit; + struct sk_buff *target_bskb; } backlog[5]; u16 snd_nxt; u16 last_retransm; @@ -826,6 +827,8 @@ void link_prepare_wakeup(struct tipc_link *l) void tipc_link_reset(struct tipc_link *l) { + u32 imp; + l->peer_session = ANY_SESSION; l->session++; l->mtu = l->advertised_mtu; @@ -833,11 +836,10 @@ void tipc_link_reset(struct tipc_link *l) __skb_queue_purge(&l->deferdq); skb_queue_splice_init(&l->wakeupq, l->inputq); __skb_queue_purge(&l->backlogq); - l->backlog[TIPC_LOW_IMPORTANCE].len = 0; - l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; - l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; - l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; - l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; + for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) { + l->backlog[imp].len = 0; + l->backlog[imp].target_bskb = NULL; + } kfree_skb(l->reasm_buf); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; @@ -876,7 +878,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; struct sk_buff_head *transmq = &l->transmq; struct sk_buff_head *backlogq = &l->backlogq; - struct sk_buff *skb, *_skb, *bskb; + struct sk_buff *skb, *_skb, **tskb; int pkt_cnt = skb_queue_len(list); int rc = 0; @@ -922,19 +924,21 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, seqno++; continue; } - if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) { + tskb = &l->backlog[imp].target_bskb; + if (tipc_msg_bundle(*tskb, hdr, mtu)) { kfree_skb(__skb_dequeue(list)); l->stats.sent_bundled++; continue; } - if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) { + if (tipc_msg_make_bundle(tskb, hdr, mtu, l->addr)) { kfree_skb(__skb_dequeue(list)); - __skb_queue_tail(backlogq, bskb); - l->backlog[msg_importance(buf_msg(bskb))].len++; + __skb_queue_tail(backlogq, *tskb); + l->backlog[imp].len++; l->stats.sent_bundled++; l->stats.sent_bundles++; continue; } + l->backlog[imp].target_bskb = NULL; l->backlog[imp].len += skb_queue_len(list); skb_queue_splice_tail_init(list, backlogq); } @@ -949,6 +953,7 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) u16 seqno = l->snd_nxt; u16 ack = l->rcv_nxt - 1; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; + u32 imp; while (skb_queue_len(&l->transmq) < l->window) { skb = skb_peek(&l->backlogq); @@ -959,7 +964,10 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) break; __skb_dequeue(&l->backlogq); hdr = buf_msg(skb); - l->backlog[msg_importance(hdr)].len--; + imp = msg_importance(hdr); + l->backlog[imp].len--; + if (unlikely(skb == l->backlog[imp].target_bskb)) + l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; @@ -1065,7 +1073,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); - return false; + return true; }; } diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 17146c16ee2d..e38396025874 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -456,10 +456,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, bmsg = buf_msg(_skb); tipc_msg_init(msg_prevnode(msg), bmsg, MSG_BUNDLER, 0, INT_H_SIZE, dnode); - if (msg_isdata(msg)) - msg_set_importance(bmsg, TIPC_CRITICAL_IMPORTANCE); - else - msg_set_importance(bmsg, TIPC_SYSTEM_IMPORTANCE); + msg_set_importance(bmsg, msg_importance(msg)); msg_set_seqno(bmsg, msg_seqno(msg)); msg_set_ack(bmsg, msg_ack(msg)); msg_set_bcast_ack(bmsg, msg_bcast_ack(msg)); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 23f8899e0f8c..7ebcaff8c1c4 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -224,7 +224,8 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) publ->key); } - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } /** diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 73895daf8943..fa0522cd683e 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -55,6 +55,7 @@ struct tipc_nl_compat_msg { int rep_type; int rep_size; int req_type; + int req_size; struct net *net; struct sk_buff *rep; struct tlv_desc *req; @@ -252,7 +253,8 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, int err; struct sk_buff *arg; - if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) return -EINVAL; msg->rep = tipc_tlv_alloc(msg->rep_size); @@ -262,8 +264,14 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, if (msg->rep_type) tipc_tlv_init(msg->rep, msg->rep_type); - if (cmd->header) - (*cmd->header)(msg); + if (cmd->header) { + err = (*cmd->header)(msg); + if (err) { + kfree_skb(msg->rep); + msg->rep = NULL; + return err; + } + } arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { @@ -339,7 +347,8 @@ static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, { int err; - if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + if (msg->req_type && (!msg->req_size || + !TLV_CHECK_TYPE(msg->req, msg->req_type))) return -EINVAL; err = __tipc_nl_compat_doit(cmd, msg); @@ -388,7 +397,12 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_bearer_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(b->name, len)) return -EINVAL; @@ -425,7 +439,11 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_BEARER_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -517,7 +535,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, name = (char *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -757,7 +779,12 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, lc = (struct tipc_link_config *)TLV_DATA(msg->req); - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + len -= offsetof(struct tipc_link_config, name); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(lc->name, len)) return -EINVAL; @@ -790,7 +817,11 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; - len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + len = TLV_GET_DATA_LEN(msg->req); + if (len <= 0) + return -EINVAL; + + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -943,6 +974,10 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) { + kfree_skb(args); + return -EMSGSIZE; + } nest = nla_nest_start(args, TIPC_NLA_SOCK); if (!nest) { @@ -990,8 +1025,11 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 node; struct nlattr *con[TIPC_NLA_CON_MAX + 1]; - nla_parse_nested(con, TIPC_NLA_CON_MAX, - sock[TIPC_NLA_SOCK_CON], NULL, NULL); + err = nla_parse_nested(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], NULL, NULL); + + if (err) + return err; node = nla_get_u32(con[TIPC_NLA_CON_NODE]); tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", @@ -1239,8 +1277,8 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) goto send; } - len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (!len || !TLV_OK(msg.req, len)) { + msg.req_size = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (msg.req_size && !TLV_OK(msg.req, msg.req_size)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e5f9f43ff15b..21929ba196eb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -709,19 +709,19 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: - case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: - if (!skb_queue_empty(&sk->sk_receive_queue)) + case TIPC_CONNECTING: + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; case TIPC_OPEN: if (!tsk->cong_link_cnt) mask |= POLLOUT; if (tipc_sk_type_connectionless(sk) && - (!skb_queue_empty(&sk->sk_receive_queue))) + (!skb_queue_empty_lockless(&sk->sk_receive_queue))) mask |= (POLLIN | POLLRDNORM); break; case TIPC_DISCONNECTING: @@ -943,7 +943,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(!dest)) { dest = &tsk->peer; - if (!syn || dest->family != AF_TIPC) + if (!syn && dest->family != AF_TIPC) return -EDESTADDRREQ; } @@ -1588,7 +1588,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; /* If empty 'ACK-' message, wake up sleeping connect() */ - sk->sk_data_ready(sk); + sk->sk_state_change(sk); /* 'ACK-' message is neither accepted nor rejected: */ msg_set_dest_droppable(hdr, 1); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 959c9aea3d1a..144fe993c231 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -344,7 +344,7 @@ static void *tipc_subscrb_connect_cb(int conid) return (void *)tipc_subscrb_create(conid); } -int tipc_topsrv_start(struct net *net) +static int tipc_topsrv_start(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); const char name[] = "topology_server"; @@ -382,7 +382,7 @@ int tipc_topsrv_start(struct net *net) return tipc_server_start(topsrv); } -void tipc_topsrv_stop(struct net *net) +static void tipc_topsrv_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_server *topsrv = tn->topsrv; @@ -391,3 +391,13 @@ void tipc_topsrv_stop(struct net *net) kfree(topsrv->saddr); kfree(topsrv); } + +int __net_init tipc_topsrv_init_net(struct net *net) +{ + return tipc_topsrv_start(net); +} + +void __net_exit tipc_topsrv_exit_net(struct net *net) +{ + tipc_topsrv_stop(net); +} diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ee52957dc952..1caed50a8c3a 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -75,8 +75,9 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, struct tipc_name_seq *out); u32 tipc_subscrp_convert_seq_type(u32 type, int swap); -int tipc_topsrv_start(struct net *net); -void tipc_topsrv_stop(struct net *net); + +int __net_init tipc_topsrv_init_net(struct net *net); +void __net_exit tipc_topsrv_exit_net(struct net *net); void tipc_subscrp_put(struct tipc_subscription *subscription); void tipc_subscrp_get(struct tipc_subscription *subscription); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index e3cff9d6c092..de011fdd7964 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -174,7 +174,6 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, goto tx_error; } - skb->dev = rt->dst.dev; ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->port, @@ -193,10 +192,9 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, - ndst->dev, &src->ipv6, - &dst->ipv6, 0, ttl, 0, src->port, - dst->port, false); + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, NULL, + &src->ipv6, &dst->ipv6, 0, ttl, 0, + src->port, dst->port, false); #endif } return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7f46bab4ce5c..99f581a61cfa 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -225,6 +225,8 @@ static inline void unix_release_addr(struct unix_address *addr) static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { + *hashp = 0; + if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) @@ -892,7 +894,7 @@ retry: addr->hash ^= sk->sk_type; __unix_remove_socket(sk); - u->addr = addr; + smp_store_release(&u->addr, addr); __unix_insert_socket(&unix_socket_table[addr->hash], sk); spin_unlock(&unix_table_lock); err = 0; @@ -1062,7 +1064,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = 0; __unix_remove_socket(sk); - u->addr = addr; + smp_store_release(&u->addr, addr); __unix_insert_socket(list, sk); out_unlock: @@ -1333,15 +1335,29 @@ restart: RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); - /* copy address information from listening to new sock*/ - if (otheru->addr) { - refcount_inc(&otheru->addr->refcnt); - newu->addr = otheru->addr; - } + /* copy address information from listening to new sock + * + * The contents of *(otheru->addr) and otheru->path + * are seen fully set up here, since we have found + * otheru in hash under unix_table_lock. Insertion + * into the hash chain we'd found it in had been done + * in an earlier critical area protected by unix_table_lock, + * the same one where we'd set *(otheru->addr) contents, + * as well as otheru->path and otheru->addr itself. + * + * Using smp_store_release() here to set newu->addr + * is enough to make those stores, as well as stores + * to newu->path visible to anyone who gets newu->addr + * by smp_load_acquire(). IOW, the same warranties + * as for unix_sock instances bound in unix_bind() or + * in unix_autobind(). + */ if (otheru->path.dentry) { path_get(&otheru->path); newu->path = otheru->path; } + refcount_inc(&otheru->addr->refcnt); + smp_store_release(&newu->addr, otheru->addr); /* Set credentials */ copy_peercred(sk, other); @@ -1455,7 +1471,7 @@ out: static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sock *sk = sock->sk; - struct unix_sock *u; + struct unix_address *addr; DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr); int err = 0; @@ -1470,19 +1486,15 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_ sock_hold(sk); } - u = unix_sk(sk); - unix_state_lock(sk); - if (!u->addr) { + addr = smp_load_acquire(&unix_sk(sk)->addr); + if (!addr) { sunaddr->sun_family = AF_UNIX; sunaddr->sun_path[0] = 0; *uaddr_len = sizeof(short); } else { - struct unix_address *addr = u->addr; - *uaddr_len = addr->len; memcpy(sunaddr, addr->name, *uaddr_len); } - unix_state_unlock(sk); sock_put(sk); out: return err; @@ -2075,11 +2087,11 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { - struct unix_sock *u = unix_sk(sk); + struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); - if (u->addr) { - msg->msg_namelen = u->addr->len; - memcpy(msg->msg_name, u->addr->name, u->addr->len); + if (addr) { + msg->msg_namelen = addr->len; + memcpy(msg->msg_name, addr->name, addr->len); } } @@ -2583,15 +2595,14 @@ static int unix_open_file(struct sock *sk) if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; - unix_state_lock(sk); + if (!smp_load_acquire(&unix_sk(sk)->addr)) + return -ENOENT; + path = unix_sk(sk)->path; - if (!path.dentry) { - unix_state_unlock(sk); + if (!path.dentry) return -ENOENT; - } path_get(&path); - unix_state_unlock(sk); fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) @@ -2656,7 +2667,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table mask |= POLLRDHUP | POLLIN | POLLRDNORM; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -2684,7 +2695,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -2694,7 +2705,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -2831,7 +2842,7 @@ static int unix_seq_show(struct seq_file *seq, void *v) (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING), sock_i_ino(s)); - if (u->addr) { + if (u->addr) { // under unix_table_lock here int i, len; seq_putc(seq, ' '); diff --git a/net/unix/diag.c b/net/unix/diag.c index 384c84e83462..3183d9b8ab33 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -10,7 +10,8 @@ static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb) { - struct unix_address *addr = unix_sk(sk)->addr; + /* might or might not have unix_table_lock */ + struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr); if (!addr) return 0; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index f2fd556c1233..73eac97e19fb 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -36,7 +36,7 @@ * not support simultaneous connects (two "client" sockets connecting). * * - "Server" sockets are referred to as listener sockets throughout this - * implementation because they are in the VSOCK_SS_LISTEN state. When a + * implementation because they are in the TCP_LISTEN state. When a * connection request is received (the second kind of socket mentioned above), * we create a new socket and refer to it as a pending socket. These pending * sockets are placed on the pending connection list of the listener socket. @@ -82,6 +82,15 @@ * argument, we must ensure the reference count is increased to ensure the * socket isn't freed before the function is run; the deferred function will * then drop the reference. + * + * - sk->sk_state uses the TCP state constants because they are widely used by + * other address families and exposed to userspace tools like ss(8): + * + * TCP_CLOSE - unconnected + * TCP_SYN_SENT - connecting + * TCP_ESTABLISHED - connected + * TCP_CLOSING - disconnecting + * TCP_LISTEN - listening */ #include <linux/types.h> @@ -98,6 +107,7 @@ #include <linux/mutex.h> #include <linux/net.h> #include <linux/poll.h> +#include <linux/random.h> #include <linux/skbuff.h> #include <linux/smp.h> #include <linux/socket.h> @@ -279,7 +289,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected); void vsock_remove_bound(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); - __vsock_remove_bound(vsk); + if (__vsock_in_bound_table(vsk)) + __vsock_remove_bound(vsk); spin_unlock_bh(&vsock_table_lock); } EXPORT_SYMBOL_GPL(vsock_remove_bound); @@ -287,7 +298,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound); void vsock_remove_connected(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); - __vsock_remove_connected(vsk); + if (__vsock_in_connected_table(vsk)) + __vsock_remove_connected(vsk); spin_unlock_bh(&vsock_table_lock); } EXPORT_SYMBOL_GPL(vsock_remove_connected); @@ -323,35 +335,10 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, } EXPORT_SYMBOL_GPL(vsock_find_connected_socket); -static bool vsock_in_bound_table(struct vsock_sock *vsk) -{ - bool ret; - - spin_lock_bh(&vsock_table_lock); - ret = __vsock_in_bound_table(vsk); - spin_unlock_bh(&vsock_table_lock); - - return ret; -} - -static bool vsock_in_connected_table(struct vsock_sock *vsk) -{ - bool ret; - - spin_lock_bh(&vsock_table_lock); - ret = __vsock_in_connected_table(vsk); - spin_unlock_bh(&vsock_table_lock); - - return ret; -} - void vsock_remove_sock(struct vsock_sock *vsk) { - if (vsock_in_bound_table(vsk)) - vsock_remove_bound(vsk); - - if (vsock_in_connected_table(vsk)) - vsock_remove_connected(vsk); + vsock_remove_bound(vsk); + vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); @@ -482,10 +469,9 @@ static void vsock_pending_work(struct work_struct *work) * incoming packets can't find this socket, and to reduce the reference * count. */ - if (vsock_in_connected_table(vsk)) - vsock_remove_connected(vsk); + vsock_remove_connected(vsk); - sk->sk_state = SS_FREE; + sk->sk_state = TCP_CLOSE; out: release_sock(sk); @@ -502,9 +488,13 @@ out: static int __vsock_bind_stream(struct vsock_sock *vsk, struct sockaddr_vm *addr) { - static u32 port = LAST_RESERVED_PORT + 1; + static u32 port = 0; struct sockaddr_vm new_addr; + if (!port) + port = LAST_RESERVED_PORT + 1 + + prandom_u32_max(U32_MAX - LAST_RESERVED_PORT); + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); if (addr->svm_port == VMADDR_PORT_ANY) { @@ -626,7 +616,6 @@ struct sock *__vsock_create(struct net *net, sk->sk_destruct = vsock_sk_destruct; sk->sk_backlog_rcv = vsock_queue_rcv_skb; - sk->sk_state = 0; sock_reset_flag(sk, SOCK_DONE); INIT_LIST_HEAD(&vsk->bound_table); @@ -664,7 +653,7 @@ struct sock *__vsock_create(struct net *net, } EXPORT_SYMBOL_GPL(__vsock_create); -static void __vsock_release(struct sock *sk) +static void __vsock_release(struct sock *sk, int level) { if (sk) { struct sk_buff *skb; @@ -674,9 +663,17 @@ static void __vsock_release(struct sock *sk) vsk = vsock_sk(sk); pending = NULL; /* Compiler warning. */ + /* The release call is supposed to use lock_sock_nested() + * rather than lock_sock(), if a sock lock should be acquired. + */ transport->release(vsk); - lock_sock(sk); + /* When "level" is SINGLE_DEPTH_NESTING, use the nested + * version to avoid the warning "possible recursive locking + * detected". When "level" is 0, lock_sock_nested(sk, level) + * is the same as lock_sock(sk). + */ + lock_sock_nested(sk, level); sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; @@ -685,7 +682,7 @@ static void __vsock_release(struct sock *sk) /* Clean up any sockets that never were accepted. */ while ((pending = vsock_dequeue_accept(sk)) != NULL) { - __vsock_release(pending); + __vsock_release(pending, SINGLE_DEPTH_NESTING); sock_put(pending); } @@ -734,7 +731,7 @@ EXPORT_SYMBOL_GPL(vsock_stream_has_space); static int vsock_release(struct socket *sock) { - __vsock_release(sock->sk); + __vsock_release(sock->sk, 0); sock->sk = NULL; sock->state = SS_FREE; @@ -888,7 +885,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, * the queue and write as long as the socket isn't shutdown for * sending. */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) { mask |= POLLIN | POLLRDNORM; } @@ -902,7 +899,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, /* Listening sockets that have connections in their accept * queue can be read. */ - if (sk->sk_state == VSOCK_SS_LISTEN + if (sk->sk_state == TCP_LISTEN && !vsock_is_accept_queue_empty(sk)) mask |= POLLIN | POLLRDNORM; @@ -931,7 +928,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, } /* Connected sockets that can produce data can be written. */ - if (sk->sk_state == SS_CONNECTED) { + if (sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( @@ -953,7 +950,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, * POLLOUT|POLLWRNORM when peer is closed and nothing to read, * but local send is not shutdown. */ - if (sk->sk_state == SS_UNCONNECTED) { + if (sk->sk_state == TCP_CLOSE) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) mask |= POLLOUT | POLLWRNORM; @@ -1123,9 +1120,9 @@ static void vsock_connect_timeout(struct work_struct *work) sk = sk_vsock(vsk); lock_sock(sk); - if (sk->sk_state == SS_CONNECTING && + if (sk->sk_state == TCP_SYN_SENT && (sk->sk_shutdown != SHUTDOWN_MASK)) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); cancel = 1; @@ -1171,7 +1168,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = -EALREADY; break; default: - if ((sk->sk_state == VSOCK_SS_LISTEN) || + if ((sk->sk_state == TCP_LISTEN) || vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { err = -EINVAL; goto out; @@ -1194,7 +1191,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; - sk->sk_state = SS_CONNECTING; + sk->sk_state = TCP_SYN_SENT; err = transport->connect(vsk); if (err < 0) @@ -1214,7 +1211,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, timeout = vsk->connect_timeout; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) { if (flags & O_NONBLOCK) { /* If we're not going to block, we schedule a timeout * function to generate a timeout on the connection @@ -1235,13 +1232,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (signal_pending(current)) { err = sock_intr_errno(timeout); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); goto out_wait; @@ -1252,7 +1249,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (sk->sk_err) { err = -sk->sk_err; - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; } else { err = 0; @@ -1285,7 +1282,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, goto out; } - if (listener->sk_state != VSOCK_SS_LISTEN) { + if (listener->sk_state != TCP_LISTEN) { err = -EINVAL; goto out; } @@ -1375,7 +1372,7 @@ static int vsock_listen(struct socket *sock, int backlog) } sk->sk_max_ack_backlog = backlog; - sk->sk_state = VSOCK_SS_LISTEN; + sk->sk_state = TCP_LISTEN; err = 0; @@ -1555,7 +1552,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Callers should not provide a destination with stream sockets. */ if (msg->msg_namelen) { - err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out; } @@ -1566,7 +1563,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if (sk->sk_state != SS_CONNECTED || + if (sk->sk_state != TCP_ESTABLISHED || !vsock_addr_bound(&vsk->local_addr)) { err = -ENOTCONN; goto out; @@ -1690,7 +1687,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, lock_sock(sk); - if (sk->sk_state != SS_CONNECTED) { + if (sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. Differentiate between that case and when a * peer has not connected or a local shutdown occured with the diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 2c63f7b169b5..6614512f8180 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -35,6 +35,9 @@ /* The MTU is 16KB per the host side's design */ #define HVS_MTU_SIZE (1024 * 16) +/* How long to wait for graceful shutdown of a connection */ +#define HVS_CLOSE_TIMEOUT (8 * HZ) + struct vmpipe_proto_header { u32 pkt_type; u32 data_size; @@ -217,18 +220,6 @@ static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan) set_channel_pending_send_size(chan, HVS_PKT_LEN(HVS_SEND_BUF_SIZE)); - /* See hvs_stream_has_space(): we must make sure the host has seen - * the new pending send size, before we can re-check the writable - * bytes. - */ - virt_mb(); -} - -static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan) -{ - set_channel_pending_send_size(chan, 0); - - /* Ditto */ virt_mb(); } @@ -298,27 +289,42 @@ static void hvs_channel_cb(void *ctx) if (hvs_channel_readable(chan)) sk->sk_data_ready(sk); - /* See hvs_stream_has_space(): when we reach here, the writable bytes - * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE). - */ if (hv_get_bytes_to_write(&chan->outbound) > 0) sk->sk_write_space(sk); } -static void hvs_close_connection(struct vmbus_channel *chan) +static void hvs_do_close_lock_held(struct vsock_sock *vsk, + bool cancel_timeout) { - struct sock *sk = get_per_channel_state(chan); - struct vsock_sock *vsk = vsock_sk(sk); - - lock_sock(sk); + struct sock *sk = sk_vsock(vsk); - sk->sk_state = SS_UNCONNECTED; sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; - + vsk->peer_shutdown = SHUTDOWN_MASK; + if (vsock_stream_has_data(vsk) <= 0) + sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); + if (vsk->close_work_scheduled && + (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) { + vsk->close_work_scheduled = false; + vsock_remove_sock(vsk); + /* Release the reference taken while scheduling the timeout */ + sock_put(sk); + } +} + +static void hvs_close_connection(struct vmbus_channel *chan) +{ + struct sock *sk = get_per_channel_state(chan); + + lock_sock(sk); + hvs_do_close_lock_held(vsock_sk(sk), true); release_sock(sk); + + /* Release the refcnt for the channel that's opened in + * hvs_open_connection(). + */ + sock_put(sk); } static void hvs_open_connection(struct vmbus_channel *chan) @@ -328,8 +334,9 @@ static void hvs_open_connection(struct vmbus_channel *chan) struct sockaddr_vm addr; struct sock *sk, *new = NULL; - struct vsock_sock *vnew; - struct hvsock *hvs, *hvs_new; + struct vsock_sock *vnew = NULL; + struct hvsock *hvs = NULL; + struct hvsock *hvs_new = NULL; int ret; if_type = &chan->offermsg.offer.if_type; @@ -350,8 +357,8 @@ static void hvs_open_connection(struct vmbus_channel *chan) lock_sock(sk); - if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) || - (!conn_from_host && sk->sk_state != SS_CONNECTING)) + if ((conn_from_host && sk->sk_state != TCP_LISTEN) || + (!conn_from_host && sk->sk_state != TCP_SYN_SENT)) goto out; if (conn_from_host) { @@ -363,7 +370,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) if (!new) goto out; - new->sk_state = SS_CONNECTING; + new->sk_state = TCP_SYN_SENT; vnew = vsock_sk(new); hvs_new = vnew->trans; hvs_new->chan = chan; @@ -387,10 +394,20 @@ static void hvs_open_connection(struct vmbus_channel *chan) } set_per_channel_state(chan, conn_from_host ? new : sk); + + /* This reference will be dropped by hvs_close_connection(). */ + sock_hold(conn_from_host ? new : sk); vmbus_set_chn_rescind_callback(chan, hvs_close_connection); + /* Set the pending send size to max packet size to always get + * notifications from the host when there is enough writable space. + * The host is optimized to send notifications only when the pending + * size boundary is crossed, and not always. + */ + hvs_set_channel_pending_send_size(chan); + if (conn_from_host) { - new->sk_state = SS_CONNECTED; + new->sk_state = TCP_ESTABLISHED; sk->sk_ack_backlog++; hvs_addr_init(&vnew->local_addr, if_type); @@ -403,7 +420,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) vsock_enqueue_accept(sk, new); } else { - sk->sk_state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; sk->sk_socket->state = SS_CONNECTED; vsock_insert_connected(vsock_sk(sk)); @@ -453,50 +470,80 @@ static int hvs_connect(struct vsock_sock *vsk) return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id); } +static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode) +{ + struct vmpipe_proto_header hdr; + + if (hvs->fin_sent || !hvs->chan) + return; + + /* It can't fail: see hvs_channel_writable_bytes(). */ + (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0); + hvs->fin_sent = true; +} + static int hvs_shutdown(struct vsock_sock *vsk, int mode) { struct sock *sk = sk_vsock(vsk); - struct vmpipe_proto_header hdr; - struct hvs_send_buf *send_buf; - struct hvsock *hvs; if (!(mode & SEND_SHUTDOWN)) return 0; lock_sock(sk); + hvs_shutdown_lock_held(vsk->trans, mode); + release_sock(sk); + return 0; +} - hvs = vsk->trans; - if (hvs->fin_sent) - goto out; - - send_buf = (struct hvs_send_buf *)&hdr; +static void hvs_close_timeout(struct work_struct *work) +{ + struct vsock_sock *vsk = + container_of(work, struct vsock_sock, close_work.work); + struct sock *sk = sk_vsock(vsk); - /* It can't fail: see hvs_channel_writable_bytes(). */ - (void)hvs_send_data(hvs->chan, send_buf, 0); + sock_hold(sk); + lock_sock(sk); + if (!sock_flag(sk, SOCK_DONE)) + hvs_do_close_lock_held(vsk, false); - hvs->fin_sent = true; -out: + vsk->close_work_scheduled = false; release_sock(sk); - return 0; + sock_put(sk); } -static void hvs_release(struct vsock_sock *vsk) +/* Returns true, if it is safe to remove socket; false otherwise */ +static bool hvs_close_lock_held(struct vsock_sock *vsk) { struct sock *sk = sk_vsock(vsk); - struct hvsock *hvs = vsk->trans; - struct vmbus_channel *chan; - lock_sock(sk); + if (!(sk->sk_state == TCP_ESTABLISHED || + sk->sk_state == TCP_CLOSING)) + return true; - sk->sk_state = TCP_CLOSING; - vsock_remove_sock(vsk); + if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK) + hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK); - release_sock(sk); + if (sock_flag(sk, SOCK_DONE)) + return true; - chan = hvs->chan; - if (chan) - hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN); + /* This reference will be dropped by the delayed close routine */ + sock_hold(sk); + INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout); + vsk->close_work_scheduled = true; + schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT); + return false; +} +static void hvs_release(struct vsock_sock *vsk) +{ + struct sock *sk = sk_vsock(vsk); + bool remove_sock; + + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + remove_sock = hvs_close_lock_held(vsk); + release_sock(sk); + if (remove_sock) + vsock_remove_sock(vsk); } static void hvs_destruct(struct vsock_sock *vsk) @@ -652,23 +699,8 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) static s64 hvs_stream_has_space(struct vsock_sock *vsk) { struct hvsock *hvs = vsk->trans; - struct vmbus_channel *chan = hvs->chan; - s64 ret; - ret = hvs_channel_writable_bytes(chan); - if (ret > 0) { - hvs_clear_channel_pending_send_size(chan); - } else { - /* See hvs_channel_cb() */ - hvs_set_channel_pending_send_size(chan); - - /* Re-check the writable bytes to avoid race */ - ret = hvs_channel_writable_bytes(chan); - if (ret > 0) - hvs_clear_channel_pending_send_size(chan); - } - - return ret; + return hvs_channel_writable_bytes(hvs->chan); } static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index fdb294441682..96ab344f17bb 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -75,6 +75,9 @@ static u32 virtio_transport_get_local_cid(void) { struct virtio_vsock *vsock = virtio_vsock_get(); + if (!vsock) + return VMADDR_CID_ANY; + return vsock->guest_cid; } @@ -414,7 +417,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock) static void virtio_vsock_reset_sock(struct sock *sk) { lock_sock(sk); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); release_sock(sk); @@ -584,10 +587,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) virtio_vsock_update_guest_cid(vsock); - ret = vsock_core_init(&virtio_transport.transport); - if (ret < 0) - goto out_vqs; - vsock->rx_buf_nr = 0; vsock->rx_buf_max_nr = 0; atomic_set(&vsock->queued_replies, 0); @@ -618,8 +617,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) mutex_unlock(&the_virtio_vsock_mutex); return 0; -out_vqs: - vsock->vdev->config->del_vqs(vsock->vdev); out: kfree(vsock); mutex_unlock(&the_virtio_vsock_mutex); @@ -637,6 +634,9 @@ static void virtio_vsock_remove(struct virtio_device *vdev) flush_work(&vsock->event_work); flush_work(&vsock->send_pkt_work); + /* Reset all connected sockets when the device disappear */ + vsock_for_each_connected_socket(virtio_vsock_reset_sock); + vdev->config->reset(vdev); mutex_lock(&vsock->rx_lock); @@ -669,7 +669,6 @@ static void virtio_vsock_remove(struct virtio_device *vdev) mutex_lock(&the_virtio_vsock_mutex); the_virtio_vsock = NULL; - vsock_core_exit(); mutex_unlock(&the_virtio_vsock_mutex); vdev->config->del_vqs(vdev); @@ -702,15 +701,28 @@ static int __init virtio_vsock_init(void) virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0); if (!virtio_vsock_workqueue) return -ENOMEM; + + ret = vsock_core_init(&virtio_transport.transport); + if (ret) + goto out_wq; + ret = register_virtio_driver(&virtio_vsock_driver); if (ret) - destroy_workqueue(virtio_vsock_workqueue); + goto out_vci; + + return 0; + +out_vci: + vsock_core_exit(); +out_wq: + destroy_workqueue(virtio_vsock_workqueue); return ret; } static void __exit virtio_vsock_exit(void) { unregister_virtio_driver(&virtio_vsock_driver); + vsock_core_exit(); destroy_workqueue(virtio_vsock_workqueue); } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index edba7ab97563..d20f43057323 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -92,8 +92,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct virtio_vsock_pkt *pkt = opaque; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; + size_t payload_len; + void *payload_buf; - skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, + /* A packet could be split to fit the RX buffer, so we can retrieve + * the payload length from the header and the buffer pointer taking + * care of the offset in the original packet. + */ + payload_len = le32_to_cpu(pkt->hdr.len); + payload_buf = pkt->buf + pkt->off; + + skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, GFP_ATOMIC); if (!skb) return NULL; @@ -133,8 +142,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); - if (pkt->len) { - skb_put_data(skb, pkt->buf, pkt->len); + if (payload_len) { + skb_put_data(skb, payload_buf, payload_len); } return skb; @@ -662,6 +671,8 @@ static int virtio_transport_reset(struct vsock_sock *vsk, */ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) { + const struct virtio_transport *t; + struct virtio_vsock_pkt *reply; struct virtio_vsock_pkt_info info = { .op = VIRTIO_VSOCK_OP_RST, .type = le16_to_cpu(pkt->hdr.type), @@ -672,15 +683,21 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt) if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST) return 0; - pkt = virtio_transport_alloc_pkt(&info, 0, - le64_to_cpu(pkt->hdr.dst_cid), - le32_to_cpu(pkt->hdr.dst_port), - le64_to_cpu(pkt->hdr.src_cid), - le32_to_cpu(pkt->hdr.src_port)); - if (!pkt) + reply = virtio_transport_alloc_pkt(&info, 0, + le64_to_cpu(pkt->hdr.dst_cid), + le32_to_cpu(pkt->hdr.dst_port), + le64_to_cpu(pkt->hdr.src_cid), + le32_to_cpu(pkt->hdr.src_port)); + if (!reply) return -ENOMEM; - return virtio_transport_get_ops()->send_pkt(pkt); + t = virtio_transport_get_ops(); + if (!t) { + virtio_transport_free_pkt(reply); + return -ENOTCONN; + } + + return t->send_pkt(reply); } static void virtio_transport_wait_close(struct sock *sk, long timeout) @@ -708,7 +725,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown = SHUTDOWN_MASK; if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); if (vsk->close_work_scheduled && @@ -748,8 +765,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk) { struct sock *sk = &vsk->sk; - if (!(sk->sk_state == SS_CONNECTED || - sk->sk_state == SS_DISCONNECTING)) + if (!(sk->sk_state == TCP_ESTABLISHED || + sk->sk_state == TCP_CLOSING)) return true; /* Already received SHUTDOWN from peer, reply with RST */ @@ -778,12 +795,19 @@ static bool virtio_transport_close(struct vsock_sock *vsk) void virtio_transport_release(struct vsock_sock *vsk) { + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt *pkt, *tmp; struct sock *sk = &vsk->sk; bool remove_sock = true; - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_type == SOCK_STREAM) remove_sock = virtio_transport_close(vsk); + + list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) { + list_del(&pkt->list); + virtio_transport_free_pkt(pkt); + } release_sock(sk); if (remove_sock) @@ -801,7 +825,7 @@ virtio_transport_recv_connecting(struct sock *sk, switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RESPONSE: - sk->sk_state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; sk->sk_socket->state = SS_CONNECTED; vsock_insert_connected(vsk); sk->sk_state_change(sk); @@ -821,7 +845,7 @@ virtio_transport_recv_connecting(struct sock *sk, destroy: virtio_transport_reset(vsk, pkt); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = skerr; sk->sk_error_report(sk); return err; @@ -857,7 +881,7 @@ virtio_transport_recv_connected(struct sock *sk, vsk->peer_shutdown |= SEND_SHUTDOWN; if (vsk->peer_shutdown == SHUTDOWN_MASK && vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; if (le32_to_cpu(pkt->hdr.flags)) sk->sk_state_change(sk); break; @@ -928,7 +952,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) lock_sock_nested(child, SINGLE_DEPTH_NESTING); - child->sk_state = SS_CONNECTED; + child->sk_state = TCP_ESTABLISHED; vchild = vsock_sk(child); vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), @@ -1016,18 +1040,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) sk->sk_write_space(sk); switch (sk->sk_state) { - case VSOCK_SS_LISTEN: + case TCP_LISTEN: virtio_transport_recv_listen(sk, pkt); virtio_transport_free_pkt(pkt); break; - case SS_CONNECTING: + case TCP_SYN_SENT: virtio_transport_recv_connecting(sk, pkt); virtio_transport_free_pkt(pkt); break; - case SS_CONNECTED: + case TCP_ESTABLISHED: virtio_transport_recv_connected(sk, pkt); break; - case SS_DISCONNECTING: + case TCP_CLOSING: virtio_transport_recv_disconnecting(sk, pkt); virtio_transport_free_pkt(pkt); break; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index bf7c51644446..ba4cb18c4b9a 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -776,7 +776,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) /* The local context ID may be out of date, update it. */ vsk->local_addr.svm_cid = dst.svm_cid; - if (sk->sk_state == SS_CONNECTED) + if (sk->sk_state == TCP_ESTABLISHED) vmci_trans(vsk)->notify_ops->handle_notify_pkt( sk, pkt, true, &dst, &src, &bh_process_pkt); @@ -834,7 +834,9 @@ static void vmci_transport_handle_detach(struct sock *sk) * left in our consume queue. */ if (vsock_stream_has_data(vsk) <= 0) { - if (sk->sk_state == SS_CONNECTING) { + sk->sk_state = TCP_CLOSE; + + if (sk->sk_state == TCP_SYN_SENT) { /* The peer may detach from a queue pair while * we are still in the connecting state, i.e., * if the peer VM is killed after attaching to @@ -843,12 +845,10 @@ static void vmci_transport_handle_detach(struct sock *sk) * event like a reset. */ - sk->sk_state = SS_UNCONNECTED; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); return; } - sk->sk_state = SS_UNCONNECTED; } sk->sk_state_change(sk); } @@ -916,17 +916,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; switch (sk->sk_state) { - case VSOCK_SS_LISTEN: + case TCP_LISTEN: vmci_transport_recv_listen(sk, pkt); break; - case SS_CONNECTING: + case TCP_SYN_SENT: /* Processing of pending connections for servers goes through * the listening socket, so see vmci_transport_recv_listen() * for that path. */ vmci_transport_recv_connecting_client(sk, pkt); break; - case SS_CONNECTED: + case TCP_ESTABLISHED: vmci_transport_recv_connected(sk, pkt); break; default: @@ -975,7 +975,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; switch (pending->sk_state) { - case SS_CONNECTING: + case TCP_SYN_SENT: err = vmci_transport_recv_connecting_server(sk, pending, pkt); @@ -1105,7 +1105,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vsock_add_pending(sk, pending); sk->sk_ack_backlog++; - pending->sk_state = SS_CONNECTING; + pending->sk_state = TCP_SYN_SENT; vmci_trans(vpending)->produce_size = vmci_trans(vpending)->consume_size = qp_size; vmci_trans(vpending)->queue_pair_size = qp_size; @@ -1229,11 +1229,11 @@ vmci_transport_recv_connecting_server(struct sock *listener, * the socket will be valid until it is removed from the queue. * * If we fail sending the attach below, we remove the socket from the - * connected list and move the socket to SS_UNCONNECTED before + * connected list and move the socket to TCP_CLOSE before * releasing the lock, so a pending slow path processing of an incoming * packet will not see the socket in the connected state in that case. */ - pending->sk_state = SS_CONNECTED; + pending->sk_state = TCP_ESTABLISHED; vsock_insert_connected(vpending); @@ -1264,7 +1264,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, destroy: pending->sk_err = skerr; - pending->sk_state = SS_UNCONNECTED; + pending->sk_state = TCP_CLOSE; /* As long as we drop our reference, all necessary cleanup will handle * when the cleanup function drops its reference and our destruct * implementation is called. Note that since the listen handler will @@ -1302,7 +1302,7 @@ vmci_transport_recv_connecting_client(struct sock *sk, * accounting (it can already be found since it's in the bound * table). */ - sk->sk_state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; sk->sk_socket->state = SS_CONNECTED; vsock_insert_connected(vsk); sk->sk_state_change(sk); @@ -1370,7 +1370,7 @@ vmci_transport_recv_connecting_client(struct sock *sk, destroy: vmci_transport_send_reset(sk, pkt); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = skerr; sk->sk_error_report(sk); return err; @@ -1558,7 +1558,7 @@ static int vmci_transport_recv_connected(struct sock *sk, sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown = SHUTDOWN_MASK; if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); break; @@ -1648,6 +1648,10 @@ static void vmci_transport_cleanup(struct work_struct *work) static void vmci_transport_destruct(struct vsock_sock *vsk) { + /* transport can be NULL if we hit a failure at init() time */ + if (!vmci_trans(vsk)) + return; + /* Ensure that the detach callback doesn't use the sk/vsk * we are about to destruct. */ @@ -1822,7 +1826,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk) err = vmci_transport_send_conn_request( sk, vmci_trans(vsk)->queue_pair_size); if (err < 0) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; return err; } } else { @@ -1832,7 +1836,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk) sk, vmci_trans(vsk)->queue_pair_size, supported_proto_versions); if (err < 0) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; return err; } diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 1406db4d97d1..41fb427f150a 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, * queue. Ask for notifications when there is something to * read. */ - if (sk->sk_state == SS_CONNECTED) { + if (sk->sk_state == TCP_ESTABLISHED) { if (!send_waiting_read(sk, 1)) return -1; diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index f3a0afc46208..0cc84f2bb05e 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, * queue. Ask for notifications when there is something to * read. */ - if (sk->sk_state == SS_CONNECTED) + if (sk->sk_state == TCP_ESTABLISHED) vsock_block_update_write_window(sk); *data_ready_now = false; } diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 63682176c96c..c4bd3ecef508 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -40,6 +40,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, cfg80211_sched_dfs_chan_update(rdev); } + schedule_work(&cfg80211_disconnect_work); + return err; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 45cbade9ad68..0e08629b15d1 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -498,7 +498,7 @@ use_default_name: &rdev->rfkill_ops, rdev); if (!rdev->rfkill) { - kfree(rdev); + wiphy_free(&rdev->wiphy); return NULL; } diff --git a/net/wireless/core.h b/net/wireless/core.h index 90f90c7d8bf9..507ec6446eb6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -429,6 +429,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); +extern struct work_struct cfg80211_disconnect_work; + /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 46e9812d13c0..df8c5312f26a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -199,6 +199,38 @@ cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) return __cfg80211_rdev_from_attrs(netns, info->attrs); } +static int validate_beacon_head(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *data = nla_data(attr); + unsigned int len = nla_len(attr); + const struct element *elem; + const struct ieee80211_mgmt *mgmt = (void *)data; + unsigned int fixedlen = offsetof(struct ieee80211_mgmt, + u.beacon.variable); + + if (len < fixedlen) + goto err; + + if (ieee80211_hdrlen(mgmt->frame_control) != + offsetof(struct ieee80211_mgmt, u.beacon)) + goto err; + + data += fixedlen; + len -= fixedlen; + + for_each_element(elem, data, len) { + /* nothing */ + } + + if (for_each_element_completed(elem, data, len)) + return 0; + +err: + NL_SET_ERR_MSG_ATTR(extack, attr, "malformed beacon head"); + return -EINVAL; +} + /* policy for the attributes */ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -251,7 +283,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, - [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, + [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_BINARY, + .len = ETH_ALEN }, [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, @@ -2111,6 +2144,8 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, control_freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + memset(chandef, 0, sizeof(*chandef)); + chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq); chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = control_freq; @@ -2580,7 +2615,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (rdev->ops->get_channel) { int ret; - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = {}; ret = rdev_get_channel(rdev, wdev, &chandef); if (ret == 0) { @@ -3083,7 +3118,7 @@ static void get_key_callback(void *c, struct key_params *params) params->cipher))) goto nla_put_failure; - if (nla_put_u8(cookie->msg, NL80211_ATTR_KEY_IDX, cookie->idx)) + if (nla_put_u8(cookie->msg, NL80211_KEY_IDX, cookie->idx)) goto nla_put_failure; nla_nest_end(cookie->msg, key); @@ -3736,6 +3771,12 @@ static int nl80211_parse_beacon(struct nlattr *attrs[], memset(bcn, 0, sizeof(*bcn)); if (attrs[NL80211_ATTR_BEACON_HEAD]) { + int ret = validate_beacon_head(attrs[NL80211_ATTR_BEACON_HEAD], + NULL); + + if (ret) + return ret; + bcn->head = nla_data(attrs[NL80211_ATTR_BEACON_HEAD]); bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]); if (!bcn->head_len) @@ -5464,6 +5505,9 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_mpath) return -EOPNOTSUPP; + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + return rdev_del_mpath(rdev, dev, dst); } @@ -9753,9 +9797,11 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, hyst = wdev->cqm_config->rssi_hyst; n = wdev->cqm_config->n_rssi_thresholds; - for (i = 0; i < n; i++) + for (i = 0; i < n; i++) { + i = array_index_nospec(i, n); if (last < wdev->cqm_config->rssi_thresholds[i]) break; + } low_index = i - 1; if (low_index >= 0) { @@ -12761,7 +12807,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEAUTHENTICATE, @@ -12812,7 +12859,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, @@ -12820,7 +12868,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DISCONNECT, @@ -12849,7 +12898,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMKSA, @@ -13201,7 +13251,8 @@ static const struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_SET_QOS_MAP, @@ -13256,7 +13307,8 @@ static const struct genl_ops nl80211_ops[] = { .doit = nl80211_set_pmk, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NEED_RTNL, + NL80211_FLAG_NEED_RTNL | + NL80211_FLAG_CLEAR_SKB, }, { .cmd = NL80211_CMD_DEL_PMK, @@ -14700,6 +14752,11 @@ void cfg80211_ch_switch_notify(struct net_device *dev, wdev->chandef = *chandef; wdev->preset_chandef = *chandef; + + if (wdev->iftype == NL80211_IFTYPE_STATION && + !WARN_ON(!wdev->current_bss)) + wdev->current_bss->pub.channel = chandef->chan; + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, NL80211_CMD_CH_SWITCH_NOTIFY, 0); } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 544f7e24d0aa..283d8997f671 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -759,7 +759,7 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) * definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"), * however it is safe for now to assume that a frequency rule should not be * part of a frequency's band if the start freq or end freq are off by more - * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the + * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 20 GHz for the * 60 GHz band. * This resolution can be lowered and should be considered as we add * regulatory rule support for other "bands". @@ -774,7 +774,7 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, * with the Channel starting frequency above 45 GHz. */ u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ? - 10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; + 20 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ; if (abs(freq_khz - freq_range->start_freq_khz) <= limit) return true; if (abs(freq_khz - freq_range->end_freq_khz) <= limit) @@ -1567,7 +1567,7 @@ static void reg_call_notifier(struct wiphy *wiphy, static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) { - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = {}; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); enum nl80211_iftype iftype; @@ -2252,7 +2252,7 @@ static void reg_process_pending_hints(void) /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { - reg_process_hint(lr); + pr_debug("Pending regulatory request, waiting for it to be processed...\n"); return; } @@ -2704,8 +2704,54 @@ static void restore_regulatory_settings(bool reset_user) schedule_work(®_work); } +static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + wdev_lock(wdev); + if (!(wdev->wiphy->regulatory_flags & flag)) { + wdev_unlock(wdev); + return false; + } + wdev_unlock(wdev); + } + } + + return true; +} + void regulatory_hint_disconnect(void) { + /* Restore of regulatory settings is not required when wiphy(s) + * ignore IE from connected access point but clearance of beacon hints + * is required when wiphy(s) supports beacon hints. + */ + if (is_wiphy_all_set_reg_flag(REGULATORY_COUNTRY_IE_IGNORE)) { + struct reg_beacon *reg_beacon, *btmp; + + if (is_wiphy_all_set_reg_flag(REGULATORY_DISABLE_BEACON_HINTS)) + return; + + spin_lock_bh(®_pending_beacons_lock); + list_for_each_entry_safe(reg_beacon, btmp, + ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + spin_unlock_bh(®_pending_beacons_lock); + + list_for_each_entry_safe(reg_beacon, btmp, + ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + + return; + } + pr_debug("All devices are disconnected, going to restore regulatory settings\n"); restore_regulatory_settings(false); } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 5ed0ed0559dc..ea47ef156e7d 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -484,6 +484,8 @@ const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, const u8 *match, int match_len, int match_offset) { + const struct element *elem; + /* match_offset can't be smaller than 2, unless match_len is * zero, in which case match_offset must be zero as well. */ @@ -491,14 +493,10 @@ const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, (!match_len && match_offset))) return NULL; - while (len >= 2 && len >= ies[1] + 2) { - if ((ies[0] == eid) && - (ies[1] + 2 >= match_offset + match_len) && - !memcmp(ies + match_offset, match, match_len)) - return ies; - - len -= ies[1] + 2; - ies += ies[1] + 2; + for_each_element_id(elem, eid, ies, len) { + if (elem->datalen >= match_offset - 2 + match_len && + !memcmp(elem->data + match_offset - 2, match, match_len)) + return (void *)elem; } return NULL; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d014aea07160..8344153800e2 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -642,11 +642,15 @@ static bool cfg80211_is_all_idle(void) * All devices must be idle as otherwise if you are actively * scanning some new beacon hints could be learned and would * count as new regulatory hints. + * Also if there is any other active beaconing interface we + * need not issue a disconnect hint and reset any info such + * as chan dfs state, etc. */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); - if (wdev->conn || wdev->current_bss) + if (wdev->conn || wdev->current_bss || + cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; wdev_unlock(wdev); } @@ -663,7 +667,7 @@ static void disconnect_work(struct work_struct *work) rtnl_unlock(); } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); +DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); /* diff --git a/net/wireless/util.c b/net/wireless/util.c index a52a0eb44aa0..907072dddf12 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1042,6 +1042,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, } cfg80211_process_rdev_events(rdev); + cfg80211_mlme_purge_registrations(dev->ieee80211_ptr); } err = rdev_change_virtual_intf(rdev, dev, ntype, params); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7ca04a7de85a..bf3bae4ac5f4 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -800,7 +800,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = {}; int ret; switch (wdev->iftype) { diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index c67d7a82ab13..73fd0eae08ca 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -202,6 +202,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; + int ret = 0; /* call only for station! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) @@ -219,7 +220,10 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, if (ie) { data->flags = 1; data->length = ie[1]; - memcpy(ssid, ie + 2, data->length); + if (data->length > IW_ESSID_MAX_SIZE) + ret = -EINVAL; + else + memcpy(ssid, ie + 2, data->length); } rcu_read_unlock(); } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) { @@ -229,7 +233,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, } wdev_unlock(wdev); - return 0; + return ret; } int cfg80211_mgd_wext_siwap(struct net_device *dev, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ac095936552d..a156b6dc3a72 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb, } len = *skb->data; - needed = 1 + (len >> 4) + (len & 0x0f); + needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2; if (!pskb_may_pull(skb, needed)) { /* packet is too short to hold the addresses it claims @@ -288,7 +288,7 @@ static struct sock *x25_find_listener(struct x25_address *addr, sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || - !strcmp(addr->x25_addr, + !strcmp(x25_sk(s)->source_addr.x25_addr, null_x25_address.x25_addr)) && s->sk_state == TCP_LISTEN) { /* @@ -352,17 +352,15 @@ static unsigned int x25_new_lci(struct x25_neigh *nb) unsigned int lci = 1; struct sock *sk; - read_lock_bh(&x25_list_lock); - - while ((sk = __x25_find_socket(lci, nb)) != NULL) { + while ((sk = x25_find_socket(lci, nb)) != NULL) { sock_put(sk); if (++lci == 4096) { lci = 0; break; } + cond_resched(); } - read_unlock_bh(&x25_list_lock); return lci; } @@ -680,25 +678,32 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr; int len, i, rc = 0; - if (!sock_flag(sk, SOCK_ZAPPED) || - addr_len != sizeof(struct sockaddr_x25) || + if (addr_len != sizeof(struct sockaddr_x25) || addr->sx25_family != AF_X25) { rc = -EINVAL; goto out; } - len = strlen(addr->sx25_addr.x25_addr); - for (i = 0; i < len; i++) { - if (!isdigit(addr->sx25_addr.x25_addr[i])) { - rc = -EINVAL; - goto out; + /* check for the null_x25_address */ + if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) { + + len = strlen(addr->sx25_addr.x25_addr); + for (i = 0; i < len; i++) { + if (!isdigit(addr->sx25_addr.x25_addr[i])) { + rc = -EINVAL; + goto out; + } } } lock_sock(sk); - x25_sk(sk)->source_addr = addr->sx25_addr; - x25_insert_socket(sk); - sock_reset_flag(sk, SOCK_ZAPPED); + if (sock_flag(sk, SOCK_ZAPPED)) { + x25_sk(sk)->source_addr = addr->sx25_addr; + x25_insert_socket(sk); + sock_reset_flag(sk, SOCK_ZAPPED); + } else { + rc = -EINVAL; + } release_sock(sk); SOCK_DEBUG(sk, "x25_bind: socket is bound\n"); out: @@ -814,8 +819,13 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTED; rc = 0; out_put_neigh: - if (rc) + if (rc) { + read_lock_bh(&x25_list_lock); x25_neigh_put(x25->neighbour); + x25->neighbour = NULL; + read_unlock_bh(&x25_list_lock); + x25->state = X25_STATE_0; + } out_put_route: x25_route_put(rt); out: diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 286ed25c1a69..2e747ae7dc89 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -14,6 +14,8 @@ config XFRM_ALGO tristate select XFRM select CRYPTO + select CRYPTO_HASH + select CRYPTO_BLKCIPHER config XFRM_USER tristate "Transformation user configuration interface" diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 30e5746085b8..4e458fd9236a 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -153,12 +153,6 @@ static int xfrm_dev_register(struct net_device *dev) return NOTIFY_DONE; } -static int xfrm_dev_unregister(struct net_device *dev) -{ - xfrm_policy_cache_flush(); - return NOTIFY_DONE; -} - static int xfrm_dev_feat_change(struct net_device *dev) { if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) @@ -178,7 +172,6 @@ static int xfrm_dev_down(struct net_device *dev) if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); - xfrm_policy_cache_flush(); return NOTIFY_DONE; } @@ -190,9 +183,6 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void case NETDEV_REGISTER: return xfrm_dev_register(dev); - case NETDEV_UNREGISTER: - return xfrm_dev_unregister(dev); - case NETDEV_FEAT_CHANGE: return xfrm_dev_feat_change(dev); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 06dec32503bd..311597401b82 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -130,7 +130,7 @@ struct sec_path *secpath_dup(struct sec_path *src) sp->len = 0; sp->olen = 0; - memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH])); + memset(sp->ovec, 0, sizeof(sp->ovec)); if (src) { int i; @@ -245,6 +245,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) else XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); + + if (encap_type == -1) + dev_put(skb->dev); goto drop; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 70ec57b887f6..b5006a091fd6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -45,8 +45,6 @@ struct xfrm_flo { u8 flags; }; -static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); -static struct work_struct *xfrm_pcpu_work __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; @@ -1715,108 +1713,6 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, } -static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old) -{ - this_cpu_write(xfrm_last_dst, xdst); - if (old) - dst_release(&old->u.dst); -} - -static void __xfrm_pcpu_work_fn(void) -{ - struct xfrm_dst *old; - - old = this_cpu_read(xfrm_last_dst); - if (old && !xfrm_bundle_ok(old)) - xfrm_last_dst_update(NULL, old); -} - -static void xfrm_pcpu_work_fn(struct work_struct *work) -{ - local_bh_disable(); - rcu_read_lock(); - __xfrm_pcpu_work_fn(); - rcu_read_unlock(); - local_bh_enable(); -} - -void xfrm_policy_cache_flush(void) -{ - struct xfrm_dst *old; - bool found = 0; - int cpu; - - might_sleep(); - - local_bh_disable(); - rcu_read_lock(); - for_each_possible_cpu(cpu) { - old = per_cpu(xfrm_last_dst, cpu); - if (old && !xfrm_bundle_ok(old)) { - if (smp_processor_id() == cpu) { - __xfrm_pcpu_work_fn(); - continue; - } - found = true; - break; - } - } - - rcu_read_unlock(); - local_bh_enable(); - - if (!found) - return; - - get_online_cpus(); - - for_each_possible_cpu(cpu) { - bool bundle_release; - - rcu_read_lock(); - old = per_cpu(xfrm_last_dst, cpu); - bundle_release = old && !xfrm_bundle_ok(old); - rcu_read_unlock(); - - if (!bundle_release) - continue; - - if (cpu_online(cpu)) { - schedule_work_on(cpu, &xfrm_pcpu_work[cpu]); - continue; - } - - rcu_read_lock(); - old = per_cpu(xfrm_last_dst, cpu); - if (old && !xfrm_bundle_ok(old)) { - per_cpu(xfrm_last_dst, cpu) = NULL; - dst_release(&old->u.dst); - } - rcu_read_unlock(); - } - - put_online_cpus(); -} - -static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst, - struct xfrm_state * const xfrm[], - int num) -{ - const struct dst_entry *dst = &xdst->u.dst; - int i; - - if (xdst->num_xfrms != num) - return false; - - for (i = 0; i < num; i++) { - if (!dst || dst->xfrm != xfrm[i]) - return false; - dst = dst->child; - } - - return xfrm_bundle_ok(xdst); -} - static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, @@ -1824,7 +1720,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; - struct xfrm_dst *xdst, *old; + struct xfrm_dst *xdst; struct dst_entry *dst; int err; @@ -1839,21 +1735,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, return ERR_PTR(err); } - xdst = this_cpu_read(xfrm_last_dst); - if (xdst && - xdst->u.dst.dev == dst_orig->dev && - xdst->num_pols == num_pols && - memcmp(xdst->pols, pols, - sizeof(struct xfrm_policy *) * num_pols) == 0 && - xfrm_xdst_can_reuse(xdst, xfrm, err)) { - dst_hold(&xdst->u.dst); - while (err > 0) - xfrm_state_put(xfrm[--err]); - return xdst; - } - - old = xdst; - dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); if (IS_ERR(dst)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); @@ -1866,9 +1747,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); - atomic_set(&xdst->u.dst.__refcnt, 2); - xfrm_last_dst_update(xdst, old); - return xdst; } @@ -2069,11 +1947,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, if (num_xfrms <= 0) goto make_dummy_bundle; - local_bh_disable(); xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); - local_bh_enable(); - if (IS_ERR(xdst)) { err = PTR_ERR(xdst); if (err != -EAGAIN) @@ -2160,11 +2035,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } - local_bh_disable(); xdst = xfrm_resolve_and_create_bundle( pols, num_pols, fl, family, dst_orig); - local_bh_enable(); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); @@ -2992,15 +2865,6 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { - int i; - - xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work), - GFP_KERNEL); - BUG_ON(!xfrm_pcpu_work); - - for (i = 0; i < NR_CPUS; i++) - INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); - register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0cd2bdf3b217..190ca59d5ba3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -449,6 +449,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + if (x->xfrag.page) + put_page(x->xfrag.page); xfrm_dev_state_free(x); security_xfrm_state_free(x); kfree(x); @@ -735,10 +737,9 @@ restart: } out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); - if (cnt) { + if (cnt) err = 0; - xfrm_policy_cache_flush(); - } + return err; } EXPORT_SYMBOL(xfrm_state_flush); @@ -2331,7 +2332,7 @@ void xfrm_state_fini(struct net *net) unsigned int sz; flush_work(&net->xfrm.state_hash_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + xfrm_state_flush(net, 0, false); flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4e8319766f2b..339a070da597 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -151,6 +151,25 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->family) { case AF_INET: + break; + + case AF_INET6: +#if IS_ENABLED(CONFIG_IPV6) + break; +#else + err = -EAFNOSUPPORT; + goto out; +#endif + + default: + goto out; + } + + switch (p->sel.family) { + case AF_UNSPEC: + break; + + case AF_INET: if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) goto out; @@ -1381,7 +1400,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) ret = verify_policy_dir(p->dir); if (ret) return ret; - if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) return -EINVAL; return 0; @@ -1445,10 +1464,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) if (!ut[i].family) ut[i].family = family; - if ((ut[i].mode == XFRM_MODE_TRANSPORT) && - (ut[i].family != prev_family)) - return -EINVAL; - + switch (ut[i].mode) { + case XFRM_MODE_TUNNEL: + case XFRM_MODE_BEET: + break; + default: + if (ut[i].family != prev_family) + return -EINVAL; + break; + } if (ut[i].mode >= XFRM_MODE_MAX) return -EINVAL; @@ -1465,20 +1489,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) return -EINVAL; } - switch (ut[i].id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: -#if IS_ENABLED(CONFIG_IPV6) - case IPPROTO_ROUTING: - case IPPROTO_DSTOPTS: -#endif - case IPSEC_PROTO_ANY: - break; - default: + if (!xfrm_id_proto_valid(ut[i].id.proto)) return -EINVAL; - } - } return 0; |