diff options
Diffstat (limited to 'net')
83 files changed, 963 insertions, 467 deletions
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 2f03b780b40d..960a19b3e26d 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -486,6 +486,8 @@ static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev) int ret; local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + continue; BT_DBG("xmit %s to %pMR type %u IP %pI6c chan %p", netdev->name, diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 17b46ad6a349..54eabaa46960 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -870,8 +870,10 @@ static int hci_le_big_terminate(struct hci_dev *hdev, struct hci_conn *conn) d->big_sync_term = true; } - if (!d->pa_sync_term && !d->big_sync_term) + if (!d->pa_sync_term && !d->big_sync_term) { + kfree(d); return 0; + } ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d, terminate_big_destroy); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index c46c1236ebfa..28d7929dc593 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -539,46 +539,9 @@ static int hci_dev_do_reset(struct hci_dev *hdev) hci_req_sync_lock(hdev); - /* Drop queues */ - skb_queue_purge(&hdev->rx_q); - skb_queue_purge(&hdev->cmd_q); - - /* Cancel these to avoid queueing non-chained pending work */ - hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); - /* Wait for - * - * if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - * queue_delayed_work(&hdev->{cmd,ncmd}_timer) - * - * inside RCU section to see the flag or complete scheduling. - */ - synchronize_rcu(); - /* Explicitly cancel works in case scheduled after setting the flag. */ - cancel_delayed_work(&hdev->cmd_timer); - cancel_delayed_work(&hdev->ncmd_timer); - - /* Avoid potential lockdep warnings from the *_flush() calls by - * ensuring the workqueue is empty up front. - */ - drain_workqueue(hdev->workqueue); - - hci_dev_lock(hdev); - hci_inquiry_cache_flush(hdev); - hci_conn_hash_flush(hdev); - hci_dev_unlock(hdev); - - if (hdev->flush) - hdev->flush(hdev); - - hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); - - atomic_set(&hdev->cmd_cnt, 1); - hdev->acl_cnt = 0; - hdev->sco_cnt = 0; - hdev->le_cnt = 0; - hdev->iso_cnt = 0; - - ret = hci_reset_sync(hdev); + ret = hci_dev_close_sync(hdev); + if (!ret) + ret = hci_dev_open_sync(hdev); hci_req_sync_unlock(hdev); return ret; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index aff8562a8690..aeccd8084cba 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -5301,6 +5301,12 @@ int hci_dev_close_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, ""); + /* Set HCI_DRAIN_WORKQUEUE flag to prevent queuing work during + * reset/close. See hci_cmd_work() and handle_cmd_cnt_and_timer(). + */ + hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); + synchronize_rcu(); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) { disable_delayed_work(&hdev->power_off); disable_delayed_work(&hdev->ncmd_timer); @@ -5324,6 +5330,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); + hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); return err; } @@ -5386,6 +5393,10 @@ int hci_dev_close_sync(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); + hdev->acl_cnt = 0; + hdev->sco_cnt = 0; + hdev->le_cnt = 0; + hdev->iso_cnt = 0; if (hci_test_quirk(hdev, HCI_QUIRK_RESET_ON_CLOSE) && !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); @@ -5423,6 +5434,7 @@ int hci_dev_close_sync(struct hci_dev *hdev) /* Clear flags */ hdev->flags &= BIT(HCI_RAW); hci_dev_clear_volatile_flags(hdev); + hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); memset(hdev->eir, 0, sizeof(hdev->eir)); memset(hdev->dev_class, 0, sizeof(hdev->dev_class)); @@ -6699,6 +6711,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) DEFINE_FLEX(struct hci_cp_le_create_cis, cmd, cis, num_cis, 0x1f); size_t aux_num_cis = 0; struct hci_conn *conn; + u16 timeout = 0; u8 cig = BT_ISO_QOS_CIG_UNSET; /* The spec allows only one pending LE Create CIS command at a time. If @@ -6769,6 +6782,7 @@ int hci_le_create_cis_sync(struct hci_dev *hdev) set_bit(HCI_CONN_CREATE_CIS, &conn->flags); cis->acl_handle = cpu_to_le16(conn->parent->handle); cis->cis_handle = cpu_to_le16(conn->handle); + timeout = conn->conn_timeout; aux_num_cis++; if (aux_num_cis >= cmd->num_cis) @@ -6788,7 +6802,7 @@ done: return __hci_cmd_sync_status_sk(hdev, HCI_OP_LE_CREATE_CIS, struct_size(cmd, cis, cmd->num_cis), cmd, HCI_EVT_LE_CIS_ESTABLISHED, - conn->conn_timeout, NULL); + timeout, NULL); } int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 976f91eeb745..70344bd3248a 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -179,12 +179,21 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) { struct input_dev *dev = session->input; unsigned char *keys = session->keys; - unsigned char *udata = skb->data + 1; - signed char *sdata = skb->data + 1; - int i, size = skb->len - 1; + unsigned char *udata; + signed char *sdata; + u8 *hdr; + int i; + + hdr = skb_pull_data(skb, 1); + if (!hdr) + return; - switch (skb->data[0]) { + switch (*hdr) { case 0x01: /* Keyboard report */ + udata = skb_pull_data(skb, 8); + if (!udata) + break; + for (i = 0; i < 8; i++) input_report_key(dev, hidp_keycode[i + 224], (udata[0] >> i) & 1); @@ -213,6 +222,10 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) break; case 0x02: /* Mouse report */ + sdata = skb_pull_data(skb, 3); + if (!sdata) + break; + input_report_key(dev, BTN_LEFT, sdata[0] & 0x01); input_report_key(dev, BTN_RIGHT, sdata[0] & 0x02); input_report_key(dev, BTN_MIDDLE, sdata[0] & 0x04); @@ -222,7 +235,7 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) input_report_rel(dev, REL_X, sdata[1]); input_report_rel(dev, REL_Y, sdata[2]); - if (size > 3) + if (skb->len > 0) input_report_rel(dev, REL_WHEEL, sdata[3]); break; } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index d7af617cda45..876649556d3c 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -564,7 +564,7 @@ static void iso_recv_frame(struct iso_conn *conn, struct sk_buff *skb) struct sock *sk; iso_conn_lock(conn); - sk = conn->sk; + sk = iso_sock_hold(conn); iso_conn_unlock(conn); if (!sk) @@ -573,11 +573,15 @@ static void iso_recv_frame(struct iso_conn *conn, struct sk_buff *skb) BT_DBG("sk %p len %d", sk, skb->len); if (sk->sk_state != BT_CONNECTED) - goto drop; + goto drop_put; - if (!sock_queue_rcv_skb(sk, skb)) + if (!sock_queue_rcv_skb(sk, skb)) { + sock_put(sk); return; + } +drop_put: + sock_put(sk); drop: kfree_skb(skb); } @@ -860,8 +864,8 @@ static void __iso_sock_close(struct sock *sk) /* Must be called on unlocked socket. */ static void iso_sock_close(struct sock *sk) { - iso_sock_clear_timer(sk); lock_sock(sk); + iso_sock_clear_timer(sk); __iso_sock_close(sk); release_sock(sk); iso_sock_kill(sk); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index fdccd62ccca8..45b175399e8d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -411,8 +411,10 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); - if (!conn) + if (!conn) { + l2cap_chan_put(chan); return; + } mutex_lock(&conn->lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling @@ -5260,6 +5262,7 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, cmd_len -= sizeof(*rsp); list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { + struct l2cap_chan *orig; u16 dcid; if (chan->ident != cmd->ident || @@ -5281,8 +5284,10 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, BT_DBG("dcid[%d] 0x%4.4x", i, dcid); + orig = __l2cap_get_chan_by_dcid(conn, dcid); + /* Check if dcid is already in use */ - if (dcid && __l2cap_get_chan_by_dcid(conn, dcid)) { + if (dcid && orig) { /* If a device receives a * L2CAP_CREDIT_BASED_CONNECTION_RSP packet with an * already-assigned Destination CID, then both the @@ -5291,10 +5296,24 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, */ l2cap_chan_del(chan, ECONNREFUSED); l2cap_chan_unlock(chan); - chan = __l2cap_get_chan_by_dcid(conn, dcid); - l2cap_chan_lock(chan); - l2cap_chan_del(chan, ECONNRESET); - l2cap_chan_unlock(chan); + + /* Check that the dcid channel mode is + * L2CAP_MODE_EXT_FLOWCTL since this procedure is only + * valid for that mode and shouldn't disconnect a dcid + * in other modes. + */ + if (orig->mode == L2CAP_MODE_EXT_FLOWCTL) { + l2cap_chan_lock(orig); + /* Disconnect the original channel as it may be + * considered connected since dcid has already + * been assigned; don't call l2cap_chan_close + * directly since that could lead to + * l2cap_chan_del and then removing the channel + * from the list while we're iterating over it. + */ + __set_chan_timer(orig, 0); + l2cap_chan_unlock(orig); + } continue; } @@ -5458,14 +5477,20 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, BT_DBG("result 0x%4.4x", result); - if (!result) + if (!result) { + list_for_each_entry(chan, &conn->chan_l, list) { + if (chan->ident == cmd->ident) + chan->ident = 0; + } return 0; + } list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { if (chan->ident != cmd->ident) continue; - l2cap_chan_hold(chan); + if (!l2cap_chan_hold_unless_zero(chan)) + continue; l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index b34e7da8d906..c138aa4ae266 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1499,6 +1499,10 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) * pin it (hold_unless_zero() additionally skips a chan already past * its last reference). We then drop the sk lock before taking * chan->lock, so sk and chan locks are never held together. + * + * Since we cannot call l2cap_chan_close() without conn->lock, + * schedule l2cap_chan_timeout to close the channel; it already + * acquires conn->lock -> chan->lock in the correct order. */ while ((sk = bt_accept_dequeue(parent, NULL))) { struct l2cap_chan *chan; @@ -1516,14 +1520,12 @@ static void l2cap_sock_cleanup_listen(struct sock *parent) state_to_string(chan->state)); l2cap_chan_lock(chan); - __clear_chan_timer(chan); - l2cap_chan_close(chan, ECONNRESET); - /* l2cap_conn_del() may already have killed this socket - * (it sets SOCK_DEAD); skip the duplicate to avoid a - * double sock_put()/l2cap_chan_put(). + /* Since we cannot call l2cap_chan_close() without + * conn->lock, schedule its timer to trigger the close + * and cleanup of this channel. */ - if (!sock_flag(sk, SOCK_DEAD)) - l2cap_sock_kill(sk); + if (chan->conn) + __set_chan_timer(chan, 0); l2cap_chan_unlock(chan); l2cap_chan_put(chan); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c04a4d0889ae..b9591dd755f9 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1000,19 +1000,25 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[], br_port_flags_change(p, changed_mask); if (tb[IFLA_BRPORT_COST]) { + spin_lock_bh(&p->br->lock); err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); + spin_unlock_bh(&p->br->lock); if (err) return err; } if (tb[IFLA_BRPORT_PRIORITY]) { + spin_lock_bh(&p->br->lock); err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY])); + spin_unlock_bh(&p->br->lock); if (err) return err; } if (tb[IFLA_BRPORT_STATE]) { + spin_lock_bh(&p->br->lock); err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE])); + spin_unlock_bh(&p->br->lock); if (err) return err; } @@ -1114,9 +1120,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, if (err) return err; - spin_lock_bh(&p->br->lock); err = br_setport(p, tb, extack); - spin_unlock_bh(&p->br->lock); } else { /* Binary compatibility with old RSTP */ if (nla_len(protinfo) < sizeof(u8)) @@ -1203,17 +1207,10 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *data[], struct netlink_ext_ack *extack) { - struct net_bridge *br = netdev_priv(brdev); - int ret; - if (!data) return 0; - spin_lock_bh(&br->lock); - ret = br_setport(br_port_get_rtnl(dev), data, extack); - spin_unlock_bh(&br->lock); - - return ret; + return br_setport(br_port_get_rtnl(dev), data, extack); } static int br_port_fill_slave_info(struct sk_buff *skb, diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 18b558a931ad..ee3ad9dfbab9 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -99,7 +99,6 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, attr.u.brport_flags.val = flags; attr.u.brport_flags.mask = mask; - /* We run from atomic context here */ err = call_switchdev_notifiers(SWITCHDEV_PORT_ATTR_SET, p->dev, &info.info, extack); err = notifier_to_errno(err); diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 1f57c36a7fc0..d6df81fa0d13 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -86,16 +86,34 @@ static ssize_t show_path_cost(struct net_bridge_port *p, char *buf) return sysfs_emit(buf, "%d\n", p->path_cost); } -static BRPORT_ATTR(path_cost, 0644, - show_path_cost, br_stp_set_path_cost); +static int store_path_cost(struct net_bridge_port *p, unsigned long v) +{ + int ret; + + spin_lock_bh(&p->br->lock); + ret = br_stp_set_path_cost(p, v); + spin_unlock_bh(&p->br->lock); + return ret; +} + +static BRPORT_ATTR(path_cost, 0644, show_path_cost, store_path_cost); static ssize_t show_priority(struct net_bridge_port *p, char *buf) { return sysfs_emit(buf, "%d\n", p->priority); } -static BRPORT_ATTR(priority, 0644, - show_priority, br_stp_set_port_priority); +static int store_priority(struct net_bridge_port *p, unsigned long v) +{ + int ret; + + spin_lock_bh(&p->br->lock); + ret = br_stp_set_port_priority(p, v); + spin_unlock_bh(&p->br->lock); + return ret; +} + +static BRPORT_ATTR(priority, 0644, show_priority, store_priority); static ssize_t show_designated_root(struct net_bridge_port *p, char *buf) { @@ -334,17 +352,13 @@ static ssize_t brport_store(struct kobject *kobj, ret = -ENOMEM; goto out_unlock; } - spin_lock_bh(&p->br->lock); ret = brport_attr->store_raw(p, buf_copy); - spin_unlock_bh(&p->br->lock); kfree(buf_copy); } else if (brport_attr->store) { val = simple_strtoul(buf, &endp, 0); if (endp == buf) goto out_unlock; - spin_lock_bh(&p->br->lock); ret = brport_attr->store(p, val); - spin_unlock_bh(&p->br->lock); } if (!ret) { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index b9f4daac09af..8a6a069329d2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1956,6 +1956,25 @@ enum compat_mwt { EBT_COMPAT_TARGET, }; +static bool match_size_ok(const struct xt_match *match, unsigned int match_size) +{ + u16 csize; + + if (match->matchsize == -1) /* cannot validate ebt_among */ + return true; + + csize = match->compatsize ? : match->matchsize; + + return match_size >= csize; +} + +static bool tgt_size_ok(const struct xt_target *tgt, unsigned int tgt_size) +{ + u16 csize = tgt->compatsize ? : tgt->targetsize; + + return tgt_size >= csize; +} + static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, enum compat_mwt compat_mwt, struct ebt_entries_buf_state *state, @@ -1981,6 +2000,11 @@ static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, if (IS_ERR(match)) return PTR_ERR(match); + if (!match_size_ok(match, match_size)) { + module_put(match->me); + return -EINVAL; + } + off = ebt_compat_match_offset(match, match_size); if (dst) { if (match->compat_from_user) @@ -2000,6 +2024,12 @@ static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, mwt->u.revision); if (IS_ERR(wt)) return PTR_ERR(wt); + + if (!tgt_size_ok(wt, match_size)) { + module_put(wt->me); + return -EINVAL; + } + off = xt_compat_target_offset(wt); if (dst) { diff --git a/net/core/filter.c b/net/core/filter.c index 9590877b0714..80439767e0ee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2869,7 +2869,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, psge->length = start - offset; rsge.length -= psge->length; - rsge.offset += start; + rsge.offset += start - offset; sk_msg_iter_var_next(i); sg_unmark_end(psge); diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h index 3e6fde8f1726..23175cb2bd86 100644 --- a/net/core/netmem_priv.h +++ b/net/core/netmem_priv.h @@ -8,18 +8,21 @@ static inline unsigned long netmem_get_pp_magic(netmem_ref netmem) return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK; } -static inline bool netmem_is_pp(netmem_ref netmem) +static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic) +{ + netmem_to_nmdesc(netmem)->pp_magic |= pp_magic; +} + +static inline void netmem_clear_pp_magic(netmem_ref netmem) { - struct page *page; + WARN_ON_ONCE(netmem_to_nmdesc(netmem)->pp_magic & PP_DMA_INDEX_MASK); - /* XXX: Now that the offset of page_type is shared between - * struct page and net_iov, just cast the netmem to struct page - * unconditionally by clearing NET_IOV if any, no matter whether - * it comes from struct net_iov or struct page. This should be - * adjusted once the offset is no longer shared. - */ - page = (struct page *)((__force unsigned long)netmem & ~NET_IOV); - return PageNetpp(page); + netmem_to_nmdesc(netmem)->pp_magic = 0; +} + +static inline bool netmem_is_pp(netmem_ref netmem) +{ + return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE; } static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 6e576dec80db..8171d1173221 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -707,18 +707,8 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict) void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) { - struct page *page; - netmem_set_pp(netmem, pool); - - /* XXX: Now that the offset of page_type is shared between - * struct page and net_iov, just cast the netmem to struct page - * unconditionally by clearing NET_IOV if any, no matter whether - * it comes from struct net_iov or struct page. This should be - * adjusted once the offset is no longer shared. - */ - page = (struct page *)((__force unsigned long)netmem & ~NET_IOV); - __SetPageNetpp(page); + netmem_or_pp_magic(netmem, PP_SIGNATURE); /* Ensuring all pages have been split into one fragment initially: * page_pool_set_pp_info() is only called once for every page when it @@ -733,17 +723,7 @@ void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) void page_pool_clear_pp_info(netmem_ref netmem) { - struct page *page; - - /* XXX: Now that the offset of page_type is shared between - * struct page and net_iov, just cast the netmem to struct page - * unconditionally by clearing NET_IOV if any, no matter whether - * it comes from struct net_iov or struct page. This should be - * adjusted once the offset is no longer shared. - */ - page = (struct page *)((__force unsigned long)netmem & ~NET_IOV); - __ClearPageNetpp(page); - + netmem_clear_pp_magic(netmem); netmem_set_pp(netmem, NULL); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 44ac121cfccb..c02f0a507ba8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2787,6 +2787,8 @@ done: skb->data_len = 0; skb_set_tail_pointer(skb, len); } + if (!skb_shinfo(skb)->nr_frags && !skb_has_frag_list(skb)) + skb->unreadable = 0; if (!skb->sk || skb->destructor == sock_edemux) skb_condense(skb); @@ -2794,16 +2796,37 @@ done: } EXPORT_SYMBOL(___pskb_trim); +static int pskb_trim_rcsum_complete(struct sk_buff *skb, unsigned int len) +{ + int delta = skb->len - len; + + if (skb_frags_readable(skb)) { + skb->csum = csum_block_sub(skb->csum, + skb_checksum(skb, len, delta, 0), + len); + return 0; + } + + if (len > skb_headlen(skb)) + return -EFAULT; + + /* The trimmed bytes are unreadable, but the remaining packet can be + * checksummed by software after trimming. + */ + skb->ip_summed = CHECKSUM_NONE; + return 0; +} + /* Note : use pskb_trim_rcsum() instead of calling this directly */ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) { - int delta = skb->len - len; + int err; - skb->csum = csum_block_sub(skb->csum, - skb_checksum(skb, len, delta, 0), - len); + err = pskb_trim_rcsum_complete(skb, len); + if (err) + return err; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len; int offset = skb_checksum_start_offset(skb) + skb->csum_offset; @@ -6800,6 +6823,11 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb_copy_from_linear_data_offset(skb, off, data, new_hlen); skb->len -= off; + /* Remove SKBFL_MANAGED_FRAG_REFS instead of trying to honour it + * while refcounting frags below. + */ + skb_zcopy_downgrade_managed(skb); + memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, @@ -6810,6 +6838,8 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, skb_kfree_head(data); return -ENOMEM; } + if (skb_zcopy(skb)) + net_zcopy_get(skb_zcopy(skb)); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) @@ -6911,6 +6941,11 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, return -ENOMEM; size = SKB_WITH_OVERHEAD(size); + /* Remove SKBFL_MANAGED_FRAG_REFS instead of trying to honour it + * while refcounting frags below. + */ + skb_zcopy_downgrade_managed(skb); + memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); if (skb_orphan_frags(skb, gfp_mask)) { @@ -6953,6 +6988,8 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, skb_kfree_head(data); return -ENOMEM; } + if (skb_zcopy(skb)) + net_zcopy_get(skb_zcopy(skb)); skb_release_data(skb, SKB_CONSUMED); skb->head = data; diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h index 4a9a946cabf0..778783a0f23c 100644 --- a/net/ethtool/cmis.h +++ b/net/ethtool/cmis.h @@ -63,9 +63,9 @@ struct ethtool_cmis_cdb_request { * struct ethtool_cmis_cdb_cmd_args - CDB commands execution arguments * @req: CDB command fields as described in the CMIS standard. * @max_duration: Maximum duration time for command completion in msec. + * @msleep_pre_rpl: Waiting time before checking reply in msec. * @read_write_len_ext: Allowable additional number of byte octets to the LPL * in a READ or a WRITE commands. - * @msleep_pre_rpl: Waiting time before checking reply in msec. * @rpl_exp_len: Expected reply length in bytes. * @flags: Validation flags for CDB commands. * @err_msg: Error message to be sent to user space. @@ -73,8 +73,8 @@ struct ethtool_cmis_cdb_request { struct ethtool_cmis_cdb_cmd_args { struct ethtool_cmis_cdb_request req; u16 max_duration; + u16 msleep_pre_rpl; u8 read_write_len_ext; - u8 msleep_pre_rpl; u8 rpl_exp_len; u8 flags; char *err_msg; diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c index 3670ca42dd40..f3a53a984460 100644 --- a/net/ethtool/cmis_cdb.c +++ b/net/ethtool/cmis_cdb.c @@ -513,8 +513,13 @@ static int cmis_cdb_process_reply(struct net_device *dev, } rpl = (struct ethtool_cmis_cdb_rpl *)page_data->data; - if ((args->rpl_exp_len > rpl->hdr.rpl_len + rpl_hdr_len) || - !rpl->hdr.rpl_chk_code) { + if (rpl->hdr.rpl_len != args->rpl_exp_len) { + netdev_warn(dev, "CDB reply length mismatch, expected %u got %u\n", + args->rpl_exp_len, rpl->hdr.rpl_len); + err = -EIO; + goto out; + } + if (!rpl->hdr.rpl_chk_code) { err = -EIO; goto out; } diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index df5f344209c4..291d04d2776a 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -44,6 +44,20 @@ enum cmis_cdb_fw_write_mechanism { CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11, }; +/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard + * revision 5.2. + * struct cmis_cdb_start_fw_download_pl is a structured layout of the + * flat array, ethtool_cmis_cdb_request::payload. + */ +struct cmis_cdb_start_fw_download_pl { + __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */, + __be32 image_size; + __be32 resv1; + ); + u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - + sizeof(struct cmis_cdb_start_fw_download_pl_h)]; +}; + static int cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, struct net_device *dev, @@ -86,6 +100,14 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, */ cdb->read_write_len_ext = rpl->read_write_len_ext; fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size; + if (fw_mng->start_cmd_payload_size > + sizeof_field(struct cmis_cdb_start_fw_download_pl, vendor_data)) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Start cmd payload size exceeds max LPL payload", + NULL); + return -EINVAL; + } + fw_mng->write_mechanism = rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL ? CMIS_CDB_FW_WRITE_MECHANISM_LPL : @@ -97,20 +119,6 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, return 0; } -/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard - * revision 5.2. - * struct cmis_cdb_start_fw_download_pl is a structured layout of the - * flat array, ethtool_cmis_cdb_request::payload. - */ -struct cmis_cdb_start_fw_download_pl { - __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */, - __be32 image_size; - __be32 resv1; - ); - u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - - sizeof(struct cmis_cdb_start_fw_download_pl_h)]; -}; - static int cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, struct ethtool_cmis_fw_update_params *fw_update, @@ -122,6 +130,14 @@ cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, u8 lpl_len; int err; + if (fw_update->fw->size < vendor_data_size) { + ethnl_module_fw_flash_ntf_err(fw_update->dev, + &fw_update->ntf_params, + "Firmware image too small for module's start payload", + NULL); + return -EINVAL; + } + pl.image_size = cpu_to_be32(fw_update->fw->size); memcpy(pl.vendor_data, fw_update->fw->data, vendor_data_size); diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 1e2c5c7048a8..e73fc3e5a02b 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -472,6 +472,12 @@ static int ethnl_update_profile(struct net_device *dev, nla_for_each_nested_type(nest, ETHTOOL_A_PROFILE_IRQ_MODERATION, nests, rem) { + if (i >= NET_DIM_PARAMS_NUM_PROFILES) { + NL_SET_BAD_ATTR(extack, nest); + ret = -E2BIG; + goto err_out; + } + ret = nla_parse_nested(tb, len_irq_moder - 1, nest, coalesce_irq_moderation_policy, extack); diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index a557e3996c85..0b8cfeddb014 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -44,6 +44,9 @@ static int fallback_set_params(struct eeprom_req_info *request, if (offset >= modinfo->eeprom_len) return -EINVAL; + if (length > modinfo->eeprom_len - offset) + return -EINVAL; + eeprom->cmd = ETHTOOL_GMODULEEEPROM; eeprom->len = length; eeprom->offset = offset; @@ -69,7 +72,7 @@ static int eeprom_fallback(struct eeprom_req_info *request, if (err < 0) return err; - data = kmalloc(eeprom.len, GFP_KERNEL); + data = kzalloc(eeprom.len, GFP_KERNEL); if (!data) return -ENOMEM; err = ethtool_get_module_eeprom_call(dev, &eeprom, data); @@ -141,12 +144,11 @@ static int eeprom_prepare_data(const struct ethnl_req_info *req_base, return 0; err_ops: + if (ret == -EOPNOTSUPP) + ret = eeprom_fallback(request, reply); ethnl_ops_complete(dev); err_free: kfree(page_data.data); - - if (ret == -EOPNOTSUPP) - return eeprom_fallback(request, reply); return ret; } diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 8a5985fd7712..24569e92942c 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -106,10 +106,8 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_LINKSTATE_HEADER, info->extack); - if (IS_ERR(phydev)) { - ret = PTR_ERR(phydev); - goto out; - } + if (IS_ERR(phydev)) + return PTR_ERR(phydev); ret = ethnl_ops_begin(dev); if (ret < 0) diff --git a/net/ethtool/module.c b/net/ethtool/module.c index cad2eb25b5a4..ea4fb2a76650 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -120,12 +120,6 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info, if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) return 0; - if (req_info->dev->ethtool->module_fw_flash_in_progress) { - NL_SET_ERR_MSG(info->extack, - "Module firmware flashing is in progress"); - return -EBUSY; - } - if (!ops->get_module_power_mode || !ops->set_module_power_mode) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY], @@ -148,6 +142,12 @@ ethnl_set_module(struct ethnl_req_info *req_info, struct genl_info *info) ops = dev->ethtool_ops; + if (dev->ethtool->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(info->extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]); ret = ops->get_module_power_mode(dev, &power, info->extack); if (ret < 0) @@ -221,14 +221,22 @@ static void module_flash_fw_work_list_del(struct list_head *list) static void module_flash_fw_work(struct work_struct *work) { struct ethtool_module_fw_flash *module_fw; + struct net_device *dev; module_fw = container_of(work, struct ethtool_module_fw_flash, work); + dev = module_fw->fw_update.dev; ethtool_cmis_fw_update(&module_fw->fw_update); module_flash_fw_work_list_del(&module_fw->list); - module_fw->fw_update.dev->ethtool->module_fw_flash_in_progress = false; - netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker); + + rtnl_lock(); + netdev_lock_ops(dev); + dev->ethtool->module_fw_flash_in_progress = false; + netdev_unlock_ops(dev); + rtnl_unlock(); + + netdev_put(dev, &module_fw->dev_tracker); release_firmware(module_fw->fw_update.fw); kfree(module_fw); } @@ -283,11 +291,9 @@ void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv) spin_lock(&module_fw_flash_work_list_lock); list_for_each_entry(work, &module_fw_flash_work_list, list) { - if (work->fw_update.dev == sk_priv->dev && - work->fw_update.ntf_params.portid == sk_priv->portid) { + if (work->fw_update.ntf_params.portid == sk_priv->portid && + dev_net(work->fw_update.dev) == sk_priv->net) work->fw_update.ntf_params.closed_sock = true; - break; - } } spin_unlock(&module_fw_flash_work_list_lock); } @@ -319,14 +325,13 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, if (err < 0) goto err_release_firmware; - dev->ethtool->module_fw_flash_in_progress = true; - netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); fw_update->dev = dev; fw_update->ntf_params.portid = info->snd_portid; fw_update->ntf_params.seq = info->snd_seq; fw_update->ntf_params.closed_sock = false; - err = ethnl_sock_priv_set(skb, dev, fw_update->ntf_params.portid, + err = ethnl_sock_priv_set(skb, dev_net(dev), + fw_update->ntf_params.portid, ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH); if (err < 0) goto err_release_firmware; @@ -335,6 +340,9 @@ module_flash_fw_schedule(struct net_device *dev, const char *file_name, if (err < 0) goto err_release_firmware; + dev->ethtool->module_fw_flash_in_progress = true; + netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); + schedule_work(&module_fw->work); return 0; @@ -427,10 +435,11 @@ int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info) ret = ethnl_module_fw_flash_validate(dev, info->extack); if (ret < 0) - goto out_unlock; + goto out_complete; ret = module_flash_fw(dev, tb, skb, info); +out_complete: ethnl_ops_complete(dev); out_unlock: diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5046023a30b1..7d45f9a884e5 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -53,7 +53,7 @@ const struct nla_policy ethnl_header_policy_phy_stats[] = { [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; -int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, +int ethnl_sock_priv_set(struct sk_buff *skb, struct net *net, u32 portid, enum ethnl_sock_type type) { struct ethnl_sock_priv *sk_priv; @@ -62,7 +62,7 @@ int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, if (IS_ERR(sk_priv)) return PTR_ERR(sk_priv); - sk_priv->dev = dev; + sk_priv->net = net; sk_priv->portid = portid; sk_priv->type = type; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index aaf6f2468768..fd2198e45d2b 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -318,12 +318,12 @@ enum ethnl_sock_type { }; struct ethnl_sock_priv { - struct net_device *dev; + struct net *net; u32 portid; enum ethnl_sock_type type; }; -int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, +int ethnl_sock_priv_set(struct sk_buff *skb, struct net *net, u32 portid, enum ethnl_sock_type type); /** diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 2eb9bdc2dcb9..757c9e0cc856 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -62,14 +62,14 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, struct phy_device *phydev; int ret; - ret = ethnl_ops_begin(dev); - if (ret < 0) - return ret; - phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PSE_HEADER, info->extack); if (IS_ERR(phydev)) - return -ENODEV; + return PTR_ERR(phydev); + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; ret = pse_get_pse_attributes(phydev, info->extack, data); diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 353110b862ab..53792f53f922 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -134,8 +134,7 @@ rss_get_data_alloc(struct net_device *dev, struct rss_reply_data *data) if (!rss_config) return -ENOMEM; - if (data->indir_size) - data->indir_table = (u32 *)rss_config; + data->indir_table = (u32 *)rss_config; if (data->hkey_size) data->hkey = rss_config + indir_bytes; @@ -170,8 +169,10 @@ rss_prepare_get(const struct rss_req_info *request, struct net_device *dev, rxfh.key = data->hkey; ret = ops->get_rxfh(dev, &rxfh); - if (ret) + if (ret) { + rss_get_data_free(data); goto out_unlock; + } data->hfunc = rxfh.hfunc; data->input_xfrm = rxfh.input_xfrm; @@ -686,7 +687,7 @@ rss_set_prep_indir(struct net_device *dev, struct genl_info *info, ethtool_rxfh_indir_default(i, num_rx_rings); } - *mod |= memcmp(rxfh->indir, data->indir_table, data->indir_size); + *mod |= memcmp(rxfh->indir, data->indir_table, alloc_size); return user_size; @@ -981,11 +982,17 @@ ethnl_rss_create_validate(struct net_device *dev, struct genl_info *info) } static void -ethnl_rss_create_send_ntf(struct sk_buff *rsp, struct net_device *dev) +ethnl_rss_create_send_ntf(const struct sk_buff *rsp, struct net_device *dev) { - struct nlmsghdr *nlh = (void *)rsp->data; struct genlmsghdr *genl_hdr; + struct nlmsghdr *nlh; + struct sk_buff *ntf; + + ntf = skb_copy_expand(rsp, 0, 0, GFP_KERNEL); + if (!ntf) + return; + nlh = nlmsg_hdr(ntf); /* Convert the reply into a notification */ nlh->nlmsg_pid = 0; nlh->nlmsg_seq = ethnl_bcast_seq_next(); @@ -993,7 +1000,7 @@ ethnl_rss_create_send_ntf(struct sk_buff *rsp, struct net_device *dev) genl_hdr = nlmsg_data(nlh); genl_hdr->cmd = ETHTOOL_MSG_RSS_CREATE_NTF; - ethnl_multicast(rsp, dev); + ethnl_multicast(ntf, dev); } int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) @@ -1099,17 +1106,13 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) ntf_fail |= rss_fill_reply(rsp, &req.base, &data.base); if (WARN_ON(!hdr || ntf_fail)) { ret = -EMSGSIZE; - goto exit_unlock; + goto err_remove_ctx; } genlmsg_end(rsp, hdr); - /* Use the same skb for the response and the notification, - * genlmsg_reply() will copy the skb if it has elevated user count. - */ - skb_get(rsp); - ret = genlmsg_reply(rsp, info); ethnl_rss_create_send_ntf(rsp, dev); + ret = genlmsg_reply(rsp, info); rsp = NULL; exit_unlock: @@ -1131,6 +1134,10 @@ exit_free_rsp: nlmsg_free(rsp); return ret; +err_remove_ctx: + if (ops->remove_rxfh_context(dev, ctx, req.rss_context, NULL)) + /* leave the context on failure, like ethnl_rss_delete_doit() */ + goto exit_unlock; err_ctx_id_free: xa_erase(&dev->ethtool->rss_ctx, req.rss_context); err_unlock_free_ctx: @@ -1168,8 +1175,10 @@ int ethnl_rss_delete_doit(struct sk_buff *skb, struct genl_info *info) dev = req.dev; ops = dev->ethtool_ops; - if (!ops->create_rxfh_context) + if (!ops->create_rxfh_context) { + ret = -EOPNOTSUPP; goto exit_free_dev; + } rtnl_lock(); netdev_lock_ops(dev); diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index bb1e829ba099..94c4718d31ae 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -311,7 +311,7 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, return 0; } - phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS, + phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_STRSET_HEADER, info->extack); /* phydev can be NULL, check for errors only */ diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index e4f518e49d4c..fc4f93cfa459 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -69,8 +69,10 @@ static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, if (ret) goto out; - if (ts_info.phc_index == -1) - return -ENODEV; + if (ts_info.phc_index == -1) { + ret = -ENODEV; + goto out; + } data->hwprov_desc.index = ts_info.phc_index; data->hwprov_desc.qualifier = ts_info.phc_qualifier; @@ -224,16 +226,21 @@ static int tsconfig_send_reply(struct net_device *dev, struct genl_info *info) reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_TSCONFIG_SET_REPLY, ETHTOOL_A_TSCONFIG_HEADER, info, &reply_payload); - if (!rskb) + if (!rskb) { + ret = -ENOMEM; goto err_cleanup; + } ret = tsconfig_fill_reply(rskb, &req_info->base, &reply_data->base); if (ret < 0) - goto err_cleanup; + goto err_free_msg; genlmsg_end(rskb, reply_payload); ret = genlmsg_reply(rskb, info); + rskb = NULL; +err_free_msg: + nlmsg_free(rskb); err_cleanup: kfree(reply_data); kfree(req_info); diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index a865f0fdd26b..14bf01e3b55c 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -83,6 +83,11 @@ tsinfo_parse_request(struct ethnl_req_info *req_base, if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER]) return 0; + if (req_base->flags & ETHTOOL_FLAG_STATS) { + NL_SET_ERR_MSG(extack, "can't query statistics for a provider"); + return -EOPNOTSUPP; + } + return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER], &req->hwprov_desc, extack, &mod); } @@ -402,10 +407,8 @@ static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb, continue; ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); - if (IS_ERR(ehdr)) { - ret = PTR_ERR(ehdr); - goto err; - } + if (IS_ERR(ehdr)) + return PTR_ERR(ehdr); reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier; ret = ops->get_ts_info(dev, &reply_data->ts_info); @@ -523,6 +526,12 @@ int ethnl_tsinfo_start(struct netlink_callback *cb) if (ret < 0) goto free_reply_data; + if (req_info->base.flags & ETHTOOL_FLAG_STATS) { + NL_SET_ERR_MSG(cb->extack, "stats not supported in dump"); + ret = -EOPNOTSUPP; + goto err_dev_put; + } + ctx->req_info = req_info; ctx->reply_data = reply_data; ctx->pos_ifindex = 0; @@ -532,6 +541,8 @@ int ethnl_tsinfo_start(struct netlink_callback *cb) return 0; +err_dev_put: + ethnl_parse_header_dev_put(&req_info->base); free_reply_data: kfree(reply_data); free_req_info: diff --git a/net/handshake/genl.c b/net/handshake/genl.c index 870612609491..4b20cd9cdd0e 100644 --- a/net/handshake/genl.c +++ b/net/handshake/genl.c @@ -10,6 +10,7 @@ #include "genl.h" #include <uapi/linux/handshake.h> +#include <linux/err.h> /* HANDSHAKE_CMD_ACCEPT - do */ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = { @@ -18,7 +19,7 @@ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HAN /* HANDSHAKE_CMD_DONE - do */ static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = { - [HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, }, + [HANDSHAKE_A_DONE_STATUS] = NLA_POLICY_MAX(NLA_U32, MAX_ERRNO), [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_S32, }, [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, }, }; diff --git a/net/handshake/genl.h b/net/handshake/genl.h index 8d3e18672daf..46b65f131669 100644 --- a/net/handshake/genl.h +++ b/net/handshake/genl.h @@ -11,6 +11,7 @@ #include <net/genetlink.h> #include <uapi/linux/handshake.h> +#include <linux/err.h> int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info); int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info); diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index 55442b2f518a..3dd507470d5f 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -25,7 +25,7 @@ static int test_accept_func(struct handshake_req *req, struct genl_info *info, return 0; } -static void test_done_func(struct handshake_req *req, unsigned int status, +static void test_done_func(struct handshake_req *req, int status, struct genl_info *info) { } @@ -208,6 +208,7 @@ static void handshake_req_submit_test3(struct kunit *test) static void handshake_req_submit_test4(struct kunit *test) { struct handshake_req *req, *result; + unsigned long fcount_before; struct socket *sock; struct file *filp; int err; @@ -224,8 +225,10 @@ static void handshake_req_submit_test4(struct kunit *test) KUNIT_ASSERT_NOT_NULL(test, sock->sk); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); /* Act */ result = handshake_req_hash_lookup(sock->sk); @@ -235,11 +238,13 @@ static void handshake_req_submit_test4(struct kunit *test) KUNIT_EXPECT_PTR_EQ(test, req, result); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_submit_test5(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct handshake_net *hn; struct socket *sock; @@ -265,12 +270,14 @@ static void handshake_req_submit_test5(struct kunit *test) saved = hn->hn_pending; hn->hn_pending = hn->hn_pending_max + 1; + fcount_before = file_count(filp); /* Act */ err = handshake_req_submit(sock, req, GFP_KERNEL); /* Assert */ KUNIT_EXPECT_EQ(test, err, -EAGAIN); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); hn->hn_pending = saved; @@ -279,6 +286,7 @@ static void handshake_req_submit_test5(struct kunit *test) static void handshake_req_submit_test6(struct kunit *test) { struct handshake_req *req1, *req2; + unsigned long fcount_before; struct socket *sock; struct file *filp; int err; @@ -296,21 +304,26 @@ static void handshake_req_submit_test6(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); KUNIT_ASSERT_NOT_NULL(test, sock->sk); sock->file = filp; + fcount_before = file_count(filp); /* Act */ err = handshake_req_submit(sock, req1, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); err = handshake_req_submit(sock, req2, GFP_KERNEL); /* Assert */ KUNIT_EXPECT_EQ(test, err, -EBUSY); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test1(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct socket *sock; struct file *filp; @@ -329,8 +342,10 @@ static void handshake_req_cancel_test1(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); /* NB: handshake_req hasn't been accepted */ @@ -339,12 +354,14 @@ static void handshake_req_cancel_test1(struct kunit *test) /* Assert */ KUNIT_EXPECT_TRUE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test2(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req, *next; struct handshake_net *hn; struct socket *sock; @@ -365,8 +382,10 @@ static void handshake_req_cancel_test2(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); net = sock_net(sock->sk); hn = handshake_pernet(net); @@ -375,18 +394,24 @@ static void handshake_req_cancel_test2(struct kunit *test) /* Pretend to accept this request */ next = handshake_req_next(hn, HANDSHAKE_HANDLER_CLASS_TLSHD); KUNIT_ASSERT_PTR_EQ(test, req, next); + /* Simulate FD_PREPARE() consuming the file reference handed + * off by handshake_req_next(); see handshake_nl_accept_doit(). + */ + fput(filp); /* Act */ result = handshake_req_cancel(sock->sk); /* Assert */ KUNIT_EXPECT_TRUE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } static void handshake_req_cancel_test3(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req, *next; struct handshake_net *hn; struct socket *sock; @@ -407,8 +432,10 @@ static void handshake_req_cancel_test3(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before + 1); net = sock_net(sock->sk); hn = handshake_pernet(net); @@ -417,15 +444,21 @@ static void handshake_req_cancel_test3(struct kunit *test) /* Pretend to accept this request */ next = handshake_req_next(hn, HANDSHAKE_HANDLER_CLASS_TLSHD); KUNIT_ASSERT_PTR_EQ(test, req, next); + /* Simulate FD_PREPARE() consuming the file reference handed + * off by handshake_req_next(); see handshake_nl_accept_doit(). + */ + fput(filp); /* Pretend to complete this request */ handshake_complete(next, -ETIMEDOUT, NULL); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); /* Act */ result = handshake_req_cancel(sock->sk); /* Assert */ KUNIT_EXPECT_FALSE(test, result); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); fput(filp); } @@ -446,6 +479,7 @@ static struct handshake_proto handshake_req_alloc_proto_destroy = { static void handshake_req_destroy_test1(struct kunit *test) { + unsigned long fcount_before; struct handshake_req *req; struct socket *sock; struct file *filp; @@ -465,10 +499,12 @@ static void handshake_req_destroy_test1(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filp); sock->file = filp; + fcount_before = file_count(filp); err = handshake_req_submit(sock, req, GFP_KERNEL); KUNIT_ASSERT_EQ(test, err, 0); handshake_req_cancel(sock->sk); + KUNIT_EXPECT_EQ(test, file_count(filp), fcount_before); /* Act */ /* Ensure the close/release/put process has run to diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h index a48163765a7a..da61cadd1ad3 100644 --- a/net/handshake/handshake.h +++ b/net/handshake/handshake.h @@ -24,6 +24,7 @@ enum hn_flags_bits { HANDSHAKE_F_NET_DRAINING, }; +struct file; struct handshake_proto; /* One handshake request */ @@ -32,6 +33,7 @@ struct handshake_req { struct rhash_head hr_rhash; unsigned long hr_flags; const struct handshake_proto *hr_proto; + struct file *hr_file; struct sock *hr_sk; void (*hr_odestruct)(struct sock *sk); @@ -57,7 +59,7 @@ struct handshake_proto { int (*hp_accept)(struct handshake_req *req, struct genl_info *info, int fd); void (*hp_done)(struct handshake_req *req, - unsigned int status, + int status, struct genl_info *info); void (*hp_destroy)(struct handshake_req *req); }; @@ -86,7 +88,7 @@ struct handshake_req *handshake_req_hash_lookup(struct sock *sk); struct handshake_req *handshake_req_next(struct handshake_net *hn, int class); int handshake_req_submit(struct socket *sock, struct handshake_req *req, gfp_t flags); -void handshake_complete(struct handshake_req *req, unsigned int status, +void handshake_complete(struct handshake_req *req, int status, struct genl_info *info); bool handshake_req_cancel(struct sock *sk); diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index b989456fc4c5..3fd4fef9bab1 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -92,7 +92,6 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) struct net *net = sock_net(skb->sk); struct handshake_net *hn = handshake_pernet(net); struct handshake_req *req = NULL; - struct socket *sock; int class, err; err = -EOPNOTSUPP; @@ -107,15 +106,13 @@ int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) err = -EAGAIN; req = handshake_req_next(hn, class); if (req) { - sock = req->hr_sk->sk_socket; - - FD_PREPARE(fdf, O_CLOEXEC, sock->file); + FD_PREPARE(fdf, O_CLOEXEC, req->hr_file); if (fdf.err) { + fput(req->hr_file); /* drop ref from handshake_req_next() */ err = fdf.err; goto out_complete; } - get_file(sock->file); /* FD_PREPARE() consumes a reference. */ err = req->hr_proto->hp_accept(req, info, fd_prepare_fd(fdf)); if (err) goto out_complete; /* Automatic cleanup handles fput */ @@ -160,7 +157,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) status = -EIO; if (info->attrs[HANDSHAKE_A_DONE_STATUS]) - status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); + status = -(int)nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); handshake_complete(req, status, info); sockfd_put(sock); @@ -202,21 +199,21 @@ static void __net_exit handshake_net_exit(struct net *net) * accepted and are in progress will be destroyed when * the socket is closed. */ - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags); - list_splice_init(&requests, &hn->hn_requests); - spin_unlock(&hn->hn_lock); + list_splice_init(&hn->hn_requests, &requests); + list_for_each_entry(req, &requests, hr_list) + get_file(req->hr_file); + spin_unlock_bh(&hn->hn_lock); while (!list_empty(&requests)) { - req = list_first_entry(&requests, struct handshake_req, hr_list); - list_del(&req->hr_list); - - /* - * Requests on this list have not yet been - * accepted, so they do not have an fd to put. - */ + struct file *file; + req = list_first_entry(&requests, struct handshake_req, hr_list); + file = req->hr_file; + list_del_init(&req->hr_list); handshake_complete(req, -ETIMEDOUT, NULL); + fput(file); } } diff --git a/net/handshake/request.c b/net/handshake/request.c index 2829adbeb149..cd30d54d0501 100644 --- a/net/handshake/request.c +++ b/net/handshake/request.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/inet.h> +#include <linux/file.h> #include <linux/rhashtable.h> #include <net/sock.h> @@ -162,35 +163,56 @@ static void __remove_pending_locked(struct handshake_net *hn, * otherwise %false. * * If @req was on a pending list, it has not yet been accepted. + * Returns %false when the net namespace is draining; the drain + * loop has taken ownership of the pending list. */ static bool remove_pending(struct handshake_net *hn, struct handshake_req *req) { bool ret = false; - spin_lock(&hn->hn_lock); - if (!list_empty(&req->hr_list)) { + spin_lock_bh(&hn->hn_lock); + if (!test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags) && + !list_empty(&req->hr_list)) { __remove_pending_locked(hn, req); ret = true; } - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); return ret; } +/** + * handshake_req_next - Return the next queued handshake request + * @hn: per-net handshake state + * @class: handler class to match + * + * On a non-NULL return, the caller owns an extra reference + * on @req->hr_file. FD_PREPARE() consumes it on success; on + * the FD_PREPARE() failure path the caller must fput() it. + * + * Return: pointer to a removed handshake_req, or NULL. + */ struct handshake_req *handshake_req_next(struct handshake_net *hn, int class) { struct handshake_req *req, *pos; req = NULL; - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); list_for_each_entry(pos, &hn->hn_requests, hr_list) { if (pos->hr_proto->hp_handler_class != class) continue; __remove_pending_locked(hn, pos); + /* Hand off a file reference to the accept side under + * hn_lock. A concurrent handshake_req_cancel() can drop + * hr_file before accept reaches FD_PREPARE(); this extra + * reference keeps the file alive until FD_PREPARE() takes + * ownership. + */ + get_file(pos->hr_file); req = pos; break; } - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); return req; } @@ -215,9 +237,16 @@ EXPORT_SYMBOL_IF_KUNIT(handshake_req_next); * A zero return value from handshake_req_submit() means that * exactly one subsequent completion callback is guaranteed. * - * A negative return value from handshake_req_submit() means that - * no completion callback will be done and that @req has been - * destroyed. + * A negative return value from handshake_req_submit() guarantees that + * no completion callback will occur and that @req is no longer owned by + * the caller. If cancellation wins the completion race after the request + * has been published, final destruction is deferred until socket teardown. + * + * The caller must hold a reference on @sock->file for the duration + * of this call. Once the request is published to the accept side, a + * concurrent completion or cancellation may release the request's pin on + * @sock->file; the caller's reference is what keeps @sock->sk valid until + * handshake_req_submit() returns. */ int handshake_req_submit(struct socket *sock, struct handshake_req *req, gfp_t flags) @@ -236,6 +265,14 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, kfree(req); return -EINVAL; } + + /* + * Pin sock->file for the lifetime of the request so the + * accept side does not race a consumer that releases the + * socket while a handshake is pending. + */ + req->hr_file = get_file(sock->file); + req->hr_odestruct = req->hr_sk->sk_destruct; req->hr_sk->sk_destruct = handshake_sk_destruct; @@ -249,7 +286,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, if (READ_ONCE(hn->hn_pending) >= hn->hn_pending_max) goto out_err; - spin_lock(&hn->hn_lock); + spin_lock_bh(&hn->hn_lock); ret = -EOPNOTSUPP; if (test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags)) goto out_unlock; @@ -258,7 +295,7 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, goto out_unlock; if (!__add_pending_locked(hn, req)) goto out_unlock; - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); ret = handshake_genl_notify(net, req->hr_proto, flags); if (ret) { @@ -267,35 +304,36 @@ int handshake_req_submit(struct socket *sock, struct handshake_req *req, goto out_err; } - /* Prevent socket release while a handshake request is pending */ - sock_hold(req->hr_sk); - trace_handshake_submit(net, req, req->hr_sk); return 0; out_unlock: - spin_unlock(&hn->hn_lock); + spin_unlock_bh(&hn->hn_lock); out_err: - /* Restore original destructor so socket teardown still runs on failure */ - req->hr_sk->sk_destruct = req->hr_odestruct; trace_handshake_submit_err(net, req, req->hr_sk, ret); - handshake_req_destroy(req); + if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + /* Restore original destructor so socket teardown still runs. */ + req->hr_sk->sk_destruct = req->hr_odestruct; + fput(req->hr_file); + handshake_req_destroy(req); + } return ret; } EXPORT_SYMBOL(handshake_req_submit); -void handshake_complete(struct handshake_req *req, unsigned int status, +void handshake_complete(struct handshake_req *req, int status, struct genl_info *info) { struct sock *sk = req->hr_sk; struct net *net = sock_net(sk); if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) { + struct file *file = req->hr_file; + trace_handshake_complete(net, req, sk, status); req->hr_proto->hp_done(req, status, info); - /* Handshake request is no longer pending */ - sock_put(sk); + fput(file); } } EXPORT_SYMBOL_IF_KUNIT(handshake_complete); @@ -342,8 +380,7 @@ bool handshake_req_cancel(struct sock *sk) out_true: trace_handshake_cancel(net, req, sk); - /* Handshake request is no longer pending */ - sock_put(sk); + fput(req->hr_file); return true; } EXPORT_SYMBOL(handshake_req_cancel); diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index 8f9532a15f43..7567150c2a4f 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -93,7 +93,7 @@ static void tls_handshake_remote_peerids(struct tls_handshake_req *treq, * */ static void tls_handshake_done(struct handshake_req *req, - unsigned int status, struct genl_info *info) + int status, struct genl_info *info) { struct tls_handshake_req *treq = handshake_req_private(req); @@ -104,7 +104,7 @@ static void tls_handshake_done(struct handshake_req *req, if (!status) set_bit(HANDSHAKE_F_REQ_SESSION, &req->hr_flags); - treq->th_consumer_done(treq->th_consumer_data, -status, + treq->th_consumer_done(treq->th_consumer_data, status, treq->th_peerid[0]); } @@ -425,6 +425,8 @@ EXPORT_SYMBOL(tls_server_hello_psk); * Request cancellation races with request completion. To determine * who won, callers examine the return value from this function. * + * Context: May be called from process or softirq context. + * * Return values: * %true - Uncompleted handshake request was canceled * %false - Handshake request already completed or not found diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 0aca859c88cb..f669a226d728 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -84,7 +84,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) /* Get next tlv */ total_length += hsr_sup_tag->tlv.HSR_TLV_length; - if (!pskb_may_pull(skb, total_length)) + if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_tlv))) return false; skb_pull(skb, total_length); hsr_sup_tlv = (struct hsr_sup_tlv *)skb->data; @@ -100,7 +100,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb) /* make sure another tlv follows */ total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tlv->HSR_TLV_length; - if (!pskb_may_pull(skb, total_length)) + if (!pskb_may_pull(skb, total_length + sizeof(struct hsr_sup_tlv))) return false; /* get next tlv */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 4366cbac3f06..6fd642d2278d 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -143,7 +143,7 @@ static void ah_output_done(void *data, int err) } kfree(AH_SKB_CB(skb)->tmp); - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } static int ah_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 6a5febbdbee4..513c8215c947 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -419,8 +419,8 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * return err; } - if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || - ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) + if (ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) > + PAGE_SIZE) goto cow; if (!skb_cloned(skb)) { diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 2667f53482bd..d3c677e9bff2 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -212,7 +212,7 @@ EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); */ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; struct icmphdr *icmph; struct iphdr *niph; struct ethhdr eh; @@ -226,7 +226,6 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); - skb_reset_network_header(skb); err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph)); if (err) @@ -236,7 +235,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN); if (err) return err; - + iph = ip_hdr(skb); icmph = skb_push(skb, sizeof(*icmph)); *icmph = (struct icmphdr) { .type = ICMP_DEST_UNREACH, @@ -281,7 +280,6 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu) */ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) { - const struct icmphdr *icmph = icmp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); if (mtu < 576 || iph->frag_off != htons(IP_DF)) @@ -292,9 +290,17 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr)) return 0; - if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type)) - return 0; + if (iph->protocol == IPPROTO_ICMP) { + const struct icmphdr *icmph; + if (!pskb_network_may_pull(skb, iph->ihl * 4 + + offsetofend(struct icmphdr, type))) + return 0; + iph = ip_hdr(skb); + icmph = (void *)iph + iph->ihl * 4; + if (icmp_is_err(icmph->type)) + return 0; + } return iptunnel_pmtud_build_icmp(skb, mtu); } @@ -308,7 +314,7 @@ static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu) */ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); + const struct ipv6hdr *ip6h; struct icmp6hdr *icmp6h; struct ipv6hdr *nip6h; struct ethhdr eh; @@ -323,7 +329,6 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN); pskb_pull(skb, ETH_HLEN); - skb_reset_network_header(skb); err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h)); if (err) @@ -334,6 +339,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) if (err) return err; + ip6h = ipv6_hdr(skb); icmp6h = skb_push(skb, sizeof(*icmp6h)); *icmp6h = (struct icmp6hdr) { .icmp6_type = ICMPV6_PKT_TOOBIG, diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d8bdb1bdbff1..c0e85cc171ae 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1705,10 +1705,10 @@ static __net_exit void ipv4_sysctl_exit_net(struct net *net) { const struct ctl_table *table; - kfree(net->ipv4.sysctl_local_reserved_ports); table = net->ipv4.ipv4_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.ipv4_hdr); kfree(table); + kfree(net->ipv4.sysctl_local_reserved_ports); } static __net_initdata struct pernet_operations ipv4_sysctl_ops = { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5476b6536eb7..bb84a78b80f6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1013,7 +1013,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) list_for_each(p, &idev->addr_list) { struct inet6_ifaddr *ifa = list_entry(p, struct inet6_ifaddr, if_list); - if (ifp_scope > ipv6_addr_src_scope(&ifa->addr)) + if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) break; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index de1e68199a01..76f7a2de9108 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -337,7 +337,7 @@ static void ah6_output_done(void *data, int err) ah6_restore_hdrs(top_iph, iph_ext, extlen); kfree(AH_SKB_CB(skb)->tmp); - xfrm_output_resume(skb->sk, skb, err); + xfrm_output_resume(skb_to_full_sk(skb), skb, err); } static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ca3605acb433..38d7b4845281 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -617,6 +617,18 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, } } +static u16 ipv6_get_exthdr_len(const struct sk_buff *skb, const u8 *ptr) +{ + u16 len; + + if (ptr + 2 > skb_tail_pointer(skb)) + return 0; + + len = (ptr[1] + 1) << 3; + + return (len <= skb_tail_pointer(skb) - ptr) ? len : 0; +} + void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { @@ -643,7 +655,10 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, /* HbH is allowed only once */ if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) { u8 *ptr = nh + sizeof(struct ipv6hdr); - put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, len, ptr); } if (opt->lastopt && @@ -664,26 +679,37 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, unsigned int len; u8 *ptr = nh + off; + if (ptr + 2 > skb_tail_pointer(skb)) + return; + switch (nexthdr) { case IPPROTO_DSTOPTS: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; if (np->rxopt.bits.dstopts) put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr); break; case IPPROTO_ROUTING: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; if (np->rxopt.bits.srcrt) put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr); break; case IPPROTO_AH: nexthdr = ptr[0]; len = (ptr[1] + 2) << 2; + if (ptr + len > skb_tail_pointer(skb)) + return; break; default: nexthdr = ptr[0]; - len = (ptr[1] + 1) << 3; + len = ipv6_get_exthdr_len(skb, ptr); + if (!len) + return; break; } @@ -705,19 +731,31 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) { u8 *ptr = nh + sizeof(struct ipv6hdr); - put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, len, ptr); } if (np->rxopt.bits.odstopts && opt->dst0) { u8 *ptr = nh + opt->dst0; - put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, len, ptr); } if (np->rxopt.bits.osrcrt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt); - put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr); + u16 len = ipv6_get_exthdr_len(skb, (u8 *)rthdr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, len, rthdr); } if (np->rxopt.bits.odstopts && opt->dst1) { u8 *ptr = nh + opt->dst1; - put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); + u16 len = ipv6_get_exthdr_len(skb, ptr); + + if (len) + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, len, ptr); } if (np->rxopt.bits.rxorigdstaddr) { struct sockaddr_in6 sin6; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9c06c5a1419d..57481e423e59 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -448,8 +448,8 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info return err; } - if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE || - ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE) + if (ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) > + PAGE_SIZE) goto cow; if (!skb_cloned(skb)) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index cf90f933ca1a..43f46ef9c53b 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -184,6 +184,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_CALIPSO: if (!ipv6_hop_calipso(skb, off)) @@ -201,6 +203,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_HAO: if (!ipv6_dest_hao(skb, off)) return false; + + nh = skb_network_header(skb); break; #endif default: @@ -544,7 +548,7 @@ looped_back: * unsigned char which is segments_left field. Should not be * higher than that. */ - if (r || (n + 1) > 255) { + if (r || (n + 1) > 127) { kfree_skb(skb); return -1; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ad5290be4dd6..df793c8bfffb 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -722,10 +722,11 @@ vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p, bool keep_mtu) { - struct net *net = dev_net(t->dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct net *net = t->net; + struct vti6_net *ip6n; int err; + ip6n = net_generic(net, vti6_net_id); vti6_tnl_unlink(ip6n, t); synchronize_net(); err = vti6_tnl_change(t, p, keep_mtu); @@ -834,17 +835,24 @@ vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data if (p.proto != IPPROTO_IPV6 && p.proto != 0) break; vti6_parm_from_user(&p1, &p); - t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + struct ip6_tnl *self = netdev_priv(dev); + + err = -EPERM; + if (!ns_capable(self->net->user_ns, CAP_NET_ADMIN)) + break; + t = vti6_locate(self->net, &p1, false); if (t) { if (t->dev != dev) { err = -EEXIST; break; } } else - t = netdev_priv(dev); + t = self; err = vti6_update(t, &p1, false); + } else { + t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); } if (t) { err = 0; @@ -1031,11 +1039,12 @@ static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6_tnl *t; + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct __ip6_tnl_parm p; - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n; + ip6n = net_generic(net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) return -EINVAL; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 8b2dba88ee96..c0a0075e2590 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -160,17 +160,33 @@ static bool nft_fib6_info_nh_dev_match(const struct net_device *nh_dev, l3mdev_master_ifindex_rcu(nh_dev) == dev->ifindex; } +static int nft_fib6_nh_match_dev_cb(struct fib6_nh *nh, void *arg) +{ + const struct net_device *dev = arg; + + return nft_fib6_info_nh_dev_match(nh->fib_nh_dev, dev); +} + static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt, const struct net_device *dev) { const struct net_device *nh_dev; struct fib6_info *iter; + /* External nexthop: fib6_siblings slot aliases nh_list, walk via nh. */ + if (rt->nh) + return nexthop_for_each_fib6_nh(rt->nh, + nft_fib6_nh_match_dev_cb, + (void *)dev); + nh_dev = fib6_info_nh_dev(rt); if (nft_fib6_info_nh_dev_match(nh_dev, dev)) return true; - list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (!READ_ONCE(rt->fib6_nsiblings)) + return false; + + list_for_each_entry_rcu(iter, &rt->fib6_siblings, fib6_siblings) { nh_dev = fib6_info_nh_dev(iter); if (nft_fib6_info_nh_dev_match(nh_dev, dev)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b106e5fef9cb..636f0120d7e3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -481,6 +481,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; + if (!READ_ONCE(first->fib6_nsiblings)) + break; + nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); if (hash > nh_upper_bound) continue; @@ -5902,6 +5905,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } + if (!READ_ONCE(rt->fib6_nsiblings)) + break; } rcu_read_unlock(); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 72dfccd4e3d5..c2dc3338670e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1540,7 +1540,7 @@ static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); unsigned int val; - int len; + int len, rc; if (level != SOL_IUCV) return -ENOPROTOOPT; @@ -1553,26 +1553,34 @@ static int iucv_sock_getsockopt(struct socket *sock, int level, int optname, len = min_t(unsigned int, len, sizeof(int)); + rc = 0; + + lock_sock(sk); switch (optname) { case SO_IPRMDATA_MSG: val = (iucv->flags & IUCV_IPRMDATA) ? 1 : 0; break; case SO_MSGLIMIT: - lock_sock(sk); val = (iucv->path != NULL) ? iucv->path->msglim /* connected */ : iucv->msglimit; /* default */ - release_sock(sk); break; case SO_MSGSIZE: - if (sk->sk_state == IUCV_OPEN) - return -EBADFD; + if (sk->sk_state == IUCV_OPEN) { + rc = -EBADFD; + break; + } val = (iucv->hs_dev) ? iucv->hs_dev->mtu - sizeof(struct af_iucv_trans_hdr) - ETH_HLEN : 0x7fffffff; break; default: - return -ENOPROTOOPT; + rc = -ENOPROTOOPT; + break; } + release_sock(sk); + + if (rc) + return rc; if (put_user(len, optlen)) return -EFAULT; diff --git a/net/key/af_key.c b/net/key/af_key.c index a166a88d8788..9cffeef18cd9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3564,7 +3564,7 @@ static int set_ipsecrequest(struct sk_buff *skb, #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { int i; @@ -3669,7 +3669,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, net); return 0; @@ -3680,7 +3680,7 @@ err: #else static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 1455f67e01dd..9419c8555d22 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -441,12 +441,13 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { if (tunnel) { list_for_each_entry_rcu(session, &tunnel->session_list, list) { - if (!strcmp(session->ifname, ifname)) { - refcount_inc(&session->ref_count); - rcu_read_unlock_bh(); + if (strcmp(session->ifname, ifname)) + continue; + if (!refcount_inc_not_zero(&session->ref_count)) + continue; + rcu_read_unlock_bh(); - return session; - } + return session; } } } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8ba5b22a1eef..b521b5ebd664 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -568,6 +568,13 @@ static void destroy_gre_conntrack(struct nf_conn *ct) #endif } +static void warn_on_keymap_list_leak(const struct net *net) +{ +#ifdef CONFIG_NF_CT_PROTO_GRE + WARN_ON_ONCE(!list_empty(&net->ct.nf_ct_proto.gre.keymap_list)); +#endif +} + void nf_ct_destroy(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; @@ -2510,6 +2517,7 @@ i_see_dead_people: } list_for_each_entry(net, net_exit_list, exit_list) { + warn_on_keymap_list_leak(net); nf_conntrack_ecache_pernet_fini(net); nf_conntrack_expect_pernet_fini(net); free_percpu(net->ct.stat); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 4c679638df06..dc23e4181618 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -225,13 +225,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) if (nf_ct_expect_related(exp_reply, 0) != 0) goto out_unexpect_orig; - /* Add GRE keymap entries */ - if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_ORIGINAL, &exp_orig->tuple) != 0) + if (!nf_ct_gre_keymap_add(ct, &exp_orig->tuple, + &exp_reply->tuple)) goto out_unexpect_both; - if (nf_ct_gre_keymap_add(ct, IP_CT_DIR_REPLY, &exp_reply->tuple) != 0) { - nf_ct_gre_keymap_destroy(ct); - goto out_unexpect_both; - } ret = 0; out_put_both: diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 94c19bc4edc5..35e22082d65a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -87,41 +87,97 @@ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) return key; } -/* add a single keymap entry, associate with specified master ct */ -int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, - struct nf_conntrack_tuple *t) +enum nf_ct_gre_km_act { + NF_CT_GRE_KM_NEW, + NF_CT_GRE_KM_BAD, + NF_CT_GRE_KM_DUP +}; + +static enum nf_ct_gre_km_act +nf_ct_gre_km_acceptable(const struct nf_ct_pptp_master *ct_pptp_info, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) +{ + struct nf_ct_gre_keymap *km_orig, *km_repl; + + lockdep_assert_held(&keymap_lock); + + km_orig = ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL]; + km_repl = ct_pptp_info->keymap[IP_CT_DIR_REPLY]; + + if (km_orig && km_repl) { + if (!gre_key_cmpfn(km_orig, orig)) + return NF_CT_GRE_KM_BAD; + + if (!gre_key_cmpfn(km_repl, repl)) + return NF_CT_GRE_KM_BAD; + + return NF_CT_GRE_KM_DUP; + } + + DEBUG_NET_WARN_ON_ONCE(km_orig); + DEBUG_NET_WARN_ON_ONCE(km_repl); + return NF_CT_GRE_KM_NEW; +} + +/* add keymap entries, associate with specified master ct */ +bool nf_ct_gre_keymap_add(struct nf_conn *ct, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_tuple *repl) { struct net *net = nf_ct_net(ct); struct nf_gre_net *net_gre = gre_pernet(net); struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); - struct nf_ct_gre_keymap **kmp, *km; - - kmp = &ct_pptp_info->keymap[dir]; - if (*kmp) { - /* check whether it's a retransmission */ - list_for_each_entry_rcu(km, &net_gre->keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *kmp) - return 0; - } - pr_debug("trying to override keymap_%s for ct %p\n", - dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); - return -EEXIST; - } + struct nf_ct_gre_keymap *km_orig, *km_repl; + bool ret = false; - km = kmalloc_obj(*km, GFP_ATOMIC); - if (!km) - return -ENOMEM; - memcpy(&km->tuple, t, sizeof(*t)); - *kmp = km; + km_orig = kmalloc_obj(*km_orig, GFP_ATOMIC); + if (!km_orig) + return false; + km_repl = kmalloc_obj(*km_repl, GFP_ATOMIC); + if (!km_repl) + goto km_free; - pr_debug("adding new entry %p: ", km); - nf_ct_dump_tuple(&km->tuple); + memcpy(&km_orig->tuple, orig, sizeof(*orig)); + memcpy(&km_repl->tuple, repl, sizeof(*repl)); spin_lock_bh(&keymap_lock); - list_add_tail(&km->list, &net_gre->keymap_list); + if (nf_ct_is_dying(ct)) + goto unlock_free; + + switch (nf_ct_gre_km_acceptable(ct_pptp_info, orig, repl)) { + case NF_CT_GRE_KM_NEW: + break; + case NF_CT_GRE_KM_DUP: + ret = true; + goto unlock_free; + case NF_CT_GRE_KM_BAD: + pr_debug("trying to override keymap for ct %p\n", ct); + goto unlock_free; + } + + if (ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL] || + ct_pptp_info->keymap[IP_CT_DIR_REPLY]) + goto unlock_free; + + pr_debug("adding new entries %p,%p: ", km_orig, km_repl); + nf_ct_dump_tuple(&km_orig->tuple); + nf_ct_dump_tuple(&km_repl->tuple); + + list_add_tail_rcu(&km_orig->list, &net_gre->keymap_list); + list_add_tail_rcu(&km_repl->list, &net_gre->keymap_list); + ct_pptp_info->keymap[IP_CT_DIR_ORIGINAL] = km_orig; + ct_pptp_info->keymap[IP_CT_DIR_REPLY] = km_repl; spin_unlock_bh(&keymap_lock); - return 0; + return true; + +unlock_free: + spin_unlock_bh(&keymap_lock); +km_free: + kfree(km_orig); + kfree(km_repl); + return ret; } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b67426c2189b..e99ab1e88e9f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1221,7 +1221,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, new_state = old_state; } if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) - && ct->proto.tcp.last_index == TCP_SYN_SET) + && ct->proto.tcp.last_index == TCP_SYN_SET + && ct->proto.tcp.last_dir != dir) || (!test_bit(IPS_ASSURED_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == ct->proto.tcp.last_end) { diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 57f57e2fc80a..036c8586f49b 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -200,6 +200,8 @@ synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff, if (skb_ensure_writable(skb, optend)) return 0; + th = (struct tcphdr *)(skb->data + protoff); + while (optoff < optend) { unsigned char *op = skb->data + optoff; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 984a0eb9e149..60ab88d45096 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1141,6 +1141,9 @@ nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int di { struct sk_buff *nskb; + if (e->state.net->user_ns != &init_user_ns) + return -EPERM; + if (diff < 0) { unsigned int min_len = skb_transport_offset(e->skb); @@ -1537,8 +1540,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info, if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), payload_len, entry, diff) < 0) verdict = NF_DROP; - - if (ct && diff) + else if (ct && diff) nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff); } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 94dccdcfa06b..785b8e9731d1 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -43,8 +43,10 @@ static void nft_bitwise_eval_lshift(u32 *dst, const u32 *src, u32 carry = 0; for (i = DIV_ROUND_UP(priv->len, sizeof(u32)); i > 0; i--) { - dst[i - 1] = (src[i - 1] << shift) | carry; - carry = src[i - 1] >> (BITS_PER_TYPE(u32) - shift); + u32 tmp_src = src[i - 1]; + + dst[i - 1] = (tmp_src << shift) | carry; + carry = tmp_src >> (BITS_PER_TYPE(u32) - shift); } } @@ -56,8 +58,10 @@ static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src, u32 carry = 0; for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++) { - dst[i] = carry | (src[i] >> shift); - carry = src[i] << (BITS_PER_TYPE(u32) - shift); + u32 tmp_src = src[i]; + + dst[i] = carry | (tmp_src >> shift); + carry = tmp_src << (BITS_PER_TYPE(u32) - shift); } } @@ -235,6 +239,9 @@ static int nft_bitwise_init_bool(const struct nft_ctx *ctx, &priv->sreg2, priv->len); if (err < 0) return err; + + if (nft_reg_overlap(priv->sreg2, priv->dreg, priv->len)) + return -EINVAL; } return 0; @@ -265,6 +272,9 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len)) + return -EINVAL; + if (tb[NFTA_BITWISE_OP]) { priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])); switch (priv->op) { diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e00dddfa2fc0..2316c77f4228 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -144,9 +144,16 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, if (err < 0) return err; - return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], - &priv->dreg, NULL, NFT_DATA_VALUE, - priv->len); + err = nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], + &priv->dreg, NULL, NFT_DATA_VALUE, + priv->len); + if (err < 0) + return err; + + if (nft_reg_overlap(priv->sreg, priv->dreg, priv->len)) + return -EINVAL; + + return 0; } static int nft_byteorder_dump(struct sk_buff *skb, diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 01e13e5255a9..484a5490832e 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -917,6 +917,9 @@ static int nft_payload_set_init(const struct nft_ctx *ctx, struct nft_payload_set *priv = nft_expr_priv(expr); int err; + if (ctx->net->user_ns != &init_user_ns) + return -EPERM; + priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c index 3bdc302a0f91..9cb259902a58 100644 --- a/net/netfilter/xt_cpu.c +++ b/net/netfilter/xt_cpu.c @@ -34,7 +34,7 @@ static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_cpu_info *info = par->matchinfo; - return (info->cpu == smp_processor_id()) ^ info->invert; + return (info->cpu == raw_smp_processor_id()) ^ info->invert; } static struct xt_match cpu_mt_reg __read_mostly = { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2aeb0680807d..7269e23b578d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1482,9 +1482,14 @@ static void do_one_broadcast(struct sock *sk, p->skb2 = NULL; goto out; } - NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); - if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) - NETLINK_CB(p->skb2).nsid_is_set = true; + + NETLINK_CB(p->skb2).nsid_is_set = false; + if (!net_eq(sock_net(sk), p->net)) { + NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); + if (NETLINK_CB(p->skb2).nsid != NETNSA_NSID_NOT_ASSIGNED) + NETLINK_CB(p->skb2).nsid_is_set = true; + } + val = netlink_broadcast_deliver(sk, p->skb2); if (val < 0) { netlink_overrun(sk); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 0d33c81a15fe..ba6f0310ffd7 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -861,6 +861,11 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) struct sk_buff *frag_skb; int msg_len; + if (!pskb_may_pull(skb, NFC_HCI_HCP_PACKET_HEADER_LEN)) { + kfree_skb(skb); + return; + } + packet = (struct hcp_packet *)skb->data; if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) { skb_queue_tail(&hdev->rx_hcp_frags, skb); @@ -904,6 +909,11 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. */ + if (!pskb_may_pull(hcp_skb, NFC_HCI_HCP_HEADER_LEN)) { + kfree_skb(hcp_skb); + return; + } + packet = (struct hcp_packet *)hcp_skb->data; type = HCP_MSG_GET_TYPE(packet->message.header); if (type == NFC_HCI_HCP_RESPONSE) { diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index db5bc6a878dd..dc65c719f35f 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1218,6 +1218,15 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, sk = &llcp_sock->sk; + lock_sock(sk); + + /* Check if socket was destroyed whilst waiting for the lock */ + if (!sk_hashed(sk)) { + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); + return; + } + /* Unlink from connecting and link to the client array */ nfc_llcp_sock_unlink(&local->connecting_sockets, sk); nfc_llcp_sock_link(&local->sockets, sk); @@ -1229,6 +1238,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, sk->sk_state = LLCP_CONNECTED; sk->sk_state_change(sk); + release_sock(sk); + nfc_llcp_sock_put(llcp_sock); } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index f1be1e84f665..feab29fc62f4 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -633,6 +633,8 @@ static int llcp_sock_release(struct socket *sock) if (sock->type == SOCK_RAW) nfc_llcp_sock_unlink(&local->raw_sockets, sk); + else if (sk->sk_state == LLCP_CONNECTING) + nfc_llcp_sock_unlink(&local->connecting_sockets, sk); else nfc_llcp_sock_unlink(&local->sockets, sk); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 40ae8e5a7ec7..c03e8a0bd3bd 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -439,6 +439,11 @@ void nci_hci_data_received_cb(void *context, return; } + if (!pskb_may_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN)) { + kfree_skb(skb); + return; + } + packet = (struct nci_hcp_packet *)skb->data; if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); @@ -482,6 +487,11 @@ void nci_hci_data_received_cb(void *context, * unblock waiting cmd context. Otherwise, enqueue to dispatch * in separate context where handler can also execute command. */ + if (!pskb_may_pull(hcp_skb, NCI_HCI_HCP_HEADER_LEN)) { + kfree_skb(hcp_skb); + return; + } + packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 2c5a7a321a94..553342c55cf7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -26,6 +26,10 @@ #include <net/tc_act/tc_mirred.h> #include <net/tc_wrapper.h> +#define MIRRED_DEFER_LIMIT 3 +_Static_assert(MIRRED_DEFER_LIMIT <= 3, + "MIRRED_DEFER_LIMIT exceeds tc_depth bitfield width"); + static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -234,12 +238,15 @@ tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; - if (!want_ingress) + if (!want_ingress) { err = tcf_dev_queue_xmit(skb, dev_queue_xmit); - else if (!at_ingress) - err = netif_rx(skb); - else - err = netif_receive_skb(skb); + } else { + skb->tc_depth++; + if (!at_ingress) + err = netif_rx(skb); + else + err = netif_receive_skb(skb); + } return err; } @@ -365,7 +372,8 @@ assign_prev: dev_is_mac_header_xmit(dev_prev), m_eaction, retval); - return retval; + /* If the packet wasn't redirected, we have to register as a drop */ + return TC_ACT_SHOT; } static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, @@ -389,14 +397,12 @@ static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m, static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, const u32 blockid, struct tcf_result *res, - int retval) + int m_eaction, int retval) { const u32 exception_ifindex = skb->dev->ifindex; struct tcf_block *block; bool is_redirect; - int m_eaction; - m_eaction = READ_ONCE(m->tcfm_eaction); is_redirect = tcf_mirred_is_act_redirect(m_eaction); /* we are already under rcu protection, so can call block lookup @@ -405,7 +411,7 @@ static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m, block = tcf_block_lookup(dev_net(skb->dev), blockid); if (!block || xa_empty(&block->ports)) { tcf_action_inc_overlimit_qstats(&m->common); - return retval; + return is_redirect ? TC_ACT_SHOT : retval; } if (is_redirect) @@ -423,9 +429,10 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, { struct tcf_mirred *m = to_mirred(a); int retval = READ_ONCE(m->tcf_action); + bool m_mac_header_xmit, is_redirect; struct netdev_xmit *xmit; - bool m_mac_header_xmit; struct net_device *dev; + bool want_ingress; int i, m_eaction; u32 blockid; @@ -434,7 +441,8 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, #else xmit = this_cpu_ptr(&softnet_data.xmit); #endif - if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT)) { + if (unlikely(xmit->sched_mirred_nest >= MIRRED_NEST_LIMIT || + skb->tc_depth >= MIRRED_DEFER_LIMIT)) { net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", netdev_name(skb->dev)); return TC_ACT_SHOT; @@ -444,34 +452,51 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, tcf_action_update_bstats(&m->common, skb); blockid = READ_ONCE(m->tcfm_blockid); - if (blockid) - return tcf_blockcast(skb, m, blockid, res, retval); + m_eaction = READ_ONCE(m->tcfm_eaction); + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (blockid) { + if (!want_ingress) + xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = NULL; + retval = tcf_blockcast(skb, m, blockid, res, m_eaction, retval); + if (!want_ingress) + xmit->sched_mirred_nest--; + return retval; + } + + is_redirect = tcf_mirred_is_act_redirect(m_eaction); dev = rcu_dereference_bh(m->tcfm_dev); if (unlikely(!dev)) { pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); - return retval; - } - for (i = 0; i < xmit->sched_mirred_nest; i++) { - if (xmit->sched_mirred_dev[i] != dev) - continue; - pr_notice_once("tc mirred: loop on device %s\n", - netdev_name(dev)); - tcf_action_inc_overlimit_qstats(&m->common); - return retval; + goto err_out; } - xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = dev; + if (!want_ingress) { + for (i = 0; i < xmit->sched_mirred_nest; i++) { + if (xmit->sched_mirred_dev[i] != dev) + continue; + pr_notice_once("tc mirred: loop on device %s\n", + netdev_name(dev)); + tcf_action_inc_overlimit_qstats(&m->common); + goto err_out; + } + xmit->sched_mirred_dev[xmit->sched_mirred_nest++] = dev; + } m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, retval); - xmit->sched_mirred_nest--; + if (!want_ingress) + xmit->sched_mirred_nest--; return retval; + +err_out: + if (is_redirect) + retval = TC_ACT_SHOT; + return retval; } static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index bc18e1976b6e..17a79fe2f091 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -461,7 +461,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb->prev = NULL; /* Random duplication */ - if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) + if (q->duplicate && skb->tc_depth == 0 && + q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) ++count; /* Drop packet? */ @@ -540,11 +541,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, */ if (skb2) { struct Qdisc *rootq = qdisc_root_bh(sch); - u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - q->duplicate = 0; + skb2->tc_depth++; /* prevent duplicating a dup... */ rootq->enqueue(skb2, rootq, to_free); - q->duplicate = dupsave; skb2 = NULL; } @@ -1007,41 +1006,6 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, return 0; } -static const struct Qdisc_class_ops netem_class_ops; - -static int check_netem_in_tree(struct Qdisc *sch, bool duplicates, - struct netlink_ext_ack *extack) -{ - struct Qdisc *root, *q; - unsigned int i; - - root = qdisc_root_sleeping(sch); - - if (sch != root && root->ops->cl_ops == &netem_class_ops) { - if (duplicates || - ((struct netem_sched_data *)qdisc_priv(root))->duplicate) - goto err; - } - - if (!qdisc_dev(root)) - return 0; - - hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) { - if (sch != q && q->ops->cl_ops == &netem_class_ops) { - if (duplicates || - ((struct netem_sched_data *)qdisc_priv(q))->duplicate) - goto err; - } - } - - return 0; - -err: - NL_SET_ERR_MSG(extack, - "netem: cannot mix duplicating netems with other netems in tree"); - return -EINVAL; -} - /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) @@ -1118,11 +1082,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt, q->gap = qopt->gap; q->counter = 0; q->loss = qopt->loss; - - ret = check_netem_in_tree(sch, qopt->duplicate, extack); - if (ret) - goto unlock; - q->duplicate = qopt->duplicate; /* for compatibility with earlier versions. diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1d2568bb6bc2..66e12fb0c646 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9403,6 +9403,8 @@ static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p) release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); + if (sk != asoc->base.sk) + goto do_error; *timeo_p = current_timeo; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index dffbd529762d..b5db69073e20 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -188,10 +188,12 @@ static bool smc_hs_congested(const struct sock *sk) struct smc_hashinfo smc_v4_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), + .ht = HLIST_HEAD_INIT, }; struct smc_hashinfo smc_v6_hashinfo = { .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), + .ht = HLIST_HEAD_INIT, }; int smc_hash_sk(struct sock *sk) @@ -3517,8 +3519,6 @@ static int __init smc_init(void) pr_err("%s: sock_register fails with %d\n", __func__, rc); goto out_proto6; } - INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); - INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); rc = smc_ib_register_client(); if (rc) { diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index b5474ce534fb..27dd6b58b8ff 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1348,6 +1348,9 @@ static void *__cache_seq_start(struct seq_file *m, loff_t *pos) hash = n >> 32; entry = n & ((1LL<<32) - 1); + if (hash >= cd->hash_size) + return NULL; + hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 44037b066a5f..2ce1063d4a67 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -642,7 +642,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) */ sock_reset_flag(sk, SOCK_DONE); sk->sk_state = TCP_CLOSE; - vsk->peer_shutdown = 0; + WRITE_ONCE(vsk->peer_shutdown, 0); } if (sk->sk_type == SOCK_SEQPACKET) { @@ -933,7 +933,7 @@ static struct sock *__vsock_create(struct net *net, vsk->rejected = false; vsk->sent_request = false; vsk->ignore_connecting_rst = false; - vsk->peer_shutdown = 0; + WRITE_ONCE(vsk->peer_shutdown, 0); INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout); INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work); @@ -1241,6 +1241,25 @@ out: return err; } +static __poll_t vsock_poll_shutdown(struct sock *sk, u32 peer_shutdown) +{ + __poll_t mask = 0; + + /* INET sockets treat local write shutdown and peer write shutdown as a + * case of EPOLLHUP set. + */ + if (sk->sk_shutdown == SHUTDOWN_MASK || + ((sk->sk_shutdown & SEND_SHUTDOWN) && + (peer_shutdown & SEND_SHUTDOWN))) + mask |= EPOLLHUP; + + if (sk->sk_shutdown & RCV_SHUTDOWN || + peer_shutdown & SEND_SHUTDOWN) + mask |= EPOLLRDHUP; + + return mask; +} + static __poll_t vsock_poll(struct file *file, struct socket *sock, poll_table *wait) { @@ -1258,24 +1277,17 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, /* Signify that there has been an error on this socket. */ mask |= EPOLLERR; - /* INET sockets treat local write shutdown and peer write shutdown as a - * case of EPOLLHUP set. - */ - if ((sk->sk_shutdown == SHUTDOWN_MASK) || - ((sk->sk_shutdown & SEND_SHUTDOWN) && - (vsk->peer_shutdown & SEND_SHUTDOWN))) { - mask |= EPOLLHUP; - } - - if (sk->sk_shutdown & RCV_SHUTDOWN || - vsk->peer_shutdown & SEND_SHUTDOWN) { - mask |= EPOLLRDHUP; - } - if (sk_is_readable(sk)) mask |= EPOLLIN | EPOLLRDNORM; if (sock->type == SOCK_DGRAM) { + u32 peer_shutdown = READ_ONCE(vsk->peer_shutdown); + + /* DGRAM sockets do not take lock_sock() in poll(), so use one + * lockless snapshot for all shutdown-derived mask bits. + */ + mask |= vsock_poll_shutdown(sk, peer_shutdown); + /* For datagram sockets we can read if there is something in * the queue and write as long as the socket isn't shutdown for * sending. @@ -1290,6 +1302,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, } else if (sock_type_connectible(sk->sk_type)) { const struct vsock_transport *transport; + u32 peer_shutdown; lock_sock(sk); @@ -1322,8 +1335,10 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, * terminated should also be considered read, and we check the * shutdown flag for that. */ + peer_shutdown = READ_ONCE(vsk->peer_shutdown); + mask |= vsock_poll_shutdown(sk, peer_shutdown); if (sk->sk_shutdown & RCV_SHUTDOWN || - vsk->peer_shutdown & SEND_SHUTDOWN) { + peer_shutdown & SEND_SHUTDOWN) { mask |= EPOLLIN | EPOLLRDNORM; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 7a8963595bf9..b3394946b2ed 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -264,7 +264,7 @@ static void hvs_do_close_lock_held(struct vsock_sock *vsk, struct sock *sk = sk_vsock(vsk); sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); @@ -593,7 +593,9 @@ static int hvs_update_recv_data(struct hvsock *hvs) return -EIO; if (payload_len == 0) - hvs->vsk->peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(hvs->vsk->peer_shutdown, + READ_ONCE(hvs->vsk->peer_shutdown) | + SEND_SHUTDOWN); hvs->recv_data_len = payload_len; hvs->recv_data_off = 0; @@ -736,7 +738,8 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk) return ret; return hvs->recv_data_len; case 0: - vsk->peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(vsk->peer_shutdown, + READ_ONCE(vsk->peer_shutdown) | SEND_SHUTDOWN); ret = 0; break; default: /* -1 */ diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index df3b418e0392..b10666937c49 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -205,6 +205,7 @@ static u16 virtio_transport_get_type(struct sock *sk) static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, size_t payload_len, bool zcopy, + struct ubuf_info *uarg, u32 src_cid, u32 src_port, u32 dst_cid, @@ -245,6 +246,12 @@ static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info * if (info->msg && payload_len > 0) { int err; + /* Bind the zerocopy lifetime before filling frags so error + * rollback frees managed fixed-buffer pages through + * the uarg-aware path. + */ + skb_zcopy_set(skb, uarg, NULL); + err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); if (err) goto out; @@ -364,6 +371,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, skb_len = min(max_skb_len, rest_len); skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy, + uarg, src_cid, src_port, dst_cid, dst_port); if (!skb) { @@ -371,8 +379,6 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } - skb_zcopy_set(skb, uarg, NULL); - virtio_transport_inc_tx_pkt(vvs, skb); ret = t_ops->send_pkt(skb, info->net); @@ -417,7 +423,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs, u32 len) { - u64 skb_overhead = (skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); + u64 skb_overhead = ((u64)skb_queue_len(&vvs->rx_queue) + 1) * SKB_TRUESIZE(0); /* Allow at most buf_alloc * 2 total budget (payload + overhead), * similar to how SO_RCVBUF is doubled to reserve space for sk_buff @@ -1183,7 +1189,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!t) return -ENOTCONN; - reply = virtio_transport_alloc_skb(&info, 0, false, + reply = virtio_transport_alloc_skb(&info, 0, false, NULL, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port), le64_to_cpu(hdr->src_cid), @@ -1228,7 +1234,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, struct sock *sk = sk_vsock(vsk); sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); @@ -1431,12 +1437,15 @@ virtio_transport_recv_connected(struct sock *sk, case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; - case VIRTIO_VSOCK_OP_SHUTDOWN: + case VIRTIO_VSOCK_OP_SHUTDOWN: { + u32 peer_shutdown = READ_ONCE(vsk->peer_shutdown); + if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV) - vsk->peer_shutdown |= RCV_SHUTDOWN; + peer_shutdown |= RCV_SHUTDOWN; if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND) - vsk->peer_shutdown |= SEND_SHUTDOWN; - if (vsk->peer_shutdown == SHUTDOWN_MASK) { + peer_shutdown |= SEND_SHUTDOWN; + WRITE_ONCE(vsk->peer_shutdown, peer_shutdown); + if (peer_shutdown == SHUTDOWN_MASK) { if (vsock_stream_has_data(vsk) <= 0 && !sock_flag(sk, SOCK_DONE)) { (void)virtio_transport_reset(vsk, NULL); virtio_transport_do_close(vsk, true); @@ -1451,6 +1460,7 @@ virtio_transport_recv_connected(struct sock *sk, if (le32_to_cpu(virtio_vsock_hdr(skb)->flags)) sk->sk_state_change(sk); break; + } case VIRTIO_VSOCK_OP_RST: virtio_transport_do_close(vsk, true); break; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index d2579380f51e..5c1ecd5bfdbc 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -819,7 +819,7 @@ static void vmci_transport_handle_detach(struct sock *sk) /* On a detach the peer will not be sending or receiving * anymore. */ - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); /* We should not be sending anymore since the peer won't be * there to receive, but we can still receive if there is data @@ -1542,7 +1542,9 @@ static int vmci_transport_recv_connected(struct sock *sk, if (pkt->u.mode) { vsk = vsock_sk(sk); - vsk->peer_shutdown |= pkt->u.mode; + WRITE_ONCE(vsk->peer_shutdown, + READ_ONCE(vsk->peer_shutdown) | + pkt->u.mode); sk->sk_state_change(sk); } break; @@ -1559,7 +1561,7 @@ static int vmci_transport_recv_connected(struct sock *sk, * a clean shutdown. */ sock_set_flag(sk, SOCK_DONE); - vsk->peer_shutdown = SHUTDOWN_MASK; + WRITE_ONCE(vsk->peer_shutdown, SHUTDOWN_MASK); if (vsock_stream_has_data(vsk) <= 0) sk->sk_state = TCP_CLOSING; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index f65291eba1f6..e4c2cd24936d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -797,9 +797,12 @@ static void xfrm_trans_reinject(struct work_struct *work) spin_unlock_bh(&trans->queue_lock); local_bh_disable(); - while ((skb = __skb_dequeue(&queue))) - XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net, - NULL, skb); + while ((skb = __skb_dequeue(&queue))) { + struct net *net = XFRM_TRANS_SKB_CB(skb)->net; + + XFRM_TRANS_SKB_CB(skb)->finish(net, NULL, skb); + put_net(net); + } local_bh_enable(); } @@ -808,6 +811,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, struct sk_buff *)) { struct xfrm_trans_tasklet *trans; + struct net *hold_net; trans = this_cpu_ptr(&xfrm_trans_tasklet); @@ -816,8 +820,12 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb, BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb)); + hold_net = maybe_get_net(net); + if (!hold_net) + return -ENODEV; + XFRM_TRANS_SKB_CB(skb)->finish = finish; - XFRM_TRANS_SKB_CB(skb)->net = net; + XFRM_TRANS_SKB_CB(skb)->net = hold_net; spin_lock_bh(&trans->queue_lock); __skb_queue_tail(&trans->queue, skb); spin_unlock_bh(&trans->queue_lock); diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 5f38dff16177..671d48f8c937 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -51,11 +51,15 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) struct scatterlist *dsg; int len, dlen; - if (unlikely(err)) - goto out_free_req; + if (unlikely(!req)) + return err; extra = acomp_request_extra(req); dsg = extra->sg; + + if (unlikely(err)) + goto out_free_req; + dlen = req->dlen; pskb_trim_unique(skb, 0); @@ -84,10 +88,10 @@ static int ipcomp_post_acomp(struct sk_buff *skb, int err, int hlen) skb_shinfo(skb)->nr_frags++; } while ((dlen -= len)); - for (; dsg; dsg = sg_next(dsg)) +out_free_req: + for (; dsg && sg_page(dsg); dsg = sg_next(dsg)) __free_page(sg_page(dsg)); -out_free_req: acomp_request_free(req); return err; } diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 97bc979e55ba..6c6bbc040517 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -2650,7 +2650,8 @@ static void __iptfs_init_state(struct xfrm_state *x, x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr); /* Always keep a module reference when x->mode_data is set */ - __module_get(x->mode_cbs->owner); + if (x->mode_data != xtfs) + __module_get(x->mode_cbs->owner); x->mode_data = xtfs; xtfs->x = x; @@ -2658,22 +2659,39 @@ static void __iptfs_init_state(struct xfrm_state *x, static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) { + struct skb_wseq *w_saved = NULL; struct xfrm_iptfs_data *xtfs; xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL); if (!xtfs) return -ENOMEM; - xtfs->ra_newskb = NULL; if (xtfs->cfg.reorder_win_size) { - xtfs->w_saved = kzalloc_objs(*xtfs->w_saved, - xtfs->cfg.reorder_win_size); - if (!xtfs->w_saved) { + w_saved = kzalloc_objs(*w_saved, xtfs->cfg.reorder_win_size); + if (!w_saved) { kfree_sensitive(xtfs); return -ENOMEM; } } + xtfs->w_saved = w_saved; + + __skb_queue_head_init(&xtfs->queue); + xtfs->queue_size = 0; + hrtimer_setup(&xtfs->iptfs_timer, iptfs_delay_timer, CLOCK_MONOTONIC, + IPTFS_HRTIMER_MODE); + + spin_lock_init(&xtfs->drop_lock); + hrtimer_setup(&xtfs->drop_timer, iptfs_drop_timer, CLOCK_MONOTONIC, + IPTFS_HRTIMER_MODE); + xtfs->w_seq_set = false; + xtfs->w_wantseq = 0; + xtfs->w_savedlen = 0; + xtfs->ra_newskb = NULL; + xtfs->ra_wantseq = 0; + xtfs->ra_runtlen = 0; + + __module_get(x->mode_cbs->owner); x->mode_data = xtfs; xtfs->x = x; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c944327ce66c..dd09d2063da2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4276,21 +4276,21 @@ out_byidx: return -ENOMEM; } -static void xfrm_policy_fini(struct net *net) +static void __net_exit xfrm_net_pre_exit(struct net *net) { - struct xfrm_pol_inexact_bin *b, *t; - unsigned int sz; - int dir; - disable_work_sync(&net->xfrm.policy_hthresh.work); - flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); #endif xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); +} - synchronize_rcu(); +static void xfrm_policy_fini(struct net *net) +{ + struct xfrm_pol_inexact_bin *b, *t; + unsigned int sz; + int dir; WARN_ON(!list_empty(&net->xfrm.policy_all)); @@ -4368,6 +4368,7 @@ static void __net_exit xfrm_net_exit(struct net *net) static struct pernet_operations __net_initdata xfrm_net_ops = { .init = xfrm_net_init, + .pre_exit = xfrm_net_pre_exit, .exit = xfrm_net_exit, }; @@ -4703,7 +4704,7 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate, k, encap); + km_migrate(sel, dir, type, m, num_migrate, k, net, encap); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 686014d39429..589c3b6e4679 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2837,7 +2837,7 @@ EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { int err = -EINVAL; @@ -2848,7 +2848,7 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, list_for_each_entry_rcu(km, &xfrm_km_list, list) { if (km->migrate) { ret = km->migrate(sel, dir, type, m, num_migrate, k, - encap); + net, encap); if (!ret) err = ret; } @@ -3114,10 +3114,14 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; u32 blksize, net_adj = 0; + u32 overhead, payload_mtu; if (x->km.state != XFRM_STATE_VALID || - !type || type->proto != IPPROTO_ESP) + !type || type->proto != IPPROTO_ESP) { + if (mtu <= x->props.header_len) + return 1; return mtu - x->props.header_len; + } aead = x->data; blksize = ALIGN(crypto_aead_blocksize(aead), 4); @@ -3140,8 +3144,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) break; } - return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - - net_adj) & ~(blksize - 1)) + net_adj - 2; + overhead = x->props.header_len + crypto_aead_authsize(aead) + net_adj; + if (mtu <= overhead) + return 1; + + payload_mtu = mtu - overhead; + payload_mtu &= ~(blksize - 1); + if (payload_mtu <= 2) + return 1; + + return payload_mtu + net_adj - 2; + } EXPORT_SYMBOL_GPL(xfrm_state_mtu); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 38a90e5ee3d9..71a4b7278eba 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -3271,10 +3271,9 @@ out_cancel: static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { - struct net *net = &init_net; struct sk_buff *skb; int err; @@ -3292,7 +3291,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, #else static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, - const struct xfrm_kmaddress *k, + const struct xfrm_kmaddress *k, struct net *net, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; |
