diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-26 08:00:13 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-26 08:00:13 -0800 |
| commit | b9c8fc2caea6ff7e45c6942de8fee53515c66b34 (patch) | |
| tree | b5f52beddc862ff040c0fb8250b1e4fc6243fa40 /net | |
| parent | f4d0ec0aa20d49f09dc01d82894ce80d72de0560 (diff) | |
| parent | baed0d9ba91d4f390da12d5039128ee897253d60 (diff) | |
Merge tag 'net-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
"Including fixes from IPsec, Bluetooth and netfilter
Current release - regressions:
- wifi: fix dev_alloc_name() return value check
- rds: fix recursive lock in rds_tcp_conn_slots_available
Current release - new code bugs:
- vsock: lock down child_ns_mode as write-once
Previous releases - regressions:
- core:
- do not pass flow_id to set_rps_cpu()
- consume xmit errors of GSO frames
- netconsole: avoid OOB reads, msg is not nul-terminated
- netfilter: h323: fix OOB read in decode_choice()
- tcp: re-enable acceptance of FIN packets when RWIN is 0
- udplite: fix null-ptr-deref in __udp_enqueue_schedule_skb().
- wifi: brcmfmac: fix potential kernel oops when probe fails
- phy: register phy led_triggers during probe to avoid AB-BA deadlock
- eth:
- bnxt_en: fix deleting of Ntuple filters
- wan: farsync: fix use-after-free bugs caused by unfinished tasklets
- xscale: check for PTP support properly
Previous releases - always broken:
- tcp: fix potential race in tcp_v6_syn_recv_sock()
- kcm: fix zero-frag skb in frag_list on partial sendmsg error
- xfrm:
- fix race condition in espintcp_close()
- always flush state and policy upon NETDEV_UNREGISTER event
- bluetooth:
- purge error queues in socket destructors
- fix response to L2CAP_ECRED_CONN_REQ
- eth:
- mlx5:
- fix circular locking dependency in dump
- fix "scheduling while atomic" in IPsec MAC address query
- gve: fix incorrect buffer cleanup for QPL
- team: avoid NETDEV_CHANGEMTU event when unregistering slave
- usb: validate USB endpoints"
* tag 'net-7.0-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (72 commits)
netfilter: nf_conntrack_h323: fix OOB read in decode_choice()
dpaa2-switch: validate num_ifs to prevent out-of-bounds write
net: consume xmit errors of GSO frames
vsock: document write-once behavior of the child_ns_mode sysctl
vsock: lock down child_ns_mode as write-once
selftests/vsock: change tests to respect write-once child ns mode
net/mlx5e: Fix "scheduling while atomic" in IPsec MAC address query
net/mlx5: Fix missing devlink lock in SRIOV enable error path
net/mlx5: E-switch, Clear legacy flag when moving to switchdev
net/mlx5: LAG, disable MPESW in lag_disable_change()
net/mlx5: DR, Fix circular locking dependency in dump
selftests: team: Add a reference count leak test
team: avoid NETDEV_CHANGEMTU event when unregistering slave
net: mana: Fix double destroy_workqueue on service rescan PCI path
MAINTAINERS: Update maintainer entry for QUALCOMM ETHQOS ETHERNET DRIVER
dpll: zl3073x: Remove redundant cleanup in devm_dpll_init()
selftests/net: packetdrill: Verify acceptance of FIN packets when RWIN is 0
tcp: re-enable acceptance of FIN packets when RWIN is 0
vsock: Use container_of() to get net namespace in sysctl handlers
net: usb: kaweth: validate USB endpoints
...
Diffstat (limited to 'net')
37 files changed, 325 insertions, 166 deletions
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4e7bf63af9c5..0290dea081f6 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -2166,6 +2166,7 @@ static void hci_sock_destruct(struct sock *sk) mgmt_cleanup(sk); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static const struct proto_ops hci_sock_ops = { diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 44c205ff9fa8..121dbc8208ec 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -4592,7 +4592,7 @@ static int hci_le_set_host_features_sync(struct hci_dev *hdev) { int err; - if (iso_capable(hdev)) { + if (cis_capable(hdev)) { /* Connected Isochronous Channels (Host Support) */ err = hci_le_set_host_feature_sync(hdev, 32, (iso_enabled(hdev) ? 0x01 : diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index ec61db89936b..be145e2736b7 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -746,6 +746,7 @@ static void iso_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void iso_sock_cleanup_listen(struct sock *parent) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2ad1cb4cbde7..ad98db9632fd 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4916,6 +4916,13 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response_unlock; } + /* Check if Key Size is sufficient for the security level */ + if (!l2cap_check_enc_key_size(conn->hcon, pchan)) { + result = L2CAP_CR_LE_BAD_KEY_SIZE; + chan = NULL; + goto response_unlock; + } + /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { result = L2CAP_CR_LE_INVALID_SCID; @@ -5051,13 +5058,15 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, struct l2cap_chan *chan, *pchan; u16 mtu, mps; __le16 psm; - u8 result, len = 0; + u8 result, rsp_len = 0; int i, num_scid; bool defer = false; if (!enable_ecred) return -EINVAL; + memset(pdu, 0, sizeof(*pdu)); + if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; @@ -5066,6 +5075,9 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); + /* Always respond with the same number of scids as in the request */ + rsp_len = cmd_len; + if (num_scid > L2CAP_ECRED_MAX_CID) { result = L2CAP_CR_LE_INVALID_PARAMS; goto response; @@ -5075,7 +5087,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, mps = __le16_to_cpu(req->mps); if (mtu < L2CAP_ECRED_MIN_MTU || mps < L2CAP_ECRED_MIN_MPS) { - result = L2CAP_CR_LE_UNACCEPT_PARAMS; + result = L2CAP_CR_LE_INVALID_PARAMS; goto response; } @@ -5095,8 +5107,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); - memset(pdu, 0, sizeof(*pdu)); - /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); @@ -5109,7 +5119,16 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, if (!smp_sufficient_security(conn->hcon, pchan->sec_level, SMP_ALLOW_STK)) { - result = L2CAP_CR_LE_AUTHENTICATION; + result = pchan->sec_level == BT_SECURITY_MEDIUM ? + L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION; + goto unlock; + } + + /* Check if the listening channel has set an output MTU then the + * requested MTU shall be less than or equal to that value. + */ + if (pchan->omtu && mtu < pchan->omtu) { + result = L2CAP_CR_LE_UNACCEPT_PARAMS; goto unlock; } @@ -5121,7 +5140,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, BT_DBG("scid[%d] 0x%4.4x", i, scid); pdu->dcid[i] = 0x0000; - len += sizeof(*pdu->dcid); /* Check for valid dynamic CID range */ if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { @@ -5188,7 +5206,7 @@ response: return 0; l2cap_send_cmd(conn, cmd->ident, L2CAP_ECRED_CONN_RSP, - sizeof(*pdu) + len, pdu); + sizeof(*pdu) + rsp_len, pdu); return 0; } @@ -5310,14 +5328,14 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, struct l2cap_ecred_reconf_req *req = (void *) data; struct l2cap_ecred_reconf_rsp rsp; u16 mtu, mps, result; - struct l2cap_chan *chan; + struct l2cap_chan *chan[L2CAP_ECRED_MAX_CID] = {}; int i, num_scid; if (!enable_ecred) return -EINVAL; - if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) { - result = L2CAP_CR_LE_INVALID_PARAMS; + if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) { + result = L2CAP_RECONF_INVALID_CID; goto respond; } @@ -5327,42 +5345,69 @@ static inline int l2cap_ecred_reconf_req(struct l2cap_conn *conn, BT_DBG("mtu %u mps %u", mtu, mps); if (mtu < L2CAP_ECRED_MIN_MTU) { - result = L2CAP_RECONF_INVALID_MTU; + result = L2CAP_RECONF_INVALID_PARAMS; goto respond; } if (mps < L2CAP_ECRED_MIN_MPS) { - result = L2CAP_RECONF_INVALID_MPS; + result = L2CAP_RECONF_INVALID_PARAMS; goto respond; } cmd_len -= sizeof(*req); num_scid = cmd_len / sizeof(u16); + + if (num_scid > L2CAP_ECRED_MAX_CID) { + result = L2CAP_RECONF_INVALID_PARAMS; + goto respond; + } + result = L2CAP_RECONF_SUCCESS; + /* Check if each SCID, MTU and MPS are valid */ for (i = 0; i < num_scid; i++) { u16 scid; scid = __le16_to_cpu(req->scid[i]); - if (!scid) - return -EPROTO; + if (!scid) { + result = L2CAP_RECONF_INVALID_CID; + goto respond; + } - chan = __l2cap_get_chan_by_dcid(conn, scid); - if (!chan) - continue; + chan[i] = __l2cap_get_chan_by_dcid(conn, scid); + if (!chan[i]) { + result = L2CAP_RECONF_INVALID_CID; + goto respond; + } - /* If the MTU value is decreased for any of the included - * channels, then the receiver shall disconnect all - * included channels. + /* The MTU field shall be greater than or equal to the greatest + * current MTU size of these channels. */ - if (chan->omtu > mtu) { - BT_ERR("chan %p decreased MTU %u -> %u", chan, - chan->omtu, mtu); + if (chan[i]->omtu > mtu) { + BT_ERR("chan %p decreased MTU %u -> %u", chan[i], + chan[i]->omtu, mtu); result = L2CAP_RECONF_INVALID_MTU; + goto respond; } - chan->omtu = mtu; - chan->remote_mps = mps; + /* If more than one channel is being configured, the MPS field + * shall be greater than or equal to the current MPS size of + * each of these channels. If only one channel is being + * configured, the MPS field may be less than the current MPS + * of that channel. + */ + if (chan[i]->remote_mps >= mps && i) { + BT_ERR("chan %p decreased MPS %u -> %u", chan[i], + chan[i]->remote_mps, mps); + result = L2CAP_RECONF_INVALID_MPS; + goto respond; + } + } + + /* Commit the new MTU and MPS values after checking they are valid */ + for (i = 0; i < num_scid; i++) { + chan[i]->omtu = mtu; + chan[i]->remote_mps = mps; } respond: diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 25f097cb1685..597686790371 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1029,10 +1029,17 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - /* Setting is not supported as it's the remote side that - * decides this. - */ - err = -EPERM; + /* Only allow setting output MTU when not connected */ + if (sk->sk_state == BT_CONNECTED) { + err = -EISCONN; + break; + } + + err = copy_safe_from_sockptr(&mtu, sizeof(mtu), optval, optlen); + if (err) + break; + + chan->omtu = mtu; break; case BT_RCVMTU: @@ -1816,6 +1823,7 @@ static void l2cap_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void l2cap_skb_msg_name(struct sk_buff *skb, void *msg_name, diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 70a378ff168e..e7db50165879 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -470,6 +470,7 @@ static void sco_sock_destruct(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + skb_queue_purge(&sk->sk_error_queue); } static void sco_sock_cleanup_listen(struct sock *parent) diff --git a/net/core/dev.c b/net/core/dev.c index 6ff4256700e6..c1a9f7fdcffa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4822,6 +4822,8 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) * to -1 or to their cpu id, but not to our id. */ if (READ_ONCE(txq->xmit_lock_owner) != cpu) { + bool is_list = false; + if (dev_xmit_recursion()) goto recursion_alert; @@ -4832,17 +4834,28 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { + is_list = !!skb->next; + dev_xmit_recursion_inc(); skb = dev_hard_start_xmit(skb, dev, txq, &rc); dev_xmit_recursion_dec(); - if (dev_xmit_complete(rc)) { - HARD_TX_UNLOCK(dev, txq); - goto out; - } + + /* GSO segments a single SKB into + * a list of frames. TCP expects error + * to mean none of the data was sent. + */ + if (is_list) + rc = NETDEV_TX_OK; } HARD_TX_UNLOCK(dev, txq); + if (!skb) /* xmit completed */ + goto out; + net_crit_ratelimited("Virtual device %s asks to queue packet!\n", dev->name); + /* NETDEV_TX_BUSY or queue was stopped */ + if (!is_list) + rc = -ENETDOWN; } else { /* Recursion is detected! It is possible, * unfortunately @@ -4850,10 +4863,10 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) recursion_alert: net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", dev->name); + rc = -ENETDOWN; } } - rc = -ENETDOWN; rcu_read_unlock_bh(); dev_core_stats_tx_dropped_inc(dev); @@ -4992,8 +5005,7 @@ static bool rps_flow_is_active(struct rps_dev_flow *rflow, static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, - struct rps_dev_flow *rflow, u16 next_cpu, u32 hash, - u32 flow_id) + struct rps_dev_flow *rflow, u16 next_cpu, u32 hash) { if (next_cpu < nr_cpu_ids) { u32 head; @@ -5004,6 +5016,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *tmp_rflow; unsigned int tmp_cpu; u16 rxq_index; + u32 flow_id; int rc; /* Should we steer this flow to a different hardware queue? */ @@ -5019,6 +5032,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (!flow_table) goto out; + flow_id = rfs_slot(hash, flow_table); tmp_rflow = &flow_table->flows[flow_id]; tmp_cpu = READ_ONCE(tmp_rflow->cpu); @@ -5066,7 +5080,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow_table *flow_table; struct rps_map *map; int cpu = -1; - u32 flow_id; u32 tcpu; u32 hash; @@ -5113,8 +5126,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - flow_id = rfs_slot(hash, flow_table); - rflow = &flow_table->flows[flow_id]; + rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; tcpu = rflow->cpu; /* @@ -5133,8 +5145,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, ((int)(READ_ONCE(per_cpu(softnet_data, tcpu).input_queue_head) - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; - rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash, - flow_id); + rflow = set_rps_cpu(dev, skb, rflow, next_cpu, hash); } if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dc47d3efc72e..0e217041958a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5590,15 +5590,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { - bool ret; + struct socket *sock; + struct file *file; + bool ret = false; if (likely(tsonly || READ_ONCE(sock_net(sk)->core.sysctl_tstamp_allow_data))) return true; - read_lock_bh(&sk->sk_callback_lock); - ret = sk->sk_socket && sk->sk_socket->file && - file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); - read_unlock_bh(&sk->sk_callback_lock); + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + rcu_read_lock(); + sock = READ_ONCE(sk->sk_socket); + if (!sock) + goto out; + file = READ_ONCE(sock->file); + if (!file) + goto out; + ret = file_ns_capable(file, &init_user_ns, CAP_NET_RAW); +out: + rcu_read_unlock(); return ret; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 569befcf021b..061751aabc8e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -203,7 +203,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, bool own_req; child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, - NULL, &own_req); + NULL, &own_req, NULL); if (child) { refcount_set(&req->rsk_refcnt, 1); sock_rps_save_rxhash(child, skb); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 1b7ba2cf3efe..9fdc19accafd 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -333,7 +333,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, bool own_req; child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - NULL, &own_req); + NULL, &own_req, NULL); if (!child) return NULL; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 65a7a5ea8eb7..41c57efd125c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4858,15 +4858,24 @@ static enum skb_drop_reason tcp_disordered_ack_check(const struct sock *sk, */ static enum skb_drop_reason tcp_sequence(const struct sock *sk, - u32 seq, u32 end_seq) + u32 seq, u32 end_seq, + const struct tcphdr *th) { const struct tcp_sock *tp = tcp_sk(sk); + u32 seq_limit; if (before(end_seq, tp->rcv_wup)) return SKB_DROP_REASON_TCP_OLD_SEQUENCE; - if (after(end_seq, tp->rcv_nxt + tcp_receive_window(tp))) { - if (after(seq, tp->rcv_nxt + tcp_receive_window(tp))) + seq_limit = tp->rcv_nxt + tcp_receive_window(tp); + if (unlikely(after(end_seq, seq_limit))) { + /* Some stacks are known to handle FIN incorrectly; allow the + * FIN to extend beyond the window and check it in detail later. + */ + if (!after(end_seq - th->fin, seq_limit)) + return SKB_NOT_DROPPED_YET; + + if (after(seq, seq_limit)) return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; /* Only accept this packet if receive queue is empty. */ @@ -6379,7 +6388,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, step1: /* Step 1: check sequence number */ - reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(skb)->end_seq, th); if (reason) { /* RFC793, page 37: "In all states except SYN-SENT, all reset * (RST) segments are validated by checking their SEQ-fields." diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 63a8b174cf99..d53d39be291a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1705,7 +1705,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct inet_request_sock *ireq; bool found_dup_sk = false; @@ -1757,6 +1759,10 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); +#if IS_ENABLED(CONFIG_IPV6) + if (opt_child_init) + opt_child_init(newsk, sk); +#endif tcp_ca_openreq_child(newsk, dst); tcp_sync_mss(newsk, dst4_mtu(dst)); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ec128865f5c0..d9c5a43bd281 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -925,7 +925,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * socket is created, wait for troubles. */ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, - req, &own_req); + req, &own_req, NULL); if (!child) goto listen_overflow; diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index d3e621a11a1a..826e9e79eb19 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -20,10 +20,9 @@ EXPORT_SYMBOL(udplite_table); /* Designate sk as UDP-Lite socket */ static int udplite_sk_init(struct sock *sk) { - udp_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); - return 0; + return udp_init_sock(sk); } static int udplite_rcv(struct sk_buff *skb) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d10487b4e5bf..e46a0efae012 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1312,11 +1312,48 @@ static void tcp_v6_restore_cb(struct sk_buff *skb) sizeof(struct inet6_skb_parm)); } +/* Called from tcp_v4_syn_recv_sock() for v6_mapped children. */ +static void tcp_v6_mapped_child_init(struct sock *newsk, const struct sock *sk) +{ + struct inet_sock *newinet = inet_sk(newsk); + struct ipv6_pinfo *newnp; + + newinet->pinet6 = newnp = tcp_inet6_sk(newsk); + newinet->ipv6_fl_list = NULL; + + memcpy(newnp, tcp_inet6_sk(sk), sizeof(struct ipv6_pinfo)); + + newnp->saddr = newsk->sk_v6_rcv_saddr; + + inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; + if (sk_is_mptcp(newsk)) + mptcpv6_handle_mapped(newsk, true); + newsk->sk_backlog_rcv = tcp_v4_do_rcv; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) + tcp_sk(newsk)->af_specific = &tcp_sock_ipv6_mapped_specific; +#endif + + newnp->ipv6_mc_list = NULL; + newnp->ipv6_ac_list = NULL; + newnp->pktoptions = NULL; + newnp->opt = NULL; + + /* tcp_v4_syn_recv_sock() has initialized newinet->mc_{index,ttl} */ + newnp->mcast_oif = newinet->mc_index; + newnp->mcast_hops = newinet->mc_ttl; + + newnp->rcv_flowinfo = 0; + if (inet6_test_bit(REPFLOW, sk)) + newnp->flow_label = 0; +} + static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct inet_request_sock *ireq; @@ -1332,61 +1369,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * #endif struct flowi6 fl6; - if (skb->protocol == htons(ETH_P_IP)) { - /* - * v6 mapped - */ - - newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, - req_unhash, own_req); - - if (!newsk) - return NULL; - - newinet = inet_sk(newsk); - newinet->pinet6 = tcp_inet6_sk(newsk); - newinet->ipv6_fl_list = NULL; - - newnp = tcp_inet6_sk(newsk); - newtp = tcp_sk(newsk); - - memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - - newnp->saddr = newsk->sk_v6_rcv_saddr; - - inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; - if (sk_is_mptcp(newsk)) - mptcpv6_handle_mapped(newsk, true); - newsk->sk_backlog_rcv = tcp_v4_do_rcv; -#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) - newtp->af_specific = &tcp_sock_ipv6_mapped_specific; -#endif - - newnp->ipv6_mc_list = NULL; - newnp->ipv6_ac_list = NULL; - newnp->pktoptions = NULL; - newnp->opt = NULL; - newnp->mcast_oif = inet_iif(skb); - newnp->mcast_hops = ip_hdr(skb)->ttl; - newnp->rcv_flowinfo = 0; - if (inet6_test_bit(REPFLOW, sk)) - newnp->flow_label = 0; - - /* - * No need to charge this sock to the relevant IPv6 refcnt debug socks count - * here, tcp_create_openreq_child now does this for us, see the comment in - * that function for the gory details. -acme - */ - - /* It is tricky place. Until this moment IPv4 tcp - worked with IPv6 icsk.icsk_af_ops. - Sync it now. - */ - tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); - - return newsk; - } - + if (skb->protocol == htons(ETH_P_IP)) + return tcp_v4_syn_recv_sock(sk, skb, req, dst, + req_unhash, own_req, + tcp_v6_mapped_child_init); ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 2cec542437f7..e867721cda4d 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -16,10 +16,9 @@ static int udplitev6_sk_init(struct sock *sk) { - udpv6_init_sock(sk); pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " "please contact the netdev mailing list\n"); - return 0; + return udpv6_init_sock(sk); } static int udplitev6_rcv(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 1f19b6f14484..125ea9a5b8a0 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -57,6 +57,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, struct dst_entry *dst; struct net_device *dev; struct inet6_dev *idev; + int err; dst = xfrm6_dst_lookup(params); if (IS_ERR(dst)) @@ -68,9 +69,11 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, return -EHOSTUNREACH; } dev = idev->dev; - ipv6_dev_get_saddr(dev_net(dev), dev, ¶ms->daddr->in6, 0, - &saddr->in6); + err = ipv6_dev_get_saddr(dev_net(dev), dev, ¶ms->daddr->in6, 0, + &saddr->in6); dst_release(dst); + if (err) + return -EHOSTUNREACH; return 0; } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 5dd7e0509a48..3912e75079f5 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -628,7 +628,7 @@ retry: skb = txm->frag_skb; } - if (WARN_ON(!skb_shinfo(skb)->nr_frags) || + if (WARN_ON_ONCE(!skb_shinfo(skb)->nr_frags) || WARN_ON_ONCE(!skb_frag_page(&skb_shinfo(skb)->frags[0]))) { ret = -EINVAL; goto out; @@ -749,7 +749,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); - struct sk_buff *skb = NULL, *head = NULL; + struct sk_buff *skb = NULL, *head = NULL, *frag_prev = NULL; size_t copy, copied = 0; long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); int eor = (sock->type == SOCK_DGRAM) ? @@ -824,6 +824,7 @@ start: else skb->next = tskb; + frag_prev = skb; skb = tskb; skb->ip_summed = CHECKSUM_UNNECESSARY; continue; @@ -933,6 +934,22 @@ partial_message: out_error: kcm_push(kcm); + /* When MAX_SKB_FRAGS was reached, a new skb was allocated and + * linked into the frag_list before data copy. If the copy + * subsequently failed, this skb has zero frags. Remove it from + * the frag_list to prevent kcm_write_msgs from later hitting + * WARN_ON(!skb_shinfo(skb)->nr_frags). + */ + if (frag_prev && !skb_shinfo(skb)->nr_frags) { + if (head == frag_prev) + skb_shinfo(head)->frag_list = NULL; + else + frag_prev->next = NULL; + kfree_skb(skb); + /* Update skb as it may be saved in partial_message via goto */ + skb = frag_prev; + } + if (sock->type == SOCK_SEQPACKET) { /* Wrote some bytes before encountering an * error, return partial success. diff --git a/net/mac80211/link.c b/net/mac80211/link.c index f589450754a9..03bfca27d205 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -281,6 +281,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]; struct ieee80211_link_data *old_data[IEEE80211_MLD_MAX_NUM_LINKS]; bool use_deflink = old_links == 0; /* set for error case */ + bool non_sta = sdata->vif.type != NL80211_IFTYPE_STATION; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -337,6 +338,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, link = links[link_id]; ieee80211_link_init(sdata, link_id, &link->data, &link->conf); ieee80211_link_setup(&link->data); + ieee80211_set_wmm_default(&link->data, true, non_sta); } if (new_links == 0) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index a7e71eef3660..28624e57aa49 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1635,6 +1635,9 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, if (!mesh_matches_local(sdata, elems)) goto free; + if (!elems->mesh_chansw_params_ie) + goto free; + ifmsh->chsw_ttl = elems->mesh_chansw_params_ie->mesh_ttl; if (!--ifmsh->chsw_ttl) fwd_csa = false; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e8edcee0e08a..810bea1aacc5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7085,6 +7085,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, control = le16_to_cpu(prof->control); link_id = control & IEEE80211_MLE_STA_RECONF_CONTROL_LINK_ID; + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + continue; + removed_links |= BIT(link_id); /* the MAC address should not be included, but handle it */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 7ef1d2bd26e4..6716970693e9 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -808,7 +808,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; @@ -855,7 +857,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, create_child: child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, - req_unhash, own_req); + req_unhash, own_req, opt_child_init); if (child && *own_req) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 540d97715bd2..62aa22a07876 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -796,7 +796,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, if (ext || (son->attr & OPEN)) { BYTE_ALIGN(bs); - if (nf_h323_error_boundary(bs, len, 0)) + if (nf_h323_error_boundary(bs, 2, 0)) return H323_ERROR_BOUND; len = get_len(bs); if (nf_h323_error_boundary(bs, len, 0)) diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index c5c42b5a00d5..d4c04c923c5a 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -166,9 +166,46 @@ static void psp_write_headers(struct net *net, struct sk_buff *skb, __be32 spi, { struct udphdr *uh = udp_hdr(skb); struct psphdr *psph = (struct psphdr *)(uh + 1); + const struct sock *sk = skb->sk; uh->dest = htons(PSP_DEFAULT_UDP_PORT); - uh->source = udp_flow_src_port(net, skb, 0, 0, false); + + /* A bit of theory: Selection of the source port. + * + * We need some entropy, so that multiple flows use different + * source ports for better RSS spreading at the receiver. + * + * We also need that all packets belonging to one TCP flow + * use the same source port through their duration, + * so that all these packets land in the same receive queue. + * + * udp_flow_src_port() is using sk_txhash, inherited from + * skb_set_hash_from_sk() call in __tcp_transmit_skb(). + * This field is subject to reshuffling, thanks to + * sk_rethink_txhash() calls in various TCP functions. + * + * Instead, use sk->sk_hash which is constant through + * the whole flow duration. + */ + if (likely(sk)) { + u32 hash = sk->sk_hash; + int min, max; + + /* These operations are cheap, no need to cache the result + * in another socket field. + */ + inet_get_local_port_range(net, &min, &max); + /* Since this is being sent on the wire obfuscate hash a bit + * to minimize possibility that any useful information to an + * attacker is leaked. Only upper 16 bits are relevant in the + * computation for 16 bit port value because we use a + * reciprocal divide. + */ + hash ^= hash << 16; + uh->source = htons((((u64)hash * (max - min)) >> 32) + min); + } else { + uh->source = udp_flow_src_port(net, skb, 0, 0, false); + } uh->check = 0; uh->len = htons(udp_len); diff --git a/net/rds/connection.c b/net/rds/connection.c index e23fd9a628ac..412441aaa298 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -455,6 +455,9 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rcu_read_unlock(); } + /* we do not hold the socket lock here but it is safe because + * fan-out is disabled when calling conn_slots_available() + */ if (conn->c_trans->conn_slots_available) conn->c_trans->conn_slots_available(conn, false); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index b4ab68a1da6d..08a506aa7ce7 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -59,30 +59,12 @@ void rds_tcp_keepalive(struct socket *sock) static int rds_tcp_get_peer_sport(struct socket *sock) { - union { - struct sockaddr_storage storage; - struct sockaddr addr; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; - } saddr; - int sport; - - if (kernel_getpeername(sock, &saddr.addr) >= 0) { - switch (saddr.addr.sa_family) { - case AF_INET: - sport = ntohs(saddr.sin.sin_port); - break; - case AF_INET6: - sport = ntohs(saddr.sin6.sin6_port); - break; - default: - sport = -1; - } - } else { - sport = -1; - } + struct sock *sk = sock->sk; + + if (!sk) + return -1; - return sport; + return ntohs(READ_ONCE(inet_sk(sk)->inet_dport)); } /* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7072556b38f6..d0119afcc6a1 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -124,7 +124,9 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, - bool *own_req) + bool *own_req, + void (*opt_child_init)(struct sock *newsk, + const struct sock *sk)) { struct smc_sock *smc; struct sock *child; @@ -142,7 +144,7 @@ static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk, /* passthrough to original syn recv sock fct */ child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash, - own_req); + own_req, opt_child_init); /* child must not inherit smc or its ops */ if (child) { rcu_assign_sk_user_data(child, NULL); diff --git a/net/socket.c b/net/socket.c index 136b98c54fb3..05952188127f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -674,7 +674,7 @@ static void __sock_release(struct socket *sock, struct inode *inode) iput(SOCK_INODE(sock)); return; } - sock->file = NULL; + WRITE_ONCE(sock->file, NULL); } /** diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 8e129cf52245..253c72d1366e 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -348,7 +348,8 @@ static bool tipc_service_insert_publ(struct net *net, /* Return if the publication already exists */ list_for_each_entry(_p, &sr->all_publ, all_publ) { - if (_p->key == key && (!_p->sk.node || _p->sk.node == node)) { + if (_p->key == key && _p->sk.ref == p->sk.ref && + (!_p->sk.node || _p->sk.node == node)) { pr_debug("Failed to bind duplicate %u,%u,%u/%u:%u/%u\n", p->sr.type, p->sr.lower, p->sr.upper, node, p->sk.ref, key); @@ -388,7 +389,8 @@ static struct publication *tipc_service_remove_publ(struct service_range *r, u32 node = sk->node; list_for_each_entry(p, &r->all_publ, all_publ) { - if (p->key != key || (node && node != p->sk.node)) + if (p->key != key || p->sk.ref != sk->ref || + (node && node != p->sk.node)) continue; list_del(&p->all_publ); list_del(&p->local_publ); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7877c238e758..5fe07f110fe8 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2533,7 +2533,7 @@ void tls_sw_cancel_work_tx(struct tls_context *tls_ctx) set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask); set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask); - cancel_delayed_work_sync(&ctx->tx_work.work); + disable_delayed_work_sync(&ctx->tx_work.work); } void tls_sw_release_resources_tx(struct sock *sk) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9880756d9eff..2f7d94d682cb 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -90,16 +90,20 @@ * * - /proc/sys/net/vsock/ns_mode (read-only) reports the current namespace's * mode, which is set at namespace creation and immutable thereafter. - * - /proc/sys/net/vsock/child_ns_mode (writable) controls what mode future + * - /proc/sys/net/vsock/child_ns_mode (write-once) controls what mode future * child namespaces will inherit when created. The initial value matches * the namespace's own ns_mode. * * Changing child_ns_mode only affects newly created namespaces, not the * current namespace or existing children. A "local" namespace cannot set - * child_ns_mode to "global". At namespace creation, ns_mode is inherited - * from the parent's child_ns_mode. + * child_ns_mode to "global". child_ns_mode is write-once, so that it may be + * configured and locked down by a namespace manager. Writing a different + * value after the first write returns -EBUSY. At namespace creation, ns_mode + * is inherited from the parent's child_ns_mode. * - * The init_net mode is "global" and cannot be modified. + * The init_net mode is "global" and cannot be modified. The init_net + * child_ns_mode is also write-once, so an init process (e.g. systemd) can + * set it to "local" to ensure all new namespaces inherit local mode. * * The modes affect the allocation and accessibility of CIDs as follows: * @@ -2825,7 +2829,7 @@ static int vsock_net_mode_string(const struct ctl_table *table, int write, if (write) return -EPERM; - net = current->nsproxy->net_ns; + net = container_of(table->data, struct net, vsock.mode); return __vsock_net_mode_string(table, write, buffer, lenp, ppos, vsock_net_mode(net), NULL); @@ -2838,7 +2842,7 @@ static int vsock_net_child_mode_string(const struct ctl_table *table, int write, struct net *net; int ret; - net = current->nsproxy->net_ns; + net = container_of(table->data, struct net, vsock.child_ns_mode); ret = __vsock_net_mode_string(table, write, buffer, lenp, ppos, vsock_net_child_mode(net), &new_mode); @@ -2853,7 +2857,8 @@ static int vsock_net_child_mode_string(const struct ctl_table *table, int write, new_mode == VSOCK_NET_MODE_GLOBAL) return -EPERM; - vsock_net_set_child_mode(net, new_mode); + if (!vsock_net_set_child_mode(net, new_mode)) + return -EBUSY; } return 0; diff --git a/net/wireless/core.c b/net/wireless/core.c index 4b0645d56344..28ca4290ca99 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1211,6 +1211,7 @@ void wiphy_unregister(struct wiphy *wiphy) /* this has nothing to do now but make sure it's gone */ cancel_work_sync(&rdev->wiphy_work); + cancel_work_sync(&rdev->rfkill_block); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 326faea38ca3..c85eaa583a46 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -239,14 +239,14 @@ int ieee80211_radiotap_iterator_next( default: if (!iterator->current_namespace || iterator->_arg_index >= iterator->current_namespace->n_bits) { - if (iterator->current_namespace == &radiotap_ns) - return -ENOENT; align = 0; } else { align = iterator->current_namespace->align_size[iterator->_arg_index].align; size = iterator->current_namespace->align_size[iterator->_arg_index].size; } if (!align) { + if (iterator->current_namespace == &radiotap_ns) + return -ENOENT; /* skip all subsequent data */ iterator->_arg = iterator->_next_ns_data; /* give up on this namespace */ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 55783a7f51c2..5a70a0120343 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -683,7 +683,7 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, idx = erq->flags & IW_ENCODE_INDEX; if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { - if (idx < 4 || idx > 5) { + if (idx < 5 || idx > 6) { idx = wdev->wext.default_mgmt_key; if (idx < 0) return -EINVAL; diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 61f1cef89bc1..e1b11ab59f6e 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -536,7 +536,7 @@ static void espintcp_close(struct sock *sk, long timeout) sk->sk_prot = &tcp_prot; barrier(); - cancel_work_sync(&ctx->work); + disable_work_sync(&ctx->work); strp_done(&ctx->strp); skb_queue_purge(&ctx->out_queue); diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 52ae0e034d29..550457e4c4f0 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -544,6 +544,14 @@ static int xfrm_dev_down(struct net_device *dev) return NOTIFY_DONE; } +static int xfrm_dev_unregister(struct net_device *dev) +{ + xfrm_dev_state_flush(dev_net(dev), dev, true); + xfrm_dev_policy_flush(dev_net(dev), dev, true); + + return NOTIFY_DONE; +} + static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -556,8 +564,10 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void return xfrm_api_check(dev); case NETDEV_DOWN: - case NETDEV_UNREGISTER: return xfrm_dev_down(dev); + + case NETDEV_UNREGISTER: + return xfrm_dev_unregister(dev); } return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ee8bb445c4d3..9e1a522ab1c5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3801,8 +3801,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; + int i, k = 0; int ti = 0; - int i, k; sp = skb_sec_path(skb); if (!sp) @@ -3828,6 +3828,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp = stp; } + if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET && sp == &dummy) + /* This policy template was already checked by HW + * and secpath was removed in __xfrm_policy_check2. + */ + goto out; + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement @@ -3837,7 +3843,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, * verified to allow them to be skipped in future policy * checks (e.g. nested tunnels). */ - for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + for (i = xfrm_nr - 1; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family, if_id); if (k < 0) { if (k < -1) @@ -3853,6 +3859,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, goto reject; } +out: xfrm_pols_put(pols, npols); sp->verified_cnt = k; |
