Diffstat (limited to 'net')
-rw-r--r--  net/Makefile | 2
-rw-r--r--  net/bluetooth/eir.c | 10
-rw-r--r--  net/bluetooth/hci_core.c | 16
-rw-r--r--  net/bluetooth/hci_sync.c | 20
-rw-r--r--  net/bluetooth/l2cap_core.c | 18
-rw-r--r--  net/bluetooth/mgmt.c | 149
-rw-r--r--  net/bluetooth/mgmt_util.c | 49
-rw-r--r--  net/bluetooth/mgmt_util.h | 8
-rw-r--r--  net/bridge/br_mdb.c | 2
-rw-r--r--  net/bridge/br_nf_core.c | 7
-rw-r--r--  net/bridge/br_private.h | 1
-rw-r--r--  net/bridge/netfilter/nf_conntrack_bridge.c | 12
-rw-r--r--  net/can/bcm.c | 79
-rw-r--r--  net/core/pktgen.c | 13
-rw-r--r--  net/core/scm.c | 17
-rw-r--r--  net/core/skmsg.c | 53
-rw-r--r--  net/dsa/tag_brcm.c | 2
-rw-r--r--  net/ipv4/esp4.c | 49
-rw-r--r--  net/ipv4/fib_frontend.c | 18
-rw-r--r--  net/ipv4/fib_rules.c | 4
-rw-r--r--  net/ipv4/fib_trie.c | 22
-rw-r--r--  net/ipv4/inet_hashtables.c | 37
-rw-r--r--  net/ipv4/ip_gre.c | 16
-rw-r--r--  net/ipv4/tcp_input.c | 56
-rw-r--r--  net/ipv4/udp_offload.c | 5
-rw-r--r--  net/ipv6/esp6.c | 49
-rw-r--r--  net/ipv6/fib6_rules.c | 4
-rw-r--r--  net/ipv6/ip6_output.c | 9
-rw-r--r--  net/ipv6/netfilter.c | 12
-rw-r--r--  net/ipv6/netfilter/nft_fib_ipv6.c | 13
-rw-r--r--  net/ipv6/seg6_local.c | 6
-rw-r--r--  net/llc/af_llc.c | 8
-rw-r--r--  net/mac80211/main.c | 6
-rw-r--r--  net/mac80211/mlme.c | 4
-rw-r--r--  net/mctp/device.c | 65
-rw-r--r--  net/mctp/route.c | 4
-rw-r--r--  net/ncsi/internal.h | 21
-rw-r--r--  net/ncsi/ncsi-pkt.h | 23
-rw-r--r--  net/ncsi/ncsi-rsp.c | 21
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 12
-rw-r--r--  net/netfilter/nf_nat_core.c | 12
-rw-r--r--  net/netfilter/nft_quota.c | 20
-rw-r--r--  net/netfilter/nft_set_pipapo_avx2.c | 21
-rw-r--r--  net/netfilter/nft_tunnel.c | 8
-rw-r--r--  net/netlabel/netlabel_kapi.c | 5
-rw-r--r--  net/openvswitch/flow.c | 2
-rw-r--r--  net/sched/sch_codel.c | 2
-rw-r--r--  net/sched/sch_ets.c | 2
-rw-r--r--  net/sched/sch_fq.c | 2
-rw-r--r--  net/sched/sch_fq_codel.c | 2
-rw-r--r--  net/sched/sch_fq_pie.c | 2
-rw-r--r--  net/sched/sch_hfsc.c | 15
-rw-r--r--  net/sched/sch_hhf.c | 2
-rw-r--r--  net/sched/sch_pie.c | 2
-rw-r--r--  net/sched/sch_prio.c | 2
-rw-r--r--  net/sched/sch_red.c | 2
-rw-r--r--  net/sched/sch_sfq.c | 5
-rw-r--r--  net/sched/sch_tbf.c | 2
-rw-r--r--  net/sctp/sysctl.c | 4
-rw-r--r--  net/smc/smc_pnet.c | 8
-rw-r--r--  net/sunrpc/clnt.c | 3
-rw-r--r--  net/sunrpc/rpcb_clnt.c | 5
-rw-r--r--  net/sunrpc/sched.c | 2
-rw-r--r--  net/tipc/crypto.c | 9
-rw-r--r--  net/tls/tls_strp.c | 3
-rw-r--r--  net/tls/tls_sw.c | 15
-rw-r--r--  net/unix/Kconfig | 5
-rw-r--r--  net/unix/Makefile | 2
-rw-r--r--  net/unix/af_unix.c | 120
-rw-r--r--  net/unix/garbage.c | 691
-rw-r--r--  net/unix/scm.c | 161
-rw-r--r--  net/unix/scm.h | 10
-rw-r--r--  net/xfrm/xfrm_device.c | 2
-rw-r--r--  net/xfrm/xfrm_policy.c | 3
-rw-r--r--  net/xfrm/xfrm_state.c | 8
75 files changed, 1135 insertions, 946 deletions
diff --git a/net/Makefile b/net/Makefile
index 428c523296e3..cedc19be7ada 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
-obj-$(CONFIG_UNIX_SCM) += unix/
+obj-$(CONFIG_UNIX) += unix/
obj-y += ipv6/
obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c
index 1bc51e2b05a3..3e1713673ecc 100644
--- a/net/bluetooth/eir.c
+++ b/net/bluetooth/eir.c
@@ -366,17 +366,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr)
void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len)
{
- while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) {
+ size_t dlen;
+
+ while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) {
u16 value = get_unaligned_le16(eir);
if (uuid == value) {
if (len)
- *len -= 2;
+ *len = dlen - 2;
return &eir[2];
}
- eir += *len;
- eir_len -= *len;
+ eir += dlen;
+ eir_len -= dlen;
}
return NULL;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 70be66cd6036..f3596cfdffc2 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1862,10 +1862,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
if (monitor->handle)
idr_remove(&hdev->adv_monitors_idr, monitor->handle);
- if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) {
+ if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED)
hdev->adv_monitors_cnt--;
- mgmt_adv_monitor_removed(hdev, monitor->handle);
- }
kfree(monitor);
}
@@ -2492,6 +2490,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
mutex_init(&hdev->lock);
mutex_init(&hdev->req_lock);
+ mutex_init(&hdev->mgmt_pending_lock);
ida_init(&hdev->unset_handle_ida);
@@ -3380,23 +3379,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
bt_dev_err(hdev, "link tx timeout");
- rcu_read_lock();
+ hci_dev_lock(hdev);
/* Kill stalled connections */
- list_for_each_entry_rcu(c, &h->list, list) {
+ list_for_each_entry(c, &h->list, list) {
if (c->type == type && c->sent) {
bt_dev_err(hdev, "killing stalled connection %pMR",
&c->dst);
- /* hci_disconnect might sleep, so, we have to release
- * the RCU read lock before calling it.
- */
- rcu_read_unlock();
hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
- rcu_read_lock();
}
}
- rcu_read_unlock();
+ hci_dev_unlock(hdev);
}
static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type,
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index d6f40806ee51..e92bc4ceb5ad 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1586,7 +1586,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
{
u8 bid[3];
- u8 ad[4 + 3];
+ u8 ad[HCI_MAX_EXT_AD_LENGTH];
+ u8 len;
/* Skip if NULL adv as instance 0x00 is used for general purpose
* advertising so it cannot used for the likes of Broadcast Announcement
@@ -1612,8 +1613,10 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv)
/* Generate Broadcast ID */
get_random_bytes(bid, sizeof(bid));
- eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
- hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL);
+ len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid));
+ memcpy(ad + len, adv->adv_data, adv->adv_data_len);
+ hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len,
+ ad, 0, NULL);
return hci_update_adv_data_sync(hdev, adv->instance);
}
@@ -1630,8 +1633,15 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len,
if (instance) {
adv = hci_find_adv_instance(hdev, instance);
- /* Create an instance if that could not be found */
- if (!adv) {
+ if (adv) {
+ /* Turn it into periodic advertising */
+ adv->periodic = true;
+ adv->per_adv_data_len = data_len;
+ if (data)
+ memcpy(adv->per_adv_data, data, data_len);
+ adv->flags = flags;
+ } else if (!adv) {
+ /* Create an instance if that could not be found */
adv = hci_add_per_instance(hdev, instance, flags,
data_len, data,
sync_interval,
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 72ee41b894a5..2744ad11687c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1411,7 +1411,8 @@ static void l2cap_request_info(struct l2cap_conn *conn)
sizeof(req), &req);
}
-static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
+static bool l2cap_check_enc_key_size(struct hci_conn *hcon,
+ struct l2cap_chan *chan)
{
/* The minimum encryption key size needs to be enforced by the
* host stack before establishing any L2CAP connections. The
@@ -1425,7 +1426,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
int min_key_size = hcon->hdev->min_enc_key_size;
/* On FIPS security level, key size must be 16 bytes */
- if (hcon->sec_level == BT_SECURITY_FIPS)
+ if (chan->sec_level == BT_SECURITY_FIPS)
min_key_size = 16;
return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
@@ -1453,7 +1454,7 @@ static void l2cap_do_start(struct l2cap_chan *chan)
!__l2cap_no_conn_pending(chan))
return;
- if (l2cap_check_enc_key_size(conn->hcon))
+ if (l2cap_check_enc_key_size(conn->hcon, chan))
l2cap_start_connection(chan);
else
__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
@@ -1528,7 +1529,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
continue;
}
- if (l2cap_check_enc_key_size(conn->hcon))
+ if (l2cap_check_enc_key_size(conn->hcon, chan))
l2cap_start_connection(chan);
else
l2cap_chan_close(chan, ECONNREFUSED);
@@ -3955,7 +3956,7 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd,
/* Check if the ACL is secure enough (if not SDP) */
if (psm != cpu_to_le16(L2CAP_PSM_SDP) &&
(!hci_conn_check_link_mode(conn->hcon) ||
- !l2cap_check_enc_key_size(conn->hcon))) {
+ !l2cap_check_enc_key_size(conn->hcon, pchan))) {
conn->disc_reason = HCI_ERROR_AUTH_FAILURE;
result = L2CAP_CR_SEC_BLOCK;
goto response;
@@ -4832,7 +4833,8 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
if (!smp_sufficient_security(conn->hcon, pchan->sec_level,
SMP_ALLOW_STK)) {
- result = L2CAP_CR_LE_AUTHENTICATION;
+ result = pchan->sec_level == BT_SECURITY_MEDIUM ?
+ L2CAP_CR_LE_ENCRYPTION : L2CAP_CR_LE_AUTHENTICATION;
chan = NULL;
goto response_unlock;
}
@@ -7323,7 +7325,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
}
if (chan->state == BT_CONNECT) {
- if (!status && l2cap_check_enc_key_size(hcon))
+ if (!status && l2cap_check_enc_key_size(hcon, chan))
l2cap_start_connection(chan);
else
__set_chan_timer(chan, L2CAP_DISC_TIMEOUT);
@@ -7333,7 +7335,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
struct l2cap_conn_rsp rsp;
__u16 res, stat;
- if (!status && l2cap_check_enc_key_size(hcon)) {
+ if (!status && l2cap_check_enc_key_size(hcon, chan)) {
if (test_bit(FLAG_DEFER_SETUP, &chan->flags)) {
res = L2CAP_CR_PEND;
stat = L2CAP_CS_AUTHOR_PEND;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 29e420e9754b..853d217cabc9 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1433,22 +1433,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data)
send_settings_rsp(cmd->sk, cmd->opcode, match->hdev);
- list_del(&cmd->list);
-
if (match->sk == NULL) {
match->sk = cmd->sk;
sock_hold(match->sk);
}
-
- mgmt_pending_free(cmd);
}
static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data)
{
u8 *status = data;
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status);
- mgmt_pending_remove(cmd);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status);
}
static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
@@ -1462,8 +1457,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
if (cmd->cmd_complete) {
cmd->cmd_complete(cmd, match->mgmt_status);
- mgmt_pending_remove(cmd);
-
return;
}
@@ -1472,13 +1465,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data)
static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status)
{
- return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
+ return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status,
cmd->param, cmd->param_len);
}
static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status)
{
- return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status,
+ return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status,
cmd->param, sizeof(struct mgmt_addr_info));
}
@@ -1518,7 +1511,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data,
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE);
goto done;
}
@@ -1693,7 +1686,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data,
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
goto done;
}
@@ -1930,8 +1923,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
new_settings(hdev, NULL);
}
- mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp,
- &mgmt_err);
+ mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true,
+ cmd_status_rsp, &mgmt_err);
return;
}
@@ -1941,7 +1934,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err)
changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED);
}
- mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match);
if (changed)
new_settings(hdev, match.sk);
@@ -2061,12 +2054,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp,
- &status);
+ mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp,
+ &status);
return;
}
- mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match);
new_settings(hdev, match.sk);
@@ -2125,7 +2118,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err)
struct sock *sk = cmd->sk;
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true,
cmd_status_rsp, &status);
return;
}
@@ -2566,7 +2559,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), hdev->dev_class, 3);
mgmt_pending_free(cmd);
@@ -3354,7 +3347,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status)
bacpy(&rp.addr.bdaddr, &conn->dst);
rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type);
- err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE,
+ err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE,
status, &rp, sizeof(rp));
/* So we don't get further callbacks for this connection */
@@ -5165,24 +5158,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk);
}
-void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle)
+static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev,
+ __le16 handle)
{
struct mgmt_ev_adv_monitor_removed ev;
- struct mgmt_pending_cmd *cmd;
- struct sock *sk_skip = NULL;
- struct mgmt_cp_remove_adv_monitor *cp;
- cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev);
- if (cmd) {
- cp = cmd->param;
+ ev.monitor_handle = handle;
- if (cp->monitor_handle)
- sk_skip = cmd->sk;
- }
-
- ev.monitor_handle = cpu_to_le16(handle);
-
- mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip);
+ mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk);
}
static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev,
@@ -5253,7 +5236,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev,
hci_update_passive_scan(hdev);
}
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(status), &rp, sizeof(rp));
mgmt_pending_remove(cmd);
@@ -5284,8 +5267,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev,
if (pending_find(MGMT_OP_SET_LE, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
- pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) {
+ pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
status = MGMT_STATUS_BUSY;
goto unlock;
}
@@ -5455,8 +5437,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
struct mgmt_pending_cmd *cmd = data;
struct mgmt_cp_remove_adv_monitor *cp;
- if (status == -ECANCELED ||
- cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev))
+ if (status == -ECANCELED)
return;
hci_dev_lock(hdev);
@@ -5465,12 +5446,14 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
rp.monitor_handle = cp->monitor_handle;
- if (!status)
+ if (!status) {
+ mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle);
hci_update_passive_scan(hdev);
+ }
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(status), &rp, sizeof(rp));
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
hci_dev_unlock(hdev);
bt_dev_dbg(hdev, "remove monitor %d complete, status %d",
@@ -5480,10 +5463,6 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev,
static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data)
{
struct mgmt_pending_cmd *cmd = data;
-
- if (cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev))
- return -ECANCELED;
-
struct mgmt_cp_remove_adv_monitor *cp = cmd->param;
u16 handle = __le16_to_cpu(cp->monitor_handle);
@@ -5502,14 +5481,13 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
hci_dev_lock(hdev);
if (pending_find(MGMT_OP_SET_LE, hdev) ||
- pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) ||
pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) {
status = MGMT_STATUS_BUSY;
goto unlock;
}
- cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
+ cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len);
if (!cmd) {
status = MGMT_STATUS_NO_RESOURCES;
goto unlock;
@@ -5519,7 +5497,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev,
mgmt_remove_adv_monitor_complete);
if (err) {
- mgmt_pending_remove(cmd);
+ mgmt_pending_free(cmd);
if (err == -ENOMEM)
status = MGMT_STATUS_NO_RESOURCES;
@@ -5872,7 +5850,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err)
cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev))
return;
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6110,7 +6088,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err),
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err),
cmd->param, 1);
mgmt_pending_remove(cmd);
@@ -6335,7 +6313,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err)
u8 status = mgmt_status(err);
if (status) {
- mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true,
cmd_status_rsp, &status);
return;
}
@@ -6345,7 +6323,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err)
else
hci_dev_clear_flag(hdev, HCI_ADVERTISING);
- mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp,
+ mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp,
&match);
new_settings(hdev, match.sk);
@@ -6689,7 +6667,7 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err)
*/
hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
} else {
send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev);
new_settings(hdev, cmd->sk);
@@ -6826,7 +6804,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err)
if (err) {
u8 mgmt_err = mgmt_status(err);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err);
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err);
goto done;
}
@@ -7273,7 +7251,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
rp.max_tx_power = HCI_TX_POWER_INVALID;
}
- mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status,
&rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -7433,7 +7411,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err)
}
complete:
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp,
sizeof(rp));
mgmt_pending_free(cmd);
@@ -7605,11 +7583,16 @@ static void add_device_complete(struct hci_dev *hdev, void *data, int err)
struct mgmt_cp_add_device *cp = cmd->param;
if (!err) {
+ struct hci_conn_params *params;
+
+ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
+ le_addr_type(cp->addr.type));
+
device_added(cmd->sk, hdev, &cp->addr.bdaddr, cp->addr.type,
cp->action);
device_flags_changed(NULL, hdev, &cp->addr.bdaddr,
cp->addr.type, hdev->conn_flags,
- PTR_UINT(cmd->user_data));
+ params ? params->flags : 0);
}
mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_ADD_DEVICE,
@@ -7712,8 +7695,6 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- cmd->user_data = UINT_PTR(current_flags);
-
err = hci_cmd_sync_queue(hdev, add_device_sync, cmd,
add_device_complete);
if (err < 0) {
@@ -8634,10 +8615,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err)
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
add_adv_complete(hdev, cmd->sk, cp->instance, err);
@@ -8825,10 +8806,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data,
hci_remove_adv_instance(hdev, cp->instance);
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
} else {
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
}
@@ -8976,10 +8957,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err)
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err), &rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -9138,10 +9119,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data,
rp.instance = cp->instance;
if (err)
- mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode,
mgmt_status(err));
else
- mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode,
+ mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode,
MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
mgmt_pending_free(cmd);
@@ -9412,7 +9393,7 @@ void mgmt_index_removed(struct hci_dev *hdev)
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
+ mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match);
if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0,
@@ -9450,7 +9431,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err)
hci_update_passive_scan(hdev);
}
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp,
+ &match);
new_settings(hdev, match.sk);
@@ -9465,7 +9447,8 @@ void __mgmt_power_off(struct hci_dev *hdev)
struct cmd_lookup match = { NULL, hdev };
u8 zero_cod[] = { 0, 0, 0 };
- mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp,
+ &match);
/* If the power off is because of hdev unregistration let
* use the appropriate INVALID_INDEX status. Otherwise use
@@ -9479,7 +9462,7 @@ void __mgmt_power_off(struct hci_dev *hdev)
else
match.mgmt_status = MGMT_STATUS_NOT_POWERED;
- mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match);
+ mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match);
if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -9720,7 +9703,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data)
device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk);
cmd->cmd_complete(cmd, 0);
- mgmt_pending_remove(cmd);
}
bool mgmt_powering_down(struct hci_dev *hdev)
@@ -9773,8 +9755,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
struct mgmt_cp_disconnect *cp;
struct mgmt_pending_cmd *cmd;
- mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp,
- hdev);
+ mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true,
+ unpair_device_rsp, hdev);
cmd = pending_find(MGMT_OP_DISCONNECT, hdev);
if (!cmd)
@@ -9967,7 +9949,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
if (status) {
u8 mgmt_err = mgmt_status(status);
- mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev,
+ mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true,
cmd_status_rsp, &mgmt_err);
return;
}
@@ -9977,8 +9959,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
else
changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY);
- mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp,
- &match);
+ mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true,
+ settings_rsp, &match);
if (changed)
new_settings(hdev, match.sk);
@@ -10002,9 +9984,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
{
struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };
- mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match);
- mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
- mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
+ mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup,
+ &match);
+ mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup,
+ &match);
+ mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup,
+ &match);
if (!status) {
mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index 0115f783bde8..4ba500c377a4 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -217,47 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode,
struct hci_dev *hdev)
{
- struct mgmt_pending_cmd *cmd;
+ struct mgmt_pending_cmd *cmd, *tmp;
+
+ mutex_lock(&hdev->mgmt_pending_lock);
- list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
+ list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) {
if (hci_sock_get_channel(cmd->sk) != channel)
continue;
- if (cmd->opcode == opcode)
- return cmd;
- }
- return NULL;
-}
-
-struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel,
- u16 opcode,
- struct hci_dev *hdev,
- const void *data)
-{
- struct mgmt_pending_cmd *cmd;
-
- list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
- if (cmd->user_data != data)
- continue;
- if (cmd->opcode == opcode)
+ if (cmd->opcode == opcode) {
+ mutex_unlock(&hdev->mgmt_pending_lock);
return cmd;
+ }
}
+ mutex_unlock(&hdev->mgmt_pending_lock);
+
return NULL;
}
-void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev,
+void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove,
void (*cb)(struct mgmt_pending_cmd *cmd, void *data),
void *data)
{
struct mgmt_pending_cmd *cmd, *tmp;
+ mutex_lock(&hdev->mgmt_pending_lock);
+
list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) {
if (opcode > 0 && cmd->opcode != opcode)
continue;
+ if (remove)
+ list_del(&cmd->list);
+
cb(cmd, data);
+
+ if (remove)
+ mgmt_pending_free(cmd);
}
+
+ mutex_unlock(&hdev->mgmt_pending_lock);
}
struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
@@ -271,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode,
return NULL;
cmd->opcode = opcode;
- cmd->index = hdev->id;
+ cmd->hdev = hdev;
cmd->param = kmemdup(data, len, GFP_KERNEL);
if (!cmd->param) {
@@ -297,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
if (!cmd)
return NULL;
+ mutex_lock(&hdev->mgmt_pending_lock);
list_add_tail(&cmd->list, &hdev->mgmt_pending);
+ mutex_unlock(&hdev->mgmt_pending_lock);
return cmd;
}
@@ -311,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd)
void mgmt_pending_remove(struct mgmt_pending_cmd *cmd)
{
+ mutex_lock(&cmd->hdev->mgmt_pending_lock);
list_del(&cmd->list);
+ mutex_unlock(&cmd->hdev->mgmt_pending_lock);
+
mgmt_pending_free(cmd);
}
@@ -321,7 +326,7 @@ void mgmt_mesh_foreach(struct hci_dev *hdev,
{
struct mgmt_mesh_tx *mesh_tx, *tmp;
- list_for_each_entry_safe(mesh_tx, tmp, &hdev->mgmt_pending, list) {
+ list_for_each_entry_safe(mesh_tx, tmp, &hdev->mesh_pending, list) {
if (!sk || mesh_tx->sk == sk)
cb(mesh_tx, data);
}
diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h
index bdf978605d5a..024e51dd6937 100644
--- a/net/bluetooth/mgmt_util.h
+++ b/net/bluetooth/mgmt_util.h
@@ -33,7 +33,7 @@ struct mgmt_mesh_tx {
struct mgmt_pending_cmd {
struct list_head list;
u16 opcode;
- int index;
+ struct hci_dev *hdev;
void *param;
size_t param_len;
struct sock *sk;
@@ -54,11 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode,
struct hci_dev *hdev);
-struct mgmt_pending_cmd *mgmt_pending_find_data(unsigned short channel,
- u16 opcode,
- struct hci_dev *hdev,
- const void *data);
-void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev,
+void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove,
void (*cb)(struct mgmt_pending_cmd *cmd, void *data),
void *data);
struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode,
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 7305f5f8215c..96bea0c8408f 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1030,7 +1030,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg,
/* host join */
if (!port) {
- if (mp->host_joined) {
+ if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) {
NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host");
return -EEXIST;
}
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 98aea5485aae..a8c67035e23c 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -65,17 +65,14 @@ static struct dst_ops fake_dst_ops = {
* ipt_REJECT needs it. Future netfilter modules might
* require us to fill additional fields.
*/
-static const u32 br_dst_default_metrics[RTAX_MAX] = {
- [RTAX_MTU - 1] = 1500,
-};
-
void br_netfilter_rtable_init(struct net_bridge *br)
{
struct rtable *rt = &br->fake_rtable;
rcuref_init(&rt->dst.__rcuref, 1);
rt->dst.dev = br->dev;
- dst_init_metrics(&rt->dst, br_dst_default_metrics, true);
+ dst_init_metrics(&rt->dst, br->metrics, false);
+ dst_metric_set(&rt->dst, RTAX_MTU, br->dev->mtu);
rt->dst.flags = DST_NOXFRM | DST_FAKE_RTABLE;
rt->dst.ops = &fake_dst_ops;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 72d80fd943a8..9197b511e459 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -502,6 +502,7 @@ struct net_bridge {
struct rtable fake_rtable;
struct rt6_info fake_rt6_info;
};
+ u32 metrics[RTAX_MAX];
#endif
u16 group_fwd_mask;
u16 group_fwd_mask_required;
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 4fbfbafdfa02..4b4a396d9722 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -60,19 +60,19 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
struct ip_fraglist_iter iter;
struct sk_buff *frag;
- if (first_len - hlen > mtu ||
- skb_headroom(skb) < ll_rs)
+ if (first_len - hlen > mtu)
goto blackhole;
- if (skb_cloned(skb))
+ if (skb_cloned(skb) ||
+ skb_headroom(skb) < ll_rs)
goto slow_path;
skb_walk_frags(skb, frag) {
- if (frag->len > mtu ||
- skb_headroom(frag) < hlen + ll_rs)
+ if (frag->len > mtu)
goto blackhole;
- if (skb_shared(frag))
+ if (skb_shared(frag) ||
+ skb_headroom(frag) < hlen + ll_rs)
goto slow_path;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index a1f5db0fd5d4..75653584f31b 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -58,6 +58,7 @@
#include <linux/can/skb.h>
#include <linux/can/bcm.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <net/sock.h>
#include <net/net_namespace.h>
@@ -120,6 +121,7 @@ struct bcm_op {
struct canfd_frame last_sframe;
struct sock *sk;
struct net_device *rx_reg_dev;
+ spinlock_t bcm_tx_lock; /* protect currframe/count in runtime updates */
};
struct bcm_sock {
@@ -205,7 +207,9 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex));
seq_printf(m, " <<<\n");
- list_for_each_entry(op, &bo->rx_ops, list) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(op, &bo->rx_ops, list) {
unsigned long reduction;
@@ -261,6 +265,9 @@ static int bcm_proc_show(struct seq_file *m, void *v)
seq_printf(m, "# sent %ld\n", op->frames_abs);
}
seq_putc(m, '\n');
+
+ rcu_read_unlock();
+
return 0;
}
#endif /* CONFIG_PROC_FS */
@@ -273,13 +280,18 @@ static void bcm_can_tx(struct bcm_op *op)
{
struct sk_buff *skb;
struct net_device *dev;
- struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe;
+ struct canfd_frame *cf;
int err;
/* no target device? => exit */
if (!op->ifindex)
return;
+ /* read currframe under lock protection */
+ spin_lock_bh(&op->bcm_tx_lock);
+ cf = op->frames + op->cfsiz * op->currframe;
+ spin_unlock_bh(&op->bcm_tx_lock);
+
dev = dev_get_by_index(sock_net(op->sk), op->ifindex);
if (!dev) {
/* RFC: should this bcm_op remove itself here? */
@@ -300,6 +312,10 @@ static void bcm_can_tx(struct bcm_op *op)
skb->dev = dev;
can_skb_set_owner(skb, op->sk);
err = can_send(skb, 1);
+
+ /* update currframe and count under lock protection */
+ spin_lock_bh(&op->bcm_tx_lock);
+
if (!err)
op->frames_abs++;
@@ -308,6 +324,11 @@ static void bcm_can_tx(struct bcm_op *op)
/* reached last frame? */
if (op->currframe >= op->nframes)
op->currframe = 0;
+
+ if (op->count > 0)
+ op->count--;
+
+ spin_unlock_bh(&op->bcm_tx_lock);
out:
dev_put(dev);
}
@@ -404,7 +425,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
struct bcm_msg_head msg_head;
if (op->kt_ival1 && (op->count > 0)) {
- op->count--;
+ bcm_can_tx(op);
if (!op->count && (op->flags & TX_COUNTEVT)) {
/* create notification to user */
@@ -419,7 +440,6 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
bcm_send_to_user(op, &msg_head, NULL, 0);
}
- bcm_can_tx(op);
} else if (op->kt_ival2) {
bcm_can_tx(op);
@@ -801,7 +821,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
REGMASK(op->can_id),
bcm_rx_handler, op);
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return 1; /* done */
}
@@ -821,7 +841,7 @@ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh,
list_for_each_entry_safe(op, n, ops, list) {
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return 1; /* done */
}
@@ -914,6 +934,27 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
}
op->flags = msg_head->flags;
+ /* only lock for unlikely count/nframes/currframe changes */
+ if (op->nframes != msg_head->nframes ||
+ op->flags & TX_RESET_MULTI_IDX ||
+ op->flags & SETTIMER) {
+
+ spin_lock_bh(&op->bcm_tx_lock);
+
+ if (op->nframes != msg_head->nframes ||
+ op->flags & TX_RESET_MULTI_IDX) {
+ /* potentially update changed nframes */
+ op->nframes = msg_head->nframes;
+ /* restart multiple frame transmission */
+ op->currframe = 0;
+ }
+
+ if (op->flags & SETTIMER)
+ op->count = msg_head->count;
+
+ spin_unlock_bh(&op->bcm_tx_lock);
+ }
+
} else {
/* insert new BCM operation for the given can_id */
@@ -921,9 +962,14 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
if (!op)
return -ENOMEM;
+ spin_lock_init(&op->bcm_tx_lock);
op->can_id = msg_head->can_id;
op->cfsiz = CFSIZ(msg_head->flags);
op->flags = msg_head->flags;
+ op->nframes = msg_head->nframes;
+
+ if (op->flags & SETTIMER)
+ op->count = msg_head->count;
/* create array for CAN frames and copy the data */
if (msg_head->nframes > 1) {
@@ -982,22 +1028,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
} /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */
- if (op->nframes != msg_head->nframes) {
- op->nframes = msg_head->nframes;
- /* start multiple frame transmission with index 0 */
- op->currframe = 0;
- }
-
- /* check flags */
-
- if (op->flags & TX_RESET_MULTI_IDX) {
- /* start multiple frame transmission with index 0 */
- op->currframe = 0;
- }
-
if (op->flags & SETTIMER) {
/* set timer values */
- op->count = msg_head->count;
op->ival1 = msg_head->ival1;
op->ival2 = msg_head->ival2;
op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1);
@@ -1014,11 +1046,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->flags |= TX_ANNOUNCE;
}
- if (op->flags & TX_ANNOUNCE) {
+ if (op->flags & TX_ANNOUNCE)
bcm_can_tx(op);
- if (op->count)
- op->count--;
- }
if (op->flags & STARTTIMER)
bcm_tx_start_timer(op);
@@ -1234,7 +1263,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
bcm_rx_handler, op, "bcm", sk);
if (err) {
/* this bcm rx op is broken -> remove it */
- list_del(&op->list);
+ list_del_rcu(&op->list);
bcm_remove_op(op);
return err;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 359e24c3f22c..6afea369ca21 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -897,6 +897,10 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
pkt_dev->nr_labels = 0;
do {
__u32 tmp;
+
+ if (n >= MAX_MPLS_LABELS)
+ return -E2BIG;
+
len = hex32_arg(&buffer[i], 8, &tmp);
if (len <= 0)
return len;
@@ -908,8 +912,6 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
return -EFAULT;
i++;
n++;
- if (n >= MAX_MPLS_LABELS)
- return -E2BIG;
} while (c == ',');
pkt_dev->nr_labels = n;
@@ -1875,8 +1877,8 @@ static ssize_t pktgen_thread_write(struct file *file,
i = len;
/* Read variable name */
-
- len = strn_len(&user_buffer[i], sizeof(name) - 1);
+ max = min(sizeof(name) - 1, count - i);
+ len = strn_len(&user_buffer[i], max);
if (len < 0)
return len;
@@ -1906,7 +1908,8 @@ static ssize_t pktgen_thread_write(struct file *file,
if (!strcmp(name, "add_device")) {
char f[32];
memset(f, 0, 32);
- len = strn_len(&user_buffer[i], sizeof(f) - 1);
+ max = min(sizeof(f) - 1, count - i);
+ len = strn_len(&user_buffer[i], max);
if (len < 0) {
ret = len;
goto out;
diff --git a/net/core/scm.c b/net/core/scm.c
index 737917c7ac62..431bfb3ea392 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -36,6 +36,7 @@
#include <net/compat.h>
#include <net/scm.h>
#include <net/cls_cgroup.h>
+#include <net/af_unix.h>
/*
@@ -85,8 +86,15 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
return -ENOMEM;
*fplp = fpl;
fpl->count = 0;
+ fpl->count_unix = 0;
fpl->max = SCM_MAX_FD;
fpl->user = NULL;
+#if IS_ENABLED(CONFIG_UNIX)
+ fpl->inflight = false;
+ fpl->dead = false;
+ fpl->edges = NULL;
+ INIT_LIST_HEAD(&fpl->vertices);
+#endif
}
fpp = &fpl->fp[fpl->count];
@@ -109,6 +117,9 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
fput(file);
return -EINVAL;
}
+ if (unix_get_socket(file))
+ fpl->count_unix++;
+
*fpp++ = file;
fpl->count++;
}
@@ -371,8 +382,14 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
+
new_fpl->max = new_fpl->count;
new_fpl->user = get_uid(fpl->user);
+#if IS_ENABLED(CONFIG_UNIX)
+ new_fpl->inflight = false;
+ new_fpl->edges = NULL;
+ INIT_LIST_HEAD(&new_fpl->vertices);
+#endif
}
return new_fpl;
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index b9b941c487c8..2076db464e93 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -529,16 +529,22 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
u32 off, u32 len,
struct sk_psock *psock,
struct sock *sk,
- struct sk_msg *msg)
+ struct sk_msg *msg,
+ bool take_ref)
{
int num_sge, copied;
+ /* skb_to_sgvec will fail when the total number of fragments in
+ * frag_list and frags exceeds MAX_MSG_FRAGS. For example, the
+ * caller may aggregate multiple skbs.
+ */
num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
if (num_sge < 0) {
/* skb linearize may fail with ENOMEM, but lets simply try again
* later if this happens. Under memory pressure we don't want to
* drop the skb. We need to linearize the skb so that the mapping
* in skb_to_sgvec can not error.
+ * Note that skb_linearize requires the skb not to be shared.
*/
if (skb_linearize(skb))
return -EAGAIN;
@@ -555,7 +561,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
msg->sg.start = 0;
msg->sg.size = copied;
msg->sg.end = num_sge;
- msg->skb = skb;
+ msg->skb = take_ref ? skb_get(skb) : skb;
sk_psock_queue_msg(psock, msg);
sk_psock_data_ready(sk, psock);
@@ -563,7 +569,7 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
}
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len);
+ u32 off, u32 len, bool take_ref);
static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len)
@@ -577,7 +583,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* correctly.
*/
if (unlikely(skb->sk == sk))
- return sk_psock_skb_ingress_self(psock, skb, off, len);
+ return sk_psock_skb_ingress_self(psock, skb, off, len, true);
msg = sk_psock_create_ingress_msg(sk, skb);
if (!msg)
return -EAGAIN;
@@ -589,7 +595,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, true);
if (err < 0)
kfree(msg);
return err;
@@ -600,7 +606,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
* because the skb is already accounted for here.
*/
static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
- u32 off, u32 len)
+ u32 off, u32 len, bool take_ref)
{
struct sk_msg *msg = alloc_sk_msg(GFP_ATOMIC);
struct sock *sk = psock->sk;
@@ -609,7 +615,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
if (unlikely(!msg))
return -EAGAIN;
skb_set_owner_r(skb, sk);
- err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
if (err < 0)
kfree(msg);
return err;
@@ -618,18 +624,13 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
- int err = 0;
-
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- skb_get(skb);
- err = sk_psock_skb_ingress(psock, skb, off, len);
- if (err < 0)
- kfree_skb(skb);
- return err;
+
+ return sk_psock_skb_ingress(psock, skb, off, len);
}
static void sk_psock_skb_state(struct sk_psock *psock,
@@ -654,12 +655,14 @@ static void sk_psock_backlog(struct work_struct *work)
bool ingress;
int ret;
+ /* Increment the psock refcnt to synchronize with close(fd) path in
+ * sock_map_close(), ensuring we wait for backlog thread completion
+ * before sk_socket freed. If refcnt increment fails, it indicates
+ * sock_map_close() completed with sk_socket potentially already freed.
+ */
+ if (!sk_psock_get(psock->sk))
+ return;
mutex_lock(&psock->work_mutex);
- if (unlikely(state->len)) {
- len = state->len;
- off = state->off;
- }
-
while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
@@ -669,6 +672,13 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
+
+ /* Resume processing from previous partial state */
+ if (unlikely(state->len)) {
+ len = state->len;
+ off = state->off;
+ }
+
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@@ -696,11 +706,14 @@ static void sk_psock_backlog(struct work_struct *work)
len -= ret;
} while (len);
+ /* The entire skb sent, clear state */
+ sk_psock_skb_state(psock, state, 0, 0);
skb = skb_dequeue(&psock->ingress_skb);
kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
+ sk_psock_put(psock->sk, psock);
}
struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@ -1013,7 +1026,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
off = stm->offset;
len = stm->full_len;
}
- err = sk_psock_skb_ingress_self(psock, skb, off, len);
+ err = sk_psock_skb_ingress_self(psock, skb, off, len, false);
}
if (err < 0) {
spin_lock_bh(&psock->ingress_lock);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index cacdafb41200..146c1dbd15a9 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -257,7 +257,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
int source_port;
u8 *brcm_tag;
- if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
+ if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN)))
return NULL;
brcm_tag = dsa_etype_header_pos_rx(skb);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index eeace9b509ce..49fd664f50fc 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -118,47 +118,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
}
#ifdef CONFIG_INET_ESPINTCP
-struct esp_tcp_sk {
- struct sock *sk;
- struct rcu_head rcu;
-};
-
-static void esp_free_tcp_sk(struct rcu_head *head)
-{
- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
-
- sock_put(esk->sk);
- kfree(esk);
-}
-
static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct net *net = xs_net(x);
- struct esp_tcp_sk *esk;
__be16 sport, dport;
- struct sock *nsk;
struct sock *sk;
- sk = rcu_dereference(x->encap_sk);
- if (sk && sk->sk_state == TCP_ESTABLISHED)
- return sk;
-
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (sk && sk == nsk) {
- esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
- if (!esk) {
- spin_unlock_bh(&x->lock);
- return ERR_PTR(-ENOMEM);
- }
- RCU_INIT_POINTER(x->encap_sk, NULL);
- esk->sk = sk;
- call_rcu(&esk->rcu, esp_free_tcp_sk);
- }
spin_unlock_bh(&x->lock);
sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
@@ -171,20 +140,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
return ERR_PTR(-EINVAL);
}
- spin_lock_bh(&x->lock);
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (encap->encap_sport != sport ||
- encap->encap_dport != dport) {
- sock_put(sk);
- sk = nsk ?: ERR_PTR(-EREMCHG);
- } else if (sk == nsk) {
- sock_put(sk);
- } else {
- rcu_assign_pointer(x->encap_sk, sk);
- }
- spin_unlock_bh(&x->lock);
-
return sk;
}
@@ -207,6 +162,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
+
out:
rcu_read_unlock();
return err;
@@ -391,6 +348,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
if (IS_ERR(sk))
return ERR_CAST(sk);
+ sock_put(sk);
+
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 90ce87ffed46..7993ff46de23 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -829,19 +829,33 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
}
}
+ if (cfg->fc_dst_len > 32) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (cfg->fc_dst_len < 32 && (ntohl(cfg->fc_dst) << cfg->fc_dst_len)) {
+ NL_SET_ERR_MSG(extack, "Invalid prefix for given prefix length");
+ err = -EINVAL;
+ goto errout;
+ }
+
if (cfg->fc_nh_id) {
if (cfg->fc_oif || cfg->fc_gw_family ||
cfg->fc_encap || cfg->fc_mp) {
NL_SET_ERR_MSG(extack,
"Nexthop specification and nexthop id are mutually exclusive");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
}
if (has_gw && has_via) {
NL_SET_ERR_MSG(extack,
"Nexthop configuration can not contain both GATEWAY and VIA");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout;
}
if (!cfg->fc_table)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 513f475c6a53..298a9944a3d1 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -222,9 +222,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(skb->sk);
+ struct fib4_rule *rule4 = (struct fib4_rule *)rule;
+ struct net *net = rule->fr_net;
int err = -EINVAL;
- struct fib4_rule *rule4 = (struct fib4_rule *) rule;
if (!inet_validate_dscp(frh->tos)) {
NL_SET_ERR_MSG(extack,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 77b97c48da5e..fa54b36b241a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1192,22 +1192,6 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp,
return 0;
}
-static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
-{
- if (plen > KEYLENGTH) {
- NL_SET_ERR_MSG(extack, "Invalid prefix length");
- return false;
- }
-
- if ((plen < KEYLENGTH) && (key << plen)) {
- NL_SET_ERR_MSG(extack,
- "Invalid prefix for given prefix length");
- return false;
- }
-
- return true;
-}
-
static void fib_remove_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *old);
@@ -1228,9 +1212,6 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
fi = fib_create_info(cfg, extack);
@@ -1723,9 +1704,6 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
key = ntohl(cfg->fc_dst);
- if (!fib_valid_key_len(key, plen, extack))
- return -EINVAL;
-
l = fib_find_node(t, &tp, key);
if (!l)
return -ESRCH;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7967ff7e02f7..60e81f6b1c6d 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -1231,22 +1231,37 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
unsigned int locksz = sizeof(spinlock_t);
unsigned int i, nblocks = 1;
+ spinlock_t *ptr = NULL;
- if (locksz != 0) {
- /* allocate 2 cache lines or at least one spinlock per cpu */
- nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
- nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
+ if (locksz == 0)
+ goto set_mask;
- /* no more locks than number of hash buckets */
- nblocks = min(nblocks, hashinfo->ehash_mask + 1);
+ /* Allocate 2 cache lines or at least one spinlock per cpu. */
+ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U) * num_possible_cpus();
- hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
- if (!hashinfo->ehash_locks)
- return -ENOMEM;
+ /* At least one page per NUMA node. */
+ nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz);
+
+ nblocks = roundup_pow_of_two(nblocks);
+
+ /* No more locks than number of hash buckets. */
+ nblocks = min(nblocks, hashinfo->ehash_mask + 1);
- for (i = 0; i < nblocks; i++)
- spin_lock_init(&hashinfo->ehash_locks[i]);
+ if (num_online_nodes() > 1) {
+ /* Use vmalloc() to allow NUMA policy to spread pages
+ * on all available nodes if desired.
+ */
+ ptr = vmalloc_array(nblocks, locksz);
+ }
+ if (!ptr) {
+ ptr = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
}
+ for (i = 0; i < nblocks; i++)
+ spin_lock_init(&ptr[i]);
+ hashinfo->ehash_locks = ptr;
+set_mask:
hashinfo->ehash_locks_mask = nblocks - 1;
return 0;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 890c15510b42..f261e29adc7c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -140,7 +140,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
const struct iphdr *iph;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- unsigned int data_len = 0;
struct ip_tunnel *t;
if (tpi->proto == htons(ETH_P_TEB))
@@ -181,7 +180,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
return 0;
- data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
case ICMP_REDIRECT:
@@ -189,10 +187,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
}
#if IS_ENABLED(CONFIG_IPV6)
- if (tpi->proto == htons(ETH_P_IPV6) &&
- !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
- type, data_len))
- return 0;
+ if (tpi->proto == htons(ETH_P_IPV6)) {
+ unsigned int data_len = 0;
+
+ if (type == ICMP_TIME_EXCEEDED)
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
+
+ if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+ type, data_len))
+ return 0;
+ }
#endif
if (t->parms.iph.daddr == 0 ||
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 10d38ec0ff5a..a172248b6678 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -425,6 +425,20 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr
return false;
}
+static void tcp_count_delivered_ce(struct tcp_sock *tp, u32 ecn_count)
+{
+ tp->delivered_ce += ecn_count;
+}
+
+/* Updates the delivered and delivered_ce counts */
+static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
+ bool ece_ack)
+{
+ tp->delivered += delivered;
+ if (ece_ack)
+ tcp_count_delivered_ce(tp, delivered);
+}
+
/* Buffer size and advertised window tuning.
*
* 1. Tuning sk->sk_sndbuf, when connection enters established state.
@@ -1137,15 +1151,6 @@ void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
}
}
-/* Updates the delivered and delivered_ce counts */
-static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
- bool ece_ack)
-{
- tp->delivered += delivered;
- if (ece_ack)
- tp->delivered_ce += delivered;
-}
-
/* This procedure tags the retransmission queue when SACKs arrive.
*
* We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
@@ -3816,12 +3821,23 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
}
}
-static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
+static void tcp_in_ack_event(struct sock *sk, int flag)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- if (icsk->icsk_ca_ops->in_ack_event)
- icsk->icsk_ca_ops->in_ack_event(sk, flags);
+ if (icsk->icsk_ca_ops->in_ack_event) {
+ u32 ack_ev_flags = 0;
+
+ if (flag & FLAG_WIN_UPDATE)
+ ack_ev_flags |= CA_ACK_WIN_UPDATE;
+ if (flag & FLAG_SLOWPATH) {
+ ack_ev_flags |= CA_ACK_SLOWPATH;
+ if (flag & FLAG_ECE)
+ ack_ev_flags |= CA_ACK_ECE;
+ }
+
+ icsk->icsk_ca_ops->in_ack_event(sk, ack_ev_flags);
+ }
}
/* Congestion control has updated the cwnd already. So if we're in
@@ -3938,12 +3954,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_snd_una_update(tp, ack);
flag |= FLAG_WIN_UPDATE;
- tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
-
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
- u32 ack_ev_flags = CA_ACK_SLOWPATH;
-
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
@@ -3955,19 +3967,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
+ if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
- ack_ev_flags |= CA_ACK_ECE;
- }
if (sack_state.sack_delivered)
tcp_count_delivered(tp, sack_state.sack_delivered,
flag & FLAG_ECE);
-
- if (flag & FLAG_WIN_UPDATE)
- ack_ev_flags |= CA_ACK_WIN_UPDATE;
-
- tcp_in_ack_event(sk, ack_ev_flags);
}
/* This is a deviation from RFC3168 since it states that:
@@ -3994,6 +3999,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_rack_update_reo_wnd(sk, &rs);
+ tcp_in_ack_event(sk, flag);
+
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -4025,6 +4032,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
return 1;
no_queue:
+ tcp_in_ack_event(sk, flag);
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) {
tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
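The tcp_input.c hunks above defer the congestion-avoidance ack-event callback until the FLAG_* bits for the ACK are fully known, then derive the CA_ACK_* flags from them in one place. A minimal userspace sketch of that translation step follows; the flag values are stand-ins, not the kernel's definitions.

#include <stdio.h>

/* Stand-in values; the kernel's FLAG_* / CA_ACK_* constants differ. */
#define FLAG_WIN_UPDATE   0x02
#define FLAG_ECE          0x40
#define FLAG_SLOWPATH     0x100

#define CA_ACK_WIN_UPDATE 0x1
#define CA_ACK_SLOWPATH   0x2
#define CA_ACK_ECE        0x4

/* Translate the per-ACK FLAG_* bits into ack-event bits, mirroring the
 * structure of the reworked tcp_in_ack_event(). */
static unsigned int ack_event_flags(int flag)
{
	unsigned int ev = 0;

	if (flag & FLAG_WIN_UPDATE)
		ev |= CA_ACK_WIN_UPDATE;
	if (flag & FLAG_SLOWPATH) {
		ev |= CA_ACK_SLOWPATH;
		if (flag & FLAG_ECE)
			ev |= CA_ACK_ECE;
	}
	return ev;
}

int main(void)
{
	printf("0x%x\n", ack_event_flags(FLAG_SLOWPATH | FLAG_ECE)); /* 0x6 */
	return 0;
}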
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 132cfc3b2c84..3870b59f5400 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -332,6 +332,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
bool copy_dtor;
__sum16 check;
__be16 newlen;
+ int ret = 0;
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
@@ -354,6 +355,10 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
if (skb_pagelen(gso_skb) - sizeof(*uh) == skb_shinfo(gso_skb)->gso_size)
return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+ ret = __skb_linearize(gso_skb);
+ if (ret)
+ return ERR_PTR(ret);
+
/* Setup csum, as fraglist skips this in udp4_gro_receive. */
gso_skb->csum_start = skb_transport_header(gso_skb) - gso_skb->head;
gso_skb->csum_offset = offsetof(struct udphdr, check);
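The new failure path in __udp_gso_segment() reports the __skb_linearize() error through the returned pointer via ERR_PTR(). The sketch below is a hedged userspace re-implementation of that idiom, only to show how a single pointer return can carry either a buffer or a small negative errno; the helper names are illustrative, not the kernel's.

#include <errno.h>
#include <stdio.h>

/* Small negative errno values live at the very top of the address space,
 * so they can be distinguished from any valid pointer. */
#define MAX_ERRNO 4095

static inline void *err_ptr(long err)      { return (void *)err; }
static inline long  ptr_err(const void *p) { return (long)p; }
static inline int   is_err(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static void *segment(int fail)
{
	static char payload[16];

	if (fail)
		return err_ptr(-ENOMEM);	/* like returning ERR_PTR(ret) */
	return payload;
}

int main(void)
{
	void *p = segment(1);

	if (is_err(p))
		printf("error: %ld\n", ptr_err(p));	/* -12 */
	return 0;
}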
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 62bb9651133c..7e4c8628cf98 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -135,47 +135,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
}
#ifdef CONFIG_INET6_ESPINTCP
-struct esp_tcp_sk {
- struct sock *sk;
- struct rcu_head rcu;
-};
-
-static void esp_free_tcp_sk(struct rcu_head *head)
-{
- struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
-
- sock_put(esk->sk);
- kfree(esk);
-}
-
static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
{
struct xfrm_encap_tmpl *encap = x->encap;
struct net *net = xs_net(x);
- struct esp_tcp_sk *esk;
__be16 sport, dport;
- struct sock *nsk;
struct sock *sk;
- sk = rcu_dereference(x->encap_sk);
- if (sk && sk->sk_state == TCP_ESTABLISHED)
- return sk;
-
spin_lock_bh(&x->lock);
sport = encap->encap_sport;
dport = encap->encap_dport;
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (sk && sk == nsk) {
- esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
- if (!esk) {
- spin_unlock_bh(&x->lock);
- return ERR_PTR(-ENOMEM);
- }
- RCU_INIT_POINTER(x->encap_sk, NULL);
- esk->sk = sk;
- call_rcu(&esk->rcu, esp_free_tcp_sk);
- }
spin_unlock_bh(&x->lock);
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
@@ -188,20 +157,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
return ERR_PTR(-EINVAL);
}
- spin_lock_bh(&x->lock);
- nsk = rcu_dereference_protected(x->encap_sk,
- lockdep_is_held(&x->lock));
- if (encap->encap_sport != sport ||
- encap->encap_dport != dport) {
- sock_put(sk);
- sk = nsk ?: ERR_PTR(-EREMCHG);
- } else if (sk == nsk) {
- sock_put(sk);
- } else {
- rcu_assign_pointer(x->encap_sk, sk);
- }
- spin_unlock_bh(&x->lock);
-
return sk;
}
@@ -224,6 +179,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
err = espintcp_push_skb(sk, skb);
bh_unlock_sock(sk);
+ sock_put(sk);
+
out:
rcu_read_unlock();
return err;
@@ -427,6 +384,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
if (IS_ERR(sk))
return ERR_CAST(sk);
+ sock_put(sk);
+
*lenp = htons(len);
esph = (struct ip_esp_hdr *)(lenp + 1);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 6eeab21512ba..e0f0c5f8cccd 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -350,9 +350,9 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
+ struct fib6_rule *rule6 = (struct fib6_rule *)rule;
+ struct net *net = rule->fr_net;
int err = -EINVAL;
- struct net *net = sock_net(skb->sk);
- struct fib6_rule *rule6 = (struct fib6_rule *) rule;
if (!inet_validate_dscp(frh->tos)) {
NL_SET_ERR_MSG(extack,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c86d5dca29df..28777b142240 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1452,6 +1452,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
}
v6_cork->hop_limit = ipc6->hlimit;
v6_cork->tclass = ipc6->tclass;
+ v6_cork->dontfrag = ipc6->dontfrag;
if (rt->dst.flags & DST_XFRM_TUNNEL)
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
@@ -1485,7 +1486,7 @@ static int __ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, size_t length, int transhdrlen,
- unsigned int flags, struct ipcm6_cookie *ipc6)
+ unsigned int flags)
{
struct sk_buff *skb, *skb_prev = NULL;
struct inet_cork *cork = &cork_full->base;
@@ -1541,7 +1542,7 @@ static int __ip6_append_data(struct sock *sk,
if (headersize + transhdrlen > mtu)
goto emsgsize;
- if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
+ if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
(sk->sk_protocol == IPPROTO_UDP ||
sk->sk_protocol == IPPROTO_ICMPV6 ||
sk->sk_protocol == IPPROTO_RAW)) {
@@ -1913,7 +1914,7 @@ int ip6_append_data(struct sock *sk,
return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
&np->cork, sk_page_frag(sk), getfrag,
- from, length, transhdrlen, flags, ipc6);
+ from, length, transhdrlen, flags);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -2118,7 +2119,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
err = __ip6_append_data(sk, &queue, cork, &v6_cork,
&current->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
- flags, ipc6);
+ flags);
if (err) {
__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
return ERR_PTR(err);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 7c4af48d529e..606aae4e78a9 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -163,20 +163,20 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct ip6_fraglist_iter iter;
struct sk_buff *frag2;
- if (first_len - hlen > mtu ||
- skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
+ if (first_len - hlen > mtu)
goto blackhole;
- if (skb_cloned(skb))
+ if (skb_cloned(skb) ||
+ skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
goto slow_path;
skb_walk_frags(skb, frag2) {
- if (frag2->len > mtu ||
- skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
+ if (frag2->len > mtu)
goto blackhole;
/* Partially cloned skb? */
- if (skb_shared(frag2))
+ if (skb_shared(frag2) ||
+ skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
goto slow_path;
}
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index c9f1634b3838..a89ce0fbfe4b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -158,6 +158,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
{
const struct nft_fib *priv = nft_expr_priv(expr);
int noff = skb_network_offset(pkt->skb);
+ const struct net_device *found = NULL;
const struct net_device *oif = NULL;
u32 *dest = &regs->data[priv->dreg];
struct ipv6hdr *iph, _iph;
@@ -202,11 +203,15 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev &&
- l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
- goto put_rt_err;
+ if (!oif) {
+ found = rt->rt6i_idev->dev;
+ } else {
+ if (oif == rt->rt6i_idev->dev ||
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == oif->ifindex)
+ found = oif;
+ }
- nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
+ nft_fib_store_result(dest, priv, found);
put_rt_err:
ip6_rt_put(rt);
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index c434940131b1..7f295b9c1374 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -1638,10 +1638,8 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
[SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
[SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
- [SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
- .len = sizeof(struct in_addr) },
- [SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
- .len = sizeof(struct in6_addr) },
+ [SEG6_LOCAL_NH4] = NLA_POLICY_EXACT_LEN(sizeof(struct in_addr)),
+ [SEG6_LOCAL_NH6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)),
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index cc25fec44f85..19f3de3c24ef 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -888,15 +888,15 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (sk->sk_type != SOCK_STREAM)
goto copy_uaddr;
+ /* Partial read */
+ if (used + offset < skb_len)
+ continue;
+
if (!(flags & MSG_PEEK)) {
skb_unlink(skb, &sk->sk_receive_queue);
kfree_skb(skb);
*seq = 0;
}
-
- /* Partial read */
- if (used + offset < skb_len)
- continue;
} while (len > 0);
out:
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d1046f495e63..3a6fff98748b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1186,10 +1186,12 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
return -EINVAL;
}
- local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) +
- sizeof(void *) * channels, GFP_KERNEL);
+ local->int_scan_req = kzalloc(struct_size(local->int_scan_req,
+ channels, channels),
+ GFP_KERNEL);
if (!local->int_scan_req)
return -ENOMEM;
+ local->int_scan_req->n_channels = channels;
eth_broadcast_addr(local->int_scan_req->bssid);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 42e2c84ed248..2c7e139efd53 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2959,7 +2959,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
if (tx)
ieee80211_flush_queues(local, sdata, false);
- drv_mgd_complete_tx(sdata->local, sdata, &info);
+ if (tx || frame_buf)
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
/* clear AP addr only after building the needed mgmt frames */
eth_zero_addr(sdata->deflink.u.mgd.bssid);
@@ -7821,7 +7822,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
- drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
diff --git a/net/mctp/device.c b/net/mctp/device.c
index 85cc5f31f1e7..8d1386601bbe 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -20,8 +20,7 @@
#include <net/sock.h>
struct mctp_dump_cb {
- int h;
- int idx;
+ unsigned long ifindex;
size_t a_idx;
};
@@ -115,43 +114,36 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
{
struct mctp_dump_cb *mcb = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
- struct hlist_head *head;
struct net_device *dev;
struct ifaddrmsg *hdr;
struct mctp_dev *mdev;
- int ifindex;
- int idx = 0, rc;
-
- hdr = nlmsg_data(cb->nlh);
- // filter by ifindex if requested
- ifindex = hdr->ifa_index;
+ int ifindex = 0, rc;
+
+ /* Filter by ifindex if a header is provided */
+ if (cb->nlh->nlmsg_len >= nlmsg_msg_size(sizeof(*hdr))) {
+ hdr = nlmsg_data(cb->nlh);
+ ifindex = hdr->ifa_index;
+ } else {
+ if (cb->strict_check) {
+ NL_SET_ERR_MSG(cb->extack, "mctp: Invalid header for addr dump request");
+ return -EINVAL;
+ }
+ }
rcu_read_lock();
- for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) {
- idx = 0;
- head = &net->dev_index_head[mcb->h];
- hlist_for_each_entry_rcu(dev, head, index_hlist) {
- if (idx >= mcb->idx &&
- (ifindex == 0 || ifindex == dev->ifindex)) {
- mdev = __mctp_dev_get(dev);
- if (mdev) {
- rc = mctp_dump_dev_addrinfo(mdev,
- skb, cb);
- mctp_dev_put(mdev);
- // Error indicates full buffer, this
- // callback will get retried.
- if (rc < 0)
- goto out;
- }
- }
- idx++;
- // reset for next iteration
- mcb->a_idx = 0;
- }
+ for_each_netdev_dump(net, dev, mcb->ifindex) {
+ if (ifindex && ifindex != dev->ifindex)
+ continue;
+ mdev = __mctp_dev_get(dev);
+ if (!mdev)
+ continue;
+ rc = mctp_dump_dev_addrinfo(mdev, skb, cb);
+ mctp_dev_put(mdev);
+ if (rc < 0)
+ break;
+ mcb->a_idx = 0;
}
-out:
rcu_read_unlock();
- mcb->idx = idx;
return skb->len;
}
@@ -525,9 +517,12 @@ static struct notifier_block mctp_dev_nb = {
};
static const struct rtnl_msg_handler mctp_device_rtnl_msg_handlers[] = {
- {THIS_MODULE, PF_MCTP, RTM_NEWADDR, mctp_rtm_newaddr, NULL, 0},
- {THIS_MODULE, PF_MCTP, RTM_DELADDR, mctp_rtm_deladdr, NULL, 0},
- {THIS_MODULE, PF_MCTP, RTM_GETADDR, NULL, mctp_dump_addrinfo, 0},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_NEWADDR,
+ .doit = mctp_rtm_newaddr},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_DELADDR,
+ .doit = mctp_rtm_deladdr},
+ {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_GETADDR,
+ .dumpit = mctp_dump_addrinfo},
};
int __init mctp_device_init(void)
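The rewritten mctp_dump_addrinfo() walks devices with for_each_netdev_dump(), keeping the resume point (mcb->ifindex) in the callback context so a dump interrupted by a full buffer continues where it stopped. A small illustrative sketch of that resumable-iteration pattern, with made-up names:

#include <stdio.h>

/* The iteration cursor lives in caller-provided state, so when the output
 * budget is exhausted the next invocation resumes after the last item
 * already emitted. */
struct dump_state { unsigned long next; };

static int dump(const int *items, unsigned long n_items,
		struct dump_state *st, int budget)
{
	int emitted = 0;

	for (; st->next < n_items; st->next++) {
		if (emitted == budget)
			return -1;		/* buffer full, retry later */
		printf("item %d\n", items[st->next]);
		emitted++;
	}
	return emitted;
}

int main(void)
{
	int items[] = { 10, 20, 30, 40, 50 };
	struct dump_state st = { 0 };

	while (dump(items, 5, &st, 2) < 0)
		;				/* caller retries the dump */
	return 0;
}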
diff --git a/net/mctp/route.c b/net/mctp/route.c
index d3c1f54386ef..009ba5edbd52 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -274,8 +274,10 @@ static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
key = flow->key;
- if (WARN_ON(key->dev && key->dev != dev))
+ if (key->dev) {
+ WARN_ON(key->dev != dev);
return;
+ }
mctp_dev_set_key(dev, key);
}
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 4e0842df5234..2c260f33b55c 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -143,16 +143,15 @@ struct ncsi_channel_vlan_filter {
};
struct ncsi_channel_stats {
- u32 hnc_cnt_hi; /* Counter cleared */
- u32 hnc_cnt_lo; /* Counter cleared */
- u32 hnc_rx_bytes; /* Rx bytes */
- u32 hnc_tx_bytes; /* Tx bytes */
- u32 hnc_rx_uc_pkts; /* Rx UC packets */
- u32 hnc_rx_mc_pkts; /* Rx MC packets */
- u32 hnc_rx_bc_pkts; /* Rx BC packets */
- u32 hnc_tx_uc_pkts; /* Tx UC packets */
- u32 hnc_tx_mc_pkts; /* Tx MC packets */
- u32 hnc_tx_bc_pkts; /* Tx BC packets */
+ u64 hnc_cnt; /* Counter cleared */
+ u64 hnc_rx_bytes; /* Rx bytes */
+ u64 hnc_tx_bytes; /* Tx bytes */
+ u64 hnc_rx_uc_pkts; /* Rx UC packets */
+ u64 hnc_rx_mc_pkts; /* Rx MC packets */
+ u64 hnc_rx_bc_pkts; /* Rx BC packets */
+ u64 hnc_tx_uc_pkts; /* Tx UC packets */
+ u64 hnc_tx_mc_pkts; /* Tx MC packets */
+ u64 hnc_tx_bc_pkts; /* Tx BC packets */
u32 hnc_fcs_err; /* FCS errors */
u32 hnc_align_err; /* Alignment errors */
u32 hnc_false_carrier; /* False carrier detection */
@@ -181,7 +180,7 @@ struct ncsi_channel_stats {
u32 hnc_tx_1023_frames; /* Tx 512-1023 bytes frames */
u32 hnc_tx_1522_frames; /* Tx 1024-1522 bytes frames */
u32 hnc_tx_9022_frames; /* Tx 1523-9022 bytes frames */
- u32 hnc_rx_valid_bytes; /* Rx valid bytes */
+ u64 hnc_rx_valid_bytes; /* Rx valid bytes */
u32 hnc_rx_runt_pkts; /* Rx error runt packets */
u32 hnc_rx_jabber_pkts; /* Rx error jabber packets */
u32 ncsi_rx_cmds; /* Rx NCSI commands */
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index f2f3b5c1b941..24edb2737972 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -252,16 +252,15 @@ struct ncsi_rsp_gp_pkt {
/* Get Controller Packet Statistics */
struct ncsi_rsp_gcps_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
- __be32 cnt_hi; /* Counter cleared */
- __be32 cnt_lo; /* Counter cleared */
- __be32 rx_bytes; /* Rx bytes */
- __be32 tx_bytes; /* Tx bytes */
- __be32 rx_uc_pkts; /* Rx UC packets */
- __be32 rx_mc_pkts; /* Rx MC packets */
- __be32 rx_bc_pkts; /* Rx BC packets */
- __be32 tx_uc_pkts; /* Tx UC packets */
- __be32 tx_mc_pkts; /* Tx MC packets */
- __be32 tx_bc_pkts; /* Tx BC packets */
+ __be64 cnt; /* Counter cleared */
+ __be64 rx_bytes; /* Rx bytes */
+ __be64 tx_bytes; /* Tx bytes */
+ __be64 rx_uc_pkts; /* Rx UC packets */
+ __be64 rx_mc_pkts; /* Rx MC packets */
+ __be64 rx_bc_pkts; /* Rx BC packets */
+ __be64 tx_uc_pkts; /* Tx UC packets */
+ __be64 tx_mc_pkts; /* Tx MC packets */
+ __be64 tx_bc_pkts; /* Tx BC packets */
__be32 fcs_err; /* FCS errors */
__be32 align_err; /* Alignment errors */
__be32 false_carrier; /* False carrier detection */
@@ -290,11 +289,11 @@ struct ncsi_rsp_gcps_pkt {
__be32 tx_1023_frames; /* Tx 512-1023 bytes frames */
__be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */
__be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */
- __be32 rx_valid_bytes; /* Rx valid bytes */
+ __be64 rx_valid_bytes; /* Rx valid bytes */
__be32 rx_runt_pkts; /* Rx error runt packets */
__be32 rx_jabber_pkts; /* Rx error jabber packets */
__be32 checksum; /* Checksum */
-};
+} __packed __aligned(4);
/* Get NCSI Statistics */
struct ncsi_rsp_gns_pkt {
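Widening the counters to __be64 is why the response struct gains __packed __aligned(4): without it the 64-bit members would be naturally aligned and padding would shift the wire layout. A minimal sketch of the effect, using the GCC/Clang attribute syntax and illustrative field names:

#include <stdint.h>
#include <stdio.h>

/* A 4-byte field followed by an 8-byte field. With natural alignment the
 * compiler inserts 4 bytes of padding; packed + aligned(4) keeps the
 * on-wire layout while retaining 4-byte base alignment. */
struct natural { uint32_t hdr; uint64_t cnt; };
struct wire    { uint32_t hdr; uint64_t cnt; } __attribute__((packed, aligned(4)));

int main(void)
{
	printf("natural: %zu bytes, wire: %zu bytes\n",
	       sizeof(struct natural), sizeof(struct wire));	/* 16 vs 12 */
	return 0;
}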
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 4a8ce2949fae..8668888c5a2f 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -926,16 +926,15 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr)
/* Update HNC's statistics */
ncs = &nc->stats;
- ncs->hnc_cnt_hi = ntohl(rsp->cnt_hi);
- ncs->hnc_cnt_lo = ntohl(rsp->cnt_lo);
- ncs->hnc_rx_bytes = ntohl(rsp->rx_bytes);
- ncs->hnc_tx_bytes = ntohl(rsp->tx_bytes);
- ncs->hnc_rx_uc_pkts = ntohl(rsp->rx_uc_pkts);
- ncs->hnc_rx_mc_pkts = ntohl(rsp->rx_mc_pkts);
- ncs->hnc_rx_bc_pkts = ntohl(rsp->rx_bc_pkts);
- ncs->hnc_tx_uc_pkts = ntohl(rsp->tx_uc_pkts);
- ncs->hnc_tx_mc_pkts = ntohl(rsp->tx_mc_pkts);
- ncs->hnc_tx_bc_pkts = ntohl(rsp->tx_bc_pkts);
+ ncs->hnc_cnt = be64_to_cpu(rsp->cnt);
+ ncs->hnc_rx_bytes = be64_to_cpu(rsp->rx_bytes);
+ ncs->hnc_tx_bytes = be64_to_cpu(rsp->tx_bytes);
+ ncs->hnc_rx_uc_pkts = be64_to_cpu(rsp->rx_uc_pkts);
+ ncs->hnc_rx_mc_pkts = be64_to_cpu(rsp->rx_mc_pkts);
+ ncs->hnc_rx_bc_pkts = be64_to_cpu(rsp->rx_bc_pkts);
+ ncs->hnc_tx_uc_pkts = be64_to_cpu(rsp->tx_uc_pkts);
+ ncs->hnc_tx_mc_pkts = be64_to_cpu(rsp->tx_mc_pkts);
+ ncs->hnc_tx_bc_pkts = be64_to_cpu(rsp->tx_bc_pkts);
ncs->hnc_fcs_err = ntohl(rsp->fcs_err);
ncs->hnc_align_err = ntohl(rsp->align_err);
ncs->hnc_false_carrier = ntohl(rsp->false_carrier);
@@ -964,7 +963,7 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr)
ncs->hnc_tx_1023_frames = ntohl(rsp->tx_1023_frames);
ncs->hnc_tx_1522_frames = ntohl(rsp->tx_1522_frames);
ncs->hnc_tx_9022_frames = ntohl(rsp->tx_9022_frames);
- ncs->hnc_rx_valid_bytes = ntohl(rsp->rx_valid_bytes);
+ ncs->hnc_rx_valid_bytes = be64_to_cpu(rsp->rx_valid_bytes);
ncs->hnc_rx_runt_pkts = ntohl(rsp->rx_runt_pkts);
ncs->hnc_rx_jabber_pkts = ntohl(rsp->rx_jabber_pkts);
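The handler now decodes the widened counters with be64_to_cpu(). A portable userspace sketch of the same big-endian 64-bit conversion, independent of host byte order (the helper name is illustrative):

#include <stdint.h>
#include <stdio.h>

/* Assemble a 64-bit counter from the big-endian byte order used on the
 * NC-SI wire, one byte at a time. */
static uint64_t be64_to_host(const uint8_t b[8])
{
	uint64_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v = (v << 8) | b[i];
	return v;
}

int main(void)
{
	/* 0x0000000100000000 == 4294967296, a value that would have wrapped
	 * the old 32-bit hnc_* fields. */
	uint8_t wire[8] = { 0, 0, 0, 1, 0, 0, 0, 0 };

	printf("%llu\n", (unsigned long long)be64_to_host(wire));
	return 0;
}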
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 559665467b04..1d5de1d9f008 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -621,7 +621,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
[NF_SYSCTL_CT_COUNT] = {
.procname = "nf_conntrack_count",
@@ -657,7 +659,9 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.data = &nf_ct_expect_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = SYSCTL_INT_MAX,
},
[NF_SYSCTL_CT_ACCT] = {
.procname = "nf_conntrack_acct",
@@ -951,7 +955,9 @@ static struct ctl_table nf_ct_netfilter_table[] = {
.data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
{ }
};
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index ccca6e3848bc..9df883d79acc 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -248,7 +248,7 @@ static noinline bool
nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_ct)
{
- static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST_BIT;
+ static const unsigned long uses_nat = IPS_NAT_MASK | IPS_SEQ_ADJUST;
const struct nf_conntrack_tuple_hash *thash;
const struct nf_conntrack_zone *zone;
struct nf_conn *ct;
@@ -287,8 +287,14 @@ nf_nat_used_tuple_new(const struct nf_conntrack_tuple *tuple,
zone = nf_ct_zone(ignored_ct);
thash = nf_conntrack_find_get(net, zone, tuple);
- if (unlikely(!thash)) /* clashing entry went away */
- return false;
+ if (unlikely(!thash)) {
+ struct nf_conntrack_tuple reply;
+
+ nf_ct_invert_tuple(&reply, tuple);
+ thash = nf_conntrack_find_get(net, zone, &reply);
+ if (!thash) /* clashing entry went away */
+ return false;
+ }
ct = nf_ct_tuplehash_to_ctrack(thash);
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 9b2d7463d3d3..df0798da2329 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -19,10 +19,16 @@ struct nft_quota {
};
static inline bool nft_overquota(struct nft_quota *priv,
- const struct sk_buff *skb)
+ const struct sk_buff *skb,
+ bool *report)
{
- return atomic64_add_return(skb->len, priv->consumed) >=
- atomic64_read(&priv->quota);
+ u64 consumed = atomic64_add_return(skb->len, priv->consumed);
+ u64 quota = atomic64_read(&priv->quota);
+
+ if (report)
+ *report = consumed >= quota;
+
+ return consumed > quota;
}
static inline bool nft_quota_invert(struct nft_quota *priv)
@@ -34,7 +40,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- if (nft_overquota(priv, pkt->skb) ^ nft_quota_invert(priv))
+ if (nft_overquota(priv, pkt->skb, NULL) ^ nft_quota_invert(priv))
regs->verdict.code = NFT_BREAK;
}
@@ -51,13 +57,13 @@ static void nft_quota_obj_eval(struct nft_object *obj,
const struct nft_pktinfo *pkt)
{
struct nft_quota *priv = nft_obj_data(obj);
- bool overquota;
+ bool overquota, report;
- overquota = nft_overquota(priv, pkt->skb);
+ overquota = nft_overquota(priv, pkt->skb, &report);
if (overquota ^ nft_quota_invert(priv))
regs->verdict.code = NFT_BREAK;
- if (overquota &&
+ if (report &&
!test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
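After this change the verdict flips only when consumption strictly exceeds the quota, while the depletion notification fires as soon as the quota is reached, so the packet that exactly fills the quota passes but is still reported. A small sketch of those two thresholds around the boundary:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the split introduced above: "over quota" is a strict comparison,
 * the report threshold stays at >=. */
static bool overquota(uint64_t consumed, uint64_t quota, bool *report)
{
	if (report)
		*report = consumed >= quota;
	return consumed > quota;
}

int main(void)
{
	uint64_t quota = 1000;
	uint64_t vals[] = { 999, 1000, 1001 };

	for (int i = 0; i < 3; i++) {
		bool report;
		bool over = overquota(vals[i], quota, &report);

		printf("consumed=%llu over=%d report=%d\n",
		       (unsigned long long)vals[i], over, report);
	}
	return 0;
}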
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index c15db28c5ebc..be7c16c79f71 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1114,6 +1114,25 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
}
/**
+ * pipapo_resmap_init_avx2() - Initialise result map before first use
+ * @m: Matching data, including mapping table
+ * @res_map: Result map
+ *
+ * Like pipapo_resmap_init() but do not set start map bits covered by the first field.
+ */
+static inline void pipapo_resmap_init_avx2(const struct nft_pipapo_match *m, unsigned long *res_map)
+{
+ const struct nft_pipapo_field *f = m->f;
+ int i;
+
+ /* Starting map doesn't need to be set to all-ones for this implementation,
+ * but we do need to zero the remaining bits, if any.
+ */
+ for (i = f->bsize; i < m->bsize_max; i++)
+ res_map[i] = 0ul;
+}
+
+/**
* nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
* @net: Network namespace
* @set: nftables API set representation
@@ -1171,7 +1190,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
res = scratch->map + (map_index ? m->bsize_max : 0);
fill = scratch->map + (map_index ? 0 : m->bsize_max);
- /* Starting map doesn't need to be set for this implementation */
+ pipapo_resmap_init_avx2(m, res);
nft_pipapo_avx2_prepare();
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index d499eb3f4f29..3e3ae29dde33 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -617,10 +617,10 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
struct geneve_opt *opt;
int offset = 0;
- inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
- if (!inner)
- goto failure;
while (opts->len > offset) {
+ inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
+ if (!inner)
+ goto failure;
opt = (struct geneve_opt *)(opts->u.data + offset);
if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS,
opt->opt_class) ||
@@ -630,8 +630,8 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
opt->length * 4, opt->opt_data))
goto inner_failure;
offset += sizeof(*opt) + opt->length * 4;
+ nla_nest_end(skb, inner);
}
- nla_nest_end(skb, inner);
}
nla_nest_end(skb, nest);
return 0;
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 27511c90a26f..daef0eeaea2c 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -1140,6 +1140,11 @@ int netlbl_conn_setattr(struct sock *sk,
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
+ if (sk->sk_family != AF_INET6) {
+ ret_val = -EAFNOSUPPORT;
+ goto conn_setattr_return;
+ }
+
addr6 = (struct sockaddr_in6 *)addr;
entry = netlbl_domhsh_getentry_af6(secattr->domain,
&addr6->sin6_addr);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8a848ce72e29..b80bd3a90773 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -788,7 +788,7 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
- u8 label_count = 1;
+ size_t label_count = 1;
memset(&key->mpls, 0, sizeof(key->mpls));
skb_set_inner_network_header(skb, skb->mac_len);
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 5f2e06815745..63c02040b426 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -168,7 +168,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index 9da86db4d2c2..3ee46f6e005d 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -661,7 +661,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
for (i = q->nbands; i < oldbands; i++) {
if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
list_del_init(&q->classes[i].alist);
- qdisc_tree_flush_backlog(q->classes[i].qdisc);
+ qdisc_purge_queue(q->classes[i].qdisc);
}
q->nstrict = nstrict;
memcpy(q->prio2band, priomap, sizeof(priomap));
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index f59a2cb2c803..91f5ef6be0f2 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -901,7 +901,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
}
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = fq_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
if (!skb)
break;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9330923a624c..47b5a056165c 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -431,7 +431,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
while (sch->q.qlen > sch->limit ||
q->memory_usage > q->memory_limit) {
- struct sk_buff *skb = fq_codel_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
q->cstats.drop_len += qdisc_pkt_len(skb);
rtnl_kfree_skbs(skb, skb);
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 68e6acd0f130..607c580d75e4 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -357,7 +357,7 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
/* Drop excess packets if new limit is lower */
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
len_dropped += qdisc_pkt_len(skb);
num_dropped += 1;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5d9cccfac4a1..afcb83d469ff 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -175,6 +175,11 @@ struct hfsc_sched {
#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
+static bool cl_in_el_or_vttree(struct hfsc_class *cl)
+{
+ return ((cl->cl_flags & HFSC_FSC) && cl->cl_nactive) ||
+ ((cl->cl_flags & HFSC_RSC) && !RB_EMPTY_NODE(&cl->el_node));
+}
/*
* eligible tree holds backlogged classes being sorted by their eligible times.
@@ -1040,6 +1045,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
+ RB_CLEAR_NODE(&cl->el_node);
+
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
@@ -1570,7 +1577,10 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
return err;
}
- if (first && !cl->cl_nactive) {
+ sch->qstats.backlog += len;
+ sch->q.qlen++;
+
+ if (first && !cl_in_el_or_vttree(cl)) {
if (cl->cl_flags & HFSC_RSC)
init_ed(cl, len);
if (cl->cl_flags & HFSC_FSC)
@@ -1585,9 +1595,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
}
- sch->qstats.backlog += len;
- sch->q.qlen++;
-
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index d26cd436cbe3..83fc44f20e31 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -560,7 +560,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt,
qlen = sch->q.qlen;
prev_backlog = sch->qstats.backlog;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = hhf_dequeue(sch);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, false);
rtnl_kfree_skbs(skb, skb);
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 2da6250ec346..48c5ab8ec143 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -190,7 +190,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
/* Drop excess packets if new limit is lower */
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
- struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+ struct sk_buff *skb = qdisc_dequeue_internal(sch, true);
dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index fdc5ef52c3ee..fdd9caa41e80 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
for (i = q->bands; i < oldbands; i++)
- qdisc_tree_flush_backlog(q->queues[i]);
+ qdisc_purge_queue(q->queues[i]);
for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 16277b6a0238..3c6b4460cf2c 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -283,7 +283,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
q->userbits = userbits;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old_child = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 002941d35b64..d564675a8be4 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -310,7 +310,10 @@ drop:
/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
x = q->tail->next;
slot = &q->slots[x];
- q->tail->next = slot->next;
+ if (slot->next == x)
+ q->tail = NULL; /* no more active slots */
+ else
+ q->tail->next = slot->next;
q->ht[slot->hash] = SFQ_EMPTY_SLOT;
goto drop;
}
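SFQ chains active slots into a circular list reached through q->tail; the fix above handles dropping the last remaining slot, which previously left q->tail pointing at a slot no longer on the list. An illustrative model of that corner case, with simplified types:

#include <stdio.h>

#define EMPTY 0xff

/* Slots are chained by index through ->next and reached via a single
 * "tail" cursor. Dropping the slot after tail must detect the one-slot
 * case, otherwise tail keeps referencing a slot that was just unlinked. */
struct slot { unsigned char next; };

static void drop_after_tail(struct slot *slots, unsigned char *tail)
{
	unsigned char x = slots[*tail].next;	/* victim: head of the ring */

	if (slots[x].next == x)
		*tail = EMPTY;			/* x was the only active slot */
	else
		slots[*tail].next = slots[x].next;

	slots[x].next = EMPTY;
}

int main(void)
{
	struct slot slots[1] = { { .next = 0 } };	/* points to itself */
	unsigned char tail = 0;

	drop_after_tail(slots, &tail);
	printf("tail=%s\n", tail == EMPTY ? "EMPTY" : "set");	/* EMPTY */
	return 0;
}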
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index f92174008499..61dd5c8f2310 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (child) {
- qdisc_tree_flush_backlog(q->qdisc);
+ qdisc_purge_queue(q->qdisc);
old = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index fd73be940f46..77a76634014a 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -529,6 +529,8 @@ static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
return ret;
}
+static DEFINE_MUTEX(sctp_sysctl_mutex);
+
static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -553,6 +555,7 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
if (new_value > max || new_value < min)
return -EINVAL;
+ mutex_lock(&sctp_sysctl_mutex);
net->sctp.udp_port = new_value;
sctp_udp_sock_stop(net);
if (new_value) {
@@ -565,6 +568,7 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
lock_sock(sk);
sctp_sk(sk)->udp_port = htons(net->sctp.udp_port);
release_sock(sk);
+ mutex_unlock(&sctp_sysctl_mutex);
}
return ret;
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index dbcc72b43d0c..d44d7f427fc9 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -1084,14 +1084,16 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+ struct net_device *base_ndev;
struct net *net;
- ndev = pnet_find_base_ndev(ndev);
+ base_ndev = pnet_find_base_ndev(ndev);
net = dev_net(ndev);
- if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
+ if (smc_pnetid_by_dev_port(base_ndev->dev.parent, base_ndev->dev_port,
ndev_pnetid) &&
+ smc_pnet_find_ndev_pnetid_by_table(base_ndev, ndev_pnetid) &&
smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
- smc_pnet_find_rdma_dev(ndev, ini);
+ smc_pnet_find_rdma_dev(base_ndev, ini);
return; /* pnetid could not be determined */
}
_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 142ee6554848..4ffb2bcaf364 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -275,9 +275,6 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
old = rcu_dereference_protected(clnt->cl_xprt,
lockdep_is_held(&clnt->cl_lock));
- if (!xprt_bound(xprt))
- clnt->cl_autobind = 1;
-
clnt->cl_timeout = timeout;
rcu_assign_pointer(clnt->cl_xprt, xprt);
spin_unlock(&clnt->cl_lock);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 102c3818bc54..53bcca365fb1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -820,9 +820,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
}
trace_rpcb_setport(child, map->r_status, map->r_port);
- xprt->ops->set_port(xprt, map->r_port);
- if (map->r_port)
+ if (map->r_port) {
+ xprt->ops->set_port(xprt, map->r_port);
xprt_set_bound(xprt);
+ }
}
/*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9b45fbdc90ca..73bc39281ef5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -276,6 +276,8 @@ EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
{
+ if (unlikely(current->flags & PF_EXITING))
+ return -EINTR;
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index c524421ec652..79f91b6ca8c8 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -817,12 +817,20 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
goto exit;
}
+	/* Hold net to avoid a use-after-free of tipc_crypto when the namespace is deleted */
+ if (!maybe_get_net(aead->crypto->net)) {
+ tipc_bearer_put(b);
+ rc = -ENODEV;
+ goto exit;
+ }
+
/* Now, do encrypt */
rc = crypto_aead_encrypt(req);
if (rc == -EINPROGRESS || rc == -EBUSY)
return rc;
tipc_bearer_put(b);
+ put_net(aead->crypto->net);
exit:
kfree(ctx);
@@ -860,6 +868,7 @@ static void tipc_aead_encrypt_done(void *data, int err)
kfree(tx_ctx);
tipc_bearer_put(b);
tipc_aead_put(aead);
+ put_net(net);
}
/**
diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
index 5df08d848b5c..1852fac3e72b 100644
--- a/net/tls/tls_strp.c
+++ b/net/tls/tls_strp.c
@@ -395,7 +395,6 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
return 0;
shinfo = skb_shinfo(strp->anchor);
- shinfo->frag_list = NULL;
/* If we don't know the length go max plus page for cipher overhead */
need_spc = strp->stm.full_len ?: TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
@@ -411,6 +410,8 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
page, 0, 0);
}
+ shinfo->frag_list = NULL;
+
strp->copy_mode = 1;
strp->stm.offset = 0;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 6e30fe879d53..4a9a3aed5d6d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -908,6 +908,13 @@ more_data:
&msg_redir, send, flags);
lock_sock(sk);
if (err < 0) {
+ /* Regardless of whether the data represented by
+ * msg_redir is sent successfully, we have already
+ * uncharged it via sk_msg_return_zero(). The
+ * msg->sg.size represents the remaining unprocessed
+ * data, which needs to be uncharged here.
+ */
+ sk_mem_uncharge(sk, msg->sg.size);
*copied -= sk_msg_free_nocharge(sk, &msg_redir);
msg->sg.size = 0;
}
@@ -1120,9 +1127,13 @@ alloc_encrypted:
num_async++;
else if (ret == -ENOMEM)
goto wait_for_memory;
- else if (ctx->open_rec && ret == -ENOSPC)
+ else if (ctx->open_rec && ret == -ENOSPC) {
+ if (msg_pl->cork_bytes) {
+ ret = 0;
+ goto send_end;
+ }
goto rollback_iter;
- else if (ret != -EAGAIN)
+ } else if (ret != -EAGAIN)
goto send_end;
}
continue;
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 28b232f281ab..8b5d04210d7c 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -16,11 +16,6 @@ config UNIX
Say Y unless you know what you are doing.
-config UNIX_SCM
- bool
- depends on UNIX
- default y
-
config AF_UNIX_OOB
bool
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 20491825b4d0..4ddd125c4642 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -11,5 +11,3 @@ unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
-
-obj-$(CONFIG_UNIX_SCM) += scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ab23c8d72122..236a2cd2bc93 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -117,8 +117,6 @@
#include <linux/file.h>
#include <linux/btf_ids.h>
-#include "scm.h"
-
static atomic_long_t unix_nr_socks;
static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
@@ -980,11 +978,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
sk->sk_destruct = unix_sock_destructor;
u = unix_sk(sk);
- u->inflight = 0;
+ u->listener = NULL;
+ u->vertex = NULL;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
- INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
@@ -1583,6 +1581,7 @@ restart:
newsk->sk_type = sk->sk_type;
init_peercred(newsk);
newu = unix_sk(newsk);
+ newu->listener = other;
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);
@@ -1678,8 +1677,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk = sock->sk;
- struct sock *tsk;
struct sk_buff *skb;
+ struct sock *tsk;
int err;
err = -EOPNOTSUPP;
@@ -1709,6 +1708,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
/* attach accepted sock to socket */
unix_state_lock(tsk);
+ unix_update_edges(unix_sk(tsk));
newsock->state = SS_CONNECTED;
unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
@@ -1752,51 +1752,65 @@ out:
return err;
}
+/* The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
+ /* Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
+ if (unix_prepare_fpl(UNIXCB(skb).fp))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ scm->fp = UNIXCB(skb).fp;
+ UNIXCB(skb).fp = NULL;
+
+ unix_destroy_fpl(scm->fp);
+}
+
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+}
- /*
- * Garbage collection of unix sockets starts by selecting a set of
- * candidate sockets which have reference only from being in flight
- * (total_refs == inflight_refs). This condition is checked once during
- * the candidate collection phase, and candidates are marked as such, so
- * that non-candidates can later be ignored. While inflight_refs is
- * protected by unix_gc_lock, total_refs (file count) is not, hence this
- * is an instantaneous decision.
- *
- * Once a candidate, however, the socket must not be reinstalled into a
- * file descriptor while the garbage collection is in progress.
- *
- * If the above conditions are met, then the directed graph of
- * candidates (*) does not change while unix_gc_lock is held.
- *
- * Any operations that changes the file count through file descriptors
- * (dup, close, sendmsg) does not change the graph since candidates are
- * not installed in fds.
- *
- * Dequeing a candidate via recvmsg would install it into an fd, but
- * that takes unix_gc_lock to decrement the inflight count, so it's
- * serialized with garbage collection.
- *
- * MSG_PEEK is special in that it does not change the inflight count,
- * yet does install the socket into an fd. The following lock/unlock
- * pair is to ensure serialization with garbage collection. It must be
- * done between incrementing the file count and installing the file into
- * an fd.
- *
- * If garbage collection starts after the barrier provided by the
- * lock/unlock, then it will see the elevated refcount and not mark this
- * as a candidate. If a garbage collection is already in progress
- * before the file count was incremented, then the lock/unlock pair will
- * ensure that garbage collection is finished before progressing to
- * installing the fd.
- *
- * (*) A -> B where B is on the queue of A or B is on the queue of C
- * which is on the queue of listening socket A.
- */
- spin_lock(&unix_gc_lock);
- spin_unlock(&unix_gc_lock);
+static void unix_destruct_scm(struct sk_buff *skb)
+{
+ struct scm_cookie scm;
+
+ memset(&scm, 0, sizeof(scm));
+ scm.pid = UNIXCB(skb).pid;
+ if (UNIXCB(skb).fp)
+ unix_detach_fds(&scm, skb);
+
+ /* Alas, it calls VFS */
+ /* So fscking what? fput() had been SMP-safe since the last Summer */
+ scm_destroy(&scm);
+ sock_wfree(skb);
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1855,8 +1869,10 @@ static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
- if (unlikely(fp && fp->count))
+ if (unlikely(fp && fp->count)) {
atomic_add(fp->count, &u->scm_stat.nr_fds);
+ unix_add_edges(fp, u);
+ }
}
static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
@@ -1864,8 +1880,10 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
- if (unlikely(fp && fp->count))
+ if (unlikely(fp && fp->count)) {
atomic_sub(fp->count, &u->scm_stat.nr_fds);
+ unix_del_edges(fp);
+ }
}
/*
@@ -1885,11 +1903,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
long timeo;
int err;
- wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+ wait_for_unix_gc(scm.fp);
+
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out;
@@ -2157,11 +2176,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
bool fds_sent = false;
int data_len;
- wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
+ wait_for_unix_gc(scm.fp);
+
err = -EOPNOTSUPP;
if (msg->msg_flags & MSG_OOB) {
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
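The too_many_unix_fds() check now lives next to unix_attach_fds() and compares the sending user's in-flight fd count against RLIMIT_NOFILE. A hedged userspace sketch of that policy; the counter stands in for user->unix_inflight and the capability overrides are omitted:

#include <stdbool.h>
#include <stdio.h>
#include <sys/resource.h>

/* An unprivileged user may not hold more SCM_RIGHTS fds in flight than
 * their open-file limit allows. */
static bool too_many_inflight_fds(unsigned long inflight)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_NOFILE, &rl))
		return false;
	return inflight > rl.rlim_cur;
}

int main(void)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_NOFILE, &rl))
		return 1;

	printf("%d %d\n", too_many_inflight_fds(rl.rlim_cur),
	       too_many_inflight_fds(rl.rlim_cur + 1));		/* 0 1 */
	return 0;
}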
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a758531e102..23efb78fe9ef 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -81,278 +81,551 @@
#include <net/scm.h>
#include <net/tcp_states.h>
-#include "scm.h"
+struct unix_sock *unix_get_socket(struct file *filp)
+{
+ struct inode *inode = file_inode(filp);
+
+ /* Socket ? */
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+ struct socket *sock = SOCKET_I(inode);
+ const struct proto_ops *ops;
+ struct sock *sk = sock->sk;
-/* Internal data structures and random procedures: */
+ ops = READ_ONCE(sock->ops);
-static LIST_HEAD(gc_candidates);
-static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
+ /* PF_UNIX ? */
+ if (sk && ops && ops->family == PF_UNIX)
+ return unix_sk(sk);
+ }
-static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
- struct sk_buff_head *hitlist)
+ return NULL;
+}
+
+static struct unix_vertex *unix_edge_successor(struct unix_edge *edge)
{
- struct sk_buff *skb;
- struct sk_buff *next;
-
- spin_lock(&x->sk_receive_queue.lock);
- skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
- /* Do we have file descriptors ? */
- if (UNIXCB(skb).fp) {
- bool hit = false;
- /* Process the descriptors of this socket */
- int nfd = UNIXCB(skb).fp->count;
- struct file **fp = UNIXCB(skb).fp->fp;
-
- while (nfd--) {
- /* Get the socket the fd matches if it indeed does so */
- struct sock *sk = unix_get_socket(*fp++);
-
- if (sk) {
- struct unix_sock *u = unix_sk(sk);
-
- /* Ignore non-candidates, they could
- * have been added to the queues after
- * starting the garbage collection
- */
- if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
- hit = true;
-
- func(u);
- }
- }
- }
- if (hit && hitlist != NULL) {
- __skb_unlink(skb, &x->sk_receive_queue);
- __skb_queue_tail(hitlist, skb);
- }
- }
+ /* If an embryo socket has a fd,
+ * the listener indirectly holds the fd's refcnt.
+ */
+ if (edge->successor->listener)
+ return unix_sk(edge->successor->listener)->vertex;
+
+ return edge->successor->vertex;
+}
+
+static bool unix_graph_maybe_cyclic;
+static bool unix_graph_grouped;
+
+static void unix_update_graph(struct unix_vertex *vertex)
+{
+ /* If the receiver socket is not inflight, no cyclic
+ * reference could be formed.
+ */
+ if (!vertex)
+ return;
+
+ unix_graph_maybe_cyclic = true;
+ unix_graph_grouped = false;
+}
+
+static LIST_HEAD(unix_unvisited_vertices);
+
+enum unix_vertex_index {
+ UNIX_VERTEX_INDEX_MARK1,
+ UNIX_VERTEX_INDEX_MARK2,
+ UNIX_VERTEX_INDEX_START,
+};
+
+static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1;
+
+static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
+{
+ struct unix_vertex *vertex = edge->predecessor->vertex;
+
+ if (!vertex) {
+ vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry);
+ vertex->index = unix_vertex_unvisited_index;
+ vertex->out_degree = 0;
+ INIT_LIST_HEAD(&vertex->edges);
+ INIT_LIST_HEAD(&vertex->scc_entry);
+
+ list_move_tail(&vertex->entry, &unix_unvisited_vertices);
+ edge->predecessor->vertex = vertex;
}
- spin_unlock(&x->sk_receive_queue.lock);
+
+ vertex->out_degree++;
+ list_add_tail(&edge->vertex_entry, &vertex->edges);
+
+ unix_update_graph(unix_edge_successor(edge));
}
-static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
- struct sk_buff_head *hitlist)
+static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
{
- if (x->sk_state != TCP_LISTEN) {
- scan_inflight(x, func, hitlist);
- } else {
- struct sk_buff *skb;
- struct sk_buff *next;
- struct unix_sock *u;
- LIST_HEAD(embryos);
+ struct unix_vertex *vertex = edge->predecessor->vertex;
- /* For a listening socket collect the queued embryos
- * and perform a scan on them as well.
- */
- spin_lock(&x->sk_receive_queue.lock);
- skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
- u = unix_sk(skb->sk);
+ if (!fpl->dead)
+ unix_update_graph(unix_edge_successor(edge));
- /* An embryo cannot be in-flight, so it's safe
- * to use the list link.
- */
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &embryos);
- }
- spin_unlock(&x->sk_receive_queue.lock);
+ list_del(&edge->vertex_entry);
+ vertex->out_degree--;
- while (!list_empty(&embryos)) {
- u = list_entry(embryos.next, struct unix_sock, link);
- scan_inflight(&u->sk, func, hitlist);
- list_del_init(&u->link);
- }
+ if (!vertex->out_degree) {
+ edge->predecessor->vertex = NULL;
+ list_move_tail(&vertex->entry, &fpl->vertices);
}
}
-static void dec_inflight(struct unix_sock *usk)
+static void unix_free_vertices(struct scm_fp_list *fpl)
{
- usk->inflight--;
+ struct unix_vertex *vertex, *next_vertex;
+
+ list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
+ list_del(&vertex->entry);
+ kfree(vertex);
+ }
}
-static void inc_inflight(struct unix_sock *usk)
+static DEFINE_SPINLOCK(unix_gc_lock);
+unsigned int unix_tot_inflight;
+
+void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
{
- usk->inflight++;
+ int i = 0, j = 0;
+
+ spin_lock(&unix_gc_lock);
+
+ if (!fpl->count_unix)
+ goto out;
+
+ do {
+ struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]);
+ struct unix_edge *edge;
+
+ if (!inflight)
+ continue;
+
+ edge = fpl->edges + i++;
+ edge->predecessor = inflight;
+ edge->successor = receiver;
+
+ unix_add_edge(fpl, edge);
+ } while (i < fpl->count_unix);
+
+ receiver->scm_stat.nr_unix_fds += fpl->count_unix;
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
+out:
+ WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);
+
+ spin_unlock(&unix_gc_lock);
+
+ fpl->inflight = true;
+
+ unix_free_vertices(fpl);
}
-static void inc_inflight_move_tail(struct unix_sock *u)
+void unix_del_edges(struct scm_fp_list *fpl)
{
- u->inflight++;
+ struct unix_sock *receiver;
+ int i = 0;
+
+ spin_lock(&unix_gc_lock);
- /* If this still might be part of a cycle, move it to the end
- * of the list, so that it's checked even if it was already
- * passed over
+ if (!fpl->count_unix)
+ goto out;
+
+ do {
+ struct unix_edge *edge = fpl->edges + i++;
+
+ unix_del_edge(fpl, edge);
+ } while (i < fpl->count_unix);
+
+ if (!fpl->dead) {
+ receiver = fpl->edges[0].successor;
+ receiver->scm_stat.nr_unix_fds -= fpl->count_unix;
+ }
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
+out:
+ WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);
+
+ spin_unlock(&unix_gc_lock);
+
+ fpl->inflight = false;
+}
+
+void unix_update_edges(struct unix_sock *receiver)
+{
+ /* nr_unix_fds is only updated under unix_state_lock().
+ * If it's 0 here, the embryo socket is not part of the
+ * inflight graph, and GC will not see it, so no lock needed.
*/
- if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
- list_move_tail(&u->link, &gc_candidates);
+ if (!receiver->scm_stat.nr_unix_fds) {
+ receiver->listener = NULL;
+ } else {
+ spin_lock(&unix_gc_lock);
+ unix_update_graph(unix_sk(receiver->listener)->vertex);
+ receiver->listener = NULL;
+ spin_unlock(&unix_gc_lock);
+ }
}
-static bool gc_in_progress;
-#define UNIX_INFLIGHT_TRIGGER_GC 16000
+int unix_prepare_fpl(struct scm_fp_list *fpl)
+{
+ struct unix_vertex *vertex;
+ int i;
+
+ if (!fpl->count_unix)
+ return 0;
+
+ for (i = 0; i < fpl->count_unix; i++) {
+ vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
+ if (!vertex)
+ goto err;
+
+ list_add(&vertex->entry, &fpl->vertices);
+ }
+
+ fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges),
+ GFP_KERNEL_ACCOUNT);
+ if (!fpl->edges)
+ goto err;
+
+ return 0;
-void wait_for_unix_gc(void)
+err:
+ unix_free_vertices(fpl);
+ return -ENOMEM;
+}
+
+void unix_destroy_fpl(struct scm_fp_list *fpl)
{
- /* If number of inflight sockets is insane,
- * force a garbage collect right now.
- * Paired with the WRITE_ONCE() in unix_inflight(),
- * unix_notinflight() and gc_in_progress().
- */
- if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
- !READ_ONCE(gc_in_progress))
- unix_gc();
- wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress));
+ if (fpl->inflight)
+ unix_del_edges(fpl);
+
+ kvfree(fpl->edges);
+ unix_free_vertices(fpl);
}
-/* The external entry point: unix_gc() */
-void unix_gc(void)
+static bool unix_vertex_dead(struct unix_vertex *vertex)
{
- struct sk_buff *next_skb, *skb;
+ struct unix_edge *edge;
struct unix_sock *u;
- struct unix_sock *next;
- struct sk_buff_head hitlist;
- struct list_head cursor;
- LIST_HEAD(not_cycle_list);
+ long total_ref;
- spin_lock(&unix_gc_lock);
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ struct unix_vertex *next_vertex = unix_edge_successor(edge);
- /* Avoid a recursive GC. */
- if (gc_in_progress)
- goto out;
+ /* The vertex's fd can be received by a non-inflight socket. */
+ if (!next_vertex)
+ return false;
- /* Paired with READ_ONCE() in wait_for_unix_gc(). */
- WRITE_ONCE(gc_in_progress, true);
+ /* The vertex's fd can be received by an inflight socket in
+ * another SCC.
+ */
+ if (next_vertex->scc_index != vertex->scc_index)
+ return false;
+ }
- /* First, select candidates for garbage collection. Only
- * in-flight sockets are considered, and from those only ones
- * which don't have any external reference.
- *
- * Holding unix_gc_lock will protect these candidates from
- * being detached, and hence from gaining an external
- * reference. Since there are no possible receivers, all
- * buffers currently on the candidates' queues stay there
- * during the garbage collection.
- *
- * We also know that no new candidate can be added onto the
- * receive queues. Other, non candidate sockets _can_ be
- * added to queue, so we must make sure only to touch
- * candidates.
- *
- * Embryos, though never candidates themselves, affect which
- * candidates are reachable by the garbage collector. Before
- * being added to a listener's queue, an embryo may already
- * receive data carrying SCM_RIGHTS, potentially making the
- * passed socket a candidate that is not yet reachable by the
- * collector. It becomes reachable once the embryo is
- * enqueued. Therefore, we must ensure that no SCM-laden
- * embryo appears in a (candidate) listener's queue between
- * consecutive scan_children() calls.
- */
- list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
- struct sock *sk = &u->sk;
- long total_refs;
-
- total_refs = file_count(sk->sk_socket->file);
-
- BUG_ON(!u->inflight);
- BUG_ON(total_refs < u->inflight);
- if (total_refs == u->inflight) {
- list_move_tail(&u->link, &gc_candidates);
- __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
- __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
-
- if (sk->sk_state == TCP_LISTEN) {
- unix_state_lock_nested(sk, U_LOCK_GC_LISTENER);
- unix_state_unlock(sk);
+ /* No receiver exists out of the same SCC. */
+
+ edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
+ u = edge->predecessor;
+ total_ref = file_count(u->sk.sk_socket->file);
+
+ /* If not close()d, total_ref > out_degree. */
+ if (total_ref != vertex->out_degree)
+ return false;
+
+ return true;
+}
+
+enum unix_recv_queue_lock_class {
+ U_RECVQ_LOCK_NORMAL,
+ U_RECVQ_LOCK_EMBRYO,
+};
+
+static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
+{
+ skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ WARN_ON_ONCE(skb_unref(u->oob_skb));
+ u->oob_skb = NULL;
+ }
+#endif
+}
+
+static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
+{
+ struct unix_vertex *vertex;
+
+ list_for_each_entry_reverse(vertex, scc, scc_entry) {
+ struct sk_buff_head *queue;
+ struct unix_edge *edge;
+ struct unix_sock *u;
+
+ edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
+ u = edge->predecessor;
+ queue = &u->sk.sk_receive_queue;
+
+ spin_lock(&queue->lock);
+
+ if (u->sk.sk_state == TCP_LISTEN) {
+ struct sk_buff *skb;
+
+ skb_queue_walk(queue, skb) {
+ struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
+
+ /* listener -> embryo order, the inversion never happens. */
+ spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO);
+ unix_collect_queue(unix_sk(skb->sk), hitlist);
+ spin_unlock(&embryo_queue->lock);
}
+ } else {
+ unix_collect_queue(u, hitlist);
}
+
+ spin_unlock(&queue->lock);
}
+}
- /* Now remove all internal in-flight reference to children of
- * the candidates.
- */
- list_for_each_entry(u, &gc_candidates, link)
- scan_children(&u->sk, dec_inflight, NULL);
+static bool unix_scc_cyclic(struct list_head *scc)
+{
+ struct unix_vertex *vertex;
+ struct unix_edge *edge;
- /* Restore the references for children of all candidates,
- * which have remaining references. Do this recursively, so
- * only those remain, which form cyclic references.
- *
- * Use a "cursor" link, to make the list traversal safe, even
- * though elements might be moved about.
+ /* SCC containing multiple vertices ? */
+ if (!list_is_singular(scc))
+ return true;
+
+ vertex = list_first_entry(scc, typeof(*vertex), scc_entry);
+
+ /* Self-reference or an embryo-listener circle ? */
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ if (unix_edge_successor(edge) == vertex)
+ return true;
+ }
+
+ return false;
+}
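
The cycles that unix_scc_cyclic() and the dead-SCC test are looking for are easy to create from user space. A minimal sketch (illustration only, not part of this patch; error handling omitted) that parks a socket's own fd on its receive queue and then closes both descriptors, leaving a self-referencing SCC that only the garbage collector can free:

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char data = 'x';
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = u.buf,
		.msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv))
		return 1;

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &sv[0], sizeof(int));

	/* Sent on sv[1], so the message lands on sv[0]'s own receive queue. */
	sendmsg(sv[1], &msg, 0);

	/* The only remaining reference to sv[0] is the skb it holds itself. */
	close(sv[0]);
	close(sv[1]);
	return 0;
}
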
+
+static LIST_HEAD(unix_visited_vertices);
+static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;
+
+static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index,
+ struct sk_buff_head *hitlist)
+{
+ LIST_HEAD(vertex_stack);
+ struct unix_edge *edge;
+ LIST_HEAD(edge_stack);
+
+next_vertex:
+ /* Push vertex to vertex_stack and mark it as on-stack
+ * (index >= UNIX_VERTEX_INDEX_START).
+ * The vertex will be popped when its SCC is finalised later.
*/
- list_add(&cursor, &gc_candidates);
- while (cursor.next != &gc_candidates) {
- u = list_entry(cursor.next, struct unix_sock, link);
+ list_add(&vertex->scc_entry, &vertex_stack);
+
+ vertex->index = *last_index;
+ vertex->scc_index = *last_index;
+ (*last_index)++;
+
+ /* Explore neighbour vertices (receivers of the current vertex's fd). */
+ list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+ struct unix_vertex *next_vertex = unix_edge_successor(edge);
+
+ if (!next_vertex)
+ continue;
+
+ if (next_vertex->index == unix_vertex_unvisited_index) {
+ /* Iterative (non-recursive) depth first search
+ *
+ * 1. Push the forward edge to edge_stack and make
+ * the successor the current vertex for the next iteration.
+ */
+ list_add(&edge->stack_entry, &edge_stack);
- /* Move cursor to after the current position. */
- list_move(&cursor, &u->link);
+ vertex = next_vertex;
+ goto next_vertex;
- if (u->inflight) {
- list_move_tail(&u->link, &not_cycle_list);
- __clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
- scan_children(&u->sk, inc_inflight_move_tail, NULL);
+ /* 2. Pop the edge directed to the current vertex
+ * and restore the ancestor for backtracking.
+ */
+prev_vertex:
+ edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry);
+ list_del_init(&edge->stack_entry);
+
+ next_vertex = vertex;
+ vertex = edge->predecessor->vertex;
+
+ /* If the successor has a smaller scc_index, the two vertices
+ * are in the same SCC, so propagate the smaller scc_index
+ * to skip SCC finalisation.
+ */
+ vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
+ } else if (next_vertex->index != unix_vertex_grouped_index) {
+ /* Loop detected by a back/cross edge.
+ *
+ * The successor is on vertex_stack, so the two vertices are in
+ * the same SCC. If the successor has a smaller *scc_index*,
+ * propagate it to skip SCC finalisation.
+ */
+ vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
+ } else {
+ /* The successor was already grouped as another SCC */
}
}
- list_del(&cursor);
- /* Now gc_candidates contains only garbage. Restore original
- * inflight counters for these as well, and remove the skbuffs
- * which are creating the cycle(s).
- */
- skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link) {
- scan_children(&u->sk, inc_inflight, &hitlist);
+ if (vertex->index == vertex->scc_index) {
+ struct unix_vertex *v;
+ struct list_head scc;
+ bool scc_dead = true;
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
- if (u->oob_skb) {
- kfree_skb(u->oob_skb);
- u->oob_skb = NULL;
+ /* SCC finalised.
+ *
+ * If the scc_index was not updated, all the vertices above on
+ * vertex_stack are in the same SCC. Group them using scc_entry.
+ */
+ __list_cut_position(&scc, &vertex_stack, &vertex->scc_entry);
+
+ list_for_each_entry_reverse(v, &scc, scc_entry) {
+ /* Don't restart DFS from this vertex in unix_walk_scc(). */
+ list_move_tail(&v->entry, &unix_visited_vertices);
+
+ /* Mark vertex as off-stack. */
+ v->index = unix_vertex_grouped_index;
+
+ if (scc_dead)
+ scc_dead = unix_vertex_dead(v);
}
-#endif
+
+ if (scc_dead)
+ unix_collect_skb(&scc, hitlist);
+ else if (!unix_graph_maybe_cyclic)
+ unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
+
+ list_del(&scc);
}
- /* not_cycle_list contains those sockets which do not make up a
- * cycle. Restore these to the inflight list.
+ /* Need backtracking ? */
+ if (!list_empty(&edge_stack))
+ goto prev_vertex;
+}
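
For readers who know Tarjan's algorithm in its textbook form, the walk above computes exactly that, only iteratively: edge_stack replaces the call stack so a long chain of in-flight sockets cannot overflow the kernel stack, and scc_index plays the role of the classic lowlink value. A compact recursive user-space sketch of the same idea (illustration only, not kernel code):

#include <stdio.h>

#define NV 4

static int adj[NV][NV];			/* adjacency matrix: adj[v][w] = edge v -> w */
static int idx[NV], low[NV], on_stack[NV];
static int stack[NV], top;
static int next_index = 1;

static void tarjan(int v)
{
	int w;

	idx[v] = low[v] = next_index++;
	stack[top++] = v;
	on_stack[v] = 1;

	for (w = 0; w < NV; w++) {
		if (!adj[v][w])
			continue;
		if (!idx[w]) {			/* forward (tree) edge */
			tarjan(w);
			if (low[w] < low[v])
				low[v] = low[w];
		} else if (on_stack[w]) {	/* back/cross edge inside the SCC */
			if (idx[w] < low[v])
				low[v] = idx[w];
		}
	}

	if (idx[v] == low[v]) {			/* v is the root of an SCC */
		printf("SCC:");
		do {
			w = stack[--top];
			on_stack[w] = 0;
			printf(" %d", w);
		} while (w != v);
		printf("\n");
	}
}

int main(void)
{
	int v;

	adj[0][1] = adj[1][0] = 1;	/* 0 <-> 1: one two-vertex SCC */
	adj[2][3] = 1;			/* 2 -> 3: two singleton SCCs */

	for (v = 0; v < NV; v++)
		if (!idx[v])
			tarjan(v);
	return 0;
}
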
+
+static void unix_walk_scc(struct sk_buff_head *hitlist)
+{
+ unsigned long last_index = UNIX_VERTEX_INDEX_START;
+
+ unix_graph_maybe_cyclic = false;
+
+ /* Visit every vertex exactly once.
+ * __unix_walk_scc() moves visited vertices to unix_visited_vertices.
*/
- while (!list_empty(&not_cycle_list)) {
- u = list_entry(not_cycle_list.next, struct unix_sock, link);
- __clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
- list_move_tail(&u->link, &gc_inflight_list);
+ while (!list_empty(&unix_unvisited_vertices)) {
+ struct unix_vertex *vertex;
+
+ vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
+ __unix_walk_scc(vertex, &last_index, hitlist);
}
- spin_unlock(&unix_gc_lock);
+ list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
+ swap(unix_vertex_unvisited_index, unix_vertex_grouped_index);
- /* We need io_uring to clean its registered files, ignore all io_uring
- * originated skbs. It's fine as io_uring doesn't keep references to
- * other io_uring instances and so killing all other files in the cycle
- * will put all io_uring references forcing it to go through normal
- * release.path eventually putting registered files.
- */
- skb_queue_walk_safe(&hitlist, skb, next_skb) {
- if (skb->destructor == io_uring_destruct_scm) {
- __skb_unlink(skb, &hitlist);
- skb_queue_tail(&skb->sk->sk_receive_queue, skb);
+ unix_graph_grouped = true;
+}
+
+static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
+{
+ unix_graph_maybe_cyclic = false;
+
+ while (!list_empty(&unix_unvisited_vertices)) {
+ struct unix_vertex *vertex;
+ struct list_head scc;
+ bool scc_dead = true;
+
+ vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
+ list_add(&scc, &vertex->scc_entry);
+
+ list_for_each_entry_reverse(vertex, &scc, scc_entry) {
+ list_move_tail(&vertex->entry, &unix_visited_vertices);
+
+ if (scc_dead)
+ scc_dead = unix_vertex_dead(vertex);
}
+
+ if (scc_dead)
+ unix_collect_skb(&scc, hitlist);
+ else if (!unix_graph_maybe_cyclic)
+ unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
+
+ list_del(&scc);
}
- /* Here we are. Hitlist is filled. Die. */
- __skb_queue_purge(&hitlist);
+ list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
+}
+
+static bool gc_in_progress;
+
+static void __unix_gc(struct work_struct *work)
+{
+ struct sk_buff_head hitlist;
+ struct sk_buff *skb;
spin_lock(&unix_gc_lock);
- /* There could be io_uring registered files, just push them back to
- * the inflight list
- */
- list_for_each_entry_safe(u, next, &gc_candidates, link)
- list_move_tail(&u->link, &gc_inflight_list);
+ if (!unix_graph_maybe_cyclic) {
+ spin_unlock(&unix_gc_lock);
+ goto skip_gc;
+ }
+
+ __skb_queue_head_init(&hitlist);
+
+ if (unix_graph_grouped)
+ unix_walk_scc_fast(&hitlist);
+ else
+ unix_walk_scc(&hitlist);
- /* All candidates should have been detached by now. */
- BUG_ON(!list_empty(&gc_candidates));
+ spin_unlock(&unix_gc_lock);
+
+ skb_queue_walk(&hitlist, skb) {
+ if (UNIXCB(skb).fp)
+ UNIXCB(skb).fp->dead = true;
+ }
- /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ __skb_queue_purge(&hitlist);
+skip_gc:
WRITE_ONCE(gc_in_progress, false);
+}
- wake_up(&unix_gc_wait);
+static DECLARE_WORK(unix_gc_work, __unix_gc);
- out:
- spin_unlock(&unix_gc_lock);
+void unix_gc(void)
+{
+ WRITE_ONCE(gc_in_progress, true);
+ queue_work(system_unbound_wq, &unix_gc_work);
+}
+
+#define UNIX_INFLIGHT_TRIGGER_GC 16000
+#define UNIX_INFLIGHT_SANE_USER (SCM_MAX_FD * 8)
+
+void wait_for_unix_gc(struct scm_fp_list *fpl)
+{
+ /* If the number of in-flight sockets is insane,
+ * force a garbage collection right now.
+ *
+ * Paired with the WRITE_ONCE() in unix_inflight(),
+ * unix_notinflight(), and __unix_gc().
+ */
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
+ unix_gc();
+
+ /* Penalise users who want to send AF_UNIX sockets
+ * but whose sockets have not been received yet.
+ */
+ if (!fpl || !fpl->count_unix ||
+ READ_ONCE(fpl->user->unix_inflight) < UNIX_INFLIGHT_SANE_USER)
+ return;
+
+ if (READ_ONCE(gc_in_progress))
+ flush_work(&unix_gc_work);
}
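
For scale (assuming the mainline value SCM_MAX_FD == 253): UNIX_INFLIGHT_SANE_USER works out to 253 * 8 = 2024, so a single user needs more than two thousand AF_UNIX fds sitting unread in flight before wait_for_unix_gc() makes that user's sendmsg() wait for the worker to finish, while the global UNIX_INFLIGHT_TRIGGER_GC threshold of 16000 in-flight sockets is what kicks off a collection in the first place.
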
diff --git a/net/unix/scm.c b/net/unix/scm.c
deleted file mode 100644
index e92f2fad6410..000000000000
--- a/net/unix/scm.c
+++ /dev/null
@@ -1,161 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/socket.h>
-#include <linux/net.h>
-#include <linux/fs.h>
-#include <net/af_unix.h>
-#include <net/scm.h>
-#include <linux/init.h>
-#include <linux/io_uring.h>
-
-#include "scm.h"
-
-unsigned int unix_tot_inflight;
-EXPORT_SYMBOL(unix_tot_inflight);
-
-LIST_HEAD(gc_inflight_list);
-EXPORT_SYMBOL(gc_inflight_list);
-
-DEFINE_SPINLOCK(unix_gc_lock);
-EXPORT_SYMBOL(unix_gc_lock);
-
-struct sock *unix_get_socket(struct file *filp)
-{
- struct sock *u_sock = NULL;
- struct inode *inode = file_inode(filp);
-
- /* Socket ? */
- if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
- struct socket *sock = SOCKET_I(inode);
- const struct proto_ops *ops = READ_ONCE(sock->ops);
- struct sock *s = sock->sk;
-
- /* PF_UNIX ? */
- if (s && ops && ops->family == PF_UNIX)
- u_sock = s;
- }
-
- return u_sock;
-}
-EXPORT_SYMBOL(unix_get_socket);
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- if (!u->inflight) {
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &gc_inflight_list);
- } else {
- BUG_ON(list_empty(&u->link));
- }
- u->inflight++;
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
- }
- WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
- spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- BUG_ON(!u->inflight);
- BUG_ON(list_empty(&u->link));
-
- u->inflight--;
- if (!u->inflight)
- list_del_init(&u->link);
- /* Paired with READ_ONCE() in wait_for_unix_gc() */
- WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
- }
- WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
- spin_unlock(&unix_gc_lock);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
- struct user_struct *user = current_user();
-
- if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
-}
-
-int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- if (too_many_unix_fds(current))
- return -ETOOMANYREFS;
-
- /*
- * Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
- */
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
-
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return 0;
-}
-EXPORT_SYMBOL(unix_attach_fds);
-
-void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- scm->fp = UNIXCB(skb).fp;
- UNIXCB(skb).fp = NULL;
-
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-EXPORT_SYMBOL(unix_detach_fds);
-
-void unix_destruct_scm(struct sk_buff *skb)
-{
- struct scm_cookie scm;
-
- memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
- if (UNIXCB(skb).fp)
- unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
- scm_destroy(&scm);
- sock_wfree(skb);
-}
-EXPORT_SYMBOL(unix_destruct_scm);
-
-void io_uring_destruct_scm(struct sk_buff *skb)
-{
- unix_destruct_scm(skb);
-}
-EXPORT_SYMBOL(io_uring_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
deleted file mode 100644
index 5a255a477f16..000000000000
--- a/net/unix/scm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef NET_UNIX_SCM_H
-#define NET_UNIX_SCM_H
-
-extern struct list_head gc_inflight_list;
-extern spinlock_t unix_gc_lock;
-
-int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
-void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
-
-#endif
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 04dc0c8a8370..7188d3592dde 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -371,7 +371,6 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
xdo->dev = dev;
netdev_tracker_alloc(dev, &xdo->dev_tracker, GFP_ATOMIC);
- xdo->real_dev = dev;
xdo->type = XFRM_DEV_OFFLOAD_PACKET;
switch (dir) {
case XFRM_POLICY_IN:
@@ -393,7 +392,6 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack);
if (err) {
xdo->dev = NULL;
- xdo->real_dev = NULL;
xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
xdo->dir = 0;
netdev_put(dev, &xdo->dev_tracker);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 68b3f9e7edff..2edb0f868c57 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1603,6 +1603,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct xfrm_policy *delpol;
struct hlist_head *chain;
+ /* Sanitize mark before store */
+ policy->mark.v &= policy->mark.m;
+
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
if (chain)
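
Why the masking matters, with illustrative numbers not taken from the patch: the xfrm mark lookups match on (mark & m) == v, so a policy inserted with v = 0x1234 but m = 0x00ff could never match any packet unless the value is first reduced to v &= m, i.e. 0x0034. Sanitising at insertion time keeps the stored key consistent with how it will later be compared; the same fix is applied to states in __xfrm_state_insert() below.
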
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 8a6e8656d014..d2bd5bddfb05 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -754,9 +754,6 @@ int __xfrm_state_delete(struct xfrm_state *x)
net->xfrm.state_num--;
spin_unlock(&net->xfrm.xfrm_state_lock);
- if (x->encap_sk)
- sock_put(rcu_dereference_raw(x->encap_sk));
-
xfrm_dev_state_delete(x);
/* All xfrm_state objects are created by xfrm_state_alloc.
@@ -1329,7 +1326,6 @@ found:
xso->type = XFRM_DEV_OFFLOAD_PACKET;
xso->dir = xdo->dir;
xso->dev = xdo->dev;
- xso->real_dev = xdo->real_dev;
xso->flags = XFRM_DEV_OFFLOAD_FLAG_ACQ;
netdev_hold(xso->dev, &xso->dev_tracker, GFP_ATOMIC);
error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL);
@@ -1337,7 +1333,6 @@ found:
xso->dir = 0;
netdev_put(xso->dev, &xso->dev_tracker);
xso->dev = NULL;
- xso->real_dev = NULL;
xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
x->km.state = XFRM_STATE_DEAD;
to_put = x;
@@ -1478,6 +1473,9 @@ static void __xfrm_state_insert(struct xfrm_state *x)
list_add(&x->km.all, &net->xfrm.state_all);
+ /* Sanitize mark before store */
+ x->mark.v &= x->mark.m;
+
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family);
XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,