Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/debugfs.c3
-rw-r--r--net/802/Makefile1
-rw-r--r--net/802/garp.c14
-rw-r--r--net/802/mrp.c14
-rw-r--r--net/802/p8023.c60
-rw-r--r--net/8021q/vlan.c5
-rw-r--r--net/8021q/vlan.h6
-rw-r--r--net/8021q/vlan_dev.c12
-rw-r--r--net/9p/client.c6
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/9p/trans_virtio.c10
-rw-r--r--net/9p/trans_xen.c4
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/ddp.c10
-rw-r--r--net/atm/atm_sysfs.c24
-rw-r--r--net/atm/br2684.c4
-rw-r--r--net/atm/resources.c7
-rw-r--r--net/ax25/ax25_ip.c4
-rw-r--r--net/ax25/ax25_out.c13
-rw-r--r--net/ax25/ax25_route.c13
-rw-r--r--net/batman-adv/bat_iv_ogm.c81
-rw-r--r--net/batman-adv/bat_v.c40
-rw-r--r--net/batman-adv/bat_v_elp.c9
-rw-r--r--net/batman-adv/bat_v_ogm.c39
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c43
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h1
-rw-r--r--net/batman-adv/distributed-arp-table.c27
-rw-r--r--net/batman-adv/fragmentation.c6
-rw-r--r--net/batman-adv/gateway_client.c60
-rw-r--r--net/batman-adv/gateway_client.h16
-rw-r--r--net/batman-adv/gateway_common.c2
-rw-r--r--net/batman-adv/hard-interface.c86
-rw-r--r--net/batman-adv/hard-interface.h6
-rw-r--r--net/batman-adv/hash.h2
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/batman-adv/main.h3
-rw-r--r--net/batman-adv/multicast.c52
-rw-r--r--net/batman-adv/netlink.c14
-rw-r--r--net/batman-adv/network-coding.c24
-rw-r--r--net/batman-adv/originator.c114
-rw-r--r--net/batman-adv/originator.h96
-rw-r--r--net/batman-adv/routing.c48
-rw-r--r--net/batman-adv/send.c399
-rw-r--r--net/batman-adv/send.h12
-rw-r--r--net/batman-adv/soft-interface.c76
-rw-r--r--net/batman-adv/soft-interface.h18
-rw-r--r--net/batman-adv/tp_meter.c27
-rw-r--r--net/batman-adv/translation-table.c109
-rw-r--r--net/batman-adv/translation-table.h18
-rw-r--r--net/batman-adv/tvlv.c9
-rw-r--r--net/bluetooth/6lowpan.c54
-rw-r--r--net/bluetooth/a2mp.c24
-rw-r--r--net/bluetooth/amp.c6
-rw-r--r--net/bluetooth/bnep/core.c8
-rw-r--r--net/bluetooth/cmtp/capi.c22
-rw-r--r--net/bluetooth/cmtp/cmtp.h2
-rw-r--r--net/bluetooth/cmtp/core.c5
-rw-r--r--net/bluetooth/hci_conn.c12
-rw-r--r--net/bluetooth/hci_core.c124
-rw-r--r--net/bluetooth/hci_debugfs.c8
-rw-r--r--net/bluetooth/hci_event.c394
-rw-r--r--net/bluetooth/hci_request.c284
-rw-r--r--net/bluetooth/hci_sock.c67
-rw-r--r--net/bluetooth/hci_sysfs.c4
-rw-r--r--net/bluetooth/hidp/core.c8
-rw-r--r--net/bluetooth/l2cap_core.c16
-rw-r--r--net/bluetooth/mgmt.c64
-rw-r--r--net/bluetooth/mgmt_config.c4
-rw-r--r--net/bluetooth/msft.c8
-rw-r--r--net/bluetooth/rfcomm/sock.c8
-rw-r--r--net/bluetooth/rfcomm/tty.c26
-rw-r--r--net/bluetooth/sco.c114
-rw-r--r--net/bluetooth/smp.c84
-rw-r--r--net/bluetooth/smp.h6
-rw-r--r--net/bpf/test_run.c189
-rw-r--r--net/bpfilter/main.c2
-rw-r--r--net/bridge/br.c62
-rw-r--r--net/bridge/br_cfm.c2
-rw-r--r--net/bridge/br_device.c16
-rw-r--r--net/bridge/br_fdb.c91
-rw-r--r--net/bridge/br_forward.c19
-rw-r--r--net/bridge/br_if.c34
-rw-r--r--net/bridge/br_input.c24
-rw-r--r--net/bridge/br_ioctl.c83
-rw-r--r--net/bridge/br_mdb.c209
-rw-r--r--net/bridge/br_mrp.c33
-rw-r--r--net/bridge/br_multicast.c2089
-rw-r--r--net/bridge/br_multicast_eht.c92
-rw-r--r--net/bridge/br_netlink.c62
-rw-r--r--net/bridge/br_private.h608
-rw-r--r--net/bridge/br_private_mcast_eht.h3
-rw-r--r--net/bridge/br_private_mrp.h11
-rw-r--r--net/bridge/br_private_tunnel.h6
-rw-r--r--net/bridge/br_stp.c4
-rw-r--r--net/bridge/br_switchdev.c256
-rw-r--r--net/bridge/br_sysfs_br.c48
-rw-r--r--net/bridge/br_sysfs_if.c4
-rw-r--r--net/bridge/br_vlan.c155
-rw-r--r--net/bridge/br_vlan_options.c427
-rw-r--r--net/bridge/br_vlan_tunnel.c14
-rw-r--r--net/bridge/netfilter/ebtable_broute.c17
-rw-r--r--net/bridge/netfilter/ebtable_filter.c17
-rw-r--r--net/bridge/netfilter/ebtable_nat.c17
-rw-r--r--net/bridge/netfilter/ebtables.c109
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c6
-rw-r--r--net/caif/caif_socket.c5
-rw-r--r--net/caif/cfcnfg.c2
-rw-r--r--net/caif/chnl_net.c2
-rw-r--r--net/can/bcm.c11
-rw-r--r--net/can/gw.c3
-rw-r--r--net/can/isotp.c47
-rw-r--r--net/can/j1939/j1939-priv.h10
-rw-r--r--net/can/j1939/main.c4
-rw-r--r--net/can/j1939/socket.c152
-rw-r--r--net/can/j1939/transport.c81
-rw-r--r--net/can/proc.c6
-rw-r--r--net/can/raw.c34
-rw-r--r--net/ceph/auth.c7
-rw-r--r--net/ceph/auth_none.c4
-rw-r--r--net/ceph/auth_none.h1
-rw-r--r--net/ceph/auth_x_protocol.h2
-rw-r--r--net/ceph/cls_lock_client.c12
-rw-r--r--net/ceph/mon_client.c2
-rw-r--r--net/ceph/osdmap.c4
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/bpf_sk_storage.c7
-rw-r--r--net/core/dev.c434
-rw-r--r--net/core/dev_addr_lists.c144
-rw-r--r--net/core/dev_ioctl.c264
-rw-r--r--net/core/devlink.c1358
-rw-r--r--net/core/drop_monitor.c6
-rw-r--r--net/core/dst.c6
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/filter.c250
-rw-r--r--net/core/flow_dissector.c34
-rw-r--r--net/core/flow_offload.c90
-rw-r--r--net/core/link_watch.c5
-rw-r--r--net/core/lwtunnel.c5
-rw-r--r--net/core/neighbour.c31
-rw-r--r--net/core/net-procfs.c24
-rw-r--r--net/core/net-traces.c1
-rw-r--r--net/core/net_namespace.c52
-rw-r--r--net/core/netpoll.c8
-rw-r--r--net/core/page_pool.c142
-rw-r--r--net/core/pktgen.c206
-rw-r--r--net/core/ptp_classifier.c2
-rw-r--r--net/core/rtnetlink.c104
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/selftests.c12
-rw-r--r--net/core/skbuff.c120
-rw-r--r--net/core/skmsg.c131
-rw-r--r--net/core/sock.c212
-rw-r--r--net/core/sock_map.c25
-rw-r--r--net/core/sock_reuseport.c366
-rw-r--r--net/core/xdp.c39
-rw-r--r--net/dcb/dcbnl.c6
-rw-r--r--net/dccp/ccids/lib/tfrc_equation.c1
-rw-r--r--net/dccp/dccp.h6
-rw-r--r--net/dccp/ipv4.c5
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/dccp/timer.c2
-rw-r--r--net/decnet/af_decnet.c27
-rw-r--r--net/decnet/dn_dev.c6
-rw-r--r--net/decnet/dn_fib.c9
-rw-r--r--net/decnet/dn_nsp_in.c2
-rw-r--r--net/decnet/dn_nsp_out.c2
-rw-r--r--net/decnet/dn_route.c20
-rw-r--r--net/devres.c2
-rw-r--r--net/dsa/Kconfig13
-rw-r--r--net/dsa/Makefile3
-rw-r--r--net/dsa/dsa.c2
-rw-r--r--net/dsa/dsa2.c148
-rw-r--r--net/dsa/dsa_priv.h213
-rw-r--r--net/dsa/master.c12
-rw-r--r--net/dsa/port.c388
-rw-r--r--net/dsa/slave.c473
-rw-r--r--net/dsa/switch.c399
-rw-r--r--net/dsa/tag_8021q.c623
-rw-r--r--net/dsa/tag_ar9331.c5
-rw-r--r--net/dsa/tag_brcm.c40
-rw-r--r--net/dsa/tag_dsa.c99
-rw-r--r--net/dsa/tag_gswip.c5
-rw-r--r--net/dsa/tag_hellcreek.c8
-rw-r--r--net/dsa/tag_ksz.c26
-rw-r--r--net/dsa/tag_lan9303.c26
-rw-r--r--net/dsa/tag_mtk.c21
-rw-r--r--net/dsa/tag_ocelot.c9
-rw-r--r--net/dsa/tag_ocelot_8021q.c25
-rw-r--r--net/dsa/tag_qca.c18
-rw-r--r--net/dsa/tag_rtl4_a.c30
-rw-r--r--net/dsa/tag_sja1105.c516
-rw-r--r--net/dsa/tag_trailer.c6
-rw-r--r--net/dsa/tag_xrs700x.c8
-rw-r--r--net/ethernet/eth.c8
-rw-r--r--net/ethtool/Makefile2
-rw-r--r--net/ethtool/coalesce.c29
-rw-r--r--net/ethtool/common.c14
-rw-r--r--net/ethtool/eeprom.c13
-rw-r--r--net/ethtool/ioctl.c172
-rw-r--r--net/ethtool/netlink.c72
-rw-r--r--net/ethtool/netlink.h23
-rw-r--r--net/ethtool/phc_vclocks.c94
-rw-r--r--net/hsr/hsr_framereg.c3
-rw-r--r--net/ieee802154/nl-phy.c3
-rw-r--r--net/ieee802154/nl802154.c3
-rw-r--r--net/ieee802154/socket.c14
-rw-r--r--net/ipv4/af_inet.c18
-rw-r--r--net/ipv4/ah4.c2
-rw-r--r--net/ipv4/bpf_tcp_ca.c41
-rw-r--r--net/ipv4/cipso_ipv4.c3
-rw-r--r--net/ipv4/devinet.c25
-rw-r--r--net/ipv4/esp4.c8
-rw-r--r--net/ipv4/esp4_offload.c4
-rw-r--r--net/ipv4/fib_frontend.c14
-rw-r--r--net/ipv4/fib_lookup.h2
-rw-r--r--net/ipv4/fib_semantics.c13
-rw-r--r--net/ipv4/fib_trie.c4
-rw-r--r--net/ipv4/fou.c10
-rw-r--r--net/ipv4/gre_demux.c1
-rw-r--r--net/ipv4/icmp.c68
-rw-r--r--net/ipv4/igmp.c51
-rw-r--r--net/ipv4/inet_connection_sock.c205
-rw-r--r--net/ipv4/inet_diag.c17
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/ip_gre.c16
-rw-r--r--net/ipv4/ip_output.c71
-rw-r--r--net/ipv4/ip_sockglue.c24
-rw-r--r--net/ipv4/ip_tunnel.c29
-rw-r--r--net/ipv4/ip_vti.c3
-rw-r--r--net/ipv4/ipcomp.c2
-rw-r--r--net/ipv4/ipip.c4
-rw-r--r--net/ipv4/ipmr.c7
-rw-r--r--net/ipv4/netfilter/arptable_filter.c23
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c56
-rw-r--r--net/ipv4/netfilter/iptable_filter.c24
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c19
-rw-r--r--net/ipv4/netfilter/iptable_nat.c20
-rw-r--r--net/ipv4/netfilter/iptable_raw.c21
-rw-r--r--net/ipv4/netfilter/iptable_security.c23
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c2
-rw-r--r--net/ipv4/nexthop.c2
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/protocol.c6
-rw-r--r--net/ipv4/raw.c4
-rw-r--r--net/ipv4/raw_diag.c7
-rw-r--r--net/ipv4/route.c217
-rw-r--r--net/ipv4/sysctl_net_ipv4.c40
-rw-r--r--net/ipv4/tcp.c26
-rw-r--r--net/ipv4/tcp_bbr.c2
-rw-r--r--net/ipv4/tcp_bpf.c33
-rw-r--r--net/ipv4/tcp_fastopen.c50
-rw-r--r--net/ipv4/tcp_input.c123
-rw-r--r--net/ipv4/tcp_ipv4.c444
-rw-r--r--net/ipv4/tcp_minisocks.c7
-rw-r--r--net/ipv4/tcp_offload.c3
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv4/tcp_recovery.c3
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv4/tunnel4.c3
-rw-r--r--net/ipv4/udp.c39
-rw-r--r--net/ipv4/udp_bpf.c56
-rw-r--r--net/ipv4/udp_diag.c6
-rw-r--r--net/ipv4/udp_offload.c12
-rw-r--r--net/ipv4/udplite.c1
-rw-r--r--net/ipv4/xfrm4_protocol.c3
-rw-r--r--net/ipv4/xfrm4_tunnel.c1
-rw-r--r--net/ipv6/Kconfig11
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c101
-rw-r--r--net/ipv6/af_inet6.c16
-rw-r--r--net/ipv6/ah6.c2
-rw-r--r--net/ipv6/esp6.c4
-rw-r--r--net/ipv6/esp6_offload.c1
-rw-r--r--net/ipv6/exthdrs.c185
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/icmp.c21
-rw-r--r--net/ipv6/ioam6.c910
-rw-r--r--net/ipv6/ioam6_iptunnel.c274
-rw-r--r--net/ipv6/ip6_fib.c15
-rw-r--r--net/ipv6/ip6_gre.c17
-rw-r--r--net/ipv6/ip6_output.c113
-rw-r--r--net/ipv6/ip6_tunnel.c26
-rw-r--r--net/ipv6/ip6_vti.c21
-rw-r--r--net/ipv6/ip6mr.c3
-rw-r--r--net/ipv6/ipcomp6.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c18
-rw-r--r--net/ipv6/mcast.c55
-rw-r--r--net/ipv6/mip6.c99
-rw-r--r--net/ipv6/ndisc.c17
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c23
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c22
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c16
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c24
-rw-r--r--net/ipv6/netfilter/ip6table_security.c22
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c2
-rw-r--r--net/ipv6/output_core.c28
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/route.c183
-rw-r--r--net/ipv6/seg6_iptunnel.c76
-rw-r--r--net/ipv6/seg6_local.c204
-rw-r--r--net/ipv6/sit.c46
-rw-r--r--net/ipv6/sysctl_net_ipv6.c50
-rw-r--r--net/ipv6/tcp_ipv6.c41
-rw-r--r--net/ipv6/udp.c31
-rw-r--r--net/ipv6/xfrm6_output.c9
-rw-r--r--net/ipv6/xfrm6_tunnel.c1
-rw-r--r--net/iucv/af_iucv.c99
-rw-r--r--net/iucv/iucv.c82
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/key/af_key.c6
-rw-r--r--net/l2tp/l2tp_ip.c3
-rw-r--r--net/l2tp/l2tp_ppp.c2
-rw-r--r--net/lapb/lapb_iface.c4
-rw-r--r--net/llc/af_llc.c16
-rw-r--r--net/llc/llc_s_ac.c2
-rw-r--r--net/mac80211/cfg.c298
-rw-r--r--net/mac80211/chan.c108
-rw-r--r--net/mac80211/debugfs.c70
-rw-r--r--net/mac80211/debugfs_netdev.c33
-rw-r--r--net/mac80211/debugfs_sta.c24
-rw-r--r--net/mac80211/driver-ops.h62
-rw-r--r--net/mac80211/he.c8
-rw-r--r--net/mac80211/ht.c18
-rw-r--r--net/mac80211/ibss.c15
-rw-r--r--net/mac80211/ieee80211_i.h217
-rw-r--r--net/mac80211/iface.c286
-rw-r--r--net/mac80211/led.c12
-rw-r--r--net/mac80211/main.c34
-rw-r--r--net/mac80211/mesh.h2
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c2
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/mlme.c252
-rw-r--r--net/mac80211/rate.c13
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c34
-rw-r--r--net/mac80211/rx.c159
-rw-r--r--net/mac80211/s1g.c180
-rw-r--r--net/mac80211/sta_info.c85
-rw-r--r--net/mac80211/sta_info.h11
-rw-r--r--net/mac80211/status.c59
-rw-r--r--net/mac80211/tdls.c28
-rw-r--r--net/mac80211/trace.h100
-rw-r--r--net/mac80211/tx.c510
-rw-r--r--net/mac80211/util.c47
-rw-r--r--net/mac802154/iface.c2
-rw-r--r--net/mctp/Kconfig13
-rw-r--r--net/mctp/Makefile3
-rw-r--r--net/mctp/af_mctp.c395
-rw-r--r--net/mctp/device.c423
-rw-r--r--net/mctp/neigh.c342
-rw-r--r--net/mctp/route.c1116
-rw-r--r--net/mpls/af_mpls.c2
-rw-r--r--net/mptcp/ctrl.c88
-rw-r--r--net/mptcp/mib.c7
-rw-r--r--net/mptcp/mib.h7
-rw-r--r--net/mptcp/mptcp_diag.c7
-rw-r--r--net/mptcp/options.c639
-rw-r--r--net/mptcp/pm.c85
-rw-r--r--net/mptcp/pm_netlink.c276
-rw-r--r--net/mptcp/protocol.c527
-rw-r--r--net/mptcp/protocol.h165
-rw-r--r--net/mptcp/sockopt.c187
-rw-r--r--net/mptcp/subflow.c262
-rw-r--r--net/mptcp/syncookies.c16
-rw-r--r--net/mptcp/token.c9
-rw-r--r--net/ncsi/Kconfig6
-rw-r--r--net/ncsi/internal.h12
-rw-r--r--net/ncsi/ncsi-manage.c78
-rw-r--r--net/ncsi/ncsi-pkt.h6
-rw-r--r--net/ncsi/ncsi-rsp.c53
-rw-r--r--net/netfilter/Kconfig12
-rw-r--r--net/netfilter/Makefile6
-rw-r--r--net/netfilter/ipset/ip_set_core.c50
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c9
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c3
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c16
-rw-r--r--net/netfilter/ipvs/Kconfig2
-rw-r--r--net/netfilter/nf_conntrack_core.c212
-rw-r--r--net/netfilter/nf_conntrack_ecache.c219
-rw-r--r--net/netfilter/nf_conntrack_expect.c37
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c2
-rw-r--r--net/netfilter/nf_conntrack_helper.c6
-rw-r--r--net/netfilter/nf_conntrack_netlink.c227
-rw-r--r--net/netfilter/nf_conntrack_proto.c29
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c14
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_icmp.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_icmpv6.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c96
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c10
-rw-r--r--net/netfilter/nf_conntrack_standalone.c67
-rw-r--r--net/netfilter/nf_flow_table_core.c58
-rw-r--r--net/netfilter/nf_flow_table_offload.c8
-rw-r--r--net/netfilter/nf_hooks_lwtunnel.c53
-rw-r--r--net/netfilter/nf_nat_core.c18
-rw-r--r--net/netfilter/nf_queue.c43
-rw-r--r--net/netfilter/nf_tables_api.c280
-rw-r--r--net/netfilter/nf_tables_core.c3
-rw-r--r--net/netfilter/nf_tables_offload.c35
-rw-r--r--net/netfilter/nf_tables_trace.c6
-rw-r--r--net/netfilter/nfnetlink.c3
-rw-r--r--net/netfilter/nfnetlink_acct.c9
-rw-r--r--net/netfilter/nfnetlink_cthelper.c10
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c34
-rw-r--r--net/netfilter/nfnetlink_hook.c391
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c24
-rw-r--r--net/netfilter/nft_chain_filter.c26
-rw-r--r--net/netfilter/nft_chain_nat.c4
-rw-r--r--net/netfilter/nft_chain_route.c4
-rw-r--r--net/netfilter/nft_compat.c53
-rw-r--r--net/netfilter/nft_ct.c9
-rw-r--r--net/netfilter/nft_exthdr.c67
-rw-r--r--net/netfilter/nft_flow_offload.c2
-rw-r--r--net/netfilter/nft_last.c99
-rw-r--r--net/netfilter/nft_lookup.c35
-rw-r--r--net/netfilter/nft_nat.c4
-rw-r--r--net/netfilter/nft_objref.c4
-rw-r--r--net/netfilter/nft_osf.c5
-rw-r--r--net/netfilter/nft_payload.c10
-rw-r--r--net/netfilter/nft_reject_inet.c4
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/nft_set_hash.c17
-rw-r--r--net/netfilter/nft_set_pipapo.h2
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c12
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.h2
-rw-r--r--net/netfilter/nft_set_rbtree.c5
-rw-r--r--net/netfilter/nft_synproxy.c4
-rw-r--r--net/netfilter/nft_tproxy.c13
-rw-r--r--net/netfilter/x_tables.c98
-rw-r--r--net/netfilter/xt_AUDIT.c2
-rw-r--r--net/netfilter/xt_CT.c12
-rw-r--r--net/netfilter/xt_bpf.c2
-rw-r--r--net/netfilter/xt_limit.c46
-rw-r--r--net/netlabel/netlabel_calipso.c4
-rw-r--r--net/netlabel/netlabel_cipso_v4.c16
-rw-r--r--net/netlabel/netlabel_domainhash.c2
-rw-r--r--net/netlabel/netlabel_kapi.c2
-rw-r--r--net/netlabel/netlabel_mgmt.c27
-rw-r--r--net/netlabel/netlabel_unlabeled.c16
-rw-r--r--net/netlabel/netlabel_user.h4
-rw-r--r--net/netlink/af_netlink.c14
-rw-r--r--net/netlink/genetlink.c17
-rw-r--r--net/netrom/nr_loopback.c3
-rw-r--r--net/netrom/nr_route.c7
-rw-r--r--net/netrom/nr_timer.c20
-rw-r--r--net/nfc/af_nfc.c2
-rw-r--r--net/nfc/core.c8
-rw-r--r--net/nfc/digital_core.c4
-rw-r--r--net/nfc/hci/command.c2
-rw-r--r--net/nfc/hci/core.c16
-rw-r--r--net/nfc/hci/llc.c4
-rw-r--r--net/nfc/hci/llc.h6
-rw-r--r--net/nfc/hci/llc_nop.c2
-rw-r--r--net/nfc/hci/llc_shdlc.c14
-rw-r--r--net/nfc/llcp.h8
-rw-r--r--net/nfc/llcp_commands.c46
-rw-r--r--net/nfc/llcp_core.c44
-rw-r--r--net/nfc/nci/core.c176
-rw-r--r--net/nfc/nci/data.c12
-rw-r--r--net/nfc/nci/hci.c54
-rw-r--r--net/nfc/nci/ntf.c87
-rw-r--r--net/nfc/nci/rsp.c48
-rw-r--r--net/nfc/nci/spi.c2
-rw-r--r--net/nfc/nci/uart.c7
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/nfc/nfc.h2
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/openvswitch/Makefile3
-rw-r--r--net/openvswitch/actions.c12
-rw-r--r--net/openvswitch/conntrack.c11
-rw-r--r--net/openvswitch/datapath.c80
-rw-r--r--net/openvswitch/datapath.h20
-rw-r--r--net/openvswitch/flow.c13
-rw-r--r--net/openvswitch/flow_table.c6
-rw-r--r--net/openvswitch/openvswitch_trace.c10
-rw-r--r--net/openvswitch/openvswitch_trace.h158
-rw-r--r--net/openvswitch/vport.c1
-rw-r--r--net/packet/af_packet.c27
-rw-r--r--net/phonet/af_phonet.c3
-rw-r--r--net/phonet/pn_dev.c12
-rw-r--r--net/phonet/socket.c3
-rw-r--r--net/qrtr/ns.c4
-rw-r--r--net/qrtr/qrtr.c20
-rw-r--r--net/rds/ib_frmr.c4
-rw-r--r--net/rds/ib_ring.c2
-rw-r--r--net/rds/tcp_connect.c1
-rw-r--r--net/rds/tcp_recv.c2
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/rxrpc/Kconfig7
-rw-r--r--net/rxrpc/af_rxrpc.c1
-rw-r--r--net/rxrpc/local_event.c2
-rw-r--r--net/sched/act_api.c76
-rw-r--r--net/sched/act_bpf.c10
-rw-r--r--net/sched/act_connmark.c4
-rw-r--r--net/sched/act_csum.c7
-rw-r--r--net/sched/act_ct.c18
-rw-r--r--net/sched/act_ctinfo.c4
-rw-r--r--net/sched/act_gact.c4
-rw-r--r--net/sched/act_gate.c4
-rw-r--r--net/sched/act_ife.c9
-rw-r--r--net/sched/act_ipt.c21
-rw-r--r--net/sched/act_mirred.c13
-rw-r--r--net/sched/act_mpls.c4
-rw-r--r--net/sched/act_nat.c6
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/act_sample.c7
-rw-r--r--net/sched/act_simple.c4
-rw-r--r--net/sched/act_skbedit.c4
-rw-r--r--net/sched/act_skbmod.c49
-rw-r--r--net/sched/act_tunnel_key.c4
-rw-r--r--net/sched/act_vlan.c15
-rw-r--r--net/sched/cls_api.c91
-rw-r--r--net/sched/cls_basic.c10
-rw-r--r--net/sched/cls_bpf.c15
-rw-r--r--net/sched/cls_cgroup.c6
-rw-r--r--net/sched/cls_flow.c6
-rw-r--r--net/sched/cls_flower.c18
-rw-r--r--net/sched/cls_fw.c13
-rw-r--r--net/sched/cls_matchall.c17
-rw-r--r--net/sched/cls_route.c10
-rw-r--r--net/sched/cls_rsvp.h9
-rw-r--r--net/sched/cls_tcindex.c17
-rw-r--r--net/sched/cls_u32.c24
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c10
-rw-r--r--net/sched/sch_atm.c2
-rw-r--r--net/sched/sch_cake.c4
-rw-r--r--net/sched/sch_cbq.c4
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_ets.c9
-rw-r--r--net/sched/sch_fq_codel.c14
-rw-r--r--net/sched/sch_fq_pie.c2
-rw-r--r--net/sched/sch_generic.c43
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_htb.c138
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c10
-rw-r--r--net/sched/sch_sfb.c2
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sched/sch_taprio.c92
-rw-r--r--net/sctp/associola.c6
-rw-r--r--net/sctp/auth.c12
-rw-r--r--net/sctp/bind_addr.c19
-rw-r--r--net/sctp/debug.c1
-rw-r--r--net/sctp/diag.c6
-rw-r--r--net/sctp/input.c145
-rw-r--r--net/sctp/ipv6.c126
-rw-r--r--net/sctp/output.c35
-rw-r--r--net/sctp/outqueue.c13
-rw-r--r--net/sctp/protocol.c32
-rw-r--r--net/sctp/sm_make_chunk.c75
-rw-r--r--net/sctp/sm_sideeffect.c37
-rw-r--r--net/sctp/sm_statefuns.c69
-rw-r--r--net/sctp/sm_statetable.c43
-rw-r--r--net/sctp/socket.c127
-rw-r--r--net/sctp/sysctl.c35
-rw-r--r--net/sctp/transport.c162
-rw-r--r--net/smc/Makefile2
-rw-r--r--net/smc/af_smc.c106
-rw-r--r--net/smc/smc_core.c63
-rw-r--r--net/smc/smc_core.h4
-rw-r--r--net/smc/smc_ib.c3
-rw-r--r--net/smc/smc_ism.c1
-rw-r--r--net/smc/smc_llc.c10
-rw-r--r--net/smc/smc_netlink.c11
-rw-r--r--net/smc/smc_netlink.h2
-rw-r--r--net/smc/smc_pnet.c3
-rw-r--r--net/smc/smc_rx.c8
-rw-r--r--net/smc/smc_stats.c413
-rw-r--r--net/smc/smc_stats.h266
-rw-r--r--net/smc/smc_tx.c41
-rw-r--r--net/smc/smc_wr.c10
-rw-r--r--net/socket.c494
-rw-r--r--net/strparser/strparser.c2
-rw-r--r--net/sunrpc/Makefile2
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c53
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/clnt.c96
-rw-r--r--net/sunrpc/debugfs.c75
-rw-r--r--net/sunrpc/fail.h25
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/sched.c12
-rw-r--r--net/sunrpc/sunrpc_syms.c10
-rw-r--r--net/sunrpc/svc.c83
-rw-r--r--net/sunrpc/svc_xprt.c19
-rw-r--r--net/sunrpc/svcauth.c8
-rw-r--r--net/sunrpc/svcauth_unix.c18
-rw-r--r--net/sunrpc/sysfs.c618
-rw-r--r--net/sunrpc/sysfs.h42
-rw-r--r--net/sunrpc/xdr.c7
-rw-r--r--net/sunrpc/xprt.c76
-rw-r--r--net/sunrpc/xprtmultipath.c41
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c14
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c62
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c41
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c11
-rw-r--r--net/sunrpc/xprtrdma/transport.c15
-rw-r--r--net/sunrpc/xprtrdma/verbs.c28
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h2
-rw-r--r--net/sunrpc/xprtsock.c50
-rw-r--r--net/switchdev/switchdev.c333
-rw-r--r--net/tipc/bcast.c2
-rw-r--r--net/tipc/crypto.c14
-rw-r--r--net/tipc/link.c1
-rw-r--r--net/tipc/msg.c27
-rw-r--r--net/tipc/msg.h3
-rw-r--r--net/tipc/name_table.c6
-rw-r--r--net/tipc/name_table.h4
-rw-r--r--net/tipc/node.c2
-rw-r--r--net/tipc/socket.c224
-rw-r--r--net/tipc/subscr.c2
-rw-r--r--net/tls/tls_device.c2
-rw-r--r--net/tls/tls_sw.c5
-rw-r--r--net/unix/Kconfig5
-rw-r--r--net/unix/Makefile1
-rw-r--r--net/unix/af_unix.c684
-rw-r--r--net/unix/diag.c6
-rw-r--r--net/unix/unix_bpf.c174
-rw-r--r--net/vmw_vsock/af_vsock.c470
-rw-r--r--net/vmw_vsock/hyperv_transport.c4
-rw-r--r--net/vmw_vsock/virtio_transport.c37
-rw-r--r--net/vmw_vsock/virtio_transport_common.c188
-rw-r--r--net/vmw_vsock/vmci_transport.c6
-rw-r--r--net/vmw_vsock/vsock_loopback.c12
-rw-r--r--net/wireless/chan.c43
-rw-r--r--net/wireless/core.c50
-rw-r--r--net/wireless/core.h3
-rw-r--r--net/wireless/nl80211.c200
-rw-r--r--net/wireless/pmsr.c12
-rw-r--r--net/wireless/radiotap.c9
-rw-r--r--net/wireless/rdev-ops.h25
-rw-r--r--net/wireless/reg.c14
-rw-r--r--net/wireless/scan.c31
-rw-r--r--net/wireless/trace.h82
-rw-r--r--net/wireless/wext-compat.c8
-rw-r--r--net/wireless/wext-spy.c14
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--net/x25/x25_forward.c8
-rw-r--r--net/x25/x25_link.c5
-rw-r--r--net/x25/x25_route.c15
-rw-r--r--net/xdp/xdp_umem.c7
-rw-r--r--net/xdp/xsk.c6
-rw-r--r--net/xdp/xsk.h4
-rw-r--r--net/xdp/xsk_queue.h11
-rw-r--r--net/xdp/xskmap.c32
-rw-r--r--net/xfrm/xfrm_compat.c49
-rw-r--r--net/xfrm/xfrm_device.c1
-rw-r--r--net/xfrm/xfrm_hash.h7
-rw-r--r--net/xfrm/xfrm_input.c6
-rw-r--r--net/xfrm/xfrm_interface.c1
-rw-r--r--net/xfrm/xfrm_ipcomp.c2
-rw-r--r--net/xfrm/xfrm_output.c131
-rw-r--r--net/xfrm/xfrm_policy.c41
-rw-r--r--net/xfrm/xfrm_replay.c171
-rw-r--r--net/xfrm/xfrm_state.c81
-rw-r--r--net/xfrm/xfrm_user.c95
677 files changed, 29261 insertions, 10592 deletions
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 1c140af06d52..600b9563bfc5 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -170,7 +170,8 @@ static void lowpan_dev_debugfs_ctx_init(struct net_device *dev,
struct dentry *root;
char buf[32];
- WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE);
+ if (WARN_ON_ONCE(id >= LOWPAN_IPHC_CTX_TABLE_SIZE))
+ return;
sprintf(buf, "%d", id);
diff --git a/net/802/Makefile b/net/802/Makefile
index 19406a87bdaa..bfed80221b8b 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -8,7 +8,6 @@ obj-$(CONFIG_LLC) += p8022.o psnap.o
obj-$(CONFIG_NET_FC) += fc.o
obj-$(CONFIG_FDDI) += fddi.o
obj-$(CONFIG_HIPPI) += hippi.o
-obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o
obj-$(CONFIG_ATALK) += p8022.o psnap.o
obj-$(CONFIG_STP) += stp.o
obj-$(CONFIG_GARP) += garp.o
diff --git a/net/802/garp.c b/net/802/garp.c
index 400bd857e5f5..f6012f8e59f0 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -203,6 +203,19 @@ static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr
kfree(attr);
}
+static void garp_attr_destroy_all(struct garp_applicant *app)
+{
+ struct rb_node *node, *next;
+ struct garp_attr *attr;
+
+ for (node = rb_first(&app->gid);
+ next = node ? rb_next(node) : NULL, node != NULL;
+ node = next) {
+ attr = rb_entry(node, struct garp_attr, node);
+ garp_attr_destroy(app, attr);
+ }
+}
+
static int garp_pdu_init(struct garp_applicant *app)
{
struct sk_buff *skb;
@@ -609,6 +622,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
spin_lock_bh(&app->lock);
garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
+ garp_attr_destroy_all(app);
garp_pdu_queue(app);
spin_unlock_bh(&app->lock);
diff --git a/net/802/mrp.c b/net/802/mrp.c
index bea6e43d45a0..35e04cc5390c 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -292,6 +292,19 @@ static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr)
kfree(attr);
}
+static void mrp_attr_destroy_all(struct mrp_applicant *app)
+{
+ struct rb_node *node, *next;
+ struct mrp_attr *attr;
+
+ for (node = rb_first(&app->mad);
+ next = node ? rb_next(node) : NULL, node != NULL;
+ node = next) {
+ attr = rb_entry(node, struct mrp_attr, node);
+ mrp_attr_destroy(app, attr);
+ }
+}
+
static int mrp_pdu_init(struct mrp_applicant *app)
{
struct sk_buff *skb;
@@ -895,6 +908,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
+ mrp_attr_destroy_all(app);
mrp_pdu_queue(app);
spin_unlock_bh(&app->lock);
diff --git a/net/802/p8023.c b/net/802/p8023.c
deleted file mode 100644
index 19cd56990db2..000000000000
--- a/net/802/p8023.c
+++ /dev/null
@@ -1,60 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * NET3: 802.3 data link hooks used for IPX 802.3
- *
- * 802.3 isn't really a protocol data link layer. Some old IPX stuff
- * uses it however. Note that there is only one 802.3 protocol layer
- * in the system. We don't currently support different protocols
- * running raw 802.3 on different devices. Thankfully nobody else
- * has done anything like the old IPX.
- */
-
-#include <linux/in.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-#include <net/datalink.h>
-#include <net/p8022.h>
-
-/*
- * Place an 802.3 header on a packet. The driver will do the mac
- * addresses, we just need to give it the buffer length.
- */
-static int p8023_request(struct datalink_proto *dl,
- struct sk_buff *skb, unsigned char *dest_node)
-{
- struct net_device *dev = skb->dev;
-
- dev_hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len);
- return dev_queue_xmit(skb);
-}
-
-/*
- * Create an 802.3 client. Note there can be only one 802.3 client
- */
-struct datalink_proto *make_8023_client(void)
-{
- struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
-
- if (proto) {
- proto->header_length = 0;
- proto->request = p8023_request;
- }
- return proto;
-}
-
-/*
- * Destroy the 802.3 client.
- */
-void destroy_8023_client(struct datalink_proto *dl)
-{
- kfree(dl);
-}
-
-EXPORT_SYMBOL(destroy_8023_client);
-EXPORT_SYMBOL(make_8023_client);
-
-MODULE_LICENSE("GPL");
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index fb3d3262dc1a..55275ef9a31a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -67,7 +67,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
return 0;
size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
- array = kzalloc(size, GFP_KERNEL);
+ array = kzalloc(size, GFP_KERNEL_ACCOUNT);
if (array == NULL)
return -ENOBUFS;
@@ -638,7 +638,8 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
case GET_VLAN_REALDEV_NAME_CMD:
err = 0;
- vlan_dev_get_realdev_name(dev, args.u.device2);
+ vlan_dev_get_realdev_name(dev, args.u.device2,
+ sizeof(args.u.device2));
if (copy_to_user(arg, &args,
sizeof(struct vlan_ioctl_args)))
err = -EFAULT;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index fa3ad3d4d58c..1a705a4ef7fa 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -108,7 +108,8 @@ static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev)
netdev_features_t ret;
ret = real_dev->hw_enc_features &
- (NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO | NETIF_F_GSO_ENCAP_ALL);
+ (NETIF_F_CSUM_MASK | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL);
if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK))
return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM;
@@ -129,7 +130,8 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
int vlan_dev_set_egress_priority(const struct net_device *dev,
u32 skb_prio, u16 vlan_prio);
int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
-void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
+void vlan_dev_get_realdev_name(const struct net_device *dev, char *result,
+ size_t size);
int vlan_check_real_dev(struct net_device *real_dev,
__be16 protocol, u16 vlan_id,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4db3f0621959..0c21d1fec852 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -239,9 +239,9 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
return 0;
}
-void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
+void vlan_dev_get_realdev_name(const struct net_device *dev, char *result, size_t size)
{
- strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23);
+ strscpy_pad(result, vlan_dev_priv(dev)->real_dev->name, size);
}
bool vlan_dev_inherit_address(struct net_device *dev,
@@ -360,7 +360,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
struct ifreq ifrr;
int err = -EOPNOTSUPP;
- strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
+ strscpy_pad(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
ifrr.ifr_ifru = ifr->ifr_ifru;
switch (cmd) {
@@ -372,8 +372,8 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCGMIIREG:
case SIOCSMIIREG:
case SIOCGHWTSTAMP:
- if (netif_device_present(real_dev) && ops->ndo_do_ioctl)
- err = ops->ndo_do_ioctl(real_dev, &ifrr, cmd);
+ if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
+ err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
break;
}
@@ -814,7 +814,7 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_set_mac_address = vlan_dev_set_mac_address,
.ndo_set_rx_mode = vlan_dev_set_rx_mode,
.ndo_change_rx_flags = vlan_dev_change_rx_flags,
- .ndo_do_ioctl = vlan_dev_ioctl,
+ .ndo_eth_ioctl = vlan_dev_ioctl,
.ndo_neigh_setup = vlan_dev_neigh_setup,
.ndo_get_stats64 = vlan_dev_get_stats64,
#if IS_ENABLED(CONFIG_FCOE)
diff --git a/net/9p/client.c b/net/9p/client.c
index b7b958f61faf..213f12ed76cd 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -30,6 +30,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/9p.h>
+#define DEFAULT_MSIZE (128 * 1024)
+
/*
* Client Option Parsing (code inspired by NFS code)
* - a little lazy - parse all client options
@@ -65,7 +67,7 @@ EXPORT_SYMBOL(p9_is_proto_dotu);
int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
{
- if (clnt->msize != 8192)
+ if (clnt->msize != DEFAULT_MSIZE)
seq_printf(m, ",msize=%u", clnt->msize);
seq_printf(m, ",trans=%s", clnt->trans_mod->name);
@@ -139,7 +141,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
int ret = 0;
clnt->proto_version = p9_proto_2000L;
- clnt->msize = 8192;
+ clnt->msize = DEFAULT_MSIZE;
if (!opts)
return 0;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f4dd0456beaf..007bbcc68010 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -34,7 +34,7 @@
#include <linux/syscalls.h> /* killme */
#define P9_PORT 564
-#define MAX_SOCK_BUF (64*1024)
+#define MAX_SOCK_BUF (1024*1024)
#define MAXPOLLWADDR 2
static struct p9_trans_module p9_tcp_trans;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 93f2f8654882..490a4c900339 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -99,7 +99,7 @@ static unsigned int rest_of_page(void *data)
* @client: client instance
*
* This reclaims a channel by freeing its resources and
- * reseting its inuse flag.
+ * resetting its inuse flag.
*
*/
@@ -463,7 +463,7 @@ req_retry_pinned:
* For example TREAD have 11.
* 11 is the read/write header = PDU Header(7) + IO Size (4).
* Arrange in such a way that server places header in the
- * alloced memory and payload onto the user buffer.
+ * allocated memory and payload onto the user buffer.
*/
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc.sdata, in_hdr_len);
@@ -610,7 +610,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
if (!chan->vc_wq) {
err = -ENOMEM;
- goto out_free_tag;
+ goto out_remove_file;
}
init_waitqueue_head(chan->vc_wq);
chan->ring_bufs_avail = 1;
@@ -628,6 +628,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
return 0;
+out_remove_file:
+ sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr);
out_free_tag:
kfree(tag);
out_free_vq:
@@ -760,7 +762,7 @@ static struct p9_trans_module p9_virtio_trans = {
.cancelled = p9_virtio_cancelled,
/*
* We leave one entry for input and one entry for response
- * headers. We also skip one more entry to accomodate, address
+ * headers. We also skip one more entry to accommodate, address
* that are not at page boundary, that can result in an extra
* page in zero copy.
*/
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index f4fea28e05da..3ec1a51a6944 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -138,7 +138,7 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size)
static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
{
- struct xen_9pfs_front_priv *priv = NULL;
+ struct xen_9pfs_front_priv *priv;
RING_IDX cons, prod, masked_cons, masked_prod;
unsigned long flags;
u32 size = p9_req->tc.size;
@@ -151,7 +151,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
break;
}
read_unlock(&xen_9pfs_lock);
- if (!priv || priv->client != client)
+ if (list_entry_is_head(priv, &xen_9pfs_devs, list))
return -EINVAL;
num = p9_req->tc.tag % priv->num_rings;
diff --git a/net/Kconfig b/net/Kconfig
index c7392c449b25..fb13460c6dab 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -363,6 +363,7 @@ source "net/bluetooth/Kconfig"
source "net/rxrpc/Kconfig"
source "net/kcm/Kconfig"
source "net/strparser/Kconfig"
+source "net/mctp/Kconfig"
config FIB_RULES
bool
diff --git a/net/Makefile b/net/Makefile
index 9ca9572188fe..fbfeb8a0bb37 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -78,3 +78,4 @@ obj-$(CONFIG_QRTR) += qrtr/
obj-$(CONFIG_NET_NCSI) += ncsi/
obj-$(CONFIG_XDP_SOCKETS) += xdp/
obj-$(CONFIG_MPTCP) += mptcp/
+obj-$(CONFIG_MCTP) += mctp/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index ebda397fa95a..bf5736c1d458 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -666,7 +666,7 @@ static int atif_ioctl(int cmd, void __user *arg)
struct rtentry rtdef;
int add_route;
- if (copy_from_user(&atreq, arg, sizeof(atreq)))
+ if (get_user_ifreq(&atreq, NULL, arg))
return -EFAULT;
dev = __dev_get_by_name(&init_net, atreq.ifr_name);
@@ -707,7 +707,7 @@ static int atif_ioctl(int cmd, void __user *arg)
/*
* Phase 1 is fine on LocalTalk but we don't do
- * EtherTalk phase 1. Anyone wanting to add it go ahead.
+ * EtherTalk phase 1. Anyone wanting to add it, go ahead.
*/
if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
return -EPROTONOSUPPORT;
@@ -828,7 +828,7 @@ static int atif_ioctl(int cmd, void __user *arg)
nr = (struct atalk_netrange *)&(atif->nets);
/*
* Phase 1 is fine on Localtalk but we don't do
- * Ethertalk phase 1. Anyone wanting to add it go ahead.
+ * Ethertalk phase 1. Anyone wanting to add it, go ahead.
*/
if (dev->type == ARPHRD_ETHER && nr->nr_phase != 2)
return -EPROTONOSUPPORT;
@@ -865,7 +865,7 @@ static int atif_ioctl(int cmd, void __user *arg)
return 0;
}
- return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0;
+ return put_user_ifreq(&atreq, arg);
}
static int atrtr_ioctl_addrt(struct rtentry *rt)
@@ -2018,7 +2018,7 @@ module_init(atalk_init);
* by the network device layer.
*
* Ergo, before the AppleTalk module can be removed, all AppleTalk
- * sockets be closed from user space.
+ * sockets should be closed from user space.
*/
static void __exit atalk_exit(void)
{
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index aa1b57161f3b..0fdbdfd19474 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -11,7 +11,7 @@
#define to_atm_dev(cldev) container_of(cldev, struct atm_dev, class_dev)
-static ssize_t show_type(struct device *cdev,
+static ssize_t type_show(struct device *cdev,
struct device_attribute *attr, char *buf)
{
struct atm_dev *adev = to_atm_dev(cdev);
@@ -19,7 +19,7 @@ static ssize_t show_type(struct device *cdev,
return scnprintf(buf, PAGE_SIZE, "%s\n", adev->type);
}
-static ssize_t show_address(struct device *cdev,
+static ssize_t address_show(struct device *cdev,
struct device_attribute *attr, char *buf)
{
struct atm_dev *adev = to_atm_dev(cdev);
@@ -27,7 +27,7 @@ static ssize_t show_address(struct device *cdev,
return scnprintf(buf, PAGE_SIZE, "%pM\n", adev->esi);
}
-static ssize_t show_atmaddress(struct device *cdev,
+static ssize_t atmaddress_show(struct device *cdev,
struct device_attribute *attr, char *buf)
{
unsigned long flags;
@@ -50,7 +50,7 @@ static ssize_t show_atmaddress(struct device *cdev,
return count;
}
-static ssize_t show_atmindex(struct device *cdev,
+static ssize_t atmindex_show(struct device *cdev,
struct device_attribute *attr, char *buf)
{
struct atm_dev *adev = to_atm_dev(cdev);
@@ -58,7 +58,7 @@ static ssize_t show_atmindex(struct device *cdev,
return scnprintf(buf, PAGE_SIZE, "%d\n", adev->number);
}
-static ssize_t show_carrier(struct device *cdev,
+static ssize_t carrier_show(struct device *cdev,
struct device_attribute *attr, char *buf)
{
struct atm_dev *adev = to_atm_dev(cdev);
@@ -67,7 +67,7 @@ static ssize_t show_carrier(struct device *cdev,
adev->signal == ATM_PHY_SIG_LOST ? 0 : 1);
}
-static ssize_t show_link_rate(struct device *cdev,
+static ssize_t link_rate_show(struct device *cdev,
struct device_attribute *attr, char *buf)
{
struct atm_dev *adev = to_atm_dev(cdev);
@@ -90,12 +90,12 @@ static ssize_t show_link_rate(struct device *cdev,
return scnprintf(buf, PAGE_SIZE, "%d\n", link_rate);
}
-static DEVICE_ATTR(address, 0444, show_address, NULL);
-static DEVICE_ATTR(atmaddress, 0444, show_atmaddress, NULL);
-static DEVICE_ATTR(atmindex, 0444, show_atmindex, NULL);
-static DEVICE_ATTR(carrier, 0444, show_carrier, NULL);
-static DEVICE_ATTR(type, 0444, show_type, NULL);
-static DEVICE_ATTR(link_rate, 0444, show_link_rate, NULL);
+static DEVICE_ATTR_RO(address);
+static DEVICE_ATTR_RO(atmaddress);
+static DEVICE_ATTR_RO(atmindex);
+static DEVICE_ATTR_RO(carrier);
+static DEVICE_ATTR_RO(type);
+static DEVICE_ATTR_RO(link_rate);
static struct device_attribute *atm_attrs[] = {
&dev_attr_atmaddress,
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 3e17a5ecaa94..dd2a8dabed84 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -93,8 +93,8 @@ struct br2684_dev {
* This lock should be held for writing any time the list of devices or
* their attached vcc's could be altered. It should be held for reading
* any time these are being queried. Note that we sometimes need to
- * do read-locking under interrupt context, so write locking must block
- * the current CPU's interrupts
+ * do read-locking under interrupting context, so write locking must block
+ * the current CPU's interrupts.
*/
static DEFINE_RWLOCK(devs_lock);
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 53236986dfe0..2b2d33eeaf20 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -52,10 +52,8 @@ static struct atm_dev *__alloc_atm_dev(const char *type)
static struct atm_dev *__atm_dev_lookup(int number)
{
struct atm_dev *dev;
- struct list_head *p;
- list_for_each(p, &atm_devs) {
- dev = list_entry(p, struct atm_dev, dev_list);
+ list_for_each_entry(dev, &atm_devs, dev_list) {
if (dev->number == number) {
atm_dev_hold(dev);
return dev;
@@ -215,8 +213,7 @@ int atm_getnames(void __user *buf, int __user *iobuf_len)
return -ENOMEM;
}
tmp_p = tmp_buf;
- list_for_each(p, &atm_devs) {
- dev = list_entry(p, struct atm_dev, dev_list);
+ list_for_each_entry(dev, &atm_devs, dev_list) {
*tmp_p++ = dev->number;
}
mutex_unlock(&atm_dev_mutex);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index e4f63dd43cb5..36249776c021 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
skb_pull(skb, AX25_KISS_HEADER_LEN);
if (digipeat != NULL) {
- if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
- kfree_skb(skb);
+ if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
goto put;
- }
skb = ourskb;
}
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index f53751ba81b3..22f2f66c6e0a 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25)
void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
{
- struct sk_buff *skbn;
unsigned char *ptr;
int headroom;
@@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
headroom = ax25_addr_size(ax25->digipeat);
- if (skb_headroom(skb) < headroom) {
- if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
+ if (unlikely(skb_headroom(skb) < headroom)) {
+ skb = skb_expand_head(skb, headroom);
+ if (!skb) {
printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
- kfree_skb(skb);
return;
}
-
- if (skb->sk != NULL)
- skb_set_owner_w(skbn, skb->sk);
-
- consume_skb(skb);
- skb = skbn;
}
ptr = skb_push(skb, headroom);
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index b40e0bce67ea..d0b2e094bd55 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -441,24 +441,17 @@ put:
struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
ax25_address *dest, ax25_digi *digi)
{
- struct sk_buff *skbn;
unsigned char *bp;
int len;
len = digi->ndigi * AX25_ADDR_LEN;
- if (skb_headroom(skb) < len) {
- if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
+ if (unlikely(skb_headroom(skb) < len)) {
+ skb = skb_expand_head(skb, len);
+ if (!skb) {
printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
return NULL;
}
-
- if (skb->sk != NULL)
- skb_set_owner_w(skbn, skb->sk);
-
- consume_skb(skb);
-
- skb = skbn;
}
bp = skb_push(skb, len);
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index fc8be49010b9..f94f538fa382 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -519,8 +519,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet,
}
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
return res;
}
@@ -857,8 +856,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
rcu_read_unlock();
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
@@ -1046,14 +1044,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
unlock:
rcu_read_unlock();
out:
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
- if (router)
- batadv_neigh_node_put(router);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(neigh_node);
+ batadv_neigh_node_put(router);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
/**
@@ -1194,8 +1188,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
ret = true;
out:
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
+ batadv_neigh_node_put(neigh_node);
return ret;
}
@@ -1496,16 +1489,11 @@ out_neigh:
if (orig_neigh_node && !is_single_hop_neigh)
batadv_orig_node_put(orig_neigh_node);
out:
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
- if (router_router)
- batadv_neigh_node_put(router_router);
- if (orig_neigh_router)
- batadv_neigh_node_put(orig_neigh_router);
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(router);
+ batadv_neigh_node_put(router_router);
+ batadv_neigh_node_put(orig_neigh_router);
+ batadv_hardif_neigh_put(hardif_neigh);
consume_skb(skb_priv);
}
@@ -1851,6 +1839,8 @@ batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
orig_node->orig) ||
nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
neigh_node->addr) ||
+ nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ neigh_node->if_incoming->net_dev->name) ||
nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
neigh_node->if_incoming->net_dev->ifindex) ||
nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) ||
@@ -1924,8 +1914,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
out:
- if (neigh_node_best)
- batadv_neigh_node_put(neigh_node_best);
+ batadv_neigh_node_put(neigh_node_best);
*sub_s = 0;
return 0;
@@ -2047,10 +2036,8 @@ static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1,
*diff = (int)tq1 - (int)tq2;
out:
- if (neigh1_ifinfo)
- batadv_neigh_ifinfo_put(neigh1_ifinfo);
- if (neigh2_ifinfo)
- batadv_neigh_ifinfo_put(neigh2_ifinfo);
+ batadv_neigh_ifinfo_put(neigh1_ifinfo);
+ batadv_neigh_ifinfo_put(neigh2_ifinfo);
return ret;
}
@@ -2080,6 +2067,8 @@ batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
hardif_neigh->addr) ||
+ nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ hardif_neigh->if_incoming->net_dev->name) ||
nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
hardif_neigh->if_incoming->net_dev->ifindex) ||
nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
@@ -2295,8 +2284,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (tmp_gw_factor > max_gw_factor ||
(tmp_gw_factor == max_gw_factor &&
tq_avg > max_tq)) {
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
kref_get(&curr_gw->refcount);
}
@@ -2310,8 +2298,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
* $routing_class more tq points)
*/
if (tq_avg > max_tq) {
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
kref_get(&curr_gw->refcount);
}
@@ -2328,8 +2315,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
next:
batadv_neigh_node_put(router);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
rcu_read_unlock();
@@ -2393,14 +2379,10 @@ static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv,
ret = true;
out:
- if (router_gw_ifinfo)
- batadv_neigh_ifinfo_put(router_gw_ifinfo);
- if (router_orig_ifinfo)
- batadv_neigh_ifinfo_put(router_orig_ifinfo);
- if (router_gw)
- batadv_neigh_node_put(router_gw);
- if (router_orig)
- batadv_neigh_node_put(router_orig);
+ batadv_neigh_ifinfo_put(router_gw_ifinfo);
+ batadv_neigh_ifinfo_put(router_orig_ifinfo);
+ batadv_neigh_node_put(router_gw);
+ batadv_neigh_node_put(router_orig);
return ret;
}
@@ -2461,6 +2443,8 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid,
router->addr) ||
nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
router->if_incoming->net_dev->name) ||
+ nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ router->if_incoming->net_dev->ifindex) ||
nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
gw_node->bandwidth_down) ||
nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP,
@@ -2473,12 +2457,9 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid,
ret = 0;
out:
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
+ batadv_gw_node_put(curr_gw);
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(router);
return ret;
}
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index e1ca2b8c3152..54e41fc709c3 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -106,8 +106,7 @@ static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface)
batadv_v_primary_iface_set(hard_iface);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void
@@ -146,6 +145,8 @@ batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
hardif_neigh->addr) ||
+ nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ hardif_neigh->if_incoming->net_dev->name) ||
nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
hardif_neigh->if_incoming->net_dev->ifindex) ||
nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
@@ -298,6 +299,8 @@ batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) ||
nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
neigh_node->addr) ||
+ nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+ neigh_node->if_incoming->net_dev->name) ||
nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
neigh_node->if_incoming->net_dev->ifindex) ||
nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) ||
@@ -362,8 +365,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
}
out:
- if (neigh_node_best)
- batadv_neigh_node_put(neigh_node_best);
+ batadv_neigh_node_put(neigh_node_best);
*sub_s = 0;
return 0;
@@ -564,10 +566,8 @@ static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw)
ret = 0;
out:
- if (router)
- batadv_neigh_node_put(router);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(router);
+ batadv_neigh_ifinfo_put(router_ifinfo);
return ret;
}
@@ -595,8 +595,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (curr_gw && bw <= max_bw)
goto next;
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
kref_get(&curr_gw->refcount);
@@ -658,10 +657,8 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
ret = true;
out:
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (orig_gw)
- batadv_gw_node_put(orig_gw);
+ batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(orig_gw);
return ret;
}
@@ -739,6 +736,12 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid,
goto out;
}
+ if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+ router->if_incoming->net_dev->ifindex)) {
+ genlmsg_cancel(msg, hdr);
+ goto out;
+ }
+
if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
gw_node->bandwidth_down)) {
genlmsg_cancel(msg, hdr);
@@ -754,12 +757,9 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid,
ret = 0;
out:
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
+ batadv_gw_node_put(curr_gw);
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_node_put(router);
return ret;
}
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 423c2d171703..71999e13f729 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -486,14 +486,11 @@ static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv,
hardif_neigh->bat_v.elp_interval = ntohl(elp_packet->elp_interval);
hardif_free:
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
neigh_free:
- if (neigh)
- batadv_neigh_node_put(neigh);
+ batadv_neigh_node_put(neigh);
orig_free:
- if (orig_neigh)
- batadv_orig_node_put(orig_neigh);
+ batadv_orig_node_put(orig_neigh);
}
/**
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index a0a9636d1740..1d750f3cb2e4 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -584,12 +584,9 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv,
batadv_v_ogm_queue_on_if(skb, if_outgoing);
out:
- if (orig_ifinfo)
- batadv_orig_ifinfo_put(orig_ifinfo);
- if (router)
- batadv_neigh_node_put(router);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
+ batadv_neigh_node_put(router);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
}
/**
@@ -669,10 +666,8 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
else
ret = 0;
out:
- if (orig_ifinfo)
- batadv_orig_ifinfo_put(orig_ifinfo);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
+ batadv_orig_ifinfo_put(orig_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
return ret;
}
@@ -763,16 +758,11 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node);
out:
- if (router)
- batadv_neigh_node_put(router);
- if (orig_neigh_router)
- batadv_neigh_node_put(orig_neigh_router);
- if (orig_neigh_node)
- batadv_orig_node_put(orig_neigh_node);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
- if (neigh_ifinfo)
- batadv_neigh_ifinfo_put(neigh_ifinfo);
+ batadv_neigh_node_put(router);
+ batadv_neigh_node_put(orig_neigh_router);
+ batadv_orig_node_put(orig_neigh_node);
+ batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_neigh_ifinfo_put(neigh_ifinfo);
return forward;
}
@@ -978,12 +968,9 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
}
rcu_read_unlock();
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_orig_node_put(orig_node);
+ batadv_neigh_node_put(neigh_node);
+ batadv_hardif_neigh_put(hardif_neigh);
}
/**
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 7dc133cfc363..1669744304c5 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -162,6 +162,9 @@ static void batadv_backbone_gw_release(struct kref *ref)
*/
static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw)
{
+ if (!backbone_gw)
+ return;
+
kref_put(&backbone_gw->refcount, batadv_backbone_gw_release);
}
@@ -197,6 +200,9 @@ static void batadv_claim_release(struct kref *ref)
*/
static void batadv_claim_put(struct batadv_bla_claim *claim)
{
+ if (!claim)
+ return;
+
kref_put(&claim->refcount, batadv_claim_release);
}
@@ -395,7 +401,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
break;
case BATADV_CLAIM_TYPE_ANNOUNCE:
/* announcement frame
- * set HW SRC to the special mac containg the crc
+ * set HW SRC to the special mac containing the crc
*/
ether_addr_copy(hw_src, mac);
batadv_dbg(BATADV_DBG_BLA, bat_priv,
@@ -439,8 +445,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac,
netif_rx_any_context(skb);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
@@ -1040,7 +1045,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
/* lets see if this originator is in our mesh */
orig_node = batadv_orig_hash_find(bat_priv, backbone_addr);
- /* dont accept claims from gateways which are not in
+ /* don't accept claims from gateways which are not in
* the same mesh or group.
*/
if (!orig_node)
@@ -1498,8 +1503,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
rcu_read_unlock();
}
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
@@ -1808,8 +1812,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
batadv_hash_destroy(bat_priv->bla.backbone_hash);
bat_priv->bla.backbone_hash = NULL;
}
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
/**
@@ -1996,10 +1999,8 @@ handled:
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (claim)
- batadv_claim_put(claim);
+ batadv_hardif_put(primary_if);
+ batadv_claim_put(claim);
return ret;
}
@@ -2103,10 +2104,8 @@ allow:
handled:
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (claim)
- batadv_claim_put(claim);
+ batadv_hardif_put(primary_if);
+ batadv_claim_put(claim);
return ret;
}
@@ -2271,11 +2270,9 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
@@ -2443,11 +2440,9 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 5c22955bb9d5..8673a265995f 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -52,7 +52,6 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
void batadv_bla_status_update(struct net_device *net_dev);
int batadv_bla_init(struct batadv_priv *bat_priv);
void batadv_bla_free(struct batadv_priv *bat_priv);
-int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
#ifdef CONFIG_BATMAN_ADV_DAT
bool batadv_bla_check_claim(struct batadv_priv *bat_priv, u8 *addr,
unsigned short vid);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 8c95a11a830a..2f008e329007 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -127,6 +127,9 @@ static void batadv_dat_entry_release(struct kref *ref)
*/
static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry)
{
+ if (!dat_entry)
+ return;
+
kref_put(&dat_entry->refcount, batadv_dat_entry_release);
}
@@ -405,8 +408,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip,
&dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid));
out:
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
}
#ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -594,8 +596,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
continue;
max = tmp_max;
- if (max_orig_node)
- batadv_orig_node_put(max_orig_node);
+ batadv_orig_node_put(max_orig_node);
max_orig_node = orig_node;
}
rcu_read_unlock();
@@ -981,11 +982,9 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
return ret;
}
@@ -1218,8 +1217,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
BATADV_P_DAT_DHT_GET);
}
out:
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
return ret;
}
@@ -1286,8 +1284,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
ret = true;
}
out:
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
if (ret)
kfree_skb(skb);
return ret;
@@ -1420,8 +1417,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
out:
if (dropped)
kfree_skb(skb);
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
/* if dropped == false -> deliver to the interface */
return dropped;
}
@@ -1830,7 +1826,6 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv,
ret = true;
out:
- if (dat_entry)
- batadv_dat_entry_put(dat_entry);
+ batadv_dat_entry_put(dat_entry);
return ret;
}
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index a5d9d800082b..0899a729a23f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -381,10 +381,8 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
}
out:
- if (orig_node_dst)
- batadv_orig_node_put(orig_node_dst);
- if (neigh_node)
- batadv_neigh_node_put(neigh_node);
+ batadv_orig_node_put(orig_node_dst);
+ batadv_neigh_node_put(neigh_node);
return ret;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 007f2827935d..b7466136e292 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -59,7 +59,7 @@
* after rcu grace period
* @ref: kref pointer of the gw_node
*/
-static void batadv_gw_node_release(struct kref *ref)
+void batadv_gw_node_release(struct kref *ref)
{
struct batadv_gw_node *gw_node;
@@ -70,16 +70,6 @@ static void batadv_gw_node_release(struct kref *ref)
}
/**
- * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
- * it
- * @gw_node: gateway node to free
- */
-void batadv_gw_node_put(struct batadv_gw_node *gw_node)
-{
- kref_put(&gw_node->refcount, batadv_gw_node_release);
-}
-
-/**
* batadv_gw_get_selected_gw_node() - Get currently selected gateway
* @bat_priv: the bat priv with all the soft interface information
*
@@ -130,8 +120,7 @@ batadv_gw_get_selected_orig(struct batadv_priv *bat_priv)
unlock:
rcu_read_unlock();
out:
- if (gw_node)
- batadv_gw_node_put(gw_node);
+ batadv_gw_node_put(gw_node);
return orig_node;
}
@@ -148,8 +137,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
curr_gw_node = rcu_replace_pointer(bat_priv->gw.curr_gw, new_gw_node,
true);
- if (curr_gw_node)
- batadv_gw_node_put(curr_gw_node);
+ batadv_gw_node_put(curr_gw_node);
spin_unlock_bh(&bat_priv->gw.list_lock);
}
@@ -284,14 +272,10 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
batadv_gw_select(bat_priv, next_gw);
out:
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (next_gw)
- batadv_gw_node_put(next_gw);
- if (router)
- batadv_neigh_node_put(router);
- if (router_ifinfo)
- batadv_neigh_ifinfo_put(router_ifinfo);
+ batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(next_gw);
+ batadv_neigh_node_put(router);
+ batadv_neigh_ifinfo_put(router_ifinfo);
}
/**
@@ -325,8 +309,7 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv,
reselect:
batadv_gw_reselect(bat_priv);
out:
- if (curr_gw_orig)
- batadv_orig_node_put(curr_gw_orig);
+ batadv_orig_node_put(curr_gw_orig);
}
/**
@@ -466,13 +449,11 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
if (gw_node == curr_gw)
batadv_gw_reselect(bat_priv);
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(curr_gw);
}
out:
- if (gw_node)
- batadv_gw_node_put(gw_node);
+ batadv_gw_node_put(gw_node);
}
/**
@@ -555,10 +536,8 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
return ret;
}
@@ -780,15 +759,10 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
batadv_neigh_ifinfo_put(old_ifinfo);
out:
- if (orig_dst_node)
- batadv_orig_node_put(orig_dst_node);
- if (curr_gw)
- batadv_gw_node_put(curr_gw);
- if (gw_node)
- batadv_gw_node_put(gw_node);
- if (neigh_old)
- batadv_neigh_node_put(neigh_old);
- if (neigh_curr)
- batadv_neigh_node_put(neigh_curr);
+ batadv_orig_node_put(orig_dst_node);
+ batadv_gw_node_put(curr_gw);
+ batadv_gw_node_put(gw_node);
+ batadv_neigh_node_put(neigh_old);
+ batadv_neigh_node_put(neigh_curr);
return out_of_range;
}
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 2ae5846ef958..95c2ccdaa554 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#include <linux/types.h>
@@ -27,7 +28,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
void batadv_gw_node_delete(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
void batadv_gw_node_free(struct batadv_priv *bat_priv);
-void batadv_gw_node_put(struct batadv_gw_node *gw_node);
+void batadv_gw_node_release(struct kref *ref);
struct batadv_gw_node *
batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb);
@@ -38,4 +39,17 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node);
+/**
+ * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release
+ * it
+ * @gw_node: gateway node to free
+ */
+static inline void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+{
+ if (!gw_node)
+ return;
+
+ kref_put(&gw_node->refcount, batadv_gw_node_release);
+}
+
#endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index fdde305a198e..9349c76f30c5 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -10,7 +10,7 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
-#include <linux/kernel.h>
+#include <linux/kstrtox.h>
#include <linux/limits.h>
#include <linux/math64.h>
#include <linux/netdevice.h>
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 4a6a25d551a8..8a2b78f9c4b2 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -9,7 +9,6 @@
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
-#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/if.h>
#include <linux/if_arp.h>
@@ -237,8 +236,7 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
real_netdev = dev_get_by_index(real_net, ifindex);
out:
- if (hard_iface)
- batadv_hardif_put(hard_iface);
+ batadv_hardif_put(hard_iface);
return real_netdev;
}
@@ -403,7 +401,7 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
goto out;
}
- /* >1 neighbors -> (re)brodcast */
+ /* >1 neighbors -> (re)broadcast */
if (rcu_dereference(hlist_next_rcu(first)))
goto out;
@@ -458,8 +456,7 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
batadv_dat_init_own_addr(bat_priv, primary_if);
batadv_bla_update_orig_address(bat_priv, primary_if, oldif);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void batadv_primary_if_select(struct batadv_priv *bat_priv,
@@ -482,8 +479,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
batadv_primary_if_update_addr(bat_priv, curr_hard_iface);
out:
- if (curr_hard_iface)
- batadv_hardif_put(curr_hard_iface);
+ batadv_hardif_put(curr_hard_iface);
}
static bool
@@ -658,8 +654,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface)
bat_priv->algo_ops->iface.activate(hard_iface);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void
@@ -678,43 +673,16 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
}
/**
- * batadv_master_del_slave() - remove hard_iface from the current master iface
- * @slave: the interface enslaved in another master
- * @master: the master from which slave has to be removed
- *
- * Invoke ndo_del_slave on master passing slave as argument. In this way the
- * slave is free'd and the master can correctly change its internal state.
- *
- * Return: 0 on success, a negative value representing the error otherwise
- */
-static int batadv_master_del_slave(struct batadv_hard_iface *slave,
- struct net_device *master)
-{
- int ret;
-
- if (!master)
- return 0;
-
- ret = -EBUSY;
- if (master->netdev_ops->ndo_del_slave)
- ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev);
-
- return ret;
-}
-
-/**
* batadv_hardif_enable_interface() - Enslave hard interface to soft interface
* @hard_iface: hard interface to add to soft interface
- * @net: the applicable net namespace
- * @iface_name: name of the soft interface
+ * @soft_iface: netdev struct of the mesh interface
*
* Return: 0 on success or negative error number in case of failure
*/
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
- struct net *net, const char *iface_name)
+ struct net_device *soft_iface)
{
struct batadv_priv *bat_priv;
- struct net_device *soft_iface, *master;
__be16 ethertype = htons(ETH_P_BATMAN);
int max_header_len = batadv_max_header_len();
int ret;
@@ -724,35 +692,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
kref_get(&hard_iface->refcount);
- soft_iface = dev_get_by_name(net, iface_name);
-
- if (!soft_iface) {
- soft_iface = batadv_softif_create(net, iface_name);
-
- if (!soft_iface) {
- ret = -ENOMEM;
- goto err;
- }
-
- /* dev_get_by_name() increases the reference counter for us */
- dev_hold(soft_iface);
- }
-
- if (!batadv_softif_is_valid(soft_iface)) {
- pr_err("Can't create batman mesh interface %s: already exists as regular interface\n",
- soft_iface->name);
- ret = -EINVAL;
- goto err_dev;
- }
-
- /* check if the interface is enslaved in another virtual one and
- * in that case unlink it first
- */
- master = netdev_master_upper_dev_get(hard_iface->net_dev);
- ret = batadv_master_del_slave(hard_iface, master);
- if (ret)
- goto err_dev;
-
+ dev_hold(soft_iface);
hard_iface->soft_iface = soft_iface;
bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -810,7 +750,6 @@ err_upper:
err_dev:
hard_iface->soft_iface = NULL;
dev_put(soft_iface);
-err:
batadv_hardif_put(hard_iface);
return ret;
}
@@ -868,8 +807,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
new_if = batadv_hardif_get_active(hard_iface->soft_iface);
batadv_primary_if_select(bat_priv, new_if);
- if (new_if)
- batadv_hardif_put(new_if);
+ batadv_hardif_put(new_if);
}
bat_priv->algo_ops->iface.disable(hard_iface);
@@ -891,8 +829,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
batadv_hardif_put(hard_iface);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static struct batadv_hard_iface *
@@ -1047,8 +984,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
hardif_put:
batadv_hardif_put(hard_iface);
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
return NOTIFY_DONE;
}
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 83d11b46a9d8..64f660dbbe54 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -16,7 +16,6 @@
#include <linux/rcupdate.h>
#include <linux/stddef.h>
#include <linux/types.h>
-#include <net/net_namespace.h>
/**
* enum batadv_hard_if_state - State of a hard interface
@@ -75,7 +74,7 @@ bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
struct batadv_hard_iface*
batadv_hardif_get_by_netdev(const struct net_device *net_dev);
int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
- struct net *net, const char *iface_name);
+ struct net_device *soft_iface);
void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface);
int batadv_hardif_min_mtu(struct net_device *soft_iface);
void batadv_update_min_mtu(struct net_device *soft_iface);
@@ -90,6 +89,9 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
*/
static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface)
{
+ if (!hard_iface)
+ return;
+
kref_put(&hard_iface->refcount, batadv_hardif_release);
}
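
Note: batadv_hardif_enable_interface() now receives the mesh interface as a net_device instead of a namespace plus name, so the dev_get_by_name() lookup, the implicit batadv_softif_create() fallback and the master-unlink step all move out of it. A hedged sketch of the resulting caller shape, modelled on the ndo_add_slave path later in this series (example_enslave() is an illustrative name, the snippet is not standalone-compilable, and it reuses only symbols visible in this patch):

/* Sketch only: enslave one hard interface below an existing batman-adv
 * soft interface. "dev" is the soft interface, "slave_dev" the lower
 * device; both are assumptions of this example.
 */
static int example_enslave(struct net_device *dev,
			   struct net_device *slave_dev)
{
	struct batadv_hard_iface *hard_iface;
	int ret = -EINVAL;

	hard_iface = batadv_hardif_get_by_netdev(slave_dev);
	if (!hard_iface || hard_iface->soft_iface)
		goto out;

	/* new signature: the soft interface is passed as a net_device */
	ret = batadv_hardif_enable_interface(hard_iface, dev);

out:
	/* batadv_hardif_put() now tolerates NULL */
	batadv_hardif_put(hard_iface);
	return ret;
}
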
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 46696759f194..fb251c385a1b 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -18,7 +18,7 @@
#include <linux/stddef.h>
#include <linux/types.h>
-/* callback to a compare function. should compare 2 element datas for their
+/* callback to a compare function. should compare 2 element data for their
* keys
*
* Return: true if same and false if not same
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index f0e5d1429662..7a93a1e94c40 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -7,7 +7,7 @@
#include "log.h"
#include "main.h"
-#include <stdarg.h>
+#include <linux/stdarg.h>
#include "trace.h"
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 8f0102b71656..058b8f2eef65 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2021.1"
+#define BATADV_SOURCE_VERSION "2021.3"
#endif
/* B.A.T.M.A.N. parameters */
@@ -88,7 +88,6 @@
/* number of packets to send for broadcasts on different interface types */
#define BATADV_NUM_BCASTS_DEFAULT 1
#define BATADV_NUM_BCASTS_WIRELESS 3
-#define BATADV_NUM_BCASTS_MAX 3
/* length of the single packet used by the TP meter */
#define BATADV_TP_PACKET_LEN ETH_DATA_LEN
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 1d63c8cbbfe7..a3b6658ed789 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -91,8 +91,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
upper = netdev_master_upper_dev_get_rcu(upper);
} while (upper && !(upper->priv_flags & IFF_EBRIDGE));
- if (upper)
- dev_hold(upper);
+ dev_hold(upper);
rcu_read_unlock();
return upper;
@@ -193,53 +192,22 @@ static u8 batadv_mcast_mla_rtr_flags_softif_get(struct batadv_priv *bat_priv,
* BATADV_MCAST_WANT_NO_RTR6: No IPv6 multicast router is present
* The former two OR'd: no multicast router is present
*/
-#if IS_ENABLED(CONFIG_IPV6)
static u8 batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
struct net_device *bridge)
{
- struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list);
struct net_device *dev = bat_priv->soft_iface;
- struct br_ip_list *br_ip_entry, *tmp;
- u8 flags = BATADV_MCAST_WANT_NO_RTR6;
- int ret;
+ u8 flags = BATADV_NO_FLAGS;
if (!bridge)
return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
- /* TODO: ask the bridge if a multicast router is present (the bridge
- * is capable of performing proper RFC4286 multicast router
- * discovery) instead of searching for a ff02::2 listener here
- */
- ret = br_multicast_list_adjacent(dev, &bridge_mcast_list);
- if (ret < 0)
- return BATADV_NO_FLAGS;
-
- list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) {
- /* the bridge snooping does not maintain IPv4 link-local
- * addresses - therefore we won't find any IPv4 multicast router
- * address here, only IPv6 ones
- */
- if (br_ip_entry->addr.proto == htons(ETH_P_IPV6) &&
- ipv6_addr_is_ll_all_routers(&br_ip_entry->addr.dst.ip6))
- flags &= ~BATADV_MCAST_WANT_NO_RTR6;
-
- list_del(&br_ip_entry->list);
- kfree(br_ip_entry);
- }
+ if (!br_multicast_has_router_adjacent(dev, ETH_P_IP))
+ flags |= BATADV_MCAST_WANT_NO_RTR4;
+ if (!br_multicast_has_router_adjacent(dev, ETH_P_IPV6))
+ flags |= BATADV_MCAST_WANT_NO_RTR6;
return flags;
}
-#else
-static inline u8
-batadv_mcast_mla_rtr_flags_bridge_get(struct batadv_priv *bat_priv,
- struct net_device *bridge)
-{
- if (bridge)
- return BATADV_NO_FLAGS;
- else
- return BATADV_MCAST_WANT_NO_RTR4 | BATADV_MCAST_WANT_NO_RTR6;
-}
-#endif
/**
* batadv_mcast_mla_rtr_flags_get() - get multicast router flags
@@ -540,8 +508,7 @@ batadv_mcast_mla_softif_get(struct net_device *dev,
}
out:
- if (bridge)
- dev_put(bridge);
+ dev_put(bridge);
return ret4 + ret6;
}
@@ -2270,12 +2237,11 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
}
out:
- if (soft_iface)
- dev_put(soft_iface);
+ dev_put(soft_iface);
if (!ret && primary_if)
*primary_if = hard_iface;
- else if (hard_iface)
+ else
batadv_hardif_put(hard_iface);
return ret;
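
Note: the bridge can now be asked directly whether a multicast router is adjacent, which removes the ff02::2 listener walk and the CONFIG_IPV6 fallback. For the bridged case the flag logic reduces to the fragment below; with both WANT_NO_RTR flags set, as the kernel-doc above states, no multicast router is present at all. All identifiers are taken from the patch; this is just the bridged branch pulled out for reference:

	u8 flags = BATADV_NO_FLAGS;

	/* no IPv4 router seen by the bridge -> WANT_NO_RTR4
	 * no IPv6 router seen by the bridge -> WANT_NO_RTR6
	 * both flags set                    -> no multicast router at all
	 */
	if (!br_multicast_has_router_adjacent(dev, ETH_P_IP))
		flags |= BATADV_MCAST_WANT_NO_RTR4;
	if (!br_multicast_has_router_adjacent(dev, ETH_P_IPV6))
		flags |= BATADV_MCAST_WANT_NO_RTR6;
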
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index f317d206b411..29276284d281 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -359,15 +359,13 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
atomic_read(&bat_priv->orig_interval)))
goto nla_put_failure;
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
genlmsg_end(msg, hdr);
return 0;
nla_put_failure:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
@@ -814,6 +812,10 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg,
bat_priv->soft_iface->ifindex))
goto nla_put_failure;
+ if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME,
+ bat_priv->soft_iface->name))
+ goto nla_put_failure;
+
if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
net_dev->ifindex) ||
nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
@@ -1045,6 +1047,10 @@ static int batadv_netlink_vlan_fill(struct sk_buff *msg,
bat_priv->soft_iface->ifindex))
goto nla_put_failure;
+ if (nla_put_string(msg, BATADV_ATTR_MESH_IFNAME,
+ bat_priv->soft_iface->name))
+ goto nla_put_failure;
+
if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK))
goto nla_put_failure;
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 4bb76b434d07..9f06132e007d 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -217,6 +217,9 @@ static void batadv_nc_node_release(struct kref *ref)
*/
static void batadv_nc_node_put(struct batadv_nc_node *nc_node)
{
+ if (!nc_node)
+ return;
+
kref_put(&nc_node->refcount, batadv_nc_node_release);
}
@@ -241,6 +244,9 @@ static void batadv_nc_path_release(struct kref *ref)
*/
static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
{
+ if (!nc_path)
+ return;
+
kref_put(&nc_path->refcount, batadv_nc_path_release);
}
@@ -930,10 +936,8 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
out_nc_node->last_seen = jiffies;
out:
- if (in_nc_node)
- batadv_nc_node_put(in_nc_node);
- if (out_nc_node)
- batadv_nc_node_put(out_nc_node);
+ batadv_nc_node_put(in_nc_node);
+ batadv_nc_node_put(out_nc_node);
}
/**
@@ -1209,14 +1213,10 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
batadv_send_unicast_skb(skb_dest, first_dest);
res = true;
out:
- if (router_neigh)
- batadv_neigh_node_put(router_neigh);
- if (router_coding)
- batadv_neigh_node_put(router_coding);
- if (router_neigh_ifinfo)
- batadv_neigh_ifinfo_put(router_neigh_ifinfo);
- if (router_coding_ifinfo)
- batadv_neigh_ifinfo_put(router_coding_ifinfo);
+ batadv_neigh_node_put(router_neigh);
+ batadv_neigh_node_put(router_coding);
+ batadv_neigh_ifinfo_put(router_neigh_ifinfo);
+ batadv_neigh_ifinfo_put(router_coding_ifinfo);
return res;
}
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index da7249448474..aadc653ca1d8 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -177,7 +177,7 @@ out:
* and queue for free after rcu grace period
* @ref: kref pointer of the originator-vlan object
*/
-static void batadv_orig_node_vlan_release(struct kref *ref)
+void batadv_orig_node_vlan_release(struct kref *ref)
{
struct batadv_orig_node_vlan *orig_vlan;
@@ -187,16 +187,6 @@ static void batadv_orig_node_vlan_release(struct kref *ref)
}
/**
- * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
- * the originator-vlan object
- * @orig_vlan: the originator-vlan object to release
- */
-void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
-{
- kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
-}
-
-/**
* batadv_originator_init() - Initialize all originator structures
* @bat_priv: the bat priv with all the soft interface information
*
@@ -231,7 +221,7 @@ err:
* free after rcu grace period
* @ref: kref pointer of the neigh_ifinfo
*/
-static void batadv_neigh_ifinfo_release(struct kref *ref)
+void batadv_neigh_ifinfo_release(struct kref *ref)
{
struct batadv_neigh_ifinfo *neigh_ifinfo;
@@ -244,21 +234,11 @@ static void batadv_neigh_ifinfo_release(struct kref *ref)
}
/**
- * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
- * the neigh_ifinfo
- * @neigh_ifinfo: the neigh_ifinfo object to release
- */
-void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
-{
- kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
-}
-
-/**
* batadv_hardif_neigh_release() - release hardif neigh node from lists and
* queue for free after rcu grace period
* @ref: kref pointer of the neigh_node
*/
-static void batadv_hardif_neigh_release(struct kref *ref)
+void batadv_hardif_neigh_release(struct kref *ref)
{
struct batadv_hardif_neigh_node *hardif_neigh;
@@ -274,21 +254,11 @@ static void batadv_hardif_neigh_release(struct kref *ref)
}
/**
- * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
- * and possibly release it
- * @hardif_neigh: hardif neigh neighbor to free
- */
-void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
-{
- kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
-}
-
-/**
* batadv_neigh_node_release() - release neigh_node from lists and queue for
* free after rcu grace period
* @ref: kref pointer of the neigh_node
*/
-static void batadv_neigh_node_release(struct kref *ref)
+void batadv_neigh_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
@@ -309,16 +279,6 @@ static void batadv_neigh_node_release(struct kref *ref)
}
/**
- * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
- * release it
- * @neigh_node: neigh neighbor to free
- */
-void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
-{
- kref_put(&neigh_node->refcount, batadv_neigh_node_release);
-}
-
-/**
* batadv_orig_router_get() - router to the originator depending on iface
* @orig_node: the orig node for the router
* @if_outgoing: the interface where the payload packet has been received or
@@ -704,8 +664,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node,
out:
spin_unlock_bh(&orig_node->neigh_list_lock);
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
return neigh_node;
}
@@ -797,14 +756,10 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (hardif)
- batadv_hardif_put(hardif);
- if (hard_iface)
- dev_put(hard_iface);
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(hardif);
+ dev_put(hard_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
return ret;
}
@@ -814,7 +769,7 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
* free after rcu grace period
* @ref: kref pointer of the orig_ifinfo
*/
-static void batadv_orig_ifinfo_release(struct kref *ref)
+void batadv_orig_ifinfo_release(struct kref *ref)
{
struct batadv_orig_ifinfo *orig_ifinfo;
struct batadv_neigh_node *router;
@@ -826,23 +781,12 @@ static void batadv_orig_ifinfo_release(struct kref *ref)
/* this is the last reference to this object */
router = rcu_dereference_protected(orig_ifinfo->router, true);
- if (router)
- batadv_neigh_node_put(router);
+ batadv_neigh_node_put(router);
kfree_rcu(orig_ifinfo, rcu);
}
/**
- * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
- * the orig_ifinfo
- * @orig_ifinfo: the orig_ifinfo object to release
- */
-void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
-{
- kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
-}
-
-/**
* batadv_orig_node_free_rcu() - free the orig_node
* @rcu: rcu pointer of the orig_node
*/
@@ -865,7 +809,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
* free after rcu grace period
* @ref: kref pointer of the orig_node
*/
-static void batadv_orig_node_release(struct kref *ref)
+void batadv_orig_node_release(struct kref *ref)
{
struct hlist_node *node_tmp;
struct batadv_neigh_node *neigh_node;
@@ -895,8 +839,7 @@ static void batadv_orig_node_release(struct kref *ref)
orig_node->last_bonding_candidate = NULL;
spin_unlock_bh(&orig_node->neigh_list_lock);
- if (last_candidate)
- batadv_orig_ifinfo_put(last_candidate);
+ batadv_orig_ifinfo_put(last_candidate);
spin_lock_bh(&orig_node->vlan_list_lock);
hlist_for_each_entry_safe(vlan, node_tmp, &orig_node->vlan_list, list) {
@@ -912,16 +855,6 @@ static void batadv_orig_node_release(struct kref *ref)
}
/**
- * batadv_orig_node_put() - decrement the orig node refcounter and possibly
- * release it
- * @orig_node: the orig node to free
- */
-void batadv_orig_node_put(struct batadv_orig_node *orig_node)
-{
- kref_put(&orig_node->refcount, batadv_orig_node_release);
-}
-
-/**
* batadv_originator_free() - Free all originator structures
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -1213,8 +1146,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv,
if (!kref_get_unless_zero(&neigh->refcount))
continue;
- if (best)
- batadv_neigh_node_put(best);
+ batadv_neigh_node_put(best);
best = neigh;
}
@@ -1259,8 +1191,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
BATADV_IF_DEFAULT);
batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT,
best_neigh_node);
- if (best_neigh_node)
- batadv_neigh_node_put(best_neigh_node);
+ batadv_neigh_node_put(best_neigh_node);
/* ... then for all other interfaces. */
rcu_read_lock();
@@ -1279,8 +1210,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv,
hard_iface);
batadv_update_route(bat_priv, orig_node, hard_iface,
best_neigh_node);
- if (best_neigh_node)
- batadv_neigh_node_put(best_neigh_node);
+ batadv_neigh_node_put(best_neigh_node);
batadv_hardif_put(hard_iface);
}
@@ -1410,14 +1340,10 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (hardif)
- batadv_hardif_put(hardif);
- if (hard_iface)
- dev_put(hard_iface);
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(hardif);
+ dev_put(hard_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
return ret;
}
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 805be87d55b8..ea3d69e4e670 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -12,6 +12,7 @@
#include <linux/compiler.h>
#include <linux/if_ether.h>
#include <linux/jhash.h>
+#include <linux/kref.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
#include <linux/types.h>
@@ -20,19 +21,18 @@ bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
int batadv_originator_init(struct batadv_priv *bat_priv);
void batadv_originator_free(struct batadv_priv *bat_priv);
void batadv_purge_orig_ref(struct batadv_priv *bat_priv);
-void batadv_orig_node_put(struct batadv_orig_node *orig_node);
+void batadv_orig_node_release(struct kref *ref);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const u8 *addr);
struct batadv_hardif_neigh_node *
batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr);
-void
-batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh);
+void batadv_hardif_neigh_release(struct kref *ref);
struct batadv_neigh_node *
batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *hard_iface,
const u8 *neigh_addr);
-void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node);
+void batadv_neigh_node_release(struct kref *ref);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *if_outgoing);
@@ -42,7 +42,7 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
-void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
+void batadv_neigh_ifinfo_release(struct kref *ref);
int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
@@ -52,7 +52,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node,
struct batadv_orig_ifinfo *
batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
struct batadv_hard_iface *if_outgoing);
-void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
+void batadv_orig_ifinfo_release(struct kref *ref);
int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
struct batadv_orig_node_vlan *
@@ -61,7 +61,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node,
struct batadv_orig_node_vlan *
batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node,
unsigned short vid);
-void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan);
+void batadv_orig_node_vlan_release(struct kref *ref);
/**
* batadv_choose_orig() - Return the index of the orig entry in the hash table
@@ -82,4 +82,86 @@ static inline u32 batadv_choose_orig(const void *data, u32 size)
struct batadv_orig_node *
batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data);
+/**
+ * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release
+ * the originator-vlan object
+ * @orig_vlan: the originator-vlan object to release
+ */
+static inline void
+batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan)
+{
+ if (!orig_vlan)
+ return;
+
+ kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release);
+}
+
+/**
+ * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release
+ * the neigh_ifinfo
+ * @neigh_ifinfo: the neigh_ifinfo object to release
+ */
+static inline void
+batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo)
+{
+ if (!neigh_ifinfo)
+ return;
+
+ kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release);
+}
+
+/**
+ * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter
+ * and possibly release it
+ * @hardif_neigh: hardif neigh neighbor to free
+ */
+static inline void
+batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh)
+{
+ if (!hardif_neigh)
+ return;
+
+ kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release);
+}
+
+/**
+ * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly
+ * release it
+ * @neigh_node: neigh neighbor to free
+ */
+static inline void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node)
+{
+ if (!neigh_node)
+ return;
+
+ kref_put(&neigh_node->refcount, batadv_neigh_node_release);
+}
+
+/**
+ * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release
+ * the orig_ifinfo
+ * @orig_ifinfo: the orig_ifinfo object to release
+ */
+static inline void
+batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo)
+{
+ if (!orig_ifinfo)
+ return;
+
+ kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release);
+}
+
+/**
+ * batadv_orig_node_put() - decrement the orig node refcounter and possibly
+ * release it
+ * @orig_node: the orig node to free
+ */
+static inline void batadv_orig_node_put(struct batadv_orig_node *orig_node)
+{
+ if (!orig_node)
+ return;
+
+ kref_put(&orig_node->refcount, batadv_orig_node_release);
+}
+
#endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 40f5cffde6a3..970d0d7ccc98 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -101,8 +101,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
}
/* decrease refcount of previous best neighbor */
- if (curr_router)
- batadv_neigh_node_put(curr_router);
+ batadv_neigh_node_put(curr_router);
}
/**
@@ -128,8 +127,7 @@ void batadv_update_route(struct batadv_priv *bat_priv,
_batadv_update_route(bat_priv, orig_node, recv_if, neigh_node);
out:
- if (router)
- batadv_neigh_node_put(router);
+ batadv_neigh_node_put(router);
}
/**
@@ -269,10 +267,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
goto out;
}
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
kfree_skb(skb);
@@ -324,10 +320,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
skb = NULL;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
kfree_skb(skb);
@@ -425,8 +419,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
skb = NULL;
put_orig_node:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
free_skb:
kfree_skb(skb);
@@ -513,8 +506,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node,
orig_node->last_bonding_candidate = new_candidate;
spin_unlock_bh(&orig_node->neigh_list_lock);
- if (old_candidate)
- batadv_orig_ifinfo_put(old_candidate);
+ batadv_orig_ifinfo_put(old_candidate);
}
/**
@@ -656,8 +648,7 @@ next:
batadv_orig_ifinfo_put(next_candidate);
}
- if (last_candidate)
- batadv_orig_ifinfo_put(last_candidate);
+ batadv_orig_ifinfo_put(last_candidate);
return router;
}
@@ -785,10 +776,8 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -1031,8 +1020,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
orig_node);
rx_success:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return NET_RX_SUCCESS;
}
@@ -1182,9 +1170,9 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_bcast_packet *bcast_packet;
struct ethhdr *ethhdr;
int hdr_size = sizeof(*bcast_packet);
- int ret = NET_RX_DROP;
s32 seq_diff;
u32 seqno;
+ int ret;
/* drop packet if it has not necessary minimum size */
if (unlikely(!pskb_may_pull(skb, hdr_size)))
@@ -1210,7 +1198,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
if (batadv_is_my_mac(bat_priv, bcast_packet->orig))
goto free_skb;
- if (bcast_packet->ttl < 2)
+ if (bcast_packet->ttl-- < 2)
goto free_skb;
orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig);
@@ -1249,7 +1237,9 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
/* rebroadcast packet */
- batadv_add_bcast_packet_to_list(bat_priv, skb, 1, false);
+ ret = batadv_forw_bcast_packet(bat_priv, skb, 0, false);
+ if (ret == NETDEV_TX_BUSY)
+ goto free_skb;
/* don't hand the broadcast up if it is from an originator
* from the same backbone.
@@ -1275,8 +1265,8 @@ spin_unlock:
spin_unlock_bh(&orig_node->bcast_seqno_lock);
free_skb:
kfree_skb(skb);
+ ret = NET_RX_DROP;
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
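
Note: because the rebroadcast queue now clones the skb instead of copying it, the queued copy can no longer have its TTL decremented after the fact. Instead the originator starts at BATADV_TTL - 1 (see the soft-interface change further down) and the forwarder adjusts the header in place before queueing. A condensed sketch of the reworked ordering in batadv_recv_bcast_packet(), using only names from the patch and with unrelated checks omitted:

	if (batadv_is_my_mac(bat_priv, bcast_packet->orig))
		goto free_skb;

	/* the TTL is adjusted on the received skb itself, before any
	 * clone of it is queued for rebroadcast
	 */
	if (bcast_packet->ttl-- < 2)
		goto free_skb;

	/* queue (re)broadcasts; batadv_forw_bcast_packet() clones the skb */
	ret = batadv_forw_bcast_packet(bat_priv, skb, 0, false);
	if (ret == NETDEV_TX_BUSY)
		goto free_skb;
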
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 157abe92d827..477d85a3b558 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -152,8 +152,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
if (hardif_neigh && ret != NET_XMIT_DROP)
hardif_neigh->bat_v.last_unicast_tx = jiffies;
- if (hardif_neigh)
- batadv_hardif_neigh_put(hardif_neigh);
+ batadv_hardif_neigh_put(hardif_neigh);
#endif
return ret;
@@ -309,8 +308,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
return ret;
}
@@ -425,8 +423,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv,
ret = batadv_send_skb_unicast(bat_priv, skb, packet_type,
packet_subtype, orig_node, vid);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -452,8 +449,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
BATADV_P_DATA, orig_node, vid);
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return ret;
}
@@ -474,10 +470,8 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
else
consume_skb(forw_packet->skb);
- if (forw_packet->if_incoming)
- batadv_hardif_put(forw_packet->if_incoming);
- if (forw_packet->if_outgoing)
- batadv_hardif_put(forw_packet->if_outgoing);
+ batadv_hardif_put(forw_packet->if_incoming);
+ batadv_hardif_put(forw_packet->if_outgoing);
if (forw_packet->queue_left)
atomic_inc(forw_packet->queue_left);
kfree(forw_packet);
@@ -737,57 +731,52 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
}
/**
- * batadv_add_bcast_packet_to_list() - queue broadcast packet for multiple sends
+ * batadv_forw_bcast_packet_to_list() - queue broadcast packet for transmissions
* @bat_priv: the bat priv with all the soft interface information
* @skb: broadcast packet to add
* @delay: number of jiffies to wait before sending
* @own_packet: true if it is a self-generated broadcast packet
+ * @if_in: the interface where the packet was received on
+ * @if_out: the outgoing interface to queue on
*
- * add a broadcast packet to the queue and setup timers. broadcast packets
+ * Adds a broadcast packet to the queue and sets up timers. Broadcast packets
* are sent multiple times to increase probability for being received.
*
- * The skb is not consumed, so the caller should make sure that the
- * skb is freed.
+ * This call clones the given skb, hence the caller needs to take into
+ * account that the data segment of the original skb might not be
+ * modifiable anymore.
*
* Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
*/
-int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
- const struct sk_buff *skb,
- unsigned long delay,
- bool own_packet)
+static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned long delay,
+ bool own_packet,
+ struct batadv_hard_iface *if_in,
+ struct batadv_hard_iface *if_out)
{
- struct batadv_hard_iface *primary_if;
struct batadv_forw_packet *forw_packet;
- struct batadv_bcast_packet *bcast_packet;
+ unsigned long send_time = jiffies;
struct sk_buff *newskb;
- primary_if = batadv_primary_if_get_selected(bat_priv);
- if (!primary_if)
+ newskb = skb_clone(skb, GFP_ATOMIC);
+ if (!newskb)
goto err;
- newskb = skb_copy(skb, GFP_ATOMIC);
- if (!newskb) {
- batadv_hardif_put(primary_if);
- goto err;
- }
-
- forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
+ forw_packet = batadv_forw_packet_alloc(if_in, if_out,
&bat_priv->bcast_queue_left,
bat_priv, newskb);
- batadv_hardif_put(primary_if);
if (!forw_packet)
goto err_packet_free;
- /* as we have a copy now, it is safe to decrease the TTL */
- bcast_packet = (struct batadv_bcast_packet *)newskb->data;
- bcast_packet->ttl--;
-
forw_packet->own = own_packet;
INIT_DELAYED_WORK(&forw_packet->delayed_work,
batadv_send_outstanding_bcast_packet);
- batadv_forw_packet_bcast_queue(bat_priv, forw_packet, jiffies + delay);
+ send_time += delay ? delay : msecs_to_jiffies(5);
+
+ batadv_forw_packet_bcast_queue(bat_priv, forw_packet, send_time);
return NETDEV_TX_OK;
err_packet_free:
@@ -797,9 +786,222 @@ err:
}
/**
+ * batadv_forw_bcast_packet_if() - forward and queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
+ * @if_in: the interface where the packet was received on
+ * @if_out: the outgoing interface to forward to
+ *
+ * Transmits a broadcast packet on the specified interface either immediately
+ * or if a delay is given after that. Furthermore, queues additional
+ * retransmissions if this interface is a wireless one.
+ *
+ * This call clones the given skb, hence the caller needs to take into
+ * account that the data segment of the original skb might not be
+ * modifiable anymore.
+ *
+ * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
+ */
+static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned long delay,
+ bool own_packet,
+ struct batadv_hard_iface *if_in,
+ struct batadv_hard_iface *if_out)
+{
+ unsigned int num_bcasts = if_out->num_bcasts;
+ struct sk_buff *newskb;
+ int ret = NETDEV_TX_OK;
+
+ if (!delay) {
+ newskb = skb_clone(skb, GFP_ATOMIC);
+ if (!newskb)
+ return NETDEV_TX_BUSY;
+
+ batadv_send_broadcast_skb(newskb, if_out);
+ num_bcasts--;
+ }
+
+ /* delayed broadcast or rebroadcasts? */
+ if (num_bcasts >= 1) {
+ BATADV_SKB_CB(skb)->num_bcasts = num_bcasts;
+
+ ret = batadv_forw_bcast_packet_to_list(bat_priv, skb, delay,
+ own_packet, if_in,
+ if_out);
+ }
+
+ return ret;
+}
+
+/**
+ * batadv_send_no_broadcast() - check whether (re)broadcast is necessary
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to check
+ * @own_packet: true if it is a self-generated broadcast packet
+ * @if_out: the outgoing interface checked and considered for (re)broadcast
+ *
+ * Return: False if a packet needs to be (re)broadcasted on the given interface,
+ * true otherwise.
+ */
+static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, bool own_packet,
+ struct batadv_hard_iface *if_out)
+{
+ struct batadv_hardif_neigh_node *neigh_node = NULL;
+ struct batadv_bcast_packet *bcast_packet;
+ u8 *orig_neigh;
+ u8 *neigh_addr;
+ char *type;
+ int ret;
+
+ if (!own_packet) {
+ neigh_addr = eth_hdr(skb)->h_source;
+ neigh_node = batadv_hardif_neigh_get(if_out,
+ neigh_addr);
+ }
+
+ bcast_packet = (struct batadv_bcast_packet *)skb->data;
+ orig_neigh = neigh_node ? neigh_node->orig : NULL;
+
+ ret = batadv_hardif_no_broadcast(if_out, bcast_packet->orig,
+ orig_neigh);
+
+ batadv_hardif_neigh_put(neigh_node);
+
+ /* ok, may broadcast */
+ if (!ret)
+ return false;
+
+ /* no broadcast */
+ switch (ret) {
+ case BATADV_HARDIF_BCAST_NORECIPIENT:
+ type = "no neighbor";
+ break;
+ case BATADV_HARDIF_BCAST_DUPFWD:
+ type = "single neighbor is source";
+ break;
+ case BATADV_HARDIF_BCAST_DUPORIG:
+ type = "single neighbor is originator";
+ break;
+ default:
+ type = "unknown";
+ }
+
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "BCAST packet from orig %pM on %s suppressed: %s\n",
+ bcast_packet->orig,
+ if_out->net_dev->name, type);
+
+ return true;
+}
+
+/**
+ * __batadv_forw_bcast_packet() - forward and queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
+ *
+ * Transmits a broadcast packet either immediately or if a delay is given
+ * after that. Furthermore, queues additional retransmissions on wireless
+ * interfaces.
+ *
+ * This call clones the given skb, hence the caller needs to take into
+ * account that the data segment of the given skb might not be
+ * modifiable anymore.
+ *
+ * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
+ */
+static int __batadv_forw_bcast_packet(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned long delay,
+ bool own_packet)
+{
+ struct batadv_hard_iface *hard_iface;
+ struct batadv_hard_iface *primary_if;
+ int ret = NETDEV_TX_OK;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if)
+ return NETDEV_TX_BUSY;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->soft_iface != bat_priv->soft_iface)
+ continue;
+
+ if (!kref_get_unless_zero(&hard_iface->refcount))
+ continue;
+
+ if (batadv_send_no_broadcast(bat_priv, skb, own_packet,
+ hard_iface)) {
+ batadv_hardif_put(hard_iface);
+ continue;
+ }
+
+ ret = batadv_forw_bcast_packet_if(bat_priv, skb, delay,
+ own_packet, primary_if,
+ hard_iface);
+ batadv_hardif_put(hard_iface);
+
+ if (ret == NETDEV_TX_BUSY)
+ break;
+ }
+ rcu_read_unlock();
+
+ batadv_hardif_put(primary_if);
+ return ret;
+}
+
+/**
+ * batadv_forw_bcast_packet() - forward and queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
+ *
+ * Transmits a broadcast packet either immediately or if a delay is given
+ * after that. Furthermore, queues additional retransmissions on wireless
+ * interfaces.
+ *
+ * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors.
+ */
+int batadv_forw_bcast_packet(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned long delay,
+ bool own_packet)
+{
+ return __batadv_forw_bcast_packet(bat_priv, skb, delay, own_packet);
+}
+
+/**
+ * batadv_send_bcast_packet() - send and queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: broadcast packet to add
+ * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
+ *
+ * Transmits a broadcast packet either immediately or if a delay is given
+ * after that. Furthermore, queues additional retransmissions on wireless
+ * interfaces.
+ *
+ * Consumes the provided skb.
+ */
+void batadv_send_bcast_packet(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned long delay,
+ bool own_packet)
+{
+ __batadv_forw_bcast_packet(bat_priv, skb, delay, own_packet);
+ consume_skb(skb);
+}
+
+/**
* batadv_forw_packet_bcasts_left() - check if a retransmission is necessary
* @forw_packet: the forwarding packet to check
- * @hard_iface: the interface to check on
*
* Checks whether a given packet has any (re)transmissions left on the provided
* interface.
@@ -811,28 +1013,20 @@ err:
* Return: True if (re)transmissions are left, false otherwise.
*/
static bool
-batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet,
- struct batadv_hard_iface *hard_iface)
+batadv_forw_packet_bcasts_left(struct batadv_forw_packet *forw_packet)
{
- unsigned int max;
-
- if (hard_iface)
- max = hard_iface->num_bcasts;
- else
- max = BATADV_NUM_BCASTS_MAX;
-
- return BATADV_SKB_CB(forw_packet->skb)->num_bcasts < max;
+ return BATADV_SKB_CB(forw_packet->skb)->num_bcasts;
}
/**
- * batadv_forw_packet_bcasts_inc() - increment retransmission counter of a
+ * batadv_forw_packet_bcasts_dec() - decrement retransmission counter of a
* packet
- * @forw_packet: the packet to increase the counter for
+ * @forw_packet: the packet to decrease the counter for
*/
static void
-batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
+batadv_forw_packet_bcasts_dec(struct batadv_forw_packet *forw_packet)
{
- BATADV_SKB_CB(forw_packet->skb)->num_bcasts++;
+ BATADV_SKB_CB(forw_packet->skb)->num_bcasts--;
}
/**
@@ -843,30 +1037,30 @@ batadv_forw_packet_bcasts_inc(struct batadv_forw_packet *forw_packet)
*/
bool batadv_forw_packet_is_rebroadcast(struct batadv_forw_packet *forw_packet)
{
- return BATADV_SKB_CB(forw_packet->skb)->num_bcasts > 0;
+ unsigned char num_bcasts = BATADV_SKB_CB(forw_packet->skb)->num_bcasts;
+
+ return num_bcasts != forw_packet->if_outgoing->num_bcasts;
}
+/**
+ * batadv_send_outstanding_bcast_packet() - transmit a queued broadcast packet
+ * @work: work queue item
+ *
+ * Transmits a queued broadcast packet and if necessary reschedules it.
+ */
static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
{
- struct batadv_hard_iface *hard_iface;
- struct batadv_hardif_neigh_node *neigh_node;
- struct delayed_work *delayed_work;
+ unsigned long send_time = jiffies + msecs_to_jiffies(5);
struct batadv_forw_packet *forw_packet;
- struct batadv_bcast_packet *bcast_packet;
- struct sk_buff *skb1;
- struct net_device *soft_iface;
+ struct delayed_work *delayed_work;
struct batadv_priv *bat_priv;
- unsigned long send_time = jiffies + msecs_to_jiffies(5);
+ struct sk_buff *skb1;
bool dropped = false;
- u8 *neigh_addr;
- u8 *orig_neigh;
- int ret = 0;
delayed_work = to_delayed_work(work);
forw_packet = container_of(delayed_work, struct batadv_forw_packet,
delayed_work);
- soft_iface = forw_packet->if_incoming->soft_iface;
- bat_priv = netdev_priv(soft_iface);
+ bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) {
dropped = true;
@@ -878,76 +1072,15 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
goto out;
}
- bcast_packet = (struct batadv_bcast_packet *)forw_packet->skb->data;
-
- /* rebroadcast packet */
- rcu_read_lock();
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if (hard_iface->soft_iface != soft_iface)
- continue;
-
- if (!batadv_forw_packet_bcasts_left(forw_packet, hard_iface))
- continue;
-
- if (forw_packet->own) {
- neigh_node = NULL;
- } else {
- neigh_addr = eth_hdr(forw_packet->skb)->h_source;
- neigh_node = batadv_hardif_neigh_get(hard_iface,
- neigh_addr);
- }
-
- orig_neigh = neigh_node ? neigh_node->orig : NULL;
-
- ret = batadv_hardif_no_broadcast(hard_iface, bcast_packet->orig,
- orig_neigh);
-
- if (ret) {
- char *type;
-
- switch (ret) {
- case BATADV_HARDIF_BCAST_NORECIPIENT:
- type = "no neighbor";
- break;
- case BATADV_HARDIF_BCAST_DUPFWD:
- type = "single neighbor is source";
- break;
- case BATADV_HARDIF_BCAST_DUPORIG:
- type = "single neighbor is originator";
- break;
- default:
- type = "unknown";
- }
-
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n",
- bcast_packet->orig,
- hard_iface->net_dev->name, type);
-
- if (neigh_node)
- batadv_hardif_neigh_put(neigh_node);
-
- continue;
- }
-
- if (neigh_node)
- batadv_hardif_neigh_put(neigh_node);
-
- if (!kref_get_unless_zero(&hard_iface->refcount))
- continue;
-
- /* send a copy of the saved skb */
- skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
- if (skb1)
- batadv_send_broadcast_skb(skb1, hard_iface);
-
- batadv_hardif_put(hard_iface);
- }
- rcu_read_unlock();
+ /* send a copy of the saved skb */
+ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC);
+ if (!skb1)
+ goto out;
- batadv_forw_packet_bcasts_inc(forw_packet);
+ batadv_send_broadcast_skb(skb1, forw_packet->if_outgoing);
+ batadv_forw_packet_bcasts_dec(forw_packet);
- /* if we still have some more bcasts to send */
- if (batadv_forw_packet_bcasts_left(forw_packet, NULL)) {
+ if (batadv_forw_packet_bcasts_left(forw_packet)) {
batadv_forw_packet_bcast_queue(bat_priv, forw_packet,
send_time);
return;
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 2b0daf8b2bc4..08af251b765c 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -39,10 +39,14 @@ int batadv_send_broadcast_skb(struct sk_buff *skb,
struct batadv_hard_iface *hard_iface);
int batadv_send_unicast_skb(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node);
-int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
- const struct sk_buff *skb,
- unsigned long delay,
- bool own_packet);
+int batadv_forw_bcast_packet(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned long delay,
+ bool own_packet);
+void batadv_send_bcast_packet(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned long delay,
+ bool own_packet);
void
batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
const struct batadv_hard_iface *hard_iface);
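
Note: the two new broadcast entry points differ in skb ownership, which is easy to miss when reading the callers piecemeal. batadv_forw_bcast_packet() clones the skb and leaves the original with the caller (the receive path still frees it on error), while batadv_send_bcast_packet() consumes the skb, replacing the explicit consume_skb() the soft-interface TX path used to need. A caller-side sketch, assuming the two surrounding contexts shown elsewhere in this series (fragments, not standalone):

	/* forward path: the received skb stays owned by the caller */
	ret = batadv_forw_bcast_packet(bat_priv, skb, 0, false);
	if (ret == NETDEV_TX_BUSY)
		goto free_skb;	/* caller still frees skb itself */

	/* local TX path: the skb is consumed, no consume_skb() afterwards */
	batadv_send_bcast_packet(bat_priv, skb, brd_delay, true);
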
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 6b8181bc3122..0604b0279573 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -26,7 +26,6 @@
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/percpu.h>
-#include <linux/printk.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -37,6 +36,7 @@
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <net/net_namespace.h>
#include <net/netlink.h>
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
@@ -191,7 +191,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
struct vlan_ethhdr *vhdr;
unsigned int header_len = 0;
int data_len = skb->len, ret;
- unsigned long brd_delay = 1;
+ unsigned long brd_delay = 0;
bool do_bcast = false, client_added;
unsigned short vid;
u32 seqno;
@@ -330,7 +330,7 @@ send:
bcast_packet = (struct batadv_bcast_packet *)skb->data;
bcast_packet->version = BATADV_COMPAT_VERSION;
- bcast_packet->ttl = BATADV_TTL;
+ bcast_packet->ttl = BATADV_TTL - 1;
/* batman packet type: broadcast */
bcast_packet->packet_type = BATADV_BCAST;
@@ -346,13 +346,7 @@ send:
seqno = atomic_inc_return(&bat_priv->bcast_seqno);
bcast_packet->seqno = htonl(seqno);
- batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay, true);
-
- /* a copy is stored in the bcast list, therefore removing
- * the original skb.
- */
- consume_skb(skb);
-
+ batadv_send_bcast_packet(bat_priv, skb, brd_delay, true);
/* unicast packet */
} else {
/* DHCP packets going to a server will use the GW feature */
@@ -389,10 +383,8 @@ dropped:
dropped_freed:
batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
end:
- if (mcast_single_orig)
- batadv_orig_node_put(mcast_single_orig);
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_orig_node_put(mcast_single_orig);
+ batadv_hardif_put(primary_if);
return NETDEV_TX_OK;
}
@@ -507,7 +499,7 @@ out:
* after rcu grace period
* @ref: kref pointer of the vlan object
*/
-static void batadv_softif_vlan_release(struct kref *ref)
+void batadv_softif_vlan_release(struct kref *ref)
{
struct batadv_softif_vlan *vlan;
@@ -521,19 +513,6 @@ static void batadv_softif_vlan_release(struct kref *ref)
}
/**
- * batadv_softif_vlan_put() - decrease the vlan object refcounter and
- * possibly release it
- * @vlan: the vlan object to release
- */
-void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
-{
- if (!vlan)
- return;
-
- kref_put(&vlan->refcount, batadv_softif_vlan_release);
-}
-
-/**
* batadv_softif_vlan_get() - get the vlan object for a specific vid
* @bat_priv: the bat priv with all the soft interface information
* @vid: the identifier of the vlan object to retrieve
@@ -848,18 +827,16 @@ static int batadv_softif_slave_add(struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct batadv_hard_iface *hard_iface;
- struct net *net = dev_net(dev);
int ret = -EINVAL;
hard_iface = batadv_hardif_get_by_netdev(slave_dev);
if (!hard_iface || hard_iface->soft_iface)
goto out;
- ret = batadv_hardif_enable_interface(hard_iface, net, dev->name);
+ ret = batadv_hardif_enable_interface(hard_iface, dev);
out:
- if (hard_iface)
- batadv_hardif_put(hard_iface);
+ batadv_hardif_put(hard_iface);
return ret;
}
@@ -885,8 +862,7 @@ static int batadv_softif_slave_del(struct net_device *dev,
ret = 0;
out:
- if (hard_iface)
- batadv_hardif_put(hard_iface);
+ batadv_hardif_put(hard_iface);
return ret;
}
@@ -1093,38 +1069,6 @@ static int batadv_softif_newlink(struct net *src_net, struct net_device *dev,
}
/**
- * batadv_softif_create() - Create and register soft interface
- * @net: the applicable net namespace
- * @name: name of the new soft interface
- *
- * Return: newly allocated soft_interface, NULL on errors
- */
-struct net_device *batadv_softif_create(struct net *net, const char *name)
-{
- struct net_device *soft_iface;
- int ret;
-
- soft_iface = alloc_netdev(sizeof(struct batadv_priv), name,
- NET_NAME_UNKNOWN, batadv_softif_init_early);
- if (!soft_iface)
- return NULL;
-
- dev_net_set(soft_iface, net);
-
- soft_iface->rtnl_link_ops = &batadv_link_ops;
-
- ret = register_netdevice(soft_iface);
- if (ret < 0) {
- pr_err("Unable to register the batman interface '%s': %i\n",
- name, ret);
- free_netdev(soft_iface);
- return NULL;
- }
-
- return soft_iface;
-}
-
-/**
* batadv_softif_destroy_netlink() - deletion of batadv_soft_interface via
* netlink
* @soft_iface: the to-be-removed batman-adv interface
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 38b0ad182584..9f2003f1a497 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -9,22 +9,34 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/types.h>
-#include <net/net_namespace.h>
#include <net/rtnetlink.h>
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
void batadv_interface_rx(struct net_device *soft_iface,
struct sk_buff *skb, int hdr_size,
struct batadv_orig_node *orig_node);
-struct net_device *batadv_softif_create(struct net *net, const char *name);
bool batadv_softif_is_valid(const struct net_device *net_dev);
extern struct rtnl_link_ops batadv_link_ops;
int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid);
-void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan);
+void batadv_softif_vlan_release(struct kref *ref);
struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv,
unsigned short vid);
+/**
+ * batadv_softif_vlan_put() - decrease the vlan object refcounter and
+ * possibly release it
+ * @vlan: the vlan object to release
+ */
+static inline void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan)
+{
+ if (!vlan)
+ return;
+
+ kref_put(&vlan->refcount, batadv_softif_vlan_release);
+}
+
#endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
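
The same refactoring pattern recurs throughout this series (soft-interface.h above, translation-table.h further down, and the tp_meter/tvlv hunks): the release callback stays out of line while the _put() helper becomes a NULL-tolerant static inline, so call sites can drop their "if (obj)" guards. A generic sketch of that pattern, with made-up names:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct foo {
	struct kref refcount;
	/* ... payload ... */
};

/* Out-of-line release callback, typically kept in the .c file */
static void foo_release(struct kref *ref)
{
	struct foo *foo = container_of(ref, struct foo, refcount);

	kfree(foo);
}

/* NULL-tolerant put helper: callers no longer need "if (foo)" */
static inline void foo_put(struct foo *foo)
{
	if (!foo)
		return;

	kref_put(&foo->refcount, foo_release);
}
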
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 789c851732b7..56b9fe97b3b4 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -358,6 +358,9 @@ static void batadv_tp_vars_release(struct kref *ref)
*/
static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars)
{
+ if (!tp_vars)
+ return;
+
kref_put(&tp_vars->refcount, batadv_tp_vars_release);
}
@@ -748,12 +751,9 @@ move_twnd:
wake_up(&tp_vars->more_bytes);
out:
- if (likely(primary_if))
- batadv_hardif_put(primary_if);
- if (likely(orig_node))
- batadv_orig_node_put(orig_node);
- if (likely(tp_vars))
- batadv_tp_vars_put(tp_vars);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
+ batadv_tp_vars_put(tp_vars);
}
/**
@@ -882,10 +882,8 @@ static int batadv_tp_send(void *arg)
}
out:
- if (likely(primary_if))
- batadv_hardif_put(primary_if);
- if (likely(orig_node))
- batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
batadv_tp_sender_end(bat_priv, tp_vars);
batadv_tp_sender_cleanup(bat_priv, tp_vars);
@@ -1205,10 +1203,8 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
ret = 0;
out:
- if (likely(orig_node))
- batadv_orig_node_put(orig_node);
- if (likely(primary_if))
- batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
return ret;
}
@@ -1456,8 +1452,7 @@ send_ack:
batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv,
icmp->timestamp, icmp->session, icmp->uid);
out:
- if (likely(tp_vars))
- batadv_tp_vars_put(tp_vars);
+ batadv_tp_vars_put(tp_vars);
}
/**
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 434b4f042909..e0b3dace2020 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -247,6 +247,9 @@ static void batadv_tt_local_entry_release(struct kref *ref)
static void
batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry)
{
+ if (!tt_local_entry)
+ return;
+
kref_put(&tt_local_entry->common.refcount,
batadv_tt_local_entry_release);
}
@@ -270,7 +273,7 @@ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
* queue for free after rcu grace period
* @ref: kref pointer of the nc_node
*/
-static void batadv_tt_global_entry_release(struct kref *ref)
+void batadv_tt_global_entry_release(struct kref *ref)
{
struct batadv_tt_global_entry *tt_global_entry;
@@ -283,17 +286,6 @@ static void batadv_tt_global_entry_release(struct kref *ref)
}
/**
- * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
- * possibly release it
- * @tt_global_entry: tt_global_entry to be free'd
- */
-void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
-{
- kref_put(&tt_global_entry->common.refcount,
- batadv_tt_global_entry_release);
-}
-
-/**
* batadv_tt_global_hash_count() - count the number of orig entries
* @bat_priv: the bat priv with all the soft interface information
* @addr: the mac address of the client to count entries for
@@ -452,6 +444,9 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref)
static void
batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry)
{
+ if (!orig_entry)
+ return;
+
kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release);
}
@@ -818,14 +813,10 @@ check_roaming:
ret = true;
out:
- if (in_hardif)
- batadv_hardif_put(in_hardif);
- if (in_dev)
- dev_put(in_dev);
- if (tt_local)
- batadv_tt_local_entry_put(tt_local);
- if (tt_global)
- batadv_tt_global_entry_put(tt_global);
+ batadv_hardif_put(in_hardif);
+ dev_put(in_dev);
+ batadv_tt_local_entry_put(tt_local);
+ batadv_tt_global_entry_put(tt_global);
return ret;
}
@@ -1215,10 +1206,8 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
cb->args[0] = bucket;
cb->args[1] = idx;
@@ -1305,8 +1294,7 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr,
batadv_tt_local_entry_put(tt_removed_entry);
out:
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return curr_flags;
}
@@ -1576,8 +1564,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
sync_flags:
batadv_tt_global_sync_flags(tt_global);
out:
- if (orig_entry)
- batadv_tt_orig_list_entry_put(orig_entry);
+ batadv_tt_orig_list_entry_put(orig_entry);
spin_unlock_bh(&tt_global->list_lock);
}
@@ -1750,10 +1737,8 @@ out_remove:
tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM;
out:
- if (tt_global_entry)
- batadv_tt_global_entry_put(tt_global_entry);
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return ret;
}
@@ -1789,15 +1774,13 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv,
}
/* release the refcount for the "old" best */
- if (best_router)
- batadv_neigh_node_put(best_router);
+ batadv_neigh_node_put(best_router);
best_entry = orig_entry;
best_router = router;
}
- if (best_router)
- batadv_neigh_node_put(best_router);
+ batadv_neigh_node_put(best_router);
return best_entry;
}
@@ -2003,10 +1986,8 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
ret = msg->len;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
- if (soft_iface)
- dev_put(soft_iface);
+ batadv_hardif_put(primary_if);
+ dev_put(soft_iface);
cb->args[0] = bucket;
cb->args[1] = idx;
@@ -2196,10 +2177,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv,
}
out:
- if (tt_global_entry)
- batadv_tt_global_entry_put(tt_global_entry);
- if (local_entry)
- batadv_tt_local_entry_put(local_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
+ batadv_tt_local_entry_put(local_entry);
}
/**
@@ -2426,10 +2405,8 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
rcu_read_unlock();
out:
- if (tt_global_entry)
- batadv_tt_global_entry_put(tt_global_entry);
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
+ batadv_tt_global_entry_put(tt_global_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return orig_node;
}
@@ -2606,6 +2583,9 @@ static void batadv_tt_req_node_release(struct kref *ref)
*/
static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node)
{
+ if (!tt_req_node)
+ return;
+
kref_put(&tt_req_node->refcount, batadv_tt_req_node_release);
}
@@ -2987,8 +2967,7 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv,
ret = true;
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
if (ret && tt_req_node) {
spin_lock_bh(&bat_priv->tt.req_list_lock);
@@ -2999,8 +2978,7 @@ out:
spin_unlock_bh(&bat_priv->tt.req_list_lock);
}
- if (tt_req_node)
- batadv_tt_req_node_put(tt_req_node);
+ batadv_tt_req_node_put(tt_req_node);
kfree(tvlv_tt_data);
return ret;
@@ -3131,10 +3109,8 @@ unlock:
spin_unlock_bh(&req_dst_orig_node->tt_buff_lock);
out:
- if (res_dst_orig_node)
- batadv_orig_node_put(res_dst_orig_node);
- if (req_dst_orig_node)
- batadv_orig_node_put(req_dst_orig_node);
+ batadv_orig_node_put(res_dst_orig_node);
+ batadv_orig_node_put(req_dst_orig_node);
kfree(tvlv_tt_data);
return ret;
}
@@ -3248,10 +3224,8 @@ unlock:
spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
out:
spin_unlock_bh(&bat_priv->tt.commit_lock);
- if (orig_node)
- batadv_orig_node_put(orig_node);
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_orig_node_put(orig_node);
+ batadv_hardif_put(primary_if);
kfree(tvlv_tt_data);
/* The packet was for this host, so it doesn't need to be re-routed */
return true;
@@ -3336,8 +3310,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
atomic_set(&orig_node->last_ttvn, ttvn);
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
}
static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
@@ -3378,8 +3351,7 @@ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr,
goto out;
ret = true;
out:
- if (tt_local_entry)
- batadv_tt_local_entry_put(tt_local_entry);
+ batadv_tt_local_entry_put(tt_local_entry);
return ret;
}
@@ -3442,8 +3414,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
spin_unlock_bh(&bat_priv->tt.req_list_lock);
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
}
static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
@@ -3574,8 +3545,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client,
&tvlv_roam, sizeof(tvlv_roam));
out:
- if (primary_if)
- batadv_hardif_put(primary_if);
+ batadv_hardif_put(primary_if);
}
static void batadv_tt_purge(struct work_struct *work)
@@ -4170,8 +4140,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
atomic_read(&orig_node->last_ttvn) + 1);
out:
- if (orig_node)
- batadv_orig_node_put(orig_node);
+ batadv_orig_node_put(orig_node);
return NET_RX_SUCCESS;
}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index e1285904f885..d18740d9a22b 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/kref.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/skbuff.h>
@@ -28,7 +29,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry *
batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr,
unsigned short vid);
-void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry);
+void batadv_tt_global_entry_release(struct kref *ref);
int batadv_tt_global_hash_count(struct batadv_priv *bat_priv,
const u8 *addr, unsigned short vid);
struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv,
@@ -55,4 +56,19 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
int batadv_tt_cache_init(void);
void batadv_tt_cache_destroy(void);
+/**
+ * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and
+ * possibly release it
+ * @tt_global_entry: tt_global_entry to be free'd
+ */
+static inline void
+batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry)
+{
+ if (!tt_global_entry)
+ return;
+
+ kref_put(&tt_global_entry->common.refcount,
+ batadv_tt_global_entry_release);
+}
+
#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 253f5a33a914..992773376e51 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -50,6 +50,9 @@ static void batadv_tvlv_handler_release(struct kref *ref)
*/
static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler)
{
+ if (!tvlv_handler)
+ return;
+
kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release);
}
@@ -106,6 +109,9 @@ static void batadv_tvlv_container_release(struct kref *ref)
*/
static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv)
{
+ if (!tvlv)
+ return;
+
kref_put(&tvlv->refcount, batadv_tvlv_container_release);
}
@@ -438,8 +444,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
ogm_source, orig_node,
src, dst, tvlv_value,
tvlv_value_cont_len);
- if (tvlv_handler)
- batadv_tvlv_handler_put(tvlv_handler);
+ batadv_tvlv_handler_put(tvlv_handler);
tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
tvlv_value_len -= tvlv_value_cont_len;
}
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 97617d02c8f9..fd164a248569 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -103,34 +103,6 @@ static inline bool peer_del(struct lowpan_btle_dev *dev,
return false;
}
-static inline struct lowpan_peer *peer_lookup_ba(struct lowpan_btle_dev *dev,
- bdaddr_t *ba, __u8 type)
-{
- struct lowpan_peer *peer;
-
- BT_DBG("peers %d addr %pMR type %d", atomic_read(&dev->peer_count),
- ba, type);
-
- rcu_read_lock();
-
- list_for_each_entry_rcu(peer, &dev->peers, list) {
- BT_DBG("dst addr %pMR dst type %d",
- &peer->chan->dst, peer->chan->dst_type);
-
- if (bacmp(&peer->chan->dst, ba))
- continue;
-
- if (type == peer->chan->dst_type) {
- rcu_read_unlock();
- return peer;
- }
- }
-
- rcu_read_unlock();
-
- return NULL;
-}
-
static inline struct lowpan_peer *
__peer_lookup_chan(struct lowpan_btle_dev *dev, struct l2cap_chan *chan)
{
@@ -195,7 +167,7 @@ static inline struct lowpan_peer *peer_lookup_dst(struct lowpan_btle_dev *dev,
rcu_read_lock();
list_for_each_entry_rcu(peer, &dev->peers, list) {
- BT_DBG("dst addr %pMR dst type %d ip %pI6c",
+ BT_DBG("dst addr %pMR dst type %u ip %pI6c",
&peer->chan->dst, peer->chan->dst_type,
&peer->peer_addr);
@@ -506,7 +478,7 @@ static int send_mcast_pkt(struct sk_buff *skb, struct net_device *netdev)
local_skb = skb_clone(skb, GFP_ATOMIC);
- BT_DBG("xmit %s to %pMR type %d IP %pI6c chan %p",
+ BT_DBG("xmit %s to %pMR type %u IP %pI6c chan %p",
netdev->name,
&pentry->chan->dst, pentry->chan->dst_type,
&pentry->peer_addr, pentry->chan);
@@ -549,7 +521,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
if (err) {
if (lowpan_cb(skb)->chan) {
- BT_DBG("xmit %s to %pMR type %d IP %pI6c chan %p",
+ BT_DBG("xmit %s to %pMR type %u IP %pI6c chan %p",
netdev->name, &addr, addr_type,
&lowpan_cb(skb)->addr, lowpan_cb(skb)->chan);
err = send_pkt(lowpan_cb(skb)->chan, skb, netdev);
@@ -691,7 +663,7 @@ static struct l2cap_chan *add_peer_chan(struct l2cap_chan *chan,
static int setup_netdev(struct l2cap_chan *chan, struct lowpan_btle_dev **dev)
{
struct net_device *netdev;
- int err = 0;
+ int err;
netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_btle_dev)),
IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN,
@@ -818,7 +790,7 @@ static void chan_close_cb(struct l2cap_chan *chan)
BT_DBG("dev %p removing %speer %p", dev,
last ? "last " : "1 ", peer);
- BT_DBG("chan %p orig refcnt %d", chan,
+ BT_DBG("chan %p orig refcnt %u", chan,
kref_read(&chan->kref));
l2cap_chan_put(chan);
@@ -907,14 +879,6 @@ static const struct l2cap_ops bt_6lowpan_chan_ops = {
.set_shutdown = l2cap_chan_no_set_shutdown,
};
-static inline __u8 bdaddr_type(__u8 type)
-{
- if (type == ADDR_LE_DEV_PUBLIC)
- return BDADDR_LE_PUBLIC;
- else
- return BDADDR_LE_RANDOM;
-}
-
static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type)
{
struct l2cap_chan *chan;
@@ -940,7 +904,7 @@ static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type)
{
struct lowpan_peer *peer;
- BT_DBG("conn %p dst type %d", conn, dst_type);
+ BT_DBG("conn %p dst type %u", conn, dst_type);
peer = lookup_peer(conn);
if (!peer)
@@ -972,7 +936,7 @@ static struct l2cap_chan *bt_6lowpan_listen(void)
atomic_set(&chan->nesting, L2CAP_NESTING_PARENT);
- BT_DBG("chan %p src type %d", chan, chan->src_type);
+ BT_DBG("chan %p src type %u", chan, chan->src_type);
err = l2cap_add_psm(chan, addr, cpu_to_le16(L2CAP_PSM_IPSP));
if (err) {
@@ -1013,7 +977,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
*conn = (struct l2cap_conn *)hcon->l2cap_data;
- BT_DBG("conn %p dst %pMR type %d", *conn, &hcon->dst, hcon->dst_type);
+ BT_DBG("conn %p dst %pMR type %u", *conn, &hcon->dst, hcon->dst_type);
return 0;
}
@@ -1155,7 +1119,7 @@ static ssize_t lowpan_control_write(struct file *fp,
return -EALREADY;
}
- BT_DBG("conn %p dst %pMR type %d user %d", conn,
+ BT_DBG("conn %p dst %pMR type %d user %u", conn,
&conn->hcon->dst, conn->hcon->dst_type,
addr_type);
}
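
The BT_DBG changes in this file (and in the a2mp/amp/bnep/cmtp hunks that follow) are one mechanical cleanup: values of unsigned types such as __u8 and __u16 are now printed with %u instead of %d. A minimal illustration with made-up arguments:

#include <linux/types.h>
#include <net/bluetooth/bluetooth.h>

static void example_dbg(__u8 dst_type, __u16 len)
{
	/* %u matches the promoted unsigned values; %d only happened to work */
	BT_DBG("dst type %u len %u", dst_type, len);
}
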
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 463bad58478b..1fcc482397c3 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -120,7 +120,7 @@ static int a2mp_command_rej(struct amp_mgr *mgr, struct sk_buff *skb,
if (le16_to_cpu(hdr->len) < sizeof(*rej))
return -EINVAL;
- BT_DBG("ident %d reason %d", hdr->ident, le16_to_cpu(rej->reason));
+ BT_DBG("ident %u reason %d", hdr->ident, le16_to_cpu(rej->reason));
skb_pull(skb, sizeof(*rej));
@@ -219,7 +219,7 @@ static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
cl = (void *) skb->data;
while (len >= sizeof(*cl)) {
- BT_DBG("Remote AMP id %d type %d status %d", cl->id, cl->type,
+ BT_DBG("Remote AMP id %u type %u status %u", cl->id, cl->type,
cl->status);
if (cl->id != AMP_ID_BREDR && cl->type != AMP_TYPE_BREDR) {
@@ -273,7 +273,7 @@ static int a2mp_change_notify(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_cl *cl = (void *) skb->data;
while (skb->len >= sizeof(*cl)) {
- BT_DBG("Controller id %d type %d status %d", cl->id, cl->type,
+ BT_DBG("Controller id %u type %u status %u", cl->id, cl->type,
cl->status);
cl = skb_pull(skb, sizeof(*cl));
}
@@ -302,7 +302,7 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb,
if (le16_to_cpu(hdr->len) < sizeof(*req))
return -EINVAL;
- BT_DBG("id %d", req->id);
+ BT_DBG("id %u", req->id);
hdev = hci_dev_get(req->id);
if (!hdev || hdev->dev_type != HCI_AMP) {
@@ -344,7 +344,7 @@ static int a2mp_getinfo_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
if (le16_to_cpu(hdr->len) < sizeof(*rsp))
return -EINVAL;
- BT_DBG("id %d status 0x%2.2x", rsp->id, rsp->status);
+ BT_DBG("id %u status 0x%2.2x", rsp->id, rsp->status);
if (rsp->status)
return -EINVAL;
@@ -373,7 +373,7 @@ static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb,
if (le16_to_cpu(hdr->len) < sizeof(*req))
return -EINVAL;
- BT_DBG("id %d", req->id);
+ BT_DBG("id %u", req->id);
/* Make sure that other request is not processed */
tmp = amp_mgr_lookup_by_state(READ_LOC_AMP_ASSOC);
@@ -423,7 +423,7 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
assoc_len = len - sizeof(*rsp);
- BT_DBG("id %d status 0x%2.2x assoc len %zu", rsp->id, rsp->status,
+ BT_DBG("id %u status 0x%2.2x assoc len %zu", rsp->id, rsp->status,
assoc_len);
if (rsp->status)
@@ -457,7 +457,7 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
if (!hcon)
goto done;
- BT_DBG("Created hcon %p: loc:%d -> rem:%d", hcon, hdev->id, rsp->id);
+ BT_DBG("Created hcon %p: loc:%u -> rem:%u", hcon, hdev->id, rsp->id);
mgr->bredr_chan->remote_amp_id = rsp->id;
@@ -481,7 +481,7 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
if (le16_to_cpu(hdr->len) < sizeof(*req))
return -EINVAL;
- BT_DBG("local_id %d, remote_id %d", req->local_id, req->remote_id);
+ BT_DBG("local_id %u, remote_id %u", req->local_id, req->remote_id);
memset(&rsp, 0, sizeof(rsp));
@@ -562,7 +562,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
if (le16_to_cpu(hdr->len) < sizeof(*req))
return -EINVAL;
- BT_DBG("local_id %d remote_id %d", req->local_id, req->remote_id);
+ BT_DBG("local_id %u remote_id %u", req->local_id, req->remote_id);
memset(&rsp, 0, sizeof(rsp));
@@ -599,7 +599,7 @@ send_rsp:
static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_cmd *hdr)
{
- BT_DBG("ident %d code 0x%2.2x", hdr->ident, hdr->code);
+ BT_DBG("ident %u code 0x%2.2x", hdr->ident, hdr->code);
skb_pull(skb, le16_to_cpu(hdr->len));
return 0;
@@ -620,7 +620,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
hdr = (void *) skb->data;
len = le16_to_cpu(hdr->len);
- BT_DBG("code 0x%2.2x id %d len %u", hdr->code, hdr->ident, len);
+ BT_DBG("code 0x%2.2x id %u len %u", hdr->code, hdr->ident, len);
skb_pull(skb, sizeof(*hdr));
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index be2d469d6369..2134f92bd7ac 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -78,7 +78,7 @@ struct amp_ctrl *amp_ctrl_lookup(struct amp_mgr *mgr, u8 id)
{
struct amp_ctrl *ctrl;
- BT_DBG("mgr %p id %d", mgr, id);
+ BT_DBG("mgr %p id %u", mgr, id);
mutex_lock(&mgr->amp_ctrls_lock);
list_for_each_entry(ctrl, &mgr->amp_ctrls, list) {
@@ -179,7 +179,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
/* Legacy key */
if (conn->key_type < 3) {
- bt_dev_err(hdev, "legacy key type %d", conn->key_type);
+ bt_dev_err(hdev, "legacy key type %u", conn->key_type);
return -EACCES;
}
@@ -257,7 +257,7 @@ void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle)
struct hci_request req;
int err;
- BT_DBG("%s handle %d", hdev->name, phy_handle);
+ BT_DBG("%s handle %u", hdev->name, phy_handle);
cp.phy_handle = phy_handle;
cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 43c284158f63..72f47b372705 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -126,8 +126,8 @@ static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len
f[i].start = get_unaligned_be16(data++);
f[i].end = get_unaligned_be16(data++);
- BT_DBG("proto filter start %d end %d",
- f[i].start, f[i].end);
+ BT_DBG("proto filter start %u end %u",
+ f[i].start, f[i].end);
}
if (i < BNEP_MAX_PROTO_FILTERS)
@@ -266,7 +266,7 @@ static int bnep_rx_extension(struct bnep_session *s, struct sk_buff *skb)
break;
}
- BT_DBG("type 0x%x len %d", h->type, h->len);
+ BT_DBG("type 0x%x len %u", h->type, h->len);
switch (h->type & BNEP_TYPE_MASK) {
case BNEP_EXT_CONTROL:
@@ -424,7 +424,7 @@ static int bnep_tx_frame(struct bnep_session *s, struct sk_buff *skb)
int len = 0, il = 0;
u8 type = 0;
- BT_DBG("skb %p dev %p type %d", skb, skb->dev, skb->pkt_type);
+ BT_DBG("skb %p dev %p type %u", skb, skb->dev, skb->pkt_type);
if (!skb->dev) {
/* Control frame sent by us */
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index eb41556002e3..f3bedc3b613a 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -74,7 +74,7 @@ static struct cmtp_application *cmtp_application_add(struct cmtp_session *sessio
{
struct cmtp_application *app = kzalloc(sizeof(*app), GFP_KERNEL);
- BT_DBG("session %p application %p appl %d", session, app, appl);
+ BT_DBG("session %p application %p appl %u", session, app, appl);
if (!app)
return NULL;
@@ -135,7 +135,7 @@ static void cmtp_send_capimsg(struct cmtp_session *session, struct sk_buff *skb)
{
struct cmtp_scb *scb = (void *) skb->cb;
- BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+ BT_DBG("session %p skb %p len %u", session, skb, skb->len);
scb->id = -1;
scb->data = (CAPIMSG_COMMAND(skb->data) == CAPI_DATA_B3);
@@ -152,7 +152,7 @@ static void cmtp_send_interopmsg(struct cmtp_session *session,
struct sk_buff *skb;
unsigned char *s;
- BT_DBG("session %p subcmd 0x%02x appl %d msgnum %d", session, subcmd, appl, msgnum);
+ BT_DBG("session %p subcmd 0x%02x appl %u msgnum %u", session, subcmd, appl, msgnum);
skb = alloc_skb(CAPI_MSG_BASELEN + 6 + len, GFP_ATOMIC);
if (!skb) {
@@ -188,7 +188,7 @@ static void cmtp_recv_interopmsg(struct cmtp_session *session, struct sk_buff *s
__u16 appl, msgnum, func, info;
__u32 controller;
- BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+ BT_DBG("session %p skb %p len %u", session, skb, skb->len);
switch (CAPIMSG_SUBCOMMAND(skb->data)) {
case CAPI_CONF:
@@ -321,7 +321,7 @@ void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb)
__u16 appl;
__u32 contr;
- BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+ BT_DBG("session %p skb %p len %u", session, skb, skb->len);
if (skb->len < CAPI_MSG_BASELEN)
return;
@@ -344,7 +344,7 @@ void cmtp_recv_capimsg(struct cmtp_session *session, struct sk_buff *skb)
appl = application->appl;
CAPIMSG_SETAPPID(skb->data, appl);
} else {
- BT_ERR("Can't find application with id %d", appl);
+ BT_ERR("Can't find application with id %u", appl);
kfree_skb(skb);
return;
}
@@ -385,8 +385,8 @@ static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_
unsigned char buf[8];
int err = 0, nconn, want = rp->level3cnt;
- BT_DBG("ctrl %p appl %d level3cnt %d datablkcnt %d datablklen %d",
- ctrl, appl, rp->level3cnt, rp->datablkcnt, rp->datablklen);
+ BT_DBG("ctrl %p appl %u level3cnt %u datablkcnt %u datablklen %u",
+ ctrl, appl, rp->level3cnt, rp->datablkcnt, rp->datablklen);
application = cmtp_application_add(session, appl);
if (!application) {
@@ -450,7 +450,7 @@ static void cmtp_release_appl(struct capi_ctr *ctrl, __u16 appl)
struct cmtp_session *session = ctrl->driverdata;
struct cmtp_application *application;
- BT_DBG("ctrl %p appl %d", ctrl, appl);
+ BT_DBG("ctrl %p appl %u", ctrl, appl);
application = cmtp_application_get(session, CMTP_APPLID, appl);
if (!application) {
@@ -483,7 +483,7 @@ static u16 cmtp_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
application = cmtp_application_get(session, CMTP_APPLID, appl);
if ((!application) || (application->state != BT_CONNECTED)) {
- BT_ERR("Can't find application with id %d", appl);
+ BT_ERR("Can't find application with id %u", appl);
return CAPI_ILLAPPNR;
}
@@ -515,7 +515,7 @@ static int cmtp_proc_show(struct seq_file *m, void *v)
seq_printf(m, "ctrl %d\n", session->num);
list_for_each_entry(app, &session->applications, list) {
- seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping);
+ seq_printf(m, "appl %u -> %u\n", app->appl, app->mapping);
}
return 0;
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
index c32638dddbf9..f6b9dc4e408f 100644
--- a/net/bluetooth/cmtp/cmtp.h
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -26,7 +26,7 @@
#include <linux/types.h>
#include <net/bluetooth/bluetooth.h>
-#define BTNAMSIZ 18
+#define BTNAMSIZ 21
/* CMTP ioctl defines */
#define CMTPCONNADD _IOW('C', 200, int)
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 07cfa3249f83..0a2d78e811cf 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -392,6 +392,11 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
if (!(session->flags & BIT(CMTP_LOOPBACK))) {
err = cmtp_attach_device(session);
if (err < 0) {
+ /* The caller will call fput() in case of failure, and so
+ * will the cmtp_session kthread.
+ */
+ get_file(session->sock->file);
+
atomic_inc(&session->terminate);
wake_up_interruptible(sk_sleep(session->sock->sk));
up_write(&cmtp_session_sem);
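
A short sketch of the reference balancing assumed by the cmtp hunk above: when a second, independent context (here the session kthread) will also drop a reference to the socket file on the failure path, an extra reference is taken up front so that the two fput() calls do not underflow the file refcount. The helper name is illustrative only:

#include <linux/file.h>
#include <linux/fs.h>

static void example_hand_off_to_kthread(struct file *file)
{
	get_file(file);	/* reference consumed by the kthread's fput() */
	/* ... wake the kthread, which calls fput(file) when it exits ... */
}
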
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 88ec08978ff4..2b5059a56cda 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -257,7 +257,7 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason)
{
BT_DBG("hcon %p", conn);
- /* When we are master of an established connection and it enters
+ /* When we are central of an established connection and it enters
* the disconnect timeout, then go ahead and try to read the
* current clock offset. Processing of the result is done
* within the event handling and hci_clock_offset_evt function.
@@ -758,7 +758,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status)
conn->state = BT_CLOSED;
/* If the status indicates successful cancellation of
- * the attempt (i.e. Unkown Connection Id) there's no point of
+ * the attempt (i.e. Unknown Connection Id) there's no point of
* notifying failure since we'll go back to keep trying to
* connect. The only exception is explicit connect requests
* where a timeout + cancel does indicate an actual failure.
@@ -1109,9 +1109,9 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
hci_req_init(&req, hdev);
- /* Disable advertising if we're active. For master role
+ /* Disable advertising if we're active. For central role
* connections most controllers will refuse to connect if
- * advertising is enabled, and for slave role connections we
+ * advertising is enabled, and for peripheral role connections we
* anyway have to disable it in order to start directed
* advertising. Any registered advertisements will be
* re-enabled after the connection attempt is finished.
@@ -1119,7 +1119,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
if (hci_dev_test_flag(hdev, HCI_LE_ADV))
__hci_req_pause_adv_instances(&req);
- /* If requested to connect as slave use directed advertising */
+ /* If requested to connect as peripheral use directed advertising */
if (conn->role == HCI_ROLE_SLAVE) {
/* If we're active scanning most controllers are unable
* to initiate advertising. Simply reject the attempt.
@@ -1842,7 +1842,7 @@ u32 hci_conn_get_phy(struct hci_conn *conn)
/* BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 2, Part B page 471:
* Table 6.2: Packets defined for synchronous, asynchronous, and
- * CSB logical transport types.
+ * CPB logical transport types.
*/
switch (conn->type) {
case SCO_LINK:
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 7d71d104fdfd..8a47a3017d61 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -545,24 +545,24 @@ static void hci_set_event_mask_page_2(struct hci_request *req)
u8 events[8] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
bool changed = false;
- /* If Connectionless Slave Broadcast master role is supported
+ /* If Connectionless Peripheral Broadcast central role is supported
* enable all necessary events for it.
*/
- if (lmp_csb_master_capable(hdev)) {
+ if (lmp_cpb_central_capable(hdev)) {
events[1] |= 0x40; /* Triggered Clock Capture */
events[1] |= 0x80; /* Synchronization Train Complete */
- events[2] |= 0x10; /* Slave Page Response Timeout */
- events[2] |= 0x20; /* CSB Channel Map Change */
+ events[2] |= 0x10; /* Peripheral Page Response Timeout */
+ events[2] |= 0x20; /* CPB Channel Map Change */
changed = true;
}
- /* If Connectionless Slave Broadcast slave role is supported
+ /* If Connectionless Peripheral Broadcast peripheral role is supported
* enable all necessary events for it.
*/
- if (lmp_csb_slave_capable(hdev)) {
+ if (lmp_cpb_peripheral_capable(hdev)) {
events[2] |= 0x01; /* Synchronization Train Received */
- events[2] |= 0x02; /* CSB Receive */
- events[2] |= 0x04; /* CSB Timeout */
+ events[2] |= 0x02; /* CPB Receive */
+ events[2] |= 0x04; /* CPB Timeout */
events[2] |= 0x08; /* Truncated Page Complete */
changed = true;
}
@@ -648,7 +648,7 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
*/
/* If the controller supports Extended Scanner Filter
- * Policies, enable the correspondig event.
+ * Policies, enable the corresponding event.
*/
if (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)
events[1] |= 0x04; /* LE Direct Advertising
@@ -749,14 +749,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
}
if (hdev->commands[26] & 0x40) {
- /* Read LE White List Size */
- hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE,
+ /* Read LE Accept List Size */
+ hci_req_add(req, HCI_OP_LE_READ_ACCEPT_LIST_SIZE,
0, NULL);
}
if (hdev->commands[26] & 0x80) {
- /* Clear LE White List */
- hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL);
+ /* Clear LE Accept List */
+ hci_req_add(req, HCI_OP_LE_CLEAR_ACCEPT_LIST, 0, NULL);
}
if (hdev->commands[34] & 0x40) {
@@ -1343,6 +1343,12 @@ int hci_inquiry(void __user *arg)
goto done;
}
+ /* Restrict maximum inquiry length to 60 seconds */
+ if (ir.length > 60) {
+ err = -EINVAL;
+ goto done;
+ }
+
hci_dev_lock(hdev);
if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX ||
inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) {
@@ -1454,7 +1460,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
}
/* Check for valid public address or a configured static
- * random adddress, but let the HCI setup proceed to
+ * random address, but let the HCI setup proceed to
* be able to determine if there is a public address
* or not.
*
@@ -1718,26 +1724,28 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
int hci_dev_do_close(struct hci_dev *hdev)
{
bool auto_off;
+ int err = 0;
BT_DBG("%s %p", hdev->name, hdev);
+ cancel_delayed_work(&hdev->power_off);
+ cancel_delayed_work(&hdev->ncmd_timer);
+
+ hci_request_cancel_all(hdev);
+ hci_req_sync_lock(hdev);
+
if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
test_bit(HCI_UP, &hdev->flags)) {
/* Execute vendor specific shutdown routine */
if (hdev->shutdown)
- hdev->shutdown(hdev);
+ err = hdev->shutdown(hdev);
}
- cancel_delayed_work(&hdev->power_off);
-
- hci_request_cancel_all(hdev);
- hci_req_sync_lock(hdev);
-
if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
cancel_delayed_work_sync(&hdev->cmd_timer);
hci_req_sync_unlock(hdev);
- return 0;
+ return err;
}
hci_leds_update_powered(hdev, false);
@@ -1844,7 +1852,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
hci_req_sync_unlock(hdev);
hci_dev_put(hdev);
- return 0;
+ return err;
}
int hci_dev_close(__u16 dev)
@@ -2777,6 +2785,24 @@ static void hci_cmd_timeout(struct work_struct *work)
queue_work(hdev->workqueue, &hdev->cmd_work);
}
+/* HCI ncmd timer function */
+static void hci_ncmd_timeout(struct work_struct *work)
+{
+ struct hci_dev *hdev = container_of(work, struct hci_dev,
+ ncmd_timer.work);
+
+ bt_dev_err(hdev, "Controller not accepting commands anymore: ncmd = 0");
+
+ /* During HCI_INIT phase no events can be injected if the ncmd timer
+ * triggers since the procedure has its own timeout handling.
+ */
+ if (test_bit(HCI_INIT, &hdev->flags))
+ return;
+
+ /* This is an irrecoverable state, inject hardware error event */
+ hci_reset_dev(hdev);
+}
+
struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev,
bdaddr_t *bdaddr, u8 bdaddr_type)
{
@@ -3549,7 +3575,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
if (params->auto_connect != HCI_AUTO_CONN_DISABLED)
continue;
- /* If trying to estabilish one time connection to disabled
+ /* If trying to establish one time connection to disabled
* device, leave the params, but mark them as just once.
*/
if (params->explicit_connect) {
@@ -3694,13 +3720,13 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
/* Suspend consists of two actions:
* - First, disconnect everything and make the controller not
* connectable (disabling scanning)
- * - Second, program event filter/whitelist and enable scan
+ * - Second, program event filter/accept list and enable scan
*/
ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT);
if (!ret)
state = BT_SUSPEND_DISCONNECT;
- /* Only configure whitelist if disconnect succeeded and wake
+ /* Only configure accept list if disconnect succeeded and wake
* isn't being prevented.
*/
if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev))) {
@@ -3732,11 +3758,18 @@ done:
}
/* Alloc HCI device */
-struct hci_dev *hci_alloc_dev(void)
+struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
{
struct hci_dev *hdev;
+ unsigned int alloc_size;
- hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+ alloc_size = sizeof(*hdev);
+ if (sizeof_priv) {
+ /* Fixme: May need ALIGN-ment? */
+ alloc_size += sizeof_priv;
+ }
+
+ hdev = kzalloc(alloc_size, GFP_KERNEL);
if (!hdev)
return NULL;
@@ -3808,14 +3841,14 @@ struct hci_dev *hci_alloc_dev(void)
mutex_init(&hdev->req_lock);
INIT_LIST_HEAD(&hdev->mgmt_pending);
- INIT_LIST_HEAD(&hdev->blacklist);
- INIT_LIST_HEAD(&hdev->whitelist);
+ INIT_LIST_HEAD(&hdev->reject_list);
+ INIT_LIST_HEAD(&hdev->accept_list);
INIT_LIST_HEAD(&hdev->uuids);
INIT_LIST_HEAD(&hdev->link_keys);
INIT_LIST_HEAD(&hdev->long_term_keys);
INIT_LIST_HEAD(&hdev->identity_resolving_keys);
INIT_LIST_HEAD(&hdev->remote_oob_data);
- INIT_LIST_HEAD(&hdev->le_white_list);
+ INIT_LIST_HEAD(&hdev->le_accept_list);
INIT_LIST_HEAD(&hdev->le_resolv_list);
INIT_LIST_HEAD(&hdev->le_conn_params);
INIT_LIST_HEAD(&hdev->pend_le_conns);
@@ -3841,6 +3874,7 @@ struct hci_dev *hci_alloc_dev(void)
init_waitqueue_head(&hdev->suspend_wait_q);
INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
+ INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout);
hci_request_setup(hdev);
@@ -3849,7 +3883,7 @@ struct hci_dev *hci_alloc_dev(void)
return hdev;
}
-EXPORT_SYMBOL(hci_alloc_dev);
+EXPORT_SYMBOL(hci_alloc_dev_priv);
/* Free HCI device */
void hci_free_dev(struct hci_dev *hdev)
@@ -3976,14 +4010,10 @@ EXPORT_SYMBOL(hci_register_dev);
/* Unregister HCI device */
void hci_unregister_dev(struct hci_dev *hdev)
{
- int id;
-
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
hci_dev_set_flag(hdev, HCI_UNREGISTER);
- id = hdev->id;
-
write_lock(&hci_dev_list_lock);
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
@@ -4018,7 +4048,14 @@ void hci_unregister_dev(struct hci_dev *hdev)
}
device_del(&hdev->dev);
+ /* Actual cleanup is deferred until hci_release_dev(). */
+ hci_dev_put(hdev);
+}
+EXPORT_SYMBOL(hci_unregister_dev);
+/* Release HCI device */
+void hci_release_dev(struct hci_dev *hdev)
+{
debugfs_remove_recursive(hdev->debugfs);
kfree_const(hdev->hw_info);
kfree_const(hdev->fw_info);
@@ -4027,8 +4064,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
destroy_workqueue(hdev->req_workqueue);
hci_dev_lock(hdev);
- hci_bdaddr_list_clear(&hdev->blacklist);
- hci_bdaddr_list_clear(&hdev->whitelist);
+ hci_bdaddr_list_clear(&hdev->reject_list);
+ hci_bdaddr_list_clear(&hdev->accept_list);
hci_uuids_clear(hdev);
hci_link_keys_clear(hdev);
hci_smp_ltks_clear(hdev);
@@ -4036,18 +4073,17 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_remote_oob_data_clear(hdev);
hci_adv_instances_clear(hdev);
hci_adv_monitors_clear(hdev);
- hci_bdaddr_list_clear(&hdev->le_white_list);
+ hci_bdaddr_list_clear(&hdev->le_accept_list);
hci_bdaddr_list_clear(&hdev->le_resolv_list);
hci_conn_params_clear_all(hdev);
hci_discovery_filter_clear(hdev);
hci_blocked_keys_clear(hdev);
hci_dev_unlock(hdev);
- hci_dev_put(hdev);
-
- ida_simple_remove(&hci_index_ida, id);
+ ida_simple_remove(&hci_index_ida, hdev->id);
+ kfree(hdev);
}
-EXPORT_SYMBOL(hci_unregister_dev);
+EXPORT_SYMBOL(hci_release_dev);
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
@@ -4078,6 +4114,8 @@ int hci_reset_dev(struct hci_dev *hdev)
hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
skb_put_data(skb, hw_err, 3);
+ bt_dev_err(hdev, "Injecting HCI hardware error event");
+
/* Send Hardware Error to upper stack */
return hci_recv_frame(hdev, skb);
}
@@ -4284,7 +4322,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE;
}
-/* Send HCI command and wait for command commplete event */
+/* Send HCI command and wait for command complete event */
struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
const void *param, u32 timeout)
{
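
hci_alloc_dev() becomes hci_alloc_dev_priv() above, co-allocating a driver-private area behind struct hci_dev. A hedged sketch of how a driver might use it; struct example_data is made up, and the hci_get_priv() accessor is assumed to be added to hci_core.h by the same series:

#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

struct example_data {
	int irq;	/* made-up driver state */
};

static struct hci_dev *example_alloc_hdev(void)
{
	struct hci_dev *hdev;
	struct example_data *priv;

	/* private area is allocated right behind struct hci_dev */
	hdev = hci_alloc_dev_priv(sizeof(*priv));
	if (!hdev)
		return NULL;

	priv = hci_get_priv(hdev);	/* assumed accessor from this series */
	priv->irq = -1;

	return hdev;
}
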
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 47f4f21fbc1a..841393389f7b 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -125,7 +125,7 @@ static int device_list_show(struct seq_file *f, void *ptr)
struct bdaddr_list *b;
hci_dev_lock(hdev);
- list_for_each_entry(b, &hdev->whitelist, list)
+ list_for_each_entry(b, &hdev->accept_list, list)
seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
list_for_each_entry(p, &hdev->le_conn_params, list) {
seq_printf(f, "%pMR (type %u) %u\n", &p->addr, p->addr_type,
@@ -144,7 +144,7 @@ static int blacklist_show(struct seq_file *f, void *p)
struct bdaddr_list *b;
hci_dev_lock(hdev);
- list_for_each_entry(b, &hdev->blacklist, list)
+ list_for_each_entry(b, &hdev->reject_list, list)
seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
hci_dev_unlock(hdev);
@@ -784,7 +784,7 @@ static int white_list_show(struct seq_file *f, void *ptr)
struct bdaddr_list *b;
hci_dev_lock(hdev);
- list_for_each_entry(b, &hdev->le_white_list, list)
+ list_for_each_entry(b, &hdev->le_accept_list, list)
seq_printf(f, "%pMR (type %u)\n", &b->bdaddr, b->bdaddr_type);
hci_dev_unlock(hdev);
@@ -1195,7 +1195,7 @@ void hci_debugfs_create_le(struct hci_dev *hdev)
&force_static_address_fops);
debugfs_create_u8("white_list_size", 0444, hdev->debugfs,
- &hdev->le_white_list_size);
+ &hdev->le_accept_list_size);
debugfs_create_file("white_list", 0444, hdev->debugfs, hdev,
&white_list_fops);
debugfs_create_u8("resolv_list_size", 0444, hdev->debugfs,
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 016b2999f219..0bca035bf2dc 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -40,6 +40,8 @@
#define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \
"\x00\x00\x00\x00\x00\x00\x00\x00"
+#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000)
+
/* Handle HCI Event packets */
static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb,
@@ -236,7 +238,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
hdev->ssp_debug_mode = 0;
- hci_bdaddr_list_clear(&hdev->le_white_list);
+ hci_bdaddr_list_clear(&hdev->le_accept_list);
hci_bdaddr_list_clear(&hdev->le_resolv_list);
}
@@ -1171,6 +1173,12 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb)
bacpy(&hdev->random_addr, sent);
+ if (!bacmp(&hdev->rpa, sent)) {
+ hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED);
+ queue_delayed_work(hdev->workqueue, &hdev->rpa_expired,
+ secs_to_jiffies(hdev->rpa_timeout));
+ }
+
hci_dev_unlock(hdev);
}
@@ -1201,24 +1209,30 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev,
{
__u8 status = *((__u8 *) skb->data);
struct hci_cp_le_set_adv_set_rand_addr *cp;
- struct adv_info *adv_instance;
+ struct adv_info *adv;
if (status)
return;
cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR);
- if (!cp)
+ /* Update only in case of an adv instance, since handle 0x00 shall be using
+ * HCI_OP_LE_SET_RANDOM_ADDR, which allows both extended and
+ * non-extended advertising.
+ */
+ if (!cp || !cp->handle)
return;
hci_dev_lock(hdev);
- if (!cp->handle) {
- /* Store in hdev for instance 0 (Set adv and Directed advs) */
- bacpy(&hdev->random_addr, &cp->bdaddr);
- } else {
- adv_instance = hci_find_adv_instance(hdev, cp->handle);
- if (adv_instance)
- bacpy(&adv_instance->random_addr, &cp->bdaddr);
+ adv = hci_find_adv_instance(hdev, cp->handle);
+ if (adv) {
+ bacpy(&adv->random_addr, &cp->bdaddr);
+ if (!bacmp(&hdev->rpa, &cp->bdaddr)) {
+ adv->rpa_expired = false;
+ queue_delayed_work(hdev->workqueue,
+ &adv->rpa_expired_cb,
+ secs_to_jiffies(hdev->rpa_timeout));
+ }
}
hci_dev_unlock(hdev);
@@ -1277,7 +1291,9 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
struct sk_buff *skb)
{
struct hci_cp_le_set_ext_adv_enable *cp;
+ struct hci_cp_ext_adv_set *set;
__u8 status = *((__u8 *) skb->data);
+ struct adv_info *adv = NULL, *n;
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1288,22 +1304,48 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev,
if (!cp)
return;
+ set = (void *)cp->data;
+
hci_dev_lock(hdev);
+ if (cp->num_of_sets)
+ adv = hci_find_adv_instance(hdev, set->handle);
+
if (cp->enable) {
struct hci_conn *conn;
hci_dev_set_flag(hdev, HCI_LE_ADV);
+ if (adv)
+ adv->enabled = true;
+
conn = hci_lookup_le_connect(hdev);
if (conn)
queue_delayed_work(hdev->workqueue,
&conn->le_conn_timeout,
conn->conn_timeout);
} else {
+ if (adv) {
+ adv->enabled = false;
+ /* If just one instance was disabled, check if any other
+ * instances are still enabled before clearing HCI_LE_ADV
+ */
+ list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+ list) {
+ if (adv->enabled)
+ goto unlock;
+ }
+ } else {
+ /* All instances shall be considered disabled */
+ list_for_each_entry_safe(adv, n, &hdev->adv_instances,
+ list)
+ adv->enabled = false;
+ }
+
hci_dev_clear_flag(hdev, HCI_LE_ADV);
}
+unlock:
hci_dev_unlock(hdev);
}
@@ -1492,21 +1534,21 @@ static void hci_cc_le_read_num_adv_sets(struct hci_dev *hdev,
hdev->le_num_of_adv_sets = rp->num_of_sets;
}
-static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
- struct sk_buff *skb)
+static void hci_cc_le_read_accept_list_size(struct hci_dev *hdev,
+ struct sk_buff *skb)
{
- struct hci_rp_le_read_white_list_size *rp = (void *) skb->data;
+ struct hci_rp_le_read_accept_list_size *rp = (void *)skb->data;
BT_DBG("%s status 0x%2.2x size %u", hdev->name, rp->status, rp->size);
if (rp->status)
return;
- hdev->le_white_list_size = rp->size;
+ hdev->le_accept_list_size = rp->size;
}
-static void hci_cc_le_clear_white_list(struct hci_dev *hdev,
- struct sk_buff *skb)
+static void hci_cc_le_clear_accept_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
{
__u8 status = *((__u8 *) skb->data);
@@ -1515,13 +1557,13 @@ static void hci_cc_le_clear_white_list(struct hci_dev *hdev,
if (status)
return;
- hci_bdaddr_list_clear(&hdev->le_white_list);
+ hci_bdaddr_list_clear(&hdev->le_accept_list);
}
-static void hci_cc_le_add_to_white_list(struct hci_dev *hdev,
- struct sk_buff *skb)
+static void hci_cc_le_add_to_accept_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
{
- struct hci_cp_le_add_to_white_list *sent;
+ struct hci_cp_le_add_to_accept_list *sent;
__u8 status = *((__u8 *) skb->data);
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1529,18 +1571,18 @@ static void hci_cc_le_add_to_white_list(struct hci_dev *hdev,
if (status)
return;
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_WHITE_LIST);
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_ADD_TO_ACCEPT_LIST);
if (!sent)
return;
- hci_bdaddr_list_add(&hdev->le_white_list, &sent->bdaddr,
- sent->bdaddr_type);
+ hci_bdaddr_list_add(&hdev->le_accept_list, &sent->bdaddr,
+ sent->bdaddr_type);
}
-static void hci_cc_le_del_from_white_list(struct hci_dev *hdev,
- struct sk_buff *skb)
+static void hci_cc_le_del_from_accept_list(struct hci_dev *hdev,
+ struct sk_buff *skb)
{
- struct hci_cp_le_del_from_white_list *sent;
+ struct hci_cp_le_del_from_accept_list *sent;
__u8 status = *((__u8 *) skb->data);
BT_DBG("%s status 0x%2.2x", hdev->name, status);
@@ -1548,11 +1590,11 @@ static void hci_cc_le_del_from_white_list(struct hci_dev *hdev,
if (status)
return;
- sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_WHITE_LIST);
+ sent = hci_sent_cmd_data(hdev, HCI_OP_LE_DEL_FROM_ACCEPT_LIST);
if (!sent)
return;
- hci_bdaddr_list_del(&hdev->le_white_list, &sent->bdaddr,
+ hci_bdaddr_list_del(&hdev->le_accept_list, &sent->bdaddr,
sent->bdaddr_type);
}
@@ -2069,7 +2111,7 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
if (conn &&
(conn->state == BT_CONFIG || conn->state == BT_CONNECTED) &&
!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
- mgmt_device_connected(hdev, conn, 0, name, name_len);
+ mgmt_device_connected(hdev, conn, name, name_len);
if (discov->state == DISCOVERY_STOPPED)
return;
@@ -2306,19 +2348,20 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
if (conn) {
- u8 type = conn->type;
-
mgmt_disconnect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, status);
+ if (conn->type == LE_LINK) {
+ hdev->cur_adv_instance = conn->adv_instance;
+ hci_req_reenable_advertising(hdev);
+ }
+
/* If the disconnection failed for any reason, the upper layer
* does not retry to disconnect in current implementation.
* Hence, we need to do some basic cleanup here and re-enable
* advertising if necessary.
*/
hci_conn_del(conn);
- if (type == LE_LINK)
- hci_req_reenable_advertising(hdev);
}
hci_dev_unlock(hdev);
@@ -2367,7 +2410,7 @@ static void cs_le_create_conn(struct hci_dev *hdev, bdaddr_t *peer_addr,
/* We don't want the connection attempt to stick around
* indefinitely since LE doesn't have a page timeout concept
* like BR/EDR. Set a timer for any connection that doesn't use
- * the white list for connecting.
+ * the accept list for connecting.
*/
if (filter_policy == HCI_LE_USE_PEER_ADDR)
queue_delayed_work(conn->hdev->workqueue,
@@ -2623,7 +2666,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
* only used during suspend.
*/
if (ev->link_type == ACL_LINK &&
- hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
&ev->bdaddr,
BDADDR_BREDR)) {
conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr,
@@ -2745,19 +2788,19 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- if (hci_bdaddr_list_lookup(&hdev->blacklist, &ev->bdaddr,
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
return;
}
- /* Require HCI_CONNECTABLE or a whitelist entry to accept the
+ /* Require HCI_CONNECTABLE or an accept list entry to accept the
* connection. These features are only touched through mgmt so
* only do the checks if HCI_MGMT is set.
*/
if (hci_dev_test_flag(hdev, HCI_MGMT) &&
!hci_dev_test_flag(hdev, HCI_CONNECTABLE) &&
- !hci_bdaddr_list_lookup_with_flags(&hdev->whitelist, &ev->bdaddr,
+ !hci_bdaddr_list_lookup_with_flags(&hdev->accept_list, &ev->bdaddr,
BDADDR_BREDR)) {
hci_reject_conn(hdev, &ev->bdaddr);
return;
@@ -2795,9 +2838,9 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
bacpy(&cp.bdaddr, &ev->bdaddr);
if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
- cp.role = 0x00; /* Become master */
+ cp.role = 0x00; /* Become central */
else
- cp.role = 0x01; /* Remain slave */
+ cp.role = 0x01; /* Remain peripheral */
hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
} else if (!(flags & HCI_PROTO_DEFER)) {
@@ -2844,7 +2887,6 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
struct hci_conn_params *params;
struct hci_conn *conn;
bool mgmt_connected;
- u8 type;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
@@ -2899,10 +2941,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
}
- type = conn->type;
-
hci_disconn_cfm(conn, ev->reason);
- hci_conn_del(conn);
/* The suspend notifier is waiting for all devices to disconnect so
* clear the bit from pending tasks and inform the wait queue.
@@ -2922,8 +2961,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
* or until a connection is created or until the Advertising
* is timed out due to Directed Advertising."
*/
- if (type == LE_LINK)
+ if (conn->type == LE_LINK) {
+ hdev->cur_adv_instance = conn->adv_instance;
hci_req_reenable_advertising(hdev);
+ }
+
+ hci_conn_del(conn);
unlock:
hci_dev_unlock(hdev);
@@ -3256,7 +3299,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev,
cp.pscan_rep_mode = 0x02;
hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp);
} else if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
- mgmt_device_connected(hdev, conn, 0, NULL, 0);
+ mgmt_device_connected(hdev, conn, NULL, 0);
if (!hci_outgoing_auth_needed(hdev, conn)) {
conn->state = BT_CONNECTED;
@@ -3268,6 +3311,21 @@ unlock:
hci_dev_unlock(hdev);
}
+static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd)
+{
+ cancel_delayed_work(&hdev->cmd_timer);
+
+ if (!test_bit(HCI_RESET, &hdev->flags)) {
+ if (ncmd) {
+ cancel_delayed_work(&hdev->ncmd_timer);
+ atomic_set(&hdev->cmd_cnt, 1);
+ } else {
+ schedule_delayed_work(&hdev->ncmd_timer,
+ HCI_NCMD_TIMEOUT);
+ }
+ }
+}
+
static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
u16 *opcode, u8 *status,
hci_req_complete_t *req_complete,
@@ -3521,20 +3579,20 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_cc_le_set_scan_enable(hdev, skb);
break;
- case HCI_OP_LE_READ_WHITE_LIST_SIZE:
- hci_cc_le_read_white_list_size(hdev, skb);
+ case HCI_OP_LE_READ_ACCEPT_LIST_SIZE:
+ hci_cc_le_read_accept_list_size(hdev, skb);
break;
- case HCI_OP_LE_CLEAR_WHITE_LIST:
- hci_cc_le_clear_white_list(hdev, skb);
+ case HCI_OP_LE_CLEAR_ACCEPT_LIST:
+ hci_cc_le_clear_accept_list(hdev, skb);
break;
- case HCI_OP_LE_ADD_TO_WHITE_LIST:
- hci_cc_le_add_to_white_list(hdev, skb);
+ case HCI_OP_LE_ADD_TO_ACCEPT_LIST:
+ hci_cc_le_add_to_accept_list(hdev, skb);
break;
- case HCI_OP_LE_DEL_FROM_WHITE_LIST:
- hci_cc_le_del_from_white_list(hdev, skb);
+ case HCI_OP_LE_DEL_FROM_ACCEPT_LIST:
+ hci_cc_le_del_from_accept_list(hdev, skb);
break;
case HCI_OP_LE_READ_SUPPORTED_STATES:
@@ -3630,11 +3688,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
break;
}
- if (*opcode != HCI_OP_NOP)
- cancel_delayed_work(&hdev->cmd_timer);
-
- if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
- atomic_set(&hdev->cmd_cnt, 1);
+ handle_cmd_cnt_and_timer(hdev, ev->ncmd);
hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
req_complete_skb);
@@ -3735,11 +3789,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
break;
}
- if (*opcode != HCI_OP_NOP)
- cancel_delayed_work(&hdev->cmd_timer);
-
- if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
- atomic_set(&hdev->cmd_cnt, 1);
+ handle_cmd_cnt_and_timer(hdev, ev->ncmd);
/* Indicate request completion if the command failed. Also, if
* we're not waiting for a special event and we get a success
@@ -4330,7 +4380,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
cp.pscan_rep_mode = 0x02;
hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ, sizeof(cp), &cp);
} else if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
- mgmt_device_connected(hdev, conn, 0, NULL, 0);
+ mgmt_device_connected(hdev, conn, NULL, 0);
if (!hci_outgoing_auth_needed(hdev, conn)) {
conn->state = BT_CONNECTED;
@@ -4373,6 +4423,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
switch (ev->status) {
case 0x00:
+ /* The synchronous connection complete event should only be
+ * sent once per new connection. Receiving a successful
+ * complete event when the connection status is already
+ * BT_CONNECTED means that the device is misbehaving and sent
+ * multiple complete event packets for the same new connection.
+ *
+ * Registering the device more than once can corrupt kernel
+ * memory, hence upon detecting this invalid event, we report
+ * an error and ignore the packet.
+ */
+ if (conn->state == BT_CONNECTED) {
+ bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
+ goto unlock;
+ }
+
conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -4404,12 +4469,12 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
bt_dev_dbg(hdev, "SCO connected with air mode: %02x", ev->air_mode);
- switch (conn->setting & SCO_AIRMODE_MASK) {
- case SCO_AIRMODE_CVSD:
+ switch (ev->air_mode) {
+ case 0x02:
if (hdev->notify)
hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_CVSD);
break;
- case SCO_AIRMODE_TRANSP:
+ case 0x03:
if (hdev->notify)
hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_TRANSP);
break;
@@ -5095,9 +5160,64 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
}
#endif
+static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr,
+ u8 bdaddr_type, bdaddr_t *local_rpa)
+{
+ if (conn->out) {
+ conn->dst_type = bdaddr_type;
+ conn->resp_addr_type = bdaddr_type;
+ bacpy(&conn->resp_addr, bdaddr);
+
+ /* If the controller has set a Local RPA, then it must be
+ * used instead of hdev->rpa.
+ */
+ if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) {
+ conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->init_addr, local_rpa);
+ } else if (hci_dev_test_flag(conn->hdev, HCI_PRIVACY)) {
+ conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->init_addr, &conn->hdev->rpa);
+ } else {
+ hci_copy_identity_address(conn->hdev, &conn->init_addr,
+ &conn->init_addr_type);
+ }
+ } else {
+ conn->resp_addr_type = conn->hdev->adv_addr_type;
+ /* If the controller has set a Local RPA, then it must be
+ * used instead of hdev->rpa.
+ */
+ if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) {
+ conn->resp_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->resp_addr, local_rpa);
+ } else if (conn->hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
+ /* In case of ext adv, resp_addr will be updated in
+ * Adv Terminated event.
+ */
+ if (!ext_adv_capable(conn->hdev))
+ bacpy(&conn->resp_addr,
+ &conn->hdev->random_addr);
+ } else {
+ bacpy(&conn->resp_addr, &conn->hdev->bdaddr);
+ }
+
+ conn->init_addr_type = bdaddr_type;
+ bacpy(&conn->init_addr, bdaddr);
+
+ /* For incoming connections, set the default minimum
+ * and maximum connection interval. They will be used
+ * to check if the parameters are in range and if not
+ * trigger the connection update procedure.
+ */
+ conn->le_conn_min_interval = conn->hdev->le_conn_min_interval;
+ conn->le_conn_max_interval = conn->hdev->le_conn_max_interval;
+ }
+}
+
static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
- bdaddr_t *bdaddr, u8 bdaddr_type, u8 role, u16 handle,
- u16 interval, u16 latency, u16 supervision_timeout)
+ bdaddr_t *bdaddr, u8 bdaddr_type,
+ bdaddr_t *local_rpa, u8 role, u16 handle,
+ u16 interval, u16 latency,
+ u16 supervision_timeout)
{
struct hci_conn_params *params;
struct hci_conn *conn;
@@ -5122,8 +5242,8 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = bdaddr_type;
/* If we didn't have a hci_conn object previously
- * but we're in master role this must be something
- * initiated using a white list. Since white list based
+ * but we're in central role this must be something
+ * initiated using an accept list. Since accept list based
* connections are not "first class citizens" we don't
* have full tracking of them. Therefore, we go ahead
* with a "best effort" approach of determining the
@@ -5145,32 +5265,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
cancel_delayed_work(&conn->le_conn_timeout);
}
- if (!conn->out) {
- /* Set the responder (our side) address type based on
- * the advertising address type.
- */
- conn->resp_addr_type = hdev->adv_addr_type;
- if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
- /* In case of ext adv, resp_addr will be updated in
- * Adv Terminated event.
- */
- if (!ext_adv_capable(hdev))
- bacpy(&conn->resp_addr, &hdev->random_addr);
- } else {
- bacpy(&conn->resp_addr, &hdev->bdaddr);
- }
-
- conn->init_addr_type = bdaddr_type;
- bacpy(&conn->init_addr, bdaddr);
-
- /* For incoming connections, set the default minimum
- * and maximum connection interval. They will be used
- * to check if the parameters are in range and if not
- * trigger the connection update procedure.
- */
- conn->le_conn_min_interval = hdev->le_conn_min_interval;
- conn->le_conn_max_interval = hdev->le_conn_max_interval;
- }
+ le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa);
/* Lookup the identity address from the stored connection
* address and address type.
@@ -5187,6 +5282,23 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
conn->dst_type = irk->addr_type;
}
+ /* When using controller-based address resolution, the new
+ * address types 0x02 and 0x03 are used. These types need to be
+ * converted back into either public address or random address type.
+ */
+ if (use_ll_privacy(hdev) &&
+ hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
+ hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) {
+ switch (conn->dst_type) {
+ case ADDR_LE_DEV_PUBLIC_RESOLVED:
+ conn->dst_type = ADDR_LE_DEV_PUBLIC;
+ break;
+ case ADDR_LE_DEV_RANDOM_RESOLVED:
+ conn->dst_type = ADDR_LE_DEV_RANDOM;
+ break;
+ }
+ }
+
if (status) {
hci_le_conn_failed(conn, status);
goto unlock;
@@ -5198,18 +5310,25 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
addr_type = BDADDR_LE_RANDOM;
/* Drop the connection if the device is blocked */
- if (hci_bdaddr_list_lookup(&hdev->blacklist, &conn->dst, addr_type)) {
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, &conn->dst, addr_type)) {
hci_conn_drop(conn);
goto unlock;
}
if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
- mgmt_device_connected(hdev, conn, 0, NULL, 0);
+ mgmt_device_connected(hdev, conn, NULL, 0);
conn->sec_level = BT_SECURITY_LOW;
conn->handle = handle;
conn->state = BT_CONFIG;
+ /* Store current advertising instance as connection advertising instance
+ * when software rotation is in use so it can be re-enabled when
+ * disconnected.
+ */
+ if (!ext_adv_capable(hdev))
+ conn->adv_instance = hdev->cur_adv_instance;
+
conn->le_conn_interval = interval;
conn->le_conn_latency = latency;
conn->le_supv_timeout = supervision_timeout;
@@ -5217,17 +5336,17 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
hci_debugfs_create_conn(conn);
hci_conn_add_sysfs(conn);
- /* The remote features procedure is defined for master
+ /* The remote features procedure is defined for central
* role only. So only in case of an initiated connection
* request the remote features.
*
- * If the local controller supports slave-initiated features
- * exchange, then requesting the remote features in slave
+ * If the local controller supports peripheral-initiated features
+ * exchange, then requesting the remote features in peripheral
* role is possible. Otherwise just transition into the
* connected state without requesting the remote features.
*/
if (conn->out ||
- (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) {
+ (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) {
struct hci_cp_le_read_remote_features cp;
cp.handle = __cpu_to_le16(conn->handle);
@@ -5264,7 +5383,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
- ev->role, le16_to_cpu(ev->handle),
+ NULL, ev->role, le16_to_cpu(ev->handle),
le16_to_cpu(ev->interval),
le16_to_cpu(ev->latency),
le16_to_cpu(ev->supervision_timeout));
@@ -5278,7 +5397,7 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
- ev->role, le16_to_cpu(ev->handle),
+ &ev->local_rpa, ev->role, le16_to_cpu(ev->handle),
le16_to_cpu(ev->interval),
le16_to_cpu(ev->latency),
le16_to_cpu(ev->supervision_timeout));
@@ -5293,17 +5412,35 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_evt_le_ext_adv_set_term *ev = (void *) skb->data;
struct hci_conn *conn;
+ struct adv_info *adv;
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
- if (ev->status)
+ adv = hci_find_adv_instance(hdev, ev->handle);
+
+ if (ev->status) {
+ if (!adv)
+ return;
+
+ /* Remove advertising as it has been terminated */
+ hci_remove_adv_instance(hdev, ev->handle);
+ mgmt_advertising_removed(NULL, hdev, ev->handle);
+
return;
+ }
+
+ if (adv)
+ adv->enabled = false;
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle));
if (conn) {
- struct adv_info *adv_instance;
+ /* Store handle in the connection so the correct advertising
+ * instance can be re-enabled when disconnected.
+ */
+ conn->adv_instance = ev->handle;
- if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM)
+ if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM ||
+ bacmp(&conn->resp_addr, BDADDR_ANY))
return;
if (!ev->handle) {
@@ -5311,9 +5448,8 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
}
- adv_instance = hci_find_adv_instance(hdev, ev->handle);
- if (adv_instance)
- bacpy(&conn->resp_addr, &adv_instance->random_addr);
+ if (adv)
+ bacpy(&conn->resp_addr, &adv->random_addr);
}
}
@@ -5354,13 +5490,13 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
return NULL;
/* Ignore if the device is blocked */
- if (hci_bdaddr_list_lookup(&hdev->blacklist, addr, addr_type))
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, addr, addr_type))
return NULL;
/* Most controllers will fail if we try to create new connections
- * while we have an existing one in slave role.
+ * while we have an existing one in peripheral role.
*/
- if (hdev->conn_hash.le_num_slave > 0 &&
+ if (hdev->conn_hash.le_num_peripheral > 0 &&
(!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) ||
!(hdev->le_states[3] & 0x10)))
return NULL;
@@ -5378,7 +5514,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
case HCI_AUTO_CONN_DIRECT:
/* Only devices advertising with ADV_DIRECT_IND are
* triggering a connection attempt. This is allowing
- * incoming connections from slave devices.
+ * incoming connections from peripheral devices.
*/
if (adv_type != LE_ADV_DIRECT_IND)
return NULL;
@@ -5386,8 +5522,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
case HCI_AUTO_CONN_ALWAYS:
/* Devices advertising with ADV_IND or ADV_DIRECT_IND
* are triggering a connection attempt. This means
- * that incoming connections from slave device are
- * accepted and also outgoing connections to slave
+ * that incoming connections from peripheral devices are
+ * accepted and also outgoing connections to peripheral
* devices are established when found.
*/
break;
@@ -5441,7 +5577,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
struct hci_conn *conn;
bool match;
u32 flags;
- u8 *ptr, real_len;
+ u8 *ptr;
switch (type) {
case LE_ADV_IND:
@@ -5472,14 +5608,10 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
break;
}
- real_len = ptr - data;
-
- /* Adjust for actual length */
- if (len != real_len) {
- bt_dev_err_ratelimited(hdev, "advertising data len corrected %u -> %u",
- len, real_len);
- len = real_len;
- }
+ /* Adjust for actual length. This handles the case when the remote
+ * device is advertising with an incorrect data length.
+ */
+ len = ptr - data;
/* If the direct address is present, then this report is from
* a LE Direct Advertising Report event. In that case it is
@@ -5752,7 +5884,7 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev,
if (conn->state == BT_CONFIG) {
__u8 status;
- /* If the local controller supports slave-initiated
+ /* If the local controller supports peripheral-initiated
* features exchange, but the remote controller does
* not, then it is possible that the error code 0x1a
* for unsupported remote feature gets returned.
@@ -5761,8 +5893,8 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev,
* transition into connected state and mark it as
* successful.
*/
- if ((hdev->le_features[0] & HCI_LE_SLAVE_FEATURES) &&
- !conn->out && ev->status == 0x1a)
+ if (!conn->out && ev->status == 0x1a &&
+ (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES))
status = 0x00;
else
status = ev->status;
@@ -6032,7 +6164,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
return true;
}
- /* Check if request ended in Command Status - no way to retreive
+ /* Check if request ended in Command Status - no way to retrieve
* any extra parameters in this case.
*/
if (hdr->evt == HCI_EV_CMD_STATUS)
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index fa9125b782f8..f15626607b2d 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -745,17 +745,17 @@ void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn)
}
}
-static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr,
- u8 bdaddr_type)
+static void del_from_accept_list(struct hci_request *req, bdaddr_t *bdaddr,
+ u8 bdaddr_type)
{
- struct hci_cp_le_del_from_white_list cp;
+ struct hci_cp_le_del_from_accept_list cp;
cp.bdaddr_type = bdaddr_type;
bacpy(&cp.bdaddr, bdaddr);
- bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from whitelist", &cp.bdaddr,
+ bt_dev_dbg(req->hdev, "Remove %pMR (0x%x) from accept list", &cp.bdaddr,
cp.bdaddr_type);
- hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_DEL_FROM_ACCEPT_LIST, sizeof(cp), &cp);
if (use_ll_privacy(req->hdev) &&
hci_dev_test_flag(req->hdev, HCI_ENABLE_LL_PRIVACY)) {
@@ -774,31 +774,31 @@ static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr,
}
}
-/* Adds connection to white list if needed. On error, returns -1. */
-static int add_to_white_list(struct hci_request *req,
- struct hci_conn_params *params, u8 *num_entries,
- bool allow_rpa)
+/* Adds connection to accept list if needed. On error, returns -1. */
+static int add_to_accept_list(struct hci_request *req,
+ struct hci_conn_params *params, u8 *num_entries,
+ bool allow_rpa)
{
- struct hci_cp_le_add_to_white_list cp;
+ struct hci_cp_le_add_to_accept_list cp;
struct hci_dev *hdev = req->hdev;
- /* Already in white list */
- if (hci_bdaddr_list_lookup(&hdev->le_white_list, &params->addr,
+ /* Already in accept list */
+ if (hci_bdaddr_list_lookup(&hdev->le_accept_list, &params->addr,
params->addr_type))
return 0;
/* Select filter policy to accept all advertising */
- if (*num_entries >= hdev->le_white_list_size)
+ if (*num_entries >= hdev->le_accept_list_size)
return -1;
- /* White list can not be used with RPAs */
+ /* Accept list can not be used with RPAs */
if (!allow_rpa &&
!hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) {
return -1;
}
- /* During suspend, only wakeable devices can be in whitelist */
+ /* During suspend, only wakeable devices can be in accept list */
if (hdev->suspended && !hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP,
params->current_flags))
return 0;
@@ -807,9 +807,9 @@ static int add_to_white_list(struct hci_request *req,
cp.bdaddr_type = params->addr_type;
bacpy(&cp.bdaddr, &params->addr);
- bt_dev_dbg(hdev, "Add %pMR (0x%x) to whitelist", &cp.bdaddr,
+ bt_dev_dbg(hdev, "Add %pMR (0x%x) to accept list", &cp.bdaddr,
cp.bdaddr_type);
- hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_ADD_TO_ACCEPT_LIST, sizeof(cp), &cp);
if (use_ll_privacy(hdev) &&
hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) {
@@ -837,15 +837,15 @@ static int add_to_white_list(struct hci_request *req,
return 0;
}
-static u8 update_white_list(struct hci_request *req)
+static u8 update_accept_list(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
struct hci_conn_params *params;
struct bdaddr_list *b;
u8 num_entries = 0;
bool pend_conn, pend_report;
- /* We allow whitelisting even with RPAs in suspend. In the worst case,
- * we won't be able to wake from devices that use the privacy1.2
+ /* We allow usage of accept list even with RPAs in suspend. In the worst
+ * case, we won't be able to wake from devices that use the privacy1.2
* features. Additionally, once we support privacy1.2 and IRK
* offloading, we can update this to also check for those conditions.
*/
@@ -855,13 +855,13 @@ static u8 update_white_list(struct hci_request *req)
hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
allow_rpa = true;
- /* Go through the current white list programmed into the
+ /* Go through the current accept list programmed into the
* controller one by one and check if that address is still
* in the list of pending connections or list of devices to
* report. If not present in either list, then queue the
* command to remove it from the controller.
*/
- list_for_each_entry(b, &hdev->le_white_list, list) {
+ list_for_each_entry(b, &hdev->le_accept_list, list) {
pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
&b->bdaddr,
b->bdaddr_type);
@@ -870,14 +870,14 @@ static u8 update_white_list(struct hci_request *req)
b->bdaddr_type);
/* If the device is not likely to connect or report,
- * remove it from the whitelist.
+ * remove it from the accept list.
*/
if (!pend_conn && !pend_report) {
- del_from_white_list(req, &b->bdaddr, b->bdaddr_type);
+ del_from_accept_list(req, &b->bdaddr, b->bdaddr_type);
continue;
}
- /* White list can not be used with RPAs */
+ /* Accept list can not be used with RPAs */
if (!allow_rpa &&
!hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
@@ -887,27 +887,27 @@ static u8 update_white_list(struct hci_request *req)
num_entries++;
}
- /* Since all no longer valid white list entries have been
+ /* Since all no longer valid accept list entries have been
* removed, walk through the list of pending connections
* and ensure that any new device gets programmed into
* the controller.
*
* If the list of the devices is larger than the list of
- * available white list entries in the controller, then
+ * available accept list entries in the controller, then
* just abort and return filter policy value to not use the
- * white list.
+ * accept list.
*/
list_for_each_entry(params, &hdev->pend_le_conns, action) {
- if (add_to_white_list(req, params, &num_entries, allow_rpa))
+ if (add_to_accept_list(req, params, &num_entries, allow_rpa))
return 0x00;
}
/* After adding all new pending connections, walk through
* the list of pending reports and also add these to the
- * white list if there is still space. Abort if space runs out.
+ * accept list if there is still space. Abort if space runs out.
*/
list_for_each_entry(params, &hdev->pend_le_reports, action) {
- if (add_to_white_list(req, params, &num_entries, allow_rpa))
+ if (add_to_accept_list(req, params, &num_entries, allow_rpa))
return 0x00;
}
@@ -921,7 +921,7 @@ static u8 update_white_list(struct hci_request *req)
hdev->interleave_scan_state != INTERLEAVE_SCAN_ALLOWLIST)
return 0x00;
- /* Select filter policy to use white list */
+ /* Select filter policy to use accept list */
return 0x01;
}
@@ -932,7 +932,7 @@ static bool scan_use_rpa(struct hci_dev *hdev)
static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
u16 window, u8 own_addr_type, u8 filter_policy,
- bool addr_resolv)
+ bool filter_dup, bool addr_resolv)
{
struct hci_dev *hdev = req->hdev;
@@ -997,7 +997,7 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
memset(&ext_enable_cp, 0, sizeof(ext_enable_cp));
ext_enable_cp.enable = LE_SCAN_ENABLE;
- ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+ ext_enable_cp.filter_dup = filter_dup;
hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE,
sizeof(ext_enable_cp), &ext_enable_cp);
@@ -1016,7 +1016,7 @@ static void hci_req_start_scan(struct hci_request *req, u8 type, u16 interval,
memset(&enable_cp, 0, sizeof(enable_cp));
enable_cp.enable = LE_SCAN_ENABLE;
- enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
+ enable_cp.filter_dup = filter_dup;
hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp),
&enable_cp);
}
@@ -1053,6 +1053,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
u8 own_addr_type;
u8 filter_policy;
u16 window, interval;
+ /* Default is to enable duplicates filter */
+ u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
/* Background scanning should run with address resolution */
bool addr_resolv = true;
@@ -1076,20 +1078,20 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
return;
bt_dev_dbg(hdev, "interleave state %d", hdev->interleave_scan_state);
- /* Adding or removing entries from the white list must
+ /* Adding or removing entries from the accept list must
* happen before enabling scanning. The controller does
- * not allow white list modification while scanning.
+ * not allow accept list modification while scanning.
*/
- filter_policy = update_white_list(req);
+ filter_policy = update_accept_list(req);
/* When the controller is using random resolvable addresses and
* with that having LE privacy enabled, then controllers with
* Extended Scanner Filter Policies support can now enable support
* for handling directed advertising.
*
- * So instead of using filter polices 0x00 (no whitelist)
- * and 0x01 (whitelist enabled) use the new filter policies
- * 0x02 (no whitelist) and 0x03 (whitelist enabled).
+ * So instead of using filter policies 0x00 (no accept list)
+ * and 0x01 (accept list enabled) use the new filter policies
+ * 0x02 (no accept list) and 0x03 (accept list enabled).
*/
if (hci_dev_test_flag(hdev, HCI_PRIVACY) &&
(hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY))
@@ -1106,14 +1108,30 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
} else if (hci_is_adv_monitoring(hdev)) {
window = hdev->le_scan_window_adv_monitor;
interval = hdev->le_scan_int_adv_monitor;
+
+ /* Disable the duplicates filter when scanning for an advertisement
+ * monitor for the following reasons.
+ *
+ * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm
+ * controllers ignore RSSI_Sampling_Period when the duplicates
+ * filter is enabled.
+ *
+ * For SW pattern filtering, when we're not doing interleaved
+ * scanning, it is necessary to disable duplicates filter,
+ * otherwise hosts can only receive one advertisement and it's
+ * impossible to know if a peer is still in range.
+ */
+ filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
} else {
window = hdev->le_scan_window;
interval = hdev->le_scan_interval;
}
- bt_dev_dbg(hdev, "LE passive scan with whitelist = %d", filter_policy);
+ bt_dev_dbg(hdev, "LE passive scan with accept list = %d",
+ filter_policy);
hci_req_start_scan(req, LE_SCAN_PASSIVE, interval, window,
- own_addr_type, filter_policy, addr_resolv);
+ own_addr_type, filter_policy, filter_dup,
+ addr_resolv);
}
static bool adv_instance_is_scannable(struct hci_dev *hdev, u8 instance)
@@ -1163,7 +1181,7 @@ static void hci_req_set_event_filter(struct hci_request *req)
/* Always clear event filter when starting */
hci_req_clear_event_filter(req);
- list_for_each_entry(b, &hdev->whitelist, list) {
+ list_for_each_entry(b, &hdev->accept_list, list) {
if (!hci_conn_test_flag(HCI_CONN_FLAG_REMOTE_WAKEUP,
b->current_flags))
continue;
@@ -1502,13 +1520,14 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
if (hci_conn_num(hdev, LE_LINK) == 0)
return true;
- /* Check le_states if there is any connection in slave role. */
- if (hdev->conn_hash.le_num_slave > 0) {
- /* Slave connection state and non connectable mode bit 20. */
+ /* Check le_states if there is any connection in peripheral role. */
+ if (hdev->conn_hash.le_num_peripheral > 0) {
+ /* Peripheral connection state and non connectable mode bit 20.
+ */
if (!connectable && !(hdev->le_states[2] & 0x10))
return false;
- /* Slave connection state and connectable mode bit 38
+ /* Peripheral connection state and connectable mode bit 38
* and scannable bit 21.
*/
if (connectable && (!(hdev->le_states[4] & 0x40) ||
@@ -1516,13 +1535,13 @@ static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable)
return false;
}
- /* Check le_states if there is any connection in master role. */
- if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_slave) {
- /* Master connection state and non connectable mode bit 18. */
+ /* Check le_states if there is any connection in central role. */
+ if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_peripheral) {
+ /* Central connection state and non connectable mode bit 18. */
if (!connectable && !(hdev->le_states[2] & 0x02))
return false;
- /* Master connection state and connectable mode bit 35 and
+ /* Central connection state and connectable mode bit 35 and
* scannable 19.
*/
if (connectable && (!(hdev->le_states[4] & 0x08) ||
@@ -1697,30 +1716,33 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
return;
if (ext_adv_capable(hdev)) {
- struct hci_cp_le_set_ext_scan_rsp_data cp;
+ struct {
+ struct hci_cp_le_set_ext_scan_rsp_data cp;
+ u8 data[HCI_MAX_EXT_AD_LENGTH];
+ } pdu;
- memset(&cp, 0, sizeof(cp));
+ memset(&pdu, 0, sizeof(pdu));
if (instance)
len = create_instance_scan_rsp_data(hdev, instance,
- cp.data);
+ pdu.data);
else
- len = create_default_scan_rsp_data(hdev, cp.data);
+ len = create_default_scan_rsp_data(hdev, pdu.data);
if (hdev->scan_rsp_data_len == len &&
- !memcmp(cp.data, hdev->scan_rsp_data, len))
+ !memcmp(pdu.data, hdev->scan_rsp_data, len))
return;
- memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
+ memcpy(hdev->scan_rsp_data, pdu.data, len);
hdev->scan_rsp_data_len = len;
- cp.handle = instance;
- cp.length = len;
- cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+ pdu.cp.handle = instance;
+ pdu.cp.length = len;
+ pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
- hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, sizeof(cp),
- &cp);
+ hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA,
+ sizeof(pdu.cp) + len, &pdu.cp);
} else {
struct hci_cp_le_set_scan_rsp_data cp;
@@ -1843,26 +1865,30 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
return;
if (ext_adv_capable(hdev)) {
- struct hci_cp_le_set_ext_adv_data cp;
+ struct {
+ struct hci_cp_le_set_ext_adv_data cp;
+ u8 data[HCI_MAX_EXT_AD_LENGTH];
+ } pdu;
- memset(&cp, 0, sizeof(cp));
+ memset(&pdu, 0, sizeof(pdu));
- len = create_instance_adv_data(hdev, instance, cp.data);
+ len = create_instance_adv_data(hdev, instance, pdu.data);
/* There's nothing to do if the data hasn't changed */
if (hdev->adv_data_len == len &&
- memcmp(cp.data, hdev->adv_data, len) == 0)
+ memcmp(pdu.data, hdev->adv_data, len) == 0)
return;
- memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
+ memcpy(hdev->adv_data, pdu.data, len);
hdev->adv_data_len = len;
- cp.length = len;
- cp.handle = instance;
- cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
- cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
+ pdu.cp.length = len;
+ pdu.cp.handle = instance;
+ pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
+ pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
- hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_DATA, sizeof(cp), &cp);
+ hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_DATA,
+ sizeof(pdu.cp) + len, &pdu.cp);
} else {
struct hci_cp_le_set_adv_data cp;
@@ -2046,8 +2072,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
* current RPA has expired then generate a new one.
*/
if (use_rpa) {
- int to;
-
/* If Controller supports LL Privacy use own address type is
* 0x03
*/
@@ -2058,14 +2082,10 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
*own_addr_type = ADDR_LE_DEV_RANDOM;
if (adv_instance) {
- if (!adv_instance->rpa_expired &&
- !bacmp(&adv_instance->random_addr, &hdev->rpa))
+ if (adv_rpa_valid(adv_instance))
return 0;
-
- adv_instance->rpa_expired = false;
} else {
- if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
- !bacmp(&hdev->random_addr, &hdev->rpa))
+ if (rpa_valid(hdev))
return 0;
}
@@ -2077,14 +2097,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
bacpy(rand_addr, &hdev->rpa);
- to = msecs_to_jiffies(hdev->rpa_timeout * 1000);
- if (adv_instance)
- queue_delayed_work(hdev->workqueue,
- &adv_instance->rpa_expired_cb, to);
- else
- queue_delayed_work(hdev->workqueue,
- &hdev->rpa_expired, to);
-
return 0;
}
@@ -2127,6 +2139,30 @@ void __hci_req_clear_ext_adv_sets(struct hci_request *req)
hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL);
}
+static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
+{
+ struct hci_dev *hdev = req->hdev;
+
+ /* If we're advertising or initiating an LE connection we can't
+ * go ahead and change the random address at this time. This is
+ * because the eventual initiator address used for the
+ * subsequently created connection will be undefined (some
+ * controllers use the new address and others the one we had
+ * when the operation started).
+ *
+ * In this kind of scenario skip the update and let the random
+ * address be updated at the next cycle.
+ */
+ if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
+ hci_lookup_le_connect(hdev)) {
+ bt_dev_dbg(hdev, "Deferring random address update");
+ hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
+ return;
+ }
+
+ hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
+}
+
int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
{
struct hci_cp_le_set_ext_adv_params cp;
@@ -2229,6 +2265,13 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
} else {
if (!bacmp(&random_addr, &hdev->random_addr))
return 0;
+ /* Instance 0x00 doesn't have an adv_info, instead it
+ * uses hdev->random_addr to track its address so
+ * whenever it needs to be updated this also sets the
+ * random address since hdev->random_addr is shared with
+ * the scan state machine.
+ */
+ set_random_addr(req, &random_addr);
}
memset(&cp, 0, sizeof(cp));
@@ -2486,30 +2529,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
false);
}
-static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
-{
- struct hci_dev *hdev = req->hdev;
-
- /* If we're advertising or initiating an LE connection we can't
- * go ahead and change the random address at this time. This is
- * because the eventual initiator address used for the
- * subsequently created connection will be undefined (some
- * controllers use the new address and others the one we had
- * when the operation started).
- *
- * In this kind of scenario skip the update and let the random
- * address be updated at the next cycle.
- */
- if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
- hci_lookup_le_connect(hdev)) {
- bt_dev_dbg(hdev, "Deferring random address update");
- hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
- return;
- }
-
- hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa);
-}
-
int hci_update_random_address(struct hci_request *req, bool require_privacy,
bool use_rpa, u8 *own_addr_type)
{
@@ -2521,8 +2540,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
* the current RPA in use, then generate a new one.
*/
if (use_rpa) {
- int to;
-
/* If Controller supports LL Privacy use own address type is
* 0x03
*/
@@ -2532,8 +2549,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
else
*own_addr_type = ADDR_LE_DEV_RANDOM;
- if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) &&
- !bacmp(&hdev->random_addr, &hdev->rpa))
+ if (rpa_valid(hdev))
return 0;
err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
@@ -2544,9 +2560,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
set_random_addr(req, &hdev->rpa);
- to = msecs_to_jiffies(hdev->rpa_timeout * 1000);
- queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to);
-
return 0;
}
@@ -2605,11 +2618,11 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy,
return 0;
}
-static bool disconnected_whitelist_entries(struct hci_dev *hdev)
+static bool disconnected_accept_list_entries(struct hci_dev *hdev)
{
struct bdaddr_list *b;
- list_for_each_entry(b, &hdev->whitelist, list) {
+ list_for_each_entry(b, &hdev->accept_list, list) {
struct hci_conn *conn;
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr);
@@ -2641,7 +2654,7 @@ void __hci_req_update_scan(struct hci_request *req)
return;
if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) ||
- disconnected_whitelist_entries(hdev))
+ disconnected_accept_list_entries(hdev))
scan = SCAN_PAGE;
else
scan = SCAN_DISABLED;
@@ -3133,8 +3146,10 @@ static int active_scan(struct hci_request *req, unsigned long opt)
uint16_t interval = opt;
struct hci_dev *hdev = req->hdev;
u8 own_addr_type;
- /* White list is not used for discovery */
+ /* Accept list is not used for discovery */
u8 filter_policy = 0x00;
+ /* Default is to enable duplicates filter */
+ u8 filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
/* Discovery doesn't require controller address resolution */
bool addr_resolv = false;
int err;
@@ -3159,9 +3174,26 @@ static int active_scan(struct hci_request *req, unsigned long opt)
if (err < 0)
own_addr_type = ADDR_LE_DEV_PUBLIC;
+ if (hci_is_adv_monitoring(hdev)) {
+ /* Duplicate filter should be disabled when some advertisement
+ * monitor is activated, otherwise AdvMon can only receive one
+ * advertisement for one peer(*) during active scanning, and
+ * might report these peers as lost.
+ *
+ * Note that different controllers have different meanings of
+ * |duplicate|. Some of them consider packets with the same
+ * address as duplicate, and others consider packets with the
+ * same address and the same RSSI as duplicate. Although in the
+ * latter case we don't need to disable the duplicate filter,
+ * active scanning typically runs only for a short period of
+ * time, so the power impact should be negligible.
+ */
+ filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
+ }
+
hci_req_start_scan(req, LE_SCAN_ACTIVE, interval,
hdev->le_scan_window_discovery, own_addr_type,
- filter_policy, addr_resolv);
+ filter_policy, filter_dup, addr_resolv);
return 0;
}
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index eed0dd066e12..f1128c2134f0 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -59,6 +59,17 @@ struct hci_pinfo {
char comm[TASK_COMM_LEN];
};
+static struct hci_dev *hci_hdev_from_sock(struct sock *sk)
+{
+ struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+ if (!hdev)
+ return ERR_PTR(-EBADFD);
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return ERR_PTR(-EPIPE);
+ return hdev;
+}
+
void hci_sock_set_flag(struct sock *sk, int nr)
{
set_bit(nr, &hci_pi(sk)->flags);
@@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
if (event == HCI_DEV_UNREG) {
struct sock *sk;
- /* Detach sockets from device */
+ /* Wake up sockets using this dead device */
read_lock(&hci_sk_list.lock);
sk_for_each(sk, &hci_sk_list.head) {
- lock_sock(sk);
if (hci_pi(sk)->hdev == hdev) {
- hci_pi(sk)->hdev = NULL;
sk->sk_err = EPIPE;
- sk->sk_state = BT_OPEN;
sk->sk_state_change(sk);
-
- hci_dev_put(hdev);
}
- release_sock(sk);
}
read_unlock(&hci_sk_list.lock);
}
@@ -892,7 +897,7 @@ static int hci_sock_release(struct socket *sock)
return 0;
}
-static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
+static int hci_sock_reject_list_add(struct hci_dev *hdev, void __user *arg)
{
bdaddr_t bdaddr;
int err;
@@ -902,14 +907,14 @@ static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
hci_dev_lock(hdev);
- err = hci_bdaddr_list_add(&hdev->blacklist, &bdaddr, BDADDR_BREDR);
+ err = hci_bdaddr_list_add(&hdev->reject_list, &bdaddr, BDADDR_BREDR);
hci_dev_unlock(hdev);
return err;
}
-static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
+static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg)
{
bdaddr_t bdaddr;
int err;
@@ -919,7 +924,7 @@ static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
hci_dev_lock(hdev);
- err = hci_bdaddr_list_del(&hdev->blacklist, &bdaddr, BDADDR_BREDR);
+ err = hci_bdaddr_list_del(&hdev->reject_list, &bdaddr, BDADDR_BREDR);
hci_dev_unlock(hdev);
@@ -930,10 +935,10 @@ static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
unsigned long arg)
{
- struct hci_dev *hdev = hci_pi(sk)->hdev;
+ struct hci_dev *hdev = hci_hdev_from_sock(sk);
- if (!hdev)
- return -EBADFD;
+ if (IS_ERR(hdev))
+ return PTR_ERR(hdev);
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return -EBUSY;
@@ -959,12 +964,12 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
case HCIBLOCKADDR:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return hci_sock_blacklist_add(hdev, (void __user *)arg);
+ return hci_sock_reject_list_add(hdev, (void __user *)arg);
case HCIUNBLOCKADDR:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- return hci_sock_blacklist_del(hdev, (void __user *)arg);
+ return hci_sock_reject_list_del(hdev, (void __user *)arg);
}
return -ENOIOCTLCMD;
@@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
+ /* Allow detaching from a dead device and attaching to an alive device,
+ * if the caller wants to re-bind (instead of close) this socket in
+ * response to a hci_sock_dev_event(HCI_DEV_UNREG) notification.
+ */
+ hdev = hci_pi(sk)->hdev;
+ if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ hci_pi(sk)->hdev = NULL;
+ sk->sk_state = BT_OPEN;
+ hci_dev_put(hdev);
+ }
+ hdev = NULL;
+
if (sk->sk_state == BT_BOUND) {
err = -EALREADY;
goto done;
@@ -1130,7 +1147,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
if (!hci_sock_gen_cookie(sk)) {
/* In the case when a cookie has already been assigned,
* then there has been already an ioctl issued against
- * an unbound socket and with that triggerd an open
+ * an unbound socket and with that triggered an open
* notification. Send a close notification first to
* allow the state transition to bounded.
*/
@@ -1326,9 +1343,9 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) {
if (!hci_sock_gen_cookie(sk)) {
/* In the case when a cookie has already been
- * assigned, this socket will transtion from
+ * assigned, this socket will transition from
* a raw socket into a control socket. To
- * allow for a clean transtion, send the
+ * allow for a clean transition, send the
* close notification first.
*/
skb = create_monitor_ctrl_close(sk);
@@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
- hdev = hci_pi(sk)->hdev;
- if (!hdev) {
- err = -EBADFD;
+ hdev = hci_hdev_from_sock(sk);
+ if (IS_ERR(hdev)) {
+ err = PTR_ERR(hdev);
goto done;
}
@@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
goto done;
}
- hdev = hci_pi(sk)->hdev;
- if (!hdev) {
- err = -EBADFD;
+ hdev = hci_hdev_from_sock(sk);
+ if (IS_ERR(hdev)) {
+ err = PTR_ERR(hdev);
goto done;
}
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 9874844a95a9..7827639ecf5c 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -83,7 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
static void bt_host_release(struct device *dev)
{
struct hci_dev *hdev = to_hci_dev(dev);
- kfree(hdev);
+
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ hci_release_dev(hdev);
module_put(THIS_MODULE);
}
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0db48c812662..80848dfc01db 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -508,7 +508,7 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
unsigned char param)
{
int done_with_skb = 1;
- BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param);
+ BT_DBG("session %p skb %p len %u param 0x%02x", session, skb, skb->len, param);
switch (param) {
case HIDP_DATA_RTYPE_INPUT:
@@ -553,7 +553,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,
unsigned char hdr, type, param;
int free_skb = 1;
- BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+ BT_DBG("session %p skb %p len %u", session, skb, skb->len);
hdr = skb->data[0];
skb_pull(skb, 1);
@@ -589,7 +589,7 @@ static void hidp_recv_intr_frame(struct hidp_session *session,
{
unsigned char hdr;
- BT_DBG("session %p skb %p len %d", session, skb, skb->len);
+ BT_DBG("session %p skb %p len %u", session, skb, skb->len);
hdr = skb->data[0];
skb_pull(skb, 1);
@@ -794,7 +794,7 @@ static int hidp_setup_hid(struct hidp_session *session,
hid->dev.parent = &session->conn->hcon->dev;
hid->ll_driver = &hidp_hid_driver;
- /* True if device is blacklisted in drivers/hid/hid-quirks.c */
+ /* True if device is blocked in drivers/hid/hid-quirks.c */
if (hid_ignore(hid)) {
hid_destroy_device(session->hid);
session->hid = NULL;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index b6a88b8256c7..77ba68209dbd 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1691,7 +1691,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
if (hcon->out)
smp_conn_security(hcon, hcon->pending_sec_level);
- /* For LE slave connections, make sure the connection interval
+ /* For LE peripheral connections, make sure the connection interval
* is in the range of the minimum and maximum interval that has
* been configured for this connection. If not, then trigger
* the connection update procedure.
@@ -4237,7 +4237,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
hci_dev_lock(hdev);
if (hci_dev_test_flag(hdev, HCI_MGMT) &&
!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
- mgmt_device_connected(hdev, hcon, 0, NULL, 0);
+ mgmt_device_connected(hdev, hcon, NULL, 0);
hci_dev_unlock(hdev);
l2cap_connect(conn, cmd, data, L2CAP_CONN_RSP, 0);
@@ -6066,7 +6066,7 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn,
struct l2cap_ecred_conn_rsp *rsp = (void *) data;
struct hci_conn *hcon = conn->hcon;
u16 mtu, mps, credits, result;
- struct l2cap_chan *chan;
+ struct l2cap_chan *chan, *tmp;
int err = 0, sec_level;
int i = 0;
@@ -6085,7 +6085,7 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn,
cmd_len -= sizeof(*rsp);
- list_for_each_entry(chan, &conn->chan_l, list) {
+ list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) {
u16 dcid;
if (chan->ident != cmd->ident ||
@@ -6248,7 +6248,7 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn,
struct l2cap_cmd_hdr *cmd, u16 cmd_len,
u8 *data)
{
- struct l2cap_chan *chan;
+ struct l2cap_chan *chan, *tmp;
struct l2cap_ecred_conn_rsp *rsp = (void *) data;
u16 result;
@@ -6262,7 +6262,7 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn,
if (!result)
return 0;
- list_for_each_entry(chan, &conn->chan_l, list) {
+ list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) {
if (chan->ident != cmd->ident)
continue;
@@ -7662,7 +7662,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
* at least ensure that we ignore incoming data from them.
*/
if (hcon->type == LE_LINK &&
- hci_bdaddr_list_lookup(&hcon->hdev->blacklist, &hcon->dst,
+ hci_bdaddr_list_lookup(&hcon->hdev->reject_list, &hcon->dst,
bdaddr_dst_type(hcon))) {
kfree_skb(skb);
return;
@@ -8119,7 +8119,7 @@ static void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
dst_type = bdaddr_dst_type(hcon);
/* If device is blocked, do not create channels for it */
- if (hci_bdaddr_list_lookup(&hdev->blacklist, &hcon->dst, dst_type))
+ if (hci_bdaddr_list_lookup(&hdev->reject_list, &hcon->dst, dst_type))
return;
/* Find fixed channels and notify them of the new connection. We
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f9be7f9084d6..cea01e275f1e 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -40,7 +40,7 @@
#include "msft.h"
#define MGMT_VERSION 1
-#define MGMT_REVISION 20
+#define MGMT_REVISION 21
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -252,12 +252,15 @@ static const u8 mgmt_status_table[] = {
MGMT_STATUS_TIMEOUT, /* Instant Passed */
MGMT_STATUS_NOT_SUPPORTED, /* Pairing Not Supported */
MGMT_STATUS_FAILED, /* Transaction Collision */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_INVALID_PARAMS, /* Unacceptable Parameter */
MGMT_STATUS_REJECTED, /* QoS Rejected */
MGMT_STATUS_NOT_SUPPORTED, /* Classification Not Supported */
MGMT_STATUS_REJECTED, /* Insufficient Security */
MGMT_STATUS_INVALID_PARAMS, /* Parameter Out Of Range */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_BUSY, /* Role Switch Pending */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_FAILED, /* Slot Violation */
MGMT_STATUS_FAILED, /* Role Switch Failed */
MGMT_STATUS_INVALID_PARAMS, /* EIR Too Large */
@@ -2956,7 +2959,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
/* When pairing a new device, it is expected to remember
* this device for future connections. Adding the connection
* parameter information ahead of time allows tracking
- * of the slave preferred values and will speed up any
+ * of the peripheral preferred values and will speed up any
* further connection establishment.
*
* If connection parameters already exist, then they
@@ -3341,7 +3344,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
}
/* The name is stored in the scan response data and so
- * no need to udpate the advertising data here.
+ * no need to update the advertising data here.
*/
if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING))
__hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance);
@@ -4058,8 +4061,10 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
+ memset(&rp, 0, sizeof(rp));
+
if (cp->addr.type == BDADDR_BREDR) {
- br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ br_params = hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
&cp->addr.bdaddr,
cp->addr.type);
if (!br_params)
@@ -4127,7 +4132,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
if (cp->addr.type == BDADDR_BREDR) {
- br_params = hci_bdaddr_list_lookup_with_flags(&hdev->whitelist,
+ br_params = hci_bdaddr_list_lookup_with_flags(&hdev->accept_list,
&cp->addr.bdaddr,
cp->addr.type);
@@ -4274,7 +4279,7 @@ int mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, u8 status)
done:
hci_dev_unlock(hdev);
- bt_dev_dbg(hdev, "add monitor %d complete, status %d",
+ bt_dev_dbg(hdev, "add monitor %d complete, status %u",
rp.monitor_handle, status);
return err;
@@ -4499,7 +4504,7 @@ int mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, u8 status)
done:
hci_dev_unlock(hdev);
- bt_dev_dbg(hdev, "remove monitor %d complete, status %d",
+ bt_dev_dbg(hdev, "remove monitor %d complete, status %u",
rp.monitor_handle, status);
return err;
@@ -4829,7 +4834,7 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status)
{
struct mgmt_pending_cmd *cmd;
- bt_dev_dbg(hdev, "status %d", status);
+ bt_dev_dbg(hdev, "status %u", status);
hci_dev_lock(hdev);
@@ -5085,7 +5090,7 @@ void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status)
{
struct mgmt_pending_cmd *cmd;
- bt_dev_dbg(hdev, "status %d", status);
+ bt_dev_dbg(hdev, "status %u", status);
hci_dev_lock(hdev);
@@ -5204,7 +5209,7 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- err = hci_bdaddr_list_add(&hdev->blacklist, &cp->addr.bdaddr,
+ err = hci_bdaddr_list_add(&hdev->reject_list, &cp->addr.bdaddr,
cp->addr.type);
if (err < 0) {
status = MGMT_STATUS_FAILED;
@@ -5240,7 +5245,7 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
- err = hci_bdaddr_list_del(&hdev->blacklist, &cp->addr.bdaddr,
+ err = hci_bdaddr_list_del(&hdev->reject_list, &cp->addr.bdaddr,
cp->addr.type);
if (err < 0) {
status = MGMT_STATUS_INVALID_PARAMS;
@@ -5298,7 +5303,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
static void enable_advertising_instance(struct hci_dev *hdev, u8 status,
u16 opcode)
{
- bt_dev_dbg(hdev, "status %d", status);
+ bt_dev_dbg(hdev, "status %u", status);
}
static void set_advertising_complete(struct hci_dev *hdev, u8 status,
@@ -6164,7 +6169,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
static bool ltk_is_valid(struct mgmt_ltk_info *key)
{
- if (key->master != 0x00 && key->master != 0x01)
+ if (key->initiator != 0x00 && key->initiator != 0x01)
return false;
switch (key->addr.type) {
@@ -6242,11 +6247,11 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
switch (key->type) {
case MGMT_LTK_UNAUTHENTICATED:
authenticated = 0x00;
- type = key->master ? SMP_LTK : SMP_LTK_SLAVE;
+ type = key->initiator ? SMP_LTK : SMP_LTK_RESPONDER;
break;
case MGMT_LTK_AUTHENTICATED:
authenticated = 0x01;
- type = key->master ? SMP_LTK : SMP_LTK_SLAVE;
+ type = key->initiator ? SMP_LTK : SMP_LTK_RESPONDER;
break;
case MGMT_LTK_P256_UNAUTH:
authenticated = 0x00;
@@ -6342,7 +6347,7 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status,
handle = __le16_to_cpu(cp->handle);
conn = hci_conn_hash_lookup_handle(hdev, handle);
if (!conn) {
- bt_dev_err(hdev, "unknown handle (%d) in conn_info response",
+ bt_dev_err(hdev, "unknown handle (%u) in conn_info response",
handle);
goto unlock;
}
@@ -6731,7 +6736,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- err = hci_bdaddr_list_add_with_flags(&hdev->whitelist,
+ err = hci_bdaddr_list_add_with_flags(&hdev->accept_list,
&cp->addr.bdaddr,
cp->addr.type, 0);
if (err)
@@ -6829,7 +6834,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
}
if (cp->addr.type == BDADDR_BREDR) {
- err = hci_bdaddr_list_del(&hdev->whitelist,
+ err = hci_bdaddr_list_del(&hdev->accept_list,
&cp->addr.bdaddr,
cp->addr.type);
if (err) {
@@ -6900,7 +6905,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- list_for_each_entry_safe(b, btmp, &hdev->whitelist, list) {
+ list_for_each_entry_safe(b, btmp, &hdev->accept_list, list) {
device_removed(sk, hdev, &b->bdaddr, b->bdaddr_type);
list_del(&b->list);
kfree(b);
@@ -7199,7 +7204,7 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status,
if (!mgmt_rp)
goto done;
- if (status)
+ if (eir_len == 0)
goto send_rsp;
eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV,
@@ -7585,6 +7590,9 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) {
cur_len = data[i];
+ if (!cur_len)
+ continue;
+
if (data[i + 1] == EIR_FLAGS &&
(!is_adv_data || flags_managed(adv_flags)))
return false;
@@ -7646,7 +7654,7 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
struct adv_info *adv_instance, *n;
u8 instance;
- bt_dev_dbg(hdev, "status %d", status);
+ bt_dev_dbg(hdev, "status %u", status);
hci_dev_lock(hdev);
@@ -7717,7 +7725,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
* advertising.
*/
if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY))
- return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING,
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
MGMT_STATUS_NOT_SUPPORTED);
if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets)
@@ -8176,7 +8184,7 @@ static void remove_advertising_complete(struct hci_dev *hdev, u8 status,
struct mgmt_cp_remove_advertising *cp;
struct mgmt_rp_remove_advertising rp;
- bt_dev_dbg(hdev, "status %d", status);
+ bt_dev_dbg(hdev, "status %u", status);
hci_dev_lock(hdev);
@@ -8641,7 +8649,7 @@ static u8 mgmt_ltk_type(struct smp_ltk *ltk)
{
switch (ltk->type) {
case SMP_LTK:
- case SMP_LTK_SLAVE:
+ case SMP_LTK_RESPONDER:
if (ltk->authenticated)
return MGMT_LTK_AUTHENTICATED;
return MGMT_LTK_UNAUTHENTICATED;
@@ -8687,7 +8695,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent)
ev.key.rand = key->rand;
if (key->type == SMP_LTK)
- ev.key.master = 1;
+ ev.key.initiator = 1;
/* Make sure we copy only the significant bytes based on the
* encryption key size, and set the rest of the value to zeroes.
@@ -8767,15 +8775,19 @@ void mgmt_new_conn_param(struct hci_dev *hdev, bdaddr_t *bdaddr,
}
void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn,
- u32 flags, u8 *name, u8 name_len)
+ u8 *name, u8 name_len)
{
char buf[512];
struct mgmt_ev_device_connected *ev = (void *) buf;
u16 eir_len = 0;
+ u32 flags = 0;
bacpy(&ev->addr.bdaddr, &conn->dst);
ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type);
+ if (conn->out)
+ flags |= MGMT_DEV_FOUND_INITIATED_CONN;
+
ev->flags = __cpu_to_le32(flags);
/* We must ensure that the EIR Data fields are ordered and
diff --git a/net/bluetooth/mgmt_config.c b/net/bluetooth/mgmt_config.c
index 1deb0ca7a929..6ef701c27da4 100644
--- a/net/bluetooth/mgmt_config.c
+++ b/net/bluetooth/mgmt_config.c
@@ -146,7 +146,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
const u16 type = le16_to_cpu(TO_TLV(buffer)->type);
if (buffer_left < exp_len) {
- bt_dev_warn(hdev, "invalid len left %d, exp >= %d",
+ bt_dev_warn(hdev, "invalid len left %u, exp >= %u",
buffer_left, exp_len);
return mgmt_cmd_status(sk, hdev->id,
@@ -198,7 +198,7 @@ int set_def_system_config(struct sock *sk, struct hci_dev *hdev, void *data,
}
if (exp_type_len && len != exp_type_len) {
- bt_dev_warn(hdev, "invalid length %d, exp %zu for type %d",
+ bt_dev_warn(hdev, "invalid length %d, exp %zu for type %u",
len, exp_type_len, type);
return mgmt_cmd_status(sk, hdev->id,
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index e28f15439ce4..b4bfae41e8a5 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -34,12 +34,12 @@ struct msft_le_monitor_advertisement_pattern {
__u8 length;
__u8 data_type;
__u8 start_byte;
- __u8 pattern[0];
+ __u8 pattern[];
};
struct msft_le_monitor_advertisement_pattern_data {
__u8 count;
- __u8 data[0];
+ __u8 data[];
};
struct msft_cp_le_monitor_advertisement {
@@ -49,7 +49,7 @@ struct msft_cp_le_monitor_advertisement {
__u8 rssi_low_interval;
__u8 rssi_sampling_period;
__u8 cond_type;
- __u8 data[0];
+ __u8 data[];
} __packed;
struct msft_rp_le_monitor_advertisement {
@@ -311,7 +311,7 @@ static void msft_le_monitor_advertisement_cb(struct hci_dev *hdev,
monitor = idr_find(&hdev->adv_monitors_idr, msft->pending_add_handle);
if (!monitor) {
- bt_dev_err(hdev, "msft add advmon: monitor %d is not found!",
+ bt_dev_err(hdev, "msft add advmon: monitor %u is not found!",
msft->pending_add_handle);
status = HCI_ERROR_UNSPECIFIED;
goto unlock;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index ae6f80730561..2c95bb58f901 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -70,7 +70,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
BT_DBG("dlc %p state %ld err %d", d, d->state, err);
- spin_lock_bh(&sk->sk_lock.slock);
+ lock_sock(sk);
if (err)
sk->sk_err = err;
@@ -91,7 +91,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
sk->sk_state_change(sk);
}
- spin_unlock_bh(&sk->sk_lock.slock);
+ release_sock(sk);
if (parent && sock_flag(sk, SOCK_ZAPPED)) {
/* We have to drop DLC lock here, otherwise
@@ -974,7 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
if (!parent)
return 0;
- bh_lock_sock(parent);
+ lock_sock(parent);
/* Check for backlog size */
if (sk_acceptq_is_full(parent)) {
@@ -1001,7 +1001,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
result = 1;
done:
- bh_unlock_sock(parent);
+ release_sock(parent);
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
parent->sk_state_change(parent);
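
The rfcomm/sock.c changes move the DLC callbacks from bh_lock_sock()/spin_lock_bh() to lock_sock(), the appropriate primitive once the callbacks run from process or workqueue context rather than the bottom half. A rough sketch of the pattern, using a hypothetical callback for illustration only:

#include <net/sock.h>

/* Called from process context (e.g. a workqueue), so the socket
 * owner lock is taken with lock_sock() instead of bh_lock_sock().
 */
static void example_state_change(struct sock *sk, int err)
{
	lock_sock(sk);

	if (err)
		sk->sk_err = err;
	sk->sk_state_change(sk);	/* wake up anyone waiting on the socket */

	release_sock(sk);
}
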
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index a58584949a95..ebd78fdbd6e8 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -198,20 +198,22 @@ static void rfcomm_reparent_device(struct rfcomm_dev *dev)
hci_dev_put(hdev);
}
-static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf)
+static ssize_t address_show(struct device *tty_dev,
+ struct device_attribute *attr, char *buf)
{
struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
return sprintf(buf, "%pMR\n", &dev->dst);
}
-static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr, char *buf)
+static ssize_t channel_show(struct device *tty_dev,
+ struct device_attribute *attr, char *buf)
{
struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
return sprintf(buf, "%d\n", dev->channel);
}
-static DEVICE_ATTR(address, 0444, show_address, NULL);
-static DEVICE_ATTR(channel, 0444, show_channel, NULL);
+static DEVICE_ATTR_RO(address);
+static DEVICE_ATTR_RO(channel);
static struct rfcomm_dev *__rfcomm_dev_add(struct rfcomm_dev_req *req,
struct rfcomm_dlc *dlc)
@@ -807,7 +809,7 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in
return sent;
}
-static int rfcomm_tty_write_room(struct tty_struct *tty)
+static unsigned int rfcomm_tty_write_room(struct tty_struct *tty)
{
struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
int room = 0;
@@ -1010,7 +1012,7 @@ static void rfcomm_tty_unthrottle(struct tty_struct *tty)
rfcomm_dlc_unthrottle(dev->dlc);
}
-static int rfcomm_tty_chars_in_buffer(struct tty_struct *tty)
+static unsigned int rfcomm_tty_chars_in_buffer(struct tty_struct *tty)
{
struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
@@ -1125,9 +1127,10 @@ int __init rfcomm_init_ttys(void)
{
int error;
- rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS);
- if (!rfcomm_tty_driver)
- return -ENOMEM;
+ rfcomm_tty_driver = tty_alloc_driver(RFCOMM_TTY_PORTS,
+ TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV);
+ if (IS_ERR(rfcomm_tty_driver))
+ return PTR_ERR(rfcomm_tty_driver);
rfcomm_tty_driver->driver_name = "rfcomm";
rfcomm_tty_driver->name = "rfcomm";
@@ -1135,7 +1138,6 @@ int __init rfcomm_init_ttys(void)
rfcomm_tty_driver->minor_start = RFCOMM_TTY_MINOR;
rfcomm_tty_driver->type = TTY_DRIVER_TYPE_SERIAL;
rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL;
- rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
rfcomm_tty_driver->init_termios = tty_std_termios;
rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL;
rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON;
@@ -1144,7 +1146,7 @@ int __init rfcomm_init_ttys(void)
error = tty_register_driver(rfcomm_tty_driver);
if (error) {
BT_ERR("Can't register RFCOMM TTY driver");
- put_tty_driver(rfcomm_tty_driver);
+ tty_driver_kref_put(rfcomm_tty_driver);
return error;
}
@@ -1156,5 +1158,5 @@ int __init rfcomm_init_ttys(void)
void rfcomm_cleanup_ttys(void)
{
tty_unregister_driver(rfcomm_tty_driver);
- put_tty_driver(rfcomm_tty_driver);
+ tty_driver_kref_put(rfcomm_tty_driver);
}
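
Two conversions run through the tty.c hunks: the sysfs attributes switch to DEVICE_ATTR_RO(), which requires the show functions to be named <attr>_show, and the driver allocation moves to tty_alloc_driver(), which takes the flags up front and reports failure via ERR_PTR() rather than NULL. A condensed sketch of the second pattern, under the assumption of a placeholder driver with empty operations (names are not from this patch):

#include <linux/err.h>
#include <linux/tty.h>
#include <linux/tty_driver.h>

static const struct tty_operations example_ops = { };
static struct tty_driver *example_tty_driver;

static int example_init_tty(void)
{
	int err;

	/* tty_alloc_driver() takes the flags directly and returns
	 * ERR_PTR() on failure, so check with IS_ERR()/PTR_ERR().
	 */
	example_tty_driver = tty_alloc_driver(8, TTY_DRIVER_REAL_RAW |
						 TTY_DRIVER_DYNAMIC_DEV);
	if (IS_ERR(example_tty_driver))
		return PTR_ERR(example_tty_driver);

	example_tty_driver->driver_name = "example";
	example_tty_driver->name = "ex";
	tty_set_operations(example_tty_driver, &example_ops);

	err = tty_register_driver(example_tty_driver);
	if (err)
		tty_driver_kref_put(example_tty_driver);	/* drop the kref on error */
	return err;
}
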
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3bd41563f118..98a881586512 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -48,6 +48,8 @@ struct sco_conn {
spinlock_t lock;
struct sock *sk;
+ struct delayed_work timeout_work;
+
unsigned int mtu;
};
@@ -74,31 +76,47 @@ struct sco_pinfo {
#define SCO_CONN_TIMEOUT (HZ * 40)
#define SCO_DISCONN_TIMEOUT (HZ * 2)
-static void sco_sock_timeout(struct timer_list *t)
+static void sco_sock_timeout(struct work_struct *work)
{
- struct sock *sk = from_timer(sk, t, sk_timer);
+ struct sco_conn *conn = container_of(work, struct sco_conn,
+ timeout_work.work);
+ struct sock *sk;
+
+ sco_conn_lock(conn);
+ sk = conn->sk;
+ if (sk)
+ sock_hold(sk);
+ sco_conn_unlock(conn);
+
+ if (!sk)
+ return;
BT_DBG("sock %p state %d", sk, sk->sk_state);
- bh_lock_sock(sk);
+ lock_sock(sk);
sk->sk_err = ETIMEDOUT;
sk->sk_state_change(sk);
- bh_unlock_sock(sk);
-
- sco_sock_kill(sk);
+ release_sock(sk);
sock_put(sk);
}
static void sco_sock_set_timer(struct sock *sk, long timeout)
{
+ if (!sco_pi(sk)->conn)
+ return;
+
BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
+ schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout);
}
static void sco_sock_clear_timer(struct sock *sk)
{
+ if (!sco_pi(sk)->conn)
+ return;
+
BT_DBG("sock %p state %d", sk, sk->sk_state);
- sk_stop_timer(sk, &sk->sk_timer);
+ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
}
/* ---- SCO connections ---- */
@@ -173,12 +191,14 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
if (sk) {
sock_hold(sk);
- bh_lock_sock(sk);
+ lock_sock(sk);
sco_sock_clear_timer(sk);
sco_chan_del(sk, err);
- bh_unlock_sock(sk);
- sco_sock_kill(sk);
+ release_sock(sk);
sock_put(sk);
+
+ /* Ensure no more work items will run before freeing conn. */
+ cancel_delayed_work_sync(&conn->timeout_work);
}
hcon->sco_data = NULL;
@@ -193,6 +213,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
sco_pi(sk)->conn = conn;
conn->sk = sk;
+ INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
+
if (parent)
bt_accept_enqueue(parent, sk, true);
}
@@ -212,44 +234,32 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
return err;
}
-static int sco_connect(struct sock *sk)
+static int sco_connect(struct hci_dev *hdev, struct sock *sk)
{
struct sco_conn *conn;
struct hci_conn *hcon;
- struct hci_dev *hdev;
int err, type;
BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
- hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
- if (!hdev)
- return -EHOSTUNREACH;
-
- hci_dev_lock(hdev);
-
if (lmp_esco_capable(hdev) && !disable_esco)
type = ESCO_LINK;
else
type = SCO_LINK;
if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
- (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
- err = -EOPNOTSUPP;
- goto done;
- }
+ (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)))
+ return -EOPNOTSUPP;
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
sco_pi(sk)->setting);
- if (IS_ERR(hcon)) {
- err = PTR_ERR(hcon);
- goto done;
- }
+ if (IS_ERR(hcon))
+ return PTR_ERR(hcon);
conn = sco_conn_add(hcon);
if (!conn) {
hci_conn_drop(hcon);
- err = -ENOMEM;
- goto done;
+ return -ENOMEM;
}
/* Update source addr of the socket */
@@ -257,7 +267,7 @@ static int sco_connect(struct sock *sk)
err = sco_chan_add(conn, sk, NULL);
if (err)
- goto done;
+ return err;
if (hcon->state == BT_CONNECTED) {
sco_sock_clear_timer(sk);
@@ -267,9 +277,6 @@ static int sco_connect(struct sock *sk)
sco_sock_set_timer(sk, sk->sk_sndtimeo);
}
-done:
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
return err;
}
@@ -310,7 +317,7 @@ static void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
if (!sk)
goto drop;
- BT_DBG("sk %p len %d", sk, skb->len);
+ BT_DBG("sk %p len %u", sk, skb->len);
if (sk->sk_state != BT_CONNECTED)
goto drop;
@@ -394,8 +401,7 @@ static void sco_sock_cleanup_listen(struct sock *parent)
*/
static void sco_sock_kill(struct sock *sk)
{
- if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket ||
- sock_flag(sk, SOCK_DEAD))
+ if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
return;
BT_DBG("sk %p state %d", sk, sk->sk_state);
@@ -443,11 +449,10 @@ static void __sco_sock_close(struct sock *sk)
/* Must be called on unlocked socket. */
static void sco_sock_close(struct sock *sk)
{
- sco_sock_clear_timer(sk);
lock_sock(sk);
+ sco_sock_clear_timer(sk);
__sco_sock_close(sk);
release_sock(sk);
- sco_sock_kill(sk);
}
static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg,
@@ -500,8 +505,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
- timer_setup(&sk->sk_timer, sco_sock_timeout, 0);
-
bt_sock_link(&sco_sk_list, sk);
return sk;
}
@@ -566,6 +569,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
+ struct hci_dev *hdev;
int err;
BT_DBG("sk %p", sk);
@@ -580,12 +584,19 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
if (sk->sk_type != SOCK_SEQPACKET)
return -EINVAL;
+ hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
+ if (!hdev)
+ return -EHOSTUNREACH;
+ hci_dev_lock(hdev);
+
lock_sock(sk);
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
- err = sco_connect(sk);
+ err = sco_connect(hdev, sk);
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
if (err)
goto done;
@@ -773,6 +784,11 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
cp.max_latency = cpu_to_le16(0xffff);
cp.retrans_effort = 0xff;
break;
+ default:
+ /* use CVSD settings as fallback */
+ cp.max_latency = cpu_to_le16(0xffff);
+ cp.retrans_effort = 0xff;
+ break;
}
hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
@@ -905,7 +921,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname,
opts.mtu = sco_pi(sk)->conn->mtu;
- BT_DBG("mtu %d", opts.mtu);
+ BT_DBG("mtu %u", opts.mtu);
len = min_t(unsigned int, len, sizeof(opts));
if (copy_to_user(optval, (char *)&opts, len))
@@ -1083,11 +1099,11 @@ static void sco_conn_ready(struct sco_conn *conn)
BT_DBG("conn %p", conn);
if (sk) {
+ lock_sock(sk);
sco_sock_clear_timer(sk);
- bh_lock_sock(sk);
sk->sk_state = BT_CONNECTED;
sk->sk_state_change(sk);
- bh_unlock_sock(sk);
+ release_sock(sk);
} else {
sco_conn_lock(conn);
@@ -1102,12 +1118,12 @@ static void sco_conn_ready(struct sco_conn *conn)
return;
}
- bh_lock_sock(parent);
+ lock_sock(parent);
sk = sco_sock_alloc(sock_net(parent), NULL,
BTPROTO_SCO, GFP_ATOMIC, 0);
if (!sk) {
- bh_unlock_sock(parent);
+ release_sock(parent);
sco_conn_unlock(conn);
return;
}
@@ -1128,7 +1144,7 @@ static void sco_conn_ready(struct sco_conn *conn)
/* Wake up parent */
parent->sk_data_ready(parent);
- bh_unlock_sock(parent);
+ release_sock(parent);
sco_conn_unlock(conn);
}
@@ -1167,7 +1183,7 @@ static void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
if (hcon->type != SCO_LINK && hcon->type != ESCO_LINK)
return;
- BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
+ BT_DBG("hcon %p bdaddr %pMR status %u", hcon, &hcon->dst, status);
if (!status) {
struct sco_conn *conn;
@@ -1196,7 +1212,7 @@ void sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
if (!conn)
goto drop;
- BT_DBG("conn %p len %d", conn, skb->len);
+ BT_DBG("conn %p len %u", conn, skb->len);
if (skb->len) {
sco_recv_frame(conn, skb);
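
The sco.c rework replaces the per-socket sk_timer with a delayed_work hanging off the connection, so the timeout handler runs in process context, where lock_sock() is legal, instead of the timer softirq. The shape of that pattern reduced to its essentials; struct and function names are illustrative:

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <net/sock.h>

struct example_conn {
	spinlock_t lock;
	struct sock *sk;
	struct delayed_work timeout_work;
};

static void example_timeout(struct work_struct *work)
{
	struct example_conn *conn = container_of(work, struct example_conn,
						 timeout_work.work);
	struct sock *sk;

	/* Take a reference under the conn lock so the socket cannot be
	 * freed while we sleep in lock_sock() below.
	 */
	spin_lock(&conn->lock);
	sk = conn->sk;
	if (sk)
		sock_hold(sk);
	spin_unlock(&conn->lock);

	if (!sk)
		return;

	lock_sock(sk);
	sk->sk_err = ETIMEDOUT;
	sk->sk_state_change(sk);
	release_sock(sk);
	sock_put(sk);
}

static void example_conn_init(struct example_conn *conn)
{
	INIT_DELAYED_WORK(&conn->timeout_work, example_timeout);
}
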
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 7dd51da73845..11f853d0500f 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -40,7 +40,7 @@
((struct smp_dev *)((struct l2cap_chan *)((hdev)->smp_data))->data)
/* Low-level debug macros to be used for stuff that we don't want
- * accidentially in dmesg, i.e. the values of the various crypto keys
+ * accidentally in dmesg, i.e. the values of the various crypto keys
* and the inputs & outputs of crypto functions.
*/
#ifdef DEBUG
@@ -111,9 +111,9 @@ struct smp_chan {
u8 id_addr_type;
u8 irk[16];
struct smp_csrk *csrk;
- struct smp_csrk *slave_csrk;
+ struct smp_csrk *responder_csrk;
struct smp_ltk *ltk;
- struct smp_ltk *slave_ltk;
+ struct smp_ltk *responder_ltk;
struct smp_irk *remote_irk;
u8 *link_key;
unsigned long flags;
@@ -560,7 +560,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
return err;
/* This is unlikely, but we need to check that
- * we didn't accidentially generate a debug key.
+ * we didn't accidentally generate a debug key.
*/
if (crypto_memneq(smp->local_pk, debug_pk, 64))
break;
@@ -753,7 +753,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
mgmt_smp_complete(hcon, complete);
kfree_sensitive(smp->csrk);
- kfree_sensitive(smp->slave_csrk);
+ kfree_sensitive(smp->responder_csrk);
kfree_sensitive(smp->link_key);
crypto_free_shash(smp->tfm_cmac);
@@ -776,9 +776,9 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
kfree_rcu(smp->ltk, rcu);
}
- if (smp->slave_ltk) {
- list_del_rcu(&smp->slave_ltk->list);
- kfree_rcu(smp->slave_ltk, rcu);
+ if (smp->responder_ltk) {
+ list_del_rcu(&smp->responder_ltk->list);
+ kfree_rcu(smp->responder_ltk, rcu);
}
if (smp->remote_irk) {
@@ -859,7 +859,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
memset(smp->tk, 0, sizeof(smp->tk));
clear_bit(SMP_FLAG_TK_VALID, &smp->flags);
- bt_dev_dbg(hcon->hdev, "auth:%d lcl:%d rem:%d", auth, local_io,
+ bt_dev_dbg(hcon->hdev, "auth:%u lcl:%u rem:%u", auth, local_io,
remote_io);
/* If neither side wants MITM, either "just" confirm an incoming
@@ -909,8 +909,8 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
hcon->pending_sec_level = BT_SECURITY_HIGH;
}
- /* If both devices have Keyoard-Display I/O, the master
- * Confirms and the slave Enters the passkey.
+ /* If both devices have Keyboard-Display I/O, the initiator
+ * Confirms and the responder Enters the passkey.
*/
if (smp->method == OVERLAP) {
if (hcon->role == HCI_ROLE_MASTER)
@@ -925,7 +925,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
get_random_bytes(&passkey, sizeof(passkey));
passkey %= 1000000;
put_unaligned_le32(passkey, smp->tk);
- bt_dev_dbg(hcon->hdev, "PassKey: %d", passkey);
+ bt_dev_dbg(hcon->hdev, "PassKey: %u", passkey);
set_bit(SMP_FLAG_TK_VALID, &smp->flags);
}
@@ -979,7 +979,7 @@ static u8 smp_random(struct smp_chan *smp)
int ret;
bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn,
- conn->hcon->out ? "master" : "slave");
+ conn->hcon->out ? "initiator" : "responder");
ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp,
hcon->init_addr_type, &hcon->init_addr,
@@ -1021,8 +1021,8 @@ static u8 smp_random(struct smp_chan *smp)
else
auth = 0;
- /* Even though there's no _SLAVE suffix this is the
- * slave STK we're adding for later lookup (the master
+ /* Even though there's no _RESPONDER suffix this is the
+ * responder STK we're adding for later lookup (the initiator
* STK never needs to be stored).
*/
hci_add_ltk(hcon->hdev, &hcon->dst, hcon->dst_type,
@@ -1077,10 +1077,10 @@ static void smp_notify_keys(struct l2cap_conn *conn)
mgmt_new_csrk(hdev, smp->csrk, persistent);
}
- if (smp->slave_csrk) {
- smp->slave_csrk->bdaddr_type = hcon->dst_type;
- bacpy(&smp->slave_csrk->bdaddr, &hcon->dst);
- mgmt_new_csrk(hdev, smp->slave_csrk, persistent);
+ if (smp->responder_csrk) {
+ smp->responder_csrk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->responder_csrk->bdaddr, &hcon->dst);
+ mgmt_new_csrk(hdev, smp->responder_csrk, persistent);
}
if (smp->ltk) {
@@ -1089,10 +1089,10 @@ static void smp_notify_keys(struct l2cap_conn *conn)
mgmt_new_ltk(hdev, smp->ltk, persistent);
}
- if (smp->slave_ltk) {
- smp->slave_ltk->bdaddr_type = hcon->dst_type;
- bacpy(&smp->slave_ltk->bdaddr, &hcon->dst);
- mgmt_new_ltk(hdev, smp->slave_ltk, persistent);
+ if (smp->responder_ltk) {
+ smp->responder_ltk->bdaddr_type = hcon->dst_type;
+ bacpy(&smp->responder_ltk->bdaddr, &hcon->dst);
+ mgmt_new_ltk(hdev, smp->responder_ltk, persistent);
}
if (smp->link_key) {
@@ -1272,7 +1272,7 @@ static void smp_distribute_keys(struct smp_chan *smp)
if (*keydist & SMP_DIST_ENC_KEY) {
struct smp_cmd_encrypt_info enc;
- struct smp_cmd_master_ident ident;
+ struct smp_cmd_initiator_ident ident;
struct smp_ltk *ltk;
u8 authenticated;
__le16 ediv;
@@ -1293,14 +1293,15 @@ static void smp_distribute_keys(struct smp_chan *smp)
authenticated = hcon->sec_level == BT_SECURITY_HIGH;
ltk = hci_add_ltk(hdev, &hcon->dst, hcon->dst_type,
- SMP_LTK_SLAVE, authenticated, enc.ltk,
+ SMP_LTK_RESPONDER, authenticated, enc.ltk,
smp->enc_key_size, ediv, rand);
- smp->slave_ltk = ltk;
+ smp->responder_ltk = ltk;
ident.ediv = ediv;
ident.rand = rand;
- smp_send_cmd(conn, SMP_CMD_MASTER_IDENT, sizeof(ident), &ident);
+ smp_send_cmd(conn, SMP_CMD_INITIATOR_IDENT, sizeof(ident),
+ &ident);
*keydist &= ~SMP_DIST_ENC_KEY;
}
@@ -1343,7 +1344,7 @@ static void smp_distribute_keys(struct smp_chan *smp)
csrk->type = MGMT_CSRK_LOCAL_UNAUTHENTICATED;
memcpy(csrk->val, sign.csrk, sizeof(csrk->val));
}
- smp->slave_csrk = csrk;
+ smp->responder_csrk = csrk;
smp_send_cmd(conn, SMP_CMD_SIGN_INFO, sizeof(sign), &sign);
@@ -1654,7 +1655,7 @@ int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
case MGMT_OP_USER_PASSKEY_REPLY:
value = le32_to_cpu(passkey);
memset(smp->tk, 0, sizeof(smp->tk));
- bt_dev_dbg(conn->hcon->hdev, "PassKey: %d", value);
+ bt_dev_dbg(conn->hcon->hdev, "PassKey: %u", value);
put_unaligned_le32(value, smp->tk);
fallthrough;
case MGMT_OP_USER_CONFIRM_REPLY:
@@ -1902,7 +1903,7 @@ static u8 sc_send_public_key(struct smp_chan *smp)
return SMP_UNSPECIFIED;
/* This is unlikely, but we need to check that
- * we didn't accidentially generate a debug key.
+ * we didn't accidentally generate a debug key.
*/
if (crypto_memneq(smp->local_pk, debug_pk, 64))
break;
@@ -2048,7 +2049,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp)
struct smp_cmd_pairing *req, *rsp;
u8 auth;
- /* The issue is only observed when we're in slave role */
+ /* The issue is only observed when we're in responder role */
if (hcon->out)
return SMP_UNSPECIFIED;
@@ -2084,7 +2085,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb)
struct hci_conn *hcon = conn->hcon;
struct hci_dev *hdev = hcon->hdev;
- bt_dev_dbg(hdev, "conn %p %s", conn, hcon->out ? "master" : "slave");
+ bt_dev_dbg(hdev, "conn %p %s", conn,
+ hcon->out ? "initiator" : "responder");
if (skb->len < sizeof(smp->pcnf))
return SMP_INVALID_PARAMS;
@@ -2251,7 +2253,7 @@ static bool smp_ltk_encrypt(struct l2cap_conn *conn, u8 sec_level)
hci_le_start_enc(hcon, key->ediv, key->rand, key->val, key->enc_size);
hcon->enc_key_size = key->enc_size;
- /* We never store STKs for master role, so clear this flag */
+ /* We never store STKs for initiator role, so clear this flag */
clear_bit(HCI_CONN_STK_ENCRYPT, &hcon->flags);
return true;
@@ -2467,7 +2469,7 @@ int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr,
/* Set keys to NULL to make sure smp_failure() does not try to
* remove and free already invalidated rcu list entries. */
smp->ltk = NULL;
- smp->slave_ltk = NULL;
+ smp->responder_ltk = NULL;
smp->remote_irk = NULL;
if (test_bit(SMP_FLAG_COMPLETE, &smp->flags))
@@ -2503,7 +2505,7 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
return SMP_INVALID_PARAMS;
}
- SMP_ALLOW_CMD(smp, SMP_CMD_MASTER_IDENT);
+ SMP_ALLOW_CMD(smp, SMP_CMD_INITIATOR_IDENT);
skb_pull(skb, sizeof(*rp));
@@ -2512,9 +2514,9 @@ static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
return 0;
}
-static int smp_cmd_master_ident(struct l2cap_conn *conn, struct sk_buff *skb)
+static int smp_cmd_initiator_ident(struct l2cap_conn *conn, struct sk_buff *skb)
{
- struct smp_cmd_master_ident *rp = (void *) skb->data;
+ struct smp_cmd_initiator_ident *rp = (void *)skb->data;
struct l2cap_chan *chan = conn->smp;
struct smp_chan *smp = chan->data;
struct hci_dev *hdev = conn->hcon->hdev;
@@ -2913,7 +2915,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
return 0;
}
- /* Slave sends DHKey check as response to master */
+ /* Responder sends DHKey check as response to initiator */
sc_dhkey_check(smp);
}
@@ -3000,8 +3002,8 @@ static int smp_sig_channel(struct l2cap_chan *chan, struct sk_buff *skb)
reason = smp_cmd_encrypt_info(conn, skb);
break;
- case SMP_CMD_MASTER_IDENT:
- reason = smp_cmd_master_ident(conn, skb);
+ case SMP_CMD_INITIATOR_IDENT:
+ reason = smp_cmd_initiator_ident(conn, skb);
break;
case SMP_CMD_IDENT_INFO:
@@ -3081,7 +3083,7 @@ static void bredr_pairing(struct l2cap_chan *chan)
if (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags))
return;
- /* Only master may initiate SMP over BR/EDR */
+ /* Only initiator may initiate SMP over BR/EDR */
if (hcon->role != HCI_ROLE_MASTER)
return;
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index fc35a8bf358e..87a59ec2c9f0 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -79,8 +79,8 @@ struct smp_cmd_encrypt_info {
__u8 ltk[16];
} __packed;
-#define SMP_CMD_MASTER_IDENT 0x07
-struct smp_cmd_master_ident {
+#define SMP_CMD_INITIATOR_IDENT 0x07
+struct smp_cmd_initiator_ident {
__le16 ediv;
__le64 rand;
} __packed;
@@ -146,7 +146,7 @@ struct smp_cmd_keypress_notify {
enum {
SMP_STK,
SMP_LTK,
- SMP_LTK_SLAVE,
+ SMP_LTK_RESPONDER,
SMP_LTK_P256,
SMP_LTK_P256_DEBUG,
};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index a5d72c48fb66..2eb0e55ef54d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -7,6 +7,7 @@
#include <linux/vmalloc.h>
#include <linux/etherdevice.h>
#include <linux/filter.h>
+#include <linux/rcupdate_trace.h>
#include <linux/sched/signal.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
@@ -15,6 +16,7 @@
#include <linux/error-injection.h>
#include <linux/smp.h>
#include <linux/sock_diag.h>
+#include <net/xdp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
@@ -87,17 +89,19 @@ reset:
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
u32 *retval, u32 *time, bool xdp)
{
- struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+ struct bpf_prog_array_item item = {.prog = prog};
+ struct bpf_run_ctx *old_ctx;
+ struct bpf_cg_run_ctx run_ctx;
struct bpf_test_timer t = { NO_MIGRATE };
enum bpf_cgroup_storage_type stype;
int ret;
for_each_cgroup_storage_type(stype) {
- storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
- if (IS_ERR(storage[stype])) {
- storage[stype] = NULL;
+ item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+ if (IS_ERR(item.cgroup_storage[stype])) {
+ item.cgroup_storage[stype] = NULL;
for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_free(storage[stype]);
+ bpf_cgroup_storage_free(item.cgroup_storage[stype]);
return -ENOMEM;
}
}
@@ -106,22 +110,19 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
repeat = 1;
bpf_test_timer_enter(&t);
+ old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
do {
- ret = bpf_cgroup_storage_set(storage);
- if (ret)
- break;
-
+ run_ctx.prog_item = &item;
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
- *retval = BPF_PROG_RUN(prog, ctx);
-
- bpf_cgroup_storage_unset();
+ *retval = bpf_prog_run(prog, ctx);
} while (bpf_test_timer_continue(&t, repeat, &ret, time));
+ bpf_reset_run_ctx(old_ctx);
bpf_test_timer_leave(&t);
for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_free(storage[stype]);
+ bpf_cgroup_storage_free(item.cgroup_storage[stype]);
return ret;
}
@@ -326,7 +327,7 @@ __bpf_prog_test_run_raw_tp(void *data)
struct bpf_raw_tp_test_run_info *info = data;
rcu_read_lock();
- info->retval = BPF_PROG_RUN(info->prog, info->ctx);
+ info->retval = bpf_prog_run(info->prog, info->ctx);
rcu_read_unlock();
}
@@ -409,7 +410,7 @@ static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
return ERR_PTR(-ENOMEM);
if (data_in) {
- err = bpf_check_uarg_tail_zero(data_in, max_size, size);
+ err = bpf_check_uarg_tail_zero(USER_BPFPTR(data_in), max_size, size);
if (err) {
kfree(data);
return ERR_PTR(err);
@@ -687,6 +688,64 @@ out:
return ret;
}
+static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
+{
+ unsigned int ingress_ifindex, rx_queue_index;
+ struct netdev_rx_queue *rxqueue;
+ struct net_device *device;
+
+ if (!xdp_md)
+ return 0;
+
+ if (xdp_md->egress_ifindex != 0)
+ return -EINVAL;
+
+ ingress_ifindex = xdp_md->ingress_ifindex;
+ rx_queue_index = xdp_md->rx_queue_index;
+
+ if (!ingress_ifindex && rx_queue_index)
+ return -EINVAL;
+
+ if (ingress_ifindex) {
+ device = dev_get_by_index(current->nsproxy->net_ns,
+ ingress_ifindex);
+ if (!device)
+ return -ENODEV;
+
+ if (rx_queue_index >= device->real_num_rx_queues)
+ goto free_dev;
+
+ rxqueue = __netif_get_rx_queue(device, rx_queue_index);
+
+ if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq))
+ goto free_dev;
+
+ xdp->rxq = &rxqueue->xdp_rxq;
+ /* The device is now tracked in the xdp->rxq for later
+ * dev_put()
+ */
+ }
+
+ xdp->data = xdp->data_meta + xdp_md->data;
+ return 0;
+
+free_dev:
+ dev_put(device);
+ return -EINVAL;
+}
+
+static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
+{
+ if (!xdp_md)
+ return;
+
+ xdp_md->data = xdp->data - xdp->data_meta;
+ xdp_md->data_end = xdp->data_end - xdp->data_meta;
+
+ if (xdp_md->ingress_ifindex)
+ dev_put(xdp->rxq->dev);
+}
+
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
@@ -697,35 +756,73 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
struct netdev_rx_queue *rxqueue;
struct xdp_buff xdp = {};
u32 retval, duration;
+ struct xdp_md *ctx;
u32 max_data_sz;
void *data;
- int ret;
+ int ret = -EINVAL;
- if (kattr->test.ctx_in || kattr->test.ctx_out)
+ if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
+ prog->expected_attach_type == BPF_XDP_CPUMAP)
return -EINVAL;
+ ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ if (ctx) {
+ /* There can't be user provided data before the meta data */
+ if (ctx->data_meta || ctx->data_end != size ||
+ ctx->data > ctx->data_end ||
+ unlikely(xdp_metalen_invalid(ctx->data)))
+ goto free_ctx;
+ /* Meta data is allocated from the headroom */
+ headroom -= ctx->data;
+ }
+
/* XDP have extra tailroom as (most) drivers use full page */
max_data_sz = 4096 - headroom - tailroom;
data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
- if (IS_ERR(data))
- return PTR_ERR(data);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ goto free_ctx;
+ }
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
&rxqueue->xdp_rxq);
xdp_prepare_buff(&xdp, data, headroom, size, true);
+ ret = xdp_convert_md_to_buff(ctx, &xdp);
+ if (ret)
+ goto free_data;
+
bpf_prog_change_xdp(NULL, prog);
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
+ /* We convert the xdp_buff back to an xdp_md before checking the return
+ * code so the reference count of any held netdevice will be decremented
+ * even if the test run failed.
+ */
+ xdp_convert_buff_to_md(&xdp, ctx);
if (ret)
goto out;
- if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
- size = xdp.data_end - xdp.data;
- ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+
+ if (xdp.data_meta != data + headroom ||
+ xdp.data_end != xdp.data_meta + size)
+ size = xdp.data_end - xdp.data_meta;
+
+ ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
+ duration);
+ if (!ret)
+ ret = bpf_ctx_finish(kattr, uattr, ctx,
+ sizeof(struct xdp_md));
+
out:
bpf_prog_change_xdp(prog, NULL);
+free_data:
kfree(data);
+free_ctx:
+ kfree(ctx);
return ret;
}
@@ -892,7 +989,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
bpf_test_timer_enter(&t);
do {
ctx.selected_sk = NULL;
- retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
+ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run);
} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
@@ -918,3 +1015,49 @@ out:
kfree(user_ctx);
return ret;
}
+
+int bpf_prog_test_run_syscall(struct bpf_prog *prog,
+ const union bpf_attr *kattr,
+ union bpf_attr __user *uattr)
+{
+ void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+ __u32 ctx_size_in = kattr->test.ctx_size_in;
+ void *ctx = NULL;
+ u32 retval;
+ int err = 0;
+
+ /* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
+ if (kattr->test.data_in || kattr->test.data_out ||
+ kattr->test.ctx_out || kattr->test.duration ||
+ kattr->test.repeat || kattr->test.flags)
+ return -EINVAL;
+
+ if (ctx_size_in < prog->aux->max_ctx_offset ||
+ ctx_size_in > U16_MAX)
+ return -EINVAL;
+
+ if (ctx_size_in) {
+ ctx = kzalloc(ctx_size_in, GFP_USER);
+ if (!ctx)
+ return -ENOMEM;
+ if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
+ err = -EFAULT;
+ goto out;
+ }
+ }
+
+ rcu_read_lock_trace();
+ retval = bpf_prog_run_pin_on_cpu(prog, ctx);
+ rcu_read_unlock_trace();
+
+ if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
+ err = -EFAULT;
+ goto out;
+ }
+ if (ctx_size_in)
+ if (copy_to_user(ctx_in, ctx, ctx_size_in))
+ err = -EFAULT;
+out:
+ kfree(ctx);
+ return err;
+}
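
bpf_prog_test_run_syscall() adds BPF_PROG_TEST_RUN support for BPF_PROG_TYPE_SYSCALL programs: only ctx_in is accepted, the context is copied in, the program runs under rcu_read_lock_trace(), and the possibly modified context plus the return value are copied back. From userspace this is normally driven through libbpf; a rough sketch assuming libbpf's bpf_prog_test_run_opts() is available, with a hypothetical context struct that must match whatever the BPF program declares:

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/types.h>
#include <stdio.h>

struct args {			/* must match the BPF program's context layout */
	__u64 in;
	__u64 out;
};

static int run_syscall_prog(int prog_fd)
{
	struct args ctx = { .in = 42 };
	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
		.ctx_in = &ctx,
		.ctx_size_in = sizeof(ctx),
	);
	int err;

	err = bpf_prog_test_run_opts(prog_fd, &opts);
	if (err)
		return err;

	/* The kernel writes the context back into ctx_in, so the program
	 * can return results through it; opts.retval holds its return code.
	 */
	printf("retval=%u out=%llu\n", opts.retval, (unsigned long long)ctx.out);
	return 0;
}
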
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
index 05e1cfc1e5cd..291a92546246 100644
--- a/net/bpfilter/main.c
+++ b/net/bpfilter/main.c
@@ -57,7 +57,7 @@ int main(void)
{
debug_f = fopen("/dev/kmsg", "w");
setvbuf(debug_f, 0, _IOLBF, 0);
- fprintf(debug_f, "Started bpfilter\n");
+ fprintf(debug_f, "<5>Started bpfilter\n");
loop();
fclose(debug_f);
return 0;
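
The bpfilter change prepends "<5>" to the line written to /dev/kmsg; the angle-bracket prefix is the syslog priority (5 = KERN_NOTICE), and without it the kernel logs the message at the default level. A tiny userspace illustration of the same convention:

#include <stdio.h>

int main(void)
{
	FILE *kmsg = fopen("/dev/kmsg", "w");

	if (!kmsg)
		return 1;
	setvbuf(kmsg, NULL, _IOLBF, 0);			/* one write per line */
	fprintf(kmsg, "<5>hello from userspace\n");	/* <5> = KERN_NOTICE */
	fclose(kmsg);
	return 0;
}
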
diff --git a/net/bridge/br.c b/net/bridge/br.c
index ef743f94254d..d3a32c6813e0 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -201,6 +201,48 @@ static struct notifier_block br_switchdev_notifier = {
.notifier_call = br_switchdev_event,
};
+/* called under rtnl_mutex */
+static int br_switchdev_blocking_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+ struct switchdev_notifier_brport_info *brport_info;
+ const struct switchdev_brport *b;
+ struct net_bridge_port *p;
+ int err = NOTIFY_DONE;
+
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ goto out;
+
+ switch (event) {
+ case SWITCHDEV_BRPORT_OFFLOADED:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ err = br_switchdev_port_offload(p, b->dev, b->ctx,
+ b->atomic_nb, b->blocking_nb,
+ b->tx_fwd_offload, extack);
+ err = notifier_from_errno(err);
+ break;
+ case SWITCHDEV_BRPORT_UNOFFLOADED:
+ brport_info = ptr;
+ b = &brport_info->brport;
+
+ br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
+ b->blocking_nb);
+ break;
+ }
+
+out:
+ return err;
+}
+
+static struct notifier_block br_switchdev_blocking_notifier = {
+ .notifier_call = br_switchdev_blocking_event,
+};
+
/* br_boolopt_toggle - change user-controlled boolean option
*
* @br: bridge device
@@ -214,17 +256,22 @@ static struct notifier_block br_switchdev_notifier = {
int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
struct netlink_ext_ack *extack)
{
+ int err = 0;
+
switch (opt) {
case BR_BOOLOPT_NO_LL_LEARN:
br_opt_toggle(br, BROPT_NO_LL_LEARN, on);
break;
+ case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
+ err = br_multicast_toggle_vlan_snooping(br, on, extack);
+ break;
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
break;
}
- return 0;
+ return err;
}
int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
@@ -232,6 +279,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
switch (opt) {
case BR_BOOLOPT_NO_LL_LEARN:
return br_opt_get(br, BROPT_NO_LL_LEARN);
+ case BR_BOOLOPT_MCAST_VLAN_SNOOPING:
+ return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED);
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
@@ -348,11 +397,15 @@ static int __init br_init(void)
if (err)
goto err_out4;
- err = br_netlink_init();
+ err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
if (err)
goto err_out5;
- brioctl_set(br_ioctl_deviceless_stub);
+ err = br_netlink_init();
+ if (err)
+ goto err_out6;
+
+ brioctl_set(br_ioctl_stub);
#if IS_ENABLED(CONFIG_ATM_LANE)
br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -366,6 +419,8 @@ static int __init br_init(void)
return 0;
+err_out6:
+ unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
err_out5:
unregister_switchdev_notifier(&br_switchdev_notifier);
err_out4:
@@ -385,6 +440,7 @@ static void __exit br_deinit(void)
{
stp_proto_unregister(&br_stp_proto);
br_netlink_fini();
+ unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
unregister_switchdev_notifier(&br_switchdev_notifier);
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
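
br_switchdev_blocking_event() follows the usual notifier-chain return convention: NOTIFY_DONE when the event is not for this handler, and notifier_from_errno() to fold a negative errno into a value the notifier caller can decode with notifier_to_errno(). A stripped-down sketch of that convention; the handler and the event id are made up for illustration:

#include <linux/errno.h>
#include <linux/notifier.h>

static int example_blocking_event(struct notifier_block *nb,
				  unsigned long event, void *ptr)
{
	switch (event) {
	case 1:	/* hypothetical event this handler cares about */
		/* On failure, encode the errno so the caller can recover
		 * it with notifier_to_errno().
		 */
		return notifier_from_errno(-EOPNOTSUPP);
	default:
		/* Not our event: let the chain continue. */
		return NOTIFY_DONE;
	}
}

static struct notifier_block example_nb = {
	.notifier_call = example_blocking_event,
};
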
diff --git a/net/bridge/br_cfm.c b/net/bridge/br_cfm.c
index 001064f7583d..a3c755d0a09d 100644
--- a/net/bridge/br_cfm.c
+++ b/net/bridge/br_cfm.c
@@ -142,7 +142,7 @@ static void br_cfm_notify(int event, const struct net_bridge_port *port)
{
u32 filter = RTEXT_FILTER_CFM_STATUS;
- return br_info_notify(event, port->br, NULL, filter);
+ br_info_notify(event, port->br, NULL, filter);
}
static void cc_peer_enable(struct br_cfm_peer_mep *peer_mep)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e8b626cc6bfd..8d6bab244c4a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -27,11 +27,14 @@ EXPORT_SYMBOL_GPL(nf_br_ops);
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ struct net_bridge_mcast_port *pmctx_null = NULL;
struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_mcast *brmctx = &br->multicast_ctx;
struct net_bridge_fdb_entry *dst;
struct net_bridge_mdb_entry *mdst;
const struct nf_br_ops *nf_ops;
u8 state = BR_STATE_FORWARDING;
+ struct net_bridge_vlan *vlan;
const unsigned char *dest;
u16 vid = 0;
@@ -53,7 +56,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
- if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid, &state))
+ if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid,
+ &state, &vlan))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
@@ -82,15 +86,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br_flood(br, skb, BR_PKT_MULTICAST, false, true);
goto out;
}
- if (br_multicast_rcv(br, NULL, skb, vid)) {
+ if (br_multicast_rcv(&brmctx, &pmctx_null, vlan, skb, vid)) {
kfree_skb(skb);
goto out;
}
- mdst = br_mdb_get(br, skb, vid);
+ mdst = br_mdb_get(brmctx, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
- br_multicast_querier_exists(br, eth_hdr(skb), mdst))
- br_multicast_flood(mdst, skb, false, true);
+ br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
+ br_multicast_flood(mdst, skb, brmctx, false, true);
else
br_flood(br, skb, BR_PKT_MULTICAST, false, true);
} else if ((dst = br_fdb_find_rcu(br, dest, vid)) != NULL) {
@@ -450,7 +454,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_set_rx_mode = br_dev_set_multicast_list,
.ndo_change_rx_flags = br_dev_change_rx_flags,
.ndo_change_mtu = br_change_mtu,
- .ndo_do_ioctl = br_dev_ioctl,
+ .ndo_siocdevprivate = br_dev_siocdevprivate,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_setup = br_netpoll_setup,
.ndo_netpoll_cleanup = br_netpoll_cleanup,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 698b79747d32..46812b659710 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -440,9 +440,14 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
if (!port)
ret = 0;
else {
+ const struct net_bridge_port *dst = NULL;
+
fdb = br_fdb_find_rcu(port->br, addr, 0);
- ret = fdb && fdb->dst && fdb->dst->dev != dev &&
- fdb->dst->state == BR_STATE_FORWARDING;
+ if (fdb)
+ dst = READ_ONCE(fdb->dst);
+
+ ret = dst && dst->dev != dev &&
+ dst->state == BR_STATE_FORWARDING;
}
rcu_read_unlock();
@@ -509,7 +514,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
if (fdb) {
memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
- fdb->dst = source;
+ WRITE_ONCE(fdb->dst, source);
fdb->key.vlan_id = vid;
fdb->flags = flags;
fdb->updated = fdb->used = jiffies;
@@ -600,10 +605,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
}
/* fastpath: update of existing entry */
- if (unlikely(source != fdb->dst &&
+ if (unlikely(source != READ_ONCE(fdb->dst) &&
!test_bit(BR_FDB_STICKY, &fdb->flags))) {
- br_switchdev_fdb_notify(fdb, RTM_DELNEIGH);
- fdb->dst = source;
+ br_switchdev_fdb_notify(br, fdb, RTM_DELNEIGH);
+ WRITE_ONCE(fdb->dst, source);
fdb_modified = true;
/* Take over HW learned entry */
if (unlikely(test_bit(BR_FDB_ADDED_BY_EXT_LEARN,
@@ -650,6 +655,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb,
u32 portid, u32 seq, int type, unsigned int flags)
{
+ const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
unsigned long now = jiffies;
struct nda_cacheinfo ci;
struct nlmsghdr *nlh;
@@ -665,7 +671,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
ndm->ndm_pad2 = 0;
ndm->ndm_flags = 0;
ndm->ndm_type = 0;
- ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
+ ndm->ndm_ifindex = dst ? dst->dev->ifindex : br->dev->ifindex;
ndm->ndm_state = fdb_to_nud(br, fdb);
if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
@@ -726,10 +732,11 @@ static inline size_t fdb_nlmsg_size(void)
+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
}
-static int br_fdb_replay_one(struct notifier_block *nb,
- struct net_bridge_fdb_entry *fdb,
- struct net_device *dev)
+static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
+ const struct net_bridge_fdb_entry *fdb,
+ unsigned long action, const void *ctx)
{
+ const struct net_bridge_port *p = READ_ONCE(fdb->dst);
struct switchdev_notifier_fdb_info item;
int err;
@@ -737,35 +744,39 @@ static int br_fdb_replay_one(struct notifier_block *nb,
item.vid = fdb->key.vlan_id;
item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
- item.info.dev = dev;
+ item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
+ item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
+ item.info.ctx = ctx;
- err = nb->notifier_call(nb, SWITCHDEV_FDB_ADD_TO_DEVICE, &item);
+ err = nb->notifier_call(nb, action, &item);
return notifier_to_errno(err);
}
-int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
+int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
struct notifier_block *nb)
{
struct net_bridge_fdb_entry *fdb;
struct net_bridge *br;
+ unsigned long action;
int err = 0;
- if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
+ if (!nb)
+ return 0;
+
+ if (!netif_is_bridge_master(br_dev))
return -EINVAL;
br = netdev_priv(br_dev);
+ if (adding)
+ action = SWITCHDEV_FDB_ADD_TO_DEVICE;
+ else
+ action = SWITCHDEV_FDB_DEL_TO_DEVICE;
+
rcu_read_lock();
hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) {
- struct net_bridge_port *dst = READ_ONCE(fdb->dst);
- struct net_device *dst_dev;
-
- dst_dev = dst ? dst->dev : br->dev;
- if (dst_dev != br_dev && dst_dev != dev)
- continue;
-
- err = br_fdb_replay_one(nb, fdb, dst_dev);
+ err = br_fdb_replay_one(br, nb, fdb, action, ctx);
if (err)
break;
}
@@ -774,7 +785,6 @@ int br_fdb_replay(struct net_device *br_dev, struct net_device *dev,
return err;
}
-EXPORT_SYMBOL_GPL(br_fdb_replay);
static void fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb, int type,
@@ -785,7 +795,7 @@ static void fdb_notify(struct net_bridge *br,
int err = -ENOBUFS;
if (swdev_notify)
- br_switchdev_fdb_notify(fdb, type);
+ br_switchdev_fdb_notify(br, fdb, type);
skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
if (skb == NULL)
@@ -955,8 +965,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
if (flags & NLM_F_EXCL)
return -EEXIST;
- if (fdb->dst != source) {
- fdb->dst = source;
+ if (READ_ONCE(fdb->dst) != source) {
+ WRITE_ONCE(fdb->dst, source);
modified = true;
}
}
@@ -1001,7 +1011,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
struct net_bridge_port *p, const unsigned char *addr,
- u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[])
+ u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[],
+ struct netlink_ext_ack *extack)
{
int err = 0;
@@ -1020,6 +1031,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
rcu_read_unlock();
local_bh_enable();
} else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
+ if (!p && !(ndm->ndm_state & NUD_PERMANENT)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "FDB entry towards bridge must be permanent");
+ return -EINVAL;
+ }
err = br_fdb_external_learn_add(br, p, addr, vid, true);
} else {
spin_lock_bh(&br->hash_lock);
@@ -1092,9 +1108,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
}
/* VID was specified, so use it. */
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb,
+ extack);
} else {
- err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb);
+ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb,
+ extack);
if (err || !vg || !vg->num_vlans)
goto out;
@@ -1106,7 +1124,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
if (!br_vlan_should_use(v))
continue;
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
- nfea_tb);
+ nfea_tb, extack);
if (err)
goto out;
}
@@ -1123,7 +1141,7 @@ static int fdb_delete_by_addr_and_port(struct net_bridge *br,
struct net_bridge_fdb_entry *fdb;
fdb = br_fdb_find(br, addr, vlan);
- if (!fdb || fdb->dst != p)
+ if (!fdb || READ_ONCE(fdb->dst) != p)
return -ENOENT;
fdb_delete(br, fdb, true);
@@ -1263,6 +1281,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (swdev_notify)
flags |= BIT(BR_FDB_ADDED_BY_USER);
+
+ if (!p)
+ flags |= BIT(BR_FDB_LOCAL);
+
fdb = fdb_create(br, p, addr, vid, flags);
if (!fdb) {
err = -ENOMEM;
@@ -1272,8 +1294,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
} else {
fdb->updated = jiffies;
- if (fdb->dst != p) {
- fdb->dst = p;
+ if (READ_ONCE(fdb->dst) != p) {
+ WRITE_ONCE(fdb->dst, p);
modified = true;
}
@@ -1289,6 +1311,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
if (swdev_notify)
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+ if (!p)
+ set_bit(BR_FDB_LOCAL, &fdb->flags);
+
if (modified)
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
}
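
The br_fdb.c hunks wrap every access to fdb->dst in READ_ONCE()/WRITE_ONCE(): the pointer is updated under the hash lock but read locklessly from the RCU fast path, and the annotations stop the compiler from tearing or re-reading it. The general shape of the pattern, with simplified, illustrative types:

#include <linux/compiler.h>
#include <linux/spinlock.h>

struct example_port;

struct example_entry {
	struct example_port *dst;	/* written under lock, read locklessly */
	spinlock_t lock;
};

/* Writer side: holds e->lock while updating the pointer. */
static void example_update_dst(struct example_entry *e,
			       struct example_port *new_dst)
{
	spin_lock(&e->lock);
	if (READ_ONCE(e->dst) != new_dst)
		WRITE_ONCE(e->dst, new_dst);
	spin_unlock(&e->lock);
}

/* Reader side: lockless (e.g. under RCU), so take a single snapshot. */
static struct example_port *example_get_dst(struct example_entry *e)
{
	return READ_ONCE(e->dst);
}
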
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6e9b049ae521..ec646656dbf1 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -48,6 +48,8 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
skb_set_network_header(skb, depth);
}
+ br_switchdev_frame_set_offload_fwd_mark(skb);
+
dev_queue_xmit(skb);
return 0;
@@ -76,6 +78,11 @@ static void __br_forward(const struct net_bridge_port *to,
struct net *net;
int br_hook;
+ /* Mark the skb for forwarding offload early so that br_handle_vlan()
+ * can know whether to pop the VLAN header on egress or keep it.
+ */
+ nbp_switchdev_frame_mark_tx_fwd_offload(to, skb);
+
vg = nbp_vlan_group_rcu(to);
skb = br_handle_vlan(to->br, to, vg, skb);
if (!skb)
@@ -174,6 +181,8 @@ static struct net_bridge_port *maybe_deliver(
if (!should_deliver(p, skb))
return prev;
+ nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb);
+
if (!prev)
goto out;
@@ -267,19 +276,19 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
/* called with rcu_read_lock */
void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
bool local_rcv, bool local_orig)
{
- struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
- struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *prev = NULL;
struct net_bridge_port_group *p;
bool allow_mode_include = true;
struct hlist_node *rp;
- rp = rcu_dereference(hlist_first_rcu(&br->router_list));
+ rp = br_multicast_get_first_rport_node(brmctx, skb);
+
if (mdst) {
p = rcu_dereference(mdst->ports);
- if (br_multicast_should_handle_mode(br, mdst->addr.proto) &&
+ if (br_multicast_should_handle_mode(brmctx, mdst->addr.proto) &&
br_multicast_is_star_g(&mdst->addr))
allow_mode_include = false;
} else {
@@ -290,7 +299,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct net_bridge_port *port, *lport, *rport;
lport = p ? p->key.port : NULL;
- rport = hlist_entry_safe(rp, struct net_bridge_port, rlist);
+ rport = br_multicast_rport_from_node_skb(rp, skb);
if ((unsigned long)lport > (unsigned long)rport) {
port = lport;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f7d2f472ae24..4a02f8bb278a 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -456,7 +456,7 @@ int br_add_bridge(struct net *net, const char *name)
dev_net_set(dev, net);
dev->rtnl_link_ops = &br_link_ops;
- res = register_netdev(dev);
+ res = register_netdevice(dev);
if (res)
free_netdev(dev);
return res;
@@ -467,7 +467,6 @@ int br_del_bridge(struct net *net, const char *name)
struct net_device *dev;
int ret = 0;
- rtnl_lock();
dev = __dev_get_by_name(net, name);
if (dev == NULL)
ret = -ENXIO; /* Could not find device */
@@ -485,7 +484,6 @@ int br_del_bridge(struct net *net, const char *name)
else
br_dev_delete(dev, NULL);
- rtnl_unlock();
return ret;
}
@@ -562,7 +560,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
struct net_bridge_port *p;
int err = 0;
unsigned br_hr, dev_hr;
- bool changed_addr;
+ bool changed_addr, fdb_synced = false;
/* Don't allow bridging non-ethernet like devices. */
if ((dev->flags & IFF_LOOPBACK) ||
@@ -616,6 +614,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
+ br_multicast_del_port(p);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -643,15 +642,24 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (err)
goto err5;
- err = nbp_switchdev_mark_set(p);
- if (err)
- goto err6;
-
dev_disable_lro(dev);
list_add_rcu(&p->list, &br->port_list);
nbp_update_port_count(br);
+ if (!br_promisc_port(p) && (p->dev->priv_flags & IFF_UNICAST_FLT)) {
+ /* When updating the port count we also update all ports'
+ * promiscuous mode.
+ * A port leaving promiscuous mode normally gets the bridge's
+ * fdb synced to the unicast filter (if supported), however,
+ * `br_port_clear_promisc` does not distinguish between
+ * non-promiscuous ports and *new* ports, so we need to
+ * sync explicitly here.
+ */
+ fdb_synced = br_fdb_sync_static(br, p) == 0;
+ if (!fdb_synced)
+ netdev_err(dev, "failed to sync bridge static fdb addresses to this port\n");
+ }
netdev_update_features(br->dev);
@@ -671,13 +679,13 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
*/
err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
if (err)
- goto err7;
+ goto err6;
}
err = nbp_vlan_init(p, extack);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
- goto err7;
+ goto err6;
}
spin_lock_bh(&br->lock);
@@ -700,11 +708,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
return 0;
-err7:
+err6:
+ if (fdb_synced)
+ br_fdb_unsync_static(br, p);
list_del_rcu(&p->list);
br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
-err6:
netdev_upper_dev_unlink(dev, br->dev);
err5:
dev->priv_flags &= ~IFF_BRIDGE_PORT;
@@ -714,6 +723,7 @@ err4:
err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
+ br_multicast_del_port(p);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8875e953ac53..b50382f957c1 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -69,8 +69,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
enum br_pkt_type pkt_type = BR_PKT_UNICAST;
struct net_bridge_fdb_entry *dst = NULL;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_mdb_entry *mdst;
bool local_rcv, mcast_hit = false;
+ struct net_bridge_mcast *brmctx;
+ struct net_bridge_vlan *vlan;
struct net_bridge *br;
u16 vid = 0;
u8 state;
@@ -78,9 +81,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
+ brmctx = &p->br->multicast_ctx;
+ pmctx = &p->multicast_ctx;
state = p->state;
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid,
- &state))
+ &state, &vlan))
goto out;
nbp_switchdev_frame_mark(p, skb);
@@ -98,7 +103,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
local_rcv = true;
} else {
pkt_type = BR_PKT_MULTICAST;
- if (br_multicast_rcv(br, p, skb, vid))
+ if (br_multicast_rcv(&brmctx, &pmctx, vlan, skb, vid))
goto drop;
}
}
@@ -128,11 +133,11 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
switch (pkt_type) {
case BR_PKT_MULTICAST:
- mdst = br_mdb_get(br, skb, vid);
+ mdst = br_mdb_get(brmctx, skb, vid);
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
- br_multicast_querier_exists(br, eth_hdr(skb), mdst)) {
+ br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) {
if ((mdst && mdst->host_joined) ||
- br_multicast_is_router(br)) {
+ br_multicast_is_router(brmctx, skb)) {
local_rcv = true;
br->dev->stats.multicast++;
}
@@ -162,7 +167,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if (!mcast_hit)
br_flood(br, skb, pkt_type, local_rcv, false);
else
- br_multicast_flood(mdst, skb, local_rcv, false);
+ br_multicast_flood(mdst, skb, brmctx, local_rcv, false);
}
if (local_rcv)
@@ -289,11 +294,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
p = br_port_get_rcu(skb->dev);
- if (p->flags & BR_VLAN_TUNNEL) {
- if (br_handle_ingress_vlan_tunnel(skb, p,
- nbp_vlan_group_rcu(p)))
- goto drop;
- }
+ if (p->flags & BR_VLAN_TUNNEL)
+ br_handle_ingress_vlan_tunnel(skb, p, nbp_vlan_group_rcu(p));
if (unlikely(is_link_local_ether_addr(dest))) {
u16 fwd_mask = p->br->group_fwd_mask_required;
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 2db800fc27ca..793b0db9d9a3 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -106,15 +106,32 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
* This interface is deprecated because it was too difficult
* to do the translation for 32/64bit ioctl compatibility.
*/
-static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
+int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd)
{
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_port *p = NULL;
unsigned long args[4];
+ void __user *argp;
int ret = -EOPNOTSUPP;
- if (copy_from_user(args, rq->ifr_data, sizeof(args)))
- return -EFAULT;
+ if (in_compat_syscall()) {
+ unsigned int cargs[4];
+
+ if (copy_from_user(cargs, data, sizeof(cargs)))
+ return -EFAULT;
+
+ args[0] = cargs[0];
+ args[1] = cargs[1];
+ args[2] = cargs[2];
+ args[3] = cargs[3];
+
+ argp = compat_ptr(args[1]);
+ } else {
+ if (copy_from_user(args, data, sizeof(args)))
+ return -EFAULT;
+
+ argp = (void __user *)args[1];
+ }
switch (args[0]) {
case BRCTL_ADD_IF:
@@ -171,7 +188,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
return -ENOMEM;
get_port_ifindices(br, indices, num);
- if (copy_to_user((void __user *)args[1], indices, num*sizeof(int)))
+ if (copy_to_user(argp, indices, num * sizeof(int)))
num = -EFAULT;
kfree(indices);
return num;
@@ -232,7 +249,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
rcu_read_unlock();
- if (copy_to_user((void __user *)args[1], &p, sizeof(p)))
+ if (copy_to_user(argp, &p, sizeof(p)))
return -EFAULT;
return 0;
@@ -282,8 +299,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
}
case BRCTL_GET_FDB_ENTRIES:
- return get_fdb_entries(br, (void __user *)args[1],
- args[2], args[3]);
+ return get_fdb_entries(br, argp, args[2], args[3]);
}
if (!ret) {
@@ -320,7 +336,7 @@ static int old_deviceless(struct net *net, void __user *uarg)
args[2] = get_bridge_ifindices(net, indices, args[2]);
- ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int))
+ ret = copy_to_user(uarg, indices, args[2]*sizeof(int))
? -EFAULT : args[2];
kfree(indices);
@@ -350,48 +366,47 @@ static int old_deviceless(struct net *net, void __user *uarg)
return -EOPNOTSUPP;
}
-int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
+int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
+ struct ifreq *ifr, void __user *uarg)
{
+ int ret = -EOPNOTSUPP;
+
+ rtnl_lock();
+
switch (cmd) {
case SIOCGIFBR:
case SIOCSIFBR:
- return old_deviceless(net, uarg);
-
+ ret = old_deviceless(net, uarg);
+ break;
case SIOCBRADDBR:
case SIOCBRDELBR:
{
char buf[IFNAMSIZ];
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- return -EPERM;
+ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
- if (copy_from_user(buf, uarg, IFNAMSIZ))
- return -EFAULT;
+ if (copy_from_user(buf, uarg, IFNAMSIZ)) {
+ ret = -EFAULT;
+ break;
+ }
buf[IFNAMSIZ-1] = 0;
if (cmd == SIOCBRADDBR)
- return br_add_bridge(net, buf);
-
- return br_del_bridge(net, buf);
- }
+ ret = br_add_bridge(net, buf);
+ else
+ ret = br_del_bridge(net, buf);
}
- return -EOPNOTSUPP;
-}
-
-int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-{
- struct net_bridge *br = netdev_priv(dev);
-
- switch (cmd) {
- case SIOCDEVPRIVATE:
- return old_dev_ioctl(dev, rq, cmd);
-
+ break;
case SIOCBRADDIF:
case SIOCBRDELIF:
- return add_del_if(br, rq->ifr_ifindex, cmd == SIOCBRADDIF);
-
+ ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
+ break;
}
- br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
- return -EOPNOTSUPP;
+ rtnl_unlock();
+
+ return ret;
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 95fa4af0e8dd..0281453f7766 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -16,31 +16,109 @@
#include "br_private.h"
-static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
- struct net_device *dev)
+static bool
+br_ip4_rports_get_timer(struct net_bridge_mcast_port *pmctx,
+ unsigned long *timer)
{
- struct net_bridge *br = netdev_priv(dev);
- struct net_bridge_port *p;
+ *timer = br_timer_value(&pmctx->ip4_mc_router_timer);
+ return !hlist_unhashed(&pmctx->ip4_rlist);
+}
+
+static bool
+br_ip6_rports_get_timer(struct net_bridge_mcast_port *pmctx,
+ unsigned long *timer)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ *timer = br_timer_value(&pmctx->ip6_mc_router_timer);
+ return !hlist_unhashed(&pmctx->ip6_rlist);
+#else
+ *timer = 0;
+ return false;
+#endif
+}
+
+static size_t __br_rports_one_size(void)
+{
+ return nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PORT */
+ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_TIMER */
+ nla_total_size(sizeof(u8)) + /* MDBA_ROUTER_PATTR_TYPE */
+ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET_TIMER */
+ nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET6_TIMER */
+ nla_total_size(sizeof(u32)); /* MDBA_ROUTER_PATTR_VID */
+}
+
+size_t br_rports_size(const struct net_bridge_mcast *brmctx)
+{
+ struct net_bridge_mcast_port *pmctx;
+ size_t size = nla_total_size(0); /* MDBA_ROUTER */
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list,
+ ip4_rlist)
+ size += __br_rports_one_size();
+
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list,
+ ip6_rlist)
+ size += __br_rports_one_size();
+#endif
+ rcu_read_unlock();
+
+ return size;
+}
+
+int br_rports_fill_info(struct sk_buff *skb,
+ const struct net_bridge_mcast *brmctx)
+{
+ u16 vid = brmctx->vlan ? brmctx->vlan->vid : 0;
+ bool have_ip4_mc_rtr, have_ip6_mc_rtr;
+ unsigned long ip4_timer, ip6_timer;
struct nlattr *nest, *port_nest;
+ struct net_bridge_port *p;
- if (!br->multicast_router || hlist_empty(&br->router_list))
+ if (!brmctx->multicast_router || !br_rports_have_mc_router(brmctx))
return 0;
nest = nla_nest_start_noflag(skb, MDBA_ROUTER);
if (nest == NULL)
return -EMSGSIZE;
- hlist_for_each_entry_rcu(p, &br->router_list, rlist) {
- if (!p)
+ list_for_each_entry_rcu(p, &brmctx->br->port_list, list) {
+ struct net_bridge_mcast_port *pmctx;
+
+ if (vid) {
+ struct net_bridge_vlan *v;
+
+ v = br_vlan_find(nbp_vlan_group(p), vid);
+ if (!v)
+ continue;
+ pmctx = &v->port_mcast_ctx;
+ } else {
+ pmctx = &p->multicast_ctx;
+ }
+
+ have_ip4_mc_rtr = br_ip4_rports_get_timer(pmctx, &ip4_timer);
+ have_ip6_mc_rtr = br_ip6_rports_get_timer(pmctx, &ip6_timer);
+
+ if (!have_ip4_mc_rtr && !have_ip6_mc_rtr)
continue;
+
port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT);
if (!port_nest)
goto fail;
+
if (nla_put_nohdr(skb, sizeof(u32), &p->dev->ifindex) ||
nla_put_u32(skb, MDBA_ROUTER_PATTR_TIMER,
- br_timer_value(&p->multicast_router_timer)) ||
+ max(ip4_timer, ip6_timer)) ||
nla_put_u8(skb, MDBA_ROUTER_PATTR_TYPE,
- p->multicast_router)) {
+ p->multicast_ctx.multicast_router) ||
+ (have_ip4_mc_rtr &&
+ nla_put_u32(skb, MDBA_ROUTER_PATTR_INET_TIMER,
+ ip4_timer)) ||
+ (have_ip6_mc_rtr &&
+ nla_put_u32(skb, MDBA_ROUTER_PATTR_INET6_TIMER,
+ ip6_timer)) ||
+ (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid))) {
nla_nest_cancel(skb, port_nest);
goto fail;
}
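
br_rports_size() above reserves message room per router port with nla_total_size(), i.e. each attribute's payload plus its 4-byte header, rounded up to a 4-byte boundary. A self-contained sketch of that arithmetic (a userspace re-implementation on top of the uapi macros, for illustration rather than the kernel helpers themselves):

/* Illustrative sizing arithmetic: nla_total_size() is attribute header
 * plus payload, rounded up to NLA_ALIGNTO (4) bytes.
 */
#include <stdio.h>
#include <stdint.h>
#include <linux/netlink.h>

static int nla_total_size(int payload)
{
	return NLA_ALIGN(NLA_HDRLEN + payload);
}

int main(void)
{
	/* one MDBA_ROUTER_PORT entry as sized by __br_rports_one_size() */
	int one = nla_total_size(sizeof(uint32_t)) +	/* MDBA_ROUTER_PORT */
		  nla_total_size(sizeof(uint32_t)) +	/* PATTR_TIMER */
		  nla_total_size(sizeof(uint8_t)) +	/* PATTR_TYPE */
		  nla_total_size(sizeof(uint32_t)) +	/* PATTR_INET_TIMER */
		  nla_total_size(sizeof(uint32_t)) +	/* PATTR_INET6_TIMER */
		  nla_total_size(sizeof(uint32_t));	/* PATTR_VID */

	printf("u8 attribute  -> %d bytes\n", nla_total_size(1)); /* 8 */
	printf("u32 attribute -> %d bytes\n", nla_total_size(4)); /* 8 */
	printf("one router port entry -> %d bytes\n", one);       /* 48 */
	return 0;
}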
@@ -195,7 +273,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
switch (mp->addr.proto) {
case htons(ETH_P_IP):
- dump_srcs_mode = !!(mp->br->multicast_igmp_version == 3);
+ dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_igmp_version == 3);
if (mp->addr.src.ip4) {
if (nla_put_in_addr(skb, MDBA_MDB_EATTR_SOURCE,
mp->addr.src.ip4))
@@ -205,7 +283,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- dump_srcs_mode = !!(mp->br->multicast_mld_version == 2);
+ dump_srcs_mode = !!(mp->br->multicast_ctx.multicast_mld_version == 2);
if (!ipv6_addr_any(&mp->addr.src.ip6)) {
if (nla_put_in6_addr(skb, MDBA_MDB_EATTR_SOURCE,
&mp->addr.src.ip6))
@@ -345,6 +423,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
for_each_netdev_rcu(net, dev) {
if (dev->priv_flags & IFF_EBRIDGE) {
+ struct net_bridge *br = netdev_priv(dev);
struct br_port_msg *bpm;
if (idx < s_idx)
@@ -361,7 +440,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
bpm->ifindex = dev->ifindex;
if (br_mdb_fill_info(skb, cb, dev) < 0)
goto out;
- if (br_rports_fill_info(skb, cb, dev) < 0)
+ if (br_rports_fill_info(skb, &br->multicast_ctx) < 0)
goto out;
cb->args[1] = 0;
@@ -438,7 +517,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
/* MDBA_MDB_EATTR_SOURCE */
if (pg->key.addr.src.ip4)
nlmsg_size += nla_total_size(sizeof(__be32));
- if (pg->key.port->br->multicast_igmp_version == 2)
+ if (pg->key.port->br->multicast_ctx.multicast_igmp_version == 2)
goto out;
addr_size = sizeof(__be32);
break;
@@ -447,7 +526,7 @@ static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
/* MDBA_MDB_EATTR_SOURCE */
if (!ipv6_addr_any(&pg->key.addr.src.ip6))
nlmsg_size += nla_total_size(sizeof(struct in6_addr));
- if (pg->key.port->br->multicast_mld_version == 1)
+ if (pg->key.port->br->multicast_ctx.multicast_mld_version == 1)
goto out;
addr_size = sizeof(struct in6_addr);
break;
@@ -522,19 +601,21 @@ static void br_switchdev_mdb_populate(struct switchdev_obj_port_mdb *mdb,
}
static int br_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
- struct switchdev_obj_port_mdb *mdb,
+ const struct switchdev_obj_port_mdb *mdb,
+ unsigned long action, const void *ctx,
struct netlink_ext_ack *extack)
{
struct switchdev_notifier_port_obj_info obj_info = {
.info = {
.dev = dev,
.extack = extack,
+ .ctx = ctx,
},
.obj = &mdb->obj,
};
int err;
- err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+ err = nb->notifier_call(nb, action, &obj_info);
return notifier_to_errno(err);
}
@@ -558,16 +639,21 @@ static int br_mdb_queue_one(struct list_head *mdb_list,
}
int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
- struct notifier_block *nb, struct netlink_ext_ack *extack)
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
- struct net_bridge_mdb_entry *mp;
+ const struct net_bridge_mdb_entry *mp;
struct switchdev_obj *obj, *tmp;
struct net_bridge *br;
+ unsigned long action;
LIST_HEAD(mdb_list);
int err = 0;
ASSERT_RTNL();
+ if (!nb)
+ return 0;
+
if (!netif_is_bridge_master(br_dev) || !netif_is_bridge_port(dev))
return -EINVAL;
@@ -587,8 +673,8 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
rcu_read_lock();
hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
- struct net_bridge_port_group __rcu **pp;
- struct net_bridge_port_group *p;
+ struct net_bridge_port_group __rcu * const *pp;
+ const struct net_bridge_port_group *p;
if (mp->host_joined) {
err = br_mdb_queue_one(&mdb_list,
@@ -617,9 +703,14 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
rcu_read_unlock();
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
list_for_each_entry(obj, &mdb_list, list) {
err = br_mdb_replay_one(nb, dev, SWITCHDEV_OBJ_PORT_MDB(obj),
- extack);
+ action, ctx, extack);
if (err)
goto out_free_mdb;
}
@@ -632,7 +723,6 @@ out_free_mdb:
return err;
}
-EXPORT_SYMBOL_GPL(br_mdb_replay);
static void br_mdb_switchdev_host_port(struct net_device *dev,
struct net_device *lower_dev,
@@ -727,12 +817,12 @@ errout:
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
struct net_device *dev,
- int ifindex, u32 pid,
+ int ifindex, u16 vid, u32 pid,
u32 seq, int type, unsigned int flags)
{
+ struct nlattr *nest, *port_nest;
struct br_port_msg *bpm;
struct nlmsghdr *nlh;
- struct nlattr *nest;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
if (!nlh)
@@ -746,8 +836,18 @@ static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
if (!nest)
goto cancel;
- if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex))
+ port_nest = nla_nest_start_noflag(skb, MDBA_ROUTER_PORT);
+ if (!port_nest)
+ goto end;
+ if (nla_put_nohdr(skb, sizeof(u32), &ifindex)) {
+ nla_nest_cancel(skb, port_nest);
goto end;
+ }
+ if (vid && nla_put_u16(skb, MDBA_ROUTER_PATTR_VID, vid)) {
+ nla_nest_cancel(skb, port_nest);
+ goto end;
+ }
+ nla_nest_end(skb, port_nest);
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
@@ -763,23 +863,28 @@ cancel:
static inline size_t rtnl_rtr_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct br_port_msg))
- + nla_total_size(sizeof(__u32));
+ + nla_total_size(sizeof(__u32))
+ + nla_total_size(sizeof(u16));
}
-void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
int type)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
int ifindex;
+ u16 vid;
- ifindex = port ? port->dev->ifindex : 0;
+ ifindex = pmctx ? pmctx->port->dev->ifindex : 0;
+ vid = pmctx && br_multicast_port_ctx_is_vlan(pmctx) ? pmctx->vlan->vid :
+ 0;
skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC);
if (!skb)
goto errout;
- err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF);
+ err = nlmsg_populate_rtr_fill(skb, dev, ifindex, vid, 0, 0, type,
+ NTF_SELF);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -950,14 +1055,47 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
+static struct net_bridge_mcast *
+__br_mdb_choose_context(struct net_bridge *br,
+ const struct br_mdb_entry *entry,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_mcast *brmctx = NULL;
+ struct net_bridge_vlan *v;
+
+ if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ brmctx = &br->multicast_ctx;
+ goto out;
+ }
+
+ if (!entry->vid) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot add an entry without a vlan when vlan snooping is enabled");
+ goto out;
+ }
+
+ v = br_vlan_find(br_vlan_group(br), entry->vid);
+ if (!v) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan is not configured");
+ goto out;
+ }
+ if (br_multicast_ctx_vlan_global_disabled(&v->br_mcast_ctx)) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan's multicast processing is disabled");
+ goto out;
+ }
+ brmctx = &v->br_mcast_ctx;
+out:
+ return brmctx;
+}
+
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
struct br_mdb_entry *entry,
struct nlattr **mdb_attrs,
struct netlink_ext_ack *extack)
{
struct net_bridge_mdb_entry *mp, *star_mp;
- struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+ struct net_bridge_mcast *brmctx;
struct br_ip group, star_group;
unsigned long now = jiffies;
unsigned char flags = 0;
@@ -966,6 +1104,10 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
__mdb_entry_to_br_ip(entry, &group, mdb_attrs);
+ brmctx = __br_mdb_choose_context(br, entry, extack);
+ if (!brmctx)
+ return -EINVAL;
+
/* host join errors which can happen before creating the group */
if (!port) {
/* don't allow any flags for host-joined groups */
@@ -999,7 +1141,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return -EEXIST;
}
- br_multicast_host_join(mp, false);
+ br_multicast_host_join(brmctx, mp, false);
br_mdb_notify(br->dev, mp, NULL, RTM_NEWMDB);
return 0;
@@ -1030,14 +1172,15 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
}
rcu_assign_pointer(*pp, p);
if (entry->state == MDB_TEMPORARY)
- mod_timer(&p->timer, now + br->multicast_membership_interval);
+ mod_timer(&p->timer,
+ now + brmctx->multicast_membership_interval);
br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
/* if we are adding a new EXCLUDE port group (*,G) it needs to be also
* added to all S,G entries for proper replication, if we are adding
* a new INCLUDE port (S,G) then all of *,G EXCLUDE ports need to be
* added to it for proper replication
*/
- if (br_multicast_should_handle_mode(br, group.proto)) {
+ if (br_multicast_should_handle_mode(brmctx, group.proto)) {
switch (filter_mode) {
case MCAST_EXCLUDE:
br_multicast_star_g_handle_mode(p, MCAST_EXCLUDE);
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index cd2b1e424e54..fd2de35ffb3c 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -204,6 +204,33 @@ static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp,
hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies));
br_mrp_skb_common(skb, mrp);
+
+ /* If the node behaves as an MRA, the Test frame also needs an Option
+ * TLV, which in turn includes a sub-option TLV of type AUTO_MGR.
+ */
+ if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) {
+ struct br_mrp_sub_option1_hdr *sub_opt = NULL;
+ struct br_mrp_tlv_hdr *sub_tlv = NULL;
+ struct br_mrp_oui_hdr *oui = NULL;
+ u8 length;
+
+ length = sizeof(*sub_opt) + sizeof(*sub_tlv) + sizeof(*oui) +
+ MRP_OPT_PADDING;
+ br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_OPTION, length);
+
+ oui = skb_put(skb, sizeof(*oui));
+ memset(oui, 0x0, sizeof(*oui));
+ sub_opt = skb_put(skb, sizeof(*sub_opt));
+ memset(sub_opt, 0x0, sizeof(*sub_opt));
+
+ sub_tlv = skb_put(skb, sizeof(*sub_tlv));
+ sub_tlv->type = BR_MRP_SUB_TLV_HEADER_TEST_AUTO_MGR;
+
+ /* 32 bit alignment shall be ensured, therefore add 2 bytes */
+ skb_put(skb, MRP_OPT_PADDING);
+ }
+
br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0);
return skb;
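
The MRA Option TLV built above has to keep the frame 32-bit aligned, hence the MRP_OPT_PADDING bytes appended after the sub-option TLV. A small sketch of the general length-plus-padding arithmetic (the struct sizes are assumed stand-ins, not the exact MRP header layouts):

/* Illustrative 32-bit alignment padding for a TLV payload; the struct
 * sizes below are assumptions for demonstration only.
 */
#include <stdio.h>
#include <stdint.h>

struct oui_hdr     { uint8_t oui[3]; };			/* vendor OUI */
struct sub_opt_hdr { uint8_t type; uint8_t data; };
struct sub_tlv_hdr { uint8_t type; uint8_t length; };

/* bytes needed to round len up to the next 32-bit boundary */
static size_t pad_to_u32(size_t len)
{
	return (4 - (len & 3)) & 3;
}

int main(void)
{
	size_t payload = sizeof(struct oui_hdr) +
			 sizeof(struct sub_opt_hdr) +
			 sizeof(struct sub_tlv_hdr);
	size_t padding = pad_to_u32(payload);

	printf("payload=%zu padding=%zu total=%zu\n",
	       payload, padding, payload + padding);
	return 0;
}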
@@ -627,8 +654,7 @@ int br_mrp_set_ring_state(struct net_bridge *br,
if (!mrp)
return -EINVAL;
- if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED &&
- state->ring_state != BR_MRP_RING_STATE_CLOSED)
+ if (mrp->ring_state != state->ring_state)
mrp->ring_transitions++;
mrp->ring_state = state->ring_state;
@@ -715,8 +741,7 @@ int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state)
if (!mrp)
return -EINVAL;
- if (mrp->in_state == BR_MRP_IN_STATE_CLOSED &&
- state->in_state != BR_MRP_IN_STATE_CLOSED)
+ if (mrp->in_state != state->in_state)
mrp->in_transitions++;
mrp->in_state = state->in_state;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 226bb05c3b42..3523c8c7068f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -49,27 +49,30 @@ static const struct rhashtable_params br_sg_port_rht_params = {
.automatic_shrinking = true,
};
-static void br_multicast_start_querier(struct net_bridge *br,
+static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query);
-static void br_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port);
-static void br_ip4_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx);
+static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src);
static void br_multicast_port_group_rexmit(struct timer_list *t);
-static void __del_port_router(struct net_bridge_port *p);
+static void
+br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted);
+static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx);
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid, const unsigned char *src);
#endif
static struct net_bridge_port_group *
-__br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+__br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -77,6 +80,7 @@ __br_multicast_add_group(struct net_bridge *br,
bool blocked);
static void br_multicast_find_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg);
+static void __br_multicast_stop(struct net_bridge_mcast *brmctx);
static struct net_bridge_port_group *
br_sg_port_find(struct net_bridge *br,
@@ -137,12 +141,14 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
}
#endif
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid)
{
+ struct net_bridge *br = brmctx->br;
struct br_ip ip;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+ br_multicast_ctx_vlan_global_disabled(brmctx))
return NULL;
if (BR_INPUT_SKB_CB(skb)->igmp)
@@ -155,7 +161,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
switch (skb->protocol) {
case htons(ETH_P_IP):
ip.dst.ip4 = ip_hdr(skb)->daddr;
- if (br->multicast_igmp_version == 3) {
+ if (brmctx->multicast_igmp_version == 3) {
struct net_bridge_mdb_entry *mdb;
ip.src.ip4 = ip_hdr(skb)->saddr;
@@ -168,7 +174,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
ip.dst.ip6 = ipv6_hdr(skb)->daddr;
- if (br->multicast_mld_version == 2) {
+ if (brmctx->multicast_mld_version == 2) {
struct net_bridge_mdb_entry *mdb;
ip.src.ip6 = ipv6_hdr(skb)->saddr;
@@ -187,6 +193,62 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return br_mdb_ip_get_rcu(br, &ip);
}
+/* IMPORTANT: this function must be used only when the contexts cannot be
+ * passed down (e.g. timer) and only for read-only purposes, because the
+ * vlan snooping option can change, so it may return either context
+ * (non-vlan or vlan). Its intended purpose is to read timer values from
+ * the *current* context based on that option. At worst this could lead to
+ * inconsistent timers when the contexts are changed, e.g. a src timer
+ * that needs to re-arm with a specific delay taken from the old context.
+ */
+static struct net_bridge_mcast_port *
+br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg)
+{
+ struct net_bridge_mcast_port *pmctx = &pg->key.port->multicast_ctx;
+ struct net_bridge_vlan *vlan;
+
+ lockdep_assert_held_once(&pg->key.port->br->multicast_lock);
+
+ /* if vlan snooping is disabled use the port's multicast context */
+ if (!pg->key.addr.vid ||
+ !br_opt_get(pg->key.port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ goto out;
+
+ /* Locking is tricky here: due to the different rules for multicast and
+ * vlans we need rcu to find the vlan and to check that it has the
+ * BR_VLFLAG_MCAST_ENABLED flag set. That flag can only change under
+ * multicast_lock, which must already be held here, so the vlan's pmctx
+ * can safely be used on return.
+ */
+ rcu_read_lock();
+ vlan = br_vlan_find(nbp_vlan_group_rcu(pg->key.port), pg->key.addr.vid);
+ if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ pmctx = &vlan->port_mcast_ctx;
+ else
+ pmctx = NULL;
+ rcu_read_unlock();
+out:
+ return pmctx;
+}
+
+/* when snooping we need to check if the contexts should be used
+ * in the following order:
+ * - if pmctx is non-NULL (port), check if it should be used
+ * - if pmctx is NULL (bridge), check if brmctx should be used
+ */
+static bool
+br_multicast_ctx_should_use(const struct net_bridge_mcast *brmctx,
+ const struct net_bridge_mcast_port *pmctx)
+{
+ if (!netif_running(brmctx->br->dev))
+ return false;
+
+ if (pmctx)
+ return !br_multicast_port_ctx_state_disabled(pmctx);
+ else
+ return !br_multicast_ctx_vlan_disabled(brmctx);
+}
+
static bool br_port_group_equal(struct net_bridge_port_group *p,
struct net_bridge_port *port,
const unsigned char *src)
@@ -200,20 +262,23 @@ static bool br_port_group_equal(struct net_bridge_port_group *p,
return ether_addr_equal(src, p->eth_addr);
}
-static void __fwd_add_star_excl(struct net_bridge_port_group *pg,
+static void __fwd_add_star_excl(struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg,
struct br_ip *sg_ip)
{
struct net_bridge_port_group_sg_key sg_key;
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_port_group *src_pg;
+ struct net_bridge_mcast *brmctx;
memset(&sg_key, 0, sizeof(sg_key));
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
sg_key.port = pg->key.port;
sg_key.addr = *sg_ip;
- if (br_sg_port_find(br, &sg_key))
+ if (br_sg_port_find(brmctx->br, &sg_key))
return;
- src_pg = __br_multicast_add_group(br, pg->key.port, sg_ip, pg->eth_addr,
+ src_pg = __br_multicast_add_group(brmctx, pmctx,
+ sg_ip, pg->eth_addr,
MCAST_INCLUDE, false, false);
if (IS_ERR_OR_NULL(src_pg) ||
src_pg->rt_protocol != RTPROT_KERNEL)
@@ -253,6 +318,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
{
struct net_bridge *br = pg->key.port->br;
struct net_bridge_port_group *pg_lst;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_mdb_entry *mp;
struct br_ip sg_ip;
@@ -262,9 +328,13 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
mp = br_mdb_ip_get(br, &pg->key.addr);
if (!mp)
return;
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ return;
memset(&sg_ip, 0, sizeof(sg_ip));
sg_ip = pg->key.addr;
+
for (pg_lst = mlock_dereference(mp->ports, br);
pg_lst;
pg_lst = mlock_dereference(pg_lst->next, br)) {
@@ -281,7 +351,7 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
__fwd_del_star_excl(pg, &sg_ip);
break;
case MCAST_EXCLUDE:
- __fwd_add_star_excl(pg, &sg_ip);
+ __fwd_add_star_excl(pmctx, pg, &sg_ip);
break;
}
}
@@ -374,7 +444,9 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
{
struct net_bridge_port_group_sg_key sg_key;
struct net_bridge *br = star_mp->br;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_port_group *pg;
+ struct net_bridge_mcast *brmctx;
if (WARN_ON(br_multicast_is_star_g(&sg->key.addr)))
return;
@@ -397,7 +469,12 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
if (br_sg_port_find(br, &sg_key))
continue;
- src_pg = __br_multicast_add_group(br, pg->key.port,
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ continue;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+
+ src_pg = __br_multicast_add_group(brmctx, pmctx,
&sg->key.addr,
sg->eth_addr,
MCAST_INCLUDE, false, false);
@@ -411,16 +488,23 @@ void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp,
static void br_multicast_fwd_src_add(struct net_bridge_group_src *src)
{
struct net_bridge_mdb_entry *star_mp;
+ struct net_bridge_mcast_port *pmctx;
struct net_bridge_port_group *sg;
+ struct net_bridge_mcast *brmctx;
struct br_ip sg_ip;
if (src->flags & BR_SGRP_F_INSTALLED)
return;
memset(&sg_ip, 0, sizeof(sg_ip));
+ pmctx = br_multicast_pg_to_port_ctx(src->pg);
+ if (!pmctx)
+ return;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
sg_ip = src->pg->key.addr;
sg_ip.src = src->addr.src;
- sg = __br_multicast_add_group(src->br, src->pg->key.port, &sg_ip,
+
+ sg = __br_multicast_add_group(brmctx, pmctx, &sg_ip,
src->pg->eth_addr, MCAST_INCLUDE, false,
!timer_pending(&src->timer));
if (IS_ERR_OR_NULL(sg))
@@ -689,7 +773,28 @@ static void br_multicast_gc(struct hlist_head *head)
}
}
-static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
+static void __br_multicast_query_handle_vlan(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct sk_buff *skb)
+{
+ struct net_bridge_vlan *vlan = NULL;
+
+ if (pmctx && br_multicast_port_ctx_is_vlan(pmctx))
+ vlan = pmctx->vlan;
+ else if (br_multicast_ctx_is_vlan(brmctx))
+ vlan = brmctx->vlan;
+
+ if (vlan && !(vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED)) {
+ u16 vlan_proto;
+
+ if (br_vlan_get_proto(brmctx->br->dev, &vlan_proto) != 0)
+ return;
+ __vlan_hwaccel_put_tag(skb, htons(vlan_proto), vlan->vid);
+ }
+}
+
+static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
__be32 ip_dst, __be32 group,
bool with_srcs, bool over_lmqt,
@@ -711,11 +816,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
u16 lmqt_srcs = 0;
igmp_hdr_size = sizeof(*ih);
- if (br->multicast_igmp_version == 3) {
+ if (brmctx->multicast_igmp_version == 3) {
igmp_hdr_size = sizeof(*ihv3);
if (pg && with_srcs) {
- lmqt = now + (br->multicast_last_member_interval *
- br->multicast_last_member_count);
+ lmqt = now + (brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (over_lmqt == time_after(ent->timer.expires,
lmqt) &&
@@ -731,19 +836,20 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
pkt_size = sizeof(*eth) + sizeof(*iph) + 4 + igmp_hdr_size;
if ((p && pkt_size > p->dev->mtu) ||
- pkt_size > br->dev->mtu)
+ pkt_size > brmctx->br->dev->mtu)
return NULL;
- skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
+ skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size);
if (!skb)
goto out;
+ __br_multicast_query_handle_vlan(brmctx, pmctx, skb);
skb->protocol = htons(ETH_P_IP);
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- ether_addr_copy(eth->h_source, br->dev->dev_addr);
+ ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr);
ip_eth_mc_map(ip_dst, eth->h_dest);
eth->h_proto = htons(ETH_P_IP);
skb_put(skb, sizeof(*eth));
@@ -759,8 +865,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
iph->frag_off = htons(IP_DF);
iph->ttl = 1;
iph->protocol = IPPROTO_IGMP;
- iph->saddr = br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
- inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0;
+ iph->saddr = br_opt_get(brmctx->br, BROPT_MULTICAST_QUERY_USE_IFADDR) ?
+ inet_select_addr(brmctx->br->dev, 0, RT_SCOPE_LINK) : 0;
iph->daddr = ip_dst;
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
@@ -772,12 +878,12 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
skb_set_transport_header(skb, skb->len);
*igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
- switch (br->multicast_igmp_version) {
+ switch (brmctx->multicast_igmp_version) {
case 2:
ih = igmp_hdr(skb);
ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
- ih->code = (group ? br->multicast_last_member_interval :
- br->multicast_query_response_interval) /
+ ih->code = (group ? brmctx->multicast_last_member_interval :
+ brmctx->multicast_query_response_interval) /
(HZ / IGMP_TIMER_SCALE);
ih->group = group;
ih->csum = 0;
@@ -787,11 +893,11 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
case 3:
ihv3 = igmpv3_query_hdr(skb);
ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
- ihv3->code = (group ? br->multicast_last_member_interval :
- br->multicast_query_response_interval) /
+ ihv3->code = (group ? brmctx->multicast_last_member_interval :
+ brmctx->multicast_query_response_interval) /
(HZ / IGMP_TIMER_SCALE);
ihv3->group = group;
- ihv3->qqic = br->multicast_query_interval / HZ;
+ ihv3->qqic = brmctx->multicast_query_interval / HZ;
ihv3->nsrcs = htons(lmqt_srcs);
ihv3->resv = 0;
ihv3->suppress = sflag;
@@ -834,7 +940,8 @@ out:
}
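
Both the IGMPv2 and IGMPv3 branches above scale jiffies-based intervals into wire fields: the max response code is expressed in 1/10 second units (IGMP_TIMER_SCALE), while ihv3->qqic is in whole seconds. A worked sketch of that conversion (HZ is an assumed config value; the intervals below are the bridge defaults):

/* Illustrative conversion of bridge timer intervals (in jiffies) into
 * IGMP query fields, mirroring the arithmetic in the hunks above.
 * HZ=250 is an assumed config; 10 s / 125 s are the bridge defaults.
 */
#include <stdio.h>

#define HZ			250	/* assumed CONFIG_HZ */
#define IGMP_TIMER_SCALE	10	/* response code is in 1/10 s */

int main(void)
{
	unsigned long query_response_interval = 10 * HZ;	/* 10 s */
	unsigned long query_interval	      = 125 * HZ;	/* 125 s */

	/* ih->code / ihv3->code: max response time in 1/10 s units */
	unsigned int code = query_response_interval / (HZ / IGMP_TIMER_SCALE);
	/* ihv3->qqic: querier's query interval in whole seconds */
	unsigned int qqic = query_interval / HZ;

	printf("max response code = %u (i.e. %u.%u s)\n",
	       code, code / 10, code % 10);
	printf("qqic = %u s\n", qqic);
	return 0;
}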
#if IS_ENABLED(CONFIG_IPV6)
-static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
+static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
const struct in6_addr *ip6_dst,
const struct in6_addr *group,
@@ -859,11 +966,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
u8 *hopopt;
mld_hdr_size = sizeof(*mldq);
- if (br->multicast_mld_version == 2) {
+ if (brmctx->multicast_mld_version == 2) {
mld_hdr_size = sizeof(*mld2q);
if (pg && with_srcs) {
- llqt = now + (br->multicast_last_member_interval *
- br->multicast_last_member_count);
+ llqt = now + (brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (over_llqt == time_after(ent->timer.expires,
llqt) &&
@@ -879,20 +986,21 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
pkt_size = sizeof(*eth) + sizeof(*ip6h) + 8 + mld_hdr_size;
if ((p && pkt_size > p->dev->mtu) ||
- pkt_size > br->dev->mtu)
+ pkt_size > brmctx->br->dev->mtu)
return NULL;
- skb = netdev_alloc_skb_ip_align(br->dev, pkt_size);
+ skb = netdev_alloc_skb_ip_align(brmctx->br->dev, pkt_size);
if (!skb)
goto out;
+ __br_multicast_query_handle_vlan(brmctx, pmctx, skb);
skb->protocol = htons(ETH_P_IPV6);
/* Ethernet header */
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
- ether_addr_copy(eth->h_source, br->dev->dev_addr);
+ ether_addr_copy(eth->h_source, brmctx->br->dev->dev_addr);
eth->h_proto = htons(ETH_P_IPV6);
skb_put(skb, sizeof(*eth));
@@ -905,14 +1013,14 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
ip6h->nexthdr = IPPROTO_HOPOPTS;
ip6h->hop_limit = 1;
ip6h->daddr = *ip6_dst;
- if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0,
- &ip6h->saddr)) {
+ if (ipv6_dev_get_saddr(dev_net(brmctx->br->dev), brmctx->br->dev,
+ &ip6h->daddr, 0, &ip6h->saddr)) {
kfree_skb(skb);
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, false);
+ br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, false);
return NULL;
}
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
+ br_opt_toggle(brmctx->br, BROPT_HAS_IPV6_ADDR, true);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
hopopt = (u8 *)(ip6h + 1);
@@ -930,10 +1038,10 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
/* ICMPv6 */
skb_set_transport_header(skb, skb->len);
interval = ipv6_addr_any(group) ?
- br->multicast_query_response_interval :
- br->multicast_last_member_interval;
+ brmctx->multicast_query_response_interval :
+ brmctx->multicast_last_member_interval;
*igmp_type = ICMPV6_MGM_QUERY;
- switch (br->multicast_mld_version) {
+ switch (brmctx->multicast_mld_version) {
case 1:
mldq = (struct mld_msg *)icmp6_hdr(skb);
mldq->mld_type = ICMPV6_MGM_QUERY;
@@ -956,7 +1064,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
mld2q->mld2q_suppress = sflag;
mld2q->mld2q_qrv = 2;
mld2q->mld2q_nsrcs = htons(llqt_srcs);
- mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
+ mld2q->mld2q_qqic = brmctx->multicast_query_interval / HZ;
mld2q->mld2q_mca = *group;
csum = &mld2q->mld2q_cksum;
csum_start = (void *)mld2q;
@@ -997,7 +1105,8 @@ out:
}
#endif
-static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
+static struct sk_buff *br_multicast_alloc_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
struct br_ip *ip_dst,
struct br_ip *group,
@@ -1010,7 +1119,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
switch (group->proto) {
case htons(ETH_P_IP):
ip4_dst = ip_dst ? ip_dst->dst.ip4 : htonl(INADDR_ALLHOSTS_GROUP);
- return br_ip4_multicast_alloc_query(br, pg,
+ return br_ip4_multicast_alloc_query(brmctx, pmctx, pg,
ip4_dst, group->dst.ip4,
with_srcs, over_lmqt,
sflag, igmp_type,
@@ -1025,7 +1134,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
ipv6_addr_set(&ip6_dst, htonl(0xff020000), 0, 0,
htonl(1));
- return br_ip6_multicast_alloc_query(br, pg,
+ return br_ip6_multicast_alloc_query(brmctx, pmctx, pg,
&ip6_dst, &group->dst.ip6,
with_srcs, over_lmqt,
sflag, igmp_type,
@@ -1203,7 +1312,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
return p;
}
-void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
+void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_mdb_entry *mp, bool notify)
{
if (!mp->host_joined) {
mp->host_joined = true;
@@ -1216,7 +1326,7 @@ void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify)
if (br_group_is_l2(&mp->addr))
return;
- mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval);
+ mod_timer(&mp->timer, jiffies + brmctx->multicast_membership_interval);
}
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
@@ -1232,8 +1342,8 @@ void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify)
}
static struct net_bridge_port_group *
-__br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+__br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -1245,29 +1355,28 @@ __br_multicast_add_group(struct net_bridge *br,
struct net_bridge_mdb_entry *mp;
unsigned long now = jiffies;
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
- mp = br_multicast_new_group(br, group);
+ mp = br_multicast_new_group(brmctx->br, group);
if (IS_ERR(mp))
return ERR_CAST(mp);
- if (!port) {
- br_multicast_host_join(mp, true);
+ if (!pmctx) {
+ br_multicast_host_join(brmctx, mp, true);
goto out;
}
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
- if (br_port_group_equal(p, port, src))
+ if (br_port_group_equal(p, pmctx->port, src))
goto found;
- if ((unsigned long)p->key.port < (unsigned long)port)
+ if ((unsigned long)p->key.port < (unsigned long)pmctx->port)
break;
}
- p = br_multicast_new_port_group(port, group, *pp, 0, src,
+ p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
filter_mode, RTPROT_KERNEL);
if (unlikely(!p)) {
p = ERR_PTR(-ENOMEM);
@@ -1276,18 +1385,19 @@ __br_multicast_add_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p);
if (blocked)
p->flags |= MDB_PG_FLAGS_BLOCKED;
- br_mdb_notify(br->dev, mp, p, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mp, p, RTM_NEWMDB);
found:
if (igmpv2_mldv1)
- mod_timer(&p->timer, now + br->multicast_membership_interval);
+ mod_timer(&p->timer,
+ now + brmctx->multicast_membership_interval);
out:
return p;
}
-static int br_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
const unsigned char *src,
u8 filter_mode,
@@ -1296,18 +1406,18 @@ static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_port_group *pg;
int err;
- spin_lock(&br->multicast_lock);
- pg = __br_multicast_add_group(br, port, group, src, filter_mode,
+ spin_lock(&brmctx->br->multicast_lock);
+ pg = __br_multicast_add_group(brmctx, pmctx, group, src, filter_mode,
igmpv2_mldv1, false);
/* NULL is considered valid for host joined groups */
err = PTR_ERR_OR_ZERO(pg);
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
return err;
}
-static int br_ip4_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src,
@@ -1325,13 +1435,13 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
br_group.vid = vid;
filter_mode = igmpv2 ? MCAST_EXCLUDE : MCAST_INCLUDE;
- return br_multicast_add_group(br, port, &br_group, src, filter_mode,
- igmpv2);
+ return br_multicast_add_group(brmctx, pmctx, &br_group, src,
+ filter_mode, igmpv2);
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_add_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_add_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid,
const unsigned char *src,
@@ -1349,28 +1459,71 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
br_group.vid = vid;
filter_mode = mldv1 ? MCAST_EXCLUDE : MCAST_INCLUDE;
- return br_multicast_add_group(br, port, &br_group, src, filter_mode,
- mldv1);
+ return br_multicast_add_group(brmctx, pmctx, &br_group, src,
+ filter_mode, mldv1);
}
#endif
-static void br_multicast_router_expired(struct timer_list *t)
+static bool br_multicast_rport_del(struct hlist_node *rlist)
{
- struct net_bridge_port *port =
- from_timer(port, t, multicast_router_timer);
- struct net_bridge *br = port->br;
+ if (hlist_unhashed(rlist))
+ return false;
+
+ hlist_del_init_rcu(rlist);
+ return true;
+}
+
+static bool br_ip4_multicast_rport_del(struct net_bridge_mcast_port *pmctx)
+{
+ return br_multicast_rport_del(&pmctx->ip4_rlist);
+}
+
+static bool br_ip6_multicast_rport_del(struct net_bridge_mcast_port *pmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ return br_multicast_rport_del(&pmctx->ip6_rlist);
+#else
+ return false;
+#endif
+}
+
+static void br_multicast_router_expired(struct net_bridge_mcast_port *pmctx,
+ struct timer_list *t,
+ struct hlist_node *rlist)
+{
+ struct net_bridge *br = pmctx->port->br;
+ bool del;
spin_lock(&br->multicast_lock);
- if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
- port->multicast_router == MDB_RTR_TYPE_PERM ||
- timer_pending(&port->multicast_router_timer))
+ if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ pmctx->multicast_router == MDB_RTR_TYPE_PERM ||
+ timer_pending(t))
goto out;
- __del_port_router(port);
+ del = br_multicast_rport_del(rlist);
+ br_multicast_rport_del_notify(pmctx, del);
out:
spin_unlock(&br->multicast_lock);
}
+static void br_ip4_multicast_router_expired(struct timer_list *t)
+{
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip4_mc_router_timer);
+
+ br_multicast_router_expired(pmctx, t, &pmctx->ip4_rlist);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_router_expired(struct timer_list *t)
+{
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip6_mc_router_timer);
+
+ br_multicast_router_expired(pmctx, t, &pmctx->ip6_rlist);
+}
+#endif
+
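
The per-family expiry handlers above recover the owning port context from the timer pointer via from_timer(), which is container_of() underneath: subtract the member's offset from the member's address. A self-contained sketch of that pattern (hypothetical structs, plain userspace C):

/* Illustrative container_of()/from_timer() pattern: given a pointer to
 * an embedded member, recover the enclosing structure. Hypothetical
 * types, not the bridge ones; the macro is simplified (no type check).
 */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct timer { unsigned long expires; };

struct mcast_port_ctx {
	int ifindex;
	struct timer ip4_mc_router_timer;
	struct timer ip6_mc_router_timer;
};

static void router_expired(struct timer *t)
{
	/* same idea as from_timer(pmctx, t, ip4_mc_router_timer) */
	struct mcast_port_ctx *pmctx =
		container_of(t, struct mcast_port_ctx, ip4_mc_router_timer);

	printf("router timer expired on ifindex %d\n", pmctx->ifindex);
}

int main(void)
{
	struct mcast_port_ctx ctx = { .ifindex = 42 };

	router_expired(&ctx.ip4_mc_router_timer);
	return 0;
}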
static void br_mc_router_state_change(struct net_bridge *p,
bool is_mc_router)
{
@@ -1384,64 +1537,86 @@ static void br_mc_router_state_change(struct net_bridge *p,
switchdev_port_attr_set(p->dev, &attr, NULL);
}
-static void br_multicast_local_router_expired(struct timer_list *t)
+static void br_multicast_local_router_expired(struct net_bridge_mcast *brmctx,
+ struct timer_list *timer)
{
- struct net_bridge *br = from_timer(br, t, multicast_router_timer);
-
- spin_lock(&br->multicast_lock);
- if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
- br->multicast_router == MDB_RTR_TYPE_PERM ||
- timer_pending(&br->multicast_router_timer))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (brmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ brmctx->multicast_router == MDB_RTR_TYPE_PERM ||
+ br_ip4_multicast_is_router(brmctx) ||
+ br_ip6_multicast_is_router(brmctx))
goto out;
- br_mc_router_state_change(br, false);
+ br_mc_router_state_change(brmctx->br, false);
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
+}
+
+static void br_ip4_multicast_local_router_expired(struct timer_list *t)
+{
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_mc_router_timer);
+
+ br_multicast_local_router_expired(brmctx, t);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_local_router_expired(struct timer_list *t)
+{
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_mc_router_timer);
+
+ br_multicast_local_router_expired(brmctx, t);
}
+#endif
-static void br_multicast_querier_expired(struct net_bridge *br,
+static void br_multicast_querier_expired(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query)
{
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!netif_running(brmctx->br->dev) ||
+ br_multicast_ctx_vlan_global_disabled(brmctx) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
goto out;
- br_multicast_start_querier(br, query);
+ br_multicast_start_querier(brmctx, query);
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
static void br_ip4_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip4_other_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_other_query.timer);
- br_multicast_querier_expired(br, &br->ip4_own_query);
+ br_multicast_querier_expired(brmctx, &brmctx->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_querier_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip6_other_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_other_query.timer);
- br_multicast_querier_expired(br, &br->ip6_own_query);
+ br_multicast_querier_expired(brmctx, &brmctx->ip6_own_query);
}
#endif
-static void br_multicast_select_own_querier(struct net_bridge *br,
+static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
struct br_ip *ip,
struct sk_buff *skb)
{
if (ip->proto == htons(ETH_P_IP))
- br->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
+ brmctx->ip4_querier.addr.src.ip4 = ip_hdr(skb)->saddr;
#if IS_ENABLED(CONFIG_IPV6)
else
- br->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
+ brmctx->ip6_querier.addr.src.ip6 = ipv6_hdr(skb)->saddr;
#endif
}
-static void __br_multicast_send_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void __br_multicast_send_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct net_bridge_port_group *pg,
struct br_ip *ip_dst,
struct br_ip *group,
@@ -1453,19 +1628,23 @@ static void __br_multicast_send_query(struct net_bridge *br,
struct sk_buff *skb;
u8 igmp_type;
+ if (!br_multicast_ctx_should_use(brmctx, pmctx) ||
+ !br_multicast_ctx_matches_vlan_snooping(brmctx))
+ return;
+
again_under_lmqt:
- skb = br_multicast_alloc_query(br, pg, ip_dst, group, with_srcs,
- over_lmqt, sflag, &igmp_type,
+ skb = br_multicast_alloc_query(brmctx, pmctx, pg, ip_dst, group,
+ with_srcs, over_lmqt, sflag, &igmp_type,
need_rexmit);
if (!skb)
return;
- if (port) {
- skb->dev = port->dev;
- br_multicast_count(br, port, skb, igmp_type,
+ if (pmctx) {
+ skb->dev = pmctx->port->dev;
+ br_multicast_count(brmctx->br, pmctx->port, skb, igmp_type,
BR_MCAST_DIR_TX);
NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
- dev_net(port->dev), NULL, skb, NULL, skb->dev,
+ dev_net(pmctx->port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
if (over_lmqt && with_srcs && sflag) {
@@ -1473,35 +1652,64 @@ again_under_lmqt:
goto again_under_lmqt;
}
} else {
- br_multicast_select_own_querier(br, group, skb);
- br_multicast_count(br, port, skb, igmp_type,
+ br_multicast_select_own_querier(brmctx, group, skb);
+ br_multicast_count(brmctx->br, NULL, skb, igmp_type,
BR_MCAST_DIR_RX);
netif_rx(skb);
}
}
-static void br_multicast_send_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_read_querier(const struct bridge_mcast_querier *querier,
+ struct bridge_mcast_querier *dest)
+{
+ unsigned int seq;
+
+ memset(dest, 0, sizeof(*dest));
+ do {
+ seq = read_seqcount_begin(&querier->seq);
+ dest->port_ifidx = querier->port_ifidx;
+ memcpy(&dest->addr, &querier->addr, sizeof(struct br_ip));
+ } while (read_seqcount_retry(&querier->seq, seq));
+}
+
+static void br_multicast_update_querier(struct net_bridge_mcast *brmctx,
+ struct bridge_mcast_querier *querier,
+ int ifindex,
+ struct br_ip *saddr)
+{
+ lockdep_assert_held_once(&brmctx->br->multicast_lock);
+
+ write_seqcount_begin(&querier->seq);
+ querier->port_ifidx = ifindex;
+ memcpy(&querier->addr, saddr, sizeof(*saddr));
+ write_seqcount_end(&querier->seq);
+}
+
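
br_multicast_update_querier() and br_multicast_read_querier() above pair a seqcount with the querier state so readers get a consistent port_ifidx/address snapshot without taking the multicast lock: the writer bumps the count around the update and readers retry on an odd or changed count. A minimal single-threaded sketch of that protocol (no memory barriers, not actually concurrent):

/* Illustrative seqcount protocol: the write side increments the counter
 * before and after the update (odd value = write in progress), the read
 * side retries until it sees the same even value before and after.
 */
#include <stdio.h>

struct querier {
	unsigned int seq;
	int port_ifidx;
	unsigned int addr;
};

static void write_querier(struct querier *q, int ifidx, unsigned int addr)
{
	q->seq++;		/* becomes odd: readers will retry */
	q->port_ifidx = ifidx;
	q->addr = addr;
	q->seq++;		/* back to even: update visible */
}

static void read_querier(const struct querier *q, struct querier *dest)
{
	unsigned int start;

	do {
		start = q->seq;
		dest->port_ifidx = q->port_ifidx;
		dest->addr = q->addr;
	} while ((start & 1) || start != q->seq);
}

int main(void)
{
	struct querier q = { 0 }, snap;

	write_querier(&q, 3, 0xc0a80001);
	read_querier(&q, &snap);
	printf("querier ifidx=%d addr=%#x\n", snap.port_ifidx, snap.addr);
	return 0;
}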
+static void br_multicast_send_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_own_query *own_query)
{
struct bridge_mcast_other_query *other_query = NULL;
+ struct bridge_mcast_querier *querier;
struct br_ip br_group;
unsigned long time;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
- !br_opt_get(br, BROPT_MULTICAST_QUERIER))
+ if (!br_multicast_ctx_should_use(brmctx, pmctx) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) ||
+ !brmctx->multicast_querier)
return;
memset(&br_group.dst, 0, sizeof(br_group.dst));
- if (port ? (own_query == &port->ip4_own_query) :
- (own_query == &br->ip4_own_query)) {
- other_query = &br->ip4_other_query;
+ if (pmctx ? (own_query == &pmctx->ip4_own_query) :
+ (own_query == &brmctx->ip4_own_query)) {
+ querier = &brmctx->ip4_querier;
+ other_query = &brmctx->ip4_other_query;
br_group.proto = htons(ETH_P_IP);
#if IS_ENABLED(CONFIG_IPV6)
} else {
- other_query = &br->ip6_other_query;
+ querier = &brmctx->ip6_querier;
+ other_query = &brmctx->ip6_other_query;
br_group.proto = htons(ETH_P_IPV6);
#endif
}
@@ -1509,31 +1717,39 @@ static void br_multicast_send_query(struct net_bridge *br,
if (!other_query || timer_pending(&other_query->timer))
return;
- __br_multicast_send_query(br, port, NULL, NULL, &br_group, false, 0,
- NULL);
+ /* we're about to select ourselves as querier */
+ if (!pmctx && querier->port_ifidx) {
+ struct br_ip zeroip = {};
+
+ br_multicast_update_querier(brmctx, querier, 0, &zeroip);
+ }
+
+ __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false,
+ 0, NULL);
time = jiffies;
- time += own_query->startup_sent < br->multicast_startup_query_count ?
- br->multicast_startup_query_interval :
- br->multicast_query_interval;
+ time += own_query->startup_sent < brmctx->multicast_startup_query_count ?
+ brmctx->multicast_startup_query_interval :
+ brmctx->multicast_query_interval;
mod_timer(&own_query->timer, time);
}
static void
-br_multicast_port_query_expired(struct net_bridge_port *port,
+br_multicast_port_query_expired(struct net_bridge_mcast_port *pmctx,
struct bridge_mcast_own_query *query)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
+ struct net_bridge_mcast *brmctx;
spin_lock(&br->multicast_lock);
- if (port->state == BR_STATE_DISABLED ||
- port->state == BR_STATE_BLOCKING)
+ if (br_multicast_port_ctx_state_stopped(pmctx))
goto out;
- if (query->startup_sent < br->multicast_startup_query_count)
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ if (query->startup_sent < brmctx->multicast_startup_query_count)
query->startup_sent++;
- br_multicast_send_query(port->br, port, query);
+ br_multicast_send_query(brmctx, pmctx, query);
out:
spin_unlock(&br->multicast_lock);
@@ -1541,17 +1757,19 @@ out:
static void br_ip4_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip4_own_query.timer);
- br_multicast_port_query_expired(port, &port->ip4_own_query);
+ br_multicast_port_query_expired(pmctx, &pmctx->ip4_own_query);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_port_query_expired(struct timer_list *t)
{
- struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer);
+ struct net_bridge_mcast_port *pmctx = from_timer(pmctx, t,
+ ip6_own_query.timer);
- br_multicast_port_query_expired(port, &port->ip6_own_query);
+ br_multicast_port_query_expired(pmctx, &pmctx->ip6_own_query);
}
#endif
@@ -1560,19 +1778,27 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
struct net_bridge_port_group *pg = from_timer(pg, t, rexmit_timer);
struct bridge_mcast_other_query *other_query = NULL;
struct net_bridge *br = pg->key.port->br;
+ struct net_bridge_mcast_port *pmctx;
+ struct net_bridge_mcast *brmctx;
bool need_rexmit = false;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || hlist_unhashed(&pg->mglist) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
- !br_opt_get(br, BROPT_MULTICAST_QUERIER))
+ !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ goto out;
+
+ pmctx = br_multicast_pg_to_port_ctx(pg);
+ if (!pmctx)
+ goto out;
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ if (!brmctx->multicast_querier)
goto out;
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
if (!other_query || timer_pending(&other_query->timer))
@@ -1580,15 +1806,15 @@ static void br_multicast_port_group_rexmit(struct timer_list *t)
if (pg->grp_query_rexmit_cnt) {
pg->grp_query_rexmit_cnt--;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, false, 1, NULL);
}
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, true, 0, &need_rexmit);
if (pg->grp_query_rexmit_cnt || need_rexmit)
mod_timer(&pg->rexmit_timer, jiffies +
- br->multicast_last_member_interval);
+ brmctx->multicast_last_member_interval);
out:
spin_unlock(&br->multicast_lock);
}
@@ -1606,21 +1832,40 @@ static int br_mc_disabled_update(struct net_device *dev, bool value,
return switchdev_port_attr_set(dev, &attr, extack);
}
+void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx)
+{
+ pmctx->port = port;
+ pmctx->vlan = vlan;
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ timer_setup(&pmctx->ip4_mc_router_timer,
+ br_ip4_multicast_router_expired, 0);
+ timer_setup(&pmctx->ip4_own_query.timer,
+ br_ip4_multicast_port_query_expired, 0);
+#if IS_ENABLED(CONFIG_IPV6)
+ timer_setup(&pmctx->ip6_mc_router_timer,
+ br_ip6_multicast_router_expired, 0);
+ timer_setup(&pmctx->ip6_own_query.timer,
+ br_ip6_multicast_port_query_expired, 0);
+#endif
+}
+
+void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ del_timer_sync(&pmctx->ip6_mc_router_timer);
+#endif
+ del_timer_sync(&pmctx->ip4_mc_router_timer);
+}
+
int br_multicast_add_port(struct net_bridge_port *port)
{
int err;
- port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT;
+ br_multicast_port_ctx_init(port, NULL, &port->multicast_ctx);
- timer_setup(&port->multicast_router_timer,
- br_multicast_router_expired, 0);
- timer_setup(&port->ip4_own_query.timer,
- br_ip4_multicast_port_query_expired, 0);
-#if IS_ENABLED(CONFIG_IPV6)
- timer_setup(&port->ip6_own_query.timer,
- br_ip6_multicast_port_query_expired, 0);
-#endif
err = br_mc_disabled_update(port->dev,
br_opt_get(port->br,
BROPT_MULTICAST_ENABLED),
@@ -1649,7 +1894,7 @@ void br_multicast_del_port(struct net_bridge_port *port)
hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
br_multicast_gc(&deleted_head);
- del_timer_sync(&port->multicast_router_timer);
+ br_multicast_port_ctx_deinit(&port->multicast_ctx);
free_percpu(port->mcast_stats);
}
@@ -1662,50 +1907,63 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query)
mod_timer(&query->timer, jiffies);
}
-static void __br_multicast_enable_port(struct net_bridge_port *port)
+static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
{
- struct net_bridge *br = port->br;
+ struct net_bridge *br = pmctx->port->br;
+ struct net_bridge_mcast *brmctx;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) || !netif_running(br->dev))
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ if (!br_opt_get(br, BROPT_MULTICAST_ENABLED) ||
+ !netif_running(br->dev))
return;
- br_multicast_enable(&port->ip4_own_query);
+ br_multicast_enable(&pmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- br_multicast_enable(&port->ip6_own_query);
+ br_multicast_enable(&pmctx->ip6_own_query);
#endif
- if (port->multicast_router == MDB_RTR_TYPE_PERM &&
- hlist_unhashed(&port->rlist))
- br_multicast_add_router(br, port);
+ if (pmctx->multicast_router == MDB_RTR_TYPE_PERM) {
+ br_ip4_multicast_add_router(brmctx, pmctx);
+ br_ip6_multicast_add_router(brmctx, pmctx);
+ }
}
void br_multicast_enable_port(struct net_bridge_port *port)
{
struct net_bridge *br = port->br;
- spin_lock(&br->multicast_lock);
- __br_multicast_enable_port(port);
- spin_unlock(&br->multicast_lock);
+ spin_lock_bh(&br->multicast_lock);
+ __br_multicast_enable_port_ctx(&port->multicast_ctx);
+ spin_unlock_bh(&br->multicast_lock);
}
-void br_multicast_disable_port(struct net_bridge_port *port)
+static void __br_multicast_disable_port_ctx(struct net_bridge_mcast_port *pmctx)
{
- struct net_bridge *br = port->br;
struct net_bridge_port_group *pg;
struct hlist_node *n;
-
- spin_lock(&br->multicast_lock);
- hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
- if (!(pg->flags & MDB_PG_FLAGS_PERMANENT))
- br_multicast_find_del_pg(br, pg);
-
- __del_port_router(port);
-
- del_timer(&port->multicast_router_timer);
- del_timer(&port->ip4_own_query.timer);
+ bool del = false;
+
+ hlist_for_each_entry_safe(pg, n, &pmctx->port->mglist, mglist)
+ if (!(pg->flags & MDB_PG_FLAGS_PERMANENT) &&
+ (!br_multicast_port_ctx_is_vlan(pmctx) ||
+ pg->key.addr.vid == pmctx->vlan->vid))
+ br_multicast_find_del_pg(pmctx->port->br, pg);
+
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del_timer(&pmctx->ip4_mc_router_timer);
+ del_timer(&pmctx->ip4_own_query.timer);
+ del |= br_ip6_multicast_rport_del(pmctx);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer(&port->ip6_own_query.timer);
+ del_timer(&pmctx->ip6_mc_router_timer);
+ del_timer(&pmctx->ip6_own_query.timer);
#endif
- spin_unlock(&br->multicast_lock);
+ br_multicast_rport_del_notify(pmctx, del);
+}
+
+void br_multicast_disable_port(struct net_bridge_port *port)
+{
+ spin_lock_bh(&port->br->multicast_lock);
+ __br_multicast_disable_port_ctx(&port->multicast_ctx);
+ spin_unlock_bh(&port->br->multicast_lock);
}
static int __grp_src_delete_marked(struct net_bridge_port_group *pg)
@@ -1730,31 +1988,32 @@ static void __grp_src_mod_timer(struct net_bridge_group_src *src,
br_multicast_fwd_src_handle(src);
}
-static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
+static void __grp_src_query_marked_and_rexmit(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg)
{
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->key.port->br;
- u32 lmqc = br->multicast_last_member_count;
+ u32 lmqc = brmctx->multicast_last_member_count;
unsigned long lmqt, lmi, now = jiffies;
struct net_bridge_group_src *ent;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!netif_running(brmctx->br->dev) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
return;
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
- lmqt = now + br_multicast_lmqt(br);
+ lmqt = now + br_multicast_lmqt(brmctx);
hlist_for_each_entry(ent, &pg->src_list, node) {
if (ent->flags & BR_SGRP_F_SEND) {
ent->flags &= ~BR_SGRP_F_SEND;
if (ent->timer.expires > lmqt) {
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+ if (brmctx->multicast_querier &&
other_query &&
!timer_pending(&other_query->timer))
ent->src_query_rexmit_cnt = lmqc;
@@ -1763,41 +2022,42 @@ static void __grp_src_query_marked_and_rexmit(struct net_bridge_port_group *pg)
}
}
- if (!br_opt_get(br, BROPT_MULTICAST_QUERIER) ||
+ if (!brmctx->multicast_querier ||
!other_query || timer_pending(&other_query->timer))
return;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, true, 1, NULL);
- lmi = now + br->multicast_last_member_interval;
+ lmi = now + brmctx->multicast_last_member_interval;
if (!timer_pending(&pg->rexmit_timer) ||
time_after(pg->rexmit_timer.expires, lmi))
mod_timer(&pg->rexmit_timer, lmi);
}
-static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
+static void __grp_send_query_and_rexmit(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg)
{
struct bridge_mcast_other_query *other_query = NULL;
- struct net_bridge *br = pg->key.port->br;
unsigned long now = jiffies, lmi;
- if (!netif_running(br->dev) ||
- !br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!netif_running(brmctx->br->dev) ||
+ !br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED))
return;
if (pg->key.addr.proto == htons(ETH_P_IP))
- other_query = &br->ip4_other_query;
+ other_query = &brmctx->ip4_other_query;
#if IS_ENABLED(CONFIG_IPV6)
else
- other_query = &br->ip6_other_query;
+ other_query = &brmctx->ip6_other_query;
#endif
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER) &&
+ if (brmctx->multicast_querier &&
other_query && !timer_pending(&other_query->timer)) {
- lmi = now + br->multicast_last_member_interval;
- pg->grp_query_rexmit_cnt = br->multicast_last_member_count - 1;
- __br_multicast_send_query(br, pg->key.port, pg, &pg->key.addr,
+ lmi = now + brmctx->multicast_last_member_interval;
+ pg->grp_query_rexmit_cnt = brmctx->multicast_last_member_count - 1;
+ __br_multicast_send_query(brmctx, pmctx, pg, &pg->key.addr,
&pg->key.addr, false, 0, NULL);
if (!timer_pending(&pg->rexmit_timer) ||
time_after(pg->rexmit_timer.expires, lmi))
@@ -1806,8 +2066,8 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
if (pg->filter_mode == MCAST_EXCLUDE &&
(!timer_pending(&pg->timer) ||
- time_after(pg->timer.expires, now + br_multicast_lmqt(br))))
- mod_timer(&pg->timer, now + br_multicast_lmqt(br));
+ time_after(pg->timer.expires, now + br_multicast_lmqt(brmctx))))
+ mod_timer(&pg->timer, now + br_multicast_lmqt(brmctx));
}
/* State Msg type New state Actions
@@ -1815,11 +2075,11 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg)
* INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI
* EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI
*/
-static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_isinc_allow(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
bool changed = false;
@@ -1838,10 +2098,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
return changed;
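
Per the state table above, an ALLOW(B) report against an INCLUDE(A) group yields INCLUDE(A+B), with the B sources (re)armed to the group membership interval. A tiny sketch of just the set-union step (integers stand in for source addresses; timers and EHT handling are omitted):

/* Illustrative INCLUDE(A) + ALLOW(B) -> INCLUDE(A+B): merge the newly
 * allowed sources into the existing include list.
 */
#include <stdio.h>
#include <stdbool.h>

#define MAX_SRCS 16

static bool has_src(const unsigned int *set, int n, unsigned int s)
{
	for (int i = 0; i < n; i++)
		if (set[i] == s)
			return true;
	return false;
}

int main(void)
{
	unsigned int include[MAX_SRCS] = { 1, 2, 3 };	/* A */
	int n_include = 3;
	unsigned int allow[] = { 3, 4 };		/* B */

	for (unsigned int i = 0; i < sizeof(allow) / sizeof(allow[0]); i++)
		if (!has_src(include, n_include, allow[i]) &&
		    n_include < MAX_SRCS)
			include[n_include++] = allow[i];	/* A+B */

	for (int i = 0; i < n_include; i++)
		printf("%u ", include[i]);
	printf("\n");		/* prints: 1 2 3 4 */
	return 0;
}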
@@ -1852,7 +2113,8 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_a
* Delete (A-B)
* Group Timer=GMI
*/
-static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
+static void __grp_src_isexc_incl(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -1876,7 +2138,8 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
br_multicast_fwd_src_handle(ent);
}
- br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+ br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
__grp_src_delete_marked(pg);
}
@@ -1887,11 +2150,11 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr,
* Delete (Y-A)
* Group Timer=GMI
*/
-static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_isexc_excl(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
bool changed = false;
@@ -1912,13 +2175,14 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
ent = br_multicast_new_group_src(pg, &src_ip);
if (ent) {
__grp_src_mod_timer(ent,
- now + br_multicast_gmi(br));
+ now + br_multicast_gmi(brmctx));
changed = true;
}
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (__grp_src_delete_marked(pg))
@@ -1927,28 +2191,28 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr,
return changed;
}
-static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_isexc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size,
+ __grp_src_isexc_incl(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
grec_type);
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
- changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_isexc_excl(brmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, grec_type);
break;
}
pg->filter_mode = MCAST_EXCLUDE;
- mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx));
return changed;
}
@@ -1957,11 +2221,12 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr,
* INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI
* Send Q(G,A-B)
*/
-static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toin_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
@@ -1985,14 +2250,15 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
changed = true;
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
@@ -2002,11 +2268,12 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,X-A)
* Send Q(G)
*/
-static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toin_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
u32 src_idx, to_send = pg->src_ents;
struct net_bridge_group_src *ent;
unsigned long now = jiffies;
@@ -2033,21 +2300,24 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr,
changed = true;
}
if (ent)
- __grp_src_mod_timer(ent, now + br_multicast_gmi(br));
+ __grp_src_mod_timer(ent, now + br_multicast_gmi(brmctx));
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
- __grp_send_query_and_rexmit(pg);
+ __grp_send_query_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_toin(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2055,12 +2325,12 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toin_incl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
case MCAST_EXCLUDE:
- changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toin_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
@@ -2082,7 +2352,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,A*B)
* Group Timer=GMI
*/
-static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
+static void __grp_src_toex_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2109,11 +2381,12 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
br_multicast_fwd_src_handle(ent);
}
- br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type);
+ br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type);
__grp_src_delete_marked(pg);
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
}
/* State Msg type New state Actions
@@ -2123,7 +2396,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr,
* Send Q(G,A-Y)
* Group Timer=GMI
*/
-static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_toex_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
@@ -2155,39 +2430,41 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (__grp_src_delete_marked(pg))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_toex(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size,
int grec_type)
{
- struct net_bridge *br = pg->key.port->br;
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ __grp_src_toex_incl(brmctx, pmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, grec_type);
br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE);
changed = true;
break;
case MCAST_EXCLUDE:
- changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_toex_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
pg->filter_mode = MCAST_EXCLUDE;
- mod_timer(&pg->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&pg->timer, jiffies + br_multicast_gmi(brmctx));
return changed;
}
@@ -2195,7 +2472,9 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr,
/* State Msg type New state Actions
* INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B)
*/
-static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_block_incl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
struct net_bridge_group_src *ent;
@@ -2217,11 +2496,12 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
@@ -2230,7 +2510,9 @@ static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr,
* EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) (A-X-Y)=Group Timer
* Send Q(G,A-Y)
*/
-static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
+static bool __grp_src_block_excl(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
struct net_bridge_group_src *ent;
@@ -2259,28 +2541,31 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr,
}
}
- if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type))
+ if (br_multicast_eht_handle(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
+ grec_type))
changed = true;
if (to_send)
- __grp_src_query_marked_and_rexmit(pg);
+ __grp_src_query_marked_and_rexmit(brmctx, pmctx, pg);
return changed;
}
-static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr,
+static bool br_multicast_block(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct net_bridge_port_group *pg, void *h_addr,
void *srcs, u32 nsrcs, size_t addr_size, int grec_type)
{
bool changed = false;
switch (pg->filter_mode) {
case MCAST_INCLUDE:
- changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_block_incl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
case MCAST_EXCLUDE:
- changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size,
- grec_type);
+ changed = __grp_src_block_excl(brmctx, pmctx, pg, h_addr, srcs,
+ nsrcs, addr_size, grec_type);
break;
}
@@ -2315,12 +2600,12 @@ br_multicast_find_port(struct net_bridge_mdb_entry *mp,
return NULL;
}
-static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_igmp3_report(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
- bool igmpv2 = br->multicast_igmp_version == 2;
+ bool igmpv2 = brmctx->multicast_igmp_version == 2;
struct net_bridge_mdb_entry *mdst;
struct net_bridge_port_group *pg;
const unsigned char *src;
@@ -2367,25 +2652,29 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
if (nsrcs == 0 &&
(type == IGMPV3_CHANGE_TO_INCLUDE ||
type == IGMPV3_MODE_IS_INCLUDE)) {
- if (!port || igmpv2) {
- br_ip4_multicast_leave_group(br, port, group, vid, src);
+ if (!pmctx || igmpv2) {
+ br_ip4_multicast_leave_group(brmctx, pmctx,
+ group, vid, src);
continue;
}
} else {
- err = br_ip4_multicast_add_group(br, port, group, vid,
- src, igmpv2);
+ err = br_ip4_multicast_add_group(brmctx, pmctx, group,
+ vid, src, igmpv2);
if (err)
break;
}
- if (!port || igmpv2)
+ if (!pmctx || igmpv2)
continue;
- spin_lock_bh(&br->multicast_lock);
- mdst = br_mdb_ip4_get(br, group, vid);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ goto unlock_continue;
+
+ mdst = br_mdb_ip4_get(brmctx->br, group, vid);
if (!mdst)
goto unlock_continue;
- pg = br_multicast_find_port(mdst, port, src);
+ pg = br_multicast_find_port(mdst, pmctx->port, src);
if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
goto unlock_continue;
/* reload grec and host addr */
@@ -2393,51 +2682,57 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
h_addr = &ip_hdr(skb)->saddr;
switch (type) {
case IGMPV3_ALLOW_NEW_SOURCES:
- changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_MODE_IS_INCLUDE:
- changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_MODE_IS_EXCLUDE:
- changed = br_multicast_isexc(pg, h_addr, grec->grec_src,
+ changed = br_multicast_isexc(brmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_CHANGE_TO_INCLUDE:
- changed = br_multicast_toin(pg, h_addr, grec->grec_src,
+ changed = br_multicast_toin(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_CHANGE_TO_EXCLUDE:
- changed = br_multicast_toex(pg, h_addr, grec->grec_src,
+ changed = br_multicast_toex(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
case IGMPV3_BLOCK_OLD_SOURCES:
- changed = br_multicast_block(pg, h_addr, grec->grec_src,
+ changed = br_multicast_block(brmctx, pmctx, pg, h_addr,
+ grec->grec_src,
nsrcs, sizeof(__be32), type);
break;
}
if (changed)
- br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB);
unlock_continue:
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
}
return err;
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_mld2_report(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_mld2_report(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
- bool mldv1 = br->multicast_mld_version == 1;
+ bool mldv1 = brmctx->multicast_mld_version == 1;
struct net_bridge_mdb_entry *mdst;
struct net_bridge_port_group *pg;
unsigned int nsrcs_offset;
+ struct mld2_report *mld2r;
const unsigned char *src;
- struct icmp6hdr *icmp6h;
struct in6_addr *h_addr;
struct mld2_grec *grec;
unsigned int grec_len;
@@ -2445,12 +2740,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
int i, len, num;
int err = 0;
- if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h)))
+ if (!ipv6_mc_may_pull(skb, sizeof(*mld2r)))
return -EINVAL;
- icmp6h = icmp6_hdr(skb);
- num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
- len = skb_transport_offset(skb) + sizeof(*icmp6h);
+ mld2r = (struct mld2_report *)icmp6_hdr(skb);
+ num = ntohs(mld2r->mld2r_ngrec);
+ len = skb_transport_offset(skb) + sizeof(*mld2r);
for (i = 0; i < num; i++) {
__be16 *_nsrcs, __nsrcs;
@@ -2493,153 +2788,243 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
if ((grec->grec_type == MLD2_CHANGE_TO_INCLUDE ||
grec->grec_type == MLD2_MODE_IS_INCLUDE) &&
nsrcs == 0) {
- if (!port || mldv1) {
- br_ip6_multicast_leave_group(br, port,
+ if (!pmctx || mldv1) {
+ br_ip6_multicast_leave_group(brmctx, pmctx,
&grec->grec_mca,
vid, src);
continue;
}
} else {
- err = br_ip6_multicast_add_group(br, port,
+ err = br_ip6_multicast_add_group(brmctx, pmctx,
&grec->grec_mca, vid,
src, mldv1);
if (err)
break;
}
- if (!port || mldv1)
+ if (!pmctx || mldv1)
continue;
- spin_lock_bh(&br->multicast_lock);
- mdst = br_mdb_ip6_get(br, &grec->grec_mca, vid);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ goto unlock_continue;
+
+ mdst = br_mdb_ip6_get(brmctx->br, &grec->grec_mca, vid);
if (!mdst)
goto unlock_continue;
- pg = br_multicast_find_port(mdst, port, src);
+ pg = br_multicast_find_port(mdst, pmctx->port, src);
if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT))
goto unlock_continue;
h_addr = &ipv6_hdr(skb)->saddr;
switch (grec->grec_type) {
case MLD2_ALLOW_NEW_SOURCES:
- changed = br_multicast_isinc_allow(pg, h_addr,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_MODE_IS_INCLUDE:
- changed = br_multicast_isinc_allow(pg, h_addr,
+ changed = br_multicast_isinc_allow(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_MODE_IS_EXCLUDE:
- changed = br_multicast_isexc(pg, h_addr,
+ changed = br_multicast_isexc(brmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_CHANGE_TO_INCLUDE:
- changed = br_multicast_toin(pg, h_addr,
+ changed = br_multicast_toin(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_CHANGE_TO_EXCLUDE:
- changed = br_multicast_toex(pg, h_addr,
+ changed = br_multicast_toex(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
case MLD2_BLOCK_OLD_SOURCES:
- changed = br_multicast_block(pg, h_addr,
+ changed = br_multicast_block(brmctx, pmctx, pg, h_addr,
grec->grec_src, nsrcs,
sizeof(struct in6_addr),
grec->grec_type);
break;
}
if (changed)
- br_mdb_notify(br->dev, mdst, pg, RTM_NEWMDB);
+ br_mdb_notify(brmctx->br->dev, mdst, pg, RTM_NEWMDB);
unlock_continue:
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
}
return err;
}
#endif
-static bool br_ip4_multicast_select_querier(struct net_bridge *br,
- struct net_bridge_port *port,
- __be32 saddr)
+static bool br_multicast_select_querier(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct br_ip *saddr)
{
- if (!timer_pending(&br->ip4_own_query.timer) &&
- !timer_pending(&br->ip4_other_query.timer))
- goto update;
+ int port_ifidx = pmctx ? pmctx->port->dev->ifindex : 0;
+ struct timer_list *own_timer, *other_timer;
+ struct bridge_mcast_querier *querier;
- if (!br->ip4_querier.addr.src.ip4)
- goto update;
+ switch (saddr->proto) {
+ case htons(ETH_P_IP):
+ querier = &brmctx->ip4_querier;
+ own_timer = &brmctx->ip4_own_query.timer;
+ other_timer = &brmctx->ip4_other_query.timer;
+ if (!querier->addr.src.ip4 ||
+ ntohl(saddr->src.ip4) <= ntohl(querier->addr.src.ip4))
+ goto update;
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case htons(ETH_P_IPV6):
+ querier = &brmctx->ip6_querier;
+ own_timer = &brmctx->ip6_own_query.timer;
+ other_timer = &brmctx->ip6_other_query.timer;
+ if (ipv6_addr_cmp(&saddr->src.ip6, &querier->addr.src.ip6) <= 0)
+ goto update;
+ break;
+#endif
+ default:
+ return false;
+ }
- if (ntohl(saddr) <= ntohl(br->ip4_querier.addr.src.ip4))
+ if (!timer_pending(own_timer) && !timer_pending(other_timer))
goto update;
return false;
update:
- br->ip4_querier.addr.src.ip4 = saddr;
-
- /* update protected by general multicast_lock by caller */
- rcu_assign_pointer(br->ip4_querier.port, port);
+ br_multicast_update_querier(brmctx, querier, port_ifidx, saddr);
return true;
}
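
The rewritten br_multicast_select_querier() above folds both address families into one election rule: take over if no querier address is recorded yet, if the new source address is lower or equal (compared in host order for IPv4), or if neither the own-query nor the other-querier timer is pending. Below is a standalone sketch of that rule for IPv4 only; the querier_sketch struct and helper are hypothetical and stand in for the real per-context state.

/* Standalone sketch (not part of the patch): lowest source address wins,
 * and any source is accepted while no querier activity is pending. */
#include <stdbool.h>
#include <stdint.h>

struct querier_sketch {
	uint32_t addr;		/* host order; 0 means no querier yet */
	bool own_active;	/* own query timer pending */
	bool other_active;	/* other-querier timer pending */
};

static bool select_querier(struct querier_sketch *q, uint32_t saddr)
{
	if (!q->addr || saddr <= q->addr)	/* lower address wins */
		goto update;
	if (!q->own_active && !q->other_active)	/* nobody active, take over */
		goto update;
	return false;
update:
	q->addr = saddr;
	return true;
}
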
-#if IS_ENABLED(CONFIG_IPV6)
-static bool br_ip6_multicast_select_querier(struct net_bridge *br,
- struct net_bridge_port *port,
- struct in6_addr *saddr)
+static struct net_bridge_port *
+__br_multicast_get_querier_port(struct net_bridge *br,
+ const struct bridge_mcast_querier *querier)
{
- if (!timer_pending(&br->ip6_own_query.timer) &&
- !timer_pending(&br->ip6_other_query.timer))
- goto update;
-
- if (ipv6_addr_cmp(saddr, &br->ip6_querier.addr.src.ip6) <= 0)
- goto update;
-
- return false;
+ int port_ifidx = READ_ONCE(querier->port_ifidx);
+ struct net_bridge_port *p;
+ struct net_device *dev;
-update:
- br->ip6_querier.addr.src.ip6 = *saddr;
+ if (port_ifidx == 0)
+ return NULL;
- /* update protected by general multicast_lock by caller */
- rcu_assign_pointer(br->ip6_querier.port, port);
+ dev = dev_get_by_index_rcu(dev_net(br->dev), port_ifidx);
+ if (!dev)
+ return NULL;
+ p = br_port_get_rtnl_rcu(dev);
+ if (!p || p->br != br)
+ return NULL;
- return true;
+ return p;
}
-#endif
-static bool br_multicast_select_querier(struct net_bridge *br,
- struct net_bridge_port *port,
- struct br_ip *saddr)
+size_t br_multicast_querier_state_size(void)
{
- switch (saddr->proto) {
- case htons(ETH_P_IP):
- return br_ip4_multicast_select_querier(br, port, saddr->src.ip4);
+ return nla_total_size(0) + /* nest attribute */
+ nla_total_size(sizeof(__be32)) + /* BRIDGE_QUERIER_IP_ADDRESS */
+ nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IP_PORT */
+ nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IP_OTHER_TIMER */
#if IS_ENABLED(CONFIG_IPV6)
- case htons(ETH_P_IPV6):
- return br_ip6_multicast_select_querier(br, port, &saddr->src.ip6);
+ nla_total_size(sizeof(struct in6_addr)) + /* BRIDGE_QUERIER_IPV6_ADDRESS */
+ nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IPV6_PORT */
+ nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IPV6_OTHER_TIMER */
#endif
+ 0;
+}
+
+/* protected by rtnl or rcu */
+int br_multicast_dump_querier_state(struct sk_buff *skb,
+ const struct net_bridge_mcast *brmctx,
+ int nest_attr)
+{
+ struct bridge_mcast_querier querier = {};
+ struct net_bridge_port *p;
+ struct nlattr *nest;
+
+ if (!br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) ||
+ br_multicast_ctx_vlan_global_disabled(brmctx))
+ return 0;
+
+ nest = nla_nest_start(skb, nest_attr);
+ if (!nest)
+ return -EMSGSIZE;
+
+ rcu_read_lock();
+ if (!brmctx->multicast_querier &&
+ !timer_pending(&brmctx->ip4_other_query.timer))
+ goto out_v6;
+
+ br_multicast_read_querier(&brmctx->ip4_querier, &querier);
+ if (nla_put_in_addr(skb, BRIDGE_QUERIER_IP_ADDRESS,
+ querier.addr.src.ip4)) {
+ rcu_read_unlock();
+ goto out_err;
}
- return false;
+ p = __br_multicast_get_querier_port(brmctx->br, &querier);
+ if (timer_pending(&brmctx->ip4_other_query.timer) &&
+ (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IP_OTHER_TIMER,
+ br_timer_value(&brmctx->ip4_other_query.timer),
+ BRIDGE_QUERIER_PAD) ||
+ (p && nla_put_u32(skb, BRIDGE_QUERIER_IP_PORT, p->dev->ifindex)))) {
+ rcu_read_unlock();
+ goto out_err;
+ }
+
+out_v6:
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!brmctx->multicast_querier &&
+ !timer_pending(&brmctx->ip6_other_query.timer))
+ goto out;
+
+ br_multicast_read_querier(&brmctx->ip6_querier, &querier);
+ if (nla_put_in6_addr(skb, BRIDGE_QUERIER_IPV6_ADDRESS,
+ &querier.addr.src.ip6)) {
+ rcu_read_unlock();
+ goto out_err;
+ }
+
+ p = __br_multicast_get_querier_port(brmctx->br, &querier);
+ if (timer_pending(&brmctx->ip6_other_query.timer) &&
+ (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IPV6_OTHER_TIMER,
+ br_timer_value(&brmctx->ip6_other_query.timer),
+ BRIDGE_QUERIER_PAD) ||
+ (p && nla_put_u32(skb, BRIDGE_QUERIER_IPV6_PORT,
+ p->dev->ifindex)))) {
+ rcu_read_unlock();
+ goto out_err;
+ }
+out:
+#endif
+ rcu_read_unlock();
+ nla_nest_end(skb, nest);
+ if (!nla_len(nest))
+ nla_nest_cancel(skb, nest);
+
+ return 0;
+
+out_err:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
}
static void
-br_multicast_update_query_timer(struct net_bridge *br,
+br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
struct bridge_mcast_other_query *query,
unsigned long max_delay)
{
if (!timer_pending(&query->timer))
query->delay_time = jiffies + max_delay;
- mod_timer(&query->timer, jiffies + br->multicast_querier_interval);
+ mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
}
static void br_port_mc_router_state_change(struct net_bridge_port *p,
@@ -2655,74 +3040,208 @@ static void br_port_mc_router_state_change(struct net_bridge_port *p,
switchdev_port_attr_set(p->dev, &attr, NULL);
}
-/*
- * Add port to router_list
+static struct net_bridge_port *
+br_multicast_rport_from_node(struct net_bridge_mcast *brmctx,
+ struct hlist_head *mc_router_list,
+ struct hlist_node *rlist)
+{
+ struct net_bridge_mcast_port *pmctx;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (mc_router_list == &brmctx->ip6_mc_router_list)
+ pmctx = hlist_entry(rlist, struct net_bridge_mcast_port,
+ ip6_rlist);
+ else
+#endif
+ pmctx = hlist_entry(rlist, struct net_bridge_mcast_port,
+ ip4_rlist);
+
+ return pmctx->port;
+}
+
+static struct hlist_node *
+br_multicast_get_rport_slot(struct net_bridge_mcast *brmctx,
+ struct net_bridge_port *port,
+ struct hlist_head *mc_router_list)
+
+{
+ struct hlist_node *slot = NULL;
+ struct net_bridge_port *p;
+ struct hlist_node *rlist;
+
+ hlist_for_each(rlist, mc_router_list) {
+ p = br_multicast_rport_from_node(brmctx, mc_router_list, rlist);
+
+ if ((unsigned long)port >= (unsigned long)p)
+ break;
+
+ slot = rlist;
+ }
+
+ return slot;
+}
+
+static bool br_multicast_no_router_otherpf(struct net_bridge_mcast_port *pmctx,
+ struct hlist_node *rnode)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (rnode != &pmctx->ip6_rlist)
+ return hlist_unhashed(&pmctx->ip6_rlist);
+ else
+ return hlist_unhashed(&pmctx->ip4_rlist);
+#else
+ return true;
+#endif
+}
+
+/* Add port to router_list
* list is maintained ordered by pointer value
* and locked by br->multicast_lock and RCU
*/
-static void br_multicast_add_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct hlist_node *rlist,
+ struct hlist_head *mc_router_list)
{
- struct net_bridge_port *p;
- struct hlist_node *slot = NULL;
+ struct hlist_node *slot;
- if (!hlist_unhashed(&port->rlist))
+ if (!hlist_unhashed(rlist))
return;
- hlist_for_each_entry(p, &br->router_list, rlist) {
- if ((unsigned long) port >= (unsigned long) p)
- break;
- slot = &p->rlist;
- }
+ slot = br_multicast_get_rport_slot(brmctx, pmctx->port, mc_router_list);
if (slot)
- hlist_add_behind_rcu(&port->rlist, slot);
+ hlist_add_behind_rcu(rlist, slot);
else
- hlist_add_head_rcu(&port->rlist, &br->router_list);
- br_rtr_notify(br->dev, port, RTM_NEWMDB);
- br_port_mc_router_state_change(port, true);
+ hlist_add_head_rcu(rlist, mc_router_list);
+
+ /* For backwards compatibility for now, only notify if we
+ * switched from no IPv4/IPv6 multicast router to a new
+ * IPv4 or IPv6 multicast router.
+ */
+ if (br_multicast_no_router_otherpf(pmctx, rlist)) {
+ br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_NEWMDB);
+ br_port_mc_router_state_change(pmctx->port, true);
+ }
+}
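
The router list walked by br_multicast_get_rport_slot() above is kept ordered by descending pointer value, and the returned slot is the node to insert behind (hlist_add_behind_rcu) or NULL for a new list head. A standalone sketch of the same ordered insertion on a plain singly linked list with integer keys; the rnode type and helper are hypothetical, for illustration only.

/* Standalone sketch (not part of the patch): keep the list sorted from
 * highest to lowest key and insert behind the last bigger node. */
#include <stddef.h>

struct rnode {
	unsigned long key;	/* stands in for the port pointer value */
	struct rnode *next;
};

static void rlist_add_sorted(struct rnode **head, struct rnode *n)
{
	struct rnode *slot = NULL, *p;

	for (p = *head; p; p = p->next) {
		if (n->key >= p->key)	/* found our place, stop walking */
			break;
		slot = p;		/* remember the last bigger node */
	}
	if (slot) {			/* add behind the remembered slot */
		n->next = slot->next;
		slot->next = n;
	} else {			/* new head */
		n->next = *head;
		*head = n;
	}
}
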
+
+/* Add port to router_list
+ * list is maintained ordered by pointer value
+ * and locked by br->multicast_lock and RCU
+ */
+static void br_ip4_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
+{
+ br_multicast_add_router(brmctx, pmctx, &pmctx->ip4_rlist,
+ &brmctx->ip4_mc_router_list);
+}
+
+/* Add port to router_list
+ * list is maintained ordered by pointer value
+ * and locked by br->multicast_lock and RCU
+ */
+static void br_ip6_multicast_add_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ br_multicast_add_router(brmctx, pmctx, &pmctx->ip6_rlist,
+ &brmctx->ip6_mc_router_list);
+#endif
}
-static void br_multicast_mark_router(struct net_bridge *br,
- struct net_bridge_port *port)
+static void br_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct timer_list *timer,
+ struct hlist_node *rlist,
+ struct hlist_head *mc_router_list)
{
unsigned long now = jiffies;
- if (!port) {
- if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
- if (!timer_pending(&br->multicast_router_timer))
- br_mc_router_state_change(br, true);
- mod_timer(&br->multicast_router_timer,
- now + br->multicast_querier_interval);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
+ return;
+
+ if (!pmctx) {
+ if (brmctx->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
+ if (!br_ip4_multicast_is_router(brmctx) &&
+ !br_ip6_multicast_is_router(brmctx))
+ br_mc_router_state_change(brmctx->br, true);
+ mod_timer(timer, now + brmctx->multicast_querier_interval);
}
return;
}
- if (port->multicast_router == MDB_RTR_TYPE_DISABLED ||
- port->multicast_router == MDB_RTR_TYPE_PERM)
+ if (pmctx->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ pmctx->multicast_router == MDB_RTR_TYPE_PERM)
return;
- br_multicast_add_router(br, port);
+ br_multicast_add_router(brmctx, pmctx, rlist, mc_router_list);
+ mod_timer(timer, now + brmctx->multicast_querier_interval);
+}
+
+static void br_ip4_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
+{
+ struct timer_list *timer = &brmctx->ip4_mc_router_timer;
+ struct hlist_node *rlist = NULL;
+
+ if (pmctx) {
+ timer = &pmctx->ip4_mc_router_timer;
+ rlist = &pmctx->ip4_rlist;
+ }
+
+ br_multicast_mark_router(brmctx, pmctx, timer, rlist,
+ &brmctx->ip4_mc_router_list);
+}
+
+static void br_ip6_multicast_mark_router(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct timer_list *timer = &brmctx->ip6_mc_router_timer;
+ struct hlist_node *rlist = NULL;
+
+ if (pmctx) {
+ timer = &pmctx->ip6_mc_router_timer;
+ rlist = &pmctx->ip6_rlist;
+ }
+
+ br_multicast_mark_router(brmctx, pmctx, timer, rlist,
+ &brmctx->ip6_mc_router_list);
+#endif
+}
+
+static void
+br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct bridge_mcast_other_query *query,
+ struct br_ip *saddr,
+ unsigned long max_delay)
+{
+ if (!br_multicast_select_querier(brmctx, pmctx, saddr))
+ return;
- mod_timer(&port->multicast_router_timer,
- now + br->multicast_querier_interval);
+ br_multicast_update_query_timer(brmctx, query, max_delay);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
}
-static void br_multicast_query_received(struct net_bridge *br,
- struct net_bridge_port *port,
- struct bridge_mcast_other_query *query,
- struct br_ip *saddr,
- unsigned long max_delay)
+#if IS_ENABLED(CONFIG_IPV6)
+static void
+br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
+ struct bridge_mcast_other_query *query,
+ struct br_ip *saddr,
+ unsigned long max_delay)
{
- if (!br_multicast_select_querier(br, port, saddr))
+ if (!br_multicast_select_querier(brmctx, pmctx, saddr))
return;
- br_multicast_update_query_timer(br, query, max_delay);
- br_multicast_mark_router(br, port);
+ br_multicast_update_query_timer(brmctx, query, max_delay);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
}
+#endif
-static void br_ip4_multicast_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
@@ -2733,14 +3252,13 @@ static void br_ip4_multicast_query(struct net_bridge *br,
struct igmpv3_query *ih3;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct br_ip saddr;
+ struct br_ip saddr = {};
unsigned long max_delay;
unsigned long now = jiffies;
__be32 group;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
group = ih->group;
@@ -2755,7 +3273,8 @@ static void br_ip4_multicast_query(struct net_bridge *br,
} else if (transport_len >= sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs ||
- (br->multicast_igmp_version == 3 && group && ih3->suppress))
+ (brmctx->multicast_igmp_version == 3 && group &&
+ ih3->suppress))
goto out;
max_delay = ih3->code ?
@@ -2768,16 +3287,17 @@ static void br_ip4_multicast_query(struct net_bridge *br,
saddr.proto = htons(ETH_P_IP);
saddr.src.ip4 = iph->saddr;
- br_multicast_query_received(br, port, &br->ip4_other_query,
- &saddr, max_delay);
+ br_ip4_multicast_query_received(brmctx, pmctx,
+ &brmctx->ip4_other_query,
+ &saddr, max_delay);
goto out;
}
- mp = br_mdb_ip4_get(br, group, vid);
+ mp = br_mdb_ip4_get(brmctx->br, group, vid);
if (!mp)
goto out;
- max_delay *= br->multicast_last_member_count;
+ max_delay *= brmctx->multicast_last_member_count;
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
@@ -2786,23 +3306,23 @@ static void br_ip4_multicast_query(struct net_bridge *br,
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0 &&
- (br->multicast_igmp_version == 2 ||
+ (brmctx->multicast_igmp_version == 2 ||
p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
#if IS_ENABLED(CONFIG_IPV6)
-static int br_ip6_multicast_query(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
@@ -2812,7 +3332,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
struct mld2_query *mld2q;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct br_ip saddr;
+ struct br_ip saddr = {};
unsigned long max_delay;
unsigned long now = jiffies;
unsigned int offset = skb_transport_offset(skb);
@@ -2820,9 +3340,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
bool is_general_query;
int err = 0;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
if (transport_len == sizeof(*mld)) {
@@ -2842,7 +3361,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
mld2q = (struct mld2_query *)icmp6_hdr(skb);
if (!mld2q->mld2q_nsrcs)
group = &mld2q->mld2q_mca;
- if (br->multicast_mld_version == 2 &&
+ if (brmctx->multicast_mld_version == 2 &&
!ipv6_addr_any(&mld2q->mld2q_mca) &&
mld2q->mld2q_suppress)
goto out;
@@ -2856,18 +3375,19 @@ static int br_ip6_multicast_query(struct net_bridge *br,
saddr.proto = htons(ETH_P_IPV6);
saddr.src.ip6 = ipv6_hdr(skb)->saddr;
- br_multicast_query_received(br, port, &br->ip6_other_query,
- &saddr, max_delay);
+ br_ip6_multicast_query_received(brmctx, pmctx,
+ &brmctx->ip6_other_query,
+ &saddr, max_delay);
goto out;
} else if (!group) {
goto out;
}
- mp = br_mdb_ip6_get(br, group, vid);
+ mp = br_mdb_ip6_get(brmctx->br, group, vid);
if (!mp)
goto out;
- max_delay *= br->multicast_last_member_count;
+ max_delay *= brmctx->multicast_last_member_count;
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, now + max_delay) :
@@ -2875,25 +3395,25 @@ static int br_ip6_multicast_query(struct net_bridge *br,
mod_timer(&mp->timer, now + max_delay);
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
if (timer_pending(&p->timer) ?
time_after(p->timer.expires, now + max_delay) :
try_to_del_timer_sync(&p->timer) >= 0 &&
- (br->multicast_mld_version == 1 ||
+ (brmctx->multicast_mld_version == 1 ||
p->filter_mode == MCAST_EXCLUDE))
mod_timer(&p->timer, now + max_delay);
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
return err;
}
#endif
static void
-br_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+br_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct br_ip *group,
struct bridge_mcast_other_query *other_query,
struct bridge_mcast_own_query *own_query,
@@ -2904,22 +3424,21 @@ br_multicast_leave_group(struct net_bridge *br,
unsigned long now;
unsigned long time;
- spin_lock(&br->multicast_lock);
- if (!netif_running(br->dev) ||
- (port && port->state == BR_STATE_DISABLED))
+ spin_lock(&brmctx->br->multicast_lock);
+ if (!br_multicast_ctx_should_use(brmctx, pmctx))
goto out;
- mp = br_mdb_ip_get(br, group);
+ mp = br_mdb_ip_get(brmctx->br, group);
if (!mp)
goto out;
- if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) {
+ if (pmctx && (pmctx->port->flags & BR_MULTICAST_FAST_LEAVE)) {
struct net_bridge_port_group __rcu **pp;
for (pp = &mp->ports;
- (p = mlock_dereference(*pp, br)) != NULL;
+ (p = mlock_dereference(*pp, brmctx->br)) != NULL;
pp = &p->next) {
- if (!br_port_group_equal(p, port, src))
+ if (!br_port_group_equal(p, pmctx->port, src))
continue;
if (p->flags & MDB_PG_FLAGS_PERMANENT)
@@ -2934,19 +3453,19 @@ br_multicast_leave_group(struct net_bridge *br,
if (timer_pending(&other_query->timer))
goto out;
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
- __br_multicast_send_query(br, port, NULL, NULL, &mp->addr,
+ if (brmctx->multicast_querier) {
+ __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &mp->addr,
false, 0, NULL);
- time = jiffies + br->multicast_last_member_count *
- br->multicast_last_member_interval;
+ time = jiffies + brmctx->multicast_last_member_count *
+ brmctx->multicast_last_member_interval;
mod_timer(&own_query->timer, time);
- for (p = mlock_dereference(mp->ports, br);
- p != NULL;
- p = mlock_dereference(p->next, br)) {
- if (!br_port_group_equal(p, port, src))
+ for (p = mlock_dereference(mp->ports, brmctx->br);
+ p != NULL && pmctx != NULL;
+ p = mlock_dereference(p->next, brmctx->br)) {
+ if (!br_port_group_equal(p, pmctx->port, src))
continue;
if (!hlist_unhashed(&p->mglist) &&
@@ -2961,10 +3480,10 @@ br_multicast_leave_group(struct net_bridge *br,
}
now = jiffies;
- time = now + br->multicast_last_member_count *
- br->multicast_last_member_interval;
+ time = now + brmctx->multicast_last_member_count *
+ brmctx->multicast_last_member_interval;
- if (!port) {
+ if (!pmctx) {
if (mp->host_joined &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
@@ -2975,10 +3494,10 @@ br_multicast_leave_group(struct net_bridge *br,
goto out;
}
- for (p = mlock_dereference(mp->ports, br);
+ for (p = mlock_dereference(mp->ports, brmctx->br);
p != NULL;
- p = mlock_dereference(p->next, br)) {
- if (p->key.port != port)
+ p = mlock_dereference(p->next, brmctx->br)) {
+ if (p->key.port != pmctx->port)
continue;
if (!hlist_unhashed(&p->mglist) &&
@@ -2991,11 +3510,11 @@ br_multicast_leave_group(struct net_bridge *br,
break;
}
out:
- spin_unlock(&br->multicast_lock);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static void br_ip4_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip4_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
__be32 group,
__u16 vid,
const unsigned char *src)
@@ -3006,20 +3525,21 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
if (ipv4_is_local_multicast(group))
return;
- own_query = port ? &port->ip4_own_query : &br->ip4_own_query;
+ own_query = pmctx ? &pmctx->ip4_own_query : &brmctx->ip4_own_query;
memset(&br_group, 0, sizeof(br_group));
br_group.dst.ip4 = group;
br_group.proto = htons(ETH_P_IP);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip4_other_query,
+ br_multicast_leave_group(brmctx, pmctx, &br_group,
+ &brmctx->ip4_other_query,
own_query, src);
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_leave_group(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_leave_group(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct in6_addr *group,
__u16 vid,
const unsigned char *src)
@@ -3030,14 +3550,15 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
if (ipv6_addr_is_ll_all_nodes(group))
return;
- own_query = port ? &port->ip6_own_query : &br->ip6_own_query;
+ own_query = pmctx ? &pmctx->ip6_own_query : &brmctx->ip6_own_query;
memset(&br_group, 0, sizeof(br_group));
br_group.dst.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
- br_multicast_leave_group(br, port, &br_group, &br->ip6_other_query,
+ br_multicast_leave_group(brmctx, pmctx, &br_group,
+ &brmctx->ip6_other_query,
own_query, src);
}
#endif
@@ -3075,8 +3596,8 @@ static void br_multicast_err_count(const struct net_bridge *br,
u64_stats_update_end(&pstats->syncp);
}
-static void br_multicast_pim(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_multicast_pim(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
const struct sk_buff *skb)
{
unsigned int offset = skb_transport_offset(skb);
@@ -3087,27 +3608,32 @@ static void br_multicast_pim(struct net_bridge *br,
pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
return;
- br_multicast_mark_router(br, port);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_ip4_multicast_mrd_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb)
{
if (ip_hdr(skb)->protocol != IPPROTO_IGMP ||
igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
return -ENOMSG;
- br_multicast_mark_router(br, port);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
return 0;
}
-static int br_multicast_ipv4_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_ipv4_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
+ struct net_bridge_port *p = pmctx ? pmctx->port : NULL;
const unsigned char *src;
struct igmphdr *ih;
int err;
@@ -3119,14 +3645,14 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
} else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
if (ip_hdr(skb)->protocol == IPPROTO_PIM)
- br_multicast_pim(br, port, skb);
+ br_multicast_pim(brmctx, pmctx, skb);
} else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) {
- br_ip4_multicast_mrd_rcv(br, port, skb);
+ br_ip4_multicast_mrd_rcv(brmctx, pmctx, skb);
}
return 0;
} else if (err < 0) {
- br_multicast_err_count(br, port, skb->protocol);
+ br_multicast_err_count(brmctx->br, p, skb->protocol);
return err;
}
@@ -3138,42 +3664,45 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMPV2_HOST_MEMBERSHIP_REPORT:
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
- err = br_ip4_multicast_add_group(br, port, ih->group, vid, src,
- true);
+ err = br_ip4_multicast_add_group(brmctx, pmctx, ih->group, vid,
+ src, true);
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
- err = br_ip4_multicast_igmp3_report(br, port, skb, vid);
+ err = br_ip4_multicast_igmp3_report(brmctx, pmctx, skb, vid);
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- br_ip4_multicast_query(br, port, skb, vid);
+ br_ip4_multicast_query(brmctx, pmctx, skb, vid);
break;
case IGMP_HOST_LEAVE_MESSAGE:
- br_ip4_multicast_leave_group(br, port, ih->group, vid, src);
+ br_ip4_multicast_leave_group(brmctx, pmctx, ih->group, vid, src);
break;
}
- br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
}
#if IS_ENABLED(CONFIG_IPV6)
-static void br_ip6_multicast_mrd_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static void br_ip6_multicast_mrd_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb)
{
if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
return;
- br_multicast_mark_router(br, port);
+ spin_lock(&brmctx->br->multicast_lock);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
+ spin_unlock(&brmctx->br->multicast_lock);
}
-static int br_multicast_ipv6_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+static int br_multicast_ipv6_rcv(struct net_bridge_mcast *brmctx,
+ struct net_bridge_mcast_port *pmctx,
struct sk_buff *skb,
u16 vid)
{
+ struct net_bridge_port *p = pmctx ? pmctx->port : NULL;
const unsigned char *src;
struct mld_msg *mld;
int err;
@@ -3185,11 +3714,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
if (err == -ENODATA &&
ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr))
- br_ip6_multicast_mrd_rcv(br, port, skb);
+ br_ip6_multicast_mrd_rcv(brmctx, pmctx, skb);
return 0;
} else if (err < 0) {
- br_multicast_err_count(br, port, skb->protocol);
+ br_multicast_err_count(brmctx->br, p, skb->protocol);
return err;
}
@@ -3200,29 +3729,32 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
case ICMPV6_MGM_REPORT:
src = eth_hdr(skb)->h_source;
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
- err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid,
- src, true);
+ err = br_ip6_multicast_add_group(brmctx, pmctx, &mld->mld_mca,
+ vid, src, true);
break;
case ICMPV6_MLD2_REPORT:
- err = br_ip6_multicast_mld2_report(br, port, skb, vid);
+ err = br_ip6_multicast_mld2_report(brmctx, pmctx, skb, vid);
break;
case ICMPV6_MGM_QUERY:
- err = br_ip6_multicast_query(br, port, skb, vid);
+ err = br_ip6_multicast_query(brmctx, pmctx, skb, vid);
break;
case ICMPV6_MGM_REDUCTION:
src = eth_hdr(skb)->h_source;
- br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid, src);
+ br_ip6_multicast_leave_group(brmctx, pmctx, &mld->mld_mca, vid,
+ src);
break;
}
- br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
+ br_multicast_count(brmctx->br, p, skb, BR_INPUT_SKB_CB(skb)->igmp,
BR_MCAST_DIR_RX);
return err;
}
#endif
-int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb, u16 vid)
{
int ret = 0;
@@ -3230,16 +3762,36 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
BR_INPUT_SKB_CB(skb)->igmp = 0;
BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
- if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+ if (!br_opt_get((*brmctx)->br, BROPT_MULTICAST_ENABLED))
return 0;
+ if (br_opt_get((*brmctx)->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && vlan) {
+ const struct net_bridge_vlan *masterv;
+
+ /* the vlan has the master flag set only when transmitting
+ * through the bridge device
+ */
+ if (br_vlan_is_master(vlan)) {
+ masterv = vlan;
+ *brmctx = &vlan->br_mcast_ctx;
+ *pmctx = NULL;
+ } else {
+ masterv = vlan->brvlan;
+ *brmctx = &vlan->brvlan->br_mcast_ctx;
+ *pmctx = &vlan->port_mcast_ctx;
+ }
+
+ if (!(masterv->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))
+ return 0;
+ }
+
switch (skb->protocol) {
case htons(ETH_P_IP):
- ret = br_multicast_ipv4_rcv(br, port, skb, vid);
+ ret = br_multicast_ipv4_rcv(*brmctx, *pmctx, skb, vid);
break;
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- ret = br_multicast_ipv6_rcv(br, port, skb, vid);
+ ret = br_multicast_ipv6_rcv(*brmctx, *pmctx, skb, vid);
break;
#endif
}
@@ -3247,32 +3799,39 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
return ret;
}
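
When per-vlan snooping is enabled, the vlan lookup at the top of br_multicast_rcv() above redirects processing to the vlan's contexts: the master vlan entry supplies the global context, a port vlan additionally supplies its per-port context, and processing stops early if the master vlan has global multicast disabled. A standalone sketch of that selection with hypothetical minimal structs, for illustration only.

/* Standalone sketch (not part of the patch): pick global/per-port
 * multicast contexts from a vlan entry. */
#include <stdbool.h>
#include <stddef.h>

struct mcast_ctx { int dummy; };

struct vlan_sketch {
	bool is_master;			/* entry on the bridge itself */
	bool global_mcast_enabled;	/* master-only flag */
	struct vlan_sketch *master;	/* per-port vlans point at master */
	struct mcast_ctx br_ctx;	/* global (bridge/vlan) context */
	struct mcast_ctx port_ctx;	/* per-port context */
};

/* Returns false when processing should stop (global vlan mcast disabled). */
static bool pick_ctx(struct vlan_sketch *v, struct mcast_ctx **brmctx,
		     struct mcast_ctx **pmctx)
{
	struct vlan_sketch *masterv = v->is_master ? v : v->master;

	*brmctx = &masterv->br_ctx;
	*pmctx = v->is_master ? NULL : &v->port_ctx;

	return masterv->global_mcast_enabled;
}
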
-static void br_multicast_query_expired(struct net_bridge *br,
+static void br_multicast_query_expired(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query,
struct bridge_mcast_querier *querier)
{
- spin_lock(&br->multicast_lock);
- if (query->startup_sent < br->multicast_startup_query_count)
+ spin_lock(&brmctx->br->multicast_lock);
+ if (br_multicast_ctx_vlan_disabled(brmctx))
+ goto out;
+
+ if (query->startup_sent < brmctx->multicast_startup_query_count)
query->startup_sent++;
- RCU_INIT_POINTER(querier->port, NULL);
- br_multicast_send_query(br, NULL, query);
- spin_unlock(&br->multicast_lock);
+ br_multicast_send_query(brmctx, NULL, query);
+out:
+ spin_unlock(&brmctx->br->multicast_lock);
}
static void br_ip4_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip4_own_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip4_own_query.timer);
- br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
+ br_multicast_query_expired(brmctx, &brmctx->ip4_own_query,
+ &brmctx->ip4_querier);
}
#if IS_ENABLED(CONFIG_IPV6)
static void br_ip6_multicast_query_expired(struct timer_list *t)
{
- struct net_bridge *br = from_timer(br, t, ip6_own_query.timer);
+ struct net_bridge_mcast *brmctx = from_timer(brmctx, t,
+ ip6_own_query.timer);
- br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
+ br_multicast_query_expired(brmctx, &brmctx->ip6_own_query,
+ &brmctx->ip6_querier);
}
#endif
@@ -3289,45 +3848,65 @@ static void br_multicast_gc_work(struct work_struct *work)
br_multicast_gc(&deleted_head);
}
-void br_multicast_init(struct net_bridge *br)
+void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx)
{
- br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
-
- br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- br->multicast_last_member_count = 2;
- br->multicast_startup_query_count = 2;
+ brmctx->br = br;
+ brmctx->vlan = vlan;
+ brmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ brmctx->multicast_last_member_count = 2;
+ brmctx->multicast_startup_query_count = 2;
- br->multicast_last_member_interval = HZ;
- br->multicast_query_response_interval = 10 * HZ;
- br->multicast_startup_query_interval = 125 * HZ / 4;
- br->multicast_query_interval = 125 * HZ;
- br->multicast_querier_interval = 255 * HZ;
- br->multicast_membership_interval = 260 * HZ;
+ brmctx->multicast_last_member_interval = HZ;
+ brmctx->multicast_query_response_interval = 10 * HZ;
+ brmctx->multicast_startup_query_interval = 125 * HZ / 4;
+ brmctx->multicast_query_interval = 125 * HZ;
+ brmctx->multicast_querier_interval = 255 * HZ;
+ brmctx->multicast_membership_interval = 260 * HZ;
- br->ip4_other_query.delay_time = 0;
- br->ip4_querier.port = NULL;
- br->multicast_igmp_version = 2;
+ brmctx->ip4_other_query.delay_time = 0;
+ brmctx->ip4_querier.port_ifidx = 0;
+ seqcount_init(&brmctx->ip4_querier.seq);
+ brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
- br->multicast_mld_version = 1;
- br->ip6_other_query.delay_time = 0;
- br->ip6_querier.port = NULL;
+ brmctx->multicast_mld_version = 1;
+ brmctx->ip6_other_query.delay_time = 0;
+ brmctx->ip6_querier.port_ifidx = 0;
+ seqcount_init(&brmctx->ip6_querier.seq);
#endif
- br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
- br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
- spin_lock_init(&br->multicast_lock);
- timer_setup(&br->multicast_router_timer,
- br_multicast_local_router_expired, 0);
- timer_setup(&br->ip4_other_query.timer,
+ timer_setup(&brmctx->ip4_mc_router_timer,
+ br_ip4_multicast_local_router_expired, 0);
+ timer_setup(&brmctx->ip4_other_query.timer,
br_ip4_multicast_querier_expired, 0);
- timer_setup(&br->ip4_own_query.timer,
+ timer_setup(&brmctx->ip4_own_query.timer,
br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
- timer_setup(&br->ip6_other_query.timer,
+ timer_setup(&brmctx->ip6_mc_router_timer,
+ br_ip6_multicast_local_router_expired, 0);
+ timer_setup(&brmctx->ip6_other_query.timer,
br_ip6_multicast_querier_expired, 0);
- timer_setup(&br->ip6_own_query.timer,
+ timer_setup(&brmctx->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
+}
+
+void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
+{
+ __br_multicast_stop(brmctx);
+}
+
+void br_multicast_init(struct net_bridge *br)
+{
+ br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
+
+ br_multicast_ctx_init(br, NULL, &br->multicast_ctx);
+
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, true);
+ br_opt_toggle(br, BROPT_HAS_IPV6_ADDR, true);
+
+ spin_lock_init(&br->multicast_lock);
INIT_HLIST_HEAD(&br->mdb_list);
INIT_HLIST_HEAD(&br->mcast_gc_list);
INIT_WORK(&br->mcast_gc_work, br_multicast_gc_work);
@@ -3395,8 +3974,8 @@ void br_multicast_leave_snoopers(struct net_bridge *br)
br_ip6_multicast_leave_snoopers(br);
}
-static void __br_multicast_open(struct net_bridge *br,
- struct bridge_mcast_own_query *query)
+static void __br_multicast_open_query(struct net_bridge *br,
+ struct bridge_mcast_own_query *query)
{
query->startup_sent = 0;
@@ -3406,25 +3985,194 @@ static void __br_multicast_open(struct net_bridge *br,
mod_timer(&query->timer, jiffies);
}
-void br_multicast_open(struct net_bridge *br)
+static void __br_multicast_open(struct net_bridge_mcast *brmctx)
{
- __br_multicast_open(br, &br->ip4_own_query);
+ __br_multicast_open_query(brmctx->br, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- __br_multicast_open(br, &br->ip6_own_query);
+ __br_multicast_open_query(brmctx->br, &brmctx->ip6_own_query);
#endif
}
-void br_multicast_stop(struct net_bridge *br)
+void br_multicast_open(struct net_bridge *br)
+{
+ ASSERT_RTNL();
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+
+ vg = br_vlan_group(br);
+ if (vg) {
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = &vlan->br_mcast_ctx;
+ if (br_vlan_is_brentry(vlan) &&
+ !br_multicast_ctx_vlan_disabled(brmctx))
+ __br_multicast_open(&vlan->br_mcast_ctx);
+ }
+ }
+ } else {
+ __br_multicast_open(&br->multicast_ctx);
+ }
+}
+
+static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
{
- del_timer_sync(&br->multicast_router_timer);
- del_timer_sync(&br->ip4_other_query.timer);
- del_timer_sync(&br->ip4_own_query.timer);
+ del_timer_sync(&brmctx->ip4_mc_router_timer);
+ del_timer_sync(&brmctx->ip4_other_query.timer);
+ del_timer_sync(&brmctx->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
- del_timer_sync(&br->ip6_other_query.timer);
- del_timer_sync(&br->ip6_own_query.timer);
+ del_timer_sync(&brmctx->ip6_mc_router_timer);
+ del_timer_sync(&brmctx->ip6_other_query.timer);
+ del_timer_sync(&brmctx->ip6_own_query.timer);
#endif
}
+void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ struct net_bridge *br;
+
+ /* it's okay to check for the flag without the multicast lock because it
+ * can only change under RTNL -> multicast_lock, we need the latter to
+ * sync with timers and packets
+ */
+ if (on == !!(vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED))
+ return;
+
+ if (br_vlan_is_master(vlan)) {
+ br = vlan->br;
+
+ if (!br_vlan_is_brentry(vlan) ||
+ (on &&
+ br_multicast_ctx_vlan_global_disabled(&vlan->br_mcast_ctx)))
+ return;
+
+ spin_lock_bh(&br->multicast_lock);
+ vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED;
+ spin_unlock_bh(&br->multicast_lock);
+
+ if (on)
+ __br_multicast_open(&vlan->br_mcast_ctx);
+ else
+ __br_multicast_stop(&vlan->br_mcast_ctx);
+ } else {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = br_multicast_port_ctx_get_global(&vlan->port_mcast_ctx);
+ if (on && br_multicast_ctx_vlan_global_disabled(brmctx))
+ return;
+
+ br = vlan->port->br;
+ spin_lock_bh(&br->multicast_lock);
+ vlan->priv_flags ^= BR_VLFLAG_MCAST_ENABLED;
+ if (on)
+ __br_multicast_enable_port_ctx(&vlan->port_mcast_ctx);
+ else
+ __br_multicast_disable_port_ctx(&vlan->port_mcast_ctx);
+ spin_unlock_bh(&br->multicast_lock);
+ }
+}
+
+static void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ struct net_bridge_port *p;
+
+ if (WARN_ON_ONCE(!br_vlan_is_master(vlan)))
+ return;
+
+ list_for_each_entry(p, &vlan->br->port_list, list) {
+ struct net_bridge_vlan *vport;
+
+ vport = br_vlan_find(nbp_vlan_group(p), vlan->vid);
+ if (!vport)
+ continue;
+ br_multicast_toggle_one_vlan(vport, on);
+ }
+
+ if (br_vlan_is_brentry(vlan))
+ br_multicast_toggle_one_vlan(vlan, on);
+}
+
+int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+ struct net_bridge_port *p;
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) == on)
+ return 0;
+
+ if (on && !br_opt_get(br, BROPT_VLAN_ENABLED)) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot enable multicast vlan snooping with vlan filtering disabled");
+ return -EINVAL;
+ }
+
+ vg = br_vlan_group(br);
+ if (!vg)
+ return 0;
+
+ br_opt_toggle(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED, on);
+
+ /* disable/enable non-vlan mcast contexts based on vlan snooping */
+ if (on)
+ __br_multicast_stop(&br->multicast_ctx);
+ else
+ __br_multicast_open(&br->multicast_ctx);
+ list_for_each_entry(p, &br->port_list, list) {
+ if (on)
+ br_multicast_disable_port(p);
+ else
+ br_multicast_enable_port(p);
+ }
+
+ list_for_each_entry(vlan, &vg->vlan_list, vlist)
+ br_multicast_toggle_vlan(vlan, on);
+
+ return 0;
+}
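
Toggling vlan snooping in br_multicast_toggle_vlan_snooping() above swaps which set of contexts is live: the default bridge-wide context and the per-port contexts run only while snooping is off, the per-vlan contexts only while it is on. A standalone sketch of that flip, with hypothetical callbacks standing in for the open/stop and per-vlan toggle helpers; illustration only.

/* Standalone sketch (not part of the patch): snooping on/off just decides
 * which group of contexts is running. */
#include <stdbool.h>

struct bridge_sketch {
	bool vlan_snooping;
	void (*start_default_ctx)(void);
	void (*stop_default_ctx)(void);
	void (*toggle_vlan_ctxs)(bool on);
};

static void toggle_vlan_snooping(struct bridge_sketch *br, bool on)
{
	if (br->vlan_snooping == on)
		return;
	br->vlan_snooping = on;

	/* default (non-vlan) contexts run only while snooping is off */
	if (on)
		br->stop_default_ctx();
	else
		br->start_default_ctx();

	/* per-vlan contexts run only while snooping is on */
	br->toggle_vlan_ctxs(on);
}
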
+
+bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on)
+{
+ ASSERT_RTNL();
+
+ /* BR_VLFLAG_GLOBAL_MCAST_ENABLED relies on eventual consistency and
+ * requires only RTNL to change
+ */
+ if (on == !!(vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED))
+ return false;
+
+ vlan->priv_flags ^= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
+ br_multicast_toggle_vlan(vlan, on);
+
+ return true;
+}
+
+void br_multicast_stop(struct net_bridge *br)
+{
+ ASSERT_RTNL();
+
+ if (br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+
+ vg = br_vlan_group(br);
+ if (vg) {
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ struct net_bridge_mcast *brmctx;
+
+ brmctx = &vlan->br_mcast_ctx;
+ if (br_vlan_is_brentry(vlan) &&
+ !br_multicast_ctx_vlan_disabled(brmctx))
+ __br_multicast_stop(&vlan->br_mcast_ctx);
+ }
+ }
+ } else {
+ __br_multicast_stop(&br->multicast_ctx);
+ }
+}
+
void br_multicast_dev_del(struct net_bridge *br)
{
struct net_bridge_mdb_entry *mp;
@@ -3437,114 +4185,189 @@ void br_multicast_dev_del(struct net_bridge *br)
hlist_move_list(&br->mcast_gc_list, &deleted_head);
spin_unlock_bh(&br->multicast_lock);
+ br_multicast_ctx_deinit(&br->multicast_ctx);
br_multicast_gc(&deleted_head);
cancel_work_sync(&br->mcast_gc_work);
rcu_barrier();
}
-int br_multicast_set_router(struct net_bridge *br, unsigned long val)
+int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val)
{
int err = -EINVAL;
- spin_lock_bh(&br->multicast_lock);
+ spin_lock_bh(&brmctx->br->multicast_lock);
switch (val) {
case MDB_RTR_TYPE_DISABLED:
case MDB_RTR_TYPE_PERM:
- br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
- del_timer(&br->multicast_router_timer);
- br->multicast_router = val;
+ br_mc_router_state_change(brmctx->br, val == MDB_RTR_TYPE_PERM);
+ del_timer(&brmctx->ip4_mc_router_timer);
+#if IS_ENABLED(CONFIG_IPV6)
+ del_timer(&brmctx->ip6_mc_router_timer);
+#endif
+ brmctx->multicast_router = val;
err = 0;
break;
case MDB_RTR_TYPE_TEMP_QUERY:
- if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
- br_mc_router_state_change(br, false);
- br->multicast_router = val;
+ if (brmctx->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
+ br_mc_router_state_change(brmctx->br, false);
+ brmctx->multicast_router = val;
err = 0;
break;
}
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return err;
}
-static void __del_port_router(struct net_bridge_port *p)
+static void
+br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted)
{
- if (hlist_unhashed(&p->rlist))
+ if (!deleted)
+ return;
+
+ /* For backwards compatibility, for now only notify once there is
+ * no multicast router left for either IPv4 or IPv6.
+ */
+ if (!hlist_unhashed(&pmctx->ip4_rlist))
+ return;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (!hlist_unhashed(&pmctx->ip6_rlist))
return;
- hlist_del_init_rcu(&p->rlist);
- br_rtr_notify(p->br->dev, p, RTM_DELMDB);
- br_port_mc_router_state_change(p, false);
+#endif
+
+ br_rtr_notify(pmctx->port->br->dev, pmctx, RTM_DELMDB);
+ br_port_mc_router_state_change(pmctx->port, false);
/* don't allow timer refresh */
- if (p->multicast_router == MDB_RTR_TYPE_TEMP)
- p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP)
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
}
-int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
+int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
+ unsigned long val)
{
- struct net_bridge *br = p->br;
+ struct net_bridge_mcast *brmctx;
unsigned long now = jiffies;
int err = -EINVAL;
+ bool del = false;
- spin_lock(&br->multicast_lock);
- if (p->multicast_router == val) {
+ brmctx = br_multicast_port_ctx_get_global(pmctx);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (pmctx->multicast_router == val) {
/* Refresh the temp router port timer */
- if (p->multicast_router == MDB_RTR_TYPE_TEMP)
- mod_timer(&p->multicast_router_timer,
- now + br->multicast_querier_interval);
+ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) {
+ mod_timer(&pmctx->ip4_mc_router_timer,
+ now + brmctx->multicast_querier_interval);
+#if IS_ENABLED(CONFIG_IPV6)
+ mod_timer(&pmctx->ip6_mc_router_timer,
+ now + brmctx->multicast_querier_interval);
+#endif
+ }
err = 0;
goto unlock;
}
switch (val) {
case MDB_RTR_TYPE_DISABLED:
- p->multicast_router = MDB_RTR_TYPE_DISABLED;
- __del_port_router(p);
- del_timer(&p->multicast_router_timer);
+ pmctx->multicast_router = MDB_RTR_TYPE_DISABLED;
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del_timer(&pmctx->ip4_mc_router_timer);
+ del |= br_ip6_multicast_rport_del(pmctx);
+#if IS_ENABLED(CONFIG_IPV6)
+ del_timer(&pmctx->ip6_mc_router_timer);
+#endif
+ br_multicast_rport_del_notify(pmctx, del);
break;
case MDB_RTR_TYPE_TEMP_QUERY:
- p->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
- __del_port_router(p);
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
+ del |= br_ip4_multicast_rport_del(pmctx);
+ del |= br_ip6_multicast_rport_del(pmctx);
+ br_multicast_rport_del_notify(pmctx, del);
break;
case MDB_RTR_TYPE_PERM:
- p->multicast_router = MDB_RTR_TYPE_PERM;
- del_timer(&p->multicast_router_timer);
- br_multicast_add_router(br, p);
+ pmctx->multicast_router = MDB_RTR_TYPE_PERM;
+ del_timer(&pmctx->ip4_mc_router_timer);
+ br_ip4_multicast_add_router(brmctx, pmctx);
+#if IS_ENABLED(CONFIG_IPV6)
+ del_timer(&pmctx->ip6_mc_router_timer);
+#endif
+ br_ip6_multicast_add_router(brmctx, pmctx);
break;
case MDB_RTR_TYPE_TEMP:
- p->multicast_router = MDB_RTR_TYPE_TEMP;
- br_multicast_mark_router(br, p);
+ pmctx->multicast_router = MDB_RTR_TYPE_TEMP;
+ br_ip4_multicast_mark_router(brmctx, pmctx);
+ br_ip6_multicast_mark_router(brmctx, pmctx);
break;
default:
goto unlock;
}
err = 0;
unlock:
- spin_unlock(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return err;
}
-static void br_multicast_start_querier(struct net_bridge *br,
+int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router)
+{
+ int err;
+
+ if (br_vlan_is_master(v))
+ err = br_multicast_set_router(&v->br_mcast_ctx, mcast_router);
+ else
+ err = br_multicast_set_port_router(&v->port_mcast_ctx,
+ mcast_router);
+
+ return err;
+}
+
+static void br_multicast_start_querier(struct net_bridge_mcast *brmctx,
struct bridge_mcast_own_query *query)
{
struct net_bridge_port *port;
- __br_multicast_open(br, query);
+ if (!br_multicast_ctx_matches_vlan_snooping(brmctx))
+ return;
+
+ __br_multicast_open_query(brmctx->br, query);
rcu_read_lock();
- list_for_each_entry_rcu(port, &br->port_list, list) {
- if (port->state == BR_STATE_DISABLED ||
- port->state == BR_STATE_BLOCKING)
+ list_for_each_entry_rcu(port, &brmctx->br->port_list, list) {
+ struct bridge_mcast_own_query *ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct bridge_mcast_own_query *ip6_own_query;
+#endif
+
+ if (br_multicast_port_ctx_state_stopped(&port->multicast_ctx))
continue;
- if (query == &br->ip4_own_query)
- br_multicast_enable(&port->ip4_own_query);
+ if (br_multicast_ctx_is_vlan(brmctx)) {
+ struct net_bridge_vlan *vlan;
+
+ vlan = br_vlan_find(nbp_vlan_group_rcu(port),
+ brmctx->vlan->vid);
+ if (!vlan ||
+ br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx))
+ continue;
+
+ ip4_own_query = &vlan->port_mcast_ctx.ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6_own_query = &vlan->port_mcast_ctx.ip6_own_query;
+#endif
+ } else {
+ ip4_own_query = &port->multicast_ctx.ip4_own_query;
+#if IS_ENABLED(CONFIG_IPV6)
+ ip6_own_query = &port->multicast_ctx.ip6_own_query;
+#endif
+ }
+
+ if (query == &brmctx->ip4_own_query)
+ br_multicast_enable(ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
else
- br_multicast_enable(&port->ip6_own_query);
+ br_multicast_enable(ip6_own_query);
#endif
}
rcu_read_unlock();
@@ -3578,7 +4401,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val,
br_multicast_open(br);
list_for_each_entry(port, &br->port_list, list)
- __br_multicast_enable_port(port);
+ __br_multicast_enable_port_ctx(&port->multicast_ctx);
change_snoopers = true;
@@ -3621,47 +4444,48 @@ bool br_multicast_router(const struct net_device *dev)
bool is_router;
spin_lock_bh(&br->multicast_lock);
- is_router = br_multicast_is_router(br);
+ is_router = br_multicast_is_router(&br->multicast_ctx, NULL);
spin_unlock_bh(&br->multicast_lock);
return is_router;
}
EXPORT_SYMBOL_GPL(br_multicast_router);
-int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
+int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
{
unsigned long max_delay;
val = !!val;
- spin_lock_bh(&br->multicast_lock);
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER) == val)
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ if (brmctx->multicast_querier == val)
goto unlock;
- br_opt_toggle(br, BROPT_MULTICAST_QUERIER, !!val);
+ WRITE_ONCE(brmctx->multicast_querier, val);
if (!val)
goto unlock;
- max_delay = br->multicast_query_response_interval;
+ max_delay = brmctx->multicast_query_response_interval;
- if (!timer_pending(&br->ip4_other_query.timer))
- br->ip4_other_query.delay_time = jiffies + max_delay;
+ if (!timer_pending(&brmctx->ip4_other_query.timer))
+ brmctx->ip4_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip4_own_query);
+ br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
- if (!timer_pending(&br->ip6_other_query.timer))
- br->ip6_other_query.delay_time = jiffies + max_delay;
+ if (!timer_pending(&brmctx->ip6_other_query.timer))
+ brmctx->ip6_other_query.delay_time = jiffies + max_delay;
- br_multicast_start_querier(br, &br->ip6_own_query);
+ br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
#endif
unlock:
- spin_unlock_bh(&br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return 0;
}
-int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
+int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
+ unsigned long val)
{
/* Currently we support only version 2 and 3 */
switch (val) {
@@ -3672,15 +4496,16 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
return -EINVAL;
}
- spin_lock_bh(&br->multicast_lock);
- br->multicast_igmp_version = val;
- spin_unlock_bh(&br->multicast_lock);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ brmctx->multicast_igmp_version = val;
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return 0;
}
#if IS_ENABLED(CONFIG_IPV6)
-int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
+int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
+ unsigned long val)
{
/* Currently we support version 1 and 2 */
switch (val) {
@@ -3691,9 +4516,9 @@ int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
return -EINVAL;
}
- spin_lock_bh(&br->multicast_lock);
- br->multicast_mld_version = val;
- spin_unlock_bh(&br->multicast_lock);
+ spin_lock_bh(&brmctx->br->multicast_lock);
+ brmctx->multicast_mld_version = val;
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return 0;
}
@@ -3785,7 +4610,7 @@ bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto)
memset(&eth, 0, sizeof(eth));
eth.h_proto = htons(proto);
- ret = br_multicast_querier_exists(br, &eth, NULL);
+ ret = br_multicast_querier_exists(&br->multicast_ctx, &eth, NULL);
unlock:
rcu_read_unlock();
@@ -3804,9 +4629,11 @@ EXPORT_SYMBOL_GPL(br_multicast_has_querier_anywhere);
*/
bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
{
+ struct net_bridge_mcast *brmctx;
struct net_bridge *br;
struct net_bridge_port *port;
bool ret = false;
+ int port_ifidx;
rcu_read_lock();
if (!netif_is_bridge_port(dev))
@@ -3817,17 +4644,20 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
goto unlock;
br = port->br;
+ brmctx = &br->multicast_ctx;
switch (proto) {
case ETH_P_IP:
- if (!timer_pending(&br->ip4_other_query.timer) ||
- rcu_dereference(br->ip4_querier.port) == port)
+ port_ifidx = brmctx->ip4_querier.port_ifidx;
+ if (!timer_pending(&brmctx->ip4_other_query.timer) ||
+ port_ifidx == port->dev->ifindex)
goto unlock;
break;
#if IS_ENABLED(CONFIG_IPV6)
case ETH_P_IPV6:
- if (!timer_pending(&br->ip6_other_query.timer) ||
- rcu_dereference(br->ip6_querier.port) == port)
+ port_ifidx = brmctx->ip6_querier.port_ifidx;
+ if (!timer_pending(&brmctx->ip6_other_query.timer) ||
+ port_ifidx == port->dev->ifindex)
goto unlock;
break;
#endif
@@ -3842,6 +4672,64 @@ unlock:
}
EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
+/**
+ * br_multicast_has_router_adjacent - Checks for a router behind a bridge port
+ * @dev: The bridge port adjacent to which to check for a multicast router
+ * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
+ *
+ * Checks whether the given interface has a bridge on top and if so returns
+ * true if a multicast router is behind one of the other ports of this
+ * bridge. Otherwise returns false.
+ */
+bool br_multicast_has_router_adjacent(struct net_device *dev, int proto)
+{
+ struct net_bridge_mcast_port *pmctx;
+ struct net_bridge_mcast *brmctx;
+ struct net_bridge_port *port;
+ bool ret = false;
+
+ rcu_read_lock();
+ port = br_port_get_check_rcu(dev);
+ if (!port)
+ goto unlock;
+
+ brmctx = &port->br->multicast_ctx;
+ switch (proto) {
+ case ETH_P_IP:
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list,
+ ip4_rlist) {
+ if (pmctx->port == port)
+ continue;
+
+ ret = true;
+ goto unlock;
+ }
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case ETH_P_IPV6:
+ hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list,
+ ip6_rlist) {
+ if (pmctx->port == port)
+ continue;
+
+ ret = true;
+ goto unlock;
+ }
+ break;
+#endif
+ default:
+ /* when compiled without IPv6 support, be conservative and
+ * always assume presence of an IPv6 multicast router
+ */
+ ret = true;
+ }
+
+unlock:
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(br_multicast_has_router_adjacent);
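
br_multicast_has_router_adjacent() walks the per-family router port list and reports whether any port other than the asking one has a multicast router behind it. The sketch below is a compilable userspace model of that "skip the asking port" walk; the singly linked list and struct names are simplified stand-ins for the kernel hlist and port structures.

/* Userspace model of the adjacent-router check; not kernel API. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct port_model {
	int ifindex;
	struct port_model *next;	/* next router port in the list */
};

static bool has_router_adjacent(const struct port_model *router_list,
				const struct port_model *self)
{
	for (const struct port_model *p = router_list; p; p = p->next) {
		if (p == self)
			continue;	/* ignore a router behind the asking port */
		return true;		/* some other port has a router */
	}
	return false;
}

int main(void)
{
	struct port_model p2 = { 2, NULL };
	struct port_model p1 = { 1, &p2 };	/* routers behind ports 1 and 2 */

	printf("port 1: %d\n", has_router_adjacent(&p1, &p1));	/* 1: port 2 qualifies */
	printf("port 2 only: %d\n", has_router_adjacent(&p2, &p2)); /* 0: only itself */
	return 0;
}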
+
static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
const struct sk_buff *skb, u8 type, u8 dir)
{
@@ -3913,7 +4801,8 @@ static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats,
u64_stats_update_end(&pstats->syncp);
}
-void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+void br_multicast_count(struct net_bridge *br,
+ const struct net_bridge_port *p,
const struct sk_buff *skb, u8 type, u8 dir)
{
struct bridge_mcast_stats __percpu *stats;
diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c
index 13290a749d09..f91c071d1608 100644
--- a/net/bridge/br_multicast_eht.c
+++ b/net/bridge/br_multicast_eht.c
@@ -33,7 +33,8 @@
static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr);
-static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr,
int filter_mode,
@@ -388,7 +389,8 @@ static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src,
}
}
-static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
+static void br_eht_convert_host_filter_mode(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
int filter_mode)
{
@@ -405,14 +407,15 @@ static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg,
br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr);
break;
case MCAST_EXCLUDE:
- br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr,
- MCAST_EXCLUDE,
+ br_multicast_create_eht_set_entry(brmctx, pg, &zero_addr,
+ h_addr, MCAST_EXCLUDE,
true);
break;
}
}
-static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
+static void br_multicast_create_eht_set_entry(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *src_addr,
union net_bridge_eht_addr *h_addr,
int filter_mode,
@@ -441,8 +444,8 @@ static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg,
if (!set_h)
goto fail_set_entry;
- mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br));
- mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br));
+ mod_timer(&set_h->timer, jiffies + br_multicast_gmi(brmctx));
+ mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(brmctx));
return;
@@ -499,7 +502,8 @@ static void br_multicast_del_eht_host(struct net_bridge_port_group *pg,
}
/* create new set entries from reports */
-static void __eht_create_set_entries(struct net_bridge_port_group *pg,
+static void __eht_create_set_entries(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -512,8 +516,8 @@ static void __eht_create_set_entries(struct net_bridge_port_group *pg,
memset(&eht_src_addr, 0, sizeof(eht_src_addr));
for (src_idx = 0; src_idx < nsrcs; src_idx++) {
memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size);
- br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr,
- filter_mode,
+ br_multicast_create_eht_set_entry(brmctx, pg, &eht_src_addr,
+ h_addr, filter_mode,
false);
}
}
@@ -549,7 +553,8 @@ static bool __eht_del_set_entries(struct net_bridge_port_group *pg,
return changed;
}
-static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_allow(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -559,8 +564,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
switch (br_multicast_eht_host_filter_mode(pg, h_addr)) {
case MCAST_INCLUDE:
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
- MCAST_INCLUDE);
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs,
+ addr_size, MCAST_INCLUDE);
break;
case MCAST_EXCLUDE:
changed = __eht_del_set_entries(pg, h_addr, srcs, nsrcs,
@@ -571,7 +576,8 @@ static bool br_multicast_eht_allow(struct net_bridge_port_group *pg,
return changed;
}
-static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_block(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -585,7 +591,7 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
addr_size);
break;
case MCAST_EXCLUDE:
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_EXCLUDE);
break;
}
@@ -594,7 +600,8 @@ static bool br_multicast_eht_block(struct net_bridge_port_group *pg,
}
/* flush_entries is true when changing mode */
-static bool __eht_inc_exc(struct net_bridge_port_group *pg,
+static bool __eht_inc_exc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -612,11 +619,10 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
/* if we're changing mode del host and its entries */
if (flush_entries)
br_multicast_del_eht_host(pg, h_addr);
- __eht_create_set_entries(pg, h_addr, srcs, nsrcs, addr_size,
+ __eht_create_set_entries(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
filter_mode);
/* we can be missing sets only if we've deleted some entries */
if (flush_entries) {
- struct net_bridge *br = pg->key.port->br;
struct net_bridge_group_eht_set *eht_set;
struct net_bridge_group_src *src_ent;
struct hlist_node *tmp;
@@ -647,14 +653,15 @@ static bool __eht_inc_exc(struct net_bridge_port_group *pg,
&eht_src_addr);
if (!eht_set)
continue;
- mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br));
+ mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(brmctx));
}
}
return changed;
}
-static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_inc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -663,14 +670,15 @@ static bool br_multicast_eht_inc(struct net_bridge_port_group *pg,
{
bool changed;
- changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+ changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_INCLUDE, to_report);
- br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE);
+ br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_INCLUDE);
return changed;
}
-static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
+static bool br_multicast_eht_exc(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -679,14 +687,15 @@ static bool br_multicast_eht_exc(struct net_bridge_port_group *pg,
{
bool changed;
- changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size,
+ changed = __eht_inc_exc(brmctx, pg, h_addr, srcs, nsrcs, addr_size,
MCAST_EXCLUDE, to_report);
- br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE);
+ br_eht_convert_host_filter_mode(brmctx, pg, h_addr, MCAST_EXCLUDE);
return changed;
}
-static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
+static bool __eht_ip4_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -696,24 +705,25 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
switch (grec_type) {
case IGMPV3_ALLOW_NEW_SOURCES:
- br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32));
+ br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs,
+ sizeof(__be32));
break;
case IGMPV3_BLOCK_OLD_SOURCES:
- changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32));
break;
case IGMPV3_CHANGE_TO_INCLUDE:
to_report = true;
fallthrough;
case IGMPV3_MODE_IS_INCLUDE:
- changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32), to_report);
break;
case IGMPV3_CHANGE_TO_EXCLUDE:
to_report = true;
fallthrough;
case IGMPV3_MODE_IS_EXCLUDE:
- changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(__be32), to_report);
break;
}
@@ -722,7 +732,8 @@ static bool __eht_ip4_handle(struct net_bridge_port_group *pg,
}
#if IS_ENABLED(CONFIG_IPV6)
-static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
+static bool __eht_ip6_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
union net_bridge_eht_addr *h_addr,
void *srcs,
u32 nsrcs,
@@ -732,18 +743,18 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
switch (grec_type) {
case MLD2_ALLOW_NEW_SOURCES:
- br_multicast_eht_allow(pg, h_addr, srcs, nsrcs,
+ br_multicast_eht_allow(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr));
break;
case MLD2_BLOCK_OLD_SOURCES:
- changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_block(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr));
break;
case MLD2_CHANGE_TO_INCLUDE:
to_report = true;
fallthrough;
case MLD2_MODE_IS_INCLUDE:
- changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_inc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr),
to_report);
break;
@@ -751,7 +762,7 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
to_report = true;
fallthrough;
case MLD2_MODE_IS_EXCLUDE:
- changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs,
+ changed = br_multicast_eht_exc(brmctx, pg, h_addr, srcs, nsrcs,
sizeof(struct in6_addr),
to_report);
break;
@@ -762,7 +773,8 @@ static bool __eht_ip6_handle(struct net_bridge_port_group *pg,
#endif
/* true means an entry was deleted */
-bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
void *h_addr,
void *srcs,
u32 nsrcs,
@@ -779,12 +791,12 @@ bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
memset(&eht_host_addr, 0, sizeof(eht_host_addr));
memcpy(&eht_host_addr, h_addr, addr_size);
if (addr_size == sizeof(__be32))
- changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs,
- grec_type);
+ changed = __eht_ip4_handle(brmctx, pg, &eht_host_addr, srcs,
+ nsrcs, grec_type);
#if IS_ENABLED(CONFIG_IPV6)
else
- changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs,
- grec_type);
+ changed = __eht_ip6_handle(brmctx, pg, &eht_host_addr, srcs,
+ nsrcs, grec_type);
#endif
out:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index e4e6e991313e..6c58fc14d2cb 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -287,7 +287,7 @@ static int br_port_fill_attrs(struct sk_buff *skb,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER,
- p->multicast_router) ||
+ p->multicast_ctx.multicast_router) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
p->multicast_eht_hosts_limit) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
@@ -932,7 +932,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) {
u8 mcast_router = nla_get_u8(tb[IFLA_BRPORT_MULTICAST_ROUTER]);
- err = br_multicast_set_port_router(p, mcast_router);
+ err = br_multicast_set_port_router(&p->multicast_ctx,
+ mcast_router);
if (err)
return err;
}
@@ -1286,7 +1287,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_ROUTER]) {
u8 multicast_router = nla_get_u8(data[IFLA_BR_MCAST_ROUTER]);
- err = br_multicast_set_router(br, multicast_router);
+ err = br_multicast_set_router(&br->multicast_ctx,
+ multicast_router);
if (err)
return err;
}
@@ -1309,7 +1311,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_QUERIER]) {
u8 mcast_querier = nla_get_u8(data[IFLA_BR_MCAST_QUERIER]);
- err = br_multicast_set_querier(br, mcast_querier);
+ err = br_multicast_set_querier(&br->multicast_ctx,
+ mcast_querier);
if (err)
return err;
}
@@ -1324,49 +1327,49 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
- br->multicast_last_member_count = val;
+ br->multicast_ctx.multicast_last_member_count = val;
}
if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) {
u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]);
- br->multicast_startup_query_count = val;
+ br->multicast_ctx.multicast_startup_query_count = val;
}
if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]);
- br->multicast_last_member_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]);
- br->multicast_membership_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERIER_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]);
- br->multicast_querier_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
- br->multicast_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]);
- br->multicast_query_response_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
- br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
}
if (data[IFLA_BR_MCAST_STATS_ENABLED]) {
@@ -1380,7 +1383,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
__u8 igmp_version;
igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]);
- err = br_multicast_set_igmp_version(br, igmp_version);
+ err = br_multicast_set_igmp_version(&br->multicast_ctx,
+ igmp_version);
if (err)
return err;
}
@@ -1390,7 +1394,8 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
__u8 mld_version;
mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]);
- err = br_multicast_set_mld_version(br, mld_version);
+ err = br_multicast_set_mld_version(&br->multicast_ctx,
+ mld_version);
if (err)
return err;
}
@@ -1497,6 +1502,7 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_IGMP_VERSION */
nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_MLD_VERSION */
+ br_multicast_querier_state_size() + /* IFLA_BR_MCAST_QUERIER_STATE */
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */
@@ -1566,50 +1572,53 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
return -EMSGSIZE;
#endif
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
+ if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER,
+ br->multicast_ctx.multicast_router) ||
nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING,
br_opt_get(br, BROPT_MULTICAST_ENABLED)) ||
nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
br_opt_get(br, BROPT_MULTICAST_QUERY_USE_IFADDR)) ||
nla_put_u8(skb, IFLA_BR_MCAST_QUERIER,
- br_opt_get(br, BROPT_MULTICAST_QUERIER)) ||
+ br->multicast_ctx.multicast_querier) ||
nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
- br->multicast_last_member_count) ||
+ br->multicast_ctx.multicast_last_member_count) ||
nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
- br->multicast_startup_query_count) ||
+ br->multicast_ctx.multicast_startup_query_count) ||
nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION,
- br->multicast_igmp_version))
+ br->multicast_ctx.multicast_igmp_version) ||
+ br_multicast_dump_querier_state(skb, &br->multicast_ctx,
+ IFLA_BR_MCAST_QUERIER_STATE))
return -EMSGSIZE;
#if IS_ENABLED(CONFIG_IPV6)
if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION,
- br->multicast_mld_version))
+ br->multicast_ctx.multicast_mld_version))
return -EMSGSIZE;
#endif
- clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_membership_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_querier_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_query_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_query_response_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
- clockval = jiffies_to_clock_t(br->multicast_startup_query_interval);
+ clockval = jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval);
if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval,
IFLA_BR_PAD))
return -EMSGSIZE;
@@ -1644,7 +1653,6 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
p = br_port_get_rtnl(dev);
if (!p)
return 0;
- br = p->br;
vg = nbp_vlan_group(p);
break;
default:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e013d33f1c7c..b4cef3a97f12 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -29,6 +29,8 @@
#define BR_MULTICAST_DEFAULT_HASH_MAX 4096
+#define BR_HWDOM_MAX BITS_PER_LONG
+
#define BR_VERSION "2.3"
/* Control of forwarding link local multicast */
@@ -79,7 +81,8 @@ struct bridge_mcast_other_query {
/* selected querier */
struct bridge_mcast_querier {
struct br_ip addr;
- struct net_bridge_port __rcu *port;
+ int port_ifidx;
+ seqcount_t seq;
};
/* IGMP/MLD statistics */
@@ -89,6 +92,60 @@ struct bridge_mcast_stats {
};
#endif
+/* net_bridge_mcast_port must be always defined due to forwarding stubs */
+struct net_bridge_mcast_port {
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct net_bridge_port *port;
+ struct net_bridge_vlan *vlan;
+
+ struct bridge_mcast_own_query ip4_own_query;
+ struct timer_list ip4_mc_router_timer;
+ struct hlist_node ip4_rlist;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct bridge_mcast_own_query ip6_own_query;
+ struct timer_list ip6_mc_router_timer;
+ struct hlist_node ip6_rlist;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+ unsigned char multicast_router;
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+};
+
+/* net_bridge_mcast must be always defined due to forwarding stubs */
+struct net_bridge_mcast {
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct net_bridge *br;
+ struct net_bridge_vlan *vlan;
+
+ u32 multicast_last_member_count;
+ u32 multicast_startup_query_count;
+
+ u8 multicast_querier;
+ u8 multicast_igmp_version;
+ u8 multicast_router;
+#if IS_ENABLED(CONFIG_IPV6)
+ u8 multicast_mld_version;
+#endif
+ unsigned long multicast_last_member_interval;
+ unsigned long multicast_membership_interval;
+ unsigned long multicast_querier_interval;
+ unsigned long multicast_query_interval;
+ unsigned long multicast_query_response_interval;
+ unsigned long multicast_startup_query_interval;
+ struct hlist_head ip4_mc_router_list;
+ struct timer_list ip4_mc_router_timer;
+ struct bridge_mcast_other_query ip4_other_query;
+ struct bridge_mcast_own_query ip4_own_query;
+ struct bridge_mcast_querier ip4_querier;
+#if IS_ENABLED(CONFIG_IPV6)
+ struct hlist_head ip6_mc_router_list;
+ struct timer_list ip6_mc_router_timer;
+ struct bridge_mcast_other_query ip6_other_query;
+ struct bridge_mcast_own_query ip6_own_query;
+ struct bridge_mcast_querier ip6_querier;
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
+};
+
struct br_tunnel_info {
__be64 tunnel_id;
struct metadata_dst __rcu *tunnel_dst;
@@ -98,6 +155,8 @@ struct br_tunnel_info {
enum {
BR_VLFLAG_PER_PORT_STATS = BIT(0),
BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT(1),
+ BR_VLFLAG_MCAST_ENABLED = BIT(2),
+ BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3),
};
/**
@@ -114,6 +173,9 @@ enum {
* @refcnt: if MASTER flag set, this is bumped for each port referencing it
* @brvlan: if MASTER flag unset, this points to the global per-VLAN context
* for this VLAN entry
+ * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
+ * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
+ * context
* @vlist: sorted list of VLAN entries
* @rcu: used for entry destruction
*
@@ -141,6 +203,11 @@ struct net_bridge_vlan {
struct br_tunnel_info tinfo;
+ union {
+ struct net_bridge_mcast br_mcast_ctx;
+ struct net_bridge_mcast_port port_mcast_ctx;
+ };
+
struct list_head vlist;
struct rcu_head rcu;
@@ -305,18 +372,14 @@ struct net_bridge_port {
struct kobject kobj;
struct rcu_head rcu;
+ struct net_bridge_mcast_port multicast_ctx;
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- struct bridge_mcast_own_query ip4_own_query;
-#if IS_ENABLED(CONFIG_IPV6)
- struct bridge_mcast_own_query ip6_own_query;
-#endif /* IS_ENABLED(CONFIG_IPV6) */
+ struct bridge_mcast_stats __percpu *mcast_stats;
+
u32 multicast_eht_hosts_limit;
u32 multicast_eht_hosts_cnt;
- unsigned char multicast_router;
- struct bridge_mcast_stats __percpu *mcast_stats;
- struct timer_list multicast_router_timer;
struct hlist_head mglist;
- struct hlist_node rlist;
#endif
#ifdef CONFIG_SYSFS
@@ -327,7 +390,12 @@ struct net_bridge_port {
struct netpoll *np;
#endif
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* Identifier used to group ports that share the same switchdev
+ * hardware domain.
+ */
+ int hwdom;
+ int offload_count;
+ struct netdev_phys_item_id ppid;
#endif
u16 group_fwd_mask;
u16 backup_redirected_cnt;
@@ -365,7 +433,6 @@ enum net_bridge_opts {
BROPT_NF_CALL_ARPTABLES,
BROPT_GROUP_ADDR_SET,
BROPT_MULTICAST_ENABLED,
- BROPT_MULTICAST_QUERIER,
BROPT_MULTICAST_QUERY_USE_IFADDR,
BROPT_MULTICAST_STATS_ENABLED,
BROPT_HAS_IPV6_ADDR,
@@ -374,6 +441,7 @@ enum net_bridge_opts {
BROPT_VLAN_STATS_PER_PORT,
BROPT_NO_LL_LEARN,
BROPT_VLAN_BRIDGE_BINDING,
+ BROPT_MCAST_VLAN_SNOOPING_ENABLED,
};
struct net_bridge {
@@ -424,43 +492,21 @@ struct net_bridge {
BR_USER_STP, /* new RSTP in userspace */
} stp_enabled;
+ struct net_bridge_mcast multicast_ctx;
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ struct bridge_mcast_stats __percpu *mcast_stats;
u32 hash_max;
- u32 multicast_last_member_count;
- u32 multicast_startup_query_count;
-
- u8 multicast_igmp_version;
- u8 multicast_router;
-#if IS_ENABLED(CONFIG_IPV6)
- u8 multicast_mld_version;
-#endif
spinlock_t multicast_lock;
- unsigned long multicast_last_member_interval;
- unsigned long multicast_membership_interval;
- unsigned long multicast_querier_interval;
- unsigned long multicast_query_interval;
- unsigned long multicast_query_response_interval;
- unsigned long multicast_startup_query_interval;
struct rhashtable mdb_hash_tbl;
struct rhashtable sg_port_tbl;
struct hlist_head mcast_gc_list;
struct hlist_head mdb_list;
- struct hlist_head router_list;
- struct timer_list multicast_router_timer;
- struct bridge_mcast_other_query ip4_other_query;
- struct bridge_mcast_own_query ip4_own_query;
- struct bridge_mcast_querier ip4_querier;
- struct bridge_mcast_stats __percpu *mcast_stats;
-#if IS_ENABLED(CONFIG_IPV6)
- struct bridge_mcast_other_query ip6_other_query;
- struct bridge_mcast_own_query ip6_own_query;
- struct bridge_mcast_querier ip6_querier;
-#endif /* IS_ENABLED(CONFIG_IPV6) */
struct work_struct mcast_gc_work;
#endif
@@ -472,7 +518,12 @@ struct net_bridge {
u32 auto_cnt;
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* Counter used to make sure that hardware domains get unique
+ * identifiers in case a bridge spans multiple switchdev instances.
+ */
+ int last_hwdom;
+ /* Bit mask of hardware domain numbers in use */
+ unsigned long busy_hwdoms;
#endif
struct hlist_head fdb_list;
@@ -502,7 +553,20 @@ struct br_input_skb_cb {
#endif
#ifdef CONFIG_NET_SWITCHDEV
- int offload_fwd_mark;
+ /* Set if TX data plane offloading is used towards at least one
+ * hardware domain.
+ */
+ u8 tx_fwd_offload:1;
+ /* The switchdev hardware domain from which this packet was received.
+ * If skb->offload_fwd_mark was set, then this packet was already
+ * forwarded by hardware to the other ports in the source hardware
+ * domain, otherwise it wasn't.
+ */
+ int src_hwdom;
+ /* Bit mask of hardware domains towards which this packet has already
+ * been transmitted using the TX data plane offload.
+ */
+ unsigned long fwd_hwdoms;
#endif
};
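
The per-skb src_hwdom/fwd_hwdoms fields let the software forwarding path skip ports whose hardware domain already received the frame via the TX offload. A compilable model of that bitmap bookkeeping is sketched below; the names and the "one bit per domain" encoding are illustrative stand-ins for the kernel fields above.

/* Userspace model of the hardware-domain bitmap: mark a domain once the frame
 * was handed to hardware for it, and skip software flooding to that domain.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define HWDOM_MAX (sizeof(unsigned long) * CHAR_BIT)	/* mirrors BR_HWDOM_MAX */

struct skb_cb_model {
	int src_hwdom;			/* domain the frame came from */
	unsigned long fwd_hwdoms;	/* domains already handled in hardware */
};

static void mark_hw_forwarded(struct skb_cb_model *cb, int dst_hwdom)
{
	cb->fwd_hwdoms |= 1UL << dst_hwdom;
}

static bool should_sw_forward(const struct skb_cb_model *cb, int dst_hwdom)
{
	/* skip if hardware already delivered the frame to this domain */
	return !(cb->fwd_hwdoms & (1UL << dst_hwdom));
}

int main(void)
{
	struct skb_cb_model cb = { .src_hwdom = 1, .fwd_hwdoms = 0 };

	mark_hw_forwarded(&cb, 2);	/* TX offload covered domain 2 */
	printf("sw forward to hwdom 2? %d\n", should_sw_forward(&cb, 2));	/* 0 */
	printf("sw forward to hwdom 3? %d\n", should_sw_forward(&cb, 3));	/* 1 */
	return 0;
}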
@@ -612,6 +676,20 @@ static inline bool br_vlan_valid_range(const struct bridge_vlan_info *cur,
return true;
}
+static inline u8 br_vlan_multicast_router(const struct net_bridge_vlan *v)
+{
+ u8 mcast_router = MDB_RTR_TYPE_DISABLED;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (!br_vlan_is_master(v))
+ mcast_router = v->port_mcast_ctx.multicast_router;
+ else
+ mcast_router = v->br_mcast_ctx.multicast_router;
+#endif
+
+ return mcast_router;
+}
+
static inline int br_afspec_cmd_to_rtm(int cmd)
{
switch (cmd) {
@@ -714,6 +792,8 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
bool swdev_notify);
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
const unsigned char *addr, u16 vid, bool offloaded);
+int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding,
+ struct notifier_block *nb);
/* br_forward.c */
enum br_pkt_type {
@@ -786,15 +866,18 @@ br_port_get_check_rtnl(const struct net_device *dev)
}
/* br_ioctl.c */
-int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
- void __user *arg);
+int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq,
+ void __user *data, int cmd);
+int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
+ struct ifreq *ifr, void __user *uarg);
/* br_multicast.c */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
+int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb, u16 vid);
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid);
int br_multicast_add_port(struct net_bridge_port *port);
void br_multicast_del_port(struct net_bridge_port *port);
@@ -806,17 +889,22 @@ void br_multicast_leave_snoopers(struct net_bridge *br);
void br_multicast_open(struct net_bridge *br);
void br_multicast_stop(struct net_bridge *br);
void br_multicast_dev_del(struct net_bridge *br);
-void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
- struct sk_buff *skb, bool local_rcv, bool local_orig);
-int br_multicast_set_router(struct net_bridge *br, unsigned long val);
-int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
+void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
+ bool local_rcv, bool local_orig);
+int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val);
+int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
+ unsigned long val);
+int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router);
int br_multicast_toggle(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack);
-int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
+int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val);
int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
-int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
+int br_multicast_set_igmp_version(struct net_bridge_mcast *brmctx,
+ unsigned long val);
#if IS_ENABLED(CONFIG_IPV6)
-int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
+int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx,
+ unsigned long val);
#endif
struct net_bridge_mdb_entry *
br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst);
@@ -831,12 +919,13 @@ int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg, int type);
-void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
int type);
void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg,
struct net_bridge_port_group __rcu **pp);
-void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
+void br_multicast_count(struct net_bridge *br,
+ const struct net_bridge_port *p,
const struct sk_buff *skb, u8 type, u8 dir);
int br_multicast_init_stats(struct net_bridge *br);
void br_multicast_uninit_stats(struct net_bridge *br);
@@ -845,7 +934,8 @@ void br_multicast_get_stats(const struct net_bridge *br,
struct br_mcast_stats *dest);
void br_mdb_init(void);
void br_mdb_uninit(void);
-void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify);
+void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg,
u8 filter_mode);
@@ -855,6 +945,29 @@ struct net_bridge_group_src *
br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip);
void br_multicast_del_group_src(struct net_bridge_group_src *src,
bool fastleave);
+void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx);
+void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx);
+void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx);
+void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx);
+void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on);
+int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on,
+ struct netlink_ext_ack *extack);
+bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on);
+
+int br_mdb_replay(struct net_device *br_dev, struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack);
+int br_rports_fill_info(struct sk_buff *skb,
+ const struct net_bridge_mcast *brmctx);
+int br_multicast_dump_querier_state(struct sk_buff *skb,
+ const struct net_bridge_mcast *brmctx,
+ int nest_attr);
+size_t br_multicast_querier_state_size(void);
+size_t br_rports_size(const struct net_bridge_mcast *brmctx);
static inline bool br_group_is_l2(const struct br_ip *group)
{
@@ -864,22 +977,82 @@ static inline bool br_group_is_l2(const struct br_ip *group)
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
-static inline bool br_multicast_is_router(struct net_bridge *br)
+static inline struct hlist_node *
+br_multicast_get_first_rport_node(struct net_bridge_mcast *brmctx,
+ struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (skb->protocol == htons(ETH_P_IPV6))
+ return rcu_dereference(hlist_first_rcu(&brmctx->ip6_mc_router_list));
+#endif
+ return rcu_dereference(hlist_first_rcu(&brmctx->ip4_mc_router_list));
+}
+
+static inline struct net_bridge_port *
+br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb)
{
- return br->multicast_router == 2 ||
- (br->multicast_router == 1 &&
- timer_pending(&br->multicast_router_timer));
+ struct net_bridge_mcast_port *mctx;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (skb->protocol == htons(ETH_P_IPV6))
+ mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port,
+ ip6_rlist);
+ else
+#endif
+ mctx = hlist_entry_safe(rp, struct net_bridge_mcast_port,
+ ip4_rlist);
+
+ if (mctx)
+ return mctx->port;
+ else
+ return NULL;
+}
+
+static inline bool br_ip4_multicast_is_router(struct net_bridge_mcast *brmctx)
+{
+ return timer_pending(&brmctx->ip4_mc_router_timer);
+}
+
+static inline bool br_ip6_multicast_is_router(struct net_bridge_mcast *brmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ return timer_pending(&brmctx->ip6_mc_router_timer);
+#else
+ return false;
+#endif
+}
+
+static inline bool
+br_multicast_is_router(struct net_bridge_mcast *brmctx, struct sk_buff *skb)
+{
+ switch (brmctx->multicast_router) {
+ case MDB_RTR_TYPE_PERM:
+ return true;
+ case MDB_RTR_TYPE_TEMP_QUERY:
+ if (skb) {
+ if (skb->protocol == htons(ETH_P_IP))
+ return br_ip4_multicast_is_router(brmctx);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return br_ip6_multicast_is_router(brmctx);
+ } else {
+ return br_ip4_multicast_is_router(brmctx) ||
+ br_ip6_multicast_is_router(brmctx);
+ }
+ fallthrough;
+ default:
+ return false;
+ }
}
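
br_multicast_is_router() now decides per address family: a permanently configured router is always true, the learned (temp query) state depends on the matching per-family timer, and with no skb both timers are consulted. Below is a compilable userspace model of that decision table, with timer state reduced to two booleans and the MDB_RTR_* values re-declared locally; it is a sketch of the logic, not the kernel helper.

#include <stdbool.h>
#include <stdio.h>

enum rtr_type { RTR_DISABLED, RTR_TEMP_QUERY, RTR_PERM, RTR_TEMP };

struct mcast_ctx_model {
	enum rtr_type multicast_router;
	bool ip4_timer_pending;
	bool ip6_timer_pending;
};

/* family: 4, 6, or 0 when no packet context is available */
static bool is_router(const struct mcast_ctx_model *c, int family)
{
	switch (c->multicast_router) {
	case RTR_PERM:
		return true;			/* statically configured */
	case RTR_TEMP_QUERY:
		if (family == 4)
			return c->ip4_timer_pending;
		if (family == 6)
			return c->ip6_timer_pending;
		return c->ip4_timer_pending || c->ip6_timer_pending;
	default:
		return false;			/* disabled */
	}
}

int main(void)
{
	struct mcast_ctx_model c = { RTR_TEMP_QUERY, true, false };

	printf("IPv4: %d IPv6: %d any: %d\n",
	       is_router(&c, 4), is_router(&c, 6), is_router(&c, 0));
	return 0;
}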
static inline bool
-__br_multicast_querier_exists(struct net_bridge *br,
- struct bridge_mcast_other_query *querier,
- const bool is_ipv6)
+__br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
+ struct bridge_mcast_other_query *querier,
+ const bool is_ipv6)
{
bool own_querier_enabled;
- if (br_opt_get(br, BROPT_MULTICAST_QUERIER)) {
- if (is_ipv6 && !br_opt_get(br, BROPT_HAS_IPV6_ADDR))
+ if (brmctx->multicast_querier) {
+ if (is_ipv6 && !br_opt_get(brmctx->br, BROPT_HAS_IPV6_ADDR))
own_querier_enabled = false;
else
own_querier_enabled = true;
@@ -891,18 +1064,18 @@ __br_multicast_querier_exists(struct net_bridge *br,
(own_querier_enabled || timer_pending(&querier->timer));
}
-static inline bool br_multicast_querier_exists(struct net_bridge *br,
+static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
struct ethhdr *eth,
const struct net_bridge_mdb_entry *mdb)
{
switch (eth->h_proto) {
case (htons(ETH_P_IP)):
- return __br_multicast_querier_exists(br,
- &br->ip4_other_query, false);
+ return __br_multicast_querier_exists(brmctx,
+ &brmctx->ip4_other_query, false);
#if IS_ENABLED(CONFIG_IPV6)
case (htons(ETH_P_IPV6)):
- return __br_multicast_querier_exists(br,
- &br->ip6_other_query, true);
+ return __br_multicast_querier_exists(brmctx,
+ &brmctx->ip6_other_query, true);
#endif
default:
return !!mdb && br_group_is_l2(&mdb->addr);
@@ -923,15 +1096,16 @@ static inline bool br_multicast_is_star_g(const struct br_ip *ip)
}
}
-static inline bool br_multicast_should_handle_mode(const struct net_bridge *br,
- __be16 proto)
+static inline bool
+br_multicast_should_handle_mode(const struct net_bridge_mcast *brmctx,
+ __be16 proto)
{
switch (proto) {
case htons(ETH_P_IP):
- return !!(br->multicast_igmp_version == 3);
+ return !!(brmctx->multicast_igmp_version == 3);
#if IS_ENABLED(CONFIG_IPV6)
case htons(ETH_P_IPV6):
- return !!(br->multicast_mld_version == 2);
+ return !!(brmctx->multicast_mld_version == 2);
#endif
default:
return false;
@@ -943,28 +1117,145 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
return BR_INPUT_SKB_CB(skb)->igmp;
}
-static inline unsigned long br_multicast_lmqt(const struct net_bridge *br)
+static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brmctx)
{
- return br->multicast_last_member_interval *
- br->multicast_last_member_count;
+ return brmctx->multicast_last_member_interval *
+ brmctx->multicast_last_member_count;
}
-static inline unsigned long br_multicast_gmi(const struct net_bridge *br)
+static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx)
{
/* use the RFC default of 2 for QRV */
- return 2 * br->multicast_query_interval +
- br->multicast_query_response_interval;
+ return 2 * brmctx->multicast_query_interval +
+ brmctx->multicast_query_response_interval;
}
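
As a quick sanity check of the two interval helpers, assuming the usual bridge defaults (query interval 125 s, query response interval 10 s, last member interval 1 s, last member count 2): GMI = 2 * 125 + 10 = 260 s and LMQT = 1 * 2 = 2 s. A tiny compilable check, in seconds rather than jiffies for readability:

#include <stdio.h>

int main(void)
{
	unsigned long query_interval = 125;		/* seconds, default */
	unsigned long query_response_interval = 10;
	unsigned long last_member_interval = 1;
	unsigned long last_member_count = 2;

	/* group membership interval: 2 * QI + QRI (QRV fixed at 2) */
	unsigned long gmi = 2 * query_interval + query_response_interval;
	/* last member query time: LMI * LMC */
	unsigned long lmqt = last_member_interval * last_member_count;

	printf("GMI  = %lu s\n", gmi);	/* 260 */
	printf("LMQT = %lu s\n", lmqt);	/* 2 */
	return 0;
}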
+
+static inline bool
+br_multicast_ctx_is_vlan(const struct net_bridge_mcast *brmctx)
+{
+ return !!brmctx->vlan;
+}
+
+static inline bool
+br_multicast_port_ctx_is_vlan(const struct net_bridge_mcast_port *pmctx)
+{
+ return !!pmctx->vlan;
+}
+
+static inline struct net_bridge_mcast *
+br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx)
+{
+ if (!br_multicast_port_ctx_is_vlan(pmctx))
+ return &pmctx->port->br->multicast_ctx;
+ else
+ return &pmctx->vlan->brvlan->br_mcast_ctx;
+}
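
br_multicast_port_ctx_get_global() resolves a plain per-port context to the bridge-wide context and a per-port/vlan context to the global context of its master VLAN entry (via brvlan). The sketch below models that resolution with simplified stand-in structures; names and layout are illustrative only.

#include <stdio.h>

struct global_ctx_model { const char *name; };

struct vlan_model {
	struct vlan_model *master;		/* master (brentry) VLAN */
	struct global_ctx_model global_ctx;	/* only meaningful on the master */
};

struct port_ctx_model {
	struct vlan_model *vlan;		/* NULL for the plain per-port context */
	struct global_ctx_model *br_global;	/* bridge-wide context */
};

static struct global_ctx_model *get_global(struct port_ctx_model *pmctx)
{
	if (!pmctx->vlan)
		return pmctx->br_global;		 /* non-vlan: bridge context */
	return &pmctx->vlan->master->global_ctx; /* vlan: master VLAN's context */
}

int main(void)
{
	struct global_ctx_model br_ctx = { "bridge" };
	struct vlan_model master = { .master = &master, .global_ctx = { "vlan 10" } };
	struct vlan_model port_vlan = { .master = &master };
	struct port_ctx_model plain = { NULL, &br_ctx };
	struct port_ctx_model per_vlan = { &port_vlan, &br_ctx };

	printf("%s / %s\n", get_global(&plain)->name, get_global(&per_vlan)->name);
	return 0;
}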
+
+static inline bool
+br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx)
+{
+ return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+ br_multicast_ctx_is_vlan(brmctx) &&
+ !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_ctx_vlan_disabled(const struct net_bridge_mcast *brmctx)
+{
+ return br_multicast_ctx_is_vlan(brmctx) &&
+ !(brmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_port_ctx_vlan_disabled(const struct net_bridge_mcast_port *pmctx)
+{
+ return br_multicast_port_ctx_is_vlan(pmctx) &&
+ !(pmctx->vlan->priv_flags & BR_VLFLAG_MCAST_ENABLED);
+}
+
+static inline bool
+br_multicast_port_ctx_state_disabled(const struct net_bridge_mcast_port *pmctx)
+{
+ return pmctx->port->state == BR_STATE_DISABLED ||
+ (br_multicast_port_ctx_is_vlan(pmctx) &&
+ (br_multicast_port_ctx_vlan_disabled(pmctx) ||
+ pmctx->vlan->state == BR_STATE_DISABLED));
+}
+
+static inline bool
+br_multicast_port_ctx_state_stopped(const struct net_bridge_mcast_port *pmctx)
+{
+ return br_multicast_port_ctx_state_disabled(pmctx) ||
+ pmctx->port->state == BR_STATE_BLOCKING ||
+ (br_multicast_port_ctx_is_vlan(pmctx) &&
+ pmctx->vlan->state == BR_STATE_BLOCKING);
+}
+
+static inline bool
+br_rports_have_mc_router(const struct net_bridge_mcast *brmctx)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ return !hlist_empty(&brmctx->ip4_mc_router_list) ||
+ !hlist_empty(&brmctx->ip6_mc_router_list);
#else
-static inline int br_multicast_rcv(struct net_bridge *br,
- struct net_bridge_port *port,
+ return !hlist_empty(&brmctx->ip4_mc_router_list);
+#endif
+}
+
+static inline bool
+br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
+ const struct net_bridge_mcast *brmctx2)
+{
+ return brmctx1->multicast_igmp_version ==
+ brmctx2->multicast_igmp_version &&
+ brmctx1->multicast_last_member_count ==
+ brmctx2->multicast_last_member_count &&
+ brmctx1->multicast_startup_query_count ==
+ brmctx2->multicast_startup_query_count &&
+ brmctx1->multicast_last_member_interval ==
+ brmctx2->multicast_last_member_interval &&
+ brmctx1->multicast_membership_interval ==
+ brmctx2->multicast_membership_interval &&
+ brmctx1->multicast_querier_interval ==
+ brmctx2->multicast_querier_interval &&
+ brmctx1->multicast_query_interval ==
+ brmctx2->multicast_query_interval &&
+ brmctx1->multicast_query_response_interval ==
+ brmctx2->multicast_query_response_interval &&
+ brmctx1->multicast_startup_query_interval ==
+ brmctx2->multicast_startup_query_interval &&
+ brmctx1->multicast_querier == brmctx2->multicast_querier &&
+ brmctx1->multicast_router == brmctx2->multicast_router &&
+ !br_rports_have_mc_router(brmctx1) &&
+ !br_rports_have_mc_router(brmctx2) &&
+#if IS_ENABLED(CONFIG_IPV6)
+ brmctx1->multicast_mld_version ==
+ brmctx2->multicast_mld_version &&
+#endif
+ true;
+}
+
+static inline bool
+br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx)
+{
+ bool vlan_snooping_enabled;
+
+ vlan_snooping_enabled = !!br_opt_get(brmctx->br,
+ BROPT_MCAST_VLAN_SNOOPING_ENABLED);
+
+ return !!(vlan_snooping_enabled == br_multicast_ctx_is_vlan(brmctx));
+}
+#else
+static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx,
+ struct net_bridge_mcast_port **pmctx,
+ struct net_bridge_vlan *vlan,
struct sk_buff *skb,
u16 vid)
{
return 0;
}
-static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
+static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
struct sk_buff *skb, u16 vid)
{
return NULL;
@@ -1013,16 +1304,18 @@ static inline void br_multicast_dev_del(struct net_bridge *br)
static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb,
+ struct net_bridge_mcast *brmctx,
bool local_rcv, bool local_orig)
{
}
-static inline bool br_multicast_is_router(struct net_bridge *br)
+static inline bool br_multicast_is_router(struct net_bridge_mcast *brmctx,
+ struct sk_buff *skb)
{
return false;
}
-static inline bool br_multicast_querier_exists(struct net_bridge *br,
+static inline bool br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
struct ethhdr *eth,
const struct net_bridge_mdb_entry *mdb)
{
@@ -1066,13 +1359,67 @@ static inline int br_multicast_igmp_type(const struct sk_buff *skb)
{
return 0;
}
+
+static inline void br_multicast_ctx_init(struct net_bridge *br,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast *brmctx)
+{
+}
+
+static inline void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
+{
+}
+
+static inline void br_multicast_port_ctx_init(struct net_bridge_port *port,
+ struct net_bridge_vlan *vlan,
+ struct net_bridge_mcast_port *pmctx)
+{
+}
+
+static inline void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
+{
+}
+
+static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan,
+ bool on)
+{
+}
+
+static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br,
+ bool on,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan,
+ bool on)
+{
+ return false;
+}
+
+static inline int br_mdb_replay(struct net_device *br_dev,
+ struct net_device *dev, const void *ctx,
+ bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline bool
+br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
+ const struct net_bridge_mcast *brmctx2)
+{
+ return true;
+}
#endif
/* br_vlan.c */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg, struct sk_buff *skb,
- u16 *vid, u8 *state);
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan);
bool br_allowed_egress(struct net_bridge_vlan_group *vg,
const struct sk_buff *skb);
bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
@@ -1116,6 +1463,9 @@ void br_vlan_notify(const struct net_bridge *br,
const struct net_bridge_port *p,
u16 vid, u16 vid_range,
int cmd);
+int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack);
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end);
@@ -1184,8 +1534,11 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
static inline bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb,
- u16 *vid, u8 *state)
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan)
{
+ *vlan = NULL;
return true;
}
@@ -1358,6 +1711,14 @@ static inline bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
{
return true;
}
+
+static inline int br_vlan_replay(struct net_device *br_dev,
+ struct net_device *dev, const void *ctx,
+ bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
#endif
/* br_vlan_options.c */
@@ -1372,6 +1733,14 @@ int br_vlan_process_options(const struct net_bridge *br,
struct net_bridge_vlan *range_end,
struct nlattr **tb,
struct netlink_ext_ack *extack);
+int br_vlan_rtm_process_global_options(struct net_device *dev,
+ const struct nlattr *attr,
+ int cmd,
+ struct netlink_ext_ack *extack);
+bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *r_end);
+bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
+ const struct net_bridge_vlan *v_opts);
/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */
static inline u8 br_vlan_get_state(const struct net_bridge_vlan *v)
@@ -1593,7 +1962,25 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; }
/* br_switchdev.c */
#ifdef CONFIG_NET_SWITCHDEV
-int nbp_switchdev_mark_set(struct net_bridge_port *p);
+int br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack);
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb);
+
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
+
+void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
+
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+ struct sk_buff *skb);
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+ struct sk_buff *skb);
void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb);
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
@@ -1602,20 +1989,55 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long flags,
unsigned long mask,
struct netlink_ext_ack *extack);
-void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
- int type);
+void br_switchdev_fdb_notify(struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb, int type);
int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
+void br_switchdev_init(struct net_bridge *br);
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
skb->offload_fwd_mark = 0;
}
#else
-static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
+static inline int
+br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void
+br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+}
+
+static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
+{
+ return false;
+}
+
+static inline void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
+{
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+ struct sk_buff *skb)
{
- return 0;
}
static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
@@ -1650,13 +2072,19 @@ static inline int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
}
static inline void
-br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
+br_switchdev_fdb_notify(struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb, int type)
{
}
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
}
+
+static inline void br_switchdev_init(struct net_bridge *br)
+{
+}
+
#endif /* CONFIG_NET_SWITCHDEV */
/* br_arp_nd_proxy.c */
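With the per-VLAN multicast contexts declared above, callers pick between the bridge-wide context (br->multicast_ctx) and a VLAN's own context (vlan->br_mcast_ctx) depending on whether VLAN snooping is enabled. A minimal sketch of that selection follows; the helper name is made up and only illustrates how br_multicast_ctx_matches_vlan_snooping() is meant to be read, while the field and option names are the ones used by this patch.

/* Sketch only: pick the multicast context that drives queries and timers
 * for a packet. The helper name is hypothetical; the fields are taken from
 * the declarations above.
 */
static struct net_bridge_mcast *
sketch_pick_mcast_ctx(struct net_bridge *br, struct net_bridge_vlan *vlan)
{
	/* No VLAN entry, or per-VLAN snooping disabled: use the global ctx. */
	if (!vlan || !br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
		return &br->multicast_ctx;

	/* Otherwise the VLAN's own context is the active one. */
	return &vlan->br_mcast_ctx;
}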
diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h
index f89049f4892c..adf82a05515a 100644
--- a/net/bridge/br_private_mcast_eht.h
+++ b/net/bridge/br_private_mcast_eht.h
@@ -51,7 +51,8 @@ struct net_bridge_group_eht_set {
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg);
-bool br_multicast_eht_handle(struct net_bridge_port_group *pg,
+bool br_multicast_eht_handle(const struct net_bridge_mcast *brmctx,
+ struct net_bridge_port_group *pg,
void *h_addr,
void *srcs,
u32 nsrcs,
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 9559aa2750fb..bda8e1896712 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -6,6 +6,8 @@
#include "br_private.h"
#include <uapi/linux/mrp_bridge.h>
+#define MRP_OPT_PADDING 0x2
+
struct br_mrp {
/* list of mrp instances */
struct hlist_node list;
@@ -134,4 +136,13 @@ struct br_mrp_in_test_hdr {
__be32 timestamp;
} __attribute__((__packed__));
+struct br_mrp_oui_hdr {
+ __u8 oui[MRP_OUI_LENGTH];
+};
+
+struct br_mrp_sub_option1_hdr {
+ __u8 type;
+ __u8 data[MRP_MANUFACTURE_DATA_LENGTH];
+};
+
#endif /* _BR_PRIVATE_MRP_H */
diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h
index c54cc26211d7..2b053289f016 100644
--- a/net/bridge/br_private_tunnel.h
+++ b/net/bridge/br_private_tunnel.h
@@ -38,9 +38,9 @@ int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid,
void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port);
void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
struct net_bridge_vlan *vlan);
-int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
- struct net_bridge_port *p,
- struct net_bridge_vlan_group *vg);
+void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
+ struct net_bridge_port *p,
+ struct net_bridge_vlan_group *vg);
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
struct net_bridge_vlan *vlan);
bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr,
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 3dafb6143cff..1d80f34a139c 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -639,9 +639,9 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time)
return 0;
}
-clock_t br_get_ageing_time(struct net_device *br_dev)
+clock_t br_get_ageing_time(const struct net_device *br_dev)
{
- struct net_bridge *br;
+ const struct net_bridge *br;
if (!netif_is_bridge_master(br_dev))
return 0;
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index a5e601e41cb9..6bf518d78f02 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -8,50 +8,65 @@
#include "br_private.h"
-static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
-{
- struct net_bridge_port *p;
+static struct static_key_false br_switchdev_tx_fwd_offload;
- /* dev is yet to be added to the port list. */
- list_for_each_entry(p, &br->port_list, list) {
- if (netdev_port_same_parent_id(dev, p->dev))
- return p->offload_fwd_mark;
- }
+static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
+ const struct sk_buff *skb)
+{
+ if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+ return false;
- return ++br->offload_fwd_mark;
+ return (p->flags & BR_TX_FWD_OFFLOAD) &&
+ (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
}
-int nbp_switchdev_mark_set(struct net_bridge_port *p)
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
- struct netdev_phys_item_id ppid = { };
- int err;
+ if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+ return false;
- ASSERT_RTNL();
+ return BR_INPUT_SKB_CB(skb)->tx_fwd_offload;
+}
- err = dev_get_port_parent_id(p->dev, &ppid, true);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
+void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
+{
+ skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
+}
- p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
+/* Mark the frame for TX forwarding offload if this egress port supports it */
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+ if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+ BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true;
+}
- return 0;
+/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms
+ * that the skb has already been forwarded to, so that further cloning to
+ * other ports in the same hwdom is avoided by making
+ * nbp_switchdev_allowed_egress() return false.
+ */
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+ if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+ set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms);
}
void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb)
{
- if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
- BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+ if (p->hwdom)
+ BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom;
}
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
const struct sk_buff *skb)
{
- return !skb->offload_fwd_mark ||
- BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+ struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);
+
+ return !test_bit(p->hwdom, &cb->fwd_hwdoms) &&
+ (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom);
}
/* Flags that can be offloaded to hardware */
@@ -108,8 +123,10 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
}
void
-br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
+br_switchdev_fdb_notify(struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb, int type)
{
+ const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
struct switchdev_notifier_fdb_info info = {
.addr = fdb->key.addr.addr,
.vid = fdb->key.vlan_id,
@@ -117,18 +134,16 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
};
-
- if (!fdb->dst)
- return;
+ struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
switch (type) {
case RTM_DELNEIGH:
call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_DEVICE,
- fdb->dst->dev, &info.info, NULL);
+ dev, &info.info, NULL);
break;
case RTM_NEWNEIGH:
call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_DEVICE,
- fdb->dst->dev, &info.info, NULL);
+ dev, &info.info, NULL);
break;
}
}
@@ -156,3 +171,182 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
return switchdev_port_obj_del(dev, &v.obj);
}
+
+static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining)
+{
+ struct net_bridge *br = joining->br;
+ struct net_bridge_port *p;
+ int hwdom;
+
+ /* joining is yet to be added to the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) {
+ joining->hwdom = p->hwdom;
+ return 0;
+ }
+ }
+
+ hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1);
+ if (hwdom >= BR_HWDOM_MAX)
+ return -EBUSY;
+
+ set_bit(hwdom, &br->busy_hwdoms);
+ joining->hwdom = hwdom;
+ return 0;
+}
+
+static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving)
+{
+ struct net_bridge *br = leaving->br;
+ struct net_bridge_port *p;
+
+ /* leaving is no longer in the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->hwdom == leaving->hwdom)
+ return;
+ }
+
+ clear_bit(leaving->hwdom, &br->busy_hwdoms);
+}
+
+static int nbp_switchdev_add(struct net_bridge_port *p,
+ struct netdev_phys_item_id ppid,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (p->offload_count) {
+		/* Prevent unsupported configurations such as a bridge port
+		 * that is a bonding interface whose member ports come from
+		 * different hardware switches.
+		 */
+ if (!netdev_phys_item_id_same(&p->ppid, &ppid)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Same bridge port cannot be offloaded by two physical switches");
+ return -EBUSY;
+ }
+
+ /* Tolerate drivers that call switchdev_bridge_port_offload()
+ * more than once for the same bridge port, such as when the
+ * bridge port is an offloaded bonding/team interface.
+ */
+ p->offload_count++;
+
+ return 0;
+ }
+
+ p->ppid = ppid;
+ p->offload_count = 1;
+
+ err = nbp_switchdev_hwdom_set(p);
+ if (err)
+ return err;
+
+ if (tx_fwd_offload) {
+ p->flags |= BR_TX_FWD_OFFLOAD;
+ static_branch_inc(&br_switchdev_tx_fwd_offload);
+ }
+
+ return 0;
+}
+
+static void nbp_switchdev_del(struct net_bridge_port *p)
+{
+ if (WARN_ON(!p->offload_count))
+ return;
+
+ p->offload_count--;
+
+ if (p->offload_count)
+ return;
+
+ if (p->hwdom)
+ nbp_switchdev_hwdom_put(p);
+
+ if (p->flags & BR_TX_FWD_OFFLOAD) {
+ p->flags &= ~BR_TX_FWD_OFFLOAD;
+ static_branch_dec(&br_switchdev_tx_fwd_offload);
+ }
+}
+
+static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *br_dev = p->br->dev;
+ struct net_device *dev = p->dev;
+ int err;
+
+ err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_fdb_replay(br_dev, ctx, true, atomic_nb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ struct net_device *br_dev = p->br->dev;
+ struct net_device *dev = p->dev;
+
+ br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+ br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+ br_fdb_replay(br_dev, ctx, false, atomic_nb);
+}
+
+/* Let the bridge know that this port is offloaded, so that it can assign a
+ * switchdev hardware domain to it.
+ */
+int br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_phys_item_id ppid;
+ int err;
+
+ err = dev_get_port_parent_id(dev, &ppid, false);
+ if (err)
+ return err;
+
+ err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack);
+ if (err)
+ return err;
+
+ err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+ if (err)
+ goto out_switchdev_del;
+
+ return 0;
+
+out_switchdev_del:
+ nbp_switchdev_del(p);
+
+ return err;
+}
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
+
+ nbp_switchdev_del(p);
+}
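To make the calling convention concrete, a rough driver-side sketch of the two entry points above is shown below; the driver objects (my_port, my_atomic_nb, my_blocking_nb) are invented for illustration, and in-tree drivers normally reach this code through a switchdev wrapper rather than calling it directly.

/* Sketch only: letting the bridge know a port is (un)offloaded. */
struct my_port;					/* hypothetical driver private */
static struct notifier_block my_atomic_nb;	/* hypothetical driver notifiers */
static struct notifier_block my_blocking_nb;

static int sketch_port_join(struct net_bridge_port *p, struct net_device *dev,
			    struct my_port *port,
			    struct netlink_ext_ack *extack)
{
	/* Assigns a hardware forwarding domain (hwdom) to the port and
	 * replays existing VLAN, MDB and FDB entries to the driver's
	 * notifiers.
	 */
	return br_switchdev_port_offload(p, dev, port, &my_atomic_nb,
					 &my_blocking_nb, true, extack);
}

static void sketch_port_leave(struct net_bridge_port *p, struct my_port *port)
{
	/* Replays the deletions and releases the port's hwdom. */
	br_switchdev_port_unoffload(p, port, &my_atomic_nb, &my_blocking_nb);
}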
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 381467b691d5..d9a89ddd0331 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -384,13 +384,13 @@ static ssize_t multicast_router_show(struct device *d,
struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", br->multicast_router);
+ return sprintf(buf, "%d\n", br->multicast_ctx.multicast_router);
}
static int set_multicast_router(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- return br_multicast_set_router(br, val);
+ return br_multicast_set_router(&br->multicast_ctx, val);
}
static ssize_t multicast_router_store(struct device *d,
@@ -447,13 +447,13 @@ static ssize_t multicast_querier_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%d\n", br_opt_get(br, BROPT_MULTICAST_QUERIER));
+ return sprintf(buf, "%d\n", br->multicast_ctx.multicast_querier);
}
static int set_multicast_querier(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- return br_multicast_set_querier(br, val);
+ return br_multicast_set_querier(&br->multicast_ctx, val);
}
static ssize_t multicast_querier_store(struct device *d,
@@ -514,13 +514,13 @@ static ssize_t multicast_igmp_version_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_igmp_version);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_igmp_version);
}
static int set_multicast_igmp_version(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- return br_multicast_set_igmp_version(br, val);
+ return br_multicast_set_igmp_version(&br->multicast_ctx, val);
}
static ssize_t multicast_igmp_version_store(struct device *d,
@@ -536,13 +536,13 @@ static ssize_t multicast_last_member_count_show(struct device *d,
char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_last_member_count);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_last_member_count);
}
static int set_last_member_count(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_last_member_count = val;
+ br->multicast_ctx.multicast_last_member_count = val;
return 0;
}
@@ -558,13 +558,13 @@ static ssize_t multicast_startup_query_count_show(
struct device *d, struct device_attribute *attr, char *buf)
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_startup_query_count);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_startup_query_count);
}
static int set_startup_query_count(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_startup_query_count = val;
+ br->multicast_ctx.multicast_startup_query_count = val;
return 0;
}
@@ -581,13 +581,13 @@ static ssize_t multicast_last_member_interval_show(
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_last_member_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_last_member_interval));
}
static int set_last_member_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_last_member_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -604,13 +604,13 @@ static ssize_t multicast_membership_interval_show(
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_membership_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_membership_interval));
}
static int set_membership_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_membership_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -628,13 +628,13 @@ static ssize_t multicast_querier_interval_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_querier_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_querier_interval));
}
static int set_querier_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_querier_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -652,13 +652,13 @@ static ssize_t multicast_query_interval_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
return sprintf(buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_query_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_query_interval));
}
static int set_query_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -676,13 +676,13 @@ static ssize_t multicast_query_response_interval_show(
struct net_bridge *br = to_bridge(d);
return sprintf(
buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_query_response_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_query_response_interval));
}
static int set_query_response_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_query_response_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -700,13 +700,13 @@ static ssize_t multicast_startup_query_interval_show(
struct net_bridge *br = to_bridge(d);
return sprintf(
buf, "%lu\n",
- jiffies_to_clock_t(br->multicast_startup_query_interval));
+ jiffies_to_clock_t(br->multicast_ctx.multicast_startup_query_interval));
}
static int set_startup_query_interval(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+ br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
return 0;
}
@@ -751,13 +751,13 @@ static ssize_t multicast_mld_version_show(struct device *d,
{
struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->multicast_mld_version);
+ return sprintf(buf, "%u\n", br->multicast_ctx.multicast_mld_version);
}
static int set_multicast_mld_version(struct net_bridge *br, unsigned long val,
struct netlink_ext_ack *extack)
{
- return br_multicast_set_mld_version(br, val);
+ return br_multicast_set_mld_version(&br->multicast_ctx, val);
}
static ssize_t multicast_mld_version_store(struct device *d,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 72e92376eef1..07fa76080512 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -244,13 +244,13 @@ BRPORT_ATTR_FLAG(isolated, BR_ISOLATED);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
{
- return sprintf(buf, "%d\n", p->multicast_router);
+ return sprintf(buf, "%d\n", p->multicast_ctx.multicast_router);
}
static int store_multicast_router(struct net_bridge_port *p,
unsigned long v)
{
- return br_multicast_set_port_router(p, v);
+ return br_multicast_set_port_router(&p->multicast_ctx, v);
}
static BRPORT_ATTR(multicast_router, 0644, show_multicast_router,
store_multicast_router);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index da3256a3eed0..19f65ab91a02 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -113,9 +113,7 @@ static void __vlan_add_list(struct net_bridge_vlan *v)
headp = &vg->vlan_list;
list_for_each_prev(hpos, headp) {
vent = list_entry(hpos, struct net_bridge_vlan, vlist);
- if (v->vid < vent->vid)
- continue;
- else
+ if (v->vid >= vent->vid)
break;
}
list_add_rcu(&v->vlist, hpos);
@@ -192,6 +190,8 @@ static void br_vlan_put_master(struct net_bridge_vlan *masterv)
rhashtable_remove_fast(&vg->vlan_hash,
&masterv->vnode, br_vlan_rht_params);
__vlan_del_list(masterv);
+ br_multicast_toggle_one_vlan(masterv, false);
+ br_multicast_ctx_deinit(&masterv->br_mcast_ctx);
call_rcu(&masterv->rcu, br_master_vlan_rcu_free);
}
}
@@ -282,10 +282,13 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
} else {
v->stats = masterv->stats;
}
+ br_multicast_port_ctx_init(p, v, &v->port_mcast_ctx);
} else {
err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
if (err && err != -EOPNOTSUPP)
goto out;
+ br_multicast_ctx_init(br, v, &v->br_mcast_ctx);
+ v->priv_flags |= BR_VLFLAG_GLOBAL_MCAST_ENABLED;
}
/* Add the dev mac and count the vlan only if it's usable */
@@ -308,6 +311,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
__vlan_add_list(v);
__vlan_add_flags(v, flags);
+ br_multicast_toggle_one_vlan(v, true);
if (p)
nbp_vlan_set_vlan_dev_state(p, v->vid);
@@ -376,6 +380,8 @@ static int __vlan_del(struct net_bridge_vlan *v)
br_vlan_rht_params);
__vlan_del_list(v);
nbp_vlan_set_vlan_dev_state(p, v->vid);
+ br_multicast_toggle_one_vlan(v, false);
+ br_multicast_port_ctx_deinit(&v->port_mcast_ctx);
call_rcu(&v->rcu, nbp_vlan_rcu_free);
}
@@ -459,7 +465,15 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
u64_stats_update_end(&stats->syncp);
}
- if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
+ /* If the skb will be sent using forwarding offload, the assumption is
+ * that the switchdev will inject the packet into hardware together
+ * with the bridge VLAN, so that it can be forwarded according to that
+ * VLAN. The switchdev should deal with popping the VLAN header in
+ * hardware on each egress port as appropriate. So only strip the VLAN
+ * header if forwarding offload is not being used.
+ */
+ if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED &&
+ !br_switchdev_frame_uses_tx_fwd_offload(skb))
__vlan_hwaccel_clear_tag(skb);
if (p && (p->flags & BR_VLAN_TUNNEL) &&
@@ -475,7 +489,8 @@ out:
static bool __allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb, u16 *vid,
- u8 *state)
+ u8 *state,
+ struct net_bridge_vlan **vlan)
{
struct pcpu_sw_netstats *stats;
struct net_bridge_vlan *v;
@@ -540,8 +555,9 @@ static bool __allowed_ingress(const struct net_bridge *br,
*/
skb->vlan_tci |= pvid;
- /* if stats are disabled we can avoid the lookup */
- if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
+ /* if snooping and stats are disabled we can avoid the lookup */
+ if (!br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) &&
+ !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) {
if (*state == BR_STATE_FORWARDING) {
*state = br_vlan_get_pvid_state(vg);
return br_vlan_state_allowed(*state, true);
@@ -568,6 +584,8 @@ static bool __allowed_ingress(const struct net_bridge *br,
u64_stats_update_end(&stats->syncp);
}
+ *vlan = v;
+
return true;
drop:
@@ -577,17 +595,19 @@ drop:
bool br_allowed_ingress(const struct net_bridge *br,
struct net_bridge_vlan_group *vg, struct sk_buff *skb,
- u16 *vid, u8 *state)
+ u16 *vid, u8 *state,
+ struct net_bridge_vlan **vlan)
{
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
+ *vlan = NULL;
if (!br_opt_get(br, BROPT_VLAN_ENABLED)) {
BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
return true;
}
- return __allowed_ingress(br, vg, skb, vid, state);
+ return __allowed_ingress(br, vg, skb, vid, state, vlan);
}
/* Called under RCU. */
@@ -674,6 +694,7 @@ static int br_vlan_add_existing(struct net_bridge *br,
vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
vg->num_vlans++;
*changed = true;
+ br_multicast_toggle_one_vlan(vlan, true);
}
if (__vlan_add_flags(vlan, flags))
@@ -820,14 +841,21 @@ int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val,
if (br_opt_get(br, BROPT_VLAN_ENABLED) == !!val)
return 0;
+ br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);
+
err = switchdev_port_attr_set(br->dev, &attr, extack);
- if (err && err != -EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP) {
+ br_opt_toggle(br, BROPT_VLAN_ENABLED, !val);
return err;
+ }
- br_opt_toggle(br, BROPT_VLAN_ENABLED, !!val);
br_manage_promisc(br);
recalculate_group_addr(br);
br_recalculate_fwd_mask(br);
+ if (!val && br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED)) {
+ br_info(br, "vlan filtering disabled, automatically disabling multicast vlan snooping\n");
+ br_multicast_toggle_vlan_snooping(br, false, NULL);
+ }
return 0;
}
@@ -1422,6 +1450,33 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid,
}
EXPORT_SYMBOL_GPL(br_vlan_get_info);
+int br_vlan_get_info_rcu(const struct net_device *dev, u16 vid,
+ struct bridge_vlan_info *p_vinfo)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
+ struct net_bridge_port *p;
+
+ p = br_port_get_check_rcu(dev);
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else if (netif_is_bridge_master(dev))
+ vg = br_vlan_group_rcu(netdev_priv(dev));
+ else
+ return -EINVAL;
+
+ v = br_vlan_find(vg, vid);
+ if (!v)
+ return -ENOENT;
+
+ p_vinfo->vid = vid;
+ p_vinfo->flags = v->flags;
+ if (vid == br_get_pvid(vg))
+ p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_vlan_get_info_rcu);
+
static int br_vlan_is_bind_vlan_dev(const struct net_device *dev)
{
return is_vlan_dev(dev) &&
@@ -1809,33 +1864,40 @@ out_kfree:
static int br_vlan_replay_one(struct notifier_block *nb,
struct net_device *dev,
struct switchdev_obj_port_vlan *vlan,
+ const void *ctx, unsigned long action,
struct netlink_ext_ack *extack)
{
struct switchdev_notifier_port_obj_info obj_info = {
.info = {
.dev = dev,
.extack = extack,
+ .ctx = ctx,
},
.obj = &vlan->obj,
};
int err;
- err = nb->notifier_call(nb, SWITCHDEV_PORT_OBJ_ADD, &obj_info);
+ err = nb->notifier_call(nb, action, &obj_info);
return notifier_to_errno(err);
}
int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
- struct notifier_block *nb, struct netlink_ext_ack *extack)
+ const void *ctx, bool adding, struct notifier_block *nb,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *v;
struct net_bridge_port *p;
struct net_bridge *br;
+ unsigned long action;
int err = 0;
u16 pvid;
ASSERT_RTNL();
+ if (!nb)
+ return 0;
+
if (!netif_is_bridge_master(br_dev))
return -EINVAL;
@@ -1857,6 +1919,11 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
if (!vg)
return 0;
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
pvid = br_get_pvid(vg);
list_for_each_entry(v, &vg->vlan_list, vlist) {
@@ -1870,14 +1937,13 @@ int br_vlan_replay(struct net_device *br_dev, struct net_device *dev,
if (!br_vlan_should_use(v))
continue;
- err = br_vlan_replay_one(nb, dev, &vlan, extack);
+ err = br_vlan_replay_one(nb, dev, &vlan, ctx, action, extack);
if (err)
return err;
}
return err;
}
-EXPORT_SYMBOL_GPL(br_vlan_replay);
/* check if v_curr can enter a range ending in range_end */
bool br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
@@ -1894,6 +1960,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
u32 dump_flags)
{
struct net_bridge_vlan *v, *range_start = NULL, *range_end = NULL;
+ bool dump_global = !!(dump_flags & BRIDGE_VLANDB_DUMPF_GLOBAL);
bool dump_stats = !!(dump_flags & BRIDGE_VLANDB_DUMPF_STATS);
struct net_bridge_vlan_group *vg;
int idx = 0, s_idx = cb->args[1];
@@ -1912,6 +1979,10 @@ static int br_vlan_dump_dev(const struct net_device *dev,
vg = br_vlan_group_rcu(br);
p = NULL;
} else {
+ /* global options are dumped only for bridge devices */
+ if (dump_global)
+ return 0;
+
p = br_port_get_rcu(dev);
if (WARN_ON(!p))
return -EINVAL;
@@ -1934,7 +2005,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
/* idx must stay at range's beginning until it is filled in */
list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
- if (!br_vlan_should_use(v))
+ if (!dump_global && !br_vlan_should_use(v))
continue;
if (idx < s_idx) {
idx++;
@@ -1947,8 +2018,21 @@ static int br_vlan_dump_dev(const struct net_device *dev,
continue;
}
- if (dump_stats || v->vid == pvid ||
- !br_vlan_can_enter_range(v, range_end)) {
+ if (dump_global) {
+ if (br_vlan_global_opts_can_enter_range(v, range_end))
+ goto update_end;
+ if (!br_vlan_global_opts_fill(skb, range_start->vid,
+ range_end->vid,
+ range_start)) {
+ err = -EMSGSIZE;
+ break;
+ }
+ /* advance number of filled vlans */
+ idx += range_end->vid - range_start->vid + 1;
+
+ range_start = v;
+ } else if (dump_stats || v->vid == pvid ||
+ !br_vlan_can_enter_range(v, range_end)) {
u16 vlan_flags = br_vlan_flags(range_start, pvid);
if (!br_vlan_fill_vids(skb, range_start->vid,
@@ -1962,6 +2046,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
range_start = v;
}
+update_end:
range_end = v;
}
@@ -1970,11 +2055,18 @@ static int br_vlan_dump_dev(const struct net_device *dev,
* - last vlan (range_start == range_end, not in range)
* - last vlan range (range_start != range_end, in range)
*/
- if (!err && range_start &&
- !br_vlan_fill_vids(skb, range_start->vid, range_end->vid,
- range_start, br_vlan_flags(range_start, pvid),
- dump_stats))
- err = -EMSGSIZE;
+ if (!err && range_start) {
+ if (dump_global &&
+ !br_vlan_global_opts_fill(skb, range_start->vid,
+ range_end->vid, range_start))
+ err = -EMSGSIZE;
+ else if (!dump_global &&
+ !br_vlan_fill_vids(skb, range_start->vid,
+ range_end->vid, range_start,
+ br_vlan_flags(range_start, pvid),
+ dump_stats))
+ err = -EMSGSIZE;
+ }
cb->args[1] = err ? idx : 0;
@@ -2044,6 +2136,7 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] =
[BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 },
[BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
+ [BRIDGE_VLANDB_ENTRY_MCAST_ROUTER] = { .type = NLA_U8 },
};
static int br_vlan_rtm_process_one(struct net_device *dev,
@@ -2178,12 +2271,22 @@ static int br_vlan_rtm_process(struct sk_buff *skb, struct nlmsghdr *nlh,
}
nlmsg_for_each_attr(attr, nlh, sizeof(*bvm), rem) {
- if (nla_type(attr) != BRIDGE_VLANDB_ENTRY)
+ switch (nla_type(attr)) {
+ case BRIDGE_VLANDB_ENTRY:
+ err = br_vlan_rtm_process_one(dev, attr,
+ nlh->nlmsg_type,
+ extack);
+ break;
+ case BRIDGE_VLANDB_GLOBAL_OPTIONS:
+ err = br_vlan_rtm_process_global_options(dev, attr,
+ nlh->nlmsg_type,
+ extack);
+ break;
+ default:
continue;
+ }
vlans++;
- err = br_vlan_rtm_process_one(dev, attr, nlh->nlmsg_type,
- extack);
if (err)
break;
}
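br_vlan_get_info_rcu() above gives drivers an RCU-safe counterpart to br_vlan_get_info(). A short sketch of a data-path caller; the wrapper function is hypothetical.

/* Sketch only: check whether a VID exists on a bridge port (or the bridge
 * itself) from an RCU read-side context.
 */
static bool sketch_vid_present(const struct net_device *dev, u16 vid)
{
	struct bridge_vlan_info vinfo;
	bool present;

	rcu_read_lock();
	present = !br_vlan_get_info_rcu(dev, vid, &vinfo);
	rcu_read_unlock();

	return present;
}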
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index b4add9ea8964..8ffd4ed2563c 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -40,22 +40,38 @@ static bool __vlan_tun_can_enter_range(const struct net_bridge_vlan *v_curr,
bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end)
{
+ u8 range_mc_rtr = br_vlan_multicast_router(range_end);
+ u8 curr_mc_rtr = br_vlan_multicast_router(v_curr);
+
return v_curr->state == range_end->state &&
- __vlan_tun_can_enter_range(v_curr, range_end);
+ __vlan_tun_can_enter_range(v_curr, range_end) &&
+ curr_mc_rtr == range_mc_rtr;
}
bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
{
- return !nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE,
- br_vlan_get_state(v)) &&
- __vlan_tun_put(skb, v);
+ if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) ||
+ !__vlan_tun_put(skb, v))
+ return false;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
+ br_vlan_multicast_router(v)))
+ return false;
+#endif
+
+ return true;
}
size_t br_vlan_opts_nl_size(void)
{
return nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_STATE */
+ nla_total_size(0) /* BRIDGE_VLANDB_ENTRY_TUNNEL_INFO */
- + nla_total_size(sizeof(u32)); /* BRIDGE_VLANDB_TINFO_ID */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_TINFO_ID */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */
+#endif
+ + 0;
}
static int br_vlan_modify_state(struct net_bridge_vlan_group *vg,
@@ -181,6 +197,18 @@ static int br_vlan_process_one_opts(const struct net_bridge *br,
return err;
}
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (tb[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]) {
+ u8 val;
+
+ val = nla_get_u8(tb[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]);
+ err = br_multicast_set_vlan_router(v, val);
+ if (err)
+ return err;
+ *changed = true;
+ }
+#endif
+
return 0;
}
@@ -258,3 +286,392 @@ int br_vlan_process_options(const struct net_bridge *br,
return err;
}
+
+bool br_vlan_global_opts_can_enter_range(const struct net_bridge_vlan *v_curr,
+ const struct net_bridge_vlan *r_end)
+{
+ return v_curr->vid - r_end->vid == 1 &&
+ ((v_curr->priv_flags ^ r_end->priv_flags) &
+ BR_VLFLAG_GLOBAL_MCAST_ENABLED) == 0 &&
+ br_multicast_ctx_options_equal(&v_curr->br_mcast_ctx,
+ &r_end->br_mcast_ctx);
+}
+
+bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range,
+ const struct net_bridge_vlan *v_opts)
+{
+ struct nlattr *nest2 __maybe_unused;
+ u64 clockval __maybe_unused;
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, BRIDGE_VLANDB_GLOBAL_OPTIONS);
+ if (!nest)
+ return false;
+
+ if (nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_ID, vid))
+ goto out_err;
+
+ if (vid_range && vid < vid_range &&
+ nla_put_u16(skb, BRIDGE_VLANDB_GOPTS_RANGE, vid_range))
+ goto out_err;
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING,
+ !!(v_opts->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)) ||
+ nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION,
+ v_opts->br_mcast_ctx.multicast_igmp_version) ||
+ nla_put_u32(skb, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT,
+ v_opts->br_mcast_ctx.multicast_last_member_count) ||
+ nla_put_u32(skb, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT,
+ v_opts->br_mcast_ctx.multicast_startup_query_count) ||
+ nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER,
+ v_opts->br_mcast_ctx.multicast_querier) ||
+ br_multicast_dump_querier_state(skb, &v_opts->br_mcast_ctx,
+ BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE))
+ goto out_err;
+
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_last_member_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_membership_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_querier_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_query_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_query_response_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+ clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_startup_query_interval);
+ if (nla_put_u64_64bit(skb, BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL,
+ clockval, BRIDGE_VLANDB_GOPTS_PAD))
+ goto out_err;
+
+ if (br_rports_have_mc_router(&v_opts->br_mcast_ctx)) {
+ nest2 = nla_nest_start(skb,
+ BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS);
+ if (!nest2)
+ goto out_err;
+
+ rcu_read_lock();
+ if (br_rports_fill_info(skb, &v_opts->br_mcast_ctx)) {
+ rcu_read_unlock();
+ nla_nest_cancel(skb, nest2);
+ goto out_err;
+ }
+ rcu_read_unlock();
+
+ nla_nest_end(skb, nest2);
+ }
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION,
+ v_opts->br_mcast_ctx.multicast_mld_version))
+ goto out_err;
+#endif
+#endif
+
+ nla_nest_end(skb, nest);
+
+ return true;
+
+out_err:
+ nla_nest_cancel(skb, nest);
+ return false;
+}
+
+static size_t rtnl_vlan_global_opts_nlmsg_size(const struct net_bridge_vlan *v)
+{
+ return NLMSG_ALIGN(sizeof(struct br_vlan_msg))
+ + nla_total_size(0) /* BRIDGE_VLANDB_GLOBAL_OPTIONS */
+ + nla_total_size(sizeof(u16)) /* BRIDGE_VLANDB_GOPTS_ID */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING */
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION */
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL */
+ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL */
+ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER */
+ + br_multicast_querier_state_size() /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE */
+ + nla_total_size(0) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
+ + br_rports_size(&v->br_mcast_ctx) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */
+#endif
+ + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */
+}
+
+static void br_vlan_global_opts_notify(const struct net_bridge *br,
+ u16 vid, u16 vid_range)
+{
+ struct net_bridge_vlan *v;
+ struct br_vlan_msg *bvm;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ /* right now notifications are done only with rtnl held */
+ ASSERT_RTNL();
+
+ /* need to find the vlan due to flags/options */
+ v = br_vlan_find(br_vlan_group(br), vid);
+ if (!v)
+ return;
+
+ skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(v), GFP_KERNEL);
+ if (!skb)
+ goto out_err;
+
+ err = -EMSGSIZE;
+ nlh = nlmsg_put(skb, 0, 0, RTM_NEWVLAN, sizeof(*bvm), 0);
+ if (!nlh)
+ goto out_err;
+ bvm = nlmsg_data(nlh);
+ memset(bvm, 0, sizeof(*bvm));
+ bvm->family = AF_BRIDGE;
+ bvm->ifindex = br->dev->ifindex;
+
+ if (!br_vlan_global_opts_fill(skb, vid, vid_range, v))
+ goto out_err;
+
+ nlmsg_end(skb, nlh);
+ rtnl_notify(skb, dev_net(br->dev), 0, RTNLGRP_BRVLAN, NULL, GFP_KERNEL);
+ return;
+
+out_err:
+ rtnl_set_sk_err(dev_net(br->dev), RTNLGRP_BRVLAN, err);
+ kfree_skb(skb);
+}
+
+static int br_vlan_process_global_one_opts(const struct net_bridge *br,
+ struct net_bridge_vlan_group *vg,
+ struct net_bridge_vlan *v,
+ struct nlattr **tb,
+ bool *changed,
+ struct netlink_ext_ack *extack)
+{
+ int err __maybe_unused;
+
+ *changed = false;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]) {
+ u8 mc_snooping;
+
+ mc_snooping = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING]);
+ if (br_multicast_toggle_global_vlan(v, !!mc_snooping))
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]) {
+ u8 ver;
+
+ ver = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION]);
+ err = br_multicast_set_igmp_version(&v->br_mcast_ctx, ver);
+ if (err)
+ return err;
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]) {
+ u32 cnt;
+
+ cnt = nla_get_u32(tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT]);
+ v->br_mcast_ctx.multicast_last_member_count = cnt;
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]) {
+ u32 cnt;
+
+ cnt = nla_get_u32(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT]);
+ v->br_mcast_ctx.multicast_startup_query_count = cnt;
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL]);
+ v->br_mcast_ctx.multicast_last_member_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL]);
+ v->br_mcast_ctx.multicast_membership_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL]);
+ v->br_mcast_ctx.multicast_querier_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]);
+ v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]);
+ v->br_mcast_ctx.multicast_query_response_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]) {
+ u64 val;
+
+ val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]);
+ v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val);
+ *changed = true;
+ }
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) {
+ u8 val;
+
+ val = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]);
+ err = br_multicast_set_querier(&v->br_mcast_ctx, val);
+ if (err)
+ return err;
+ *changed = true;
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]) {
+ u8 ver;
+
+ ver = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]);
+ err = br_multicast_set_mld_version(&v->br_mcast_ctx, ver);
+ if (err)
+ return err;
+ *changed = true;
+ }
+#endif
+#endif
+
+ return 0;
+}
+
+static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = {
+ [BRIDGE_VLANDB_GOPTS_ID] = { .type = NLA_U16 },
+ [BRIDGE_VLANDB_GOPTS_RANGE] = { .type = NLA_U16 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT] = { .type = NLA_U32 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT] = { .type = NLA_U32 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_MEMBERSHIP_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 },
+ [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 },
+};
+
+int br_vlan_rtm_process_global_options(struct net_device *dev,
+ const struct nlattr *attr,
+ int cmd,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_vlan *v, *curr_start = NULL, *curr_end = NULL;
+ struct nlattr *tb[BRIDGE_VLANDB_GOPTS_MAX + 1];
+ struct net_bridge_vlan_group *vg;
+ u16 vid, vid_range = 0;
+ struct net_bridge *br;
+ int err = 0;
+
+ if (cmd != RTM_NEWVLAN) {
+ NL_SET_ERR_MSG_MOD(extack, "Global vlan options support only set operation");
+ return -EINVAL;
+ }
+ if (!netif_is_bridge_master(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Global vlan options can only be set on bridge device");
+ return -EINVAL;
+ }
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
+ if (WARN_ON(!vg))
+ return -ENODEV;
+
+ err = nla_parse_nested(tb, BRIDGE_VLANDB_GOPTS_MAX, attr,
+ br_vlan_db_gpol, extack);
+ if (err)
+ return err;
+
+ if (!tb[BRIDGE_VLANDB_GOPTS_ID]) {
+ NL_SET_ERR_MSG_MOD(extack, "Missing vlan entry id");
+ return -EINVAL;
+ }
+ vid = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_ID]);
+ if (!br_vlan_valid_id(vid, extack))
+ return -EINVAL;
+
+ if (tb[BRIDGE_VLANDB_GOPTS_RANGE]) {
+ vid_range = nla_get_u16(tb[BRIDGE_VLANDB_GOPTS_RANGE]);
+ if (!br_vlan_valid_id(vid_range, extack))
+ return -EINVAL;
+ if (vid >= vid_range) {
+ NL_SET_ERR_MSG_MOD(extack, "End vlan id is less than or equal to start vlan id");
+ return -EINVAL;
+ }
+ } else {
+ vid_range = vid;
+ }
+
+ for (; vid <= vid_range; vid++) {
+ bool changed = false;
+
+ v = br_vlan_find(vg, vid);
+ if (!v) {
+ NL_SET_ERR_MSG_MOD(extack, "Vlan in range doesn't exist, can't process global options");
+ err = -ENOENT;
+ break;
+ }
+
+ err = br_vlan_process_global_one_opts(br, vg, v, tb, &changed,
+ extack);
+ if (err)
+ break;
+
+ if (changed) {
+ /* vlan options changed, check for range */
+ if (!curr_start) {
+ curr_start = v;
+ curr_end = v;
+ continue;
+ }
+
+ if (!br_vlan_global_opts_can_enter_range(v, curr_end)) {
+ br_vlan_global_opts_notify(br, curr_start->vid,
+ curr_end->vid);
+ curr_start = v;
+ }
+ curr_end = v;
+ } else {
+ /* nothing changed and nothing to notify yet */
+ if (!curr_start)
+ continue;
+
+ br_vlan_global_opts_notify(br, curr_start->vid,
+ curr_end->vid);
+ curr_start = NULL;
+ curr_end = NULL;
+ }
+ }
+ if (curr_start)
+ br_vlan_global_opts_notify(br, curr_start->vid, curr_end->vid);
+
+ return err;
+}
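The BRIDGE_VLANDB_GLOBAL_OPTIONS handling above is driven from userspace by an RTM_NEWVLAN message sent for the bridge device, whose payload nests the BRIDGE_VLANDB_GOPTS_* attributes. A hedged userspace sketch using libmnl follows; socket setup, sending and ACK handling are omitted, and buffer sizing and ifindex lookup are left to the caller.

/* Sketch only (userspace, libmnl): build an RTM_NEWVLAN request that turns
 * on per-VLAN multicast snooping for one VID via the global options nest.
 */
#include <libmnl/libmnl.h>
#include <linux/if_bridge.h>
#include <linux/rtnetlink.h>
#include <sys/socket.h>

static struct nlmsghdr *sketch_vlan_global_set(char *buf,
					       unsigned int br_ifindex, __u16 vid)
{
	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
	struct br_vlan_msg *bvm;
	struct nlattr *nest;

	nlh->nlmsg_type = RTM_NEWVLAN;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;

	bvm = mnl_nlmsg_put_extra_header(nlh, sizeof(*bvm));
	bvm->family = AF_BRIDGE;
	bvm->ifindex = br_ifindex;	/* must be the bridge device itself */

	nest = mnl_attr_nest_start(nlh, BRIDGE_VLANDB_GLOBAL_OPTIONS);
	mnl_attr_put_u16(nlh, BRIDGE_VLANDB_GOPTS_ID, vid);
	mnl_attr_put_u8(nlh, BRIDGE_VLANDB_GOPTS_MCAST_SNOOPING, 1);
	mnl_attr_nest_end(nlh, nest);

	return nlh;
}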
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 01017448ebde..6399a8a69d07 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -158,30 +158,28 @@ void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg)
rhashtable_destroy(&vg->tunnel_hash);
}
-int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
- struct net_bridge_port *p,
- struct net_bridge_vlan_group *vg)
+void br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
+ struct net_bridge_port *p,
+ struct net_bridge_vlan_group *vg)
{
struct ip_tunnel_info *tinfo = skb_tunnel_info(skb);
struct net_bridge_vlan *vlan;
if (!vg || !tinfo)
- return 0;
+ return;
/* if already tagged, ignore */
if (skb_vlan_tagged(skb))
- return 0;
+ return;
/* lookup vid, given tunnel id */
vlan = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id);
if (!vlan)
- return 0;
+ return;
skb_dst_drop(skb);
__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid);
-
- return 0;
}
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 020b1487ee0c..a7af4eaff17d 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -98,7 +98,7 @@ static const struct nf_hook_ops ebt_ops_broute = {
.priority = NF_BR_PRI_FIRST,
};
-static int __net_init broute_net_init(struct net *net)
+static int broute_table_init(struct net *net)
{
return ebt_register_table(net, &broute_table, &ebt_ops_broute);
}
@@ -114,19 +114,30 @@ static void __net_exit broute_net_exit(struct net *net)
}
static struct pernet_operations broute_net_ops = {
- .init = broute_net_init,
.exit = broute_net_exit,
.pre_exit = broute_net_pre_exit,
};
static int __init ebtable_broute_init(void)
{
- return register_pernet_subsys(&broute_net_ops);
+ int ret = ebt_register_template(&broute_table, broute_table_init);
+
+ if (ret)
+ return ret;
+
+ ret = register_pernet_subsys(&broute_net_ops);
+ if (ret) {
+ ebt_unregister_template(&broute_table);
+ return ret;
+ }
+
+ return 0;
}
static void __exit ebtable_broute_fini(void)
{
unregister_pernet_subsys(&broute_net_ops);
+ ebt_unregister_template(&broute_table);
}
module_init(ebtable_broute_init);
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 8ec0b3736803..c0b121df4a9a 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -86,7 +86,7 @@ static const struct nf_hook_ops ebt_ops_filter[] = {
},
};
-static int __net_init frame_filter_net_init(struct net *net)
+static int frame_filter_table_init(struct net *net)
{
return ebt_register_table(net, &frame_filter, ebt_ops_filter);
}
@@ -102,19 +102,30 @@ static void __net_exit frame_filter_net_exit(struct net *net)
}
static struct pernet_operations frame_filter_net_ops = {
- .init = frame_filter_net_init,
.exit = frame_filter_net_exit,
.pre_exit = frame_filter_net_pre_exit,
};
static int __init ebtable_filter_init(void)
{
- return register_pernet_subsys(&frame_filter_net_ops);
+ int ret = ebt_register_template(&frame_filter, frame_filter_table_init);
+
+ if (ret)
+ return ret;
+
+ ret = register_pernet_subsys(&frame_filter_net_ops);
+ if (ret) {
+ ebt_unregister_template(&frame_filter);
+ return ret;
+ }
+
+ return 0;
}
static void __exit ebtable_filter_fini(void)
{
unregister_pernet_subsys(&frame_filter_net_ops);
+ ebt_unregister_template(&frame_filter);
}
module_init(ebtable_filter_init);
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 7c8a1064a531..4078151c224f 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -85,7 +85,7 @@ static const struct nf_hook_ops ebt_ops_nat[] = {
},
};
-static int __net_init frame_nat_net_init(struct net *net)
+static int frame_nat_table_init(struct net *net)
{
return ebt_register_table(net, &frame_nat, ebt_ops_nat);
}
@@ -101,19 +101,30 @@ static void __net_exit frame_nat_net_exit(struct net *net)
}
static struct pernet_operations frame_nat_net_ops = {
- .init = frame_nat_net_init,
.exit = frame_nat_net_exit,
.pre_exit = frame_nat_net_pre_exit,
};
static int __init ebtable_nat_init(void)
{
- return register_pernet_subsys(&frame_nat_net_ops);
+ int ret = ebt_register_template(&frame_nat, frame_nat_table_init);
+
+ if (ret)
+ return ret;
+
+ ret = register_pernet_subsys(&frame_nat_net_ops);
+ if (ret) {
+ ebt_unregister_template(&frame_nat);
+ return ret;
+ }
+
+ return ret;
}
static void __exit ebtable_nat_fini(void)
{
unregister_pernet_subsys(&frame_nat_net_ops);
+ ebt_unregister_template(&frame_nat);
}
module_init(ebtable_nat_init);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f022deb3721e..83d1798dfbb4 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -44,7 +44,16 @@ struct ebt_pernet {
struct list_head tables;
};
+struct ebt_template {
+ struct list_head list;
+ char name[EBT_TABLE_MAXNAMELEN];
+ struct module *owner;
+ /* called when table is needed in the given netns */
+ int (*table_init)(struct net *net);
+};
+
static unsigned int ebt_pernet_id __read_mostly;
+static LIST_HEAD(template_tables);
static DEFINE_MUTEX(ebt_mutex);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
@@ -309,30 +318,57 @@ letscontinue:
/* If it succeeds, returns element and locks mutex */
static inline void *
-find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
+find_inlist_lock_noload(struct net *net, const char *name, int *error,
struct mutex *mutex)
{
- struct {
- struct list_head list;
- char name[EBT_FUNCTION_MAXNAMELEN];
- } *e;
+ struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
+ struct ebt_template *tmpl;
+ struct ebt_table *table;
mutex_lock(mutex);
- list_for_each_entry(e, head, list) {
- if (strcmp(e->name, name) == 0)
- return e;
+ list_for_each_entry(table, &ebt_net->tables, list) {
+ if (strcmp(table->name, name) == 0)
+ return table;
}
+
+ list_for_each_entry(tmpl, &template_tables, list) {
+ if (strcmp(name, tmpl->name) == 0) {
+ struct module *owner = tmpl->owner;
+
+ if (!try_module_get(owner))
+ goto out;
+
+ mutex_unlock(mutex);
+
+ *error = tmpl->table_init(net);
+ if (*error) {
+ module_put(owner);
+ return NULL;
+ }
+
+ mutex_lock(mutex);
+ module_put(owner);
+ break;
+ }
+ }
+
+ list_for_each_entry(table, &ebt_net->tables, list) {
+ if (strcmp(table->name, name) == 0)
+ return table;
+ }
+
+out:
*error = -ENOENT;
mutex_unlock(mutex);
return NULL;
}
static void *
-find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
+find_inlist_lock(struct net *net, const char *name, const char *prefix,
int *error, struct mutex *mutex)
{
return try_then_request_module(
- find_inlist_lock_noload(head, name, error, mutex),
+ find_inlist_lock_noload(net, name, error, mutex),
"%s%s", prefix, name);
}
@@ -340,10 +376,7 @@ static inline struct ebt_table *
find_table_lock(struct net *net, const char *name, int *error,
struct mutex *mutex)
{
- struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
-
- return find_inlist_lock(&ebt_net->tables, name,
- "ebtable_", error, mutex);
+ return find_inlist_lock(net, name, "ebtable_", error, mutex);
}
static inline void ebt_free_table_info(struct ebt_table_info *info)
@@ -1258,6 +1291,54 @@ out:
return ret;
}
+int ebt_register_template(const struct ebt_table *t, int (*table_init)(struct net *net))
+{
+ struct ebt_template *tmpl;
+
+ mutex_lock(&ebt_mutex);
+ list_for_each_entry(tmpl, &template_tables, list) {
+ if (WARN_ON_ONCE(strcmp(t->name, tmpl->name) == 0)) {
+ mutex_unlock(&ebt_mutex);
+ return -EEXIST;
+ }
+ }
+
+ tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL);
+ if (!tmpl) {
+ mutex_unlock(&ebt_mutex);
+ return -ENOMEM;
+ }
+
+ tmpl->table_init = table_init;
+ strscpy(tmpl->name, t->name, sizeof(tmpl->name));
+ tmpl->owner = t->me;
+ list_add(&tmpl->list, &template_tables);
+
+ mutex_unlock(&ebt_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(ebt_register_template);
+
+void ebt_unregister_template(const struct ebt_table *t)
+{
+ struct ebt_template *tmpl;
+
+ mutex_lock(&ebt_mutex);
+ list_for_each_entry(tmpl, &template_tables, list) {
+ if (strcmp(t->name, tmpl->name))
+ continue;
+
+ list_del(&tmpl->list);
+ mutex_unlock(&ebt_mutex);
+ kfree(tmpl);
+ return;
+ }
+
+ mutex_unlock(&ebt_mutex);
+ WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL(ebt_unregister_template);
+
static struct ebt_table *__ebt_find_table(struct net *net, const char *name)
{
struct ebt_pernet *ebt_net = net_generic(net, ebt_pernet_id);
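
Note: the ebtables.c hunks above add the template mechanism that the ebtable_nat conversion earlier in this diff relies on. A table module registers its name plus an init callback once, and find_inlist_lock_noload() instantiates the table in a netns only when something there actually asks for it (it checks tables already present in the netns, then the templates, then the netns list again). A minimal sketch of how a table module plugs into this, with example_table, example_table_init and example_net_ops standing in as hypothetical names for the real frame_nat symbols:

/* Hypothetical module glue; mirrors the ebtable_nat pattern shown above. */
static int __init ebtable_example_init(void)
{
	int ret = ebt_register_template(&example_table, example_table_init);

	if (ret)
		return ret;

	/* pernet ops keep only .exit/.pre_exit - the table itself is
	 * created on demand by ebtables core via example_table_init()
	 */
	ret = register_pernet_subsys(&example_net_ops);
	if (ret)
		ebt_unregister_template(&example_table);

	return ret;
}

static void __exit ebtable_example_fini(void)
{
	unregister_pernet_subsys(&example_net_ops);
	ebt_unregister_template(&example_table);
}
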
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 8d033a75a766..fdbed3158555 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -88,6 +88,12 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
skb = ip_fraglist_next(&iter);
}
+
+ if (!err)
+ return 0;
+
+ kfree_skb_list(iter.frag);
+
return err;
}
slow_path:
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 3ad0a1df6712..e12fd3cad619 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -243,7 +243,7 @@ static void caif_ctrl_cb(struct cflayer *layr,
cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
cf_sk->sk.sk_err = ECONNRESET;
set_rx_flow_on(cf_sk);
- cf_sk->sk.sk_error_report(&cf_sk->sk);
+ sk_error_report(&cf_sk->sk);
break;
default:
@@ -539,7 +539,8 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
goto err;
ret = -EINVAL;
- if (unlikely(msg->msg_iter.iov->iov_base == NULL))
+ if (unlikely(msg->msg_iter.nr_segs == 0) ||
+ unlikely(msg->msg_iter.iov->iov_base == NULL))
goto err;
noblock = msg->msg_flags & MSG_DONTWAIT;
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index cac30e676ac9..23267c8db7c4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -480,7 +480,7 @@ got_phyid:
phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
if (!phyinfo) {
res = -ENOMEM;
- goto out_err;
+ goto out;
}
phy_layer->id = phyid;
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index fadc7c8a3107..37b67194c0df 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -76,8 +76,6 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
u8 buf;
priv = container_of(layr, struct chnl_net, chnl);
- if (!priv)
- return -EINVAL;
skb = (struct sk_buff *) cfpkt_tonative(pkt);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index f3e4d9528fa3..508f67de0b80 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -785,6 +785,7 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
bcm_rx_handler, op);
list_del(&op->list);
+ synchronize_rcu();
bcm_remove_op(op);
return 1; /* done */
}
@@ -1417,7 +1418,7 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
if (notify_enodev) {
sk->sk_err = ENODEV;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
break;
@@ -1425,7 +1426,7 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
if (bo->bound && bo->ifindex == dev->ifindex) {
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
}
}
@@ -1533,9 +1534,13 @@ static int bcm_release(struct socket *sock)
REGMASK(op->can_id),
bcm_rx_handler, op);
- bcm_remove_op(op);
}
+ synchronize_rcu();
+
+ list_for_each_entry_safe(op, next, &bo->rx_ops, list)
+ bcm_remove_op(op);
+
#if IS_ENABLED(CONFIG_PROC_FS)
/* remove procfs entry */
if (net->can.bcmproc_dir && bo->bcm_proc_read)
diff --git a/net/can/gw.c b/net/can/gw.c
index ba4124805602..d8861e862f15 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -596,6 +596,7 @@ static int cgw_notifier(struct notifier_block *nb,
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
}
}
@@ -1154,6 +1155,7 @@ static void cgw_remove_all_jobs(struct net *net)
hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
}
}
@@ -1222,6 +1224,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
err = 0;
break;
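
Note: the synchronize_rcu() calls added in bcm.c above and in these gw.c hunks all enforce the same ordering: the filter or gateway job is unlinked and unregistered first, a full RCU grace period is waited out, and only then is the memory freed, so a CAN receive callback still running on another CPU cannot dereference freed data. A condensed sketch of that teardown order, reusing the gw.c names from the hunks above:

/* Sketch only: the point is the unpublish -> synchronize_rcu -> free order. */
static void example_remove_job(struct net *net, struct cgw_job *gwj)
{
	hlist_del(&gwj->list);            /* unlink from the RCU-protected list */
	cgw_unregister_filter(net, gwj);  /* no new callbacks after this point */
	synchronize_rcu();                /* wait for callbacks already in flight */
	kmem_cache_free(cgw_cache, gwj);  /* now safe to free */
}
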
diff --git a/net/can/isotp.c b/net/can/isotp.c
index be6183f8ca11..caaa532ece94 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -168,7 +168,7 @@ static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
/* report 'connection timed out' */
sk->sk_err = ETIMEDOUT;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
/* reset rx state */
so->rx.state = ISOTP_IDLE;
@@ -225,8 +225,8 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
can_send_ret = can_send(nskb, 1);
if (can_send_ret)
- pr_notice_once("can-isotp: %s: can_send_ret %d\n",
- __func__, can_send_ret);
+ pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+ __func__, ERR_PTR(can_send_ret));
dev_put(dev);
@@ -339,7 +339,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
/* malformed PDU - report 'not a data message' */
sk->sk_err = EBADMSG;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
so->tx.state = ISOTP_IDLE;
wake_up_interruptible(&so->wait);
@@ -392,7 +392,7 @@ static int isotp_rcv_fc(struct isotp_sock *so, struct canfd_frame *cf, int ae)
/* overflow on receiver side - report 'message too long' */
sk->sk_err = EMSGSIZE;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
fallthrough;
default:
@@ -420,7 +420,7 @@ static int isotp_rcv_sf(struct sock *sk, struct canfd_frame *cf, int pcilen,
/* malformed PDU - report 'not a data message' */
sk->sk_err = EBADMSG;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return 1;
}
@@ -535,7 +535,7 @@ static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae,
/* wrong sn detected - report 'illegal byte sequence' */
sk->sk_err = EILSEQ;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
/* reset rx state */
so->rx.state = ISOTP_IDLE;
@@ -559,7 +559,7 @@ static int isotp_rcv_cf(struct sock *sk, struct canfd_frame *cf, int ae,
/* malformed PDU - report 'not a data message' */
sk->sk_err = EBADMSG;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return 1;
}
@@ -758,7 +758,7 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
/* report 'communication error on send' */
sk->sk_err = ECOMM;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
/* reset tx state */
so->tx.state = ISOTP_IDLE;
@@ -801,10 +801,12 @@ isotp_tx_burst:
can_skb_set_owner(skb, sk);
can_send_ret = can_send(skb, 1);
- if (can_send_ret)
- pr_notice_once("can-isotp: %s: can_send_ret %d\n",
- __func__, can_send_ret);
-
+ if (can_send_ret) {
+ pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+ __func__, ERR_PTR(can_send_ret));
+ if (can_send_ret == -ENOBUFS)
+ pr_notice_once("can-isotp: tx queue is full, increasing txqueuelen may prevent this error\n");
+ }
if (so->tx.idx >= so->tx.len) {
/* we are done */
so->tx.state = ISOTP_IDLE;
@@ -950,8 +952,8 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
err = can_send(skb, 1);
dev_put(dev);
if (err) {
- pr_notice_once("can-isotp: %s: can_send_ret %d\n",
- __func__, err);
+ pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
+ __func__, ERR_PTR(err));
return err;
}
@@ -1028,9 +1030,6 @@ static int isotp_release(struct socket *sock)
lock_sock(sk);
- hrtimer_cancel(&so->txtimer);
- hrtimer_cancel(&so->rxtimer);
-
/* remove current filters & unregister */
if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) {
if (so->ifindex) {
@@ -1042,10 +1041,14 @@ static int isotp_release(struct socket *sock)
SINGLE_MASK(so->rxid),
isotp_rcv, sk);
dev_put(dev);
+ synchronize_rcu();
}
}
}
+ hrtimer_cancel(&so->txtimer);
+ hrtimer_cancel(&so->rxtimer);
+
so->ifindex = 0;
so->bound = 0;
@@ -1155,7 +1158,7 @@ out:
if (notify_enetdown) {
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
return err;
@@ -1354,13 +1357,13 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
sk->sk_err = ENODEV;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
break;
case NETDEV_DOWN:
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
break;
}
}
@@ -1482,7 +1485,7 @@ static __init int isotp_module_init(void)
err = can_proto_register(&isotp_can_proto);
if (err < 0)
- pr_err("can: registration of isotp protocol failed\n");
+ pr_err("can: registration of isotp protocol failed %pe\n", ERR_PTR(err));
else
register_netdevice_notifier(&canisotp_notifier);
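
Note: the logging changes in isotp.c switch from printing the raw return value with %d to printing ERR_PTR(err) with %pe. With CONFIG_SYMBOLIC_ERRNAME enabled this renders the symbolic error name, which is what makes messages such as the new txqueuelen hint actionable; without that option the value is still printed as a plain number. A one-line illustration, not taken from the patch:

	pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
		       __func__, ERR_PTR(-ENOBUFS));	/* "-ENOBUFS" with CONFIG_SYMBOLIC_ERRNAME, a decimal value otherwise */
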
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 12369b604ce9..f6df20808f5e 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -20,9 +20,12 @@
struct j1939_session;
enum j1939_sk_errqueue_type {
- J1939_ERRQUEUE_ACK,
- J1939_ERRQUEUE_SCHED,
- J1939_ERRQUEUE_ABORT,
+ J1939_ERRQUEUE_TX_ACK,
+ J1939_ERRQUEUE_TX_SCHED,
+ J1939_ERRQUEUE_TX_ABORT,
+ J1939_ERRQUEUE_RX_RTS,
+ J1939_ERRQUEUE_RX_DPO,
+ J1939_ERRQUEUE_RX_ABORT,
};
/* j1939 devices */
@@ -87,6 +90,7 @@ struct j1939_priv {
struct list_head j1939_socks;
struct kref rx_kref;
+ u32 rx_tskey;
};
void j1939_ecu_put(struct j1939_ecu *ecu);
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index da3a7a7bcff2..08c8606cfd9c 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -193,6 +193,10 @@ static void j1939_can_rx_unregister(struct j1939_priv *priv)
can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
j1939_can_recv, priv);
+ /* The last reference of priv is dropped by the RCU deferred
+ * j1939_sk_sock_destruct() of the last socket, so we can
+ * safely drop this reference here.
+ */
j1939_priv_put(priv);
}
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 56aa66147d5a..6dff4510687a 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -352,7 +352,7 @@ static void j1939_sk_sock_destruct(struct sock *sk)
{
struct j1939_sock *jsk = j1939_sk(sk);
- /* This function will be call by the generic networking code, when then
+ /* This function will be called by the generic networking code, when
* the socket is ultimately closed (sk->sk_destruct).
*
* The race between
@@ -398,6 +398,9 @@ static int j1939_sk_init(struct sock *sk)
atomic_set(&jsk->skb_pending, 0);
spin_lock_init(&jsk->sk_session_queue_lock);
INIT_LIST_HEAD(&jsk->sk_session_queue);
+
+ /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
+ sock_set_flag(sk, SOCK_RCU_FREE);
sk->sk_destruct = j1939_sk_sock_destruct;
sk->sk_protocol = CAN_J1939;
@@ -673,7 +676,7 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case SO_J1939_FILTER:
- if (!sockptr_is_null(optval)) {
+ if (!sockptr_is_null(optval) && optlen != 0) {
struct j1939_filter *f;
int c;
@@ -902,20 +905,33 @@ failure:
return NULL;
}
-static size_t j1939_sk_opt_stats_get_size(void)
+static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type)
{
- return
- nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
- 0;
+ switch (type) {
+ case J1939_ERRQUEUE_RX_RTS:
+ return
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */
+ nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */
+ nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */
+ nla_total_size(sizeof(u8)) + /* J1939_NLA_SRC_ADDR */
+ nla_total_size(sizeof(u8)) + /* J1939_NLA_DEST_ADDR */
+ 0;
+ default:
+ return
+ nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+ 0;
+ }
}
static struct sk_buff *
-j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type)
{
struct sk_buff *stats;
u32 size;
- stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+ stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC);
if (!stats)
return NULL;
@@ -925,32 +941,67 @@ j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
size = min(session->pkt.tx_acked * 7,
session->total_message_size);
- nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+ switch (type) {
+ case J1939_ERRQUEUE_RX_RTS:
+ nla_put_u32(stats, J1939_NLA_TOTAL_SIZE,
+ session->total_message_size);
+ nla_put_u32(stats, J1939_NLA_PGN,
+ session->skcb.addr.pgn);
+ nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME,
+ session->skcb.addr.src_name, J1939_NLA_PAD);
+ nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME,
+ session->skcb.addr.dst_name, J1939_NLA_PAD);
+ nla_put_u8(stats, J1939_NLA_SRC_ADDR,
+ session->skcb.addr.sa);
+ nla_put_u8(stats, J1939_NLA_DEST_ADDR,
+ session->skcb.addr.da);
+ break;
+ default:
+ nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+ }
return stats;
}
-void j1939_sk_errqueue(struct j1939_session *session,
- enum j1939_sk_errqueue_type type)
+static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+ enum j1939_sk_errqueue_type type)
{
struct j1939_priv *priv = session->priv;
- struct sock *sk = session->sk;
struct j1939_sock *jsk;
struct sock_exterr_skb *serr;
struct sk_buff *skb;
char *state = "UNK";
int err;
- /* currently we have no sk for the RX session */
- if (!sk)
- return;
-
jsk = j1939_sk(sk);
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
return;
- skb = j1939_sk_get_timestamping_opt_stats(session);
+ switch (type) {
+ case J1939_ERRQUEUE_TX_ACK:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ return;
+ break;
+ case J1939_ERRQUEUE_TX_SCHED:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ return;
+ break;
+ case J1939_ERRQUEUE_TX_ABORT:
+ break;
+ case J1939_ERRQUEUE_RX_RTS:
+ fallthrough;
+ case J1939_ERRQUEUE_RX_DPO:
+ fallthrough;
+ case J1939_ERRQUEUE_RX_ABORT:
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+ return;
+ break;
+ default:
+ netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+ }
+
+ skb = j1939_sk_get_timestamping_opt_stats(session, type);
if (!skb)
return;
@@ -961,36 +1012,42 @@ void j1939_sk_errqueue(struct j1939_session *session,
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
switch (type) {
- case J1939_ERRQUEUE_ACK:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
- kfree_skb(skb);
- return;
- }
-
+ case J1939_ERRQUEUE_TX_ACK:
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = SCM_TSTAMP_ACK;
- state = "ACK";
+ state = "TX ACK";
break;
- case J1939_ERRQUEUE_SCHED:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
- kfree_skb(skb);
- return;
- }
-
+ case J1939_ERRQUEUE_TX_SCHED:
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = SCM_TSTAMP_SCHED;
- state = "SCH";
+ state = "TX SCH";
break;
- case J1939_ERRQUEUE_ABORT:
+ case J1939_ERRQUEUE_TX_ABORT:
serr->ee.ee_errno = session->err;
serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
- state = "ABT";
+ state = "TX ABT";
+ break;
+ case J1939_ERRQUEUE_RX_RTS:
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_RTS;
+ state = "RX RTS";
+ break;
+ case J1939_ERRQUEUE_RX_DPO:
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_DPO;
+ state = "RX DPO";
+ break;
+ case J1939_ERRQUEUE_RX_ABORT:
+ serr->ee.ee_errno = session->err;
+ serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+ serr->ee.ee_info = J1939_EE_INFO_RX_ABORT;
+ state = "RX ABT";
break;
- default:
- netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
}
serr->opt_stats = true;
@@ -1005,11 +1062,32 @@ void j1939_sk_errqueue(struct j1939_session *session,
kfree_skb(skb);
};
+void j1939_sk_errqueue(struct j1939_session *session,
+ enum j1939_sk_errqueue_type type)
+{
+ struct j1939_priv *priv = session->priv;
+ struct j1939_sock *jsk;
+
+ if (session->sk) {
+ /* send TX notifications to the socket of origin */
+ __j1939_sk_errqueue(session, session->sk, type);
+ return;
+ }
+
+ /* spread RX notifications to all sockets subscribed to this session */
+ spin_lock_bh(&priv->j1939_socks_lock);
+ list_for_each_entry(jsk, &priv->j1939_socks, list) {
+ if (j1939_sk_recv_match_one(jsk, &session->skcb))
+ __j1939_sk_errqueue(session, &jsk->sk, type);
+ }
+ spin_unlock_bh(&priv->j1939_socks_lock);
+};
+
void j1939_sk_send_loop_abort(struct sock *sk, int err)
{
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk,
@@ -1189,7 +1267,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
list_for_each_entry(jsk, &priv->j1939_socks, list) {
jsk->sk.sk_err = error_code;
if (!sock_flag(&jsk->sk, SOCK_DEAD))
- jsk->sk.sk_error_report(&jsk->sk);
+ sk_error_report(&jsk->sk);
j1939_sk_queue_drop_all(priv, jsk, error_code);
}
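
Note: the net effect of the socket.c changes above is that error-queue notifications are no longer limited to the transmit side. For an RX transport session, which has no originating socket, j1939_sk_errqueue() now fans the event out to every socket whose filters match the session, and three new event kinds (RX_RTS, RX_DPO, RX_ABORT) describe the receive side. A hypothetical userspace helper, sketched only to show how the ee_info values added by this series would be told apart once a sock_extended_err has been pulled off the error queue; the recvmsg(MSG_ERRQUEUE)/cmsg plumbing is deliberately left out:

#include <linux/errqueue.h>
#include <linux/can/j1939.h>

/* Hypothetical classifier, not part of the patch. */
static const char *j1939_errqueue_kind(const struct sock_extended_err *ee)
{
	if (ee->ee_origin != SO_EE_ORIGIN_LOCAL)
		return "timestamp or other origin";

	switch (ee->ee_info) {
	case J1939_EE_INFO_TX_ABORT: return "TX session aborted (ee_errno set)";
	case J1939_EE_INFO_RX_RTS:   return "RX session announced (RTS received)";
	case J1939_EE_INFO_RX_DPO:   return "RX data page offset received";
	case J1939_EE_INFO_RX_ABORT: return "RX session aborted (ee_errno set)";
	default:                     return "unknown";
	}
}
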
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index c3946c355882..bb5c4b8979be 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -260,10 +260,14 @@ static void __j1939_session_drop(struct j1939_session *session)
static void j1939_session_destroy(struct j1939_session *session)
{
- if (session->err)
- j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
- else
- j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+ if (session->transmission) {
+ if (session->err)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK);
+ } else if (session->err) {
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+ }
netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
@@ -776,7 +780,7 @@ static int j1939_session_tx_dpo(struct j1939_session *session)
static int j1939_session_tx_dat(struct j1939_session *session)
{
struct j1939_priv *priv = session->priv;
- struct j1939_sk_buff_cb *skcb;
+ struct j1939_sk_buff_cb *se_skcb;
int offset, pkt_done, pkt_end;
unsigned int len, pdelay;
struct sk_buff *se_skb;
@@ -788,7 +792,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
if (!se_skb)
return -ENOBUFS;
- skcb = j1939_skb_to_cb(se_skb);
+ se_skcb = j1939_skb_to_cb(se_skb);
tpdat = se_skb->data;
ret = 0;
pkt_done = 0;
@@ -800,7 +804,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
while (session->pkt.tx < pkt_end) {
dat[0] = session->pkt.tx - session->pkt.dpo + 1;
- offset = (session->pkt.tx * 7) - skcb->offset;
+ offset = (session->pkt.tx * 7) - se_skcb->offset;
len = se_skb->len - offset;
if (len > 7)
len = 7;
@@ -808,7 +812,8 @@ static int j1939_session_tx_dat(struct j1939_session *session)
if (offset + len > se_skb->len) {
netdev_err_once(priv->ndev,
"%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
- __func__, session, skcb->offset, se_skb->len , session->pkt.tx);
+ __func__, session, se_skcb->offset,
+ se_skb->len , session->pkt.tx);
ret = -EOVERFLOW;
goto out_free;
}
@@ -821,7 +826,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
memcpy(&dat[1], &tpdat[offset], len);
ret = j1939_tp_tx_dat(session, dat, len + 1);
if (ret < 0) {
- /* ENOBUS == CAN interface TX queue is full */
+ /* ENOBUFS == CAN interface TX queue is full */
if (ret != -ENOBUFS)
netdev_alert(priv->ndev,
"%s: 0x%p: queue data error: %i\n",
@@ -1043,7 +1048,7 @@ static int j1939_simple_txnext(struct j1939_session *session)
if (ret)
goto out_free;
- j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED);
j1939_sk_queue_activate_next(session);
out_free:
@@ -1075,11 +1080,16 @@ static bool j1939_session_deactivate_locked(struct j1939_session *session)
static bool j1939_session_deactivate(struct j1939_session *session)
{
+ struct j1939_priv *priv = session->priv;
bool active;
- j1939_session_list_lock(session->priv);
+ j1939_session_list_lock(priv);
+ /* This function should be called with a session ref-count of at
+ * least 2.
+ */
+ WARN_ON_ONCE(kref_read(&session->kref) < 2);
active = j1939_session_deactivate_locked(session);
- j1939_session_list_unlock(session->priv);
+ j1939_session_list_unlock(priv);
return active;
}
@@ -1092,7 +1102,7 @@ j1939_session_deactivate_activate_next(struct j1939_session *session)
}
static void __j1939_session_cancel(struct j1939_session *session,
- enum j1939_xtp_abort err)
+ enum j1939_xtp_abort err)
{
struct j1939_priv *priv = session->priv;
@@ -1110,6 +1120,8 @@ static void __j1939_session_cancel(struct j1939_session *session,
if (session->sk)
j1939_sk_send_loop_abort(session->sk, session->err);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
}
static void j1939_session_cancel(struct j1939_session *session,
@@ -1190,13 +1202,13 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
static void j1939_session_completed(struct j1939_session *session)
{
- struct sk_buff *skb;
+ struct sk_buff *se_skb;
if (!session->transmission) {
- skb = j1939_session_skb_get(session);
+ se_skb = j1939_session_skb_get(session);
/* distribute among j1939 receivers */
- j1939_sk_recv(session->priv, skb);
- consume_skb(skb);
+ j1939_sk_recv(session->priv, se_skb);
+ consume_skb(se_skb);
}
j1939_session_deactivate_activate_next(session);
@@ -1263,12 +1275,14 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
break;
case J1939_ETP_CMD_RTS:
- case J1939_TP_CMD_RTS: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_RTS:
abort = J1939_XTP_ABORT_BUSY;
break;
case J1939_ETP_CMD_CTS:
- case J1939_TP_CMD_CTS: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_CTS:
abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN;
break;
@@ -1277,7 +1291,8 @@ static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
break;
case J1939_ETP_CMD_EOMA:
- case J1939_TP_CMD_EOMA: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_EOMA:
abort = J1939_XTP_ABORT_OTHER;
break;
@@ -1321,6 +1336,8 @@ static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
session->err = j1939_xtp_abort_to_errno(priv, abort);
if (session->sk)
j1939_sk_send_loop_abort(session->sk, session->err);
+ else
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
j1939_session_deactivate_activate_next(session);
abort_put:
@@ -1429,7 +1446,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
if (session->transmission) {
if (session->pkt.tx_acked)
j1939_sk_errqueue(session,
- J1939_ERRQUEUE_SCHED);
+ J1939_ERRQUEUE_TX_SCHED);
j1939_session_txtimer_cancel(session);
j1939_tp_schedule_txtimer(session, 0);
}
@@ -1621,6 +1638,9 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
session->pkt.rx = 0;
session->pkt.tx = 0;
+ session->tskey = priv->rx_tskey++;
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
+
WARN_ON_ONCE(j1939_session_activate(session));
return session;
@@ -1743,6 +1763,9 @@ static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
session->last_cmd = dat[0];
j1939_tp_set_rxtimeout(session, 750);
+
+ if (!session->transmission)
+ j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO);
}
static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
@@ -1767,7 +1790,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
struct sk_buff *skb)
{
struct j1939_priv *priv = session->priv;
- struct j1939_sk_buff_cb *skcb;
+ struct j1939_sk_buff_cb *skcb, *se_skcb;
struct sk_buff *se_skb = NULL;
const u8 *dat;
u8 *tpdat;
@@ -1792,7 +1815,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
break;
fallthrough;
case J1939_TP_CMD_BAM:
- case J1939_TP_CMD_CTS: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_CTS:
if (skcb->addr.type != J1939_ETP)
break;
fallthrough;
@@ -1817,8 +1841,8 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
goto out_session_cancel;
}
- skcb = j1939_skb_to_cb(se_skb);
- offset = packet * 7 - skcb->offset;
+ se_skcb = j1939_skb_to_cb(se_skb);
+ offset = packet * 7 - se_skcb->offset;
nbytes = se_skb->len - offset;
if (nbytes > 7)
nbytes = 7;
@@ -1846,7 +1870,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
if (packet == session->pkt.rx)
session->pkt.rx++;
- if (skcb->addr.type != J1939_ETP &&
+ if (se_skcb->addr.type != J1939_ETP &&
j1939_cb_is_broadcast(&session->skcb)) {
if (session->pkt.rx >= session->pkt.total)
final = true;
@@ -1869,7 +1893,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
if (!session->transmission)
j1939_tp_schedule_txtimer(session, 0);
} else {
- j1939_tp_set_rxtimeout(session, 250);
+ j1939_tp_set_rxtimeout(session, 750);
}
session->last_cmd = 0xff;
consume_skb(se_skb);
@@ -1995,7 +2019,8 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
extd = J1939_ETP;
fallthrough;
case J1939_TP_CMD_BAM:
- case J1939_TP_CMD_RTS: /* fall through */
+ fallthrough;
+ case J1939_TP_CMD_RTS:
if (skcb->addr.type != extd)
return;
diff --git a/net/can/proc.c b/net/can/proc.c
index d1fe49e6f16d..b3099f0a3cb8 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -99,8 +99,6 @@ static void can_init_stats(struct net *net)
static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
unsigned long count)
{
- unsigned long rate;
-
if (oldjif == newjif)
return 0;
@@ -111,9 +109,7 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
return 99999999;
}
- rate = (count * HZ) / (newjif - oldjif);
-
- return rate;
+ return (count * HZ) / (newjif - oldjif);
}
void can_stat_update(struct timer_list *t)
diff --git a/net/can/raw.c b/net/can/raw.c
index ac96fc210025..7105fa4824e4 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -295,13 +295,13 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
sk->sk_err = ENODEV;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
break;
case NETDEV_DOWN:
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
break;
}
}
@@ -488,7 +488,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (notify_enetdown) {
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
return err;
@@ -546,10 +546,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
}
+ rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex)
+ if (ro->bound && ro->ifindex) {
dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+ if (!dev) {
+ if (count > 1)
+ kfree(filter);
+ err = -ENODEV;
+ goto out_fil;
+ }
+ }
if (ro->bound) {
/* (try to) register the new filters */
@@ -584,10 +592,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->count = count;
out_fil:
- if (dev)
- dev_put(dev);
-
+ dev_put(dev);
release_sock(sk);
+ rtnl_unlock();
break;
@@ -600,10 +607,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
err_mask &= CAN_ERR_MASK;
+ rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex)
+ if (ro->bound && ro->ifindex) {
dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_err;
+ }
+ }
/* remove current error mask */
if (ro->bound) {
@@ -623,10 +636,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->err_mask = err_mask;
out_err:
- if (dev)
- dev_put(dev);
-
+ dev_put(dev);
release_sock(sk);
+ rtnl_unlock();
break;
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index d2b268a1838e..d38c9eadbe2f 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -58,12 +58,10 @@ struct ceph_auth_client *ceph_auth_init(const char *name,
const int *con_modes)
{
struct ceph_auth_client *ac;
- int ret;
- ret = -ENOMEM;
ac = kzalloc(sizeof(*ac), GFP_NOFS);
if (!ac)
- goto out;
+ return ERR_PTR(-ENOMEM);
mutex_init(&ac->mutex);
ac->negotiating = true;
@@ -78,9 +76,6 @@ struct ceph_auth_client *ceph_auth_init(const char *name,
dout("%s name '%s' preferred_mode %d fallback_mode %d\n", __func__,
ac->name, ac->preferred_mode, ac->fallback_mode);
return ac;
-
-out:
- return ERR_PTR(ret);
}
void ceph_auth_destroy(struct ceph_auth_client *ac)
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 097e9f8d87a7..77b5519bc45f 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -112,8 +112,8 @@ static int ceph_auth_none_create_authorizer(
auth->authorizer = (struct ceph_authorizer *) au;
auth->authorizer_buf = au->buf;
auth->authorizer_buf_len = au->buf_len;
- auth->authorizer_reply_buf = au->reply_buf;
- auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+ auth->authorizer_reply_buf = NULL;
+ auth->authorizer_reply_buf_len = 0;
return 0;
}
diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h
index 4158f064302e..bb121539e796 100644
--- a/net/ceph/auth_none.h
+++ b/net/ceph/auth_none.h
@@ -16,7 +16,6 @@ struct ceph_none_authorizer {
struct ceph_authorizer base;
char buf[128];
int buf_len;
- char reply_buf[0];
};
struct ceph_auth_none_info {
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 792fcb974dc3..9c60feeb1bcb 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -87,7 +87,7 @@ struct ceph_x_authorize_reply {
/*
- * encyption bundle
+ * encryption bundle
*/
#define CEPHX_ENC_MAGIC 0xff009cad8826aa55ull
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 17447c19d937..66136a4c1ce7 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -10,7 +10,9 @@
/**
* ceph_cls_lock - grab rados lock for object
- * @oid, @oloc: object to lock
+ * @osdc: OSD client instance
+ * @oid: object to lock
+ * @oloc: object to lock
* @lock_name: the name of the lock
* @type: lock type (CEPH_CLS_LOCK_EXCLUSIVE or CEPH_CLS_LOCK_SHARED)
* @cookie: user-defined identifier for this instance of the lock
@@ -82,7 +84,9 @@ EXPORT_SYMBOL(ceph_cls_lock);
/**
* ceph_cls_unlock - release rados lock for object
- * @oid, @oloc: object to lock
+ * @osdc: OSD client instance
+ * @oid: object to lock
+ * @oloc: object to lock
* @lock_name: the name of the lock
* @cookie: user-defined identifier for this instance of the lock
*/
@@ -130,7 +134,9 @@ EXPORT_SYMBOL(ceph_cls_unlock);
/**
* ceph_cls_break_lock - release rados lock for object for specified client
- * @oid, @oloc: object to lock
+ * @osdc: OSD client instance
+ * @oid: object to lock
+ * @oloc: object to lock
* @lock_name: the name of the lock
* @cookie: user-defined identifier for this instance of the lock
* @locker: current lock owner
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 195ceb8afb06..013cbdb6cfe2 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1508,7 +1508,7 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
return get_generic_reply(con, hdr, skip);
/*
- * Older OSDs don't set reply tid even if the orignal
+ * Older OSDs don't set reply tid even if the original
* request had a non-zero tid. Work around this weirdness
* by allocating a new message.
*/
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index c959320c4775..75b738083523 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1309,7 +1309,7 @@ static int get_osdmap_client_data_v(void **p, void *end,
return -EINVAL;
}
- /* old osdmap enconding */
+ /* old osdmap encoding */
struct_v = 0;
}
@@ -3010,7 +3010,7 @@ static bool is_valid_crush_name(const char *name)
* parent, returns 0.
*
* Does a linear search, as there are no parent pointers of any
- * kind. Note that the result is ambigous for items that occur
+ * kind. Note that the result is ambiguous for items that occur
* multiple times in the map.
*/
static int get_immediate_parent(struct crush_map *c, int id,
diff --git a/net/core/Makefile b/net/core/Makefile
index f7f16650fe9e..35ced6201814 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -33,8 +33,6 @@ obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o
-ifeq ($(CONFIG_INET),y)
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
-endif
obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index cc3712ad8716..68d2cbf8331a 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -416,7 +416,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
void *, value, u64, flags)
{
- if (in_irq() || in_nmi())
+ if (in_hardirq() || in_nmi())
return (unsigned long)NULL;
return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
@@ -425,7 +425,7 @@ BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
struct sock *, sk)
{
- if (in_irq() || in_nmi())
+ if (in_hardirq() || in_nmi())
return -EPERM;
return ____bpf_sk_storage_delete(map, sk);
@@ -524,8 +524,7 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
nr_maps++;
}
- diag = kzalloc(sizeof(*diag) + sizeof(diag->maps[0]) * nr_maps,
- GFP_KERNEL);
+ diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
if (!diag)
return ERR_PTR(-ENOMEM);
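
Note: two small modernizations in bpf_sk_storage.c. The context checks move from in_irq() to the newer in_hardirq() spelling (same semantics, clearer name), and the diag allocation uses struct_size(), which computes sizeof(*diag) + nr_maps * sizeof(diag->maps[0]) with overflow checking, so an absurd nr_maps makes the allocation fail instead of silently wrapping. A compressed sketch of the struct_size() idiom, reusing the names from the hunk above:

/* Fragment, not a complete function: the flexible-array allocation idiom. */
struct bpf_sk_storage_diag *diag;

diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);	/* overflow-checked size */
if (!diag)
	return ERR_PTR(-ENOMEM);
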
diff --git a/net/core/dev.c b/net/core/dev.c
index ef8cf7619baf..74fd402d26dd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
+#include <trace/events/qdisc.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
@@ -148,6 +149,7 @@
#include <net/devlink.h>
#include <linux/pm_runtime.h>
#include <linux/prandom.h>
+#include <linux/once_lite.h>
#include "net-sysfs.h"
@@ -674,131 +676,6 @@ void dev_remove_offload(struct packet_offload *po)
}
EXPORT_SYMBOL(dev_remove_offload);
-/******************************************************************************
- *
- * Device Boot-time Settings Routines
- *
- ******************************************************************************/
-
-/* Boot time configuration table */
-static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
-
-/**
- * netdev_boot_setup_add - add new setup entry
- * @name: name of the device
- * @map: configured settings for the device
- *
- * Adds new setup entry to the dev_boot_setup list. The function
- * returns 0 on error and 1 on success. This is a generic routine to
- * all netdevices.
- */
-static int netdev_boot_setup_add(char *name, struct ifmap *map)
-{
- struct netdev_boot_setup *s;
- int i;
-
- s = dev_boot_setup;
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
- if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
- memset(s[i].name, 0, sizeof(s[i].name));
- strlcpy(s[i].name, name, IFNAMSIZ);
- memcpy(&s[i].map, map, sizeof(s[i].map));
- break;
- }
- }
-
- return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
-}
-
-/**
- * netdev_boot_setup_check - check boot time settings
- * @dev: the netdevice
- *
- * Check boot time settings for the device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found, 1 if they are.
- */
-int netdev_boot_setup_check(struct net_device *dev)
-{
- struct netdev_boot_setup *s = dev_boot_setup;
- int i;
-
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
- if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
- !strcmp(dev->name, s[i].name)) {
- dev->irq = s[i].map.irq;
- dev->base_addr = s[i].map.base_addr;
- dev->mem_start = s[i].map.mem_start;
- dev->mem_end = s[i].map.mem_end;
- return 1;
- }
- }
- return 0;
-}
-EXPORT_SYMBOL(netdev_boot_setup_check);
-
-
-/**
- * netdev_boot_base - get address from boot time settings
- * @prefix: prefix for network device
- * @unit: id for network device
- *
- * Check boot time settings for the base address of device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found.
- */
-unsigned long netdev_boot_base(const char *prefix, int unit)
-{
- const struct netdev_boot_setup *s = dev_boot_setup;
- char name[IFNAMSIZ];
- int i;
-
- sprintf(name, "%s%d", prefix, unit);
-
- /*
- * If device already registered then return base of 1
- * to indicate not to probe for this interface
- */
- if (__dev_get_by_name(&init_net, name))
- return 1;
-
- for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
- if (!strcmp(name, s[i].name))
- return s[i].map.base_addr;
- return 0;
-}
-
-/*
- * Saves at boot time configured settings for any netdevice.
- */
-int __init netdev_boot_setup(char *str)
-{
- int ints[5];
- struct ifmap map;
-
- str = get_options(str, ARRAY_SIZE(ints), ints);
- if (!str || !*str)
- return 0;
-
- /* Save settings */
- memset(&map, 0, sizeof(map));
- if (ints[0] > 0)
- map.irq = ints[1];
- if (ints[0] > 1)
- map.base_addr = ints[2];
- if (ints[0] > 2)
- map.mem_start = ints[3];
- if (ints[0] > 3)
- map.mem_end = ints[4];
-
- /* Add new entry to the list */
- return netdev_boot_setup_add(str, &map);
-}
-
-__setup("netdev=", netdev_boot_setup);
-
/*******************************************************************************
*
* Device Interface Subroutines
@@ -954,8 +831,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
rcu_read_lock();
dev = dev_get_by_name_rcu(net, name);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
@@ -1028,8 +904,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
rcu_read_lock();
dev = dev_get_by_index_rcu(net, ifindex);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
@@ -3097,6 +2972,50 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
/**
+ * netif_set_real_num_queues - set actual number of RX and TX queues used
+ * @dev: Network device
+ * @txq: Actual number of TX queues
+ * @rxq: Actual number of RX queues
+ *
+ * Set the real number of both TX and RX queues.
+ * Does nothing if the number of queues is already correct.
+ */
+int netif_set_real_num_queues(struct net_device *dev,
+ unsigned int txq, unsigned int rxq)
+{
+ unsigned int old_rxq = dev->real_num_rx_queues;
+ int err;
+
+ if (txq < 1 || txq > dev->num_tx_queues ||
+ rxq < 1 || rxq > dev->num_rx_queues)
+ return -EINVAL;
+
+ /* Start from increases, so the error path only does decreases -
+ * decreases can't fail.
+ */
+ if (rxq > dev->real_num_rx_queues) {
+ err = netif_set_real_num_rx_queues(dev, rxq);
+ if (err)
+ return err;
+ }
+ if (txq > dev->real_num_tx_queues) {
+ err = netif_set_real_num_tx_queues(dev, txq);
+ if (err)
+ goto undo_rx;
+ }
+ if (rxq < dev->real_num_rx_queues)
+ WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
+ if (txq < dev->real_num_tx_queues)
+ WARN_ON(netif_set_real_num_tx_queues(dev, txq));
+
+ return 0;
+undo_rx:
+ WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq));
+ return err;
+}
+EXPORT_SYMBOL(netif_set_real_num_queues);
+
+/**
* netif_get_num_default_rss_queues - default number of RSS queues
*
* This routine should set an upper limit on the number of RSS queues
@@ -3188,7 +3107,7 @@ EXPORT_SYMBOL(__dev_kfree_skb_irq);
void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
{
- if (in_irq() || irqs_disabled())
+ if (in_hardirq() || irqs_disabled())
__dev_kfree_skb_irq(skb, reason);
else
dev_kfree_skb(skb);
@@ -3487,13 +3406,16 @@ EXPORT_SYMBOL(__skb_gso_segment);
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
+static void do_netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
+{
+ pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+ skb_dump(KERN_ERR, skb, true);
+ dump_stack();
+}
+
void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
- if (net_ratelimit()) {
- pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
- skb_dump(KERN_ERR, skb, true);
- dump_stack();
- }
+ DO_ONCE_LITE(do_netdev_rx_csum_fault, dev, skb);
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
@@ -3840,6 +3762,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
}
}
+static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q,
+ struct sk_buff **to_free,
+ struct netdev_queue *txq)
+{
+ int rc;
+
+ rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK;
+ if (rc == NET_XMIT_SUCCESS)
+ trace_qdisc_enqueue(q, txq, skb);
+ return rc;
+}
+
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
@@ -3852,10 +3786,32 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
- if (likely(!netif_xmit_frozen_or_stopped(txq)))
- qdisc_run(q);
+ if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
+ qdisc_run_begin(q)) {
+ /* Retest nolock_qdisc_is_empty() within the protection
+ * of q->seqlock to protect from racing with requeuing.
+ */
+ if (unlikely(!nolock_qdisc_is_empty(q))) {
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
+ __qdisc_run(q);
+ qdisc_run_end(q);
+
+ goto no_lock_out;
+ }
+
+ qdisc_bstats_cpu_update(q, skb);
+ if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
+ !nolock_qdisc_is_empty(q))
+ __qdisc_run(q);
+
+ qdisc_run_end(q);
+ return NET_XMIT_SUCCESS;
+ }
+
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
+ qdisc_run(q);
+no_lock_out:
if (unlikely(to_free))
kfree_skb_list(to_free);
return rc;
@@ -3896,7 +3852,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -3973,7 +3929,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
qdisc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -4363,7 +4319,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
* makes sure to proceed with napi polling
* if the thread is explicitly woken from here.
*/
- if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE)
+ if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
wake_up_process(thread);
return;
@@ -4717,45 +4673,18 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
return rxqueue;
}
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
- struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
{
void *orig_data, *orig_data_end, *hard_start;
struct netdev_rx_queue *rxqueue;
- u32 metalen, act = XDP_DROP;
bool orig_bcast, orig_host;
u32 mac_len, frame_sz;
__be16 orig_eth_type;
struct ethhdr *eth;
+ u32 metalen, act;
int off;
- /* Reinjected packets coming from act_mirred or similar should
- * not get XDP generic processing.
- */
- if (skb_is_redirected(skb))
- return XDP_PASS;
-
- /* XDP packets must be linear and must have sufficient headroom
- * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
- * native XDP provides, thus we need to do it here as well.
- */
- if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
- skb_headroom(skb) < XDP_PACKET_HEADROOM) {
- int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
- int troom = skb->tail + skb->data_len - skb->end;
-
- /* In case we have to go down the path and also linearize,
- * then lets do the pskb_expand_head() work just once here.
- */
- if (pskb_expand_head(skb,
- hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
- troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
- goto do_drop;
- if (skb_linearize(skb))
- goto do_drop;
- }
-
/* The XDP program wants to see the packet starting at the MAC
* header.
*/
@@ -4810,6 +4739,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
skb->protocol = eth_type_trans(skb, skb->dev);
}
+ /* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull
+ * before calling us again on redirect path. We do not call do_redirect
+ * as we leave that up to the caller.
+ *
+ * Caller is responsible for managing lifetime of skb (i.e. calling
+ * kfree_skb in response to actions it cannot handle/XDP_DROP).
+ */
switch (act) {
case XDP_REDIRECT:
case XDP_TX:
@@ -4820,6 +4756,49 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (metalen)
skb_metadata_set(skb, metalen);
break;
+ }
+
+ return act;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ u32 act = XDP_DROP;
+
+ /* Reinjected packets coming from act_mirred or similar should
+ * not get XDP generic processing.
+ */
+ if (skb_is_redirected(skb))
+ return XDP_PASS;
+
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (skb_linearize(skb))
+ goto do_drop;
+ }
+
+ act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+ switch (act) {
+ case XDP_REDIRECT:
+ case XDP_TX:
+ case XDP_PASS:
+ break;
default:
bpf_warn_invalid_xdp_action(act);
fallthrough;
@@ -5102,8 +5081,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list,
- &cl_res, false)) {
+ switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
@@ -5277,15 +5255,14 @@ another_round:
if (static_branch_unlikely(&generic_xdp_needed_key)) {
int ret2;
- preempt_disable();
+ migrate_disable();
ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
- preempt_enable();
+ migrate_enable();
if (ret2 != XDP_PASS) {
ret = NET_RX_DROP;
goto out;
}
- skb_reset_mac_len(skb);
}
if (eth_type_vlan(skb->protocol)) {
@@ -5611,25 +5588,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
struct bpf_prog *new = xdp->prog;
int ret = 0;
- if (new) {
- u32 i;
-
- mutex_lock(&new->aux->used_maps_mutex);
-
- /* generic XDP does not work with DEVMAPs that can
- * have a bpf_prog installed on an entry
- */
- for (i = 0; i < new->aux->used_map_cnt; i++) {
- if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
- cpu_map_prog_allowed(new->aux->used_maps[i])) {
- mutex_unlock(&new->aux->used_maps_mutex);
- return -EINVAL;
- }
- }
-
- mutex_unlock(&new->aux->used_maps_mutex);
- }
-
switch (xdp->command) {
case XDP_SETUP_PROG:
rcu_assign_pointer(dev->xdp_prog, new);
@@ -5837,7 +5795,7 @@ static void flush_all_backlogs(void)
*/
ASSERT_RTNL();
- get_online_cpus();
+ cpus_read_lock();
cpumask_clear(&flush_cpus);
for_each_online_cpu(cpu) {
@@ -5855,7 +5813,7 @@ static void flush_all_backlogs(void)
for_each_cpu(cpu, &flush_cpus)
flush_work(per_cpu_ptr(&flush_works, cpu));
- put_online_cpus();
+ cpus_read_unlock();
}
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
@@ -5972,7 +5930,6 @@ static void gro_list_prepare(const struct list_head *head,
diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
if (skb_vlan_tag_present(p))
diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
- diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
@@ -5981,6 +5938,32 @@ static void gro_list_prepare(const struct list_head *head,
diffs = memcmp(skb_mac_header(p),
skb_mac_header(skb),
maclen);
+
+ /* in most common scenarions 'slow_gro' is 0
+ * otherwise we are already on some slower paths
+ * either skip all the infrequent tests altogether or
+ * avoid trying too hard to skip each of them individually
+ */
+ if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
+#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ struct tc_skb_ext *skb_ext;
+ struct tc_skb_ext *p_ext;
+#endif
+
+ diffs |= p->sk != skb->sk;
+ diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+
+#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ skb_ext = skb_ext_find(skb, TC_SKB_EXT);
+ p_ext = skb_ext_find(p, TC_SKB_EXT);
+
+ diffs |= (!!p_ext) ^ (!!skb_ext);
+ if (!diffs && unlikely(skb_ext))
+ diffs |= p_ext->chain ^ skb_ext->chain;
+#endif
+ }
+
NAPI_GRO_CB(p)->same_flow = !diffs;
}
}
@@ -6194,6 +6177,8 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi,
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
napi_skb_free_stolen_head(skb);
+ else if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
+ __kfree_skb(skb);
else
__kfree_skb_defer(skb);
break;
@@ -6242,7 +6227,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
- skb_ext_reset(skb);
+ if (unlikely(skb->slow_gro)) {
+ skb_orphan(skb);
+ skb_ext_reset(skb);
+ nf_reset_ct(skb);
+ skb->slow_gro = 0;
+ }
napi->skb = skb;
}
@@ -6520,11 +6510,18 @@ EXPORT_SYMBOL(napi_schedule_prep);
* __napi_schedule_irqoff - schedule for receive
* @n: entry to schedule
*
- * Variant of __napi_schedule() assuming hard irqs are masked
+ * Variant of __napi_schedule() assuming hard irqs are masked.
+ *
+ * On PREEMPT_RT enabled kernels this maps to __napi_schedule()
+ * because the interrupt disabled assumption might not be true
+ * due to force-threaded interrupts and spinlock substitution.
*/
void __napi_schedule_irqoff(struct napi_struct *n)
{
- ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ else
+ __napi_schedule(n);
}
EXPORT_SYMBOL(__napi_schedule_irqoff);
@@ -7535,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
{
struct netdev_adjacent *lower;
- WARN_ON_ONCE(!rcu_read_lock_held());
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
@@ -9300,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
return dev->xdp_state[mode].prog;
}
-static u8 dev_xdp_prog_count(struct net_device *dev)
+u8 dev_xdp_prog_count(struct net_device *dev)
{
u8 count = 0;
int i;
@@ -9310,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev)
count++;
return count;
}
+EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
{
@@ -9403,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
{
unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
struct bpf_prog *cur_prog;
+ struct net_device *upper;
+ struct list_head *iter;
enum bpf_xdp_mode mode;
bpf_op_t bpf_op;
int err;
@@ -9441,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
return -EBUSY;
}
+ /* don't allow if an upper device already has a program */
+ netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ if (dev_xdp_prog_count(upper) > 0) {
+ NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
+ return -EEXIST;
+ }
+ }
+
cur_prog = dev_xdp_prog(dev, mode);
/* can't replace attached prog with link */
if (link && cur_prog) {
@@ -9650,14 +9658,17 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
struct net_device *dev;
int err, fd;
+ rtnl_lock();
dev = dev_get_by_index(net, attr->link_create.target_ifindex);
- if (!dev)
+ if (!dev) {
+ rtnl_unlock();
return -EINVAL;
+ }
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
- goto out_put_dev;
+ goto unlock;
}
bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
@@ -9667,14 +9678,14 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
err = bpf_link_prime(&link->link, &link_primer);
if (err) {
kfree(link);
- goto out_put_dev;
+ goto unlock;
}
- rtnl_lock();
err = dev_xdp_attach_link(dev, NULL, link);
rtnl_unlock();
if (err) {
+ link->dev = NULL;
bpf_link_cleanup(&link_primer);
goto out_put_dev;
}
@@ -9684,6 +9695,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
dev_put(dev);
return fd;
+unlock:
+ rtnl_unlock();
+
out_put_dev:
dev_put(dev);
return err;
@@ -10066,7 +10080,7 @@ static int netif_alloc_rx_queues(struct net_device *dev)
BUG_ON(count < 1);
- rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ rx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!rx)
return -ENOMEM;
@@ -10133,7 +10147,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
if (count < 1 || count > 0xffff)
return -EINVAL;
- tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ tx = kvzalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!tx)
return -ENOMEM;
@@ -10773,7 +10787,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+ p = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
if (!p)
return NULL;
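
Note: among the dev.c changes, netif_set_real_num_queues() is the one new driver-facing helper. It resizes the real RX and TX queue counts together, performing the increases first so that if the second step fails the first can be rolled back with a decrease that cannot fail. A hypothetical .set_channels-style caller, sketched against the kernel-doc shown above:

/* Hypothetical driver snippet, not taken from this diff. */
static int example_set_channels(struct net_device *dev, unsigned int combined)
{
	/* one call replaces the manual netif_set_real_num_tx_queues() /
	 * netif_set_real_num_rx_queues() pair plus hand-rolled unwind;
	 * on failure the previous queue counts are left in place
	 */
	return netif_set_real_num_queues(dev, combined, combined);
}
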
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 45ae6eeb2964..8c39283c26ae 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -16,10 +16,9 @@
* General list handling functions
*/
-static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
- const unsigned char *addr, int addr_len,
- unsigned char addr_type, bool global,
- bool sync)
+static struct netdev_hw_addr*
+__hw_addr_create(const unsigned char *addr, int addr_len,
+ unsigned char addr_type, bool global, bool sync)
{
struct netdev_hw_addr *ha;
int alloc_size;
@@ -29,32 +28,44 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
alloc_size = L1_CACHE_BYTES;
ha = kmalloc(alloc_size, GFP_ATOMIC);
if (!ha)
- return -ENOMEM;
+ return NULL;
memcpy(ha->addr, addr, addr_len);
ha->type = addr_type;
ha->refcount = 1;
ha->global_use = global;
ha->synced = sync ? 1 : 0;
ha->sync_cnt = 0;
- list_add_tail_rcu(&ha->list, &list->list);
- list->count++;
- return 0;
+ return ha;
}
static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global, bool sync,
- int sync_count)
+ int sync_count, bool exclusive)
{
+ struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL;
struct netdev_hw_addr *ha;
if (addr_len > MAX_ADDR_LEN)
return -EINVAL;
- list_for_each_entry(ha, &list->list, list) {
- if (ha->type == addr_type &&
- !memcmp(ha->addr, addr, addr_len)) {
+ while (*ins_point) {
+ int diff;
+
+ ha = rb_entry(*ins_point, struct netdev_hw_addr, node);
+ diff = memcmp(addr, ha->addr, addr_len);
+ if (diff == 0)
+ diff = memcmp(&addr_type, &ha->type, sizeof(addr_type));
+
+ parent = *ins_point;
+ if (diff < 0) {
+ ins_point = &parent->rb_left;
+ } else if (diff > 0) {
+ ins_point = &parent->rb_right;
+ } else {
+ if (exclusive)
+ return -EEXIST;
if (global) {
/* check if addr is already used as global */
if (ha->global_use)
@@ -73,8 +84,25 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
}
}
- return __hw_addr_create_ex(list, addr, addr_len, addr_type, global,
- sync);
+ ha = __hw_addr_create(addr, addr_len, addr_type, global, sync);
+ if (!ha)
+ return -ENOMEM;
+
+ /* The first address in dev->dev_addrs is pointed to by dev->dev_addr
+ * and mutated freely by device drivers and netdev ops, so if we insert
+ * it into the tree we'll end up with an invalid rbtree.
+ */
+ if (list->count > 0) {
+ rb_link_node(&ha->node, parent, ins_point);
+ rb_insert_color(&ha->node, &list->tree);
+ } else {
+ RB_CLEAR_NODE(&ha->node);
+ }
+
+ list_add_tail_rcu(&ha->list, &list->list);
+ list->count++;
+
+ return 0;
}
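
Note: the linear duplicate scan is replaced by a descent of list->tree keyed on a composite of the address bytes and then the address type, so a duplicate is found on the way down and the new "exclusive" flag can turn it into -EEXIST. A self-contained userspace sketch of the same ordering (hypothetical struct; qsort()/bsearch() stand in for the rbtree):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define ADDR_LEN 6

    struct hw_addr {
            unsigned char addr[ADDR_LEN];
            unsigned char type;
    };

    /* Same ordering the rbtree walk uses: address bytes first, then type. */
    static int hw_addr_cmp(const void *a, const void *b)
    {
            const struct hw_addr *x = a, *y = b;
            int diff = memcmp(x->addr, y->addr, ADDR_LEN);

            return diff ? diff : memcmp(&x->type, &y->type, sizeof(x->type));
    }

    int main(void)
    {
            struct hw_addr tbl[] = {
                    { { 0x02, 0, 0, 0, 0, 2 }, 1 },
                    { { 0x02, 0, 0, 0, 0, 1 }, 2 },
                    { { 0x02, 0, 0, 0, 0, 1 }, 1 },
            };
            struct hw_addr key = { { 0x02, 0, 0, 0, 0, 1 }, 2 };
            struct hw_addr *hit;

            qsort(tbl, 3, sizeof(tbl[0]), hw_addr_cmp);
            hit = bsearch(&key, tbl, 3, sizeof(tbl[0]), hw_addr_cmp);
            printf("found: %s\n", hit ? "yes" : "no");
            return 0;
    }
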
static int __hw_addr_add(struct netdev_hw_addr_list *list,
@@ -82,7 +110,7 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list,
unsigned char addr_type)
{
return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false,
- 0);
+ 0, false);
}
static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
@@ -103,24 +131,61 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
if (--ha->refcount)
return 0;
+
+ if (!RB_EMPTY_NODE(&ha->node))
+ rb_erase(&ha->node, &list->tree);
+
list_del_rcu(&ha->list);
kfree_rcu(ha, rcu_head);
list->count--;
return 0;
}
+static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list,
+ const unsigned char *addr, int addr_len,
+ unsigned char addr_type)
+{
+ struct netdev_hw_addr *ha;
+ struct rb_node *node;
+
+ /* The first address isn't inserted into the tree because in the dev->dev_addrs
+ * list it's the address pointed to by dev->dev_addr which is freely mutated
+ * in place, so we need to check it separately.
+ */
+ ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+ if (ha && !memcmp(addr, ha->addr, addr_len) &&
+ (!addr_type || addr_type == ha->type))
+ return ha;
+
+ node = list->tree.rb_node;
+
+ while (node) {
+ struct netdev_hw_addr *ha = rb_entry(node, struct netdev_hw_addr, node);
+ int diff = memcmp(addr, ha->addr, addr_len);
+
+ if (diff == 0 && addr_type)
+ diff = memcmp(&addr_type, &ha->type, sizeof(addr_type));
+
+ if (diff < 0)
+ node = node->rb_left;
+ else if (diff > 0)
+ node = node->rb_right;
+ else
+ return ha;
+ }
+
+ return NULL;
+}
+
static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global, bool sync)
{
- struct netdev_hw_addr *ha;
+ struct netdev_hw_addr *ha = __hw_addr_lookup(list, addr, addr_len, addr_type);
- list_for_each_entry(ha, &list->list, list) {
- if (!memcmp(ha->addr, addr, addr_len) &&
- (ha->type == addr_type || !addr_type))
- return __hw_addr_del_entry(list, ha, global, sync);
- }
- return -ENOENT;
+ if (!ha)
+ return -ENOENT;
+ return __hw_addr_del_entry(list, ha, global, sync);
}
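
Note: lookup mirrors the insert ordering but checks the first list entry outside the tree; per the comments above, in dev->dev_addrs that entry backs dev->dev_addr and is rewritten in place by drivers, so it is never linked into the rbtree (the insert path clears its node with RB_CLEAR_NODE() and the delete path only erases nodes that were linked). An addr_type of zero acts as a wildcard. A small self-contained userspace sketch of the "mutable head plus sorted tail" lookup, with illustrative types:

    #include <stdlib.h>
    #include <string.h>

    #define ADDR_LEN 6

    struct entry {
            unsigned char addr[ADDR_LEN];
            unsigned char type;
    };

    static int entry_cmp(const void *a, const void *b)
    {
            const struct entry *x = a, *y = b;
            int diff = memcmp(x->addr, y->addr, ADDR_LEN);

            return diff ? diff : (int)x->type - (int)y->type;
    }

    /* tbl[0] is the freely mutated "head"; tbl[1..n-1] is kept sorted. */
    static struct entry *entry_lookup(struct entry *tbl, size_t n,
                                      const struct entry *key)
    {
            if (n && !entry_cmp(&tbl[0], key))
                    return &tbl[0];
            if (n < 2)
                    return NULL;
            return bsearch(key, tbl + 1, n - 1, sizeof(*tbl), entry_cmp);
    }
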
static int __hw_addr_del(struct netdev_hw_addr_list *list,
@@ -137,7 +202,7 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
int err;
err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
- false, true, ha->sync_cnt);
+ false, true, ha->sync_cnt, false);
if (err && err != -EEXIST)
return err;
@@ -407,6 +472,7 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list)
{
struct netdev_hw_addr *ha, *tmp;
+ list->tree = RB_ROOT;
list_for_each_entry_safe(ha, tmp, &list->list, list) {
list_del_rcu(&ha->list);
kfree_rcu(ha, rcu_head);
@@ -418,6 +484,7 @@ void __hw_addr_init(struct netdev_hw_addr_list *list)
{
INIT_LIST_HEAD(&list->list);
list->count = 0;
+ list->tree = RB_ROOT;
}
EXPORT_SYMBOL(__hw_addr_init);
@@ -552,22 +619,14 @@ EXPORT_SYMBOL(dev_addr_del);
*/
int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
{
- struct netdev_hw_addr *ha;
int err;
netif_addr_lock_bh(dev);
- list_for_each_entry(ha, &dev->uc.list, list) {
- if (!memcmp(ha->addr, addr, dev->addr_len) &&
- ha->type == NETDEV_HW_ADDR_T_UNICAST) {
- err = -EEXIST;
- goto out;
- }
- }
- err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_UNICAST, true, false);
+ err = __hw_addr_add_ex(&dev->uc, addr, dev->addr_len,
+ NETDEV_HW_ADDR_T_UNICAST, true, false,
+ 0, true);
if (!err)
__dev_set_rx_mode(dev);
-out:
netif_addr_unlock_bh(dev);
return err;
}
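
Note: dev_uc_add_excl() (and dev_mc_add_excl() below) no longer walk the list themselves; the duplicate check is the same tree descent with exclusive=true, which returns -EEXIST directly from __hw_addr_add_ex(). A one-function sketch of that exclusive-insert contract; the linear scan here is only for brevity, the kernel code gets the answer from the rbtree descent:

    #include <errno.h>
    #include <string.h>

    /* Adding a key that already exists fails with -EEXIST instead of bumping a
     * reference count. Purely illustrative. */
    static int set_insert_exclusive(unsigned char (*set)[6], int *count,
                                    const unsigned char *addr)
    {
            int i;

            for (i = 0; i < *count; i++)
                    if (!memcmp(set[i], addr, 6))
                            return -EEXIST;
            memcpy(set[(*count)++], addr, 6);
            return 0;
    }
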
@@ -745,22 +804,14 @@ EXPORT_SYMBOL(dev_uc_init);
*/
int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
{
- struct netdev_hw_addr *ha;
int err;
netif_addr_lock_bh(dev);
- list_for_each_entry(ha, &dev->mc.list, list) {
- if (!memcmp(ha->addr, addr, dev->addr_len) &&
- ha->type == NETDEV_HW_ADDR_T_MULTICAST) {
- err = -EEXIST;
- goto out;
- }
- }
- err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, true, false);
+ err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+ NETDEV_HW_ADDR_T_MULTICAST, true, false,
+ 0, true);
if (!err)
__dev_set_rx_mode(dev);
-out:
netif_addr_unlock_bh(dev);
return err;
}
@@ -773,7 +824,8 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
netif_addr_lock_bh(dev);
err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
- NETDEV_HW_ADDR_T_MULTICAST, global, false, 0);
+ NETDEV_HW_ADDR_T_MULTICAST, global, false,
+ 0, false);
if (!err)
__dev_set_rx_mode(dev);
netif_addr_unlock_bh(dev);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 478d032f34ac..0e87237fd871 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kmod.h>
#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/net_tstamp.h>
#include <linux/wireless.h>
+#include <linux/if_bridge.h>
#include <net/dsa.h>
#include <net/wext.h>
@@ -25,79 +27,108 @@ static int dev_ifname(struct net *net, struct ifreq *ifr)
return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}
-static gifconf_func_t *gifconf_list[NPROTO];
-
-/**
- * register_gifconf - register a SIOCGIF handler
- * @family: Address family
- * @gifconf: Function handler
- *
- * Register protocol dependent address dumping routines. The handler
- * that is passed must not be freed or reused until it has been replaced
- * by another handler.
- */
-int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
-{
- if (family >= NPROTO)
- return -EINVAL;
- gifconf_list[family] = gifconf;
- return 0;
-}
-EXPORT_SYMBOL(register_gifconf);
-
/*
* Perform a SIOCGIFCONF call. This structure will change
* size eventually, and there is nothing I can do about it.
* Thus we will need a 'compatibility mode'.
*/
-
-int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
+int dev_ifconf(struct net *net, struct ifconf __user *uifc)
{
struct net_device *dev;
- char __user *pos;
- int len;
- int total;
- int i;
+ void __user *pos;
+ size_t size;
+ int len, total = 0, done;
- /*
- * Fetch the caller's info block.
- */
+ /* both the ifconf and the ifreq structures are slightly different */
+ if (in_compat_syscall()) {
+ struct compat_ifconf ifc32;
- pos = ifc->ifc_buf;
- len = ifc->ifc_len;
+ if (copy_from_user(&ifc32, uifc, sizeof(struct compat_ifconf)))
+ return -EFAULT;
- /*
- * Loop over the interfaces, and write an info block for each.
- */
+ pos = compat_ptr(ifc32.ifcbuf);
+ len = ifc32.ifc_len;
+ size = sizeof(struct compat_ifreq);
+ } else {
+ struct ifconf ifc;
+
+ if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
+ return -EFAULT;
- total = 0;
+ pos = ifc.ifc_buf;
+ len = ifc.ifc_len;
+ size = sizeof(struct ifreq);
+ }
+
+ /* Loop over the interfaces, and write an info block for each. */
+ rtnl_lock();
for_each_netdev(net, dev) {
- for (i = 0; i < NPROTO; i++) {
- if (gifconf_list[i]) {
- int done;
- if (!pos)
- done = gifconf_list[i](dev, NULL, 0, size);
- else
- done = gifconf_list[i](dev, pos + total,
- len - total, size);
- if (done < 0)
- return -EFAULT;
- total += done;
- }
+ if (!pos)
+ done = inet_gifconf(dev, NULL, 0, size);
+ else
+ done = inet_gifconf(dev, pos + total,
+ len - total, size);
+ if (done < 0) {
+ rtnl_unlock();
+ return -EFAULT;
}
+ total += done;
}
+ rtnl_unlock();
- /*
- * All done. Write the updated control block back to the caller.
- */
- ifc->ifc_len = total;
+ return put_user(total, &uifc->ifc_len);
+}
+
+static int dev_getifmap(struct net_device *dev, struct ifreq *ifr)
+{
+ struct ifmap *ifmap = &ifr->ifr_map;
+
+ if (in_compat_syscall()) {
+ struct compat_ifmap *cifmap = (struct compat_ifmap *)ifmap;
+
+ cifmap->mem_start = dev->mem_start;
+ cifmap->mem_end = dev->mem_end;
+ cifmap->base_addr = dev->base_addr;
+ cifmap->irq = dev->irq;
+ cifmap->dma = dev->dma;
+ cifmap->port = dev->if_port;
+
+ return 0;
+ }
+
+ ifmap->mem_start = dev->mem_start;
+ ifmap->mem_end = dev->mem_end;
+ ifmap->base_addr = dev->base_addr;
+ ifmap->irq = dev->irq;
+ ifmap->dma = dev->dma;
+ ifmap->port = dev->if_port;
- /*
- * Both BSD and Solaris return 0 here, so we do too.
- */
return 0;
}
+static int dev_setifmap(struct net_device *dev, struct ifreq *ifr)
+{
+ struct compat_ifmap *cifmap = (struct compat_ifmap *)&ifr->ifr_map;
+
+ if (!dev->netdev_ops->ndo_set_config)
+ return -EOPNOTSUPP;
+
+ if (in_compat_syscall()) {
+ struct ifmap ifmap = {
+ .mem_start = cifmap->mem_start,
+ .mem_end = cifmap->mem_end,
+ .base_addr = cifmap->base_addr,
+ .irq = cifmap->irq,
+ .dma = cifmap->dma,
+ .port = cifmap->port,
+ };
+
+ return dev->netdev_ops->ndo_set_config(dev, &ifmap);
+ }
+
+ return dev->netdev_ops->ndo_set_config(dev, &ifr->ifr_map);
+}
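
Note: dev_ifconf() now does its own compat handling: in_compat_syscall() selects both the layout of the ifconf header copied from user space and the stride (sizeof(struct compat_ifreq) vs sizeof(struct ifreq)) used to advance through the user buffer, and the interface walk is done under rtnl_lock(). A userspace sketch of the "same buffer, mode-dependent stride" idea, with made-up record layouts:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Two hypothetical record layouts for the same logical entry. */
    struct rec64 { char name[16]; unsigned long long value; };
    struct rec32 { char name[16]; unsigned int value; };

    /* Fill the caller's buffer with up to n records, advancing by the stride
     * that matches the caller's ABI; returns the number of bytes written. */
    static size_t fill_records(void *buf, size_t buflen, int n, int compat)
    {
            size_t stride = compat ? sizeof(struct rec32) : sizeof(struct rec64);
            size_t total = 0;
            int i;

            for (i = 0; i < n && total + stride <= buflen; i++) {
                    if (compat) {
                            struct rec32 r = { .value = (unsigned int)i };
                            snprintf(r.name, sizeof(r.name), "eth%d", i);
                            memcpy((char *)buf + total, &r, stride);
                    } else {
                            struct rec64 r = { .value = (unsigned long long)i };
                            snprintf(r.name, sizeof(r.name), "eth%d", i);
                            memcpy((char *)buf + total, &r, stride);
                    }
                    total += stride;
            }
            return total;
    }
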
+
/*
* Perform the SIOCxIFxxx calls, inside rcu_read_lock()
*/
@@ -128,13 +159,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
break;
case SIOCGIFMAP:
- ifr->ifr_map.mem_start = dev->mem_start;
- ifr->ifr_map.mem_end = dev->mem_end;
- ifr->ifr_map.base_addr = dev->base_addr;
- ifr->ifr_map.irq = dev->irq;
- ifr->ifr_map.dma = dev->dma;
- ifr->ifr_map.port = dev->if_port;
- return 0;
+ return dev_getifmap(dev, ifr);
case SIOCGIFINDEX:
ifr->ifr_ifindex = dev->ifindex;
@@ -215,19 +240,19 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
return 0;
}
-static int dev_do_ioctl(struct net_device *dev,
- struct ifreq *ifr, unsigned int cmd)
+static int dev_eth_ioctl(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
- err = dsa_ndo_do_ioctl(dev, ifr, cmd);
+ err = dsa_ndo_eth_ioctl(dev, ifr, cmd);
if (err == 0 || err != -EOPNOTSUPP)
return err;
- if (ops->ndo_do_ioctl) {
+ if (ops->ndo_eth_ioctl) {
if (netif_device_present(dev))
- err = ops->ndo_do_ioctl(dev, ifr, cmd);
+ err = ops->ndo_eth_ioctl(dev, ifr, cmd);
else
err = -ENODEV;
}
@@ -235,10 +260,55 @@ static int dev_do_ioctl(struct net_device *dev,
return err;
}
+static int dev_siocbond(struct net_device *dev,
+ struct ifreq *ifr, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocbond) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocbond(dev, ifr, cmd);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int dev_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, unsigned int cmd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocdevprivate) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocdevprivate(dev, ifr, data, cmd);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static int dev_siocwandev(struct net_device *dev, struct if_settings *ifs)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_siocwandev) {
+ if (netif_device_present(dev))
+ return ops->ndo_siocwandev(dev, ifs);
+ else
+ return -ENODEV;
+ }
+
+ return -EOPNOTSUPP;
+}
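
Note: the catch-all ndo_do_ioctl is being split by purpose: MII/timestamping requests go through dev_eth_ioctl()/ndo_eth_ioctl, bonding requests through dev_siocbond(), SIOCDEVPRIVATE requests through dev_siocdevprivate(), and SIOCWANDEV through dev_siocwandev(); each wrapper repeats the same "callback present, device present" check. A compact userspace sketch of that dispatch shape, with hypothetical ops:

    #include <errno.h>
    #include <stddef.h>

    struct dev_ops {
            int (*eth_ioctl)(int cmd);
            int (*bond_ioctl)(int cmd);
    };

    struct device {
            int present;
            const struct dev_ops *ops;
    };

    /* Missing callback means -EOPNOTSUPP, missing device means -ENODEV,
     * otherwise the request is forwarded. */
    static int dispatch(const struct device *dev, int (*cb)(int), int cmd)
    {
            if (!cb)
                    return -EOPNOTSUPP;
            if (!dev->present)
                    return -ENODEV;
            return cb(cmd);
    }

    static int dev_eth(const struct device *dev, int cmd)
    {
            return dispatch(dev, dev->ops->eth_ioctl, cmd);
    }

    static int dev_bond(const struct device *dev, int cmd)
    {
            return dispatch(dev, dev->ops->bond_ioctl, cmd);
    }
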
+
/*
* Perform the SIOCxIFxxx calls, inside rtnl_lock()
*/
-static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
+ unsigned int cmd)
{
int err;
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -275,12 +345,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return 0;
case SIOCSIFMAP:
- if (ops->ndo_set_config) {
- if (!netif_device_present(dev))
- return -ENODEV;
- return ops->ndo_set_config(dev, &ifr->ifr_map);
- }
- return -EOPNOTSUPP;
+ return dev_setifmap(dev, ifr);
case SIOCADDMULTI:
if (!ops->ndo_set_rx_mode ||
@@ -307,6 +372,22 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
ifr->ifr_newname[IFNAMSIZ-1] = '\0';
return dev_change_name(dev, ifr->ifr_newname);
+ case SIOCWANDEV:
+ return dev_siocwandev(dev, &ifr->ifr_settings);
+
+ case SIOCBRADDIF:
+ case SIOCBRDELIF:
+ if (!netif_device_present(dev))
+ return -ENODEV;
+ if (!netif_is_bridge_master(dev))
+ return -EOPNOTSUPP;
+ dev_hold(dev);
+ rtnl_unlock();
+ err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+ dev_put(dev);
+ rtnl_lock();
+ return err;
+
case SIOCSHWTSTAMP:
err = net_hwtstamp_validate(ifr);
if (err)
@@ -317,23 +398,23 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
* Unknown or private ioctl
*/
default:
- if ((cmd >= SIOCDEVPRIVATE &&
- cmd <= SIOCDEVPRIVATE + 15) ||
- cmd == SIOCBONDENSLAVE ||
+ if (cmd >= SIOCDEVPRIVATE &&
+ cmd <= SIOCDEVPRIVATE + 15)
+ return dev_siocdevprivate(dev, ifr, data, cmd);
+
+ if (cmd == SIOCGMIIPHY ||
+ cmd == SIOCGMIIREG ||
+ cmd == SIOCSMIIREG ||
+ cmd == SIOCSHWTSTAMP ||
+ cmd == SIOCGHWTSTAMP) {
+ err = dev_eth_ioctl(dev, ifr, cmd);
+ } else if (cmd == SIOCBONDENSLAVE ||
cmd == SIOCBONDRELEASE ||
cmd == SIOCBONDSETHWADDR ||
cmd == SIOCBONDSLAVEINFOQUERY ||
cmd == SIOCBONDINFOQUERY ||
- cmd == SIOCBONDCHANGEACTIVE ||
- cmd == SIOCGMIIPHY ||
- cmd == SIOCGMIIREG ||
- cmd == SIOCSMIIREG ||
- cmd == SIOCBRADDIF ||
- cmd == SIOCBRDELIF ||
- cmd == SIOCSHWTSTAMP ||
- cmd == SIOCGHWTSTAMP ||
- cmd == SIOCWANDEV) {
- err = dev_do_ioctl(dev, ifr, cmd);
+ cmd == SIOCBONDCHANGEACTIVE) {
+ err = dev_siocbond(dev, ifr, cmd);
} else
err = -EINVAL;
@@ -386,7 +467,8 @@ EXPORT_SYMBOL(dev_load);
* positive or a negative errno code on error.
*/
-int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout)
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
+ void __user *data, bool *need_copyout)
{
int ret;
char *colon;
@@ -437,7 +519,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCETHTOOL:
dev_load(net, ifr->ifr_name);
rtnl_lock();
- ret = dev_ethtool(net, ifr);
+ ret = dev_ethtool(net, ifr, data);
rtnl_unlock();
if (colon)
*colon = ':';
@@ -456,7 +538,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
rtnl_lock();
- ret = dev_ifsioc(net, ifr, cmd);
+ ret = dev_ifsioc(net, ifr, data, cmd);
rtnl_unlock();
if (colon)
*colon = ':';
@@ -502,7 +584,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
case SIOCBONDINFOQUERY:
dev_load(net, ifr->ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, ifr, cmd);
+ ret = dev_ifsioc(net, ifr, data, cmd);
rtnl_unlock();
if (need_copyout)
*need_copyout = false;
@@ -527,7 +609,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
cmd <= SIOCDEVPRIVATE + 15)) {
dev_load(net, ifr->ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, ifr, cmd);
+ ret = dev_ifsioc(net, ifr, data, cmd);
rtnl_unlock();
return ret;
}
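
Note: dev_ioctl() and dev_ifsioc() now carry the original void __user *data pointer alongside the in-kernel struct ifreq copy, so SIOCDEVPRIVATE handlers can copy their private structure straight from user space instead of reinterpreting ifr_ifru. A hedged sketch of what a driver-side handler could look like; the driver, its config struct and the command value are made up, only the hook signature matches the ndo_siocdevprivate call used above:

    /* Hypothetical driver handler; struct foo_cfg and the command are made up. */
    static int foo_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
                                  void __user *data, int cmd)
    {
            struct foo_cfg cfg;

            if (cmd != SIOCDEVPRIVATE + 1)  /* illustrative private command */
                    return -EOPNOTSUPP;
            if (copy_from_user(&cfg, data, sizeof(cfg)))
                    return -EFAULT;
            /* ... apply cfg to the device ... */
            return 0;
    }
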
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 051432ea4f69..a856ae401ea5 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -92,7 +92,8 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
DEVLINK_PORT_FN_STATE_ACTIVE),
};
-static LIST_HEAD(devlink_list);
+static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
+#define DEVLINK_REGISTERED XA_MARK_1
/* devlink_mutex
*
@@ -108,23 +109,23 @@ struct net *devlink_net(const struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_net);
-static void __devlink_net_set(struct devlink *devlink, struct net *net)
+static void devlink_put(struct devlink *devlink)
{
- write_pnet(&devlink->_net, net);
+ if (refcount_dec_and_test(&devlink->refcount))
+ complete(&devlink->comp);
}
-void devlink_net_set(struct devlink *devlink, struct net *net)
+static bool __must_check devlink_try_get(struct devlink *devlink)
{
- if (WARN_ON(devlink->registered))
- return;
- __devlink_net_set(devlink, net);
+ return refcount_inc_not_zero(&devlink->refcount);
}
-EXPORT_SYMBOL_GPL(devlink_net_set);
static struct devlink *devlink_get_from_attrs(struct net *net,
struct nlattr **attrs)
{
struct devlink *devlink;
+ unsigned long index;
+ bool found = false;
char *busname;
char *devname;
@@ -136,19 +137,19 @@ static struct devlink *devlink_get_from_attrs(struct net *net,
lockdep_assert_held(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
if (strcmp(devlink->dev->bus->name, busname) == 0 &&
strcmp(dev_name(devlink->dev), devname) == 0 &&
- net_eq(devlink_net(devlink), net))
- return devlink;
+ net_eq(devlink_net(devlink), net)) {
+ found = true;
+ break;
+ }
}
- return ERR_PTR(-ENODEV);
-}
+ if (!found || !devlink_try_get(devlink))
+ devlink = ERR_PTR(-ENODEV);
-static struct devlink *devlink_get_from_info(struct genl_info *info)
-{
- return devlink_get_from_attrs(genl_info_net(info), info->attrs);
+ return devlink;
}
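
Note: instead of returning a bare pointer found under devlink_mutex, lookups now pin the instance with devlink_try_get() (refcount_inc_not_zero()) and every user drops it with devlink_put(), which completes devlink->comp when the last reference goes away so the unregister path can wait for outstanding users. A small userspace sketch of the same try-get/put pattern using C11 atomics (names are illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct object {
            atomic_int refcount;    /* 0 means "being torn down" */
            int dead_signalled;     /* stand-in for complete(&comp) */
    };

    /* Take a reference only if the object is still live. */
    static bool obj_try_get(struct object *obj)
    {
            int old = atomic_load(&obj->refcount);

            while (old != 0) {
                    if (atomic_compare_exchange_weak(&obj->refcount, &old, old + 1))
                            return true;
            }
            return false;
    }

    static void obj_put(struct object *obj)
    {
            if (atomic_fetch_sub(&obj->refcount, 1) == 1)
                    obj->dead_signalled = 1;  /* last user: wake the unregister path */
    }
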
static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
@@ -190,6 +191,80 @@ static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
return devlink_port_get_from_attrs(devlink, info->attrs);
}
+static inline bool
+devlink_rate_is_leaf(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_LEAF;
+}
+
+static inline bool
+devlink_rate_is_node(struct devlink_rate *devlink_rate)
+{
+ return devlink_rate->type == DEVLINK_RATE_TYPE_NODE;
+}
+
+static struct devlink_rate *
+devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ struct devlink_rate *devlink_rate;
+ struct devlink_port *devlink_port;
+
+ devlink_port = devlink_port_get_from_attrs(devlink, info->attrs);
+ if (IS_ERR(devlink_port))
+ return ERR_CAST(devlink_port);
+ devlink_rate = devlink_port->devlink_rate;
+ return devlink_rate ?: ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
+{
+ static struct devlink_rate *devlink_rate;
+
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ if (devlink_rate_is_node(devlink_rate) &&
+ !strcmp(node_name, devlink_rate->name))
+ return devlink_rate;
+ }
+ return ERR_PTR(-ENODEV);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
+{
+ const char *rate_node_name;
+ size_t len;
+
+ if (!attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+ return ERR_PTR(-EINVAL);
+ rate_node_name = nla_data(attrs[DEVLINK_ATTR_RATE_NODE_NAME]);
+ len = strlen(rate_node_name);
+ /* Name cannot be empty or decimal number */
+ if (!len || strspn(rate_node_name, "0123456789") == len)
+ return ERR_PTR(-EINVAL);
+
+ return devlink_rate_node_get_by_name(devlink, rate_node_name);
+}
+
+static struct devlink_rate *
+devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ return devlink_rate_node_get_from_attrs(devlink, info->attrs);
+}
+
+static struct devlink_rate *
+devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
+{
+ struct nlattr **attrs = info->attrs;
+
+ if (attrs[DEVLINK_ATTR_PORT_INDEX])
+ return devlink_rate_leaf_get_from_info(devlink, info);
+ else if (attrs[DEVLINK_ATTR_RATE_NODE_NAME])
+ return devlink_rate_node_get_from_info(devlink, info);
+ else
+ return ERR_PTR(-EINVAL);
+}
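
Note: a rate object is resolved either as a leaf (via DEVLINK_ATTR_PORT_INDEX and the port's devlink_rate) or as a node (via DEVLINK_ATTR_RATE_NODE_NAME); node names may not be empty or purely numeric, presumably so they cannot be confused with port indices. A tiny userspace sketch of that name check:

    #include <stdbool.h>
    #include <string.h>

    /* Valid node name: non-empty and not just a decimal number. */
    static bool rate_node_name_ok(const char *name)
    {
            size_t len = strlen(name);

            return len != 0 && strspn(name, "0123456789") != len;
    }
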
+
struct devlink_sb {
struct list_head list;
unsigned int index;
@@ -408,12 +483,14 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
+#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
/* The per devlink instance lock is taken by default in the pre-doit
* operation, yet several commands do not require this. The global
* devlink lock is taken and protects from disruption by user-calls.
*/
-#define DEVLINK_NL_FLAG_NO_LOCK BIT(2)
+#define DEVLINK_NL_FLAG_NO_LOCK BIT(4)
static int devlink_nl_pre_doit(const struct genl_ops *ops,
struct sk_buff *skb, struct genl_info *info)
@@ -423,7 +500,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
int err;
mutex_lock(&devlink_mutex);
- devlink = devlink_get_from_info(info);
+ devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs);
if (IS_ERR(devlink)) {
mutex_unlock(&devlink_mutex);
return PTR_ERR(devlink);
@@ -442,12 +519,31 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops,
devlink_port = devlink_port_get_from_info(devlink, info);
if (!IS_ERR(devlink_port))
info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
+ struct devlink_rate *devlink_rate;
+
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate)) {
+ err = PTR_ERR(devlink_rate);
+ goto unlock;
+ }
+ info->user_ptr[1] = devlink_rate;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
+ struct devlink_rate *rate_node;
+
+ rate_node = devlink_rate_node_get_from_info(devlink, info);
+ if (IS_ERR(rate_node)) {
+ err = PTR_ERR(rate_node);
+ goto unlock;
+ }
+ info->user_ptr[1] = rate_node;
}
return 0;
unlock:
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
mutex_unlock(&devlink_mutex);
return err;
}
@@ -460,6 +556,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
devlink = info->user_ptr[0];
if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK)
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
mutex_unlock(&devlink_mutex);
}
@@ -723,10 +820,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
}
-static int
-devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops,
- struct devlink_port *port, struct sk_buff *msg,
- struct netlink_ext_ack *extack, bool *msg_updated)
+static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
+ struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
{
u8 hw_addr[MAX_ADDR_LEN];
int hw_addr_len;
@@ -735,7 +833,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
if (!ops->port_function_hw_addr_get)
return 0;
- err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+ err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len,
+ extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -748,6 +847,55 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
return 0;
}
+static int devlink_nl_rate_fill(struct sk_buff *msg,
+ struct devlink_rate *devlink_rate,
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags, struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_u16(msg, DEVLINK_ATTR_RATE_TYPE, devlink_rate->type))
+ goto nla_put_failure;
+
+ if (devlink_rate_is_leaf(devlink_rate)) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+ devlink_rate->devlink_port->index))
+ goto nla_put_failure;
+ } else if (devlink_rate_is_node(devlink_rate)) {
+ if (nla_put_string(msg, DEVLINK_ATTR_RATE_NODE_NAME,
+ devlink_rate->name))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_SHARE,
+ devlink_rate->tx_share, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_RATE_TX_MAX,
+ devlink_rate->tx_max, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (devlink_rate->parent)
+ if (nla_put_string(msg, DEVLINK_ATTR_RATE_PARENT_NODE_NAME,
+ devlink_rate->parent->name))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
static bool
devlink_port_fn_state_valid(enum devlink_port_fn_state state)
{
@@ -762,12 +910,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
}
-static int
-devlink_port_fn_state_fill(struct devlink *devlink,
- const struct devlink_ops *ops,
- struct devlink_port *port, struct sk_buff *msg,
- struct netlink_ext_ack *extack,
- bool *msg_updated)
+static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
+ struct devlink_port *port,
+ struct sk_buff *msg,
+ struct netlink_ext_ack *extack,
+ bool *msg_updated)
{
enum devlink_port_fn_opstate opstate;
enum devlink_port_fn_state state;
@@ -776,7 +923,7 @@ devlink_port_fn_state_fill(struct devlink *devlink,
if (!ops->port_fn_state_get)
return 0;
- err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack);
+ err = ops->port_fn_state_get(port, &state, &opstate, extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -804,7 +951,6 @@ static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
- struct devlink *devlink = port->devlink;
const struct devlink_ops *ops;
struct nlattr *function_attr;
bool msg_updated = false;
@@ -814,13 +960,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
if (!function_attr)
return -EMSGSIZE;
- ops = devlink->ops;
- err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg,
- extack, &msg_updated);
+ ops = port->devlink->ops;
+ err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack,
+ &msg_updated);
if (err)
goto out;
- err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack,
- &msg_updated);
+ err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
nla_nest_cancel(msg, function_attr);
@@ -829,12 +974,12 @@ out:
return err;
}
-static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
+static int devlink_nl_port_fill(struct sk_buff *msg,
struct devlink_port *devlink_port,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags,
- struct netlink_ext_ack *extack)
+ enum devlink_command cmd, u32 portid, u32 seq,
+ int flags, struct netlink_ext_ack *extack)
{
+ struct devlink *devlink = devlink_port->devlink;
void *hdr;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -895,28 +1040,131 @@ nla_put_failure:
static void devlink_port_notify(struct devlink_port *devlink_port,
enum devlink_command cmd)
{
- struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
- if (!devlink_port->registered)
+ WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
return;
- WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
+ err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(devlink_port->devlink), msg, 0,
+ DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static void devlink_rate_notify(struct devlink_rate *devlink_rate,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
- err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0,
- NULL);
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL);
if (err) {
nlmsg_free(msg);
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(devlink_rate->devlink), msg, 0,
+ DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ struct devlink_rate *devlink_rate;
+ struct devlink *devlink;
+ int start = cb->args[0];
+ unsigned long index;
+ int idx = 0;
+ int err = 0;
+
+ mutex_lock(&devlink_mutex);
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+ u32 id = NETLINK_CB(cb->skb).portid;
+
+ if (idx < start) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, NULL);
+ if (err) {
+ mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
+ goto out;
+ }
+ idx++;
+ }
+ mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
+ }
+out:
+ mutex_unlock(&devlink_mutex);
+ if (err != -EMSGSIZE)
+ return err;
+
+ cb->args[0] = idx;
+ return msg->len;
+}
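
Note: the dump callback follows the usual netlink pattern: cb->args[0] carries the index of the first object still to be emitted, the walk skips idx < start, stops when the message fills up (-EMSGSIZE) with the position saved for the next invocation, and every devlink instance visited is pinned with devlink_try_get()/devlink_put(). A userspace sketch of just the resumable-iteration part, with an illustrative emit callback:

    #include <stddef.h>

    /* Emit items [start, ...) until the sink is full; returns the index to
     * resume from on the next call. emit() returning nonzero means "full". */
    static size_t dump_resumable(int (*emit)(size_t idx), size_t nitems,
                                 size_t start)
    {
            size_t idx;

            for (idx = 0; idx < nitems; idx++) {
                    if (idx < start)
                            continue;
                    if (emit(idx))
                            break;          /* resume here next time */
            }
            return idx;
    }
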
+
+static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_rate *devlink_rate = info->user_ptr[1];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static bool
+devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent)
+{
+ while (parent) {
+ if (parent == devlink_rate)
+ return true;
+ parent = parent->parent;
+ }
+ return false;
}
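
Note: devlink_rate_is_parent_node() walks the parent chain so a node can be rejected as a parent when it is already a descendant, i.e. when linking would create a cycle. A self-contained sketch of that walk:

    #include <stdbool.h>
    #include <stddef.h>

    struct node {
            struct node *parent;
    };

    /* True if node already appears on the ancestor chain starting at parent,
     * i.e. linking node under parent would create a cycle. */
    static bool is_ancestor(const struct node *node, const struct node *parent)
    {
            while (parent) {
                    if (parent == node)
                            return true;
                    parent = parent->parent;
            }
            return false;
    }
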
static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
@@ -944,20 +1192,30 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
{
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) {
+ devlink_put(devlink);
continue;
+ }
+
if (idx < start) {
idx++;
+ devlink_put(devlink);
continue;
}
+
err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ devlink_put(devlink);
if (err)
goto out;
idx++;
@@ -973,7 +1231,6 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
struct sk_buff *msg;
int err;
@@ -981,8 +1238,7 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
if (!msg)
return -ENOMEM;
- err = devlink_nl_port_fill(msg, devlink, devlink_port,
- DEVLINK_CMD_PORT_NEW,
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW,
info->snd_portid, info->snd_seq, 0,
info->extack);
if (err) {
@@ -999,32 +1255,39 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_port *devlink_port;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
if (idx < start) {
idx++;
continue;
}
- err = devlink_nl_port_fill(msg, devlink, devlink_port,
+ err = devlink_nl_port_fill(msg, devlink_port,
DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- cb->extack);
+ NLM_F_MULTI, cb->extack);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -1033,31 +1296,33 @@ out:
return msg->len;
}
-static int devlink_port_type_set(struct devlink *devlink,
- struct devlink_port *devlink_port,
+static int devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type port_type)
{
int err;
- if (devlink->ops->port_type_set) {
- if (port_type == devlink_port->type)
- return 0;
- err = devlink->ops->port_type_set(devlink_port, port_type);
- if (err)
- return err;
- devlink_port->desired_type = port_type;
- devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ if (!devlink_port->devlink->ops->port_type_set)
+ return -EOPNOTSUPP;
+
+ if (port_type == devlink_port->type)
return 0;
- }
- return -EOPNOTSUPP;
+
+ err = devlink_port->devlink->ops->port_type_set(devlink_port,
+ port_type);
+ if (err)
+ return err;
+
+ devlink_port->desired_type = port_type;
+ devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+ return 0;
}
-static int
-devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
- const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
- const struct devlink_ops *ops;
+ const struct devlink_ops *ops = port->devlink->ops;
const u8 *hw_addr;
int hw_addr_len;
@@ -1078,17 +1343,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *
}
}
- ops = devlink->ops;
if (!ops->port_function_hw_addr_set) {
NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
return -EOPNOTSUPP;
}
- return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+ return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
+ extack);
}
-static int devlink_port_fn_state_set(struct devlink *devlink,
- struct devlink_port *port,
+static int devlink_port_fn_state_set(struct devlink_port *port,
const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
@@ -1096,18 +1360,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink,
const struct devlink_ops *ops;
state = nla_get_u8(attr);
- ops = devlink->ops;
+ ops = port->devlink->ops;
if (!ops->port_fn_state_set) {
NL_SET_ERR_MSG_MOD(extack,
"Function does not support state setting");
return -EOPNOTSUPP;
}
- return ops->port_fn_state_set(devlink, port, state, extack);
+ return ops->port_fn_state_set(port, state, extack);
}
-static int
-devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
- const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_set(struct devlink_port *port,
+ const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
int err;
@@ -1121,7 +1385,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
if (attr) {
- err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+ err = devlink_port_function_hw_addr_set(port, attr, extack);
if (err)
return err;
}
@@ -1131,7 +1395,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
*/
attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
if (attr)
- err = devlink_port_fn_state_set(devlink, port, attr, extack);
+ err = devlink_port_fn_state_set(port, attr, extack);
if (!err)
devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
@@ -1142,14 +1406,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
struct devlink_port *devlink_port = info->user_ptr[1];
- struct devlink *devlink = devlink_port->devlink;
int err;
if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
enum devlink_port_type port_type;
port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
- err = devlink_port_type_set(devlink, devlink_port, port_type);
+ err = devlink_port_type_set(devlink_port, port_type);
if (err)
return err;
}
@@ -1158,7 +1421,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
struct netlink_ext_ack *extack = info->extack;
- err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+ err = devlink_port_function_set(devlink_port, attr, extack);
if (err)
return err;
}
@@ -1253,9 +1516,8 @@ static int devlink_port_new_notifiy(struct devlink *devlink,
goto out;
}
- err = devlink_nl_port_fill(msg, devlink, devlink_port,
- DEVLINK_CMD_NEW, info->snd_portid,
- info->snd_seq, 0, NULL);
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0, NULL);
if (err)
goto out;
@@ -1339,6 +1601,255 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
return devlink->ops->port_del(devlink, port_index, extack);
}
+static int
+devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
+ struct genl_info *info,
+ struct nlattr *nla_parent)
+{
+ struct devlink *devlink = devlink_rate->devlink;
+ const char *parent_name = nla_data(nla_parent);
+ const struct devlink_ops *ops = devlink->ops;
+ size_t len = strlen(parent_name);
+ struct devlink_rate *parent;
+ int err = -EOPNOTSUPP;
+
+ parent = devlink_rate->parent;
+ if (parent && len) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Rate object already has parent.");
+ return -EBUSY;
+ } else if (parent && !len) {
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_parent_set(devlink_rate, NULL,
+ devlink_rate->priv, NULL,
+ info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_parent_set(devlink_rate, NULL,
+ devlink_rate->priv, NULL,
+ info->extack);
+ if (err)
+ return err;
+
+ refcount_dec(&parent->refcnt);
+ devlink_rate->parent = NULL;
+ } else if (!parent && len) {
+ parent = devlink_rate_node_get_by_name(devlink, parent_name);
+ if (IS_ERR(parent))
+ return -ENODEV;
+
+ if (parent == devlink_rate) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed");
+ return -EINVAL;
+ }
+
+ if (devlink_rate_is_node(devlink_rate) &&
+ devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node.");
+ return -EEXIST;
+ }
+
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_parent_set(devlink_rate, parent,
+ devlink_rate->priv, parent->priv,
+ info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_parent_set(devlink_rate, parent,
+ devlink_rate->priv, parent->priv,
+ info->extack);
+ if (err)
+ return err;
+
+ refcount_inc(&parent->refcnt);
+ devlink_rate->parent = parent;
+ }
+
+ return 0;
+}
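
Note: devlink_nl_rate_parent_node_set() covers three cases: a non-empty parent name while a parent is already set is rejected with -EBUSY (reparenting requires an explicit detach first), an empty name detaches and drops the old parent's reference, and a new name attaches and takes one; that per-node reference count is what later lets the delete handler refuse to remove a node that still has children. A small userspace sketch of attach/detach with that counting (it omits the ancestor-cycle check shown earlier):

    #include <errno.h>
    #include <stddef.h>

    struct rate_obj {
            struct rate_obj *parent;
            int refcnt;             /* 1 for the object itself + one per child */
    };

    static int rate_detach(struct rate_obj *obj)
    {
            if (!obj->parent)
                    return 0;
            obj->parent->refcnt--;
            obj->parent = NULL;
            return 0;
    }

    static int rate_attach(struct rate_obj *obj, struct rate_obj *parent)
    {
            if (obj->parent)
                    return -EBUSY;  /* detach explicitly before reparenting */
            if (parent == obj)
                    return -EINVAL;
            parent->refcnt++;
            obj->parent = parent;
            return 0;
    }
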
+
+static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
+ const struct devlink_ops *ops,
+ struct genl_info *info)
+{
+ struct nlattr *nla_parent, **attrs = info->attrs;
+ int err = -EOPNOTSUPP;
+ u64 rate;
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE]) {
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_SHARE]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_share_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_share_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ if (err)
+ return err;
+ devlink_rate->tx_share = rate;
+ }
+
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX]) {
+ rate = nla_get_u64(attrs[DEVLINK_ATTR_RATE_TX_MAX]);
+ if (devlink_rate_is_leaf(devlink_rate))
+ err = ops->rate_leaf_tx_max_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ else if (devlink_rate_is_node(devlink_rate))
+ err = ops->rate_node_tx_max_set(devlink_rate, devlink_rate->priv,
+ rate, info->extack);
+ if (err)
+ return err;
+ devlink_rate->tx_max = rate;
+ }
+
+ nla_parent = attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME];
+ if (nla_parent) {
+ err = devlink_nl_rate_parent_node_set(devlink_rate, info,
+ nla_parent);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
+ struct genl_info *info,
+ enum devlink_rate_type type)
+{
+ struct nlattr **attrs = info->attrs;
+
+ if (type == DEVLINK_RATE_TYPE_LEAF) {
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+ !ops->rate_leaf_parent_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs");
+ return false;
+ }
+ } else if (type == DEVLINK_RATE_TYPE_NODE) {
+ if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes");
+ return false;
+ }
+ if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
+ !ops->rate_node_parent_set) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes");
+ return false;
+ }
+ } else {
+ WARN(1, "Unknown type of rate object");
+ return false;
+ }
+
+ return true;
+}
+
+static int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_rate *devlink_rate = info->user_ptr[1];
+ struct devlink *devlink = devlink_rate->devlink;
+ const struct devlink_ops *ops = devlink->ops;
+ int err;
+
+ if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
+ return -EOPNOTSUPP;
+
+ err = devlink_nl_rate_set(devlink_rate, ops, info);
+
+ if (!err)
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+ return err;
+}
+
+static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_rate *rate_node;
+ const struct devlink_ops *ops;
+ int err;
+
+ ops = devlink->ops;
+ if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
+ return -EOPNOTSUPP;
+
+ rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
+ if (!IS_ERR(rate_node))
+ return -EEXIST;
+ else if (rate_node == ERR_PTR(-EINVAL))
+ return -EINVAL;
+
+ rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
+ if (!rate_node)
+ return -ENOMEM;
+
+ rate_node->devlink = devlink;
+ rate_node->type = DEVLINK_RATE_TYPE_NODE;
+ rate_node->name = nla_strdup(info->attrs[DEVLINK_ATTR_RATE_NODE_NAME], GFP_KERNEL);
+ if (!rate_node->name) {
+ err = -ENOMEM;
+ goto err_strdup;
+ }
+
+ err = ops->rate_node_new(rate_node, &rate_node->priv, info->extack);
+ if (err)
+ goto err_node_new;
+
+ err = devlink_nl_rate_set(rate_node, ops, info);
+ if (err)
+ goto err_rate_set;
+
+ refcount_set(&rate_node->refcnt, 1);
+ list_add(&rate_node->list, &devlink->rate_list);
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ return 0;
+
+err_rate_set:
+ ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+err_node_new:
+ kfree(rate_node->name);
+err_strdup:
+ kfree(rate_node);
+ return err;
+}
+
+static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_rate *rate_node = info->user_ptr[1];
+ struct devlink *devlink = rate_node->devlink;
+ const struct devlink_ops *ops = devlink->ops;
+ int err;
+
+ if (refcount_read(&rate_node->refcnt) > 1) {
+ NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node.");
+ return -EBUSY;
+ }
+
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+ err = ops->rate_node_del(rate_node, rate_node->priv, info->extack);
+ if (rate_node->parent)
+ refcount_dec(&rate_node->parent->refcnt);
+ list_del(&rate_node->list);
+ kfree(rate_node->name);
+ kfree(rate_node);
+ return err;
+}
+
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
enum devlink_command cmd, u32 portid,
@@ -1410,13 +1921,18 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_sb *devlink_sb;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
if (idx < start) {
@@ -1430,11 +1946,14 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -1554,14 +2073,19 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_sb *devlink_sb;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops->sb_pool_get)
- continue;
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
@@ -1572,10 +2096,13 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -1767,14 +2294,19 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_sb *devlink_sb;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops->sb_port_pool_get)
- continue;
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
err = __sb_port_pool_get_dumpit(msg, start, &idx,
@@ -1785,10 +2317,13 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -2008,14 +2543,18 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
struct devlink *devlink;
struct devlink_sb *devlink_sb;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) ||
!devlink->ops->sb_tc_pool_bind_get)
- continue;
+ goto retry;
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
@@ -2028,10 +2567,13 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -2207,6 +2749,23 @@ static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
+static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_rate *devlink_rate;
+
+ /* Take the lock to sync with devlink_rate_nodes_destroy() */
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list)
+ if (devlink_rate_is_node(devlink_rate)) {
+ mutex_unlock(&devlink->lock);
+ NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
+ return -EBUSY;
+ }
+ mutex_unlock(&devlink->lock);
+ return 0;
+}
+
static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -2221,6 +2780,9 @@ static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
if (!ops->eswitch_mode_set)
return -EOPNOTSUPP;
mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+ err = devlink_rate_nodes_check(devlink, mode, info->extack);
+ if (err)
+ return err;
err = ops->eswitch_mode_set(devlink, mode, info->extack);
if (err)
return err;
@@ -3283,10 +3845,12 @@ static void devlink_param_notify(struct devlink *devlink,
struct devlink_param_item *param_item,
enum devlink_command cmd);
-static void devlink_reload_netns_change(struct devlink *devlink,
- struct net *dest_net)
+static void devlink_ns_change_notify(struct devlink *devlink,
+ struct net *dest_net, struct net *curr_net,
+ bool new)
{
struct devlink_param_item *param_item;
+ enum devlink_command cmd;
/* Userspace needs to be notified about devlink objects
* removed from original and entering new network namespace.
@@ -3294,17 +3858,18 @@ static void devlink_reload_netns_change(struct devlink *devlink,
* reload process so the notifications are generated separatelly.
*/
- list_for_each_entry(param_item, &devlink->param_list, list)
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_DEL);
- devlink_notify(devlink, DEVLINK_CMD_DEL);
+ if (!dest_net || net_eq(dest_net, curr_net))
+ return;
- __devlink_net_set(devlink, dest_net);
+ if (new)
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
- devlink_notify(devlink, DEVLINK_CMD_NEW);
+ cmd = new ? DEVLINK_CMD_PARAM_NEW : DEVLINK_CMD_PARAM_DEL;
list_for_each_entry(param_item, &devlink->param_list, list)
- devlink_param_notify(devlink, 0, param_item,
- DEVLINK_CMD_PARAM_NEW);
+ devlink_param_notify(devlink, 0, param_item, cmd);
+
+ if (!new)
+ devlink_notify(devlink, DEVLINK_CMD_DEL);
}
static bool devlink_reload_supported(const struct devlink_ops *ops)
@@ -3384,6 +3949,7 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
u32 *actions_performed, struct netlink_ext_ack *extack)
{
u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ struct net *curr_net;
int err;
if (!devlink->reload_enabled)
@@ -3391,18 +3957,22 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net,
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
+
+ curr_net = devlink_net(devlink);
+ devlink_ns_change_notify(devlink, dest_net, curr_net, false);
err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
if (err)
return err;
- if (dest_net && !net_eq(dest_net, devlink_net(devlink)))
- devlink_reload_netns_change(devlink, dest_net);
+ if (dest_net && !net_eq(dest_net, curr_net))
+ write_pnet(&devlink->_net, dest_net);
err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
if (err)
return err;
+ devlink_ns_change_notify(devlink, dest_net, curr_net, true);
WARN_ON(!(*actions_performed & BIT(action)));
/* Catch driver on updating the remote action within devlink reload */
WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
@@ -3599,7 +4169,7 @@ out_free_msg:
static void devlink_flash_update_begin_notify(struct devlink *devlink)
{
- struct devlink_flash_notify params = { 0 };
+ struct devlink_flash_notify params = {};
__devlink_flash_update_notify(devlink,
DEVLINK_CMD_FLASH_UPDATE,
@@ -3608,7 +4178,7 @@ static void devlink_flash_update_begin_notify(struct devlink *devlink)
static void devlink_flash_update_end_notify(struct devlink *devlink)
{
- struct devlink_flash_notify params = { 0 };
+ struct devlink_flash_notify params = {};
__devlink_flash_update_notify(devlink,
DEVLINK_CMD_FLASH_UPDATE_END,
@@ -3765,6 +4335,21 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME,
.type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE,
+ },
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET,
+ .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME,
+ .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
@@ -4035,13 +4620,18 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
struct devlink_param_item *param_item;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(param_item, &devlink->param_list, list) {
if (idx < start) {
@@ -4057,11 +4647,14 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -4303,13 +4896,18 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct devlink_port *devlink_port;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(devlink_port, &devlink->port_list, list) {
list_for_each_entry(param_item,
@@ -4329,12 +4927,15 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
err = 0;
} else if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -4544,7 +5145,6 @@ static void devlink_nl_region_notify(struct devlink_region *region,
struct devlink_snapshot *snapshot,
enum devlink_command cmd)
{
- struct devlink *devlink = region->devlink;
struct sk_buff *msg;
WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
@@ -4553,8 +5153,9 @@ static void devlink_nl_region_notify(struct devlink_region *region,
if (IS_ERR(msg))
return;
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(region->devlink), msg, 0,
+ DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
/**
@@ -4872,15 +5473,22 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
{
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
&idx, start);
+retry:
+ devlink_put(devlink);
if (err)
goto out;
}
@@ -5243,6 +5851,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
nla_nest_end(skb, chunks_attr);
genlmsg_end(skb, hdr);
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
mutex_unlock(&devlink_mutex);
return skb->len;
@@ -5251,6 +5860,7 @@ nla_put_failure:
genlmsg_cancel(skb, hdr);
out_unlock:
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
out_dev:
mutex_unlock(&devlink_mutex);
return err;
@@ -5397,22 +6007,20 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
{
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err = 0;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- continue;
- if (idx < start) {
- idx++;
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
- }
- if (!devlink->ops->info_get) {
- idx++;
- continue;
- }
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
+ if (idx < start || !devlink->ops->info_get)
+ goto inc;
mutex_lock(&devlink->lock);
err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
@@ -5422,9 +6030,14 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
mutex_unlock(&devlink->lock);
if (err == -EOPNOTSUPP)
err = 0;
- else if (err)
+ else if (err) {
+ devlink_put(devlink);
break;
+ }
+inc:
idx++;
+retry:
+ devlink_put(devlink);
}
mutex_unlock(&devlink_mutex);
@@ -6238,11 +6851,11 @@ EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy);
static int
devlink_nl_health_reporter_fill(struct sk_buff *msg,
- struct devlink *devlink,
struct devlink_health_reporter *reporter,
enum devlink_command cmd, u32 portid,
u32 seq, int flags)
{
+ struct devlink *devlink = reporter->devlink;
struct nlattr *reporter_attr;
void *hdr;
@@ -6319,8 +6932,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
if (!msg)
return;
- err = devlink_nl_health_reporter_fill(msg, reporter->devlink,
- reporter, cmd, 0, 0, 0);
+ err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0);
if (err) {
nlmsg_free(msg);
return;
@@ -6510,6 +7122,7 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
goto unlock;
reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
+ devlink_put(devlink);
mutex_unlock(&devlink_mutex);
return reporter;
unlock:
@@ -6553,7 +7166,7 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
goto out;
}
- err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+ err = devlink_nl_health_reporter_fill(msg, reporter,
DEVLINK_CMD_HEALTH_REPORTER_GET,
info->snd_portid, info->snd_seq,
0);
@@ -6576,13 +7189,18 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
struct devlink_port *port;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry_rep;
+
mutex_lock(&devlink->reporters_lock);
list_for_each_entry(reporter, &devlink->reporter_list,
list) {
@@ -6590,24 +7208,29 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
idx++;
continue;
}
- err = devlink_nl_health_reporter_fill(msg, devlink,
- reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ err = devlink_nl_health_reporter_fill(
+ msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->reporters_lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->reporters_lock);
+retry_rep:
+ devlink_put(devlink);
}
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry_port;
+
mutex_lock(&devlink->lock);
list_for_each_entry(port, &devlink->port_list, list) {
mutex_lock(&port->reporters_lock);
@@ -6616,14 +7239,15 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
idx++;
continue;
}
- err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
+ err = devlink_nl_health_reporter_fill(
+ msg, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
if (err) {
mutex_unlock(&port->reporters_lock);
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
@@ -6631,6 +7255,8 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
mutex_unlock(&port->reporters_lock);
}
mutex_unlock(&devlink->lock);
+retry_port:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -6994,8 +7620,9 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
}
}
-static int devlink_trap_stats_put(struct sk_buff *msg,
- struct devlink_stats __percpu *trap_stats)
+static int
+devlink_trap_group_stats_put(struct sk_buff *msg,
+ struct devlink_stats __percpu *trap_stats)
{
struct devlink_stats stats;
struct nlattr *attr;
@@ -7023,6 +7650,50 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int devlink_trap_stats_put(struct sk_buff *msg, struct devlink *devlink,
+ const struct devlink_trap_item *trap_item)
+{
+ struct devlink_stats stats;
+ struct nlattr *attr;
+ u64 drops = 0;
+ int err;
+
+ if (devlink->ops->trap_drop_counter_get) {
+ err = devlink->ops->trap_drop_counter_get(devlink,
+ trap_item->trap,
+ &drops);
+ if (err)
+ return err;
+ }
+
+ devlink_trap_stats_read(trap_item->stats, &stats);
+
+ attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (devlink->ops->trap_drop_counter_get &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_DROPPED, drops,
+ DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+ stats.rx_packets, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+ stats.rx_bytes, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
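devlink_trap_stats_put() above adds an optional hardware drop counter, fetched through the trap_drop_counter_get op before the software stats. A hedged sketch of a driver-side callback; only the op signature is taken from the code above, the foo_* names and the counter storage are hypothetical:

/* Hypothetical driver callback; only the signature matches the op used above. */
enum { FOO_TRAP_MAX = 16 };

struct foo_priv {
	u64 hw_trap_drops[FOO_TRAP_MAX];	/* stand-in for per-trap HW counters */
};

static int foo_devlink_trap_drop_counter_get(struct devlink *devlink,
					     const struct devlink_trap *trap,
					     u64 *p_drops)
{
	struct foo_priv *priv = devlink_priv(devlink);

	if (trap->id >= FOO_TRAP_MAX)
		return -EINVAL;

	/* A real driver would read a per-trap hardware register here. */
	*p_drops = priv->hw_trap_drops[trap->id];
	return 0;
}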
static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
const struct devlink_trap_item *trap_item,
enum devlink_command cmd, u32 portid, u32 seq,
@@ -7060,7 +7731,7 @@ static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
if (err)
goto nla_put_failure;
- err = devlink_trap_stats_put(msg, trap_item->stats);
+ err = devlink_trap_stats_put(msg, devlink, trap_item);
if (err)
goto nla_put_failure;
@@ -7114,13 +7785,18 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
struct devlink_trap_item *trap_item;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(trap_item, &devlink->trap_list, list) {
if (idx < start) {
@@ -7134,11 +7810,14 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -7277,7 +7956,7 @@ devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
group_item->policer_item->policer->id))
goto nla_put_failure;
- err = devlink_trap_stats_put(msg, group_item->stats);
+ err = devlink_trap_group_stats_put(msg, group_item->stats);
if (err)
goto nla_put_failure;
@@ -7333,13 +8012,18 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
u32 portid = NETLINK_CB(cb->skb).portid;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(group_item, &devlink->trap_group_list,
list) {
@@ -7354,11 +8038,14 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -7639,13 +8326,18 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
u32 portid = NETLINK_CB(cb->skb).portid;
struct devlink *devlink;
int start = cb->args[0];
+ unsigned long index;
int idx = 0;
int err;
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
continue;
+
+ if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+ goto retry;
+
mutex_lock(&devlink->lock);
list_for_each_entry(policer_item, &devlink->trap_policer_list,
list) {
@@ -7660,11 +8352,14 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
NLM_F_MULTI);
if (err) {
mutex_unlock(&devlink->lock);
+ devlink_put(devlink);
goto out;
}
idx++;
}
mutex_unlock(&devlink->lock);
+retry:
+ devlink_put(devlink);
}
out:
mutex_unlock(&devlink_mutex);
@@ -7801,6 +8496,11 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
[DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
[DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
};
static const struct genl_small_ops devlink_nl_ops[] = {
@@ -7827,6 +8527,30 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
+ .cmd = DEVLINK_CMD_RATE_GET,
+ .doit = devlink_nl_cmd_rate_get_doit,
+ .dumpit = devlink_nl_cmd_rate_get_dumpit,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
+ /* can be retrieved by unprivileged users */
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_SET,
+ .doit = devlink_nl_cmd_rate_set_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_NEW,
+ .doit = devlink_nl_cmd_rate_new_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_DEL,
+ .doit = devlink_nl_cmd_rate_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_RATE_NODE,
+ },
+ {
.cmd = DEVLINK_CMD_PORT_SPLIT,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_split_doit,
@@ -8176,31 +8900,46 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
}
/**
- * devlink_alloc - Allocate new devlink instance resources
+ * devlink_alloc_ns - Allocate new devlink instance resources
+ * in a specific namespace
*
* @ops: ops
* @priv_size: size of user private data
+ * @net: net namespace
+ * @dev: parent device
*
* Allocate new devlink instance resources, including devlink index
* and name.
*/
-struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
+struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+ size_t priv_size, struct net *net,
+ struct device *dev)
{
struct devlink *devlink;
+ static u32 last_id;
+ int ret;
- if (WARN_ON(!ops))
- return NULL;
-
+ WARN_ON(!ops || !dev);
if (!devlink_reload_actions_valid(ops))
return NULL;
devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
if (!devlink)
return NULL;
+
+ ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
+ &last_id, GFP_KERNEL);
+ if (ret < 0) {
+ kfree(devlink);
+ return NULL;
+ }
+
+ devlink->dev = dev;
devlink->ops = ops;
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
- __devlink_net_set(devlink, &init_net);
+ write_pnet(&devlink->_net, net);
INIT_LIST_HEAD(&devlink->port_list);
+ INIT_LIST_HEAD(&devlink->rate_list);
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
INIT_LIST_HEAD(&devlink->resource_list);
@@ -8212,22 +8951,22 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
INIT_LIST_HEAD(&devlink->trap_policer_list);
mutex_init(&devlink->lock);
mutex_init(&devlink->reporters_lock);
+ refcount_set(&devlink->refcount, 1);
+ init_completion(&devlink->comp);
+
return devlink;
}
-EXPORT_SYMBOL_GPL(devlink_alloc);
+EXPORT_SYMBOL_GPL(devlink_alloc_ns);
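With the namespace and parent device supplied at allocation time, a probe path built on this API would look roughly as below; the foo_* names are hypothetical, and only devlink_alloc_ns(), devlink_priv() and the argument-less devlink_register() come from this patch:

/* Hypothetical probe path illustrating the new allocation API. */
struct foo_priv {
	struct pci_dev *pdev;
};

static const struct devlink_ops foo_devlink_ops;	/* empty ops for the sketch */

static int foo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct devlink *devlink;
	struct foo_priv *priv;

	devlink = devlink_alloc_ns(&foo_devlink_ops, sizeof(*priv),
				   &init_net, &pdev->dev);
	if (!devlink)
		return -ENOMEM;

	priv = devlink_priv(devlink);
	priv->pdev = pdev;
	pci_set_drvdata(pdev, devlink);

	devlink_register(devlink);	/* no device argument any more */
	return 0;
}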
/**
* devlink_register - Register devlink instance
*
* @devlink: devlink
- * @dev: parent device
*/
-int devlink_register(struct devlink *devlink, struct device *dev)
+int devlink_register(struct devlink *devlink)
{
- devlink->dev = dev;
- devlink->registered = true;
mutex_lock(&devlink_mutex);
- list_add_tail(&devlink->list, &devlink_list);
+ xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
devlink_notify(devlink, DEVLINK_CMD_NEW);
mutex_unlock(&devlink_mutex);
return 0;
@@ -8241,11 +8980,14 @@ EXPORT_SYMBOL_GPL(devlink_register);
*/
void devlink_unregister(struct devlink *devlink)
{
+ devlink_put(devlink);
+ wait_for_completion(&devlink->comp);
+
mutex_lock(&devlink_mutex);
WARN_ON(devlink_reload_supported(devlink->ops) &&
devlink->reload_enabled);
devlink_notify(devlink, DEVLINK_CMD_DEL);
- list_del(&devlink->list);
+ xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
mutex_unlock(&devlink_mutex);
}
EXPORT_SYMBOL_GPL(devlink_unregister);
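devlink_unregister() now drops the initial reference and waits on the completion until every in-flight dumper has called devlink_put(), so teardown can free the instance without racing netlink users. A sketch of the matching remove order, with hypothetical driver names:

/* Hypothetical remove path; the ordering mirrors the wait in devlink_unregister(). */
static void foo_remove(struct pci_dev *pdev)
{
	struct devlink *devlink = pci_get_drvdata(pdev);

	devlink_unregister(devlink);	/* blocks until all references are dropped */
	devlink_free(devlink);		/* safe: no user can still hold the instance */
}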
@@ -8303,9 +9045,11 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->resource_list));
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->rate_list));
WARN_ON(!list_empty(&devlink->port_list));
xa_destroy(&devlink->snapshot_ids);
+ xa_erase(&devlinks, devlink->index);
kfree(devlink);
}
@@ -8366,9 +9110,10 @@ int devlink_port_register(struct devlink *devlink,
mutex_unlock(&devlink->lock);
return -EEXIST;
}
+
+ WARN_ON(devlink_port->devlink);
devlink_port->devlink = devlink;
devlink_port->index = port_index;
- devlink_port->registered = true;
spin_lock_init(&devlink_port->type_lock);
INIT_LIST_HEAD(&devlink_port->reporter_list);
mutex_init(&devlink_port->reporters_lock);
@@ -8407,7 +9152,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type type,
void *type_dev)
{
- if (WARN_ON(!devlink_port->registered))
+ if (WARN_ON(!devlink_port->devlink))
return;
devlink_port_type_warn_cancel(devlink_port);
spin_lock_bh(&devlink_port->type_lock);
@@ -8527,7 +9272,7 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port,
{
int ret;
- if (WARN_ON(devlink_port->registered))
+ if (WARN_ON(devlink_port->devlink))
return;
devlink_port->attrs = *attrs;
ret = __devlink_port_attrs_set(devlink_port, attrs->flavour);
@@ -8551,7 +9296,7 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->registered))
+ if (WARN_ON(devlink_port->devlink))
return;
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_PF);
@@ -8578,7 +9323,7 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->registered))
+ if (WARN_ON(devlink_port->devlink))
return;
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_VF);
@@ -8606,7 +9351,7 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
struct devlink_port_attrs *attrs = &devlink_port->attrs;
int ret;
- if (WARN_ON(devlink_port->registered))
+ if (WARN_ON(devlink_port->devlink))
return;
ret = __devlink_port_attrs_set(devlink_port,
DEVLINK_PORT_FLAVOUR_PCI_SF);
@@ -8619,6 +9364,110 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro
}
EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set);
+/**
+ * devlink_rate_leaf_create - create devlink rate leaf
+ *
+ * @devlink_port: devlink port object to create rate object on
+ * @priv: driver private data
+ *
+ * Create devlink rate object of type leaf on provided @devlink_port.
+ * Emits a call trace (via WARN_ON) if @devlink_port already has a devlink
+ * rate object.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ *
+ * Return: -ENOMEM if failed to allocate rate object, 0 otherwise.
+ */
+int
+devlink_rate_leaf_create(struct devlink_port *devlink_port, void *priv)
+{
+ struct devlink *devlink = devlink_port->devlink;
+ struct devlink_rate *devlink_rate;
+
+ devlink_rate = kzalloc(sizeof(*devlink_rate), GFP_KERNEL);
+ if (!devlink_rate)
+ return -ENOMEM;
+
+ mutex_lock(&devlink->lock);
+ WARN_ON(devlink_port->devlink_rate);
+ devlink_rate->type = DEVLINK_RATE_TYPE_LEAF;
+ devlink_rate->devlink = devlink;
+ devlink_rate->devlink_port = devlink_port;
+ devlink_rate->priv = priv;
+ list_add_tail(&devlink_rate->list, &devlink->rate_list);
+ devlink_port->devlink_rate = devlink_rate;
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+ mutex_unlock(&devlink->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_rate_leaf_create);
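A driver exposing per-function tx rate objects would typically create the leaf right after registering the port and destroy it before unregistering; a hedged sketch with hypothetical foo_* names, where only the devlink_rate_leaf_*() and devlink_port_*() calls are real API:

/* Hypothetical VF port setup/teardown around the new rate-leaf calls. */
struct foo_vf {
	struct devlink *devlink;
	struct devlink_port dl_port;
	unsigned int port_index;
};

static int foo_vf_port_init(struct foo_vf *vf)
{
	int err;

	err = devlink_port_register(vf->devlink, &vf->dl_port, vf->port_index);
	if (err)
		return err;

	err = devlink_rate_leaf_create(&vf->dl_port, vf);
	if (err) {
		devlink_port_unregister(&vf->dl_port);
		return err;
	}
	return 0;
}

static void foo_vf_port_fini(struct foo_vf *vf)
{
	devlink_rate_leaf_destroy(&vf->dl_port);
	devlink_port_unregister(&vf->dl_port);
}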
+
+/**
+ * devlink_rate_leaf_destroy - destroy devlink rate leaf
+ *
+ * @devlink_port: devlink port linked to the rate object
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_rate_leaf_destroy(struct devlink_port *devlink_port)
+{
+ struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
+ struct devlink *devlink = devlink_port->devlink;
+
+ if (!devlink_rate)
+ return;
+
+ mutex_lock(&devlink->lock);
+ devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
+ if (devlink_rate->parent)
+ refcount_dec(&devlink_rate->parent->refcnt);
+ list_del(&devlink_rate->list);
+ devlink_port->devlink_rate = NULL;
+ mutex_unlock(&devlink->lock);
+ kfree(devlink_rate);
+}
+EXPORT_SYMBOL_GPL(devlink_rate_leaf_destroy);
+
+/**
+ * devlink_rate_nodes_destroy - destroy all devlink rate nodes on device
+ *
+ * @devlink: devlink instance
+ *
+ * Unset parent for all rate objects and destroy all rate nodes
+ * on specified device.
+ *
+ * Context: Takes and releases devlink->lock <mutex>.
+ */
+void devlink_rate_nodes_destroy(struct devlink *devlink)
+{
+ struct devlink_rate *devlink_rate, *tmp;
+ const struct devlink_ops *ops = devlink->ops;
+
+ mutex_lock(&devlink->lock);
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ if (!devlink_rate->parent)
+ continue;
+
+ refcount_dec(&devlink_rate->parent->refcnt);
+ if (devlink_rate_is_leaf(devlink_rate))
+ ops->rate_leaf_parent_set(devlink_rate, NULL, devlink_rate->priv,
+ NULL, NULL);
+ else if (devlink_rate_is_node(devlink_rate))
+ ops->rate_node_parent_set(devlink_rate, NULL, devlink_rate->priv,
+ NULL, NULL);
+ }
+ list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
+ if (devlink_rate_is_node(devlink_rate)) {
+ ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
+ list_del(&devlink_rate->list);
+ kfree(devlink_rate->name);
+ kfree(devlink_rate);
+ }
+ }
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_rate_nodes_destroy);
+
static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
char *name, size_t len)
{
@@ -8630,12 +9479,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- if (!attrs->split)
- n = snprintf(name, len, "p%u", attrs->phys.port_number);
- else
- n = snprintf(name, len, "p%us%u",
- attrs->phys.port_number,
- attrs->phys.split_subport_number);
+ n = snprintf(name, len, "p%u", attrs->phys.port_number);
+ if (n < len && attrs->split)
+ n += snprintf(name + n, len - n, "s%u",
+ attrs->phys.split_subport_number);
break;
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
@@ -9092,6 +9939,22 @@ static int devlink_param_verify(const struct devlink_param *param)
return devlink_param_driver_verify(param);
}
+static int __devlink_param_register_one(struct devlink *devlink,
+ unsigned int port_index,
+ struct list_head *param_list,
+ const struct devlink_param *param,
+ enum devlink_command reg_cmd)
+{
+ int err;
+
+ err = devlink_param_verify(param);
+ if (err)
+ return err;
+
+ return devlink_param_register_one(devlink, port_index,
+ param_list, param, reg_cmd);
+}
+
static int __devlink_params_register(struct devlink *devlink,
unsigned int port_index,
struct list_head *param_list,
@@ -9106,12 +9969,8 @@ static int __devlink_params_register(struct devlink *devlink,
mutex_lock(&devlink->lock);
for (i = 0; i < params_count; i++, param++) {
- err = devlink_param_verify(param);
- if (err)
- goto rollback;
-
- err = devlink_param_register_one(devlink, port_index,
- param_list, param, reg_cmd);
+ err = __devlink_param_register_one(devlink, port_index,
+ param_list, param, reg_cmd);
if (err)
goto rollback;
}
@@ -9184,6 +10043,43 @@ void devlink_params_unregister(struct devlink *devlink,
EXPORT_SYMBOL_GPL(devlink_params_unregister);
/**
+ * devlink_param_register - register one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Register a configuration parameter supported by the driver.
+ * Return: 0 on successful registration or a negative error code otherwise.
+ */
+int devlink_param_register(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ int err;
+
+ mutex_lock(&devlink->lock);
+ err = __devlink_param_register_one(devlink, 0, &devlink->param_list,
+ param, DEVLINK_CMD_PARAM_NEW);
+ mutex_unlock(&devlink->lock);
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_param_register);
+
+/**
+ * devlink_param_unregister - unregister one configuration parameter
+ * @devlink: devlink
+ * @param: configuration parameter to unregister
+ */
+void devlink_param_unregister(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ mutex_lock(&devlink->lock);
+ devlink_param_unregister_one(devlink, 0, &devlink->param_list, param,
+ DEVLINK_CMD_PARAM_DEL);
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_param_unregister);
+
+/**
* devlink_params_publish - publish configuration parameters
*
* @devlink: devlink
@@ -9226,6 +10122,54 @@ void devlink_params_unpublish(struct devlink *devlink)
EXPORT_SYMBOL_GPL(devlink_params_unpublish);
/**
+ * devlink_param_publish - publish one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Publish previously registered configuration parameter.
+ */
+void devlink_param_publish(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ list_for_each_entry(param_item, &devlink->param_list, list) {
+ if (param_item->param != param || param_item->published)
+ continue;
+ param_item->published = true;
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_NEW);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_param_publish);
+
+/**
+ * devlink_param_unpublish - unpublish one configuration parameter
+ *
+ * @devlink: devlink
+ * @param: one configuration parameter
+ *
+ * Unpublish previously registered configuration parameter.
+ */
+void devlink_param_unpublish(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ list_for_each_entry(param_item, &devlink->param_list, list) {
+ if (param_item->param != param || !param_item->published)
+ continue;
+ param_item->published = false;
+ devlink_param_notify(devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_DEL);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(devlink_param_unpublish);
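The single-parameter register/publish variants added here let a driver manage one parameter at a time instead of passing an array. A hedged usage sketch; the parameter id, name and callbacks are hypothetical, and the DEVLINK_PARAM_DRIVER() macro and devlink_param_gset_ctx layout are assumed to match include/net/devlink.h:

/* Hypothetical runtime boolean parameter, shown only to illustrate the API. */
struct foo_priv {
	bool feature_enabled;
};

static int foo_feature_get(struct devlink *devlink, u32 id,
			   struct devlink_param_gset_ctx *ctx)
{
	struct foo_priv *priv = devlink_priv(devlink);

	ctx->val.vbool = priv->feature_enabled;
	return 0;
}

static int foo_feature_set(struct devlink *devlink, u32 id,
			   struct devlink_param_gset_ctx *ctx)
{
	struct foo_priv *priv = devlink_priv(devlink);

	priv->feature_enabled = ctx->val.vbool;
	return 0;
}

enum { FOO_DEVLINK_PARAM_ID_FEATURE = DEVLINK_PARAM_GENERIC_ID_MAX + 1 };

static const struct devlink_param foo_feature_param =
	DEVLINK_PARAM_DRIVER(FOO_DEVLINK_PARAM_ID_FEATURE, "feature_enable",
			     DEVLINK_PARAM_TYPE_BOOL,
			     BIT(DEVLINK_PARAM_CMODE_RUNTIME),
			     foo_feature_get, foo_feature_set, NULL);

static int foo_params_init(struct devlink *devlink)
{
	int err;

	err = devlink_param_register(devlink, &foo_feature_param);
	if (err)
		return err;
	devlink_param_publish(devlink, &foo_feature_param);
	return 0;
}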
+
+/**
* devlink_port_params_register - register port configuration parameters
*
* @devlink_port: devlink port
@@ -10580,23 +11524,29 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
{
struct devlink *devlink;
u32 actions_performed;
+ unsigned long index;
int err;
/* In case network namespace is getting destroyed, reload
* all devlink instances from this namespace into init_net.
*/
mutex_lock(&devlink_mutex);
- list_for_each_entry(devlink, &devlink_list, list) {
- if (net_eq(devlink_net(devlink), net)) {
- if (WARN_ON(!devlink_reload_supported(devlink->ops)))
- continue;
- err = devlink_reload(devlink, &init_net,
- DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- DEVLINK_RELOAD_LIMIT_UNSPEC,
- &actions_performed, NULL);
- if (err && err != -EOPNOTSUPP)
- pr_warn("Failed to reload devlink instance into init_net\n");
- }
+ xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
+ if (!devlink_try_get(devlink))
+ continue;
+
+ if (!net_eq(devlink_net(devlink), net))
+ goto retry;
+
+ WARN_ON(!devlink_reload_supported(devlink->ops));
+ err = devlink_reload(devlink, &init_net,
+ DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_LIMIT_UNSPEC,
+ &actions_performed, NULL);
+ if (err && err != -EOPNOTSUPP)
+ pr_warn("Failed to reload devlink instance into init_net\n");
+retry:
+ devlink_put(devlink);
}
mutex_unlock(&devlink_mutex);
}
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ead2a8aa57b4..49442cae6f69 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -850,8 +850,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
}
hw_metadata->input_dev = metadata->input_dev;
- if (hw_metadata->input_dev)
- dev_hold(hw_metadata->input_dev);
+ dev_hold(hw_metadata->input_dev);
return hw_metadata;
@@ -867,8 +866,7 @@ free_hw_metadata:
static void
net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata)
{
- if (hw_metadata->input_dev)
- dev_put(hw_metadata->input_dev);
+ dev_put(hw_metadata->input_dev);
kfree(hw_metadata->fa_cookie);
kfree(hw_metadata->trap_name);
kfree(hw_metadata->trap_group_name);
diff --git a/net/core/dst.c b/net/core/dst.c
index fb3bcba87744..497ef9b3fc6a 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -49,8 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
unsigned short flags)
{
dst->dev = dev;
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
dst->ops = ops;
dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
@@ -118,8 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
if (dst->ops->destroy)
dst->ops->destroy(dst);
- if (dst->dev)
- dev_put(dst->dev);
+ dev_put(dst->dev);
lwtstate_put(dst->lwtstate);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index a9f937975080..79df7cd9dbc1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -57,7 +57,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
{
struct fib_rule *r;
- r = kzalloc(ops->rule_size, GFP_KERNEL);
+ r = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (r == NULL)
return -ENOMEM;
@@ -541,7 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
- nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
+ nlrule = kzalloc(ops->rule_size, GFP_KERNEL_ACCOUNT);
if (!nlrule) {
err = -ENOMEM;
goto errout;
diff --git a/net/core/filter.c b/net/core/filter.c
index 65ab4e21c087..2e32cee2c469 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -17,6 +17,7 @@
* Kris Katterjohn - Added many additional checks in bpf_check_classic()
*/
+#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -41,7 +42,6 @@
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
-#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
@@ -77,6 +77,7 @@
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
#include <net/tls.h>
+#include <net/xdp.h>
static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);
@@ -113,7 +114,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
* Run the eBPF program and then cut skb->data to correct size returned by
* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
+ * wrapper to bpf_prog_run. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -2179,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, hh_len);
- if (unlikely(!skb2)) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
rcu_read_lock_bh();
@@ -2213,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
}
rcu_read_unlock_bh();
if (dst)
- IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
out_drop:
kfree_skb(skb);
return -ENETDOWN;
@@ -2286,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
skb->tstamp = 0;
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, hh_len);
- if (unlikely(!skb2)) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
rcu_read_lock_bh();
@@ -3241,9 +3225,6 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
-
ret = skb_cow(skb, len_diff);
if (unlikely(ret < 0))
return ret;
@@ -3255,19 +3236,11 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* SKB_GSO_TCPV4 needs to be changed into
- * SKB_GSO_TCPV6.
- */
+ /* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
if (shinfo->gso_type & SKB_GSO_TCPV4) {
shinfo->gso_type &= ~SKB_GSO_TCPV4;
shinfo->gso_type |= SKB_GSO_TCPV6;
}
-
- /* Due to IPv6 header, MSS needs to be downgraded. */
- skb_decrease_gso_size(shinfo, len_diff);
- /* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
- shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IPV6);
@@ -3282,9 +3255,6 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
u32 off = skb_mac_header_len(skb);
int ret;
- if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
- return -ENOTSUPP;
-
ret = skb_unclone(skb, GFP_ATOMIC);
if (unlikely(ret < 0))
return ret;
@@ -3296,19 +3266,11 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
- /* SKB_GSO_TCPV6 needs to be changed into
- * SKB_GSO_TCPV4.
- */
+ /* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
if (shinfo->gso_type & SKB_GSO_TCPV6) {
shinfo->gso_type &= ~SKB_GSO_TCPV6;
shinfo->gso_type |= SKB_GSO_TCPV4;
}
-
- /* Due to IPv4 header, MSS can be upgraded. */
- skb_increase_gso_size(shinfo, len_diff);
- /* Header must be checked, and gso_segs recomputed. */
- shinfo->gso_type |= SKB_GSO_DODGY;
- shinfo->gso_segs = 0;
}
skb->protocol = htons(ETH_P_IP);
@@ -3902,8 +3864,7 @@ BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
if (unlikely(meta < xdp_frame_end ||
meta > xdp->data))
return -EINVAL;
- if (unlikely((metalen & (sizeof(__u32) - 1)) ||
- (metalen > 32)))
+ if (unlikely(xdp_metalen_invalid(metalen)))
return -EACCES;
xdp->data_meta = meta;
@@ -3919,6 +3880,34 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
.arg2_type = ARG_ANYTHING,
};
+/* XDP_REDIRECT works by a three-step process, implemented in the functions
+ * below:
+ *
+ * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
+ * of the redirect and store it (along with some other metadata) in a per-CPU
+ * struct bpf_redirect_info.
+ *
+ * 2. When the program returns the XDP_REDIRECT return code, the driver will
+ * call xdp_do_redirect() which will use the information in struct
+ * bpf_redirect_info to actually enqueue the frame into a map type-specific
+ * bulk queue structure.
+ *
+ * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
+ * which will flush all the different bulk queues, thus completing the
+ * redirect.
+ *
+ * Pointers to the map entries will be kept around for this whole sequence of
+ * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
+ * the core code; instead, the RCU protection relies on everything happening
+ * inside a single NAPI poll sequence, which means it's between a pair of calls
+ * to local_bh_disable()/local_bh_enable().
+ *
+ * The map entries are marked as __rcu and the map code makes sure to
+ * dereference those pointers with rcu_dereference_check() in a way that works
+ * for both sections that hold an rcu_read_lock() and sections that are
+ * called from NAPI without a separate rcu_read_lock(). The code below does not
+ * use RCU annotations, but relies on those in the map code.
+ */
void xdp_do_flush(void)
{
__dev_flush();
@@ -3927,6 +3916,48 @@ void xdp_do_flush(void)
}
EXPORT_SYMBOL_GPL(xdp_do_flush);
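On the BPF side, step 1 of the sequence described in the comment above is a single bpf_redirect_map() call; the dev_map_enqueue_multi() path added further below is selected with the broadcast flags. A minimal program sketch, assuming a UAPI that already exposes BPF_F_BROADCAST and BPF_F_EXCLUDE_INGRESS; the map name and size are illustrative:

// SPDX-License-Identifier: GPL-2.0
/* Minimal XDP sketch; compile with clang -target bpf. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 32);
	__type(key, __u32);
	__type(value, __u32);
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_broadcast(struct xdp_md *ctx)
{
	/* Step 1: record the target(s); the key is ignored when broadcasting.
	 * Steps 2 and 3 (xdp_do_redirect() / xdp_do_flush()) run in the driver.
	 */
	return bpf_redirect_map(&tx_ports, 0,
				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}

char LICENSE[] SEC("license") = "GPL";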
+void bpf_clear_redirect_map(struct bpf_map *map)
+{
+ struct bpf_redirect_info *ri;
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+ /* Avoid polluting remote cacheline due to writes if
+ * not needed. Once we pass this test, we need the
+ * cmpxchg() to make sure it hasn't been changed in
+ * the meantime by remote CPU.
+ */
+ if (unlikely(READ_ONCE(ri->map) == map))
+ cmpxchg(&ri->map, map, NULL);
+ }
+}
+
+DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp)
+{
+ struct net_device *master, *slave;
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
+ slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
+ if (slave && slave != xdp->rxq->dev) {
+ /* The target device is different from the receiving device, so
+ * redirect it to the new device.
+ * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
+ * drivers to unmap the packet from their rx ring.
+ */
+ ri->tgt_index = slave->ifindex;
+ ri->map_id = INT_MAX;
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ return XDP_REDIRECT;
+ }
+ return XDP_TX;
+}
+EXPORT_SYMBOL_GPL(xdp_master_redirect);
+
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
@@ -3934,6 +3965,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ struct bpf_map *map;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
@@ -3943,7 +3975,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- err = dev_map_enqueue(fwd, xdp, dev);
+ map = READ_ONCE(ri->map);
+ if (unlikely(map)) {
+ WRITE_ONCE(ri->map, NULL);
+ err = dev_map_enqueue_multi(xdp, dev, map,
+ ri->flags & BPF_F_EXCLUDE_INGRESS);
+ } else {
+ err = dev_map_enqueue(fwd, xdp, dev);
+ }
break;
case BPF_MAP_TYPE_CPUMAP:
err = cpu_map_enqueue(fwd, xdp, dev);
@@ -3985,13 +4024,21 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
enum bpf_map_type map_type, u32 map_id)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ struct bpf_map *map;
int err;
switch (map_type) {
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- err = dev_map_generic_redirect(fwd, skb, xdp_prog);
+ map = READ_ONCE(ri->map);
+ if (unlikely(map)) {
+ WRITE_ONCE(ri->map, NULL);
+ err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
+ ri->flags & BPF_F_EXCLUDE_INGRESS);
+ } else {
+ err = dev_map_generic_redirect(fwd, skb, xdp_prog);
+ }
if (unlikely(err))
goto err;
break;
@@ -4001,8 +4048,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
goto err;
consume_skb(skb);
break;
+ case BPF_MAP_TYPE_CPUMAP:
+ err = cpu_map_generic_redirect(fwd, skb);
+ if (unlikely(err))
+ goto err;
+ break;
default:
- /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
err = -EBADRQC;
goto err;
}
@@ -4625,6 +4676,30 @@ static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
};
+BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
+{
+ return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
+ .func = bpf_get_netns_cookie_sock_ops,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+
+BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
+{
+ return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
+ .func = bpf_get_netns_cookie_sk_msg,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+
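The two protos above make bpf_get_netns_cookie() available to sockops and sk_msg programs (wired into their func_proto tables further below). A minimal sockops sketch, assuming libbpf helper definitions; the bpf_printk() use is only for illustration:

// SPDX-License-Identifier: GPL-2.0
/* Minimal sockops sketch using the newly allowed helper. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int log_netns_cookie(struct bpf_sock_ops *skops)
{
	__u64 cookie = bpf_get_netns_cookie(skops);

	/* e.g. key per-netns state in a map by this cookie */
	bpf_printk("netns cookie %llu op %d", cookie, skops->op);
	return 1;
}

char LICENSE[] SEC("license") = "GPL";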
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
struct sock *sk = sk_to_full_sk(skb->sk);
@@ -4973,6 +5048,46 @@ err_clear:
return -EINVAL;
}
+BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ if (level == SOL_TCP && optname == TCP_CONGESTION) {
+ if (optlen >= sizeof("cdg") - 1 &&
+ !strncmp("cdg", optval, optlen))
+ return -ENOTSUPP;
+ }
+
+ return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_proto = {
+ .func = bpf_sk_setsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
+ int, optname, char *, optval, int, optlen)
+{
+ return _bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_getsockopt_proto = {
+ .func = bpf_sk_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -7406,6 +7521,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_sock_ops_proto;
#ifdef CONFIG_INET
case BPF_FUNC_load_hdr_opt:
return &bpf_sock_ops_load_hdr_opt_proto;
@@ -7452,6 +7569,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_sk_msg_proto;
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
@@ -10008,11 +10127,13 @@ out:
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
struct sock_reuseport *reuse,
struct sock *sk, struct sk_buff *skb,
+ struct sock *migrating_sk,
u32 hash)
{
reuse_kern->skb = skb;
reuse_kern->sk = sk;
reuse_kern->selected_sk = NULL;
+ reuse_kern->migrating_sk = migrating_sk;
reuse_kern->data_end = skb->data + skb_headlen(skb);
reuse_kern->hash = hash;
reuse_kern->reuseport_id = reuse->reuseport_id;
@@ -10021,13 +10142,14 @@ static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
struct bpf_prog *prog, struct sk_buff *skb,
+ struct sock *migrating_sk,
u32 hash)
{
struct sk_reuseport_kern reuse_kern;
enum sk_action action;
- bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
- action = BPF_PROG_RUN(prog, &reuse_kern);
+ bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
+ action = bpf_prog_run(prog, &reuse_kern);
if (action == SK_PASS)
return reuse_kern.selected_sk;
@@ -10136,6 +10258,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
return &sk_reuseport_load_bytes_proto;
case BPF_FUNC_skb_load_bytes_relative:
return &sk_reuseport_load_bytes_relative_proto;
+ case BPF_FUNC_get_socket_cookie:
+ return &bpf_get_socket_ptr_cookie_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -10165,6 +10289,14 @@ sk_reuseport_is_valid_access(int off, int size,
case offsetof(struct sk_reuseport_md, hash):
return size == size_default;
+ case offsetof(struct sk_reuseport_md, sk):
+ info->reg_type = PTR_TO_SOCKET;
+ return size == sizeof(__u64);
+
+ case offsetof(struct sk_reuseport_md, migrating_sk):
+ info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+ return size == sizeof(__u64);
+
/* Fields that allow narrowing */
case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
if (size < sizeof_field(struct sk_buff, protocol))
@@ -10237,6 +10369,14 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct sk_reuseport_md, bind_inany):
SK_REUSEPORT_LOAD_FIELD(bind_inany);
break;
+
+ case offsetof(struct sk_reuseport_md, sk):
+ SK_REUSEPORT_LOAD_FIELD(sk);
+ break;
+
+ case offsetof(struct sk_reuseport_md, migrating_sk):
+ SK_REUSEPORT_LOAD_FIELD(migrating_sk);
+ break;
}
return insn - insn_buf;
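The new sk and migrating_sk context fields let an sk_reuseport program tell an ordinary lookup (migrating_sk is NULL) apart from a request-socket migration. A hedged sketch of the BPF side; the map, key scheme and the "sk_reuseport/migrate" section name follow the wider socket-migration series and libbpf conventions, not this hunk:

// SPDX-License-Identifier: GPL-2.0
/* Hedged sk_reuseport sketch using the new migrating_sk field. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, __u64);
} listeners SEC(".maps");

SEC("sk_reuseport/migrate")
int select_or_migrate(struct sk_reuseport_md *reuse_md)
{
	__u32 key = 0;

	if (reuse_md->migrating_sk)
		/* Migration request: the old listener is going away,
		 * pick a surviving one from the map.
		 */
		key = 1;

	if (bpf_sk_select_reuseport(reuse_md, &listeners, &key, 0))
		return SK_DROP;
	return SK_PASS;
}

char LICENSE[] SEC("license") = "GPL";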
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3ed7c98a98e1..bac0184cf3de 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -943,8 +943,8 @@ bool __skb_flow_dissect(const struct net *net,
int offset = 0;
ops = skb->dev->dsa_ptr->tag_ops;
- /* Tail taggers don't break flow dissection */
- if (!ops->tail_tag) {
+ /* Only DSA header taggers break flow dissection */
+ if (ops->needed_headroom) {
if (ops->flow_dissect)
ops->flow_dissect(skb, &proto, &offset);
else
@@ -1056,8 +1056,10 @@ proto_again:
FLOW_DISSECTOR_KEY_IPV4_ADDRS,
target_container);
- memcpy(&key_addrs->v4addrs, &iph->saddr,
- sizeof(key_addrs->v4addrs));
+ memcpy(&key_addrs->v4addrs.src, &iph->saddr,
+ sizeof(key_addrs->v4addrs.src));
+ memcpy(&key_addrs->v4addrs.dst, &iph->daddr,
+ sizeof(key_addrs->v4addrs.dst));
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
@@ -1101,8 +1103,10 @@ proto_again:
FLOW_DISSECTOR_KEY_IPV6_ADDRS,
target_container);
- memcpy(&key_addrs->v6addrs, &iph->saddr,
- sizeof(key_addrs->v6addrs));
+ memcpy(&key_addrs->v6addrs.src, &iph->saddr,
+ sizeof(key_addrs->v6addrs.src));
+ memcpy(&key_addrs->v6addrs.dst, &iph->daddr,
+ sizeof(key_addrs->v6addrs.dst));
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
@@ -1504,7 +1508,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
}
EXPORT_SYMBOL(flow_get_u32_dst);
-/* Sort the source and destination IP (and the ports if the IP are the same),
+/* Sort the source and destination IP and the ports,
* to have consistent hash within the two directions
*/
static inline void __flow_hash_consistentify(struct flow_keys *keys)
@@ -1515,11 +1519,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
addr_diff = (__force u32)keys->addrs.v4addrs.dst -
(__force u32)keys->addrs.v4addrs.src;
- if ((addr_diff < 0) ||
- (addr_diff == 0 &&
- ((__force u16)keys->ports.dst <
- (__force u16)keys->ports.src))) {
+ if (addr_diff < 0)
swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+ if ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src) {
swap(keys->ports.src, keys->ports.dst);
}
break;
@@ -1527,13 +1531,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
addr_diff = memcmp(&keys->addrs.v6addrs.dst,
&keys->addrs.v6addrs.src,
sizeof(keys->addrs.v6addrs.dst));
- if ((addr_diff < 0) ||
- (addr_diff == 0 &&
- ((__force u16)keys->ports.dst <
- (__force u16)keys->ports.src))) {
+ if (addr_diff < 0) {
for (i = 0; i < 4; i++)
swap(keys->addrs.v6addrs.src.s6_addr32[i],
keys->addrs.v6addrs.dst.s6_addr32[i]);
+ }
+ if ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src) {
swap(keys->ports.src, keys->ports.dst);
}
break;
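The rewrite above decouples the port swap from the address comparison, so ports are canonicalized even when the two addresses already differ. A small standalone illustration (userspace, not kernel code) showing that both directions of a flow now collapse to the same key:

/* Standalone illustration of the consistentify rule after this change. */
#include <stdint.h>
#include <stdio.h>

struct key { uint32_t saddr, daddr; uint16_t sport, dport; };

static void consistentify(struct key *k)
{
	if ((int32_t)(k->daddr - k->saddr) < 0) {	/* sort addresses */
		uint32_t tmp = k->saddr; k->saddr = k->daddr; k->daddr = tmp;
	}
	if (k->dport < k->sport) {			/* sort ports independently */
		uint16_t tmp = k->sport; k->sport = k->dport; k->dport = tmp;
	}
}

int main(void)
{
	struct key a = { 0x0a000001, 0x0a000002, 443, 40000 };	/* A -> B */
	struct key b = { 0x0a000002, 0x0a000001, 40000, 443 };	/* B -> A */

	consistentify(&a);
	consistentify(&b);
	printf("same key: %d\n",
	       a.saddr == b.saddr && a.daddr == b.daddr &&
	       a.sport == b.sport && a.dport == b.dport);
	return 0;
}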
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 715b67f6c62f..6beaea13564a 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -321,13 +321,13 @@ EXPORT_SYMBOL(flow_block_cb_setup_simple);
static DEFINE_MUTEX(flow_indr_block_lock);
static LIST_HEAD(flow_block_indr_list);
static LIST_HEAD(flow_block_indr_dev_list);
+static LIST_HEAD(flow_indir_dev_list);
struct flow_indr_dev {
struct list_head list;
flow_indr_block_bind_cb_t *cb;
void *cb_priv;
refcount_t refcnt;
- struct rcu_head rcu;
};
static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb,
@@ -346,6 +346,33 @@ static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb,
return indr_dev;
}
+struct flow_indir_dev_info {
+ void *data;
+ struct net_device *dev;
+ struct Qdisc *sch;
+ enum tc_setup_type type;
+ void (*cleanup)(struct flow_block_cb *block_cb);
+ struct list_head list;
+ enum flow_block_command command;
+ enum flow_block_binder_type binder_type;
+ struct list_head *cb_list;
+};
+
+static void existing_qdiscs_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
+{
+ struct flow_block_offload bo;
+ struct flow_indir_dev_info *cur;
+
+ list_for_each_entry(cur, &flow_indir_dev_list, list) {
+ memset(&bo, 0, sizeof(bo));
+ bo.command = cur->command;
+ bo.binder_type = cur->binder_type;
+ INIT_LIST_HEAD(&bo.cb_list);
+ cb(cur->dev, cur->sch, cb_priv, cur->type, &bo, cur->data, cur->cleanup);
+ list_splice(&bo.cb_list, cur->cb_list);
+ }
+}
+
int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
{
struct flow_indr_dev *indr_dev;
@@ -367,6 +394,7 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
}
list_add(&indr_dev->list, &flow_block_indr_dev_list);
+ existing_qdiscs_register(cb, cb_priv);
mutex_unlock(&flow_indr_block_lock);
return 0;
@@ -463,7 +491,59 @@ out:
}
EXPORT_SYMBOL(flow_indr_block_cb_alloc);
-int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
+static struct flow_indir_dev_info *find_indir_dev(void *data)
+{
+ struct flow_indir_dev_info *cur;
+
+ list_for_each_entry(cur, &flow_indir_dev_list, list) {
+ if (cur->data == data)
+ return cur;
+ }
+ return NULL;
+}
+
+static int indir_dev_add(void *data, struct net_device *dev, struct Qdisc *sch,
+ enum tc_setup_type type, void (*cleanup)(struct flow_block_cb *block_cb),
+ struct flow_block_offload *bo)
+{
+ struct flow_indir_dev_info *info;
+
+ info = find_indir_dev(data);
+ if (info)
+ return -EEXIST;
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ info->data = data;
+ info->dev = dev;
+ info->sch = sch;
+ info->type = type;
+ info->cleanup = cleanup;
+ info->command = bo->command;
+ info->binder_type = bo->binder_type;
+ info->cb_list = bo->cb_list_head;
+
+ list_add(&info->list, &flow_indir_dev_list);
+ return 0;
+}
+
+static int indir_dev_remove(void *data)
+{
+ struct flow_indir_dev_info *info;
+
+ info = find_indir_dev(data);
+ if (!info)
+ return -ENOENT;
+
+ list_del(&info->list);
+
+ kfree(info);
+ return 0;
+}
+
+int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
enum tc_setup_type type, void *data,
struct flow_block_offload *bo,
void (*cleanup)(struct flow_block_cb *block_cb))
@@ -471,6 +551,12 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch,
struct flow_indr_dev *this;
mutex_lock(&flow_indr_block_lock);
+
+ if (bo->command == FLOW_BLOCK_BIND)
+ indir_dev_add(data, dev, sch, type, cleanup, bo);
+ else if (bo->command == FLOW_BLOCK_UNBIND)
+ indir_dev_remove(data);
+
list_for_each_entry(this, &flow_block_indr_dev_list, list)
this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 75431ca9300f..1a455847da54 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -158,7 +158,7 @@ static void linkwatch_do_dev(struct net_device *dev)
clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);
rfc2863_policy(dev);
- if (dev->flags & IFF_UP && netif_device_present(dev)) {
+ if (dev->flags & IFF_UP) {
if (netif_carrier_ok(dev))
dev_activate(dev);
else
@@ -204,7 +204,8 @@ static void __linkwatch_run_queue(int urgent_only)
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
list_del_init(&dev->link_watch_list);
- if (urgent_only && !linkwatch_urgent_event(dev)) {
+ if (!netif_device_present(dev) ||
+ (urgent_only && !linkwatch_urgent_event(dev))) {
list_add_tail(&dev->link_watch_list, &lweventlist);
continue;
}
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 8ec7d13d2860..2820aca2173a 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -23,6 +23,9 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>
+DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
+EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
+
#ifdef CONFIG_MODULES
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
@@ -43,6 +46,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
return "SEG6LOCAL";
case LWTUNNEL_ENCAP_RPL:
return "RPL";
+ case LWTUNNEL_ENCAP_IOAM6:
+ return "IOAM6";
case LWTUNNEL_ENCAP_IP6:
case LWTUNNEL_ENCAP_IP:
case LWTUNNEL_ENCAP_NONE:
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bf774575ad71..2d5bc3a75fae 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -741,12 +741,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
if (tbl->pconstructor && tbl->pconstructor(n)) {
- if (dev)
- dev_put(dev);
+ dev_put(dev);
kfree(n);
n = NULL;
goto out;
@@ -778,8 +776,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
+ dev_put(n->dev);
kfree(n);
return 0;
}
@@ -812,8 +809,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
n->next = NULL;
if (tbl->pdestructor)
tbl->pdestructor(n);
- if (n->dev)
- dev_put(n->dev);
+ dev_put(n->dev);
kfree(n);
}
return -ENOENT;
@@ -1662,8 +1658,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
list_del(&parms->list);
parms->dead = 1;
write_unlock_bh(&tbl->lock);
- if (parms->dev)
- dev_put(parms->dev);
+ dev_put(parms->dev);
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
@@ -2533,6 +2528,13 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
return false;
master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+
+ /* 0 is already used to denote NDA_MASTER wasn't passed, therefore we need
+ * another invalid value for ifindex to denote "no master".
+ */
+ if (master_idx == -1)
+ return !!master;
+
if (!master || master->ifindex != master_idx)
return true;
@@ -3142,7 +3144,7 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
struct net *net = seq_file_net(seq);
struct neigh_table *tbl = state->tbl;
struct pneigh_entry *pn = NULL;
- int bucket = state->bucket;
+ int bucket;
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
@@ -3315,12 +3317,13 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
+ seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
return 0;
}
- seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx %08lx %08lx\n",
+ seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
+ "%08lx %08lx %08lx "
+ "%08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index d8b9dbabd4a4..eab5fc88a002 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
- "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
+ seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu "
+ "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
@@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
static int dev_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Inter-| Receive "
- " | Transmit\n"
- " face |bytes packets errs drop fifo frame "
- "compressed multicast|bytes packets errs "
- "drop fifo colls carrier compressed\n");
+ seq_puts(seq, "Interface| Receive "
+ " | Transmit\n"
+ " | bytes packets errs drop fifo frame "
+ "compressed multicast| bytes packets errs "
+ " drop fifo colls carrier compressed\n");
else
dev_seq_printf_stats(seq, v);
return 0;
@@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
struct packet_type *pt = v;
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Type Device Function\n");
+ seq_puts(seq, "Type Device Function\n");
else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-8s %ps\n",
+ seq_printf(seq, " %-9s %ps\n",
pt->dev ? pt->dev->name : "", pt->func);
}
@@ -327,12 +327,14 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct netdev_hw_addr *ha;
struct net_device *dev = v;
- if (v == SEQ_START_TOKEN)
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n");
return 0;
+ }
netif_addr_lock_bh(dev);
netdev_for_each_mc_addr(ha, dev) {
- seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
+ seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n",
dev->ifindex, dev->name,
ha->refcount, ha->global_use,
(int)dev->addr_len, ha->addr);
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 283ddb2dbc7d..c40cd8dd75c7 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -60,3 +60,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
+EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 9b5a767eddd5..a448a9b5bb2d 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -98,7 +98,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
}
ng = net_alloc_generic();
- if (ng == NULL)
+ if (!ng)
return -ENOMEM;
/*
@@ -148,13 +148,6 @@ out:
return err;
}
-static void ops_free(const struct pernet_operations *ops, struct net *net)
-{
- if (ops->id && ops->size) {
- kfree(net_generic(net, *ops->id));
- }
-}
-
static void ops_pre_exit_list(const struct pernet_operations *ops,
struct list_head *net_exit_list)
{
@@ -184,7 +177,7 @@ static void ops_free_list(const struct pernet_operations *ops,
struct net *net;
if (ops->size && ops->id) {
list_for_each_entry(net, net_exit_list, exit_list)
- ops_free(ops, net);
+ kfree(net_generic(net, *ops->id));
}
}
@@ -433,15 +426,18 @@ out_free:
static void net_free(struct net *net)
{
- kfree(rcu_access_pointer(net->gen));
- kmem_cache_free(net_cachep, net);
+ if (refcount_dec_and_test(&net->passive)) {
+ kfree(rcu_access_pointer(net->gen));
+ kmem_cache_free(net_cachep, net);
+ }
}
void net_drop_ns(void *p)
{
- struct net *ns = p;
- if (ns && refcount_dec_and_test(&ns->passive))
- net_free(ns);
+ struct net *net = (struct net *)p;
+
+ if (net)
+ net_free(net);
}
struct net *copy_net_ns(unsigned long flags,
@@ -479,7 +475,7 @@ struct net *copy_net_ns(unsigned long flags,
put_userns:
key_remove_domain(net->key_domain);
put_user_ns(user_ns);
- net_drop_ns(net);
+ net_free(net);
dec_ucounts:
dec_net_namespaces(ucounts);
return ERR_PTR(rv);
@@ -611,7 +607,7 @@ static void cleanup_net(struct work_struct *work)
dec_net_namespaces(net->ucounts);
key_remove_domain(net->key_domain);
put_user_ns(net->user_ns);
- net_drop_ns(net);
+ net_free(net);
}
}
@@ -1120,6 +1116,14 @@ static int __init net_ns_init(void)
pure_initcall(net_ns_init);
+static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
+{
+ ops_pre_exit_list(ops, net_exit_list);
+ synchronize_rcu();
+ ops_exit_list(ops, net_exit_list);
+ ops_free_list(ops, net_exit_list);
+}
+
#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
@@ -1145,10 +1149,7 @@ static int __register_pernet_operations(struct list_head *list,
out_undo:
/* If I have an error cleanup all namespaces I initialized */
list_del(&ops->list);
- ops_pre_exit_list(ops, &net_exit_list);
- synchronize_rcu();
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+ free_exit_list(ops, &net_exit_list);
return error;
}
@@ -1161,10 +1162,8 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
/* See comment in __register_pernet_operations() */
for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list);
- ops_pre_exit_list(ops, &net_exit_list);
- synchronize_rcu();
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+
+ free_exit_list(ops, &net_exit_list);
}
#else
@@ -1187,10 +1186,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
} else {
LIST_HEAD(net_exit_list);
list_add(&init_net.exit_list, &net_exit_list);
- ops_pre_exit_list(ops, &net_exit_list);
- synchronize_rcu();
- ops_exit_list(ops, &net_exit_list);
- ops_free_list(ops, &net_exit_list);
+ free_exit_list(ops, &net_exit_list);
}
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c310c7c1cef7..edfc0f8011f8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -36,6 +36,7 @@
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>
+#include <linux/kconfig.h>
/*
* We maintain a small pool of fully-sized skbs, to make sure the
@@ -389,7 +390,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
static atomic_t ip_ident;
struct ipv6hdr *ip6h;
- WARN_ON_ONCE(!irqs_disabled());
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ WARN_ON_ONCE(!irqs_disabled());
udp_len = len + sizeof(*udph);
if (np->ipv6)
@@ -428,7 +430,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
ip6h = ipv6_hdr(skb);
/* ip6h->version = 6; ip6h->priority = 0; */
- put_unaligned(0x60, (unsigned char *)ip6h);
+ *(unsigned char *)ip6h = 0x60;
ip6h->flow_lbl[0] = 0;
ip6h->flow_lbl[1] = 0;
ip6h->flow_lbl[2] = 0;
@@ -456,7 +458,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
iph = ip_hdr(skb);
/* iph->version = 4; iph->ihl = 5; */
- put_unaligned(0x45, (unsigned char *)iph);
+ *(unsigned char *)iph = 0x45;
iph->tos = 0;
put_unaligned(htons(ip_len), &(iph->tot_len));
iph->id = htons(atomic_inc_return(&ip_ident));
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 3c4c4c7a0402..1a6978427d6c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -17,12 +17,15 @@
#include <linux/dma-mapping.h>
#include <linux/page-flags.h>
#include <linux/mm.h> /* for __put_page() */
+#include <linux/poison.h>
#include <trace/events/page_pool.h>
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
+#define BIAS_MAX LONG_MAX
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -66,6 +69,10 @@ static int page_pool_init(struct page_pool *pool,
*/
}
+ if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+ pool->p.flags & PP_FLAG_PAGE_FRAG)
+ return -EINVAL;
+
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
@@ -205,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
return true;
}
+static void page_pool_set_pp_info(struct page_pool *pool,
+ struct page *page)
+{
+ page->pp = pool;
+ page->pp_magic |= PP_SIGNATURE;
+}
+
+static void page_pool_clear_pp_info(struct page *page)
+{
+ page->pp_magic = 0;
+ page->pp = NULL;
+}
+
static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
gfp_t gfp)
{
@@ -221,6 +241,8 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
return NULL;
}
+ page_pool_set_pp_info(pool, page);
+
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
@@ -263,6 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
put_page(page);
continue;
}
+
+ page_pool_set_pp_info(pool, page);
pool->alloc.cache[pool->alloc.count++] = page;
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
@@ -341,10 +365,12 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
DMA_ATTR_SKIP_CPU_SYNC);
page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
+ page_pool_clear_pp_info(page);
+
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
*/
- count = atomic_inc_return(&pool->pages_state_release_cnt);
+ count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
trace_page_pool_state_release(pool, page, count);
}
EXPORT_SYMBOL(page_pool_release_page);
@@ -399,6 +425,11 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
+ /* It is not the last user for the page frag case */
+ if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
+ page_pool_atomic_sub_frag_count_return(page, 1))
+ return NULL;
+
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
* regular page allocator APIs.
@@ -491,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
+static struct page *page_pool_drain_frag(struct page_pool *pool,
+ struct page *page)
+{
+ long drain_count = BIAS_MAX - pool->frag_users;
+
+ /* Some user is still using the page frag */
+ if (likely(page_pool_atomic_sub_frag_count_return(page,
+ drain_count)))
+ return NULL;
+
+ if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ page_pool_dma_sync_for_device(pool, page, -1);
+
+ return page;
+ }
+
+ page_pool_return_page(pool, page);
+ return NULL;
+}
+
+static void page_pool_free_frag(struct page_pool *pool)
+{
+ long drain_count = BIAS_MAX - pool->frag_users;
+ struct page *page = pool->frag_page;
+
+ pool->frag_page = NULL;
+
+ if (!page ||
+ page_pool_atomic_sub_frag_count_return(page, drain_count))
+ return;
+
+ page_pool_return_page(pool, page);
+}
+
+struct page *page_pool_alloc_frag(struct page_pool *pool,
+ unsigned int *offset,
+ unsigned int size, gfp_t gfp)
+{
+ unsigned int max_size = PAGE_SIZE << pool->p.order;
+ struct page *page = pool->frag_page;
+
+ if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+ size > max_size))
+ return NULL;
+
+ size = ALIGN(size, dma_get_cache_alignment());
+ *offset = pool->frag_offset;
+
+ if (page && *offset + size > max_size) {
+ page = page_pool_drain_frag(pool, page);
+ if (page)
+ goto frag_reset;
+ }
+
+ if (!page) {
+ page = page_pool_alloc_pages(pool, gfp);
+ if (unlikely(!page)) {
+ pool->frag_page = NULL;
+ return NULL;
+ }
+
+ pool->frag_page = page;
+
+frag_reset:
+ pool->frag_users = 1;
+ *offset = 0;
+ pool->frag_offset = size;
+ page_pool_set_frag_count(page, BIAS_MAX);
+ return page;
+ }
+
+ pool->frag_users++;
+ pool->frag_offset = *offset + size;
+ return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag);
+
static void page_pool_empty_ring(struct page_pool *pool)
{
struct page *page;
@@ -596,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_put(pool))
return;
+ page_pool_free_frag(pool);
+
if (!page_pool_release(pool))
return;
@@ -622,3 +733,32 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
}
}
EXPORT_SYMBOL(page_pool_update_nid);
+
+bool page_pool_return_skb_page(struct page *page)
+{
+ struct page_pool *pp;
+
+ page = compound_head(page);
+
+ /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+ * in order to preserve any existing bits, such as bit 0 for the
+	 * head page of a compound page and bit 1 for a pfmemalloc page,
+	 * so mask those bits on the freeing side when doing the check
+	 * below; page_is_pfmemalloc() is checked in __page_pool_put_page()
+ * to avoid recycling the pfmemalloc page.
+ */
+ if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+ return false;
+
+ pp = page->pp;
+
+ /* Driver set this to memory recycling info. Reset it on recycle.
+ * This will *not* work for NIC using a split-page memory model.
+ * The page will be returned to the pool here regardless of the
+ * 'flipped' fragment being in use or not.
+ */
+ page_pool_put_full_page(pp, page, false);
+
+ return true;
+}
+EXPORT_SYMBOL(page_pool_return_skb_page);
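
For context, a sketch (not part of this diff) of how a driver might consume the new frag API: the pool must be created with PP_FLAG_PAGE_FRAG, page_pool_alloc_frag() hands out sub-page chunks while internally biasing the per-page frag count, and the page only goes back to the pool once every fragment taken from it has been released. All my_* names below are hypothetical.

/* Hypothetical driver RX refill path, assuming a PP_FLAG_PAGE_FRAG pool. */
#include <linux/errno.h>
#include <linux/gfp.h>
#include <net/page_pool.h>

struct my_rx_buf {
	struct page *page;
	unsigned int offset;
};

static int my_rx_refill_one(struct page_pool *pool, struct my_rx_buf *buf,
			    unsigned int rx_buf_size)
{
	unsigned int offset;
	struct page *page;

	/* Returns a page plus an offset for this fragment; the frag user
	 * count is tracked inside the pool, so the caller only has to
	 * release the fragment (e.g. via page_pool_put_full_page()) when
	 * it is done with it.
	 */
	page = page_pool_alloc_frag(pool, &offset, rx_buf_size, GFP_ATOMIC);
	if (!page)
		return -ENOMEM;

	buf->page = page;
	buf->offset = offset;
	return 0;
}
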
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3fba429f1f57..a3d74e2704c4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -175,6 +175,9 @@
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
#define MPLS_STACK_BOTTOM htonl(0x00000100)
+/* Max number of internet mix entries that can be specified in imix_weights. */
+#define MAX_IMIX_ENTRIES 20
+#define IMIX_PRECISION 100 /* Precision of IMIX distribution */
#define func_enter() pr_debug("entering %s\n", __func__);
@@ -242,6 +245,12 @@ static char *pkt_flag_names[] = {
#define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4)
#define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4)
+struct imix_pkt {
+ u64 size;
+ u64 weight;
+ u64 count_so_far;
+};
+
struct flow_state {
__be32 cur_daddr;
int count;
@@ -343,6 +352,12 @@ struct pktgen_dev {
__u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6
(see RFC 3260, sec. 4) */
+ /* IMIX */
+ unsigned int n_imix_entries;
+ struct imix_pkt imix_entries[MAX_IMIX_ENTRIES];
+	/* Maps 0-IMIX_PRECISION range to imix_entry based on probability */
+ __u8 imix_distribution[IMIX_PRECISION];
+
/* MPLS */
unsigned int nr_labels; /* Depth of stack, 0 = no MPLS */
__be32 labels[MAX_MPLS_LABELS];
@@ -467,10 +482,11 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
static int pktgen_device_event(struct notifier_block *, unsigned long, void *);
static void pktgen_run_all_threads(struct pktgen_net *pn);
static void pktgen_reset_all_threads(struct pktgen_net *pn);
-static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn);
+static void pktgen_stop_all_threads(struct pktgen_net *pn);
static void pktgen_stop(struct pktgen_thread *t);
static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
+static void fill_imix_distribution(struct pktgen_dev *pkt_dev);
/* Module parameters, defaults. */
static int pg_count_d __read_mostly = 1000;
@@ -516,14 +532,11 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
data[count - 1] = 0; /* Strip trailing '\n' and terminate string */
if (!strcmp(data, "stop"))
- pktgen_stop_all_threads_ifs(pn);
-
+ pktgen_stop_all_threads(pn);
else if (!strcmp(data, "start"))
pktgen_run_all_threads(pn);
-
else if (!strcmp(data, "reset"))
pktgen_reset_all_threads(pn);
-
else
return -EINVAL;
@@ -555,6 +568,16 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
(unsigned long long)pkt_dev->count, pkt_dev->min_pkt_size,
pkt_dev->max_pkt_size);
+ if (pkt_dev->n_imix_entries > 0) {
+ seq_puts(seq, " imix_weights: ");
+ for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+ seq_printf(seq, "%llu,%llu ",
+ pkt_dev->imix_entries[i].size,
+ pkt_dev->imix_entries[i].weight);
+ }
+ seq_puts(seq, "\n");
+ }
+
seq_printf(seq,
" frags: %d delay: %llu clone_skb: %d ifname: %s\n",
pkt_dev->nfrags, (unsigned long long) pkt_dev->delay,
@@ -672,6 +695,18 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
(unsigned long long)pkt_dev->sofar,
(unsigned long long)pkt_dev->errors);
+ if (pkt_dev->n_imix_entries > 0) {
+ int i;
+
+ seq_puts(seq, " imix_size_counts: ");
+ for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+ seq_printf(seq, "%llu,%llu ",
+ pkt_dev->imix_entries[i].size,
+ pkt_dev->imix_entries[i].count_so_far);
+ }
+ seq_puts(seq, "\n");
+ }
+
seq_printf(seq,
" started: %lluus stopped: %lluus idle: %lluus\n",
(unsigned long long) ktime_to_us(pkt_dev->started_at),
@@ -795,6 +830,62 @@ done_str:
return i;
}
+/* Parses imix entries from user buffer.
+ * The user buffer should consist of imix entries separated by spaces
+ * where each entry consists of size and weight delimited by commas.
+ * For example: "size_1,weight_1 size_2,weight_2 ... size_n,weight_n".
+ */
+static ssize_t get_imix_entries(const char __user *buffer,
+ struct pktgen_dev *pkt_dev)
+{
+ const int max_digits = 10;
+ int i = 0;
+ long len;
+ char c;
+
+ pkt_dev->n_imix_entries = 0;
+
+ do {
+ unsigned long weight;
+ unsigned long size;
+
+ len = num_arg(&buffer[i], max_digits, &size);
+ if (len < 0)
+ return len;
+ i += len;
+ if (get_user(c, &buffer[i]))
+ return -EFAULT;
+ /* Check for comma between size_i and weight_i */
+ if (c != ',')
+ return -EINVAL;
+ i++;
+
+		if (size < 14 + 20 + 8)	/* floor: Ethernet + IPv4 + UDP headers */
+			size = 14 + 20 + 8;
+
+ len = num_arg(&buffer[i], max_digits, &weight);
+ if (len < 0)
+ return len;
+ if (weight <= 0)
+ return -EINVAL;
+
+ pkt_dev->imix_entries[pkt_dev->n_imix_entries].size = size;
+ pkt_dev->imix_entries[pkt_dev->n_imix_entries].weight = weight;
+
+ i += len;
+ if (get_user(c, &buffer[i]))
+ return -EFAULT;
+
+ i++;
+ pkt_dev->n_imix_entries++;
+
+ if (pkt_dev->n_imix_entries > MAX_IMIX_ENTRIES)
+ return -E2BIG;
+ } while (c == ' ');
+
+ return i;
+}
+
static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
{
unsigned int n = 0;
@@ -963,6 +1054,20 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
+ if (!strcmp(name, "imix_weights")) {
+ if (pkt_dev->clone_skb > 0)
+ return -EINVAL;
+
+ len = get_imix_entries(&user_buffer[i], pkt_dev);
+ if (len < 0)
+ return len;
+
+ fill_imix_distribution(pkt_dev);
+
+ i += len;
+ return count;
+ }
+
if (!strcmp(name, "debug")) {
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
@@ -1085,10 +1190,16 @@ static ssize_t pktgen_if_write(struct file *file,
len = num_arg(&user_buffer[i], 10, &value);
if (len < 0)
return len;
+ /* clone_skb is not supported for netif_receive xmit_mode and
+ * IMIX mode.
+ */
if ((value > 0) &&
((pkt_dev->xmit_mode == M_NETIF_RECEIVE) ||
!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
return -ENOTSUPP;
+ if (value > 0 && pkt_dev->n_imix_entries > 0)
+ return -EINVAL;
+
i += len;
pkt_dev->clone_skb = value;
@@ -1193,11 +1304,6 @@ static ssize_t pktgen_if_write(struct file *file,
* pktgen_xmit() is called
*/
pkt_dev->last_ok = 1;
-
- /* override clone_skb if user passed default value
- * at module loading time
- */
- pkt_dev->clone_skb = 0;
} else if (strcmp(f, "queue_xmit") == 0) {
pkt_dev->xmit_mode = M_QUEUE_XMIT;
pkt_dev->last_ok = 1;
@@ -2480,6 +2586,14 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
t = pkt_dev->min_pkt_size;
}
pkt_dev->cur_pkt_size = t;
+ } else if (pkt_dev->n_imix_entries > 0) {
+ struct imix_pkt *entry;
+ __u32 t = prandom_u32() % IMIX_PRECISION;
+ __u8 entry_index = pkt_dev->imix_distribution[t];
+
+ entry = &pkt_dev->imix_entries[entry_index];
+ entry->count_so_far++;
+ pkt_dev->cur_pkt_size = entry->size;
}
set_cur_queue_map(pkt_dev);
@@ -2487,6 +2601,32 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->flows[flow].count++;
}
+static void fill_imix_distribution(struct pktgen_dev *pkt_dev)
+{
+ int cumulative_probabilites[MAX_IMIX_ENTRIES];
+ int j = 0;
+ __u64 cumulative_prob = 0;
+ __u64 total_weight = 0;
+ int i = 0;
+
+ for (i = 0; i < pkt_dev->n_imix_entries; i++)
+ total_weight += pkt_dev->imix_entries[i].weight;
+
+ /* Fill cumulative_probabilites with sum of normalized probabilities */
+ for (i = 0; i < pkt_dev->n_imix_entries - 1; i++) {
+ cumulative_prob += div64_u64(pkt_dev->imix_entries[i].weight *
+ IMIX_PRECISION,
+ total_weight);
+ cumulative_probabilites[i] = cumulative_prob;
+ }
+ cumulative_probabilites[pkt_dev->n_imix_entries - 1] = 100;
+
+ for (i = 0; i < IMIX_PRECISION; i++) {
+ if (i == cumulative_probabilites[j])
+ j++;
+ pkt_dev->imix_distribution[i] = j;
+ }
+}
#ifdef CONFIG_XFRM
static u32 pktgen_dst_metrics[RTAX_MAX + 1] = {
@@ -3027,20 +3167,25 @@ static void pktgen_run(struct pktgen_thread *t)
t->control &= ~(T_STOP);
}
-static void pktgen_stop_all_threads_ifs(struct pktgen_net *pn)
+static void pktgen_handle_all_threads(struct pktgen_net *pn, u32 flags)
{
struct pktgen_thread *t;
- func_enter();
-
mutex_lock(&pktgen_thread_lock);
list_for_each_entry(t, &pn->pktgen_threads, th_list)
- t->control |= T_STOP;
+ t->control |= (flags);
mutex_unlock(&pktgen_thread_lock);
}
+static void pktgen_stop_all_threads(struct pktgen_net *pn)
+{
+ func_enter();
+
+ pktgen_handle_all_threads(pn, T_STOP);
+}
+
static int thread_is_running(const struct pktgen_thread *t)
{
const struct pktgen_dev *pkt_dev;
@@ -3103,16 +3248,9 @@ static int pktgen_wait_all_threads_run(struct pktgen_net *pn)
static void pktgen_run_all_threads(struct pktgen_net *pn)
{
- struct pktgen_thread *t;
-
func_enter();
- mutex_lock(&pktgen_thread_lock);
-
- list_for_each_entry(t, &pn->pktgen_threads, th_list)
- t->control |= (T_RUN);
-
- mutex_unlock(&pktgen_thread_lock);
+ pktgen_handle_all_threads(pn, T_RUN);
/* Propagate thread->control */
schedule_timeout_interruptible(msecs_to_jiffies(125));
@@ -3122,16 +3260,9 @@ static void pktgen_run_all_threads(struct pktgen_net *pn)
static void pktgen_reset_all_threads(struct pktgen_net *pn)
{
- struct pktgen_thread *t;
-
func_enter();
- mutex_lock(&pktgen_thread_lock);
-
- list_for_each_entry(t, &pn->pktgen_threads, th_list)
- t->control |= (T_REMDEVALL);
-
- mutex_unlock(&pktgen_thread_lock);
+ pktgen_handle_all_threads(pn, T_REMDEVALL);
/* Propagate thread->control */
schedule_timeout_interruptible(msecs_to_jiffies(125));
@@ -3157,7 +3288,19 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
pps = div64_u64(pkt_dev->sofar * NSEC_PER_SEC,
ktime_to_ns(elapsed));
- bps = pps * 8 * pkt_dev->cur_pkt_size;
+ if (pkt_dev->n_imix_entries > 0) {
+ int i;
+ struct imix_pkt *entry;
+
+ bps = 0;
+ for (i = 0; i < pkt_dev->n_imix_entries; i++) {
+ entry = &pkt_dev->imix_entries[i];
+ bps += entry->size * entry->count_so_far;
+ }
+ bps = div64_u64(bps * 8 * NSEC_PER_SEC, ktime_to_ns(elapsed));
+ } else {
+ bps = pps * 8 * pkt_dev->cur_pkt_size;
+ }
mbps = bps;
do_div(mbps, 1000000);
@@ -3459,7 +3602,6 @@ out:
static int pktgen_thread_worker(void *arg)
{
- DEFINE_WAIT(wait);
struct pktgen_thread *t = arg;
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
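
For illustration (not part of the diff): imix_weights takes space-separated "size,weight" pairs and is rejected while clone_skb is non-zero. A minimal userspace sketch that programs a 7:4:1 mix of 64/576/1500-byte packets; the /proc/net/pktgen/eth0 path is an example and assumes the interface was already added to a pktgen thread (e.g. via /proc/net/pktgen/kpktgend_0).

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *dev = "/proc/net/pktgen/eth0";	/* example device file */
	const char *cmd = "imix_weights 64,7 576,4 1500,1\n";
	int fd = open(dev, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write");
	close(fd);
	return 0;
}
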
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index e33fde06d528..dd4cf01d1e0a 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -103,7 +103,7 @@ static struct bpf_prog *ptp_insns __read_mostly;
unsigned int ptp_classify_raw(const struct sk_buff *skb)
{
- return BPF_PROG_RUN(ptp_insns, skb);
+ return bpf_prog_run(ptp_insns, skb);
}
EXPORT_SYMBOL_GPL(ptp_classify_raw);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ec931b080156..972c8cb303a5 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -9,7 +9,7 @@
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Fixes:
- * Vitaly E. Lavrov RTA_OK arithmetics was wrong.
+ * Vitaly E. Lavrov RTA_OK arithmetic was wrong.
*/
#include <linux/bitops.h>
@@ -234,7 +234,7 @@ unlock:
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
*
* Like rtnl_register, but for use by removable modules.
*/
@@ -254,7 +254,7 @@ EXPORT_SYMBOL_GPL(rtnl_register_module);
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
- * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions
+ * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions
*
* Registers the specified function pointers (at least one of them has
* to be non-NULL) to be called whenever a request message for the
@@ -376,12 +376,12 @@ int __rtnl_link_register(struct rtnl_link_ops *ops)
if (rtnl_link_ops_get(ops->kind))
return -EEXIST;
- /* The check for setup is here because if ops
+ /* The check for alloc/setup is here because if ops
* does not have that filled up, it is not possible
* to use the ops for creating device. So do not
* fill up dellink as well. That disables rtnl_dellink.
*/
- if (ops->setup && !ops->dellink)
+ if ((ops->alloc || ops->setup) && !ops->dellink)
ops->dellink = unregister_netdevice_queue;
list_add_tail(&ops->list, &link_ops);
@@ -543,7 +543,9 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
{
const struct rtnl_af_ops *ops;
- list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
+ ASSERT_RTNL();
+
+ list_for_each_entry(ops, &rtnl_af_ops, list) {
if (ops->family == family)
return ops;
}
@@ -708,15 +710,8 @@ out:
int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
{
struct sock *rtnl = net->rtnl;
- int err = 0;
- NETLINK_CB(skb).dst_group = group;
- if (echo)
- refcount_inc(&skb->users);
- netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL);
- if (echo)
- err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
- return err;
+ return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL);
}
int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
@@ -731,12 +726,8 @@ void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
struct nlmsghdr *nlh, gfp_t flags)
{
struct sock *rtnl = net->rtnl;
- int report = 0;
- if (nlh)
- report = nlmsg_report(nlh);
-
- nlmsg_notify(rtnl, skb, pid, group, report, flags);
+ nlmsg_notify(rtnl, skb, pid, group, nlmsg_report(nlh), flags);
}
EXPORT_SYMBOL(rtnl_notify);
@@ -1819,6 +1810,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
if (rtnl_fill_prop_list(skb, dev))
goto nla_put_failure;
+ if (dev->dev.parent &&
+ nla_put_string(skb, IFLA_PARENT_DEV_NAME,
+ dev_name(dev->dev.parent)))
+ goto nla_put_failure;
+
+ if (dev->dev.parent && dev->dev.parent->bus &&
+ nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME,
+ dev->dev.parent->bus->name))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -1878,6 +1879,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
[IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
+ [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1957,6 +1959,13 @@ static bool link_master_filtered(struct net_device *dev, int master_idx)
return false;
master = netdev_master_upper_dev_get(dev);
+
+ /* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need
+ * another invalid value for ifindex to denote "no master".
+ */
+ if (master_idx == -1)
+ return !!master;
+
if (!master || master->ifindex != master_idx)
return true;
@@ -2255,7 +2264,8 @@ invalid_attr:
return -EINVAL;
}
-static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
{
if (dev) {
if (tb[IFLA_ADDRESS] &&
@@ -2274,27 +2284,18 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- rcu_read_lock();
af_ops = rtnl_af_lookup(nla_type(af));
- if (!af_ops) {
- rcu_read_unlock();
+ if (!af_ops)
return -EAFNOSUPPORT;
- }
- if (!af_ops->set_link_af) {
- rcu_read_unlock();
+ if (!af_ops->set_link_af)
return -EOPNOTSUPP;
- }
if (af_ops->validate_link_af) {
- err = af_ops->validate_link_af(dev, af);
- if (err < 0) {
- rcu_read_unlock();
+ err = af_ops->validate_link_af(dev, af, extack);
+ if (err < 0)
return err;
- }
}
-
- rcu_read_unlock();
}
}
@@ -2574,7 +2575,7 @@ static int do_set_proto_down(struct net_device *dev,
if (nl_proto_down) {
proto_down = nla_get_u8(nl_proto_down);
- /* Dont turn off protodown if there are active reasons */
+ /* Don't turn off protodown if there are active reasons */
if (!proto_down && dev->proto_down_reason) {
NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons");
return -EBUSY;
@@ -2599,11 +2600,12 @@ static int do_setlink(const struct sk_buff *skb,
const struct net_device_ops *ops = dev->netdev_ops;
int err;
- err = validate_linkmsg(dev, tb);
+ err = validate_linkmsg(dev, tb, extack);
if (err < 0)
return err;
if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
+ const char *pat = ifname && ifname[0] ? ifname : NULL;
struct net *net;
int new_ifindex;
@@ -2619,7 +2621,7 @@ static int do_setlink(const struct sk_buff *skb,
else
new_ifindex = 0;
- err = __dev_change_net_namespace(dev, net, ifname, new_ifindex);
+ err = __dev_change_net_namespace(dev, net, pat, new_ifindex);
put_net(net);
if (err)
goto errout;
@@ -2868,17 +2870,12 @@ static int do_setlink(const struct sk_buff *skb,
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- rcu_read_lock();
-
BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
err = af_ops->set_link_af(dev, af, extack);
- if (err < 0) {
- rcu_read_unlock();
+ if (err < 0)
goto errout;
- }
- rcu_read_unlock();
status |= DO_SETLINK_NOTIFY;
}
}
@@ -3177,8 +3174,17 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
return ERR_PTR(-EINVAL);
}
- dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
- ops->setup, num_tx_queues, num_rx_queues);
+ if (ops->alloc) {
+ dev = ops->alloc(tb, ifname, name_assign_type,
+ num_tx_queues, num_rx_queues);
+ if (IS_ERR(dev))
+ return dev;
+ } else {
+ dev = alloc_netdev_mqs(ops->priv_size, ifname,
+ name_assign_type, ops->setup,
+ num_tx_queues, num_rx_queues);
+ }
+
if (!dev)
return ERR_PTR(-ENOMEM);
@@ -3293,7 +3299,7 @@ replay:
m_ops = master_dev->rtnl_link_ops;
}
- err = validate_linkmsg(dev, tb);
+ err = validate_linkmsg(dev, tb, extack);
if (err < 0)
return err;
@@ -3411,7 +3417,7 @@ replay:
return -EOPNOTSUPP;
}
- if (!ops->setup)
+ if (!ops->alloc && !ops->setup)
return -EOPNOTSUPP;
if (!ifname[0]) {
@@ -3939,12 +3945,12 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
* implement its own handler for this.
*/
if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
- pr_info("%s: FDB only supports static addresses\n", dev->name);
+ netdev_info(dev, "default FDB implementation only supports local addresses\n");
return err;
}
if (vid) {
- pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+ netdev_info(dev, "vlans aren't supported yet for dev_uc|mc_add()\n");
return err;
}
@@ -4078,7 +4084,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
* implement its own handler for this.
*/
if (!(ndm->ndm_state & NUD_PERMANENT)) {
- pr_info("%s: FDB only supports static addresses\n", dev->name);
+ netdev_info(dev, "default FDB implementation only supports local addresses\n");
return err;
}
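
For context, a sketch (not part of this diff) of what the new ->alloc hook enables: a link type can allocate its own net_device, for instance sizing it from the netlink attributes, instead of relying solely on priv_size/->setup, and rtnl_create_link() will call it when present. The my_link_* names and the private struct below are illustrative only.

#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <net/rtnetlink.h>

struct my_link_priv {
	u32 flavour;
};

static void my_link_setup(struct net_device *dev)
{
	ether_setup(dev);
}

static struct net_device *my_link_alloc(struct nlattr *tb[],
					const char *ifname,
					unsigned char name_assign_type,
					unsigned int num_tx_queues,
					unsigned int num_rx_queues)
{
	/* The link type decides the allocation itself, e.g. based on tb[];
	 * returning NULL or an ERR_PTR is handled by rtnl_create_link().
	 */
	return alloc_netdev_mqs(sizeof(struct my_link_priv), ifname,
				name_assign_type, my_link_setup,
				num_tx_queues, num_rx_queues);
}

static struct rtnl_link_ops my_link_ops __read_mostly = {
	.kind	= "mylink",
	.alloc	= my_link_alloc,
	/* With ->alloc (or ->setup) present and no ->dellink, registration
	 * fills in unregister_netdevice_queue as the dellink handler.
	 */
};
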
diff --git a/net/core/scm.c b/net/core/scm.c
index ae3085d9aae8..5c356f0dee30 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -79,7 +79,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (!fpl)
{
- fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+ fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL_ACCOUNT);
if (!fpl)
return -ENOMEM;
*fplp = fpl;
@@ -355,7 +355,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
return NULL;
new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]),
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (new_fpl) {
for (i = 0; i < fpl->count; i++)
get_file(fpl->fp[i]);
diff --git a/net/core/selftests.c b/net/core/selftests.c
index ba7b0171974c..9077fa969892 100644
--- a/net/core/selftests.c
+++ b/net/core/selftests.c
@@ -318,6 +318,15 @@ static int net_test_phy_loopback_udp(struct net_device *ndev)
return __net_test_loopback(ndev, &attr);
}
+static int net_test_phy_loopback_udp_mtu(struct net_device *ndev)
+{
+ struct net_packet_attrs attr = { };
+
+ attr.dst = ndev->dev_addr;
+ attr.max_size = ndev->mtu;
+ return __net_test_loopback(ndev, &attr);
+}
+
static int net_test_phy_loopback_tcp(struct net_device *ndev)
{
struct net_packet_attrs attr = { };
@@ -345,6 +354,9 @@ static const struct net_test {
.name = "PHY internal loopback, UDP ",
.fn = net_test_phy_loopback_udp,
}, {
+ .name = "PHY internal loopback, MTU ",
+ .fn = net_test_phy_loopback_udp_mtu,
+ }, {
.name = "PHY internal loopback, TCP ",
.fn = net_test_phy_loopback_tcp,
}, {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bbc3b4b62032..2170bea2c7de 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
#include <net/xfrm.h>
#include <net/mpls.h>
#include <net/mptcp.h>
+#include <net/page_pool.h>
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -155,7 +156,7 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
void *data;
fragsz = SKB_DATA_ALIGN(fragsz);
- if (in_irq() || irqs_disabled()) {
+ if (in_hardirq() || irqs_disabled()) {
nc = this_cpu_ptr(&netdev_alloc_cache);
data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
} else {
@@ -501,7 +502,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
- if (in_irq() || irqs_disabled()) {
+ if (in_hardirq() || irqs_disabled()) {
nc = this_cpu_ptr(&netdev_alloc_cache);
data = page_frag_alloc(nc, len, gfp_mask);
pfmemalloc = nc->pfmemalloc;
@@ -645,10 +646,13 @@ static void skb_free_head(struct sk_buff *skb)
{
unsigned char *head = skb->head;
- if (skb->head_frag)
+ if (skb->head_frag) {
+ if (skb_pp_recycle(skb, head))
+ return;
skb_free_frag(head);
- else
+ } else {
kfree(head);
+ }
}
static void skb_release_data(struct sk_buff *skb)
@@ -659,17 +663,28 @@ static void skb_release_data(struct sk_buff *skb)
if (skb->cloned &&
atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
&shinfo->dataref))
- return;
+ goto exit;
skb_zcopy_clear(skb, true);
for (i = 0; i < shinfo->nr_frags; i++)
- __skb_frag_unref(&shinfo->frags[i]);
+ __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
if (shinfo->frag_list)
kfree_skb_list(shinfo->frag_list);
skb_free_head(skb);
+exit:
+	/* When we clone an SKB we copy the recycling bit. The pp_recycle
+	 * bit is only set on the head though, so in order to avoid races
+	 * while trying to recycle fragments on __skb_frag_unref() we need
+	 * to make one SKB responsible for triggering the recycle path.
+	 * So disable the recycling bit if an SKB is cloned and we have
+	 * additional references to the fragmented part of the SKB.
+	 * Eventually the last SKB will have the recycling bit set and its
+	 * dataref set to 0, which will trigger the recycling.
+ */
+ skb->pp_recycle = 0;
}
/*
@@ -709,7 +724,7 @@ void skb_release_head_state(struct sk_buff *skb)
{
skb_dst_drop(skb);
if (skb->destructor) {
- WARN_ON(in_irq());
+ WARN_ON(in_hardirq());
skb->destructor(skb);
}
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
@@ -939,8 +954,13 @@ void __kfree_skb_defer(struct sk_buff *skb)
void napi_skb_free_stolen_head(struct sk_buff *skb)
{
- skb_dst_drop(skb);
- skb_ext_put(skb);
+ if (unlikely(skb->slow_gro)) {
+ nf_reset_ct(skb);
+ skb_dst_drop(skb);
+ skb_ext_put(skb);
+ skb_orphan(skb);
+ skb->slow_gro = 0;
+ }
napi_skb_cache_put(skb);
}
@@ -1046,6 +1066,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->nohdr = 0;
n->peeked = 0;
C(pfmemalloc);
+ C(pp_recycle);
n->destructor = NULL;
C(tail);
C(end);
@@ -1289,7 +1310,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
}
spin_unlock_irqrestore(&q->lock, flags);
- sk->sk_error_report(sk);
+ sk_error_report(sk);
release:
consume_skb(skb);
@@ -1769,6 +1790,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
EXPORT_SYMBOL(skb_realloc_headroom);
/**
+ * skb_expand_head - reallocate header of &sk_buff
+ * @skb: buffer to reallocate
+ * @headroom: needed headroom
+ *
+ * Unlike skb_realloc_headroom, this one does not allocate a new skb
+ * if possible; it copies skb->sk to the new skb as needed
+ * and frees the original skb in case of failure.
+ *
+ * It expects an increase in headroom and generates a warning otherwise.
+ */
+
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
+{
+ int delta = headroom - skb_headroom(skb);
+
+ if (WARN_ONCE(delta <= 0,
+ "%s is expecting an increase in the headroom", __func__))
+ return skb;
+
+ /* pskb_expand_head() might crash, if skb is shared */
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (likely(nskb)) {
+ if (skb->sk)
+ skb_set_owner_w(nskb, skb->sk);
+ consume_skb(skb);
+ } else {
+ kfree_skb(skb);
+ }
+ skb = nskb;
+ }
+ if (skb &&
+ pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ skb = NULL;
+ }
+ return skb;
+}
+EXPORT_SYMBOL(skb_expand_head);
+
+/**
* skb_copy_expand - copy and expand sk_buff
* @skb: buffer to copy
* @newheadroom: new free bytes at head
@@ -3005,8 +3068,11 @@ skb_zerocopy_headlen(const struct sk_buff *from)
if (!from->head_frag ||
skb_headlen(from) < L1_CACHE_BYTES ||
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) {
hlen = skb_headlen(from);
+ if (!hlen)
+ hlen = from->len;
+ }
if (skb_has_frag_list(from))
hlen = from->len;
@@ -3497,7 +3563,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
fragto = &skb_shinfo(tgt)->frags[merge];
skb_frag_size_add(fragto, skb_frag_size(fragfrom));
- __skb_frag_unref(fragfrom);
+ __skb_frag_unref(fragfrom, skb->pp_recycle);
}
/* Reposition in the original skb */
@@ -3818,7 +3884,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_push(nskb, -skb_network_offset(nskb) + offset);
skb_release_head_state(nskb);
- __copy_skb_header(nskb, skb);
+ __copy_skb_header(nskb, skb);
skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
@@ -3869,6 +3935,9 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
NAPI_GRO_CB(p)->last = skb;
NAPI_GRO_CB(p)->count++;
p->data_len += skb->len;
+
+	/* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
p->truesize += skb->truesize;
p->len += skb->len;
@@ -4236,6 +4305,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
unsigned int headlen = skb_headlen(skb);
unsigned int len = skb_gro_len(skb);
unsigned int delta_truesize;
+ unsigned int new_truesize;
struct sk_buff *lp;
if (unlikely(p->len + len >= 65536 || NAPI_GRO_CB(skb)->flush))
@@ -4267,10 +4337,10 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
skb_frag_size_sub(frag, offset);
/* all fragments truesize : remove (head size + sk_buff) */
- delta_truesize = skb->truesize -
- SKB_TRUESIZE(skb_end_offset(skb));
+ new_truesize = SKB_TRUESIZE(skb_end_offset(skb));
+ delta_truesize = skb->truesize - new_truesize;
- skb->truesize -= skb->data_len;
+ skb->truesize = new_truesize;
skb->len -= skb->data_len;
skb->data_len = 0;
@@ -4299,12 +4369,16 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
/* We dont need to clear skbinfo->nr_frags here */
- delta_truesize = skb->truesize - SKB_DATA_ALIGN(sizeof(struct sk_buff));
+ new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
+ delta_truesize = skb->truesize - new_truesize;
+ skb->truesize = new_truesize;
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
merge:
+	/* sk ownership - if any - completely transferred to the aggregated packet */
+ skb->destructor = NULL;
delta_truesize = skb->truesize;
if (offset > headlen) {
unsigned int eat = offset - headlen;
@@ -4680,7 +4754,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
skb_queue_tail(&sk->sk_error_queue, skb);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return 0;
}
EXPORT_SYMBOL(sock_queue_err_skb);
@@ -4711,7 +4785,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
sk->sk_err = 0;
if (skb_next)
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return skb;
}
@@ -5287,6 +5361,13 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (skb_cloned(to))
return false;
+ /* The page pool signature of struct page will eventually figure out
+ * which pages can be recycled or not but for now let's prohibit slab
+ * allocated and page_pool allocated SKBs from being coalesced.
+ */
+ if (to->pp_recycle != from->pp_recycle)
+ return false;
+
if (len <= skb_tailroom(to)) {
if (len)
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
@@ -6422,6 +6503,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
new->chunks = newlen;
new->offset[id] = newoff;
set_active:
+ skb->slow_gro = 1;
skb->extensions = new;
skb->active_extensions |= 1 << id;
return skb_ext_get_ptr(new, id);
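
To illustrate the contract of the new skb_expand_head() helper (the caller below is hypothetical and not from this patch): on failure the original skb has already been freed, so the caller must use the returned pointer and never touch the old one. A minimal sketch of a caller that needs encap_hlen bytes of headroom before pushing an encapsulation header:

#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/string.h>

static int my_encap_xmit(struct sk_buff *skb, unsigned int encap_hlen)
{
	if (skb_headroom(skb) < encap_hlen) {
		skb = skb_expand_head(skb, encap_hlen);
		if (!skb)
			return -ENOMEM;	/* original skb already freed */
	}

	/* Safe now: at least encap_hlen bytes of headroom are available. */
	memset(skb_push(skb, encap_hlen), 0, encap_hlen);
	return 0;
}
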
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 43ce17a6a585..2d6249b28928 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -399,29 +399,6 @@ out:
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
-int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
- long timeo, int *err)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- int ret = 0;
-
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- return 1;
-
- if (!timeo)
- return ret;
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- ret = sk_wait_event(sk, &timeo,
- !list_empty(&psock->ingress_msg) ||
- !skb_queue_empty(&sk->sk_receive_queue), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- return ret;
-}
-EXPORT_SYMBOL_GPL(sk_msg_wait_data);
-
/* Receive sk_msg from psock->ingress_msg to @msg. */
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
int len, int flags)
@@ -531,10 +508,8 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
if (skb_linearize(skb))
return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
- if (unlikely(num_sge < 0)) {
- kfree(msg);
+ if (unlikely(num_sge < 0))
return num_sge;
- }
copied = skb->len;
msg->sg.start = 0;
@@ -553,6 +528,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
{
struct sock *sk = psock->sk;
struct sk_msg *msg;
+ int err;
/* If we are receiving on the same sock skb->sk is already assigned,
* skip memory accounting and owner transition seeing it already set
@@ -571,7 +547,10 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ if (err < 0)
+ kfree(msg);
+ return err;
}
/* Puts an skb on the ingress queue of the socket already assigned to the
@@ -582,12 +561,16 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
{
struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
struct sock *sk = psock->sk;
+ int err;
if (unlikely(!msg))
return -EAGAIN;
sk_msg_init(msg);
skb_set_owner_r(skb, sk);
- return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ if (err < 0)
+ kfree(msg);
+ return err;
}
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
@@ -601,23 +584,42 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
return sk_psock_skb_ingress(psock, skb);
}
+static void sk_psock_skb_state(struct sk_psock *psock,
+ struct sk_psock_work_state *state,
+ struct sk_buff *skb,
+ int len, int off)
+{
+ spin_lock_bh(&psock->ingress_lock);
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+ state->skb = skb;
+ state->len = len;
+ state->off = off;
+ } else {
+ sock_drop(psock->sk, skb);
+ }
+ spin_unlock_bh(&psock->ingress_lock);
+}
+
static void sk_psock_backlog(struct work_struct *work)
{
struct sk_psock *psock = container_of(work, struct sk_psock, work);
struct sk_psock_work_state *state = &psock->work_state;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
bool ingress;
u32 len, off;
int ret;
mutex_lock(&psock->work_mutex);
- if (state->skb) {
+ if (unlikely(state->skb)) {
+ spin_lock_bh(&psock->ingress_lock);
skb = state->skb;
len = state->len;
off = state->off;
state->skb = NULL;
- goto start;
+ spin_unlock_bh(&psock->ingress_lock);
}
+ if (skb)
+ goto start;
while ((skb = skb_dequeue(&psock->ingress_skb))) {
len = skb->len;
@@ -632,15 +634,14 @@ start:
len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
- state->skb = skb;
- state->len = len;
- state->off = off;
+ sk_psock_skb_state(psock, state, skb,
+ len, off);
goto end;
}
/* Hard errors break pipe and stop xmit. */
sk_psock_report_error(psock, ret ? -ret : EPIPE);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
- kfree_skb(skb);
+ sock_drop(psock->sk, skb);
goto end;
}
off += ret;
@@ -731,8 +732,13 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
skb_bpf_redirect_clear(skb);
- kfree_skb(skb);
+ sock_drop(psock->sk, skb);
}
+ kfree_skb(psock->work_state.skb);
+ /* We null the skb here to ensure that calls to sk_psock_backlog
+ * do not pick up the free'd skb.
+ */
+ psock->work_state.skb = NULL;
__sk_psock_purge_ingress_msg(psock);
}
@@ -784,8 +790,6 @@ static void sk_psock_destroy(struct work_struct *work)
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
- sk_psock_stop(psock, false);
-
write_lock_bh(&sk->sk_callback_lock);
sk_psock_restore_proto(sk, psock);
rcu_assign_sk_user_data(sk, NULL);
@@ -795,6 +799,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
+ sk_psock_stop(psock, false);
+
INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
queue_rcu_work(system_wq, &psock->rwork);
}
@@ -847,7 +853,7 @@ out:
}
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
-static void sk_psock_skb_redirect(struct sk_buff *skb)
+static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
@@ -857,8 +863,8 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
* return code, but then didn't set a redirect interface.
*/
if (unlikely(!sk_other)) {
- kfree_skb(skb);
- return;
+ sock_drop(from->sk, skb);
+ return -EIO;
}
psock_other = sk_psock(sk_other);
/* This error indicates the socket is being torn down or had another
@@ -866,26 +872,30 @@ static void sk_psock_skb_redirect(struct sk_buff *skb)
* a socket that is in this state so we drop the skb.
*/
if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
- kfree_skb(skb);
- return;
+ skb_bpf_redirect_clear(skb);
+ sock_drop(from->sk, skb);
+ return -EIO;
}
spin_lock_bh(&psock_other->ingress_lock);
if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
spin_unlock_bh(&psock_other->ingress_lock);
- kfree_skb(skb);
- return;
+ skb_bpf_redirect_clear(skb);
+ sock_drop(from->sk, skb);
+ return -EIO;
}
skb_queue_tail(&psock_other->ingress_skb, skb);
schedule_work(&psock_other->work);
spin_unlock_bh(&psock_other->ingress_lock);
+ return 0;
}
-static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
+static void sk_psock_tls_verdict_apply(struct sk_buff *skb,
+ struct sk_psock *from, int verdict)
{
switch (verdict) {
case __SK_REDIRECT:
- sk_psock_skb_redirect(skb);
+ sk_psock_skb_redirect(from, skb);
break;
case __SK_PASS:
case __SK_DROP:
@@ -909,20 +919,21 @@ int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
- sk_psock_tls_verdict_apply(skb, psock->sk, ret);
+ sk_psock_tls_verdict_apply(skb, psock, ret);
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
-static void sk_psock_verdict_apply(struct sk_psock *psock,
- struct sk_buff *skb, int verdict)
+static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
+ int verdict)
{
struct sock *sk_other;
- int err = -EIO;
+ int err = 0;
switch (verdict) {
case __SK_PASS:
+ err = -EIO;
sk_other = psock->sk;
if (sock_flag(sk_other, SOCK_DEAD) ||
!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
@@ -945,18 +956,25 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_queue_tail(&psock->ingress_skb, skb);
schedule_work(&psock->work);
+ err = 0;
}
spin_unlock_bh(&psock->ingress_lock);
+ if (err < 0) {
+ skb_bpf_redirect_clear(skb);
+ goto out_free;
+ }
}
break;
case __SK_REDIRECT:
- sk_psock_skb_redirect(skb);
+ err = sk_psock_skb_redirect(psock, skb);
break;
case __SK_DROP:
default:
out_free:
- kfree_skb(skb);
+ sock_drop(psock->sk, skb);
}
+
+ return err;
}
static void sk_psock_write_space(struct sock *sk)
@@ -988,7 +1006,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
sk = strp->sk;
psock = sk_psock(sk);
if (unlikely(!psock)) {
- kfree_skb(skb);
+ sock_drop(sk, skb);
goto out;
}
prog = READ_ONCE(psock->progs.stream_verdict);
@@ -1109,7 +1127,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
psock = sk_psock(sk);
if (unlikely(!psock)) {
len = 0;
- kfree_skb(skb);
+ sock_drop(sk, skb);
goto out;
}
prog = READ_ONCE(psock->progs.stream_verdict);
@@ -1123,7 +1141,8 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
skb->sk = NULL;
}
- sk_psock_verdict_apply(psock, skb, ret);
+ if (sk_psock_verdict_apply(psock, skb, ret) < 0)
+ len = 0;
out:
rcu_read_unlock();
return len;
diff --git a/net/core/sock.c b/net/core/sock.c
index 946888afef88..62627e868e03 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,6 +139,8 @@
#include <net/tcp.h>
#include <net/busy_poll.h>
+#include <linux/ethtool.h>
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
@@ -224,6 +226,7 @@ static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \
+ x "AF_MCTP" , \
x "AF_MAX"
static const char *const af_family_key_strings[AF_MAX+1] = {
@@ -331,6 +334,22 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(__sk_backlog_rcv);
+void sk_error_report(struct sock *sk)
+{
+ sk->sk_error_report(sk);
+
+ switch (sk->sk_family) {
+ case AF_INET:
+ fallthrough;
+ case AF_INET6:
+ trace_inet_sk_error_report(sk);
+ break;
+ default:
+ break;
+ }
+}
+EXPORT_SYMBOL(sk_error_report);
+
static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
struct __kernel_sock_timeval tv;
@@ -776,6 +795,103 @@ void sock_enable_timestamps(struct sock *sk)
}
EXPORT_SYMBOL(sock_enable_timestamps);
+void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
+{
+ switch (optname) {
+ case SO_TIMESTAMP_OLD:
+ __sock_set_timestamps(sk, valbool, false, false);
+ break;
+ case SO_TIMESTAMP_NEW:
+ __sock_set_timestamps(sk, valbool, true, false);
+ break;
+ case SO_TIMESTAMPNS_OLD:
+ __sock_set_timestamps(sk, valbool, false, true);
+ break;
+ case SO_TIMESTAMPNS_NEW:
+ __sock_set_timestamps(sk, valbool, true, true);
+ break;
+ }
+}
+
+static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
+{
+ struct net *net = sock_net(sk);
+ struct net_device *dev = NULL;
+ bool match = false;
+ int *vclock_index;
+ int i, num;
+
+ if (sk->sk_bound_dev_if)
+ dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+
+ if (!dev) {
+ pr_err("%s: sock not bind to device\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ num = ethtool_get_phc_vclocks(dev, &vclock_index);
+ for (i = 0; i < num; i++) {
+ if (*(vclock_index + i) == phc_index) {
+ match = true;
+ break;
+ }
+ }
+
+ if (num > 0)
+ kfree(vclock_index);
+
+ if (!match)
+ return -EINVAL;
+
+ sk->sk_bind_phc = phc_index;
+
+ return 0;
+}
+
+int sock_set_timestamping(struct sock *sk, int optname,
+ struct so_timestamping timestamping)
+{
+ int val = timestamping.flags;
+ int ret;
+
+ if (val & ~SOF_TIMESTAMPING_MASK)
+ return -EINVAL;
+
+ if (val & SOF_TIMESTAMPING_OPT_ID &&
+ !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM) {
+ if ((1 << sk->sk_state) &
+ (TCPF_CLOSE | TCPF_LISTEN))
+ return -EINVAL;
+ sk->sk_tskey = tcp_sk(sk)->snd_una;
+ } else {
+ sk->sk_tskey = 0;
+ }
+ }
+
+ if (val & SOF_TIMESTAMPING_OPT_STATS &&
+ !(val & SOF_TIMESTAMPING_OPT_TSONLY))
+ return -EINVAL;
+
+ if (val & SOF_TIMESTAMPING_BIND_PHC) {
+ ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
+ if (ret)
+ return ret;
+ }
+
+ sk->sk_tsflags = val;
+ sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
+
+ if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+ sock_enable_timestamp(sk,
+ SOCK_TIMESTAMPING_RX_SOFTWARE);
+ else
+ sock_disable_timestamp(sk,
+ (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+ return 0;
+}
+
void sock_set_keepalive(struct sock *sk)
{
lock_sock(sk);
@@ -839,6 +955,7 @@ EXPORT_SYMBOL(sock_set_mark);
int sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct so_timestamping timestamping;
struct sock_txtime sk_txtime;
struct sock *sk = sock->sk;
int val;
@@ -997,54 +1114,25 @@ set_sndbuf:
break;
case SO_TIMESTAMP_OLD:
- __sock_set_timestamps(sk, valbool, false, false);
- break;
case SO_TIMESTAMP_NEW:
- __sock_set_timestamps(sk, valbool, true, false);
- break;
case SO_TIMESTAMPNS_OLD:
- __sock_set_timestamps(sk, valbool, false, true);
- break;
case SO_TIMESTAMPNS_NEW:
- __sock_set_timestamps(sk, valbool, true, true);
+ sock_set_timestamp(sk, optname, valbool);
break;
+
case SO_TIMESTAMPING_NEW:
case SO_TIMESTAMPING_OLD:
- if (val & ~SOF_TIMESTAMPING_MASK) {
- ret = -EINVAL;
- break;
- }
-
- if (val & SOF_TIMESTAMPING_OPT_ID &&
- !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
- if (sk->sk_protocol == IPPROTO_TCP &&
- sk->sk_type == SOCK_STREAM) {
- if ((1 << sk->sk_state) &
- (TCPF_CLOSE | TCPF_LISTEN)) {
- ret = -EINVAL;
- break;
- }
- sk->sk_tskey = tcp_sk(sk)->snd_una;
- } else {
- sk->sk_tskey = 0;
+ if (optlen == sizeof(timestamping)) {
+ if (copy_from_sockptr(&timestamping, optval,
+ sizeof(timestamping))) {
+ ret = -EFAULT;
+ break;
}
+ } else {
+ memset(&timestamping, 0, sizeof(timestamping));
+ timestamping.flags = val;
}
-
- if (val & SOF_TIMESTAMPING_OPT_STATS &&
- !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
- ret = -EINVAL;
- break;
- }
-
- sk->sk_tsflags = val;
- sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
-
- if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
- sock_enable_timestamp(sk,
- SOCK_TIMESTAMPING_RX_SOFTWARE);
- else
- sock_disable_timestamp(sk,
- (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+ ret = sock_set_timestamping(sk, optname, timestamping);
break;
case SO_RCVLOWAT:
@@ -1172,7 +1260,7 @@ set_sndbuf:
if (val < 0)
ret = -EINVAL;
else
- sk->sk_ll_usec = val;
+ WRITE_ONCE(sk->sk_ll_usec, val);
}
break;
case SO_PREFER_BUSY_POLL:
@@ -1270,6 +1358,15 @@ set_sndbuf:
ret = sock_bindtoindex_locked(sk, val);
break;
+ case SO_BUF_LOCK:
+ if (val & ~SOCK_BUF_LOCK_MASK) {
+ ret = -EINVAL;
+ break;
+ }
+ sk->sk_userlocks = val | (sk->sk_userlocks &
+ ~SOCK_BUF_LOCK_MASK);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1319,6 +1416,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct __kernel_old_timeval tm;
struct __kernel_sock_timeval stm;
struct sock_txtime txtime;
+ struct so_timestamping timestamping;
} v;
int lv = sizeof(int);
@@ -1422,7 +1520,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_TIMESTAMPING_OLD:
- v.val = sk->sk_tsflags;
+ lv = sizeof(v.timestamping);
+ v.timestamping.flags = sk->sk_tsflags;
+ v.timestamping.bind_phc = sk->sk_bind_phc;
break;
case SO_RCVTIMEO_OLD:
@@ -1622,6 +1722,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_bound_dev_if;
break;
+ case SO_NETNS_COOKIE:
+ lv = sizeof(u64);
+ if (len != lv)
+ return -EINVAL;
+ v.val64 = sock_net(sk)->net_cookie;
+ break;
+
+ case SO_BUF_LOCK:
+ v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).
@@ -2463,7 +2574,6 @@ static void sk_leave_memory_pressure(struct sock *sk)
}
}
-#define SKB_FRAG_PAGE_ORDER get_order(32768)
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
/**
@@ -2617,10 +2727,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct proto *prot = sk->sk_prot;
long allocated = sk_memory_allocated_add(sk, amt);
+ bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
bool charged = true;
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
+ if (memcg_charge &&
+ !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge())))
goto suppress_allocation;
/* Under limit. */
@@ -2674,8 +2786,14 @@ suppress_allocation:
/* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail.
*/
- if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
+ /* Force charge with __GFP_NOFAIL */
+ if (memcg_charge && !charged) {
+ mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge() | __GFP_NOFAIL);
+ }
return 1;
+ }
}
if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
@@ -2683,7 +2801,7 @@ suppress_allocation:
sk_memory_allocated_sub(sk, amt);
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ if (memcg_charge && charged)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
return 0;
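
For illustration (not part of the diff): SO_TIMESTAMPING now also accepts the larger struct so_timestamping, whose bind_phc field selects a PHC virtual clock; the socket must be bound to the device providing that clock. A minimal userspace sketch, where "eth0" and vclock index 1 are example values that would normally come from ethtool's PHC vclock query:

#include <linux/net_tstamp.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37	/* asm-generic value; arch headers may differ */
#endif

int main(void)
{
	struct so_timestamping ts = {
		.flags = SOF_TIMESTAMPING_RX_HARDWARE |
			 SOF_TIMESTAMPING_RAW_HARDWARE |
			 SOF_TIMESTAMPING_BIND_PHC,
		.bind_phc = 1,	/* example vclock index */
	};
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* BIND_PHC requires the socket to be bound to the device whose
	 * PHC provides the chosen virtual clock.
	 */
	if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", strlen("eth0")) < 0)
		perror("SO_BINDTODEVICE");

	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &ts, sizeof(ts)) < 0)
		perror("SO_TIMESTAMPING");

	close(fd);
	return 0;
}
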
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 6f1b82b8ad49..e252b8ec2b85 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -48,7 +48,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
bpf_map_init_from_attr(&stab->map, attr);
raw_spin_lock_init(&stab->lock);
- stab->sks = bpf_map_area_alloc(stab->map.max_entries *
+ stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
stab->map.numa_node);
if (!stab->sks) {
@@ -211,8 +211,6 @@ out:
return psock;
}
-static bool sock_map_redirect_allowed(const struct sock *sk);
-
static int sock_map_link(struct bpf_map *map, struct sock *sk)
{
struct sk_psock_progs *progs = sock_map_progs(map);
@@ -223,13 +221,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
struct sk_psock *psock;
int ret;
- /* Only sockets we can redirect into/from in BPF need to hold
- * refs to parser/verdict progs and have their sk_data_ready
- * and sk_write_space callbacks overridden.
- */
- if (!sock_map_redirect_allowed(sk))
- goto no_progs;
-
stream_verdict = READ_ONCE(progs->stream_verdict);
if (stream_verdict) {
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
@@ -264,7 +255,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
}
}
-no_progs:
psock = sock_map_psock_get_checked(sk);
if (IS_ERR(psock)) {
ret = PTR_ERR(psock);
@@ -527,12 +517,6 @@ static bool sk_is_tcp(const struct sock *sk)
sk->sk_protocol == IPPROTO_TCP;
}
-static bool sk_is_udp(const struct sock *sk)
-{
- return sk->sk_type == SOCK_DGRAM &&
- sk->sk_protocol == IPPROTO_UDP;
-}
-
static bool sock_map_redirect_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
@@ -550,10 +534,7 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
{
if (sk_is_tcp(sk))
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
- else if (sk_is_udp(sk))
- return sk_hashed(sk);
-
- return false;
+ return true;
}
static int sock_hash_update_common(struct bpf_map *map, void *key,
@@ -1513,6 +1494,7 @@ void sock_map_unhash(struct sock *sk)
rcu_read_unlock();
saved_unhash(sk);
}
+EXPORT_SYMBOL_GPL(sock_map_unhash);
void sock_map_close(struct sock *sk, long timeout)
{
@@ -1536,6 +1518,7 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+EXPORT_SYMBOL_GPL(sock_map_close);
static int sock_map_iter_attach_target(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index b065f0a103ed..3f00a28fe762 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -6,6 +6,7 @@
* selecting the socket index from the array of available sockets.
*/
+#include <net/ip.h>
#include <net/sock_reuseport.h>
#include <linux/bpf.h>
#include <linux/idr.h>
@@ -17,6 +18,74 @@
DEFINE_SPINLOCK(reuseport_lock);
static DEFINE_IDA(reuseport_ida);
+static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+ struct sock_reuseport *reuse, bool bind_inany);
+
+static int reuseport_sock_index(struct sock *sk,
+ const struct sock_reuseport *reuse,
+ bool closed)
+{
+ int left, right;
+
+ if (!closed) {
+ left = 0;
+ right = reuse->num_socks;
+ } else {
+ left = reuse->max_socks - reuse->num_closed_socks;
+ right = reuse->max_socks;
+ }
+
+ for (; left < right; left++)
+ if (reuse->socks[left] == sk)
+ return left;
+ return -1;
+}
+
+static void __reuseport_add_sock(struct sock *sk,
+ struct sock_reuseport *reuse)
+{
+ reuse->socks[reuse->num_socks] = sk;
+ /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
+ smp_wmb();
+ reuse->num_socks++;
+}
+
+static bool __reuseport_detach_sock(struct sock *sk,
+ struct sock_reuseport *reuse)
+{
+ int i = reuseport_sock_index(sk, reuse, false);
+
+ if (i == -1)
+ return false;
+
+ reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
+ reuse->num_socks--;
+
+ return true;
+}
+
+static void __reuseport_add_closed_sock(struct sock *sk,
+ struct sock_reuseport *reuse)
+{
+ reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk;
+ /* paired with READ_ONCE() in inet_csk_bind_conflict() */
+ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1);
+}
+
+static bool __reuseport_detach_closed_sock(struct sock *sk,
+ struct sock_reuseport *reuse)
+{
+ int i = reuseport_sock_index(sk, reuse, true);
+
+ if (i == -1)
+ return false;
+
+ reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+ /* paired with READ_ONCE() in inet_csk_bind_conflict() */
+ WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1);
+
+ return true;
+}
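
Editor's note: the helpers added above split the socks[] array into two packed sections: listening sockets grow from index 0 up to num_socks, while shutdown()ed-but-migratable sockets grow backwards from the end of the array, tracked by num_closed_socks. A tiny stand-alone model of that layout (plain C with made-up values, not the kernel structures):

#include <stdio.h>

#define MAX_SOCKS 8

struct group {
	int socks[MAX_SOCKS];
	int num_socks;		/* listening entries: socks[0 .. num_socks) */
	int num_closed_socks;	/* closed entries: socks[MAX_SOCKS - n .. MAX_SOCKS) */
};

static void add_listening(struct group *g, int sk)
{
	g->socks[g->num_socks++] = sk;
}

static void move_to_closed(struct group *g, int i)
{
	int sk = g->socks[i];

	/* detach from the listening section (swap with its last entry) */
	g->num_socks--;
	g->socks[i] = g->socks[g->num_socks];

	/* push onto the closed section, which grows from the end backwards */
	g->num_closed_socks++;
	g->socks[MAX_SOCKS - g->num_closed_socks] = sk;
}

int main(void)
{
	struct group g = { 0 };

	add_listening(&g, 101);
	add_listening(&g, 102);
	move_to_closed(&g, 0);	/* 101 shuts down but stays migratable */
	printf("listening=%d closed=%d\n", g.num_socks, g.num_closed_socks);
	return 0;
}
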
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
@@ -49,6 +118,12 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
if (reuse) {
+ if (reuse->num_closed_socks) {
+ /* sk was shutdown()ed before */
+ ret = reuseport_resurrect(sk, reuse, NULL, bind_inany);
+ goto out;
+ }
+
/* Only set reuse->bind_inany if the bind_inany is true.
* Otherwise, it will overwrite the reuse->bind_inany
* which was set by the bind/hash path.
@@ -72,9 +147,9 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
}
reuse->reuseport_id = id;
+ reuse->bind_inany = bind_inany;
reuse->socks[0] = sk;
reuse->num_socks = 1;
- reuse->bind_inany = bind_inany;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
out:
@@ -90,14 +165,30 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
u32 more_socks_size, i;
more_socks_size = reuse->max_socks * 2U;
- if (more_socks_size > U16_MAX)
+ if (more_socks_size > U16_MAX) {
+ if (reuse->num_closed_socks) {
+ /* Make room by removing a closed sk.
+ * The child has already been migrated.
+ * Only reqsk left at this point.
+ */
+ struct sock *sk;
+
+ sk = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+ RCU_INIT_POINTER(sk->sk_reuseport_cb, NULL);
+ __reuseport_detach_closed_sock(sk, reuse);
+
+ return reuse;
+ }
+
return NULL;
+ }
more_reuse = __reuseport_alloc(more_socks_size);
if (!more_reuse)
return NULL;
more_reuse->num_socks = reuse->num_socks;
+ more_reuse->num_closed_socks = reuse->num_closed_socks;
more_reuse->prog = reuse->prog;
more_reuse->reuseport_id = reuse->reuseport_id;
more_reuse->bind_inany = reuse->bind_inany;
@@ -105,9 +196,13 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
memcpy(more_reuse->socks, reuse->socks,
reuse->num_socks * sizeof(struct sock *));
+ memcpy(more_reuse->socks +
+ (more_reuse->max_socks - more_reuse->num_closed_socks),
+ reuse->socks + (reuse->max_socks - reuse->num_closed_socks),
+ reuse->num_closed_socks * sizeof(struct sock *));
more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);
- for (i = 0; i < reuse->num_socks; ++i)
+ for (i = 0; i < reuse->max_socks; ++i)
rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
more_reuse);
@@ -152,13 +247,21 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock));
+ lockdep_is_held(&reuseport_lock));
+ if (old_reuse && old_reuse->num_closed_socks) {
+ /* sk was shutdown()ed before */
+ int err = reuseport_resurrect(sk, old_reuse, reuse, reuse->bind_inany);
+
+ spin_unlock_bh(&reuseport_lock);
+ return err;
+ }
+
if (old_reuse && old_reuse->num_socks != 1) {
spin_unlock_bh(&reuseport_lock);
return -EBUSY;
}
- if (reuse->num_socks == reuse->max_socks) {
+ if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
reuse = reuseport_grow(reuse);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -166,10 +269,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
}
}
- reuse->socks[reuse->num_socks] = sk;
- /* paired with smp_rmb() in reuseport_select_sock() */
- smp_wmb();
- reuse->num_socks++;
+ __reuseport_add_sock(sk, reuse);
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
spin_unlock_bh(&reuseport_lock);
@@ -180,15 +280,77 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
}
EXPORT_SYMBOL(reuseport_add_sock);
+static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+ struct sock_reuseport *reuse, bool bind_inany)
+{
+ if (old_reuse == reuse) {
+ /* If sk was in the same reuseport group, just pop sk out of
+ * the closed section and push sk into the listening section.
+ */
+ __reuseport_detach_closed_sock(sk, old_reuse);
+ __reuseport_add_sock(sk, old_reuse);
+ return 0;
+ }
+
+ if (!reuse) {
+ /* In bind()/listen() path, we cannot carry over the eBPF prog
+ * for the shutdown()ed socket. In setsockopt() path, we should
+ * not change the eBPF prog of listening sockets by attaching a
+ * prog to the shutdown()ed socket. Thus, we will allocate a new
+ * reuseport group and detach sk from the old group.
+ */
+ int id;
+
+ reuse = __reuseport_alloc(INIT_SOCKS);
+ if (!reuse)
+ return -ENOMEM;
+
+ id = ida_alloc(&reuseport_ida, GFP_ATOMIC);
+ if (id < 0) {
+ kfree(reuse);
+ return id;
+ }
+
+ reuse->reuseport_id = id;
+ reuse->bind_inany = bind_inany;
+ } else {
+ /* Move sk from the old group to the new one if
+ * - all the other listeners in the old group were close()d or
+ * shutdown()ed, and then sk2 has listen()ed on the same port
+ * OR
+ * - sk listen()ed without bind() (or with autobind), was
+ * shutdown()ed, and then listen()s on another port which
+ * sk2 listen()s on.
+ */
+ if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
+ reuse = reuseport_grow(reuse);
+ if (!reuse)
+ return -ENOMEM;
+ }
+ }
+
+ __reuseport_detach_closed_sock(sk, old_reuse);
+ __reuseport_add_sock(sk, reuse);
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+ if (old_reuse->num_socks + old_reuse->num_closed_socks == 0)
+ call_rcu(&old_reuse->rcu, reuseport_free_rcu);
+
+ return 0;
+}
+
void reuseport_detach_sock(struct sock *sk)
{
struct sock_reuseport *reuse;
- int i;
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
+ /* reuseport_grow() has detached a closed sk */
+ if (!reuse)
+ goto out;
+
/* Notify the bpf side. The sk may be added to a sockarray
* map. If so, sockarray logic will remove it from the map.
*
@@ -201,19 +363,52 @@ void reuseport_detach_sock(struct sock *sk)
rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
- for (i = 0; i < reuse->num_socks; i++) {
- if (reuse->socks[i] == sk) {
- reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
- reuse->num_socks--;
- if (reuse->num_socks == 0)
- call_rcu(&reuse->rcu, reuseport_free_rcu);
- break;
- }
- }
+ if (!__reuseport_detach_closed_sock(sk, reuse))
+ __reuseport_detach_sock(sk, reuse);
+
+ if (reuse->num_socks + reuse->num_closed_socks == 0)
+ call_rcu(&reuse->rcu, reuseport_free_rcu);
+
+out:
spin_unlock_bh(&reuseport_lock);
}
EXPORT_SYMBOL(reuseport_detach_sock);
+void reuseport_stop_listen_sock(struct sock *sk)
+{
+ if (sk->sk_protocol == IPPROTO_TCP) {
+ struct sock_reuseport *reuse;
+ struct bpf_prog *prog;
+
+ spin_lock_bh(&reuseport_lock);
+
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ prog = rcu_dereference_protected(reuse->prog,
+ lockdep_is_held(&reuseport_lock));
+
+ if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+ (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
+ /* Migration capable, move sk from the listening section
+ * to the closed section.
+ */
+ bpf_sk_reuseport_detach(sk);
+
+ __reuseport_detach_sock(sk, reuse);
+ __reuseport_add_closed_sock(sk, reuse);
+
+ spin_unlock_bh(&reuseport_lock);
+ return;
+ }
+
+ spin_unlock_bh(&reuseport_lock);
+ }
+
+ /* Not capable to do migration, detach immediately */
+	/* Not capable of migration, detach immediately */
+ reuseport_detach_sock(sk);
+}
+EXPORT_SYMBOL(reuseport_stop_listen_sock);
+
static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
struct bpf_prog *prog, struct sk_buff *skb,
int hdr_len)
@@ -244,6 +439,23 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
return reuse->socks[index];
}
+static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
+ u32 hash, u16 num_socks)
+{
+ int i, j;
+
+ i = j = reciprocal_scale(hash, num_socks);
+ while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+ i++;
+ if (i >= num_socks)
+ i = 0;
+ if (i == j)
+ return NULL;
+ }
+
+ return reuse->socks[i];
+}
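
Editor's note: reuseport_select_sock_by_hash() above factors out the fallback shared by reuseport_select_sock() and the new reuseport_migrate_sock(): start at a hash-derived index, walk forward with wrap-around while skipping ESTABLISHED sockets, and give up if the scan returns to the starting point. A small stand-alone model (the hash value and socket states are made up):

#include <stdint.h>
#include <stdio.h>

/* same mapping as the kernel's reciprocal_scale(): (hash * n) >> 32 */
static uint32_t scale(uint32_t hash, uint32_t n)
{
	return (uint32_t)(((uint64_t)hash * n) >> 32);
}

int main(void)
{
	int established[4] = { 1, 1, 0, 1 };	/* only index 2 can accept */
	uint32_t i, j, n = 4;

	i = j = scale(0xdeadbeef, n);
	while (established[i]) {
		i++;
		if (i >= n)
			i = 0;			/* wrap around */
		if (i == j) {
			printf("no candidate\n");
			return 0;
		}
	}
	printf("selected index %u\n", i);
	return 0;
}
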
+
/**
* reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
* @sk: First socket in the group.
@@ -274,32 +486,21 @@ struct sock *reuseport_select_sock(struct sock *sk,
prog = rcu_dereference(reuse->prog);
socks = READ_ONCE(reuse->num_socks);
if (likely(socks)) {
- /* paired with smp_wmb() in reuseport_add_sock() */
+ /* paired with smp_wmb() in __reuseport_add_sock() */
smp_rmb();
if (!prog || !skb)
goto select_by_hash;
if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
- sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
+ sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, NULL, hash);
else
sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
select_by_hash:
/* no bpf or invalid bpf result: fall back to hash usage */
- if (!sk2) {
- int i, j;
-
- i = j = reciprocal_scale(hash, socks);
- while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
- i++;
- if (i >= socks)
- i = 0;
- if (i == j)
- goto out;
- }
- sk2 = reuse->socks[i];
- }
+ if (!sk2)
+ sk2 = reuseport_select_sock_by_hash(reuse, hash, socks);
}
out:
@@ -308,14 +509,90 @@ out:
}
EXPORT_SYMBOL(reuseport_select_sock);
+/**
+ * reuseport_migrate_sock - Select a socket from an SO_REUSEPORT group.
+ * @sk: close()ed or shutdown()ed socket in the group.
+ * @migrating_sk: ESTABLISHED/SYN_RECV full socket in the accept queue or
+ * NEW_SYN_RECV request socket during 3WHS.
+ * @skb: skb to run through BPF filter.
+ * Returns a socket (with sk_refcnt +1) that should accept the child socket
+ * (or NULL on error).
+ */
+struct sock *reuseport_migrate_sock(struct sock *sk,
+ struct sock *migrating_sk,
+ struct sk_buff *skb)
+{
+ struct sock_reuseport *reuse;
+ struct sock *nsk = NULL;
+ bool allocated = false;
+ struct bpf_prog *prog;
+ u16 socks;
+ u32 hash;
+
+ rcu_read_lock();
+
+ reuse = rcu_dereference(sk->sk_reuseport_cb);
+ if (!reuse)
+ goto out;
+
+ socks = READ_ONCE(reuse->num_socks);
+ if (unlikely(!socks))
+ goto failure;
+
+ /* paired with smp_wmb() in __reuseport_add_sock() */
+ smp_rmb();
+
+ hash = migrating_sk->sk_hash;
+ prog = rcu_dereference(reuse->prog);
+ if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+ goto select_by_hash;
+ goto failure;
+ }
+
+ if (!skb) {
+ skb = alloc_skb(0, GFP_ATOMIC);
+ if (!skb)
+ goto failure;
+ allocated = true;
+ }
+
+ nsk = bpf_run_sk_reuseport(reuse, sk, prog, skb, migrating_sk, hash);
+
+ if (allocated)
+ kfree_skb(skb);
+
+select_by_hash:
+ if (!nsk)
+ nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
+
+ if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt))) {
+ nsk = NULL;
+ goto failure;
+ }
+
+out:
+ rcu_read_unlock();
+ return nsk;
+
+failure:
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+ goto out;
+}
+EXPORT_SYMBOL(reuseport_migrate_sock);
+
int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
{
struct sock_reuseport *reuse;
struct bpf_prog *old_prog;
- if (sk_unhashed(sk) && sk->sk_reuseport) {
- int err = reuseport_alloc(sk, false);
+ if (sk_unhashed(sk)) {
+ int err;
+ if (!sk->sk_reuseport)
+ return -EINVAL;
+
+ err = reuseport_alloc(sk, false);
if (err)
return err;
} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -341,13 +618,24 @@ int reuseport_detach_prog(struct sock *sk)
struct sock_reuseport *reuse;
struct bpf_prog *old_prog;
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return sk->sk_reuseport ? -ENOENT : -EINVAL;
-
old_prog = NULL;
spin_lock_bh(&reuseport_lock);
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
+
+ /* reuse must be checked after acquiring the reuseport_lock
+ * because reuseport_grow() can detach a closed sk.
+ */
+ if (!reuse) {
+ spin_unlock_bh(&reuseport_lock);
+ return sk->sk_reuseport ? -ENOENT : -EINVAL;
+ }
+
+ if (sk_unhashed(sk) && reuse->num_closed_socks) {
+ spin_unlock_bh(&reuseport_lock);
+ return -ENOENT;
+ }
+
old_prog = rcu_replace_pointer(reuse->prog, old_prog,
lockdep_is_held(&reuseport_lock));
spin_unlock_bh(&reuseport_lock);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 858276e72c68..cc92ccb38432 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -113,8 +113,13 @@ static void mem_allocator_disconnect(void *allocator)
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
+ int type = xdp_rxq->mem.type;
int id = xdp_rxq->mem.id;
+ /* Reset mem info to defaults */
+ xdp_rxq->mem.id = 0;
+ xdp_rxq->mem.type = 0;
+
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
WARN(1, "Missing register, driver bug");
return;
@@ -123,7 +128,7 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
if (id == 0)
return;
- if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
+ if (type == MEM_TYPE_PAGE_POOL) {
rcu_read_lock();
xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
page_pool_destroy(xa->page_pool);
@@ -144,10 +149,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
xdp_rxq->dev = NULL;
-
- /* Reset mem info to defaults */
- xdp_rxq->mem.id = 0;
- xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
@@ -584,3 +585,31 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
return __xdp_build_skb_from_frame(xdpf, skb, dev);
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);
+
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
+{
+ unsigned int headroom, totalsize;
+ struct xdp_frame *nxdpf;
+ struct page *page;
+ void *addr;
+
+ headroom = xdpf->headroom + sizeof(*xdpf);
+ totalsize = headroom + xdpf->len;
+
+ if (unlikely(totalsize > PAGE_SIZE))
+ return NULL;
+ page = dev_alloc_page();
+ if (!page)
+ return NULL;
+ addr = page_to_virt(page);
+
+ memcpy(addr, xdpf, totalsize);
+
+ nxdpf = addr;
+ nxdpf->data = addr + headroom;
+ nxdpf->frame_sz = PAGE_SIZE;
+ nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
+ nxdpf->mem.id = 0;
+
+ return nxdpf;
+}
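
Editor's note: xdpf_clone() relies on the xdp_frame struct living at the start of its own headroom, so a single memcpy() of headroom + len bytes carries the metadata, the headroom and the packet data into the fresh page; only the data pointer then has to be re-pointed into the new page. A userspace model of that layout arithmetic (the struct is simplified and all names here are hypothetical):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SZ 4096

/* simplified stand-in for struct xdp_frame */
struct frame {
	void *data;
	unsigned int len;
	unsigned int headroom;	/* headroom between the struct and the data */
};

static struct frame *clone_frame(const struct frame *f)
{
	unsigned int headroom = f->headroom + sizeof(*f);
	unsigned int total = headroom + f->len;
	struct frame *nf;
	char *addr;

	if (total > PAGE_SZ)
		return NULL;		/* frame would not fit in one page */

	addr = malloc(PAGE_SZ);		/* stands in for dev_alloc_page() */
	if (!addr)
		return NULL;

	/* one copy: struct + headroom + packet data, all contiguous */
	memcpy(addr, f, total);

	nf = (struct frame *)addr;
	nf->data = addr + headroom;	/* re-point data into the new page */
	return nf;
}

int main(void)
{
	/* lay the source out like the kernel does: struct at the start of the
	 * page, packet data at page + sizeof(struct) + headroom */
	char *page = malloc(PAGE_SZ);
	struct frame *f = (struct frame *)page;
	struct frame *nf;

	f->headroom = 128;
	f->len = 64;
	f->data = page + sizeof(*f) + f->headroom;
	memcpy(f->data, "hello", 6);

	nf = clone_frame(f);
	printf("clone says: %s\n", nf ? (char *)nf->data : "(clone failed)");

	free(nf);
	free(page);
	return 0;
}
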
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 653e3bc9c87b..b441ab330fd3 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1381,7 +1381,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd,
skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh);
if (!skb)
- return -ENOBUFS;
+ return -ENOMEM;
if (dcbx_ver == DCB_CAP_DCBX_VER_IEEE)
err = dcbnl_ieee_fill(skb, dev);
@@ -1781,7 +1781,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq,
nlh->nlmsg_flags, &reply_nlh);
if (!reply_skb)
- return -ENOBUFS;
+ return -ENOMEM;
ret = fn->cb(netdev, nlh, nlh->nlmsg_seq, tb, reply_skb);
if (ret < 0) {
@@ -2075,8 +2075,6 @@ EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
static int __init dcbnl_init(void)
{
- INIT_LIST_HEAD(&dcb_app_list);
-
rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index e2a337fa9ff7..92a8c6bea316 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -688,6 +688,7 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
/**
* tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
+ * @loss_event_rate: loss event rate to invert
* When @loss_event_rate is large, there is a chance that p is truncated to 0.
* To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
*/
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 9cc9d1ee6cdb..c5c1d2b8045e 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -41,9 +41,9 @@ extern bool dccp_debug;
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
#else
-#define dccp_pr_debug(format, a...)
-#define dccp_pr_debug_cat(format, a...)
-#define dccp_debug(format, a...)
+#define dccp_pr_debug(format, a...) do {} while (0)
+#define dccp_pr_debug_cat(format, a...) do {} while (0)
+#define dccp_debug(format, a...) do {} while (0)
#endif
extern struct inet_hashinfo dccp_hashinfo;
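
Editor's note: the commit message is not shown here, but rewriting empty debug stubs as do {} while (0) is the usual way to keep a disabled macro behaving as exactly one statement, instead of expanding to nothing and leaving a bare ';' that compilers flag as a suspicious empty body. A minimal illustration with made-up macro names:

#include <stdio.h>

#define dbg_off(fmt, ...)			/* expands to nothing */
#define dbg_off_safe(fmt, ...)	do {} while (0)	/* always one full statement */

int main(void)
{
	int err = 42;

	if (err)
		dbg_off("err=%d\n", err);	/* becomes "if (err) ;" - gcc and
						 * clang can warn about the
						 * suspicious empty body */

	if (err)
		dbg_off_safe("err=%d\n", err);	/* still a single statement that
						 * needs its ';', like a real
						 * function call, so no warning
						 * and no surprises around
						 * if/else nesting */
	else
		printf("no error\n");

	return 0;
}
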
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ffc601a3b329..0ea29270d7e5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -329,7 +329,7 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
__DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
dccp_done(sk);
} else
@@ -356,7 +356,7 @@ static int dccp_v4_err(struct sk_buff *skb, u32 info)
inet = inet_sk(sk);
if (!sock_owned_by_user(sk) && inet->recverr) {
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
} else /* Only an error on timeout */
sk->sk_err_soft = err;
out:
@@ -977,7 +977,6 @@ static const struct net_protocol dccp_v4_protocol = {
.handler = dccp_v4_rcv,
.err_handler = dccp_v4_err,
.no_policy = 1,
- .netns_ok = 1,
.icmp_strict_tag_validation = 1,
};
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6f5304db5a67..fa663518fa0e 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -172,7 +172,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
* Wake people up to see the error
* (see connect in sock.c)
*/
- sk->sk_error_report(sk);
+ sk_error_report(sk);
dccp_done(sk);
} else
sk->sk_err_soft = err;
@@ -181,7 +181,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sock_owned_by_user(sk) && np->recverr) {
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
} else
sk->sk_err_soft = err;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6d705d90c614..abb5c596a817 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -302,7 +302,7 @@ int dccp_disconnect(struct sock *sk, int flags)
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return 0;
}
@@ -1126,7 +1126,7 @@ static int __init dccp_init(void)
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index db768f223ef7..27a3b37acd2e 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -20,7 +20,7 @@ int sysctl_dccp_retries2 __read_mostly = TCP_RETR2;
static void dccp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
dccp_done(sk);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 5dbd45dc35ad..dc92a67baea3 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -816,7 +816,7 @@ static int dn_auto_bind(struct socket *sock)
static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
{
struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
if (scp->state != DN_CR)
@@ -826,11 +826,11 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk));
dn_send_conn_conf(sk, allocation);
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
if (scp->state == DN_CC)
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
lock_sock(sk);
err = 0;
if (scp->state == DN_RUN)
@@ -844,9 +844,8 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
err = -EAGAIN;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
if (err == 0) {
sk->sk_socket->state = SS_CONNECTED;
} else if (scp->state != DN_CC) {
@@ -858,7 +857,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
static int dn_wait_run(struct sock *sk, long *timeo)
{
struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err = 0;
if (scp->state == DN_RUN)
@@ -867,11 +866,11 @@ static int dn_wait_run(struct sock *sk, long *timeo)
if (!*timeo)
return -EALREADY;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
if (scp->state == DN_CI || scp->state == DN_CC)
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
lock_sock(sk);
err = 0;
if (scp->state == DN_RUN)
@@ -885,9 +884,8 @@ static int dn_wait_run(struct sock *sk, long *timeo)
err = -ETIMEDOUT;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
out:
if (err == 0) {
sk->sk_socket->state = SS_CONNECTED;
@@ -1032,16 +1030,16 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
{
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sk_buff *skb = NULL;
int err = 0;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
for(;;) {
release_sock(sk);
skb = skb_dequeue(&sk->sk_receive_queue);
if (skb == NULL) {
- *timeo = schedule_timeout(*timeo);
+ *timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, *timeo);
skb = skb_dequeue(&sk->sk_receive_queue);
}
lock_sock(sk);
@@ -1056,9 +1054,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
err = -EAGAIN;
if (!*timeo)
break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
return skb == NULL ? ERR_PTR(err) : skb;
}
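
Editor's note: all three decnet waiters above move from the open-coded prepare_to_wait()/schedule_timeout() loop to DEFINE_WAIT_FUNC() plus wait_woken(), which records wakeups in the wait entry itself, so one arriving between the condition check and the sleep is not lost. The generic shape of that pattern is sketched below; condition() and wq_head are placeholders, and the socket lock juggling the decnet code does around the sleep is left out:

#include <linux/types.h>
#include <linux/sched.h>
#include <linux/wait.h>

static bool condition(void);	/* placeholder for the state being waited on */

static long example_wait(struct wait_queue_head *wq_head, long timeo)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(wq_head, &wait);
	while (!condition()) {
		if (!timeo)
			break;
		/* wait_woken() checks the per-entry "woken" flag before
		 * sleeping, so a wakeup that landed after the condition
		 * check is not lost the way a bare schedule_timeout() could
		 * lose it.
		 */
		timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
	}
	remove_wait_queue(wq_head, &wait);

	return timeo;
}
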
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index d1c50a48614b..0ee7d4c0c955 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -521,8 +521,7 @@ int dn_dev_set_default(struct net_device *dev, int force)
}
spin_unlock(&dndev_lock);
- if (old)
- dev_put(old);
+ dev_put(old);
return rv;
}
@@ -536,8 +535,7 @@ static void dn_dev_check_default(struct net_device *dev)
}
spin_unlock(&dndev_lock);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
/*
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 77fbf8e9df4b..269c029ad74f 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -92,8 +92,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
}
change_nexthops(fi) {
- if (nh->nh_dev)
- dev_put(nh->nh_dev);
+ dev_put(nh->nh_dev);
nh->nh_dev = NULL;
} endfor_nexthops(fi);
kfree(fi);
@@ -102,7 +101,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
void dn_fib_release_info(struct dn_fib_info *fi)
{
spin_lock(&dn_fib_info_lock);
- if (fi && --fi->fib_treeref == 0) {
+ if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
if (fi->fib_next)
fi->fib_next->fib_prev = fi->fib_prev;
if (fi->fib_prev)
@@ -385,11 +384,11 @@ link_it:
if ((ofi = dn_fib_find_info(fi)) != NULL) {
fi->fib_dead = 1;
dn_fib_free_info(fi);
- ofi->fib_treeref++;
+ refcount_inc(&ofi->fib_treeref);
return ofi;
}
- fi->fib_treeref++;
+ refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock(&dn_fib_info_lock);
fi->fib_next = dn_fib_info_list;
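
Editor's note: the fib_treeref hunks above are a plain-int to refcount_t conversion: refcount_set() for the initial reference, refcount_inc() when an existing dn_fib_info is reused, and refcount_dec_and_test() to decide when the structure can be torn down, with the usual saturate-and-warn behaviour on over/underflow. A generic sketch of the same pattern; the struct and helpers below are stand-ins, not the dn_fib types:

#include <linux/refcount.h>
#include <linux/slab.h>

struct info {
	refcount_t treeref;
};

static struct info *info_alloc(void)
{
	struct info *fi = kzalloc(sizeof(*fi), GFP_KERNEL);

	if (fi)
		refcount_set(&fi->treeref, 1);	/* initial reference */
	return fi;
}

static void info_get(struct info *fi)
{
	refcount_inc(&fi->treeref);	/* saturates and warns on overflow */
}

static void info_put(struct info *fi)
{
	if (refcount_dec_and_test(&fi->treeref))
		kfree(fi);		/* last reference dropped */
}
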
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 1a12912b88d6..7ab788f41a3f 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -870,7 +870,7 @@ int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
/*
* Read out ack data here, this applies equally
- * to data, other data, link serivce and both
+ * to data, other data, link service and both
* ack data and ack otherdata.
*/
dn_process_ack(sk, skb, other);
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 00f2ed721ec1..eadc89583168 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -179,7 +179,7 @@ static void dn_nsp_rtt(struct sock *sk, long rtt)
scp->nsp_srtt = 1;
/*
- * Add new rtt varience to smoothed varience
+ * Add new rtt variance to smoothed varience
+	 * Add new rtt variance to smoothed variance
*/
delta >>= 1;
rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 32b1bed8ae51..7e85f2a1ae25 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -604,7 +604,7 @@ drop_it:
static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
{
/*
- * I know we drop the packet here, but thats considered success in
+ * I know we drop the packet here, but that's considered success in
* this case
*/
kfree_skb(skb);
@@ -1026,8 +1026,7 @@ source_ok:
if (!fld.daddr) {
fld.daddr = fld.saddr;
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
err = -EINVAL;
dev_out = init_net.loopback_dev;
if (!dev_out->dn_ptr)
@@ -1084,8 +1083,7 @@ source_ok:
neigh_release(neigh);
neigh = NULL;
} else {
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
if (dn_dev_islocal(neigh->dev, fld.daddr)) {
dev_out = init_net.loopback_dev;
res.type = RTN_LOCAL;
@@ -1144,8 +1142,7 @@ select_source:
if (res.type == RTN_LOCAL) {
if (!fld.saddr)
fld.saddr = fld.daddr;
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
dev_out = init_net.loopback_dev;
dev_hold(dev_out);
if (!dev_out->dn_ptr)
@@ -1168,8 +1165,7 @@ select_source:
if (!fld.saddr)
fld.saddr = DN_FIB_RES_PREFSRC(res);
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
dev_out = DN_FIB_RES_DEV(res);
dev_hold(dev_out);
fld.flowidn_oif = dev_out->ifindex;
@@ -1222,8 +1218,7 @@ done:
neigh_release(neigh);
if (free_res)
dn_fib_res_put(&res);
- if (dev_out)
- dev_put(dev_out);
+ dev_put(dev_out);
out:
return err;
@@ -1503,8 +1498,7 @@ done:
if (free_res)
dn_fib_res_put(&res);
dev_put(in_dev);
- if (out_dev)
- dev_put(out_dev);
+ dev_put(out_dev);
out:
return err;
diff --git a/net/devres.c b/net/devres.c
index 1f9be2133787..5ccf6ca311dc 100644
--- a/net/devres.c
+++ b/net/devres.c
@@ -60,7 +60,7 @@ static int netdev_devres_match(struct device *dev, void *this, void *match_data)
* @ndev: device to register
*
* This is a devres variant of register_netdev() for which the unregister
- * function will be call automatically when the managing device is
+ * function will be called automatically when the managing device is
* detached. Note: the net_device used must also be resource managed by
* the same struct device.
*/
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 00bb89b2d86f..548285539752 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -18,16 +18,6 @@ if NET_DSA
# Drivers must select the appropriate tagging format(s)
-config NET_DSA_TAG_8021Q
- tristate
- select VLAN_8021Q
- help
- Unlike the other tagging protocols, the 802.1Q config option simply
- provides helpers for other tagging implementations that might rely on
- VLAN in one way or another. It is not a complete solution.
-
- Drivers which use these helpers should select this as dependency.
-
config NET_DSA_TAG_AR9331
tristate "Tag driver for Atheros AR9331 SoC with built-in switch"
help
@@ -126,7 +116,6 @@ config NET_DSA_TAG_OCELOT_8021Q
tristate "Tag driver for Ocelot family of switches, using VLAN"
depends on MSCC_OCELOT_SWITCH_LIB || \
(MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
- select NET_DSA_TAG_8021Q
help
Say Y or M if you want to enable support for tagging frames with a
custom VLAN-based header. Frames that require timestamping, such as
@@ -149,7 +138,7 @@ config NET_DSA_TAG_LAN9303
config NET_DSA_TAG_SJA1105
tristate "Tag driver for NXP SJA1105 switches"
- select NET_DSA_TAG_8021Q
+ depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105
select PACKING
help
Say Y or M if you want to enable support for tagging frames with the
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 44bc79952b8b..67ea009f242c 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,10 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o
# tagging formats
-obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o
obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
obj-$(CONFIG_NET_DSA_TAG_DSA_COMMON) += tag_dsa.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 84cad1be9ce4..1dc45e40f961 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -238,7 +238,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb)
return 0;
- nskb = cpu_dp->rcv(skb, dev, pt);
+ nskb = cpu_dp->rcv(skb, dev);
if (!nskb) {
kfree_skb(skb);
return 0;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index b71e87909f0e..1b2b25d7bd02 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -21,6 +21,9 @@
static DEFINE_MUTEX(dsa2_mutex);
LIST_HEAD(dsa_tree_list);
+/* Track the bridges with forwarding offload enabled */
+static unsigned long dsa_fwd_offloading_bridges;
+
/**
* dsa_tree_notify - Execute code for all switches in a DSA switch tree.
* @dst: collection of struct dsa_switch devices to notify.
@@ -49,6 +52,9 @@ int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v)
* Can be used to notify the switching fabric of events such as cross-chip
* bridging between disjoint trees (such as islands of tagger-compatible
* switches bridged by an incompatible middle switch).
+ *
+ * WARNING: this function is not reliable during probe time, because probing
+ * between trees is asynchronous and not all DSA trees might have probed.
*/
int dsa_broadcast(unsigned long e, void *v)
{
@@ -123,6 +129,51 @@ void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag)
}
}
+static int dsa_bridge_num_find(const struct net_device *bridge_dev)
+{
+ struct dsa_switch_tree *dst;
+ struct dsa_port *dp;
+
+ /* When preparing the offload for a port, it will have a valid
+ * dp->bridge_dev pointer but a not yet valid dp->bridge_num.
+ * However there might be other ports having the same dp->bridge_dev
+ * and a valid dp->bridge_num, so just ignore this port.
+ */
+ list_for_each_entry(dst, &dsa_tree_list, list)
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->bridge_dev == bridge_dev &&
+ dp->bridge_num != -1)
+ return dp->bridge_num;
+
+ return -1;
+}
+
+int dsa_bridge_num_get(const struct net_device *bridge_dev, int max)
+{
+ int bridge_num = dsa_bridge_num_find(bridge_dev);
+
+ if (bridge_num < 0) {
+ /* First port that offloads TX forwarding for this bridge */
+ bridge_num = find_first_zero_bit(&dsa_fwd_offloading_bridges,
+ DSA_MAX_NUM_OFFLOADING_BRIDGES);
+ if (bridge_num >= max)
+ return -1;
+
+ set_bit(bridge_num, &dsa_fwd_offloading_bridges);
+ }
+
+ return bridge_num;
+}
+
+void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num)
+{
+ /* Check if the bridge is still in use, otherwise it is time
+ * to clean it up so we can reuse this bridge_num later.
+ */
+ if (!dsa_bridge_num_find(bridge_dev))
+ clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
+}
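
Editor's note: dsa_bridge_num_get()/dsa_bridge_num_put() above hand out bridge numbers as bit positions in a single unsigned long (dsa_fwd_offloading_bridges), so at most BITS_PER_LONG bridges can have TX forwarding offload at once, and a freed number becomes reusable as soon as its last port drops it. A stand-alone model of that allocation scheme (plain C, not the DSA code):

#include <stdio.h>

static unsigned long bridges;	/* one bit per offloaded bridge */

static int bridge_num_get(int max)
{
	int bit;

	for (bit = 0; bit < max; bit++)		/* find_first_zero_bit() */
		if (!(bridges & (1UL << bit)))
			break;
	if (bit >= max)
		return -1;			/* all offload slots in use */
	bridges |= 1UL << bit;			/* set_bit() */
	return bit;
}

static void bridge_num_put(int bit)
{
	bridges &= ~(1UL << bit);		/* clear_bit() */
}

int main(void)
{
	int a = bridge_num_get(8), b = bridge_num_get(8);

	bridge_num_put(a);			/* slot 0 becomes free again */
	printf("a=%d b=%d next=%d\n", a, b, bridge_num_get(8));
	return 0;
}
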
+
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
{
struct dsa_switch_tree *dst;
@@ -219,21 +270,6 @@ static void dsa_tree_put(struct dsa_switch_tree *dst)
kref_put(&dst->refcount, dsa_tree_release);
}
-static bool dsa_port_is_dsa(struct dsa_port *port)
-{
- return port->type == DSA_PORT_TYPE_DSA;
-}
-
-static bool dsa_port_is_cpu(struct dsa_port *port)
-{
- return port->type == DSA_PORT_TYPE_CPU;
-}
-
-static bool dsa_port_is_user(struct dsa_port *dp)
-{
- return dp->type == DSA_PORT_TYPE_USER;
-}
-
static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
struct device_node *dn)
{
@@ -326,6 +362,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
return NULL;
}
+/* Assign the default CPU port (the first one in the tree) to all ports of the
+ * fabric which don't already have one as part of their own switch.
+ */
static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
{
struct dsa_port *cpu_dp, *dp;
@@ -336,15 +375,48 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
return -EINVAL;
}
- /* Assign the default CPU port to all ports of the fabric */
- list_for_each_entry(dp, &dst->ports, list)
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->cpu_dp)
+ continue;
+
if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
dp->cpu_dp = cpu_dp;
+ }
return 0;
}
-static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
+/* Perform initial assignment of CPU ports to user ports and DSA links in the
+ * fabric, giving preference to CPU ports local to each switch. Default to
+ * using the first CPU port in the switch tree if the port does not have a CPU
+ * port local to this switch.
+ */
+static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *cpu_dp, *dp;
+
+ list_for_each_entry(cpu_dp, &dst->ports, list) {
+ if (!dsa_port_is_cpu(cpu_dp))
+ continue;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ /* Prefer a local CPU port */
+ if (dp->ds != cpu_dp->ds)
+ continue;
+
+ /* Prefer the first local CPU port found */
+ if (dp->cpu_dp)
+ continue;
+
+ if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+ dp->cpu_dp = cpu_dp;
+ }
+ }
+
+ return dsa_tree_setup_default_cpu(dst);
+}
+
+static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
@@ -363,6 +435,9 @@ static int dsa_port_setup(struct dsa_port *dp)
if (dp->setup)
return 0;
+ INIT_LIST_HEAD(&dp->fdbs);
+ INIT_LIST_HEAD(&dp->mdbs);
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
dsa_port_disable(dp);
@@ -458,6 +533,7 @@ static int dsa_port_devlink_setup(struct dsa_port *dp)
static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_mac_addr *a, *tmp;
if (!dp->setup)
return;
@@ -483,6 +559,16 @@ static void dsa_port_teardown(struct dsa_port *dp)
break;
}
+ list_for_each_entry_safe(a, tmp, &dp->fdbs, list) {
+ list_del(&a->list);
+ kfree(a);
+ }
+
+ list_for_each_entry_safe(a, tmp, &dp->mdbs, list) {
+ list_del(&a->list);
+ kfree(a);
+ }
+
dp->setup = false;
}
@@ -711,13 +797,14 @@ static int dsa_switch_setup(struct dsa_switch *ds)
/* Add the switch to devlink before calling setup, so that setup can
* add dpipe tables
*/
- ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
+ ds->devlink =
+ devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev);
if (!ds->devlink)
return -ENOMEM;
dl_priv = devlink_priv(ds->devlink);
dl_priv->ds = ds;
- err = devlink_register(ds->devlink, ds->dev);
+ err = devlink_register(ds->devlink);
if (err)
goto free_devlink;
@@ -922,13 +1009,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
if (!complete)
return 0;
- err = dsa_tree_setup_default_cpu(dst);
+ err = dsa_tree_setup_cpu_ports(dst);
if (err)
return err;
err = dsa_tree_setup_switches(dst);
if (err)
- goto teardown_default_cpu;
+ goto teardown_cpu_ports;
err = dsa_tree_setup_master(dst);
if (err)
@@ -948,8 +1035,8 @@ teardown_master:
dsa_tree_teardown_master(dst);
teardown_switches:
dsa_tree_teardown_switches(dst);
-teardown_default_cpu:
- dsa_tree_teardown_default_cpu(dst);
+teardown_cpu_ports:
+ dsa_tree_teardown_cpu_ports(dst);
return err;
}
@@ -967,7 +1054,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_switches(dst);
- dsa_tree_teardown_default_cpu(dst);
+ dsa_tree_teardown_cpu_ports(dst);
list_for_each_entry_safe(dl, next, &dst->rtable, list) {
list_del(&dl->list);
@@ -1045,6 +1132,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
dp->ds = ds;
dp->index = index;
+ dp->bridge_num = -1;
INIT_LIST_HEAD(&dp->list);
list_add_tail(&dp->list, &dst->ports);
@@ -1259,6 +1347,16 @@ static int dsa_switch_parse_member_of(struct dsa_switch *ds,
if (!ds->dst)
return -ENOMEM;
+ if (dsa_switch_find(ds->dst->index, ds->index)) {
+ dev_err(ds->dev,
+ "A DSA switch with index %d already exists in tree %d\n",
+ ds->index, ds->dst->index);
+ return -EEXIST;
+ }
+
+ if (ds->dst->last_switch < ds->index)
+ ds->dst->last_switch = ds->index;
+
return 0;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 92282de54230..33ab7d7af9eb 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -14,12 +14,16 @@
#include <net/dsa.h>
#include <net/gro_cells.h>
+#define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG
+
enum {
DSA_NOTIFIER_AGEING_TIME,
DSA_NOTIFIER_BRIDGE_JOIN,
DSA_NOTIFIER_BRIDGE_LEAVE,
DSA_NOTIFIER_FDB_ADD,
DSA_NOTIFIER_FDB_DEL,
+ DSA_NOTIFIER_HOST_FDB_ADD,
+ DSA_NOTIFIER_HOST_FDB_DEL,
DSA_NOTIFIER_HSR_JOIN,
DSA_NOTIFIER_HSR_LEAVE,
DSA_NOTIFIER_LAG_CHANGE,
@@ -27,6 +31,8 @@ enum {
DSA_NOTIFIER_LAG_LEAVE,
DSA_NOTIFIER_MDB_ADD,
DSA_NOTIFIER_MDB_DEL,
+ DSA_NOTIFIER_HOST_MDB_ADD,
+ DSA_NOTIFIER_HOST_MDB_DEL,
DSA_NOTIFIER_VLAN_ADD,
DSA_NOTIFIER_VLAN_DEL,
DSA_NOTIFIER_MTU,
@@ -35,6 +41,8 @@ enum {
DSA_NOTIFIER_MRP_DEL,
DSA_NOTIFIER_MRP_ADD_RING_ROLE,
DSA_NOTIFIER_MRP_DEL_RING_ROLE,
+ DSA_NOTIFIER_TAG_8021Q_VLAN_ADD,
+ DSA_NOTIFIER_TAG_8021Q_VLAN_DEL,
};
/* DSA_NOTIFIER_AGEING_TIME */
@@ -84,7 +92,7 @@ struct dsa_notifier_vlan_info {
/* DSA_NOTIFIER_MTU */
struct dsa_notifier_mtu_info {
- bool propagate_upstream;
+ bool targeted_match;
int sw_index;
int port;
int mtu;
@@ -109,9 +117,18 @@ struct dsa_notifier_mrp_ring_role_info {
int port;
};
+/* DSA_NOTIFIER_TAG_8021Q_VLAN_* */
+struct dsa_notifier_tag_8021q_vlan_info {
+ int tree_index;
+ int sw_index;
+ int port;
+ u16 vid;
+};
+
struct dsa_switchdev_event_work {
struct dsa_switch *ds;
int port;
+ struct net_device *dev;
struct work_struct work;
unsigned long event;
/* Specific for SWITCHDEV_FDB_ADD_TO_DEVICE and
@@ -119,6 +136,7 @@ struct dsa_switchdev_event_work {
*/
unsigned char addr[ETH_ALEN];
u16 vid;
+ bool host_addr;
};
/* DSA_NOTIFIER_HSR_* */
@@ -154,6 +172,11 @@ const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
bool dsa_schedule_work(struct work_struct *work);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
+static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
+{
+ return ops->needed_headroom + ops->needed_tailroom;
+}
+
/* master.c */
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp);
void dsa_master_teardown(struct net_device *dev);
@@ -176,43 +199,51 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
/* port.c */
void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
const struct dsa_device_ops *tag_ops);
-int dsa_port_set_state(struct dsa_port *dp, u8 state);
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
void dsa_port_disable_rt(struct dsa_port *dp);
void dsa_port_disable(struct dsa_port *dp);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
struct netlink_ext_ack *extack);
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
int dsa_port_lag_change(struct dsa_port *dp,
struct netdev_lag_lower_state_info *linfo);
int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev,
struct netdev_lag_upper_info *uinfo,
struct netlink_ext_ack *extack);
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag_dev);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct netlink_ext_ack *extack);
bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock);
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
- bool propagate_upstream);
+ bool targeted_match);
int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
+int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
+int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid);
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
int dsa_port_mdb_add(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
int dsa_port_mdb_del(const struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb);
int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack);
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
- struct netlink_ext_ack *extack);
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack);
@@ -230,16 +261,18 @@ int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast);
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
- struct net_device *dev)
+ const struct net_device *dev)
{
return dsa_port_to_bridge_port(dp) == dev;
}
static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
- struct net_device *bridge_dev)
+ const struct net_device *bridge_dev)
{
/* DSA ports connected to a bridge, and event was emitted
* for the bridge.
@@ -249,7 +282,7 @@ static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
/* Returns true if any port of this tree offloads the given net_device */
static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
- struct net_device *dev)
+ const struct net_device *dev)
{
struct dsa_port *dp;
@@ -260,6 +293,19 @@ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
return false;
}
+/* Returns true if any port of this tree offloads the given bridge */
+static inline bool dsa_tree_offloads_bridge(struct dsa_switch_tree *dst,
+ const struct net_device *bridge_dev)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_offloads_bridge(dp, bridge_dev))
+ return true;
+
+ return false;
+}
+
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
extern struct notifier_block dsa_slave_switchdev_notifier;
@@ -274,6 +320,8 @@ int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
void dsa_slave_setup_tagger(struct net_device *slave);
int dsa_slave_change_mtu(struct net_device *dev, int new_mtu);
+int dsa_slave_manage_vlan_filtering(struct net_device *dev,
+ bool vlan_filtering);
static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
{
@@ -349,6 +397,141 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb)
return skb;
}
+/* For switches without hardware support for DSA tagging to be able
+ * to support termination through the bridge.
+ */
+static inline struct net_device *
+dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid)
+{
+ struct dsa_port *cpu_dp = master->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct bridge_vlan_info vinfo;
+ struct net_device *slave;
+ struct dsa_port *dp;
+ int err;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ if (dp->type != DSA_PORT_TYPE_USER)
+ continue;
+
+ if (!dp->bridge_dev)
+ continue;
+
+ if (dp->stp_state != BR_STATE_LEARNING &&
+ dp->stp_state != BR_STATE_FORWARDING)
+ continue;
+
+ /* Since the bridge might learn this packet, keep the CPU port
+ * affinity with the port that will be used for the reply on
+ * xmit.
+ */
+ if (dp->cpu_dp != cpu_dp)
+ continue;
+
+ slave = dp->slave;
+
+ err = br_vlan_get_info_rcu(slave, vid, &vinfo);
+ if (err)
+ continue;
+
+ return slave;
+ }
+
+ return NULL;
+}
+
+/* If the ingress port offloads the bridge, we mark the frame as autonomously
+ * forwarded by hardware, so the software bridge doesn't forward in twice, back
+ * forwarded by hardware, so the software bridge doesn't forward it twice, back
+ * to us, because we already did. However, if we're in fallback mode and we do
+ * software bridging, we are not offloading it, therefore the dp->bridge_dev
+ * pointer is not populated, and flooding needs to be done by software (we are
+ * effectively operating in standalone ports mode).
+ */
+static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb)
+{
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+
+ skb->offload_fwd_mark = !!(dp->bridge_dev);
+}
+
+/* Helper for removing DSA header tags from packets in the RX path.
+ * Must not be called before skb_pull(len).
+ * skb->data
+ * |
+ * v
+ * | | | | | | | | | | | | | | | | | | |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | Destination MAC | Source MAC | DSA header | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | |
+ * <----- len -----> <----- len ----->
+ * |
+ * >>>>>>> v
+ * >>>>>>> | | | | | | | | | | | | | | |
+ * >>>>>>> +-----------------------+-----------------------+-------+
+ * >>>>>>> | Destination MAC | Source MAC | EType |
+ * +-----------------------+-----------------------+-------+
+ * ^
+ * |
+ * skb->data
+ */
+static inline void dsa_strip_etype_header(struct sk_buff *skb, int len)
+{
+ memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN);
+}
+
+/* Helper for creating space for DSA header tags in TX path packets.
+ * Must not be called before skb_push(len).
+ *
+ * Before:
+ *
+ * <<<<<<< | | | | | | | | | | | | | | |
+ * ^ <<<<<<< +-----------------------+-----------------------+-------+
+ * | <<<<<<< | Destination MAC | Source MAC | EType |
+ * | +-----------------------+-----------------------+-------+
+ * <----- len ----->
+ * |
+ * |
+ * skb->data
+ *
+ * After:
+ *
+ * | | | | | | | | | | | | | | | | | | |
+ * +-----------------------+-----------------------+---------------+-------+
+ * | Destination MAC | Source MAC | DSA header | EType |
+ * +-----------------------+-----------------------+---------------+-------+
+ * ^ | |
+ * | <----- len ----->
+ * skb->data
+ */
+static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len)
+{
+ memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
+}
+
+/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from
+ * skb_mac_header(skb), which leaves skb->data pointing at the first byte after
+ * what the DSA master perceives as the EtherType (the beginning of the L3
+ * protocol). Since DSA EtherType header taggers treat the EtherType as part of
+ * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header
+ * is located 2 bytes behind skb->data. Note that EtherType in this context
+ * means the first 2 bytes of the DSA header, not the encapsulated EtherType
+ * that will become visible after the DSA header is stripped.
+ */
+static inline void *dsa_etype_header_pos_rx(struct sk_buff *skb)
+{
+ return skb->data - 2;
+}
+
+/* On TX, skb->data points to skb_mac_header(skb), which means that EtherType
+ * header taggers start exactly where the EtherType is (the EtherType is
+ * treated as part of the DSA header).
+ */
+static inline void *dsa_etype_header_pos_tx(struct sk_buff *skb)
+{
+ return skb->data + 2 * ETH_ALEN;
+}
+
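
Editor's note: the helpers added above (dsa_strip_etype_header(), dsa_alloc_etype_header() and the dsa_etype_header_pos_*() accessors) are meant to be called from EtherType-based taggers right after skb_pull()/skb_push(). A hedged sketch of how such a tagger might use them; the 4-byte tag format, EXAMPLE_TAG_LEN and the field layout are made up, only the helper usage mirrors the real taggers:

#include "dsa_priv.h"

#define EXAMPLE_TAG_LEN 4	/* hypothetical 4-byte tag after the MAC addresses */

static struct sk_buff *example_tag_xmit(struct sk_buff *skb,
					struct net_device *dev)
{
	struct dsa_port *dp = dsa_slave_to_port(dev);
	u8 *tag;

	/* make room, then shift the MAC addresses up over the new gap */
	skb_push(skb, EXAMPLE_TAG_LEN);
	dsa_alloc_etype_header(skb, EXAMPLE_TAG_LEN);

	/* the tag starts where the EtherType used to be */
	tag = dsa_etype_header_pos_tx(skb);
	tag[0] = 0xde;			/* hypothetical tag contents */
	tag[1] = 0xad;
	tag[2] = dp->index;
	tag[3] = 0;

	return skb;
}

static struct sk_buff *example_tag_rcv(struct sk_buff *skb,
				       struct net_device *dev)
{
	u8 *tag = dsa_etype_header_pos_rx(skb);
	int port = tag[2];		/* hypothetical source-port field */

	skb->dev = dsa_master_find_slave(dev, 0, port);
	if (!skb->dev)
		return NULL;

	/* consume the tag, then pull the MAC addresses back down */
	skb_pull_rcsum(skb, EXAMPLE_TAG_LEN);
	dsa_strip_etype_header(skb, EXAMPLE_TAG_LEN);

	dsa_default_offload_fwd_mark(skb);

	return skb;
}
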
/* switch.c */
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
@@ -362,6 +545,18 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst,
struct net_device *master,
const struct dsa_device_ops *tag_ops,
const struct dsa_device_ops *old_tag_ops);
+int dsa_bridge_num_get(const struct net_device *bridge_dev, int max);
+void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num);
+
+/* tag_8021q.c */
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info);
+int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info);
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info);
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info);
extern struct list_head dsa_tree_list;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 63adbc21a735..e8e19857621b 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -210,14 +210,14 @@ static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
}
- if (dev->netdev_ops->ndo_do_ioctl)
- err = dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ if (dev->netdev_ops->ndo_eth_ioctl)
+ err = dev->netdev_ops->ndo_eth_ioctl(dev, ifr, cmd);
return err;
}
static const struct dsa_netdevice_ops dsa_netdev_ops = {
- .ndo_do_ioctl = dsa_master_ioctl,
+ .ndo_eth_ioctl = dsa_master_ioctl,
};
static int dsa_master_ethtool_setup(struct net_device *dev)
@@ -346,10 +346,12 @@ static struct lock_class_key dsa_master_addr_list_lock_key;
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
- int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead;
+ const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops;
struct dsa_switch *ds = cpu_dp->ds;
struct device_link *consumer_link;
- int ret;
+ int mtu, ret;
+
+ mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops);
/* The DSA master must use SET_NETDEV_DEV for this to work. */
consumer_link = device_link_add(ds->dev, dev->dev.parent,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 6379d66a6bb3..616330a16d31 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -30,7 +30,52 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
return dsa_tree_notify(dp->ds->dst, e, v);
}
-int dsa_port_set_state(struct dsa_port *dp, u8 state)
+static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp)
+{
+ struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+ struct switchdev_notifier_fdb_info info = {
+ /* flush all VLANs */
+ .vid = 0,
+ };
+
+ /* When the port becomes standalone it has already left the bridge.
+ * Don't notify the bridge in that case.
+ */
+ if (!brport_dev)
+ return;
+
+ call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
+ brport_dev, &info.info, NULL);
+}
+
+static void dsa_port_fast_age(const struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->port_fast_age)
+ return;
+
+ ds->ops->port_fast_age(ds, dp->index);
+
+ dsa_port_notify_bridge_fdb_flush(dp);
+}
+
+static bool dsa_port_can_configure_learning(struct dsa_port *dp)
+{
+ struct switchdev_brport_flags flags = {
+ .mask = BR_LEARNING,
+ };
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags)
+ return false;
+
+ err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL);
+ return !err;
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
{
struct dsa_switch *ds = dp->ds;
int port = dp->index;
@@ -40,10 +85,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
ds->ops->port_stp_state_set(ds, port, state);
- if (ds->ops->port_fast_age) {
+ if (!dsa_port_can_configure_learning(dp) ||
+ (do_fast_age && dp->learning)) {
/* Fast age FDB entries or flush appropriate forwarding database
* for the given port, if we are moving it from Learning or
* Forwarding state, to Disabled or Blocking or Listening state.
+ * Ports that were standalone before the STP state change don't
+ * need to fast age the FDB, since address learning is off in
+ * standalone mode.
*/
if ((dp->stp_state == BR_STATE_LEARNING ||
@@ -51,7 +100,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
(state == BR_STATE_DISABLED ||
state == BR_STATE_BLOCKING ||
state == BR_STATE_LISTENING))
- ds->ops->port_fast_age(ds, port);
+ dsa_port_fast_age(dp);
}
dp->stp_state = state;
@@ -59,11 +108,12 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
return 0;
}
-static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
+ bool do_fast_age)
{
int err;
- err = dsa_port_set_state(dp, state);
+ err = dsa_port_set_state(dp, state, do_fast_age);
if (err)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
@@ -81,7 +131,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
}
if (!dp->bridge_dev)
- dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false);
if (dp->pl)
phylink_start(dp->pl);
@@ -109,7 +159,7 @@ void dsa_port_disable_rt(struct dsa_port *dp)
phylink_stop(dp->pl);
if (!dp->bridge_dev)
- dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+ dsa_port_set_state_now(dp, BR_STATE_DISABLED, false);
if (ds->ops->port_disable)
ds->ops->port_disable(ds, port);
@@ -167,8 +217,8 @@ static void dsa_port_clear_brport_flags(struct dsa_port *dp)
}
}
-static int dsa_port_switchdev_sync(struct dsa_port *dp,
- struct netlink_ext_ack *extack)
+static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
+ struct netlink_ext_ack *extack)
{
struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
struct net_device *br = dp->bridge_dev;
@@ -178,7 +228,7 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
if (err)
return err;
- err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev));
+ err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false);
if (err && err != -EOPNOTSUPP)
return err;
@@ -186,34 +236,14 @@ static int dsa_port_switchdev_sync(struct dsa_port *dp,
if (err && err != -EOPNOTSUPP)
return err;
- err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
err = dsa_port_ageing_time(dp, br_get_ageing_time(br));
if (err && err != -EOPNOTSUPP)
return err;
- err = br_mdb_replay(br, brport_dev,
- &dsa_slave_switchdev_blocking_notifier,
- extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- err = br_fdb_replay(br, brport_dev, &dsa_slave_switchdev_notifier);
- if (err && err != -EOPNOTSUPP)
- return err;
-
- err = br_vlan_replay(br, brport_dev,
- &dsa_slave_switchdev_blocking_notifier,
- extack);
- if (err && err != -EOPNOTSUPP)
- return err;
-
return 0;
}
-static void dsa_port_switchdev_unsync(struct dsa_port *dp)
+static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
{
/* Configure the port for standalone mode (no address learning,
* flood everything).
@@ -231,21 +261,63 @@ static void dsa_port_switchdev_unsync(struct dsa_port *dp)
/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
* so allow it to be in BR_STATE_FORWARDING to be kept functional
*/
- dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+ dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true);
/* VLAN filtering is handled by dsa_switch_bridge_leave */
- /* Some drivers treat the notification for having a local multicast
- * router by allowing multicast to be flooded to the CPU, so we should
- * allow this in standalone mode too.
- */
- dsa_port_mrouter(dp->cpu_dp, true, NULL);
-
/* Ageing time may be global to the switch chip, so don't change it
* here because we have no good reason (or value) to change it to.
*/
}
+static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp,
+ struct net_device *bridge_dev)
+{
+ int bridge_num = dp->bridge_num;
+ struct dsa_switch *ds = dp->ds;
+
+ /* No bridge TX forwarding offload => do nothing */
+ if (!ds->ops->port_bridge_tx_fwd_unoffload || dp->bridge_num == -1)
+ return;
+
+ dp->bridge_num = -1;
+
+ dsa_bridge_num_put(bridge_dev, bridge_num);
+
+ /* Notify the chips only once the offload has been deactivated, so
+ * that they can update their configuration accordingly.
+ */
+ ds->ops->port_bridge_tx_fwd_unoffload(ds, dp->index, bridge_dev,
+ bridge_num);
+}
+
+static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp,
+ struct net_device *bridge_dev)
+{
+ struct dsa_switch *ds = dp->ds;
+ int bridge_num, err;
+
+ if (!ds->ops->port_bridge_tx_fwd_offload)
+ return false;
+
+ bridge_num = dsa_bridge_num_get(bridge_dev,
+ ds->num_fwd_offloading_bridges);
+ if (bridge_num < 0)
+ return false;
+
+ dp->bridge_num = bridge_num;
+
+ /* Notify the driver */
+ err = ds->ops->port_bridge_tx_fwd_offload(ds, dp->index, bridge_dev,
+ bridge_num);
+ if (err) {
+ dsa_port_bridge_tx_fwd_unoffload(dp, bridge_dev);
+ return false;
+ }
+
+ return true;
+}
+
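For orientation, a hedged sketch (not part of this patch) of the driver side that dsa_port_bridge_tx_fwd_offload() above relies on: the two ops and the num_fwd_offloading_bridges field are the real hooks, while the foo_* names and their bodies are purely hypothetical.

static int foo_port_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
					   struct net_device *bridge,
					   int bridge_num)
{
	/* Map bridge_num to a hardware forwarding domain for this port;
	 * foo_set_fwd_domain() is a made-up helper.
	 */
	return foo_set_fwd_domain(ds, port, bridge_num);
}

static void foo_port_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
					      struct net_device *bridge,
					      int bridge_num)
{
	foo_clear_fwd_domain(ds, port, bridge_num);
}

static int foo_setup(struct dsa_switch *ds)
{
	/* Without a nonzero value here, dsa_bridge_num_get() never hands
	 * out a bridge_num and the offload is silently skipped.
	 */
	ds->num_fwd_offloading_bridges = 7;

	return 0;
}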
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
struct netlink_ext_ack *extack)
{
@@ -255,6 +327,9 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
.port = dp->index,
.br = br,
};
+ struct net_device *dev = dp->slave;
+ struct net_device *brport_dev;
+ bool tx_fwd_offload;
int err;
/* Here the interface is already bridged. Reflect the current
@@ -262,16 +337,31 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br,
*/
dp->bridge_dev = br;
+ brport_dev = dsa_port_to_bridge_port(dp);
+
err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
if (err)
goto out_rollback;
- err = dsa_port_switchdev_sync(dp, extack);
+ tx_fwd_offload = dsa_port_bridge_tx_fwd_offload(dp, br);
+
+ err = switchdev_bridge_port_offload(brport_dev, dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier,
+ tx_fwd_offload, extack);
if (err)
goto out_rollback_unbridge;
+ err = dsa_port_switchdev_sync_attrs(dp, extack);
+ if (err)
+ goto out_rollback_unoffload;
+
return 0;
+out_rollback_unoffload:
+ switchdev_bridge_port_unoffload(brport_dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier);
out_rollback_unbridge:
dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
out_rollback:
@@ -279,6 +369,19 @@ out_rollback:
return err;
}
+void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br)
+{
+ struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+
+ /* Don't try to unoffload something that is not offloaded */
+ if (!brport_dev)
+ return;
+
+ switchdev_bridge_port_unoffload(brport_dev, dp,
+ &dsa_slave_switchdev_notifier,
+ &dsa_slave_switchdev_blocking_notifier);
+}
+
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -294,11 +397,15 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
*/
dp->bridge_dev = NULL;
+ dsa_port_bridge_tx_fwd_unoffload(dp, br);
+
err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
if (err)
- pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
+ dev_err(dp->ds->dev,
+ "port %d failed to notify DSA_NOTIFIER_BRIDGE_LEAVE: %pe\n",
+ dp->index, ERR_PTR(err));
- dsa_port_switchdev_unsync(dp);
+ dsa_port_switchdev_unsync_attrs(dp);
}
int dsa_port_lag_change(struct dsa_port *dp,
@@ -366,6 +473,12 @@ err_lag_join:
return err;
}
+void dsa_port_pre_lag_leave(struct dsa_port *dp, struct net_device *lag)
+{
+ if (dp->bridge_dev)
+ dsa_port_pre_bridge_leave(dp, dp->bridge_dev);
+}
+
void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
{
struct dsa_notifier_lag_info info = {
@@ -389,8 +502,9 @@ void dsa_port_lag_leave(struct dsa_port *dp, struct net_device *lag)
err = dsa_port_notify(dp, DSA_NOTIFIER_LAG_LEAVE, &info);
if (err)
- pr_err("DSA: failed to notify DSA_NOTIFIER_LAG_LEAVE: %d\n",
- err);
+ dev_err(dp->ds->dev,
+ "port %d failed to notify DSA_NOTIFIER_LAG_LEAVE: %pe\n",
+ dp->index, ERR_PTR(err));
dsa_lag_unmap(dp->ds->dst, lag);
}
@@ -466,6 +580,7 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp,
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
struct netlink_ext_ack *extack)
{
+ bool old_vlan_filtering = dsa_port_is_vlan_filtering(dp);
struct dsa_switch *ds = dp->ds;
bool apply;
int err;
@@ -491,12 +606,49 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
if (err)
return err;
- if (ds->vlan_filtering_is_global)
+ if (ds->vlan_filtering_is_global) {
+ int port;
+
ds->vlan_filtering = vlan_filtering;
- else
+
+ for (port = 0; port < ds->num_ports; port++) {
+ struct net_device *slave;
+
+ if (!dsa_is_user_port(ds, port))
+ continue;
+
+ /* We might be called in the unbind path, so not
+ * all slave devices might still be registered.
+ */
+ slave = dsa_to_port(ds, port)->slave;
+ if (!slave)
+ continue;
+
+ err = dsa_slave_manage_vlan_filtering(slave,
+ vlan_filtering);
+ if (err)
+ goto restore;
+ }
+ } else {
dp->vlan_filtering = vlan_filtering;
+ err = dsa_slave_manage_vlan_filtering(dp->slave,
+ vlan_filtering);
+ if (err)
+ goto restore;
+ }
+
return 0;
+
+restore:
+ ds->ops->port_vlan_filtering(ds, dp->index, old_vlan_filtering, NULL);
+
+ if (ds->vlan_filtering_is_global)
+ ds->vlan_filtering = old_vlan_filtering;
+ else
+ dp->vlan_filtering = old_vlan_filtering;
+
+ return err;
}
/* This enforces legacy behavior for switch drivers which assume they can't
@@ -543,35 +695,43 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack);
}
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
struct switchdev_brport_flags flags,
struct netlink_ext_ack *extack)
{
struct dsa_switch *ds = dp->ds;
+ int err;
if (!ds->ops->port_bridge_flags)
return -EOPNOTSUPP;
- return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
-}
+ err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
+ if (err)
+ return err;
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
- struct netlink_ext_ack *extack)
-{
- struct dsa_switch *ds = dp->ds;
+ if (flags.mask & BR_LEARNING) {
+ bool learning = flags.val & BR_LEARNING;
- if (!ds->ops->port_set_mrouter)
- return -EOPNOTSUPP;
+ if (learning == dp->learning)
+ return 0;
+
+ if ((dp->learning && !learning) &&
+ (dp->stp_state == BR_STATE_LEARNING ||
+ dp->stp_state == BR_STATE_FORWARDING))
+ dsa_port_fast_age(dp);
+
+ dp->learning = learning;
+ }
- return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack);
+ return 0;
}
int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
- bool propagate_upstream)
+ bool targeted_match)
{
struct dsa_notifier_mtu_info info = {
.sw_index = dp->ds->index,
- .propagate_upstream = propagate_upstream,
+ .targeted_match = targeted_match,
.port = dp->index,
.mtu = new_mtu,
};
@@ -606,6 +766,44 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
}
+int dsa_port_host_fdb_add(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_notifier_fdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .addr = addr,
+ .vid = vid,
+ };
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ int err;
+
+ err = dev_uc_add(cpu_dp->master, addr);
+ if (err)
+ return err;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_ADD, &info);
+}
+
+int dsa_port_host_fdb_del(struct dsa_port *dp, const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_notifier_fdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .addr = addr,
+ .vid = vid,
+ };
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ int err;
+
+ err = dev_uc_del(cpu_dp->master, addr);
+ if (err)
+ return err;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_FDB_DEL, &info);
+}
+
int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
{
struct dsa_switch *ds = dp->ds;
@@ -641,6 +839,42 @@ int dsa_port_mdb_del(const struct dsa_port *dp,
return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info);
}
+int dsa_port_host_mdb_add(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_notifier_mdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .mdb = mdb,
+ };
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ int err;
+
+ err = dev_mc_add(cpu_dp->master, mdb->addr);
+ if (err)
+ return err;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_ADD, &info);
+}
+
+int dsa_port_host_mdb_del(const struct dsa_port *dp,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_notifier_mdb_info info = {
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .mdb = mdb,
+ };
+ struct dsa_port *cpu_dp = dp->cpu_dp;
+ int err;
+
+ err = dev_mc_del(cpu_dp->master, mdb->addr);
+ if (err)
+ return err;
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_HOST_MDB_DEL, &info);
+}
+
int dsa_port_vlan_add(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack)
@@ -718,7 +952,6 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp,
void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
const struct dsa_device_ops *tag_ops)
{
- cpu_dp->filter = tag_ops->filter;
cpu_dp->rcv = tag_ops->rcv;
cpu_dp->tag_ops = tag_ops;
}
@@ -1089,5 +1322,42 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr)
err = dsa_port_notify(dp, DSA_NOTIFIER_HSR_LEAVE, &info);
if (err)
- pr_err("DSA: failed to notify DSA_NOTIFIER_HSR_LEAVE\n");
+ dev_err(dp->ds->dev,
+ "port %d failed to notify DSA_NOTIFIER_HSR_LEAVE: %pe\n",
+ dp->index, ERR_PTR(err));
+}
+
+int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast)
+{
+ struct dsa_notifier_tag_8021q_vlan_info info = {
+ .tree_index = dp->ds->dst->index,
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vid = vid,
+ };
+
+ if (broadcast)
+ return dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info);
+
+ return dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, &info);
+}
+
+void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast)
+{
+ struct dsa_notifier_tag_8021q_vlan_info info = {
+ .tree_index = dp->ds->dst->index,
+ .sw_index = dp->ds->index,
+ .port = dp->index,
+ .vid = vid,
+ };
+ int err;
+
+ if (broadcast)
+ err = dsa_broadcast(DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info);
+ else
+ err = dsa_port_notify(dp, DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, &info);
+ if (err)
+ dev_err(dp->ds->dev,
+ "port %d failed to notify tag_8021q VLAN %d deletion: %pe\n",
+ dp->index, vid, ERR_PTR(err));
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d4756b920108..662ff531d4e2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -271,19 +271,22 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return phylink_mii_ioctl(p->dp->pl, ifr, cmd);
}
-static int dsa_slave_port_attr_set(struct net_device *dev,
+static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
int ret;
+ if (ctx && ctx != dp)
+ return 0;
+
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
return -EOPNOTSUPP;
- ret = dsa_port_set_state(dp, attr->u.stp_state);
+ ret = dsa_port_set_state(dp, attr->u.stp_state, true);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
@@ -311,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
break;
- case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
- if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
- return -EOPNOTSUPP;
-
- ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
- break;
default:
ret = -EOPNOTSUPP;
break;
@@ -394,13 +391,16 @@ static int dsa_slave_vlan_add(struct net_device *dev,
return vlan_vid_add(master, htons(ETH_P_8021Q), vlan.vid);
}
-static int dsa_slave_port_obj_add(struct net_device *dev,
+static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
+ if (ctx && ctx != dp)
+ return 0;
+
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_MDB:
if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
@@ -412,10 +412,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
return -EOPNOTSUPP;
- /* DSA can directly translate this to a normal MDB add,
- * but on the CPU port.
- */
- err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+ err = dsa_port_host_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
@@ -469,12 +466,15 @@ static int dsa_slave_vlan_del(struct net_device *dev,
return 0;
}
-static int dsa_slave_port_obj_del(struct net_device *dev,
+static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
+ if (ctx && ctx != dp)
+ return 0;
+
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_MDB:
if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
@@ -486,10 +486,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
return -EOPNOTSUPP;
- /* DSA can directly translate this to a normal MDB add,
- * but on the CPU port.
- */
- err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
+ err = dsa_port_host_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
@@ -1412,6 +1409,76 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
return 0;
}
+static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg)
+{
+ __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q);
+
+ return dsa_slave_vlan_rx_add_vid(arg, proto, vid);
+}
+
+static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg)
+{
+ __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q);
+
+ return dsa_slave_vlan_rx_kill_vid(arg, proto, vid);
+}
+
+/* Keep the VLAN RX filtering list in sync with the hardware only if VLAN
+ * filtering is enabled. The baseline is that only ports that offload a
+ * VLAN-aware bridge are VLAN-aware, and standalone ports are VLAN-unaware,
+ * but there are exceptions for quirky hardware.
+ *
+ * If ds->vlan_filtering_is_global = true, then standalone ports which share
+ * the same switch with other ports that offload a VLAN-aware bridge are also
+ * inevitably VLAN-aware.
+ *
+ * To summarize, a DSA switch port offloads:
+ *
+ * - If standalone (this includes software bridge, software LAG):
+ * - if ds->needs_standalone_vlan_filtering = true, OR if
+ * (ds->vlan_filtering_is_global = true AND there are bridges spanning
+ * this switch chip which have vlan_filtering=1)
+ * - the 8021q upper VLANs
+ * - else (standalone VLAN filtering is not needed, VLAN filtering is not
+ * global, or it is, but no port is under a VLAN-aware bridge):
+ * - no VLAN (any 8021q upper is a software VLAN)
+ *
+ * - If under a vlan_filtering=0 bridge which it offloads:
+ * - if ds->configure_vlan_while_not_filtering = true (default):
+ * - the bridge VLANs. These VLANs are committed to hardware but inactive.
+ * - else (deprecated):
+ * - no VLAN. The bridge VLANs are not restored when VLAN awareness is
+ * enabled, so this behavior is broken and discouraged.
+ *
+ * - If under a vlan_filtering=1 bridge which it offloads:
+ * - the bridge VLANs
+ * - the 8021q upper VLANs
+ */
+int dsa_slave_manage_vlan_filtering(struct net_device *slave,
+ bool vlan_filtering)
+{
+ int err;
+
+ if (vlan_filtering) {
+ slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ err = vlan_for_each(slave, dsa_slave_restore_vlan, slave);
+ if (err) {
+ vlan_for_each(slave, dsa_slave_clear_vlan, slave);
+ slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ return err;
+ }
+ } else {
+ err = vlan_for_each(slave, dsa_slave_clear_vlan, slave);
+ if (err)
+ return err;
+
+ slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+ }
+
+ return 0;
+}
+
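As a hedged illustration of the opt-in described in the comment above (not taken from this patch), a driver whose hardware keeps filtering by VLAN even on standalone ports would request the new behaviour from its setup hook; foo_setup is hypothetical, the flag is the one referenced in the comment.

static int foo_setup(struct dsa_switch *ds)
{
	/* Ask DSA to keep 8021q uppers of standalone ports committed to
	 * the port's hardware VLAN filter, per the rules documented above.
	 */
	ds->needs_standalone_vlan_filtering = true;

	return 0;
}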
struct dsa_hw_port {
struct list_head list;
struct net_device *dev;
@@ -1528,6 +1595,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp_iter;
struct dsa_port *cpu_dp;
int port = p->dp->index;
int largest_mtu = 0;
@@ -1535,31 +1603,31 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
int old_master_mtu;
int mtu_limit;
int cpu_mtu;
- int err, i;
+ int err;
if (!ds->ops->port_change_mtu)
return -EOPNOTSUPP;
- for (i = 0; i < ds->num_ports; i++) {
+ list_for_each_entry(dp_iter, &ds->dst->ports, list) {
int slave_mtu;
- if (!dsa_is_user_port(ds, i))
+ if (!dsa_port_is_user(dp_iter))
continue;
/* During probe, this function will be called for each slave
* device, while not all of them have been allocated. That's
* ok, it doesn't change what the maximum is, so ignore it.
*/
- if (!dsa_to_port(ds, i)->slave)
+ if (!dp_iter->slave)
continue;
/* Pretend that we already applied the setting, which we
* actually haven't (still haven't done all integrity checks)
*/
- if (i == port)
+ if (dp_iter == dp)
slave_mtu = new_mtu;
else
- slave_mtu = dsa_to_port(ds, i)->slave->mtu;
+ slave_mtu = dp_iter->slave->mtu;
if (largest_mtu < slave_mtu)
largest_mtu = slave_mtu;
@@ -1569,7 +1637,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
mtu_limit = min_t(int, master->max_mtu, dev->max_mtu);
old_master_mtu = master->mtu;
- new_master_mtu = largest_mtu + cpu_dp->tag_ops->overhead;
+ new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops);
if (new_master_mtu > mtu_limit)
return -ERANGE;
@@ -1585,14 +1653,15 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
goto out_master_failed;
/* We only need to propagate the MTU of the CPU port to
- * upstream switches.
+ * upstream switches, so create a non-targeted notifier which
+ * updates all switches.
*/
- err = dsa_port_mtu_change(cpu_dp, cpu_mtu, true);
+ err = dsa_port_mtu_change(cpu_dp, cpu_mtu, false);
if (err)
goto out_cpu_failed;
}
- err = dsa_port_mtu_change(dp, new_mtu, false);
+ err = dsa_port_mtu_change(dp, new_mtu, true);
if (err)
goto out_port_failed;
@@ -1605,8 +1674,8 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
out_port_failed:
if (new_master_mtu != old_master_mtu)
dsa_port_mtu_change(cpu_dp, old_master_mtu -
- cpu_dp->tag_ops->overhead,
- true);
+ dsa_tag_protocol_overhead(cpu_dp->tag_ops),
+ false);
out_cpu_failed:
if (new_master_mtu != old_master_mtu)
dev_set_mtu(master, old_master_mtu);
@@ -1640,27 +1709,6 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.self_test = dsa_slave_net_selftest,
};
-/* legacy way, bypassing the bridge *****************************************/
-static int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid,
- u16 flags,
- struct netlink_ext_ack *extack)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return dsa_port_fdb_add(dp, addr, vid);
-}
-
-static int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return dsa_port_fdb_del(dp, addr, vid);
-}
-
static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -1702,10 +1750,8 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
- .ndo_fdb_add = dsa_legacy_fdb_add,
- .ndo_fdb_del = dsa_legacy_fdb_del,
.ndo_fdb_dump = dsa_slave_fdb_dump,
- .ndo_do_ioctl = dsa_slave_ioctl,
+ .ndo_eth_ioctl = dsa_slave_ioctl,
.ndo_get_iflink = dsa_slave_get_iflink,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_setup = dsa_slave_netpoll_setup,
@@ -1749,7 +1795,8 @@ static void dsa_slave_phylink_fixed_state(struct phylink_config *config,
}
/* slave device setup *******************************************************/
-static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
+static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr,
+ u32 flags)
{
struct dsa_port *dp = dsa_slave_to_port(slave_dev);
struct dsa_switch *ds = dp->ds;
@@ -1760,6 +1807,8 @@ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
return -ENODEV;
}
+ slave_dev->phydev->dev_flags |= flags;
+
return phylink_connect_phy(dp->pl, slave_dev->phydev);
}
@@ -1804,7 +1853,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
/* We could not connect to a designated PHY or SFP, so try to
* use the switch internal MDIO bus instead
*/
- ret = dsa_slave_phy_connect(slave_dev, dp->index);
+ ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags);
if (ret) {
netdev_err(slave_dev,
"failed to connect to port %d: %d\n",
@@ -1823,11 +1872,10 @@ void dsa_slave_setup_tagger(struct net_device *slave)
struct dsa_slave_priv *p = netdev_priv(slave);
const struct dsa_port *cpu_dp = dp->cpu_dp;
struct net_device *master = cpu_dp->master;
+ const struct dsa_switch *ds = dp->ds;
- if (cpu_dp->tag_ops->tail_tag)
- slave->needed_tailroom = cpu_dp->tag_ops->overhead;
- else
- slave->needed_headroom = cpu_dp->tag_ops->overhead;
+ slave->needed_headroom = cpu_dp->tag_ops->needed_headroom;
+ slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom;
/* Try to save one extra realloc later in the TX path (in the master)
* by also inheriting the master's needed headroom and tailroom.
* The 8021q driver also does this.
@@ -1836,6 +1884,14 @@ void dsa_slave_setup_tagger(struct net_device *slave)
slave->needed_tailroom += master->needed_tailroom;
p->xmit = cpu_dp->tag_ops->xmit;
+
+ slave->features = master->vlan_features | NETIF_F_HW_TC;
+ slave->hw_features |= NETIF_F_HW_TC;
+ slave->features |= NETIF_F_LLTX;
+ if (slave->needed_tailroom)
+ slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST);
+ if (ds->needs_standalone_vlan_filtering)
+ slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
@@ -1898,11 +1954,6 @@ int dsa_slave_create(struct dsa_port *port)
if (slave_dev == NULL)
return -ENOMEM;
- slave_dev->features = master->vlan_features | NETIF_F_HW_TC;
- if (ds->ops->port_vlan_add && ds->ops->port_vlan_del)
- slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
- slave_dev->hw_features |= NETIF_F_HW_TC;
- slave_dev->features |= NETIF_F_LLTX;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
if (!is_zero_ether_addr(port->mac))
ether_addr_copy(slave_dev->dev_addr, port->mac);
@@ -2028,6 +2079,11 @@ static int dsa_slave_changeupper(struct net_device *dev,
err = dsa_port_bridge_join(dp, info->upper_dev, extack);
if (!err)
dsa_bridge_mtu_normalization(dp);
+ if (err == -EOPNOTSUPP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offloading not supported");
+ err = 0;
+ }
err = notifier_from_errno(err);
} else {
dsa_port_bridge_leave(dp, info->upper_dev);
@@ -2065,6 +2121,22 @@ static int dsa_slave_changeupper(struct net_device *dev,
return err;
}
+static int dsa_slave_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ if (netif_is_bridge_master(info->upper_dev) && !info->linking)
+ dsa_port_pre_bridge_leave(dp, info->upper_dev);
+ else if (netif_is_lag_master(info->upper_dev) && !info->linking)
+ dsa_port_pre_lag_leave(dp, info->upper_dev);
+ /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be
+ * meaningfully enslaved to a bridge yet
+ */
+
+ return NOTIFY_DONE;
+}
+
static int
dsa_slave_lag_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
@@ -2091,6 +2163,35 @@ dsa_slave_lag_changeupper(struct net_device *dev,
return err;
}
+/* Same as dsa_slave_lag_changeupper() except that it calls
+ * dsa_slave_prechangeupper()
+ */
+static int
+dsa_slave_lag_prechangeupper(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct net_device *lower;
+ struct list_head *iter;
+ int err = NOTIFY_DONE;
+ struct dsa_port *dp;
+
+ netdev_for_each_lower_dev(dev, lower, iter) {
+ if (!dsa_slave_dev_check(lower))
+ continue;
+
+ dp = dsa_slave_to_port(lower);
+ if (!dp->lag_dev)
+ /* Software LAG */
+ continue;
+
+ err = dsa_slave_prechangeupper(lower, info);
+ if (notifier_to_errno(err))
+ break;
+ }
+
+ return err;
+}
+
static int
dsa_prevent_bridging_8021q_upper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
@@ -2154,6 +2255,32 @@ dsa_slave_check_8021q_upper(struct net_device *dev,
return NOTIFY_DONE;
}
+static int
+dsa_slave_prechangeupper_sanity_check(struct net_device *dev,
+ struct netdev_notifier_changeupper_info *info)
+{
+ struct dsa_switch *ds;
+ struct dsa_port *dp;
+ int err;
+
+ if (!dsa_slave_dev_check(dev))
+ return dsa_prevent_bridging_8021q_upper(dev, info);
+
+ dp = dsa_slave_to_port(dev);
+ ds = dp->ds;
+
+ if (ds->ops->port_prechangeupper) {
+ err = ds->ops->port_prechangeupper(ds, dp->index, info);
+ if (err)
+ return notifier_from_errno(err);
+ }
+
+ if (is_vlan_dev(info->upper_dev))
+ return dsa_slave_check_8021q_upper(dev, info);
+
+ return NOTIFY_DONE;
+}
+
static int dsa_slave_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -2162,24 +2289,18 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
switch (event) {
case NETDEV_PRECHANGEUPPER: {
struct netdev_notifier_changeupper_info *info = ptr;
- struct dsa_switch *ds;
- struct dsa_port *dp;
int err;
- if (!dsa_slave_dev_check(dev))
- return dsa_prevent_bridging_8021q_upper(dev, ptr);
+ err = dsa_slave_prechangeupper_sanity_check(dev, info);
+ if (err != NOTIFY_DONE)
+ return err;
- dp = dsa_slave_to_port(dev);
- ds = dp->ds;
+ if (dsa_slave_dev_check(dev))
+ return dsa_slave_prechangeupper(dev, ptr);
- if (ds->ops->port_prechangeupper) {
- err = ds->ops->port_prechangeupper(ds, dp->index, info);
- if (err)
- return notifier_from_errno(err);
- }
+ if (netif_is_lag_master(dev))
+ return dsa_slave_lag_prechangeupper(dev, ptr);
- if (is_vlan_dev(info->upper_dev))
- return dsa_slave_check_8021q_upper(dev, ptr);
break;
}
case NETDEV_CHANGEUPPER:
@@ -2235,8 +2356,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
static void
dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work)
{
+ struct switchdev_notifier_fdb_info info = {};
struct dsa_switch *ds = switchdev_work->ds;
- struct switchdev_notifier_fdb_info info;
struct dsa_port *dp;
if (!dsa_is_user_port(ds, switchdev_work->port))
@@ -2263,8 +2384,12 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
- err = dsa_port_fdb_add(dp, switchdev_work->addr,
- switchdev_work->vid);
+ if (switchdev_work->host_addr)
+ err = dsa_port_host_fdb_add(dp, switchdev_work->addr,
+ switchdev_work->vid);
+ else
+ err = dsa_port_fdb_add(dp, switchdev_work->addr,
+ switchdev_work->vid);
if (err) {
dev_err(ds->dev,
"port %d failed to add %pM vid %d to fdb: %d\n",
@@ -2276,8 +2401,12 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- err = dsa_port_fdb_del(dp, switchdev_work->addr,
- switchdev_work->vid);
+ if (switchdev_work->host_addr)
+ err = dsa_port_host_fdb_del(dp, switchdev_work->addr,
+ switchdev_work->vid);
+ else
+ err = dsa_port_fdb_del(dp, switchdev_work->addr,
+ switchdev_work->vid);
if (err) {
dev_err(ds->dev,
"port %d failed to delete %pM vid %d from fdb: %d\n",
@@ -2289,31 +2418,102 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
}
rtnl_unlock();
+ dev_put(switchdev_work->dev);
kfree(switchdev_work);
- if (dsa_is_user_port(ds, dp->index))
- dev_put(dp->slave);
}
-static int dsa_lower_dev_walk(struct net_device *lower_dev,
- struct netdev_nested_priv *priv)
+static bool dsa_foreign_dev_check(const struct net_device *dev,
+ const struct net_device *foreign_dev)
{
- if (dsa_slave_dev_check(lower_dev)) {
- priv->data = (void *)netdev_priv(lower_dev);
- return 1;
- }
+ const struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch_tree *dst = dp->ds->dst;
- return 0;
+ if (netif_is_bridge_master(foreign_dev))
+ return !dsa_tree_offloads_bridge(dst, foreign_dev);
+
+ if (netif_is_bridge_port(foreign_dev))
+ return !dsa_tree_offloads_bridge_port(dst, foreign_dev);
+
+ /* Everything else is foreign */
+ return true;
}
-static struct dsa_slave_priv *dsa_slave_dev_lower_find(struct net_device *dev)
+static int dsa_slave_fdb_event(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ unsigned long event)
{
- struct netdev_nested_priv priv = {
- .data = NULL,
- };
+ struct dsa_switchdev_event_work *switchdev_work;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ bool host_addr = fdb_info->is_local;
+ struct dsa_switch *ds = dp->ds;
+
+ if (ctx && ctx != dp)
+ return 0;
- netdev_walk_all_lower_dev_rcu(dev, dsa_lower_dev_walk, &priv);
+ if (!ds->ops->port_fdb_add || !ds->ops->port_fdb_del)
+ return -EOPNOTSUPP;
- return (struct dsa_slave_priv *)priv.data;
+ if (dsa_slave_dev_check(orig_dev) &&
+ switchdev_fdb_is_dynamically_learned(fdb_info))
+ return 0;
+
+ /* FDB entries learned by the software bridge should be installed as
+ * host addresses only if the driver requests assisted learning.
+ */
+ if (switchdev_fdb_is_dynamically_learned(fdb_info) &&
+ !ds->assisted_learning_on_cpu_port)
+ return 0;
+
+ /* Also treat FDB entries on foreign interfaces bridged with us as host
+ * addresses.
+ */
+ if (dsa_foreign_dev_check(dev, orig_dev))
+ host_addr = true;
+
+ switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
+ if (!switchdev_work)
+ return -ENOMEM;
+
+ netdev_dbg(dev, "%s FDB entry towards %s, addr %pM vid %d%s\n",
+ event == SWITCHDEV_FDB_ADD_TO_DEVICE ? "Adding" : "Deleting",
+ orig_dev->name, fdb_info->addr, fdb_info->vid,
+ host_addr ? " as host address" : "");
+
+ INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work);
+ switchdev_work->ds = ds;
+ switchdev_work->port = dp->index;
+ switchdev_work->event = event;
+ switchdev_work->dev = dev;
+
+ ether_addr_copy(switchdev_work->addr, fdb_info->addr);
+ switchdev_work->vid = fdb_info->vid;
+ switchdev_work->host_addr = host_addr;
+
+ /* Hold a reference for dsa_fdb_offload_notify */
+ dev_hold(dev);
+ dsa_schedule_work(&switchdev_work->work);
+
+ return 0;
+}
+
+static int
+dsa_slave_fdb_add_to_device(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info)
+{
+ return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
+ SWITCHDEV_FDB_ADD_TO_DEVICE);
+}
+
+static int
+dsa_slave_fdb_del_to_device(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info)
+{
+ return dsa_slave_fdb_event(dev, orig_dev, ctx, fdb_info,
+ SWITCHDEV_FDB_DEL_TO_DEVICE);
}
/* Called under rcu_read_lock() */
@@ -2321,9 +2521,6 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
- const struct switchdev_notifier_fdb_info *fdb_info;
- struct dsa_switchdev_event_work *switchdev_work;
- struct dsa_port *dp;
int err;
switch (event) {
@@ -2333,69 +2530,19 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
dsa_slave_port_attr_set);
return notifier_from_errno(err);
case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ err = switchdev_handle_fdb_add_to_device(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_fdb_add_to_device,
+ NULL);
+ return notifier_from_errno(err);
case SWITCHDEV_FDB_DEL_TO_DEVICE:
- fdb_info = ptr;
-
- if (dsa_slave_dev_check(dev)) {
- if (!fdb_info->added_by_user || fdb_info->is_local)
- return NOTIFY_OK;
-
- dp = dsa_slave_to_port(dev);
- } else {
- /* Snoop addresses learnt on foreign interfaces
- * bridged with us, for switches that don't
- * automatically learn SA from CPU-injected traffic
- */
- struct net_device *br_dev;
- struct dsa_slave_priv *p;
-
- br_dev = netdev_master_upper_dev_get_rcu(dev);
- if (!br_dev)
- return NOTIFY_DONE;
-
- if (!netif_is_bridge_master(br_dev))
- return NOTIFY_DONE;
-
- p = dsa_slave_dev_lower_find(br_dev);
- if (!p)
- return NOTIFY_DONE;
-
- dp = p->dp->cpu_dp;
-
- if (!dp->ds->assisted_learning_on_cpu_port)
- return NOTIFY_DONE;
-
- /* When the bridge learns an address on an offloaded
- * LAG we don't want to send traffic to the CPU, the
- * other ports bridged with the LAG should be able to
- * autonomously forward towards it.
- */
- if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev))
- return NOTIFY_DONE;
- }
-
- if (!dp->ds->ops->port_fdb_add || !dp->ds->ops->port_fdb_del)
- return NOTIFY_DONE;
-
- switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
- if (!switchdev_work)
- return NOTIFY_BAD;
-
- INIT_WORK(&switchdev_work->work,
- dsa_slave_switchdev_event_work);
- switchdev_work->ds = dp->ds;
- switchdev_work->port = dp->index;
- switchdev_work->event = event;
-
- ether_addr_copy(switchdev_work->addr,
- fdb_info->addr);
- switchdev_work->vid = fdb_info->vid;
-
- /* Hold a reference on the slave for dsa_fdb_offload_notify */
- if (dsa_is_user_port(dp->ds, dp->index))
- dev_hold(dev);
- dsa_schedule_work(&switchdev_work->work);
- break;
+ err = switchdev_handle_fdb_del_to_device(dev, ptr,
+ dsa_slave_dev_check,
+ dsa_foreign_dev_check,
+ dsa_slave_fdb_del_to_device,
+ NULL);
+ return notifier_from_errno(err);
default:
return NOTIFY_DONE;
}
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 9bf8e20ecdf3..1c797ec8e2c2 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -52,10 +52,13 @@ static int dsa_switch_ageing_time(struct dsa_switch *ds,
static bool dsa_switch_mtu_match(struct dsa_switch *ds, int port,
struct dsa_notifier_mtu_info *info)
{
- if (ds->index == info->sw_index)
- return (port == info->port) || dsa_is_dsa_port(ds, port);
+ if (ds->index == info->sw_index && port == info->port)
+ return true;
- if (!info->propagate_upstream)
+ /* Do not propagate to other switches in the tree if the notifier was
+ * targeted for a single switch.
+ */
+ if (info->targeted_match)
return false;
if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
@@ -87,38 +90,57 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
struct dsa_switch_tree *dst = ds->dst;
+ int err;
- if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_join)
- return ds->ops->port_bridge_join(ds, info->port, info->br);
+ if (dst->index == info->tree_index && ds->index == info->sw_index) {
+ if (!ds->ops->port_bridge_join)
+ return -EOPNOTSUPP;
+
+ err = ds->ops->port_bridge_join(ds, info->port, info->br);
+ if (err)
+ return err;
+ }
if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_join)
- return ds->ops->crosschip_bridge_join(ds, info->tree_index,
- info->sw_index,
- info->port, info->br);
+ ds->ops->crosschip_bridge_join) {
+ err = ds->ops->crosschip_bridge_join(ds, info->tree_index,
+ info->sw_index,
+ info->port, info->br);
+ if (err)
+ return err;
+ }
- return 0;
+ return dsa_tag_8021q_bridge_join(ds, info);
}
static int dsa_switch_bridge_leave(struct dsa_switch *ds,
struct dsa_notifier_bridge_info *info)
{
- bool unset_vlan_filtering = br_vlan_enabled(info->br);
struct dsa_switch_tree *dst = ds->dst;
struct netlink_ext_ack extack = {0};
+ bool change_vlan_filtering = false;
+ bool vlan_filtering;
int err, port;
if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_join)
+ ds->ops->port_bridge_leave)
ds->ops->port_bridge_leave(ds, info->port, info->br);
if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_join)
+ ds->ops->crosschip_bridge_leave)
ds->ops->crosschip_bridge_leave(ds, info->tree_index,
info->sw_index, info->port,
info->br);
+ if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->br)) {
+ change_vlan_filtering = true;
+ vlan_filtering = true;
+ } else if (!ds->needs_standalone_vlan_filtering &&
+ br_vlan_enabled(info->br)) {
+ change_vlan_filtering = true;
+ vlan_filtering = false;
+ }
+
/* If the bridge was vlan_filtering, the bridge core doesn't trigger an
* event for changing vlan_filtering setting upon slave ports leaving
* it. That is a good thing, because that lets us handle it and also
@@ -127,30 +149,240 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
* vlan_filtering callback is only when the last port leaves the last
* VLAN-aware bridge.
*/
- if (unset_vlan_filtering && ds->vlan_filtering_is_global) {
+ if (change_vlan_filtering && ds->vlan_filtering_is_global) {
for (port = 0; port < ds->num_ports; port++) {
struct net_device *bridge_dev;
bridge_dev = dsa_to_port(ds, port)->bridge_dev;
if (bridge_dev && br_vlan_enabled(bridge_dev)) {
- unset_vlan_filtering = false;
+ change_vlan_filtering = false;
break;
}
}
}
- if (unset_vlan_filtering) {
+
+ if (change_vlan_filtering) {
err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port),
- false, &extack);
+ vlan_filtering, &extack);
if (extack._msg)
dev_err(ds->dev, "port %d: %s\n", info->port,
extack._msg);
if (err && err != -EOPNOTSUPP)
return err;
}
+
+ return dsa_tag_8021q_bridge_leave(ds, info);
+}
+
+/* Matches for all upstream-facing ports (the CPU port and all upstream-facing
+ * DSA links) that sit between the targeted port on which the notifier was
+ * emitted and its dedicated CPU port.
+ */
+static bool dsa_switch_host_address_match(struct dsa_switch *ds, int port,
+ int info_sw_index, int info_port)
+{
+ struct dsa_port *targeted_dp, *cpu_dp;
+ struct dsa_switch *targeted_ds;
+
+ targeted_ds = dsa_switch_find(ds->dst->index, info_sw_index);
+ targeted_dp = dsa_to_port(targeted_ds, info_port);
+ cpu_dp = targeted_dp->cpu_dp;
+
+ if (dsa_switch_is_upstream_of(ds, targeted_ds))
+ return port == dsa_towards_port(ds, cpu_dp->ds->index,
+ cpu_dp->index);
+
+ return false;
+}
+
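A worked reading of the matching rule above, under the assumption of a daisy chain topology (illustrative only):

/* cpu <-> sw0 <-> sw1 <-> sw2, host address notified for a user port of sw2:
 *   - on sw2, the match is its upstream-facing DSA link towards sw1
 *   - on sw1, the match is its upstream-facing DSA link towards sw0
 *   - on sw0, the match is the CPU port itself
 * so the address ends up installed on every hop between the targeted port
 * and the host.
 */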
+static struct dsa_mac_addr *dsa_mac_addr_find(struct list_head *addr_list,
+ const unsigned char *addr,
+ u16 vid)
+{
+ struct dsa_mac_addr *a;
+
+ list_for_each_entry(a, addr_list, list)
+ if (ether_addr_equal(a->addr, addr) && a->vid == vid)
+ return a;
+
+ return NULL;
+}
+
+static int dsa_switch_do_mdb_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+ int err;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->port_mdb_add(ds, port, mdb);
+
+ a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
+ if (a) {
+ refcount_inc(&a->refcount);
+ return 0;
+ }
+
+ a = kzalloc(sizeof(*a), GFP_KERNEL);
+ if (!a)
+ return -ENOMEM;
+
+ err = ds->ops->port_mdb_add(ds, port, mdb);
+ if (err) {
+ kfree(a);
+ return err;
+ }
+
+ ether_addr_copy(a->addr, mdb->addr);
+ a->vid = mdb->vid;
+ refcount_set(&a->refcount, 1);
+ list_add_tail(&a->list, &dp->mdbs);
+
return 0;
}
+static int dsa_switch_do_mdb_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+ int err;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->port_mdb_del(ds, port, mdb);
+
+ a = dsa_mac_addr_find(&dp->mdbs, mdb->addr, mdb->vid);
+ if (!a)
+ return -ENOENT;
+
+ if (!refcount_dec_and_test(&a->refcount))
+ return 0;
+
+ err = ds->ops->port_mdb_del(ds, port, mdb);
+ if (err) {
+ refcount_inc(&a->refcount);
+ return err;
+ }
+
+ list_del(&a->list);
+ kfree(a);
+
+ return 0;
+}
+
+static int dsa_switch_do_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+ int err;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->port_fdb_add(ds, port, addr, vid);
+
+ a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
+ if (a) {
+ refcount_inc(&a->refcount);
+ return 0;
+ }
+
+ a = kzalloc(sizeof(*a), GFP_KERNEL);
+ if (!a)
+ return -ENOMEM;
+
+ err = ds->ops->port_fdb_add(ds, port, addr, vid);
+ if (err) {
+ kfree(a);
+ return err;
+ }
+
+ ether_addr_copy(a->addr, addr);
+ a->vid = vid;
+ refcount_set(&a->refcount, 1);
+ list_add_tail(&a->list, &dp->fdbs);
+
+ return 0;
+}
+
+static int dsa_switch_do_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_mac_addr *a;
+ int err;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->port_fdb_del(ds, port, addr, vid);
+
+ a = dsa_mac_addr_find(&dp->fdbs, addr, vid);
+ if (!a)
+ return -ENOENT;
+
+ if (!refcount_dec_and_test(&a->refcount))
+ return 0;
+
+ err = ds->ops->port_fdb_del(ds, port, addr, vid);
+ if (err) {
+ refcount_inc(&a->refcount);
+ return err;
+ }
+
+ list_del(&a->list);
+ kfree(a);
+
+ return 0;
+}
+
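To make the refcounting contract of the helpers above concrete, a short walk-through for a shared (CPU or DSA) port; the sequence is illustrative, not code from the patch.

/*
 *   dsa_switch_do_fdb_add(ds, port, addr, vid);  // programs hardware, refcount = 1
 *   dsa_switch_do_fdb_add(ds, port, addr, vid);  // hardware untouched, refcount = 2
 *   dsa_switch_do_fdb_del(ds, port, addr, vid);  // hardware untouched, refcount = 1
 *   dsa_switch_do_fdb_del(ds, port, addr, vid);  // removes from hardware, entry freed
 *
 * On user ports the helpers call straight into the driver op, with no
 * dsa_mac_addr bookkeeping.
 */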
+static int dsa_switch_host_fdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_fdb_info *info)
+{
+ int err = 0;
+ int port;
+
+ if (!ds->ops->port_fdb_add)
+ return -EOPNOTSUPP;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_host_address_match(ds, port, info->sw_index,
+ info->port)) {
+ err = dsa_switch_do_fdb_add(ds, port, info->addr,
+ info->vid);
+ if (err)
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int dsa_switch_host_fdb_del(struct dsa_switch *ds,
+ struct dsa_notifier_fdb_info *info)
+{
+ int err = 0;
+ int port;
+
+ if (!ds->ops->port_fdb_del)
+ return -EOPNOTSUPP;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_host_address_match(ds, port, info->sw_index,
+ info->port)) {
+ err = dsa_switch_do_fdb_del(ds, port, info->addr,
+ info->vid);
+ if (err)
+ break;
+ }
+ }
+
+ return err;
+}
+
static int dsa_switch_fdb_add(struct dsa_switch *ds,
struct dsa_notifier_fdb_info *info)
{
@@ -159,7 +391,7 @@ static int dsa_switch_fdb_add(struct dsa_switch *ds,
if (!ds->ops->port_fdb_add)
return -EOPNOTSUPP;
- return ds->ops->port_fdb_add(ds, port, info->addr, info->vid);
+ return dsa_switch_do_fdb_add(ds, port, info->addr, info->vid);
}
static int dsa_switch_fdb_del(struct dsa_switch *ds,
@@ -170,7 +402,7 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
if (!ds->ops->port_fdb_del)
return -EOPNOTSUPP;
- return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
+ return dsa_switch_do_fdb_del(ds, port, info->addr, info->vid);
}
static int dsa_switch_hsr_join(struct dsa_switch *ds,
@@ -216,7 +448,7 @@ static int dsa_switch_lag_join(struct dsa_switch *ds,
info->port, info->lag,
info->info);
- return 0;
+ return -EOPNOTSUPP;
}
static int dsa_switch_lag_leave(struct dsa_switch *ds,
@@ -229,24 +461,34 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds,
return ds->ops->crosschip_lag_leave(ds, info->sw_index,
info->port, info->lag);
- return 0;
+ return -EOPNOTSUPP;
}
-static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_mdb_info *info)
+static int dsa_switch_mdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
{
- if (ds->index == info->sw_index && port == info->port)
- return true;
+ int port = dsa_towards_port(ds, info->sw_index, info->port);
- if (dsa_is_dsa_port(ds, port))
- return true;
+ if (!ds->ops->port_mdb_add)
+ return -EOPNOTSUPP;
- return false;
+ return dsa_switch_do_mdb_add(ds, port, info->mdb);
}
-static int dsa_switch_mdb_add(struct dsa_switch *ds,
+static int dsa_switch_mdb_del(struct dsa_switch *ds,
struct dsa_notifier_mdb_info *info)
{
+ int port = dsa_towards_port(ds, info->sw_index, info->port);
+
+ if (!ds->ops->port_mdb_del)
+ return -EOPNOTSUPP;
+
+ return dsa_switch_do_mdb_del(ds, port, info->mdb);
+}
+
+static int dsa_switch_host_mdb_add(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
+{
int err = 0;
int port;
@@ -254,8 +496,9 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
return -EOPNOTSUPP;
for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_mdb_match(ds, port, info)) {
- err = ds->ops->port_mdb_add(ds, port, info->mdb);
+ if (dsa_switch_host_address_match(ds, port, info->sw_index,
+ info->port)) {
+ err = dsa_switch_do_mdb_add(ds, port, info->mdb);
if (err)
break;
}
@@ -264,16 +507,25 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
return err;
}
-static int dsa_switch_mdb_del(struct dsa_switch *ds,
- struct dsa_notifier_mdb_info *info)
+static int dsa_switch_host_mdb_del(struct dsa_switch *ds,
+ struct dsa_notifier_mdb_info *info)
{
+ int err = 0;
+ int port;
+
if (!ds->ops->port_mdb_del)
return -EOPNOTSUPP;
- if (ds->index == info->sw_index)
- return ds->ops->port_mdb_del(ds, info->port, info->mdb);
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_host_address_match(ds, port, info->sw_index,
+ info->port)) {
+ err = dsa_switch_do_mdb_del(ds, port, info->mdb);
+ if (err)
+ break;
+ }
+ }
- return 0;
+ return err;
}
static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
@@ -364,36 +616,16 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds,
return 0;
}
-static bool dsa_switch_mrp_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_mrp_info *info)
-{
- if (ds->index == info->sw_index && port == info->port)
- return true;
-
- if (dsa_is_dsa_port(ds, port))
- return true;
-
- return false;
-}
-
static int dsa_switch_mrp_add(struct dsa_switch *ds,
struct dsa_notifier_mrp_info *info)
{
- int err = 0;
- int port;
-
if (!ds->ops->port_mrp_add)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_mrp_match(ds, port, info)) {
- err = ds->ops->port_mrp_add(ds, port, info->mrp);
- if (err)
- break;
- }
- }
+ if (ds->index == info->sw_index)
+ return ds->ops->port_mrp_add(ds, info->port, info->mrp);
- return err;
+ return 0;
}
static int dsa_switch_mrp_del(struct dsa_switch *ds,
@@ -408,39 +640,18 @@ static int dsa_switch_mrp_del(struct dsa_switch *ds,
return 0;
}
-static bool
-dsa_switch_mrp_ring_role_match(struct dsa_switch *ds, int port,
- struct dsa_notifier_mrp_ring_role_info *info)
-{
- if (ds->index == info->sw_index && port == info->port)
- return true;
-
- if (dsa_is_dsa_port(ds, port))
- return true;
-
- return false;
-}
-
static int
dsa_switch_mrp_add_ring_role(struct dsa_switch *ds,
struct dsa_notifier_mrp_ring_role_info *info)
{
- int err = 0;
- int port;
-
if (!ds->ops->port_mrp_add)
return -EOPNOTSUPP;
- for (port = 0; port < ds->num_ports; port++) {
- if (dsa_switch_mrp_ring_role_match(ds, port, info)) {
- err = ds->ops->port_mrp_add_ring_role(ds, port,
- info->mrp);
- if (err)
- break;
- }
- }
+ if (ds->index == info->sw_index)
+ return ds->ops->port_mrp_add_ring_role(ds, info->port,
+ info->mrp);
- return err;
+ return 0;
}
static int
@@ -479,6 +690,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_FDB_DEL:
err = dsa_switch_fdb_del(ds, info);
break;
+ case DSA_NOTIFIER_HOST_FDB_ADD:
+ err = dsa_switch_host_fdb_add(ds, info);
+ break;
+ case DSA_NOTIFIER_HOST_FDB_DEL:
+ err = dsa_switch_host_fdb_del(ds, info);
+ break;
case DSA_NOTIFIER_HSR_JOIN:
err = dsa_switch_hsr_join(ds, info);
break;
@@ -500,6 +717,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_MDB_DEL:
err = dsa_switch_mdb_del(ds, info);
break;
+ case DSA_NOTIFIER_HOST_MDB_ADD:
+ err = dsa_switch_host_mdb_add(ds, info);
+ break;
+ case DSA_NOTIFIER_HOST_MDB_DEL:
+ err = dsa_switch_host_mdb_del(ds, info);
+ break;
case DSA_NOTIFIER_VLAN_ADD:
err = dsa_switch_vlan_add(ds, info);
break;
@@ -524,6 +747,12 @@ static int dsa_switch_event(struct notifier_block *nb,
case DSA_NOTIFIER_MRP_DEL_RING_ROLE:
err = dsa_switch_mrp_del_ring_role(ds, info);
break;
+ case DSA_NOTIFIER_TAG_8021Q_VLAN_ADD:
+ err = dsa_switch_tag_8021q_vlan_add(ds, info);
+ break;
+ case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL:
+ err = dsa_switch_tag_8021q_vlan_del(ds, info);
+ break;
default:
err = -EOPNOTSUPP;
break;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 122ad5833fb1..f8f7b7c34e7d 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -17,7 +17,7 @@
*
* | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
* +-----------+-----+-----------------+-----------+-----------------------+
- * | DIR | SVL | SWITCH_ID | SUBVLAN | PORT |
+ * | DIR | VBID| SWITCH_ID | VBID | PORT |
* +-----------+-----+-----------------+-----------+-----------------------+
*
* DIR - VID[11:10]:
* * 1 (0b01) for RX VIDs
* * 2 (0b10) for TX VIDs
* These values leave the special VIDs of 0, 1 and 4095 unused by this
* coding scheme.
* unused by this coding scheme.
*
- * SVL/SUBVLAN - { VID[9], VID[5:4] }:
- * Sub-VLAN encoding. Valid only when DIR indicates an RX VLAN.
- * * 0 (0b000): Field does not encode a sub-VLAN, either because
- * received traffic is untagged, PVID-tagged or because a second
- * VLAN tag is present after this tag and not inside of it.
- * * 1 (0b001): Received traffic is tagged with a VID value private
- * to the host. This field encodes the index in the host's lookup
- * table through which the value of the ingress VLAN ID can be
- * recovered.
- * * 2 (0b010): Field encodes a sub-VLAN.
- * ...
- * * 7 (0b111): Field encodes a sub-VLAN.
- * When DIR indicates a TX VLAN, SUBVLAN must be transmitted as zero
- * (by the host) and ignored on receive (by the switch).
- *
* SWITCH_ID - VID[8:6]:
* Index of switch within DSA tree. Must be between 0 and 7.
*
+ * VBID - { VID[9], VID[5:4] }:
+ * Virtual bridge ID. If between 1 and 7, packet targets the broadcast
+ * domain of a bridge. If transmitted as zero, packet targets a single
+ * port. Field only valid on transmit, must be ignored on receive.
+ *
* PORT - VID[3:0]:
* Index of switch port. Must be between 0 and 15.
*/
@@ -61,23 +51,30 @@
#define DSA_8021Q_SWITCH_ID(x) (((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \
DSA_8021Q_SWITCH_ID_MASK)
-#define DSA_8021Q_SUBVLAN_HI_SHIFT 9
-#define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9)
-#define DSA_8021Q_SUBVLAN_LO_SHIFT 4
-#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(5, 4)
-#define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2)
-#define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0))
-#define DSA_8021Q_SUBVLAN(x) \
- (((DSA_8021Q_SUBVLAN_LO(x) << DSA_8021Q_SUBVLAN_LO_SHIFT) & \
- DSA_8021Q_SUBVLAN_LO_MASK) | \
- ((DSA_8021Q_SUBVLAN_HI(x) << DSA_8021Q_SUBVLAN_HI_SHIFT) & \
- DSA_8021Q_SUBVLAN_HI_MASK))
+#define DSA_8021Q_VBID_HI_SHIFT 9
+#define DSA_8021Q_VBID_HI_MASK GENMASK(9, 9)
+#define DSA_8021Q_VBID_LO_SHIFT 4
+#define DSA_8021Q_VBID_LO_MASK GENMASK(5, 4)
+#define DSA_8021Q_VBID_HI(x) (((x) & GENMASK(2, 2)) >> 2)
+#define DSA_8021Q_VBID_LO(x) ((x) & GENMASK(1, 0))
+#define DSA_8021Q_VBID(x) \
+ (((DSA_8021Q_VBID_LO(x) << DSA_8021Q_VBID_LO_SHIFT) & \
+ DSA_8021Q_VBID_LO_MASK) | \
+ ((DSA_8021Q_VBID_HI(x) << DSA_8021Q_VBID_HI_SHIFT) & \
+ DSA_8021Q_VBID_HI_MASK))
#define DSA_8021Q_PORT_SHIFT 0
#define DSA_8021Q_PORT_MASK GENMASK(3, 0)
#define DSA_8021Q_PORT(x) (((x) << DSA_8021Q_PORT_SHIFT) & \
DSA_8021Q_PORT_MASK)
+u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num)
+{
+ /* The VBID value of 0 is reserved for precise TX */
+ return DSA_8021Q_DIR_TX | DSA_8021Q_VBID(bridge_num + 1);
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid);
+
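A sketch of how a tag_8021q-based tagger would typically consume the new helper on transmit; foo_xmit is hypothetical, and the assumption is that the bridge marks data-plane packets of an offloaded bridge with skb->offload_fwd_mark.

static struct sk_buff *foo_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct dsa_port *dp = dsa_slave_to_port(netdev);
	u16 tx_vid;

	if (skb->offload_fwd_mark)
		/* Data plane of an offloaded bridge: let the switch forward
		 * within the bridge's broadcast domain (VBID != 0).
		 */
		tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num);
	else
		/* Control or standalone traffic: steer precisely to this port */
		tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);

	return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, tx_vid);
}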
/* Returns the VID to be inserted into the frame from xmit for switch steering
* instructions on egress. Encodes switch ID and port ID.
*/
@@ -98,13 +95,6 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid);
-u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan)
-{
- return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) |
- DSA_8021Q_PORT(port) | DSA_8021Q_SUBVLAN(subvlan);
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid_subvlan);
-
/* Returns the decoded switch ID from the RX VID. */
int dsa_8021q_rx_switch_id(u16 vid)
{
@@ -119,20 +109,6 @@ int dsa_8021q_rx_source_port(u16 vid)
}
EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
-/* Returns the decoded subvlan from the RX VID. */
-u16 dsa_8021q_rx_subvlan(u16 vid)
-{
- u16 svl_hi, svl_lo;
-
- svl_hi = (vid & DSA_8021Q_SUBVLAN_HI_MASK) >>
- DSA_8021Q_SUBVLAN_HI_SHIFT;
- svl_lo = (vid & DSA_8021Q_SUBVLAN_LO_MASK) >>
- DSA_8021Q_SUBVLAN_LO_SHIFT;
-
- return (svl_hi << 2) | svl_lo;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan);
-
bool vid_is_dsa_8021q_rxvlan(u16 vid)
{
return (vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX;
@@ -151,21 +127,152 @@ bool vid_is_dsa_8021q(u16 vid)
}
EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
-/* If @enabled is true, installs @vid with @flags into the switch port's HW
- * filter.
- * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the
- * user explicitly configured this @vid through the bridge core, then the @vid
- * is installed again, but this time with the flags from the bridge layer.
- */
-static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
- u16 flags, bool enabled)
+static struct dsa_tag_8021q_vlan *
+dsa_tag_8021q_vlan_find(struct dsa_8021q_context *ctx, int port, u16 vid)
+{
+ struct dsa_tag_8021q_vlan *v;
+
+ list_for_each_entry(v, &ctx->vlans, list)
+ if (v->vid == vid && v->port == port)
+ return v;
+
+ return NULL;
+}
+
+static int dsa_switch_do_tag_8021q_vlan_add(struct dsa_switch *ds, int port,
+ u16 vid, u16 flags)
{
- struct dsa_port *dp = dsa_to_port(ctx->ds, port);
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_tag_8021q_vlan *v;
+ int err;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->tag_8021q_vlan_add(ds, port, vid, flags);
+
+ v = dsa_tag_8021q_vlan_find(ctx, port, vid);
+ if (v) {
+ refcount_inc(&v->refcount);
+ return 0;
+ }
+
+ v = kzalloc(sizeof(*v), GFP_KERNEL);
+ if (!v)
+ return -ENOMEM;
- if (enabled)
- return ctx->ops->vlan_add(ctx->ds, dp->index, vid, flags);
+ err = ds->ops->tag_8021q_vlan_add(ds, port, vid, flags);
+ if (err) {
+ kfree(v);
+ return err;
+ }
- return ctx->ops->vlan_del(ctx->ds, dp->index, vid);
+ v->vid = vid;
+ v->port = port;
+ refcount_set(&v->refcount, 1);
+ list_add_tail(&v->list, &ctx->vlans);
+
+ return 0;
+}
+
+static int dsa_switch_do_tag_8021q_vlan_del(struct dsa_switch *ds, int port,
+ u16 vid)
+{
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct dsa_tag_8021q_vlan *v;
+ int err;
+
+ /* No need to bother with refcounting for user ports */
+ if (!(dsa_port_is_cpu(dp) || dsa_port_is_dsa(dp)))
+ return ds->ops->tag_8021q_vlan_del(ds, port, vid);
+
+ v = dsa_tag_8021q_vlan_find(ctx, port, vid);
+ if (!v)
+ return -ENOENT;
+
+ if (!refcount_dec_and_test(&v->refcount))
+ return 0;
+
+ err = ds->ops->tag_8021q_vlan_del(ds, port, vid);
+ if (err) {
+ refcount_inc(&v->refcount);
+ return err;
+ }
+
+ list_del(&v->list);
+ kfree(v);
+
+ return 0;
+}
+
+static bool
+dsa_switch_tag_8021q_vlan_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
+ return true;
+
+ if (ds->dst->index == info->tree_index && ds->index == info->sw_index)
+ return port == info->port;
+
+ return false;
+}
+
+int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+ int port, err;
+
+ /* Since we use dsa_broadcast(), there might be other switches in other
+ * trees which don't support tag_8021q, so don't return an error.
+ * Or they might even support tag_8021q but have not registered yet to
+ * use it (maybe they use another tagger currently).
+ */
+ if (!ds->ops->tag_8021q_vlan_add || !ds->tag_8021q_ctx)
+ return 0;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) {
+ u16 flags = 0;
+
+ if (dsa_is_user_port(ds, port))
+ flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+ if (vid_is_dsa_8021q_rxvlan(info->vid) &&
+ dsa_8021q_rx_switch_id(info->vid) == ds->index &&
+ dsa_8021q_rx_source_port(info->vid) == port)
+ flags |= BRIDGE_VLAN_INFO_PVID;
+
+ err = dsa_switch_do_tag_8021q_vlan_add(ds, port,
+ info->vid,
+ flags);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int dsa_switch_tag_8021q_vlan_del(struct dsa_switch *ds,
+ struct dsa_notifier_tag_8021q_vlan_info *info)
+{
+ int port, err;
+
+ if (!ds->ops->tag_8021q_vlan_del || !ds->tag_8021q_ctx)
+ return 0;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ if (dsa_switch_tag_8021q_vlan_match(ds, port, info)) {
+ err = dsa_switch_do_tag_8021q_vlan_del(ds, port,
+ info->vid);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
}
/* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
@@ -181,12 +288,6 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* force all switched traffic to pass through the CPU. So we must also make
* the other front-panel ports members of this VID we're adding, albeit
* we're not making it their PVID (they'll still have their own).
- * By the way - just because we're installing the same VID in multiple
- * switch ports doesn't mean that they'll start to talk to one another, even
- * while not bridged: the final forwarding decision is still an AND between
- * the L2 forwarding information (which is limiting forwarding in this case)
- * and the VLAN-based restrictions (of which there are none in this case,
- * since all ports are members).
* - On TX (ingress from CPU and towards network) we are faced with a problem.
* If we were to tag traffic (from within DSA) with the port's pvid, all
* would be well, assuming the switch ports were standalone. Frames would
@@ -200,9 +301,10 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* a member of the VID we're tagging the traffic with - the desired one.
*
* So at the end, each front-panel port will have one RX VID (also the PVID),
- * the RX VID of all other front-panel ports, and one TX VID. Whereas the CPU
- * port will have the RX and TX VIDs of all front-panel ports, and on top of
- * that, is also tagged-input and tagged-output (VLAN trunk).
+ * the RX VID of all other front-panel ports that are in the same bridge, and
+ * one TX VID. Whereas the CPU port will have the RX and TX VIDs of all
+ * front-panel ports, and on top of that, is also tagged-input and
+ * tagged-output (VLAN trunk).
*
* CPU port CPU port
* +-------------+-----+-------------+ +-------------+-----+-------------+
@@ -220,246 +322,246 @@ static int dsa_8021q_vid_apply(struct dsa_8021q_context *ctx, int port, u16 vid,
* +-+-----+-+-----+-+-----+-+-----+-+ +-+-----+-+-----+-+-----+-+-----+-+
* swp0 swp1 swp2 swp3 swp0 swp1 swp2 swp3
*/
-static int dsa_8021q_setup_port(struct dsa_8021q_context *ctx, int port,
- bool enabled)
+static bool dsa_tag_8021q_bridge_match(struct dsa_switch *ds, int port,
+ struct dsa_notifier_bridge_info *info)
+{
+ struct dsa_port *dp = dsa_to_port(ds, port);
+
+ /* Don't match on self */
+ if (ds->dst->index == info->tree_index &&
+ ds->index == info->sw_index &&
+ port == info->port)
+ return false;
+
+ if (dsa_port_is_user(dp))
+ return dp->bridge_dev == info->br;
+
+ return false;
+}
+
+int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info)
+{
+ struct dsa_switch *targeted_ds;
+ struct dsa_port *targeted_dp;
+ u16 targeted_rx_vid;
+ int err, port;
+
+ if (!ds->tag_8021q_ctx)
+ return 0;
+
+ targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
+ targeted_dp = dsa_to_port(targeted_ds, info->port);
+ targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+
+ for (port = 0; port < ds->num_ports; port++) {
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+
+ if (!dsa_tag_8021q_bridge_match(ds, port, info))
+ continue;
+
+ /* Install the RX VID of the targeted port in our VLAN table */
+ err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid, true);
+ if (err)
+ return err;
+
+ /* Install our RX VID into the targeted port's VLAN table */
+ err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid, true);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
+ struct dsa_notifier_bridge_info *info)
+{
+ struct dsa_switch *targeted_ds;
+ struct dsa_port *targeted_dp;
+ u16 targeted_rx_vid;
+ int port;
+
+ if (!ds->tag_8021q_ctx)
+ return 0;
+
+ targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
+ targeted_dp = dsa_to_port(targeted_ds, info->port);
+ targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+
+ for (port = 0; port < ds->num_ports; port++) {
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+
+ if (!dsa_tag_8021q_bridge_match(ds, port, info))
+ continue;
+
+ /* Remove the RX VID of the targeted port from our VLAN table */
+ dsa_port_tag_8021q_vlan_del(dp, targeted_rx_vid, true);
+
+ /* Remove our RX VID from the targeted port's VLAN table */
+ dsa_port_tag_8021q_vlan_del(targeted_dp, rx_vid, true);
+ }
+
+ return 0;
+}
+
+int dsa_tag_8021q_bridge_tx_fwd_offload(struct dsa_switch *ds, int port,
+ struct net_device *br,
+ int bridge_num)
{
- int upstream = dsa_upstream_port(ctx->ds, port);
- u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port);
- u16 tx_vid = dsa_8021q_tx_vid(ctx->ds, port);
+ u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num);
+
+ return dsa_port_tag_8021q_vlan_add(dsa_to_port(ds, port), tx_vid,
+ true);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_offload);
+
+void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
+ struct net_device *br,
+ int bridge_num)
+{
+ u16 tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(bridge_num);
+
+ dsa_port_tag_8021q_vlan_del(dsa_to_port(ds, port), tx_vid, true);
+}
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_bridge_tx_fwd_unoffload);
+
+/* Set up a port's tag_8021q RX and TX VLAN for standalone mode operation */
+static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
+{
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ u16 tx_vid = dsa_8021q_tx_vid(ds, port);
struct net_device *master;
- int i, err, subvlan;
+ int err;
/* The CPU port is implicitly configured by
* configuring the front-panel ports
*/
- if (!dsa_is_user_port(ctx->ds, port))
+ if (!dsa_port_is_user(dp))
return 0;
- master = dsa_to_port(ctx->ds, port)->cpu_dp->master;
+ master = dp->cpu_dp->master;
/* Add this user port's RX VID to the membership list of all others
* (including itself). This is so that bridging will not be hindered.
* L2 forwarding rules still take precedence when there are no VLAN
* restrictions, so there are no concerns about leaking traffic.
*/
- for (i = 0; i < ctx->ds->num_ports; i++) {
- u16 flags;
-
- if (i == upstream)
- continue;
- else if (i == port)
- /* The RX VID is pvid on this port */
- flags = BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_PVID;
- else
- /* The RX VID is a regular VLAN on all others */
- flags = BRIDGE_VLAN_INFO_UNTAGGED;
-
- err = dsa_8021q_vid_apply(ctx, i, rx_vid, flags, enabled);
- if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply RX VID %d to port %d: %d\n",
- rx_vid, port, err);
- return err;
- }
- }
-
- /* CPU port needs to see this port's RX VID
- * as tagged egress.
- */
- err = dsa_8021q_vid_apply(ctx, upstream, rx_vid, 0, enabled);
+ err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, false);
if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply RX VID %d to port %d: %d\n",
- rx_vid, port, err);
+ dev_err(ds->dev,
+ "Failed to apply RX VID %d to port %d: %pe\n",
+ rx_vid, port, ERR_PTR(err));
return err;
}
- /* Add to the master's RX filter not only @rx_vid, but in fact
- * the entire subvlan range, just in case this DSA switch might
- * want to use sub-VLANs.
- */
- for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++) {
- u16 vid = dsa_8021q_rx_vid_subvlan(ctx->ds, port, subvlan);
-
- if (enabled)
- vlan_vid_add(master, ctx->proto, vid);
- else
- vlan_vid_del(master, ctx->proto, vid);
- }
+ /* Add @rx_vid to the master's RX filter. */
+ vlan_vid_add(master, ctx->proto, rx_vid);
/* Finally apply the TX VID on this port and on the CPU port */
- err = dsa_8021q_vid_apply(ctx, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED,
- enabled);
- if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply TX VID %d on port %d: %d\n",
- tx_vid, port, err);
- return err;
- }
- err = dsa_8021q_vid_apply(ctx, upstream, tx_vid, 0, enabled);
+ err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, false);
if (err) {
- dev_err(ctx->ds->dev,
- "Failed to apply TX VID %d on port %d: %d\n",
- tx_vid, upstream, err);
+ dev_err(ds->dev,
+ "Failed to apply TX VID %d on port %d: %pe\n",
+ tx_vid, port, ERR_PTR(err));
return err;
}
return err;
}
-int dsa_8021q_setup(struct dsa_8021q_context *ctx, bool enabled)
+static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
{
- int rc, port;
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+ u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+ struct net_device *master;
- ASSERT_RTNL();
+ /* The CPU port is implicitly configured by
+ * configuring the front-panel ports
+ */
+ if (!dsa_port_is_user(dp))
+ return;
- for (port = 0; port < ctx->ds->num_ports; port++) {
- rc = dsa_8021q_setup_port(ctx, port, enabled);
- if (rc < 0) {
- dev_err(ctx->ds->dev,
- "Failed to setup VLAN tagging for port %d: %d\n",
- port, rc);
- return rc;
- }
- }
+ master = dp->cpu_dp->master;
- return 0;
-}
-EXPORT_SYMBOL_GPL(dsa_8021q_setup);
+ dsa_port_tag_8021q_vlan_del(dp, rx_vid, false);
-static int dsa_8021q_crosschip_link_apply(struct dsa_8021q_context *ctx,
- int port,
- struct dsa_8021q_context *other_ctx,
- int other_port, bool enabled)
-{
- u16 rx_vid = dsa_8021q_rx_vid(ctx->ds, port);
+ vlan_vid_del(master, ctx->proto, rx_vid);
- /* @rx_vid of local @ds port @port goes to @other_port of
- * @other_ds
- */
- return dsa_8021q_vid_apply(other_ctx, other_port, rx_vid,
- BRIDGE_VLAN_INFO_UNTAGGED, enabled);
+ dsa_port_tag_8021q_vlan_del(dp, tx_vid, false);
}
-static int dsa_8021q_crosschip_link_add(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+static int dsa_tag_8021q_setup(struct dsa_switch *ds)
{
- struct dsa_8021q_crosschip_link *c;
+ int err, port;
- list_for_each_entry(c, &ctx->crosschip_links, list) {
- if (c->port == port && c->other_ctx == other_ctx &&
- c->other_port == other_port) {
- refcount_inc(&c->refcount);
- return 0;
+ ASSERT_RTNL();
+
+ for (port = 0; port < ds->num_ports; port++) {
+ err = dsa_tag_8021q_port_setup(ds, port);
+ if (err < 0) {
+ dev_err(ds->dev,
+ "Failed to setup VLAN tagging for port %d: %pe\n",
+ port, ERR_PTR(err));
+ return err;
}
}
- dev_dbg(ctx->ds->dev,
- "adding crosschip link from port %d to %s port %d\n",
- port, dev_name(other_ctx->ds->dev), other_port);
-
- c = kzalloc(sizeof(*c), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
-
- c->port = port;
- c->other_ctx = other_ctx;
- c->other_port = other_port;
- refcount_set(&c->refcount, 1);
-
- list_add(&c->list, &ctx->crosschip_links);
-
return 0;
}
-static void dsa_8021q_crosschip_link_del(struct dsa_8021q_context *ctx,
- struct dsa_8021q_crosschip_link *c,
- bool *keep)
+static void dsa_tag_8021q_teardown(struct dsa_switch *ds)
{
- *keep = !refcount_dec_and_test(&c->refcount);
-
- if (*keep)
- return;
+ int port;
- dev_dbg(ctx->ds->dev,
- "deleting crosschip link from port %d to %s port %d\n",
- c->port, dev_name(c->other_ctx->ds->dev), c->other_port);
+ ASSERT_RTNL();
- list_del(&c->list);
- kfree(c);
+ for (port = 0; port < ds->num_ports; port++)
+ dsa_tag_8021q_port_teardown(ds, port);
}
-/* Make traffic from local port @port be received by remote port @other_port.
- * This means that our @rx_vid needs to be installed on @other_ds's upstream
- * and user ports. The user ports should be egress-untagged so that they can
- * pop the dsa_8021q VLAN. But the @other_upstream can be either egress-tagged
- * or untagged: it doesn't matter, since it should never egress a frame having
- * our @rx_vid.
- */
-int dsa_8021q_crosschip_bridge_join(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto)
{
- /* @other_upstream is how @other_ds reaches us. If we are part
- * of disjoint trees, then we are probably connected through
- * our CPU ports. If we're part of the same tree though, we should
- * probably use dsa_towards_port.
- */
- int other_upstream = dsa_upstream_port(other_ctx->ds, other_port);
- int rc;
+ struct dsa_8021q_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
- rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_port);
- if (rc)
- return rc;
+ ctx->proto = proto;
+ ctx->ds = ds;
- rc = dsa_8021q_crosschip_link_apply(ctx, port, other_ctx,
- other_port, true);
- if (rc)
- return rc;
+ INIT_LIST_HEAD(&ctx->vlans);
- rc = dsa_8021q_crosschip_link_add(ctx, port, other_ctx, other_upstream);
- if (rc)
- return rc;
+ ds->tag_8021q_ctx = ctx;
- return dsa_8021q_crosschip_link_apply(ctx, port, other_ctx,
- other_upstream, true);
+ return dsa_tag_8021q_setup(ds);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_register);
-int dsa_8021q_crosschip_bridge_leave(struct dsa_8021q_context *ctx, int port,
- struct dsa_8021q_context *other_ctx,
- int other_port)
+void dsa_tag_8021q_unregister(struct dsa_switch *ds)
{
- int other_upstream = dsa_upstream_port(other_ctx->ds, other_port);
- struct dsa_8021q_crosschip_link *c, *n;
-
- list_for_each_entry_safe(c, n, &ctx->crosschip_links, list) {
- if (c->port == port && c->other_ctx == other_ctx &&
- (c->other_port == other_port ||
- c->other_port == other_upstream)) {
- struct dsa_8021q_context *other_ctx = c->other_ctx;
- int other_port = c->other_port;
- bool keep;
- int rc;
-
- dsa_8021q_crosschip_link_del(ctx, c, &keep);
- if (keep)
- continue;
-
- rc = dsa_8021q_crosschip_link_apply(ctx, port,
- other_ctx,
- other_port,
- false);
- if (rc)
- return rc;
- }
+ struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
+ struct dsa_tag_8021q_vlan *v, *n;
+
+ dsa_tag_8021q_teardown(ds);
+
+ list_for_each_entry_safe(v, n, &ctx->vlans, list) {
+ list_del(&v->list);
+ kfree(v);
}
- return 0;
+ ds->tag_8021q_ctx = NULL;
+
+ kfree(ctx);
}
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_unregister);
struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
u16 tpid, u16 tci)
@@ -471,4 +573,23 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
}
EXPORT_SYMBOL_GPL(dsa_8021q_xmit);
-MODULE_LICENSE("GPL v2");
+void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id)
+{
+ u16 vid, tci;
+
+ skb_push_rcsum(skb, ETH_HLEN);
+ if (skb_vlan_tag_present(skb)) {
+ tci = skb_vlan_tag_get(skb);
+ __vlan_hwaccel_clear_tag(skb);
+ } else {
+ __skb_vlan_pop(skb, &tci);
+ }
+ skb_pull_rcsum(skb, ETH_HLEN);
+
+ vid = tci & VLAN_VID_MASK;
+
+ *source_port = dsa_8021q_rx_source_port(vid);
+ *switch_id = dsa_8021q_rx_switch_id(vid);
+ skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_rcv);
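A minimal sketch (not part of the patch) of how a tagger is expected to pair dsa_8021q_xmit() on TX with the new dsa_8021q_rcv() on RX. The helpers used are the ones defined above and dsa_master_find_slave(); the example_* wrappers themselves are illustrative only.

/* Illustrative tagger paths built on the helpers above (not in the patch) */
static struct sk_buff *example_8021q_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct dsa_port *dp = dsa_slave_to_port(netdev);
	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);

	/* Steer the frame out of the intended user port (PCP left at 0) */
	return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q, tx_vid);
}

static struct sk_buff *example_8021q_rcv(struct sk_buff *skb,
					 struct net_device *netdev)
{
	int src_port, switch_id;

	/* Pops the VLAN header and decodes the switch ID and source port */
	dsa_8021q_rcv(skb, &src_port, &switch_id);

	skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
	if (!skb->dev)
		return NULL;

	return skb;
}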
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index 002cf7f952e2..8a02ac44282f 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -44,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
- struct net_device *ndev,
- struct packet_type *pt)
+ struct net_device *ndev)
{
u8 ver, port;
u16 hdr;
@@ -85,7 +84,7 @@ static const struct dsa_device_ops ar9331_netdev_ops = {
.proto = DSA_TAG_PROTO_AR9331,
.xmit = ar9331_tag_xmit,
.rcv = ar9331_tag_rcv,
- .overhead = AR9331_HDR_LEN,
+ .needed_headroom = AR9331_HDR_LEN,
};
MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 40e9f3098c8d..96dbb8ee2fee 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -99,7 +99,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
skb_push(skb, BRCM_TAG_LEN);
if (offset)
- memmove(skb->data, skb->data + BRCM_TAG_LEN, offset);
+ dsa_alloc_etype_header(skb, BRCM_TAG_LEN);
brcm_tag = skb->data + offset;
@@ -136,7 +136,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
*/
static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
struct net_device *dev,
- struct packet_type *pt,
unsigned int offset)
{
int source_port;
@@ -167,7 +166,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_TAG_LEN);
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -182,20 +181,16 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
}
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
struct sk_buff *nskb;
/* skb->data points to the EtherType, the tag is right before it */
- nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+ nskb = brcm_tag_rcv_ll(skb, dev, 2);
if (!nskb)
return nskb;
- /* Move the Ethernet DA and SA */
- memmove(nskb->data - ETH_HLEN,
- nskb->data - ETH_HLEN - BRCM_TAG_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, BRCM_TAG_LEN);
return nskb;
}
@@ -205,7 +200,7 @@ static const struct dsa_device_ops brcm_netdev_ops = {
.proto = DSA_TAG_PROTO_BRCM,
.xmit = brcm_tag_xmit,
.rcv = brcm_tag_rcv,
- .overhead = BRCM_TAG_LEN,
+ .needed_headroom = BRCM_TAG_LEN,
};
DSA_TAG_DRIVER(brcm_netdev_ops);
@@ -233,7 +228,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
skb_push(skb, BRCM_LEG_TAG_LEN);
- memmove(skb->data, skb->data + BRCM_LEG_TAG_LEN, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, BRCM_LEG_TAG_LEN);
brcm_tag = skb->data + 2 * ETH_ALEN;
@@ -251,8 +246,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
int source_port;
u8 *brcm_tag;
@@ -260,7 +254,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, BRCM_LEG_PORT_ID)))
return NULL;
- brcm_tag = skb->data - 2;
+ brcm_tag = dsa_etype_header_pos_rx(skb);
source_port = brcm_tag[5] & BRCM_LEG_PORT_ID;
@@ -271,12 +265,9 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
/* Remove Broadcom tag and update checksum */
skb_pull_rcsum(skb, BRCM_LEG_TAG_LEN);
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
- /* Move the Ethernet DA and SA */
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - BRCM_LEG_TAG_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, BRCM_LEG_TAG_LEN);
return skb;
}
@@ -286,7 +277,7 @@ static const struct dsa_device_ops brcm_legacy_netdev_ops = {
.proto = DSA_TAG_PROTO_BRCM_LEGACY,
.xmit = brcm_leg_tag_xmit,
.rcv = brcm_leg_tag_rcv,
- .overhead = BRCM_LEG_TAG_LEN,
+ .needed_headroom = BRCM_LEG_TAG_LEN,
};
DSA_TAG_DRIVER(brcm_legacy_netdev_ops);
@@ -302,11 +293,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
}
static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
/* tag is prepended to the packet */
- return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+ return brcm_tag_rcv_ll(skb, dev, ETH_HLEN);
}
static const struct dsa_device_ops brcm_prepend_netdev_ops = {
@@ -314,7 +304,7 @@ static const struct dsa_device_ops brcm_prepend_netdev_ops = {
.proto = DSA_TAG_PROTO_BRCM_PREPEND,
.xmit = brcm_tag_xmit_prepend,
.rcv = brcm_tag_rcv_prepend,
- .overhead = BRCM_TAG_LEN,
+ .needed_headroom = BRCM_TAG_LEN,
};
DSA_TAG_DRIVER(brcm_prepend_netdev_ops);
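The dsa_alloc_etype_header()/dsa_strip_etype_header() and dsa_etype_header_pos_rx()/_tx() calls introduced throughout these taggers replace the open-coded memmove()s visible in the removed lines. Their definitions live in dsa_priv.h and are not part of this diff; the sketch below shows what they presumably reduce to, inferred from the code they replace.

/* Sketch of the dsa_priv.h helpers, inferred from the memmove()s they replace */
static inline void *example_etype_header_pos_rx(struct sk_buff *skb)
{
	/* on RX, skb->data points 2 bytes past the EtherType */
	return skb->data - 2;
}

static inline void *example_etype_header_pos_tx(struct sk_buff *skb)
{
	/* on TX, skb->data points at the destination MAC address */
	return skb->data + 2 * ETH_ALEN;
}

static inline void example_alloc_etype_header(struct sk_buff *skb, int len)
{
	/* open room for a tag between the MAC addresses and the EtherType */
	memmove(skb->data, skb->data + len, 2 * ETH_ALEN);
}

static inline void example_strip_etype_header(struct sk_buff *skb, int len)
{
	/* close the gap left by a tag that sat before the EtherType */
	memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - len, 2 * ETH_ALEN);
}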
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 7e7b7decdf39..77d0ce89ab77 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -126,18 +126,53 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
u8 extra)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ u8 tag_dev, tag_port;
+ enum dsa_cmd cmd;
u8 *dsa_header;
+ u16 pvid = 0;
+ int err;
+
+ if (skb->offload_fwd_mark) {
+ struct dsa_switch_tree *dst = dp->ds->dst;
+ struct net_device *br = dp->bridge_dev;
+
+ cmd = DSA_CMD_FORWARD;
+
+ /* When offloading forwarding for a bridge, inject FORWARD
+ * packets on behalf of a virtual switch device with an index
+ * past the physical switches.
+ */
+ tag_dev = dst->last_switch + 1 + dp->bridge_num;
+ tag_port = 0;
+
+ /* If we are offloading forwarding for a VLAN-unaware bridge,
+ * inject packets to hardware using the bridge's pvid, since
+ * that's where the packets ingressed from.
+ */
+ if (!br_vlan_enabled(br)) {
+ /* Safe because __dev_queue_xmit() runs under
+ * rcu_read_lock_bh()
+ */
+ err = br_vlan_get_pvid_rcu(br, &pvid);
+ if (err)
+ return NULL;
+ }
+ } else {
+ cmd = DSA_CMD_FROM_CPU;
+ tag_dev = dp->ds->index;
+ tag_port = dp->index;
+ }
if (skb->protocol == htons(ETH_P_8021Q)) {
if (extra) {
skb_push(skb, extra);
- memmove(skb->data, skb->data + extra, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, extra);
}
- /* Construct tagged FROM_CPU DSA tag from 802.1Q tag. */
- dsa_header = skb->data + 2 * ETH_ALEN + extra;
- dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | 0x20 | dp->ds->index;
- dsa_header[1] = dp->index << 3;
+ /* Construct tagged DSA tag from 802.1Q tag. */
+ dsa_header = dsa_etype_header_pos_tx(skb) + extra;
+ dsa_header[0] = (cmd << 6) | 0x20 | tag_dev;
+ dsa_header[1] = tag_port << 3;
/* Move CFI field from byte 2 to byte 1. */
if (dsa_header[2] & 0x10) {
@@ -146,14 +181,15 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
}
} else {
skb_push(skb, DSA_HLEN + extra);
- memmove(skb->data, skb->data + DSA_HLEN + extra, 2 * ETH_ALEN);
-
- /* Construct untagged FROM_CPU DSA tag. */
- dsa_header = skb->data + 2 * ETH_ALEN + extra;
- dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | dp->ds->index;
- dsa_header[1] = dp->index << 3;
- dsa_header[2] = 0x00;
- dsa_header[3] = 0x00;
+ dsa_alloc_etype_header(skb, DSA_HLEN + extra);
+
+ /* Construct untagged DSA tag. */
+ dsa_header = dsa_etype_header_pos_tx(skb) + extra;
+
+ dsa_header[0] = (cmd << 6) | tag_dev;
+ dsa_header[1] = tag_port << 3;
+ dsa_header[2] = pvid >> 8;
+ dsa_header[3] = pvid & 0xff;
}
return skb;
@@ -162,20 +198,18 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
u8 extra)
{
+ bool trap = false, trunk = false;
int source_device, source_port;
- bool trunk = false;
enum dsa_code code;
enum dsa_cmd cmd;
u8 *dsa_header;
/* The ethertype field is part of the DSA header. */
- dsa_header = skb->data - 2;
+ dsa_header = dsa_etype_header_pos_rx(skb);
cmd = dsa_header[0] >> 6;
switch (cmd) {
case DSA_CMD_FORWARD:
- skb->offload_fwd_mark = 1;
-
trunk = !!(dsa_header[1] & 7);
break;
@@ -194,7 +228,6 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
* device (like a bridge) that forwarding has
* already been done by hardware.
*/
- skb->offload_fwd_mark = 1;
break;
case DSA_CODE_MGMT_TRAP:
case DSA_CODE_IGMP_MLD_TRAP:
@@ -202,6 +235,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
/* Traps have, by definition, not been
* forwarded by hardware, so don't mark them.
*/
+ trap = true;
break;
default:
/* Reserved code, this could be anything. Drop
@@ -235,6 +269,15 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
if (!skb->dev)
return NULL;
+ /* When using LAG offload, skb->dev is not a DSA slave interface,
+ * so we cannot call dsa_default_offload_fwd_mark and we need to
+ * special-case it.
+ */
+ if (trunk)
+ skb->offload_fwd_mark = true;
+ else if (!trap)
+ dsa_default_offload_fwd_mark(skb);
+
/* If the 'tagged' bit is set; convert the DSA tag to a 802.1Q
* tag, and delete the ethertype (extra) if applicable. If the
* 'tagged' bit is cleared; delete the DSA tag, and ethertype
@@ -269,14 +312,10 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
memcpy(dsa_header, new_header, DSA_HLEN);
if (extra)
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - extra,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, extra);
} else {
skb_pull_rcsum(skb, DSA_HLEN);
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - DSA_HLEN - extra,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, DSA_HLEN + extra);
}
return skb;
@@ -289,8 +328,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
return dsa_xmit_ll(skb, dev, 0);
}
-static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev)
{
if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
return NULL;
@@ -303,7 +341,7 @@ static const struct dsa_device_ops dsa_netdev_ops = {
.proto = DSA_TAG_PROTO_DSA,
.xmit = dsa_xmit,
.rcv = dsa_rcv,
- .overhead = DSA_HLEN,
+ .needed_headroom = DSA_HLEN,
};
DSA_TAG_DRIVER(dsa_netdev_ops);
@@ -322,7 +360,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
if (!skb)
return NULL;
- edsa_header = skb->data + 2 * ETH_ALEN;
+ edsa_header = dsa_etype_header_pos_tx(skb);
edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff;
edsa_header[1] = ETH_P_EDSA & 0xff;
edsa_header[2] = 0x00;
@@ -330,8 +368,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev)
{
if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
return NULL;
@@ -346,7 +383,7 @@ static const struct dsa_device_ops edsa_netdev_ops = {
.proto = DSA_TAG_PROTO_EDSA,
.xmit = edsa_xmit,
.rcv = edsa_rcv,
- .overhead = EDSA_HLEN,
+ .needed_headroom = EDSA_HLEN,
};
DSA_TAG_DRIVER(edsa_netdev_ops);
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
index 2f5bd5e338ab..df7140984da3 100644
--- a/net/dsa/tag_gswip.c
+++ b/net/dsa/tag_gswip.c
@@ -75,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
int port;
u8 *gswip_tag;
@@ -103,7 +102,7 @@ static const struct dsa_device_ops gswip_netdev_ops = {
.proto = DSA_TAG_PROTO_GSWIP,
.xmit = gswip_tag_xmit,
.rcv = gswip_tag_rcv,
- .overhead = GSWIP_RX_HEADER_LEN,
+ .needed_headroom = GSWIP_RX_HEADER_LEN,
};
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index a09805c8e1ab..f64b805303cd 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -29,8 +29,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
}
static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN;
@@ -44,7 +43,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN);
- skb->offload_fwd_mark = true;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -54,8 +53,7 @@ static const struct dsa_device_ops hellcreek_netdev_ops = {
.proto = DSA_TAG_PROTO_HELLCREEK,
.xmit = hellcreek_xmit,
.rcv = hellcreek_rcv,
- .overhead = HELLCREEK_TAG_LEN,
- .tail_tag = true,
+ .needed_tailroom = HELLCREEK_TAG_LEN,
};
MODULE_LICENSE("Dual MIT/GPL");
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 4820dbcedfa2..fa1d60d13ad9 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -24,7 +24,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
pskb_trim_rcsum(skb, skb->len - len);
- skb->offload_fwd_mark = true;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -53,6 +53,9 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
u8 *tag;
u8 *addr;
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
/* Tag encoding */
tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
@@ -64,8 +67,7 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
@@ -77,8 +79,7 @@ static const struct dsa_device_ops ksz8795_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ8795,
.xmit = ksz8795_xmit,
.rcv = ksz8795_rcv,
- .overhead = KSZ_INGRESS_TAG_LEN,
- .tail_tag = true,
+ .needed_tailroom = KSZ_INGRESS_TAG_LEN,
};
DSA_TAG_DRIVER(ksz8795_netdev_ops);
@@ -115,6 +116,9 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
u8 *addr;
u16 val;
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
/* Tag encoding */
tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
@@ -129,8 +133,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
return skb;
}
-static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
{
/* Tag decoding */
u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
@@ -149,8 +152,7 @@ static const struct dsa_device_ops ksz9477_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ9477,
.xmit = ksz9477_xmit,
.rcv = ksz9477_rcv,
- .overhead = KSZ9477_INGRESS_TAG_LEN,
- .tail_tag = true,
+ .needed_tailroom = KSZ9477_INGRESS_TAG_LEN,
};
DSA_TAG_DRIVER(ksz9477_netdev_ops);
@@ -166,6 +168,9 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
u8 *addr;
u8 *tag;
+ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
+ return NULL;
+
/* Tag encoding */
tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
@@ -183,8 +188,7 @@ static const struct dsa_device_ops ksz9893_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ9893,
.xmit = ksz9893_xmit,
.rcv = ksz9477_rcv,
- .overhead = KSZ_INGRESS_TAG_LEN,
- .tail_tag = true,
+ .needed_tailroom = KSZ_INGRESS_TAG_LEN,
};
DSA_TAG_DRIVER(ksz9893_netdev_ops);
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index aa1318dccaf0..cb548188f813 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -62,9 +62,10 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
skb_push(skb, LAN9303_TAG_LEN);
/* make room between MACs and Ether-Type */
- memmove(skb->data, skb->data + LAN9303_TAG_LEN, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, LAN9303_TAG_LEN);
+
+ lan9303_tag = dsa_etype_header_pos_tx(skb);
- lan9303_tag = (__be16 *)(skb->data + 2 * ETH_ALEN);
tag = lan9303_xmit_use_arl(dp, skb->data) ?
LAN9303_TAG_TX_USE_ALR :
dp->index | LAN9303_TAG_TX_STP_OVERRIDE;
@@ -74,8 +75,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
{
__be16 *lan9303_tag;
u16 lan9303_tag1;
@@ -87,13 +87,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
}
- /* '->data' points into the middle of our special VLAN tag information:
- *
- * ~ MAC src | 0x81 | 0x00 | 0xyy | 0xzz | ether type
- * ^
- * ->data
- */
- lan9303_tag = (__be16 *)(skb->data - 2);
+ lan9303_tag = dsa_etype_header_pos_rx(skb);
if (lan9303_tag[0] != htons(ETH_P_8021Q)) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid VLAN marker\n");
@@ -113,9 +107,11 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
* and the current ethertype field.
*/
skb_pull_rcsum(skb, 2 + 2);
- memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
- 2 * ETH_ALEN);
- skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU);
+
+ dsa_strip_etype_header(skb, LAN9303_TAG_LEN);
+
+ if (!(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU))
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -125,7 +121,7 @@ static const struct dsa_device_ops lan9303_netdev_ops = {
.proto = DSA_TAG_PROTO_LAN9303,
.xmit = lan9303_xmit,
.rcv = lan9303_rcv,
- .overhead = LAN9303_TAG_LEN,
+ .needed_headroom = LAN9303_TAG_LEN,
};
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index f9b2966d1936..415d8ece242a 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -41,10 +41,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
default:
xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
skb_push(skb, MTK_HDR_LEN);
- memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, MTK_HDR_LEN);
}
- mtk_tag = skb->data + 2 * ETH_ALEN;
+ mtk_tag = dsa_etype_header_pos_tx(skb);
/* Mark tag attribute on special tag insertion to notify hardware
* whether that's a combined special tag with 802.1Q header.
@@ -61,8 +61,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
return skb;
}
-static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
u16 hdr;
int port;
@@ -71,19 +70,13 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN)))
return NULL;
- /* The MTK header is added by the switch between src addr
- * and ethertype at this point, skb->data points to 2 bytes
- * after src addr so header should be 2 bytes right before.
- */
- phdr = (__be16 *)(skb->data - 2);
+ phdr = dsa_etype_header_pos_rx(skb);
hdr = ntohs(*phdr);
/* Remove MTK tag and recalculate checksum. */
skb_pull_rcsum(skb, MTK_HDR_LEN);
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - MTK_HDR_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, MTK_HDR_LEN);
/* Get source port information */
port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
@@ -92,7 +85,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb->dev)
return NULL;
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -102,7 +95,7 @@ static const struct dsa_device_ops mtk_netdev_ops = {
.proto = DSA_TAG_PROTO_MTK,
.xmit = mtk_tag_xmit,
.rcv = mtk_tag_rcv,
- .overhead = MTK_HDR_LEN,
+ .needed_headroom = MTK_HDR_LEN,
};
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 91f0fd1242cd..d37ab98e7fe1 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -55,8 +55,7 @@ static struct sk_buff *seville_xmit(struct sk_buff *skb,
}
static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
u64 src_port, qos_class;
u64 vlan_tci, tag_type;
@@ -104,7 +103,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
*/
return NULL;
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
skb->priority = qos_class;
/* Ocelot switches copy frames unmodified to the CPU. However, it is
@@ -143,7 +142,7 @@ static const struct dsa_device_ops ocelot_netdev_ops = {
.proto = DSA_TAG_PROTO_OCELOT,
.xmit = ocelot_xmit,
.rcv = ocelot_rcv,
- .overhead = OCELOT_TOTAL_TAG_LEN,
+ .needed_headroom = OCELOT_TOTAL_TAG_LEN,
.promisc_on_master = true,
};
@@ -155,7 +154,7 @@ static const struct dsa_device_ops seville_netdev_ops = {
.proto = DSA_TAG_PROTO_SEVILLE,
.xmit = seville_xmit,
.rcv = ocelot_rcv,
- .overhead = OCELOT_TOTAL_TAG_LEN,
+ .needed_headroom = OCELOT_TOTAL_TAG_LEN,
.promisc_on_master = true,
};
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 62a93303bd63..3038a257ba05 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -38,32 +38,17 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
}
static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
- int src_port, switch_id, qos_class;
- u16 vid, tci;
+ int src_port, switch_id;
- skb_push_rcsum(skb, ETH_HLEN);
- if (skb_vlan_tag_present(skb)) {
- tci = skb_vlan_tag_get(skb);
- __vlan_hwaccel_clear_tag(skb);
- } else {
- __skb_vlan_pop(skb, &tci);
- }
- skb_pull_rcsum(skb, ETH_HLEN);
-
- vid = tci & VLAN_VID_MASK;
- src_port = dsa_8021q_rx_source_port(vid);
- switch_id = dsa_8021q_rx_switch_id(vid);
- qos_class = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ dsa_8021q_rcv(skb, &src_port, &switch_id);
skb->dev = dsa_master_find_slave(netdev, switch_id, src_port);
if (!skb->dev)
return NULL;
- skb->offload_fwd_mark = 1;
- skb->priority = qos_class;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -73,7 +58,7 @@ static const struct dsa_device_ops ocelot_8021q_netdev_ops = {
.proto = DSA_TAG_PROTO_OCELOT_8021Q,
.xmit = ocelot_xmit,
.rcv = ocelot_rcv,
- .overhead = VLAN_HLEN,
+ .needed_headroom = VLAN_HLEN,
.promisc_on_master = true,
};
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 88181b52f480..1ea9401b8ace 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -36,8 +36,8 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
skb_push(skb, QCA_HDR_LEN);
- memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN);
- phdr = (__be16 *)(skb->data + 2 * ETH_ALEN);
+ dsa_alloc_etype_header(skb, QCA_HDR_LEN);
+ phdr = dsa_etype_header_pos_tx(skb);
/* Set the version field, and set destination port information */
hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
@@ -48,8 +48,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 ver;
u16 hdr;
@@ -59,11 +58,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
return NULL;
- /* The QCA header is added by the switch between src addr and Ethertype
- * At this point, skb->data points to ethertype so header should be
- * right before
- */
- phdr = (__be16 *)(skb->data - 2);
+ phdr = dsa_etype_header_pos_rx(skb);
hdr = ntohs(*phdr);
/* Make sure the version is correct */
@@ -73,8 +68,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Remove QCA tag and recalculate checksum */
skb_pull_rcsum(skb, QCA_HDR_LEN);
- memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
- ETH_HLEN - QCA_HDR_LEN);
+ dsa_strip_etype_header(skb, QCA_HDR_LEN);
/* Get source port information */
port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
@@ -91,7 +85,7 @@ static const struct dsa_device_ops qca_netdev_ops = {
.proto = DSA_TAG_PROTO_QCA,
.xmit = qca_tag_xmit,
.rcv = qca_tag_rcv,
- .overhead = QCA_HDR_LEN,
+ .needed_headroom = QCA_HDR_LEN,
};
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index cf8ac316f4c7..f920487ae145 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -47,16 +47,17 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
dp->index);
skb_push(skb, RTL4_A_HDR_LEN);
- memmove(skb->data, skb->data + RTL4_A_HDR_LEN, 2 * ETH_ALEN);
- tag = skb->data + 2 * ETH_ALEN;
+ dsa_alloc_etype_header(skb, RTL4_A_HDR_LEN);
+ tag = dsa_etype_header_pos_tx(skb);
/* Set Ethertype */
p = (__be16 *)tag;
*p = htons(RTL4_A_ETHERTYPE);
- out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8);
- /* The lower bits is the port number */
- out |= (u8)dp->index;
+ out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT) | (2 << 8);
+ /* The lower bits indicate the port number */
+ out |= BIT(dp->index);
+
p = (__be16 *)(tag + 2);
*p = htons(out);
@@ -64,8 +65,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
}
static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt)
+ struct net_device *dev)
{
u16 protport;
__be16 *p;
@@ -77,12 +77,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, RTL4_A_HDR_LEN)))
return NULL;
- /* The RTL4 header has its own custom Ethertype 0x8899 and that
- * starts right at the beginning of the packet, after the src
- * ethernet addr. Apparently skb->data always points 2 bytes in,
- * behind the Ethertype.
- */
- tag = skb->data - 2;
+ tag = dsa_etype_header_pos_rx(skb);
p = (__be16 *)tag;
etype = ntohs(*p);
if (etype != RTL4_A_ETHERTYPE) {
@@ -109,12 +104,9 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
/* Remove RTL4 tag and recalculate checksum */
skb_pull_rcsum(skb, RTL4_A_HDR_LEN);
- /* Move ethernet DA and SA in front of the data */
- memmove(skb->data - ETH_HLEN,
- skb->data - ETH_HLEN - RTL4_A_HDR_LEN,
- 2 * ETH_ALEN);
+ dsa_strip_etype_header(skb, RTL4_A_HDR_LEN);
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -124,7 +116,7 @@ static const struct dsa_device_ops rtl4a_netdev_ops = {
.proto = DSA_TAG_PROTO_RTL4_A,
.xmit = rtl4a_tag_xmit,
.rcv = rtl4a_tag_rcv,
- .overhead = RTL4_A_HDR_LEN,
+ .needed_headroom = RTL4_A_HDR_LEN,
};
module_dsa_tag_driver(rtl4a_netdev_ops);
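Note the semantic change in rtl4a_tag_xmit() above: the low byte of the tag now carries BIT(dp->index), a per-port bit, rather than a plain port number. A worked example of the difference (illustrative only):

/* For dp->index == 2: */
u16 out_low_old = (u8)2;	/* == 0x02, port number */
u16 out_low_new = BIT(2);	/* == 0x04, per-port bit */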
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 50496013cdb7..c054f48541c8 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -7,6 +7,52 @@
#include <linux/packing.h>
#include "dsa_priv.h"
+/* Is this a TX or an RX header? */
+#define SJA1110_HEADER_HOST_TO_SWITCH BIT(15)
+
+/* RX header */
+#define SJA1110_RX_HEADER_IS_METADATA BIT(14)
+#define SJA1110_RX_HEADER_HOST_ONLY BIT(13)
+#define SJA1110_RX_HEADER_HAS_TRAILER BIT(12)
+
+/* Trap-to-host format (no trailer present) */
+#define SJA1110_RX_HEADER_SRC_PORT(x) (((x) & GENMASK(7, 4)) >> 4)
+#define SJA1110_RX_HEADER_SWITCH_ID(x) ((x) & GENMASK(3, 0))
+
+/* Timestamp format (trailer present) */
+#define SJA1110_RX_HEADER_TRAILER_POS(x) ((x) & GENMASK(11, 0))
+
+#define SJA1110_RX_TRAILER_SWITCH_ID(x) (((x) & GENMASK(7, 4)) >> 4)
+#define SJA1110_RX_TRAILER_SRC_PORT(x) ((x) & GENMASK(3, 0))
+
+/* Meta frame format (for 2-step TX timestamps) */
+#define SJA1110_RX_HEADER_N_TS(x) (((x) & GENMASK(8, 4)) >> 4)
+
+/* TX header */
+#define SJA1110_TX_HEADER_UPDATE_TC BIT(14)
+#define SJA1110_TX_HEADER_TAKE_TS BIT(13)
+#define SJA1110_TX_HEADER_TAKE_TS_CASC BIT(12)
+#define SJA1110_TX_HEADER_HAS_TRAILER BIT(11)
+
+/* Only valid if SJA1110_TX_HEADER_HAS_TRAILER is false */
+#define SJA1110_TX_HEADER_PRIO(x) (((x) << 7) & GENMASK(10, 7))
+#define SJA1110_TX_HEADER_TSTAMP_ID(x) ((x) & GENMASK(7, 0))
+
+/* Only valid if SJA1110_TX_HEADER_HAS_TRAILER is true */
+#define SJA1110_TX_HEADER_TRAILER_POS(x) ((x) & GENMASK(10, 0))
+
+#define SJA1110_TX_TRAILER_TSTAMP_ID(x) (((x) << 24) & GENMASK(31, 24))
+#define SJA1110_TX_TRAILER_PRIO(x) (((x) << 21) & GENMASK(23, 21))
+#define SJA1110_TX_TRAILER_SWITCHID(x) (((x) << 12) & GENMASK(15, 12))
+#define SJA1110_TX_TRAILER_DESTPORTS(x) (((x) << 1) & GENMASK(11, 1))
+
+#define SJA1110_META_TSTAMP_SIZE 10
+
+#define SJA1110_HEADER_LEN 4
+#define SJA1110_RX_TRAILER_LEN 13
+#define SJA1110_TX_TRAILER_LEN 4
+#define SJA1110_MAX_PADDING_LEN 15
+
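/* Illustration (not part of the patch): with the masks above, a trap-to-host
 * RX header word of 0x2013 decodes as SJA1110_RX_HEADER_HOST_ONLY set,
 * SJA1110_RX_HEADER_SRC_PORT() == 1 and SJA1110_RX_HEADER_SWITCH_ID() == 3.
 */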
/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
static inline bool sja1105_is_link_local(const struct sk_buff *skb)
{
@@ -69,56 +115,117 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
return true;
}
-static bool sja1105_can_use_vlan_as_tags(const struct sk_buff *skb)
+/* Calls sja1105_port_deferred_xmit in sja1105_main.c */
+static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp,
+ struct sk_buff *skb)
{
- struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
- u16 vlan_tci;
+ struct sja1105_port *sp = dp->priv;
- if (hdr->h_vlan_proto == htons(ETH_P_SJA1105))
- return true;
+ if (!dsa_port_is_sja1105(dp))
+ return skb;
- if (hdr->h_vlan_proto != htons(ETH_P_8021Q) &&
- !skb_vlan_tag_present(skb))
- return false;
-
- if (skb_vlan_tag_present(skb))
- vlan_tci = skb_vlan_tag_get(skb);
- else
- vlan_tci = ntohs(hdr->h_vlan_TCI);
+ /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ * won't really free the packet.
+ */
+ skb_queue_tail(&sp->xmit_queue, skb_get(skb));
+ kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
- return vid_is_dsa_8021q(vlan_tci & VLAN_VID_MASK);
+ return NULL;
}
-/* This is the first time the tagger sees the frame on RX.
- * Figure out if we can decode it.
+/* Send VLAN tags with a TPID that blends in with whatever VLAN protocol a
+ * bridge spanning ports of this switch might have.
*/
-static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev)
+static u16 sja1105_xmit_tpid(struct dsa_port *dp)
{
- if (sja1105_can_use_vlan_as_tags(skb))
- return true;
- if (sja1105_is_link_local(skb))
- return true;
- if (sja1105_is_meta_frame(skb))
- return true;
- return false;
+ struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
+ u16 proto;
+
+ /* Since VLAN awareness is global, then if this port is VLAN-unaware,
+ * all ports are. Use the VLAN-unaware TPID used for tag_8021q.
+ */
+ if (!dsa_port_is_vlan_filtering(dp))
+ return ETH_P_SJA1105;
+
+ /* Port is VLAN-aware, so there is a bridge somewhere (a single one,
+ * we're sure about that). It may not be on this port though, so we
+ * need to find it.
+ */
+ list_for_each_entry(other_dp, &ds->dst->ports, list) {
+ if (other_dp->ds != ds)
+ continue;
+
+ if (!other_dp->bridge_dev)
+ continue;
+
+ /* Error is returned only if CONFIG_BRIDGE_VLAN_FILTERING,
+ * which seems pointless to handle, as our port cannot become
+ * VLAN-aware in that case.
+ */
+ br_vlan_get_proto(other_dp->bridge_dev, &proto);
+
+ return proto;
+ }
+
+ WARN_ONCE(1, "Port is VLAN-aware but cannot find associated bridge!\n");
+
+ return ETH_P_SJA1105;
}
-/* Calls sja1105_port_deferred_xmit in sja1105_main.c */
-static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp,
- struct sk_buff *skb)
+static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
{
- /* Increase refcount so the kfree_skb in dsa_slave_xmit
- * won't really free the packet.
+ struct dsa_port *dp = dsa_slave_to_port(netdev);
+ struct net_device *br = dp->bridge_dev;
+ u16 tx_vid;
+
+ /* If the port is under a VLAN-aware bridge, just slide the
+ * VLAN-tagged packet into the FDB and hope for the best.
+ * This works because we support a single VLAN-aware bridge
+ * across the entire dst, and its VLANs cannot be shared with
+ * any standalone port.
*/
- skb_queue_tail(&sp->xmit_queue, skb_get(skb));
- kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
+ if (br_vlan_enabled(br))
+ return skb;
- return NULL;
+ /* If the port is under a VLAN-unaware bridge, use an imprecise
+ * TX VLAN that targets the bridge's entire broadcast domain,
+ * instead of just the specific port.
+ */
+ tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num);
+
+ return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid);
}
-static u16 sja1105_xmit_tpid(struct sja1105_port *sp)
+/* Transform untagged control packets into pvid-tagged control packets so that
+ * all packets sent by this tagger are VLAN-tagged and we can configure the
+ * switch to drop untagged packets coming from the DSA master.
+ */
+static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp,
+ struct sk_buff *skb, u8 pcp)
{
- return sp->xmit_tpid;
+ __be16 xmit_tpid = htons(sja1105_xmit_tpid(dp));
+ struct vlan_ethhdr *hdr;
+
+ /* If VLAN tag is in hwaccel area, move it to the payload
+ * to deal with both cases uniformly and to ensure that
+ * the VLANs are added in the right order.
+ */
+ if (unlikely(skb_vlan_tag_present(skb))) {
+ skb = __vlan_hwaccel_push_inside(skb);
+ if (!skb)
+ return NULL;
+ }
+
+ hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
+
+ /* If skb is already VLAN-tagged, leave that VLAN ID in place */
+ if (hdr->h_vlan_proto == xmit_tpid)
+ return skb;
+
+ return vlan_insert_tag(skb, xmit_tpid, (pcp << VLAN_PRIO_SHIFT) |
+ SJA1105_DEFAULT_VLAN);
}
static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
@@ -129,17 +236,78 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+ if (skb->offload_fwd_mark)
+ return sja1105_imprecise_xmit(skb, netdev);
+
/* Transmitting management traffic does not rely upon switch tagging,
* but instead SPI-installed management routes. Part 2 of this
* is the .port_deferred_xmit driver callback.
*/
- if (unlikely(sja1105_is_link_local(skb)))
- return sja1105_defer_xmit(dp->priv, skb);
+ if (unlikely(sja1105_is_link_local(skb))) {
+ skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp);
+ if (!skb)
+ return NULL;
- return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv),
+ return sja1105_defer_xmit(dp, skb);
+ }
+
+ return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp),
((pcp << VLAN_PRIO_SHIFT) | tx_vid));
}
+static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
+ struct dsa_port *dp = dsa_slave_to_port(netdev);
+ u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+ __be32 *tx_trailer;
+ __be16 *tx_header;
+ int trailer_pos;
+
+ if (skb->offload_fwd_mark)
+ return sja1105_imprecise_xmit(skb, netdev);
+
+ /* Transmitting control packets is done using in-band control
+ * extensions, while data packets are transmitted using
+ * tag_8021q TX VLANs.
+ */
+ if (likely(!sja1105_is_link_local(skb)))
+ return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp),
+ ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
+
+ skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp);
+ if (!skb)
+ return NULL;
+
+ skb_push(skb, SJA1110_HEADER_LEN);
+
+ dsa_alloc_etype_header(skb, SJA1110_HEADER_LEN);
+
+ trailer_pos = skb->len;
+
+ tx_header = dsa_etype_header_pos_tx(skb);
+ tx_trailer = skb_put(skb, SJA1110_TX_TRAILER_LEN);
+
+ tx_header[0] = htons(ETH_P_SJA1110);
+ tx_header[1] = htons(SJA1110_HEADER_HOST_TO_SWITCH |
+ SJA1110_TX_HEADER_HAS_TRAILER |
+ SJA1110_TX_HEADER_TRAILER_POS(trailer_pos));
+ *tx_trailer = cpu_to_be32(SJA1110_TX_TRAILER_PRIO(pcp) |
+ SJA1110_TX_TRAILER_SWITCHID(dp->ds->index) |
+ SJA1110_TX_TRAILER_DESTPORTS(BIT(dp->index)));
+ if (clone) {
+ u8 ts_id = SJA1105_SKB_CB(clone)->ts_id;
+
+ tx_header[1] |= htons(SJA1110_TX_HEADER_TAKE_TS);
+ *tx_trailer |= cpu_to_be32(SJA1110_TX_TRAILER_TSTAMP_ID(ts_id));
+ }
+
+ return skb;
+}
+
static void sja1105_transfer_meta(struct sk_buff *skb,
const struct sja1105_meta *meta)
{
@@ -147,7 +315,7 @@ static void sja1105_transfer_meta(struct sk_buff *skb,
hdr->h_dest[3] = meta->dmac_byte_3;
hdr->h_dest[4] = meta->dmac_byte_4;
- SJA1105_SKB_CB(skb)->meta_tstamp = meta->tstamp;
+ SJA1105_SKB_CB(skb)->tstamp = meta->tstamp;
}
/* This is a simple state machine which follows the hardware mechanism of
@@ -176,16 +344,16 @@ static struct sk_buff
bool is_link_local,
bool is_meta)
{
- struct sja1105_port *sp;
- struct dsa_port *dp;
-
- dp = dsa_slave_to_port(skb->dev);
- sp = dp->priv;
-
/* Step 1: A timestampable frame was received.
* Buffer it until we get its meta frame.
*/
if (is_link_local) {
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct sja1105_port *sp = dp->priv;
+
+ if (unlikely(!dsa_port_is_sja1105(dp)))
+ return skb;
+
if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state))
/* Do normal processing. */
return skb;
@@ -218,8 +386,13 @@ static struct sk_buff
* frame, which serves no further purpose).
*/
} else if (is_meta) {
+ struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+ struct sja1105_port *sp = dp->priv;
struct sk_buff *stampable_skb;
+ if (unlikely(!dsa_port_is_sja1105(dp)))
+ return skb;
+
/* Drop the meta frame if we're not in the right state
* to process it.
*/
@@ -261,60 +434,58 @@ static struct sk_buff
return skb;
}
-static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan)
+static bool sja1105_skb_has_tag_8021q(const struct sk_buff *skb)
{
- struct dsa_port *dp = dsa_slave_to_port(skb->dev);
- struct sja1105_port *sp = dp->priv;
- u16 vid = sp->subvlan_map[subvlan];
+ u16 tpid = ntohs(eth_hdr(skb)->h_proto);
+
+ return tpid == ETH_P_SJA1105 || tpid == ETH_P_8021Q ||
+ skb_vlan_tag_present(skb);
+}
+
+static bool sja1110_skb_has_inband_control_extension(const struct sk_buff *skb)
+{
+ return ntohs(eth_hdr(skb)->h_proto) == ETH_P_SJA1110;
+}
+
+/* If the VLAN in the packet is a tag_8021q one, set @source_port and
+ * @switch_id and strip the header. Otherwise set @vid and keep it in the
+ * packet.
+ */
+static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
+ int *switch_id, u16 *vid)
+{
+ struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)skb_mac_header(skb);
u16 vlan_tci;
- if (vid == VLAN_N_VID)
- return;
+ if (skb_vlan_tag_present(skb))
+ vlan_tci = skb_vlan_tag_get(skb);
+ else
+ vlan_tci = ntohs(hdr->h_vlan_TCI);
+
+ if (vid_is_dsa_8021q_rxvlan(vlan_tci & VLAN_VID_MASK))
+ return dsa_8021q_rcv(skb, source_port, switch_id);
- vlan_tci = (skb->priority << VLAN_PRIO_SHIFT) | vid;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
+ /* Try our best with imprecise RX */
+ *vid = vlan_tci & VLAN_VID_MASK;
}
static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
- struct net_device *netdev,
- struct packet_type *pt)
+ struct net_device *netdev)
{
+ int source_port = -1, switch_id = -1;
struct sja1105_meta meta = {0};
- int source_port, switch_id;
struct ethhdr *hdr;
- u16 tpid, vid, tci;
bool is_link_local;
- u16 subvlan = 0;
- bool is_tagged;
bool is_meta;
+ u16 vid;
hdr = eth_hdr(skb);
- tpid = ntohs(hdr->h_proto);
- is_tagged = (tpid == ETH_P_SJA1105 || tpid == ETH_P_8021Q ||
- skb_vlan_tag_present(skb));
is_link_local = sja1105_is_link_local(skb);
is_meta = sja1105_is_meta_frame(skb);
- skb->offload_fwd_mark = 1;
-
- if (is_tagged) {
+ if (sja1105_skb_has_tag_8021q(skb)) {
/* Normal traffic path. */
- skb_push_rcsum(skb, ETH_HLEN);
- if (skb_vlan_tag_present(skb)) {
- tci = skb_vlan_tag_get(skb);
- __vlan_hwaccel_clear_tag(skb);
- } else {
- __skb_vlan_pop(skb, &tci);
- }
- skb_pull_rcsum(skb, ETH_HLEN);
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
-
- vid = tci & VLAN_VID_MASK;
- source_port = dsa_8021q_rx_source_port(vid);
- switch_id = dsa_8021q_rx_switch_id(vid);
- skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
- subvlan = dsa_8021q_rx_subvlan(vid);
+ sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
} else if (is_link_local) {
/* Management traffic path. Switch embeds the switch ID and
* port ID into bytes of the destination MAC, courtesy of
@@ -333,19 +504,157 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
return NULL;
}
- skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+ if (source_port == -1 || switch_id == -1)
+ skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
+ else
+ skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
if (!skb->dev) {
netdev_warn(netdev, "Couldn't decode source port\n");
return NULL;
}
- if (subvlan)
- sja1105_decode_subvlan(skb, subvlan);
+ if (!is_link_local)
+ dsa_default_offload_fwd_mark(skb);
return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
is_meta);
}
+static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
+{
+ u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN;
+ int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header);
+ int n_ts = SJA1110_RX_HEADER_N_TS(rx_header);
+ struct net_device *master = skb->dev;
+ struct dsa_port *cpu_dp;
+ struct dsa_switch *ds;
+ int i;
+
+ cpu_dp = master->dsa_ptr;
+ ds = dsa_switch_find(cpu_dp->dst->index, switch_id);
+ if (!ds) {
+ net_err_ratelimited("%s: cannot find switch id %d\n",
+ master->name, switch_id);
+ return NULL;
+ }
+
+ for (i = 0; i <= n_ts; i++) {
+ u8 ts_id, source_port, dir;
+ u64 tstamp;
+
+ ts_id = buf[0];
+ source_port = (buf[1] & GENMASK(7, 4)) >> 4;
+ dir = (buf[1] & BIT(3)) >> 3;
+ tstamp = be64_to_cpu(*(__be64 *)(buf + 2));
+
+ sja1110_process_meta_tstamp(ds, source_port, ts_id, dir,
+ tstamp);
+
+ buf += SJA1110_META_TSTAMP_SIZE;
+ }
+
+ /* Discard the meta frame, we've consumed the timestamps it contained */
+ return NULL;
+}
+
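For readability, each record that the loop above walks can be pictured with the following layout, inferred purely from the parsing code (the driver itself does not define such a structure):

struct sja1110_meta_tstamp_record {	/* illustrative sketch only */
	u8	ts_id;			/* buf[0]: timestamp identifier */
	u8	port_and_dir;		/* buf[1]: bits 7-4 source port, bit 3 direction */
	__be64	tstamp;			/* buf[2..9]: unaligned big-endian timestamp */
} __packed;				/* consecutive records are SJA1110_META_TSTAMP_SIZE bytes apart */
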
+static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
+ int *source_port,
+ int *switch_id,
+ bool *host_only)
+{
+ u16 rx_header;
+
+ if (unlikely(!pskb_may_pull(skb, SJA1110_HEADER_LEN)))
+ return NULL;
+
+ /* skb->data points to skb_mac_header(skb) + ETH_HLEN, which is exactly
+ * what we need because the caller has checked the EtherType (which is
+ * located 2 bytes back) and we just need a pointer to the header that
+ * comes afterwards.
+ */
+ rx_header = ntohs(*(__be16 *)skb->data);
+
+ if (rx_header & SJA1110_RX_HEADER_HOST_ONLY)
+ *host_only = true;
+
+ if (rx_header & SJA1110_RX_HEADER_IS_METADATA)
+ return sja1110_rcv_meta(skb, rx_header);
+
+ /* Timestamp frame, we have a trailer */
+ if (rx_header & SJA1110_RX_HEADER_HAS_TRAILER) {
+ int start_of_padding = SJA1110_RX_HEADER_TRAILER_POS(rx_header);
+ u8 *rx_trailer = skb_tail_pointer(skb) - SJA1110_RX_TRAILER_LEN;
+ u64 *tstamp = &SJA1105_SKB_CB(skb)->tstamp;
+ u8 last_byte = rx_trailer[12];
+
+ /* The timestamp is unaligned, so we need to use packing()
+ * to get it
+ */
+ packing(rx_trailer, tstamp, 63, 0, 8, UNPACK, 0);
+
+ *source_port = SJA1110_RX_TRAILER_SRC_PORT(last_byte);
+ *switch_id = SJA1110_RX_TRAILER_SWITCH_ID(last_byte);
+
+ /* skb->len counts from skb->data, while start_of_padding
+ * counts from the destination MAC address. Right now skb->data
+ * is still as set by the DSA master, so to trim away the
+ * padding and trailer we need to account for the fact that
+ * skb->data points to skb_mac_header(skb) + ETH_HLEN.
+ */
+ pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN);
+ /* Trap-to-host frame, no timestamp trailer */
+ } else {
+ *source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header);
+ *switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header);
+ }
+
+ /* Advance skb->data past the DSA header */
+ skb_pull_rcsum(skb, SJA1110_HEADER_LEN);
+
+ dsa_strip_etype_header(skb, SJA1110_HEADER_LEN);
+
+ /* With skb->data in its final place, update the MAC header
+ * so that eth_hdr() continues to work properly.
+ */
+ skb_set_mac_header(skb, -ETH_HLEN);
+
+ return skb;
+}
+
+static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
+ struct net_device *netdev)
+{
+ int source_port = -1, switch_id = -1;
+ bool host_only = false;
+ u16 vid = 0;
+
+ if (sja1110_skb_has_inband_control_extension(skb)) {
+ skb = sja1110_rcv_inband_control_extension(skb, &source_port,
+ &switch_id,
+ &host_only);
+ if (!skb)
+ return NULL;
+ }
+
+ /* Packets with in-band control extensions might still have RX VLANs */
+ if (likely(sja1105_skb_has_tag_8021q(skb)))
+ sja1105_vlan_rcv(skb, &source_port, &switch_id, &vid);
+
+ if (source_port == -1 || switch_id == -1)
+ skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid);
+ else
+ skb->dev = dsa_master_find_slave(netdev, switch_id, source_port);
+ if (!skb->dev) {
+ netdev_warn(netdev, "Couldn't decode source port\n");
+ return NULL;
+ }
+
+ if (!host_only)
+ dsa_default_offload_fwd_mark(skb);
+
+ return skb;
+}
+
static void sja1105_flow_dissect(const struct sk_buff *skb, __be16 *proto,
int *offset)
{
@@ -356,18 +665,51 @@ static void sja1105_flow_dissect(const struct sk_buff *skb, __be16 *proto,
dsa_tag_generic_flow_dissect(skb, proto, offset);
}
+static void sja1110_flow_dissect(const struct sk_buff *skb, __be16 *proto,
+ int *offset)
+{
+ /* Management frames have 2 DSA tags on RX, so the needed_headroom we
+ * declared is fine for the generic dissector adjustment procedure.
+ */
+ if (unlikely(sja1105_is_link_local(skb)))
+ return dsa_tag_generic_flow_dissect(skb, proto, offset);
+
+ /* For the rest, there is a single DSA tag, the tag_8021q one */
+ *offset = VLAN_HLEN;
+ *proto = ((__be16 *)skb->data)[(VLAN_HLEN / 2) - 1];
+}
+
static const struct dsa_device_ops sja1105_netdev_ops = {
.name = "sja1105",
.proto = DSA_TAG_PROTO_SJA1105,
.xmit = sja1105_xmit,
.rcv = sja1105_rcv,
- .filter = sja1105_filter,
- .overhead = VLAN_HLEN,
+ .needed_headroom = VLAN_HLEN,
.flow_dissect = sja1105_flow_dissect,
.promisc_on_master = true,
};
-MODULE_LICENSE("GPL v2");
+DSA_TAG_DRIVER(sja1105_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1105);
-module_dsa_tag_driver(sja1105_netdev_ops);
+static const struct dsa_device_ops sja1110_netdev_ops = {
+ .name = "sja1110",
+ .proto = DSA_TAG_PROTO_SJA1110,
+ .xmit = sja1110_xmit,
+ .rcv = sja1110_rcv,
+ .flow_dissect = sja1110_flow_dissect,
+ .needed_headroom = SJA1110_HEADER_LEN + VLAN_HLEN,
+ .needed_tailroom = SJA1110_RX_TRAILER_LEN + SJA1110_MAX_PADDING_LEN,
+};
+
+DSA_TAG_DRIVER(sja1110_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_SJA1110);
+
+static struct dsa_tag_driver *sja1105_tag_driver_array[] = {
+ &DSA_TAG_DRIVER_NAME(sja1105_netdev_ops),
+ &DSA_TAG_DRIVER_NAME(sja1110_netdev_ops),
+};
+
+module_dsa_tag_drivers(sja1105_tag_driver_array);
+
+MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 5b97ede56a0f..5749ba85c2b8 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -24,8 +24,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev)
{
u8 *trailer;
int source_port;
@@ -55,8 +54,7 @@ static const struct dsa_device_ops trailer_netdev_ops = {
.proto = DSA_TAG_PROTO_TRAILER,
.xmit = trailer_xmit,
.rcv = trailer_rcv,
- .overhead = 4,
- .tail_tag = true,
+ .needed_tailroom = 4,
};
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c
index 858cdf9d2913..ff442b8af636 100644
--- a/net/dsa/tag_xrs700x.c
+++ b/net/dsa/tag_xrs700x.c
@@ -25,8 +25,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev)
return skb;
}
-static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev)
{
int source_port;
u8 *trailer;
@@ -46,7 +45,7 @@ static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
/* Frame is forwarded by hardware, don't forward in software. */
- skb->offload_fwd_mark = 1;
+ dsa_default_offload_fwd_mark(skb);
return skb;
}
@@ -56,8 +55,7 @@ static const struct dsa_device_ops xrs700x_netdev_ops = {
.proto = DSA_TAG_PROTO_XRS700X,
.xmit = xrs700x_xmit,
.rcv = xrs700x_rcv,
- .overhead = 1,
- .tail_tag = true,
+ .needed_tailroom = 1,
};
MODULE_LICENSE("GPL");
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9cce612e8976..73fce9467467 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -62,8 +62,6 @@
#include <linux/uaccess.h>
#include <net/pkt_sched.h>
-__setup("ether=", netdev_boot_setup);
-
/**
* eth_header - create the Ethernet header
* @skb: buffer to alter
@@ -182,12 +180,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
* at all, so we check here whether one of those tagging
* variants has been configured on the receiving interface,
* and if so, set skb->protocol without looking at the packet.
- * The DSA tagging protocol may be able to decode some but not all
- * traffic (for example only for management). In that case give it the
- * option to filter the packets from which it can decode source port
- * information.
*/
- if (unlikely(netdev_uses_dsa(dev)) && dsa_can_decode(skb, dev))
+ if (unlikely(netdev_uses_dsa(dev)))
return htons(ETH_P_XDSA);
if (likely(eth_proto_is_802_3(eth->h_proto)))
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 723c9a8a8cdf..0a19470efbfb 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
- tunnels.o fec.o eeprom.o stats.o
+ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 1d6bc132aa4d..46776ea42a92 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -10,6 +10,7 @@ struct coalesce_req_info {
struct coalesce_reply_data {
struct ethnl_reply_data base;
struct ethtool_coalesce coalesce;
+ struct kernel_ethtool_coalesce kernel_coalesce;
u32 supported_params;
};
@@ -61,6 +62,7 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
struct genl_info *info)
{
struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
+ struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct net_device *dev = reply_base->dev;
int ret;
@@ -70,7 +72,8 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base,
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
- ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce);
+ ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce,
+ &data->kernel_coalesce, extack);
ethnl_ops_complete(dev);
return ret;
@@ -100,7 +103,9 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES_HIGH */
nla_total_size(sizeof(u32)) + /* _TX_USECS_HIGH */
nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES_HIGH */
- nla_total_size(sizeof(u32)); /* _RATE_SAMPLE_INTERVAL */
+ nla_total_size(sizeof(u32)) + /* _RATE_SAMPLE_INTERVAL */
+ nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_TX */
+ nla_total_size(sizeof(u8)); /* _USE_CQE_MODE_RX */
}
static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val,
@@ -124,6 +129,7 @@ static int coalesce_fill_reply(struct sk_buff *skb,
const struct ethnl_reply_data *reply_base)
{
const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base);
+ const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce;
const struct ethtool_coalesce *coal = &data->coalesce;
u32 supported = data->supported_params;
@@ -170,7 +176,11 @@ static int coalesce_fill_reply(struct sk_buff *skb,
coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH,
coal->tx_max_coalesced_frames_high, supported) ||
coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL,
- coal->rate_sample_interval, supported))
+ coal->rate_sample_interval, supported) ||
+ coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,
+ kcoal->use_cqe_mode_tx, supported) ||
+ coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,
+ kcoal->use_cqe_mode_rx, supported))
return -EMSGSIZE;
return 0;
@@ -215,10 +225,13 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
[ETHTOOL_A_COALESCE_TX_USECS_HIGH] = { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH] = { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .type = NLA_U32 },
+ [ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = NLA_POLICY_MAX(NLA_U8, 1),
};
int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
{
+ struct kernel_ethtool_coalesce kernel_coalesce = {};
struct ethtool_coalesce coalesce = {};
struct ethnl_req_info req_info = {};
struct nlattr **tb = info->attrs;
@@ -255,7 +268,8 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
ret = ethnl_ops_begin(dev);
if (ret < 0)
goto out_rtnl;
- ret = ops->get_coalesce(dev, &coalesce);
+ ret = ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+ info->extack);
if (ret < 0)
goto out_ops;
@@ -303,11 +317,16 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH], &mod);
ethnl_update_u32(&coalesce.rate_sample_interval,
tb[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL], &mod);
+ ethnl_update_u8(&kernel_coalesce.use_cqe_mode_tx,
+ tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod);
+ ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx,
+ tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod);
ret = 0;
if (!mod)
goto out_ops;
- ret = dev->ethtool_ops->set_coalesce(dev, &coalesce);
+ ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
+ info->extack);
if (ret < 0)
goto out_ops;
ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
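The two call sites above pin down the updated ethtool_ops prototypes: get_coalesce() and set_coalesce() now also receive a struct kernel_ethtool_coalesce (which carries the new use_cqe_mode_tx/use_cqe_mode_rx parameters) and a netlink extack. A minimal driver-side sketch of the new callbacks; foo_priv and its fields are hypothetical, while the signatures and the kernel_ethtool_coalesce members follow the call sites in this patch:

static int foo_get_coalesce(struct net_device *dev,
			    struct ethtool_coalesce *ec,
			    struct kernel_ethtool_coalesce *kec,
			    struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = netdev_priv(dev);

	ec->rx_coalesce_usecs = priv->rx_usecs;		/* legacy member */
	kec->use_cqe_mode_rx = priv->cqe_mode_rx;	/* new, kernel-only member */
	return 0;
}

static int foo_set_coalesce(struct net_device *dev,
			    struct ethtool_coalesce *ec,
			    struct kernel_ethtool_coalesce *kec,
			    struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = netdev_priv(dev);

	if (kec->use_cqe_mode_tx) {
		NL_SET_ERR_MSG_MOD(extack, "CQE mode not supported on TX");
		return -EOPNOTSUPP;
	}
	priv->rx_usecs = ec->rx_coalesce_usecs;
	priv->cqe_mode_rx = kec->use_cqe_mode_rx;
	return 0;
}
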
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index f9dcbad84788..c63e0739dc6a 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -4,6 +4,7 @@
#include <linux/net_tstamp.h>
#include <linux/phy.h>
#include <linux/rtnetlink.h>
+#include <linux/ptp_clock_kernel.h>
#include "common.h"
@@ -397,6 +398,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = {
[const_ilog2(SOF_TIMESTAMPING_OPT_STATS)] = "option-stats",
[const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo",
[const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw",
+ [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc",
};
static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT);
@@ -554,6 +556,18 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
return 0;
}
+int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
+{
+ struct ethtool_ts_info info = { };
+ int num = 0;
+
+ if (!__ethtool_get_ts_info(dev, &info))
+ num = ptp_get_vclocks_index(info.phc_index, vclock_index);
+
+ return num;
+}
+EXPORT_SYMBOL(ethtool_get_phc_vclocks);
+
const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
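For reference, ethtool_get_phc_vclocks() allocates the index array on the caller's behalf and returns how many virtual clocks are layered on the device's PHC (0 when there are none or the ts_info lookup fails); the caller owns the array and must kfree() it. A minimal sketch of the calling convention, mirroring what the PHC_VCLOCKS netlink handler added later in this patch does:

	int *vclock_index = NULL;
	int i, num;

	num = ethtool_get_phc_vclocks(dev, &vclock_index);
	for (i = 0; i < num; i++)
		pr_info("%s: PHC virtual clock index %d\n",
			dev->name, vclock_index[i]);
	kfree(vclock_index);
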
diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 5d38e90895ac..7e6b37a54add 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -159,9 +159,6 @@ static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr *
request->offset = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_OFFSET]);
request->length = nla_get_u32(tb[ETHTOOL_A_MODULE_EEPROM_LENGTH]);
- if (!request->length)
- return -EINVAL;
-
/* The following set of conditions limit the API to only dump 1/2
* EEPROM page without crossing low page boundary located at offset 128.
* This means user may only request dumps of length limited to 128 from
@@ -180,10 +177,6 @@ static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr *
NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH],
"reading cross half page boundary is illegal");
return -EINVAL;
- } else if (request->offset >= ETH_MODULE_EEPROM_PAGE_LEN * 2) {
- NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_OFFSET],
- "offset is out of bounds");
- return -EINVAL;
} else if (request->offset + request->length > ETH_MODULE_EEPROM_PAGE_LEN * 2) {
NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MODULE_EEPROM_LENGTH],
"reading cross page boundary is illegal");
@@ -236,8 +229,10 @@ const struct ethnl_request_ops ethnl_module_eeprom_request_ops = {
const struct nla_policy ethnl_module_eeprom_get_policy[] = {
[ETHTOOL_A_MODULE_EEPROM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
- [ETHTOOL_A_MODULE_EEPROM_OFFSET] = { .type = NLA_U32 },
- [ETHTOOL_A_MODULE_EEPROM_LENGTH] = { .type = NLA_U32 },
+ [ETHTOOL_A_MODULE_EEPROM_OFFSET] =
+ NLA_POLICY_MAX(NLA_U32, ETH_MODULE_EEPROM_PAGE_LEN * 2 - 1),
+ [ETHTOOL_A_MODULE_EEPROM_LENGTH] =
+ NLA_POLICY_RANGE(NLA_U32, 1, ETH_MODULE_EEPROM_PAGE_LEN),
[ETHTOOL_A_MODULE_EEPROM_PAGE] = { .type = NLA_U8 },
[ETHTOOL_A_MODULE_EEPROM_BANK] = { .type = NLA_U8 },
[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS] =
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index baa5d10043cb..f2abc3152888 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -7,6 +7,7 @@
* the information ethtool needs.
*/
+#include <linux/compat.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/capability.h>
@@ -23,6 +24,7 @@
#include <linux/rtnetlink.h>
#include <linux/sched/signal.h>
#include <linux/net.h>
+#include <linux/pm_runtime.h>
#include <net/devlink.h>
#include <net/xdp_sock_drv.h>
#include <net/flow_offload.h>
@@ -807,6 +809,120 @@ out:
return ret;
}
+static noinline_for_stack int
+ethtool_rxnfc_copy_from_compat(struct ethtool_rxnfc *rxnfc,
+ const struct compat_ethtool_rxnfc __user *useraddr,
+ size_t size)
+{
+ struct compat_ethtool_rxnfc crxnfc = {};
+
+ /* We expect there to be holes between fs.m_ext and
+ * fs.ring_cookie and at the end of fs, but nowhere else.
+ * On non-x86, no conversion should be needed.
+ */
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_X86_64) &&
+ sizeof(struct compat_ethtool_rxnfc) !=
+ sizeof(struct ethtool_rxnfc));
+ BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
+ sizeof(useraddr->fs.m_ext) !=
+ offsetof(struct ethtool_rxnfc, fs.m_ext) +
+ sizeof(rxnfc->fs.m_ext));
+ BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.location) -
+ offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
+ offsetof(struct ethtool_rxnfc, fs.location) -
+ offsetof(struct ethtool_rxnfc, fs.ring_cookie));
+
+ if (copy_from_user(&crxnfc, useraddr, min(size, sizeof(crxnfc))))
+ return -EFAULT;
+
+ *rxnfc = (struct ethtool_rxnfc) {
+ .cmd = crxnfc.cmd,
+ .flow_type = crxnfc.flow_type,
+ .data = crxnfc.data,
+ .fs = {
+ .flow_type = crxnfc.fs.flow_type,
+ .h_u = crxnfc.fs.h_u,
+ .h_ext = crxnfc.fs.h_ext,
+ .m_u = crxnfc.fs.m_u,
+ .m_ext = crxnfc.fs.m_ext,
+ .ring_cookie = crxnfc.fs.ring_cookie,
+ .location = crxnfc.fs.location,
+ },
+ .rule_cnt = crxnfc.rule_cnt,
+ };
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_from_user(struct ethtool_rxnfc *rxnfc,
+ const void __user *useraddr,
+ size_t size)
+{
+ if (compat_need_64bit_alignment_fixup())
+ return ethtool_rxnfc_copy_from_compat(rxnfc, useraddr, size);
+
+ if (copy_from_user(rxnfc, useraddr, size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_to_compat(void __user *useraddr,
+ const struct ethtool_rxnfc *rxnfc,
+ size_t size, const u32 *rule_buf)
+{
+ struct compat_ethtool_rxnfc crxnfc;
+
+ memset(&crxnfc, 0, sizeof(crxnfc));
+ crxnfc = (struct compat_ethtool_rxnfc) {
+ .cmd = rxnfc->cmd,
+ .flow_type = rxnfc->flow_type,
+ .data = rxnfc->data,
+ .fs = {
+ .flow_type = rxnfc->fs.flow_type,
+ .h_u = rxnfc->fs.h_u,
+ .h_ext = rxnfc->fs.h_ext,
+ .m_u = rxnfc->fs.m_u,
+ .m_ext = rxnfc->fs.m_ext,
+ .ring_cookie = rxnfc->fs.ring_cookie,
+ .location = rxnfc->fs.location,
+ },
+ .rule_cnt = rxnfc->rule_cnt,
+ };
+
+ if (copy_to_user(useraddr, &crxnfc, min(size, sizeof(crxnfc))))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
+ const struct ethtool_rxnfc *rxnfc,
+ size_t size, const u32 *rule_buf)
+{
+ int ret;
+
+ if (compat_need_64bit_alignment_fixup()) {
+ ret = ethtool_rxnfc_copy_to_compat(useraddr, rxnfc, size,
+ rule_buf);
+ useraddr += offsetof(struct compat_ethtool_rxnfc, rule_locs);
+ } else {
+ ret = copy_to_user(useraddr, rxnfc, size);
+ useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+ }
+
+ if (ret)
+ return -EFAULT;
+
+ if (rule_buf) {
+ if (copy_to_user(useraddr, rule_buf,
+ rxnfc->rule_cnt * sizeof(u32)))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
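The holes that the BUILD_BUG_ON()s above pin down exist because 32-bit x86 aligns u64 to 4 bytes while the 64-bit kernel aligns it to 8, so everything from ring_cookie onwards sits at different offsets in the compat layout; that is why the fields are copied member by member rather than with a flat copy_from_user()/copy_to_user(). A deliberately simplified illustration (not the kernel's real structures):

struct native_layout {				/* as the 64-bit kernel sees it */
	u32 flow_type;
	u64 ring_cookie;			/* 8-byte aligned: 4 bytes of padding precede it */
	u32 location;
};

struct compat_layout {				/* as 32-bit x86 userspace built it */
	u32 flow_type;
	u64 ring_cookie __attribute__((aligned(4)));	/* i.e. compat_u64 */
	u32 location;
};

/* offsetof(struct native_layout, ring_cookie) == 8, but
 * offsetof(struct compat_layout, ring_cookie) == 4.
 */
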
static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
u32 cmd, void __user *useraddr)
{
@@ -825,7 +941,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
info_size = (offsetof(struct ethtool_rxnfc, data) +
sizeof(info.data));
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
rc = dev->ethtool_ops->set_rxnfc(dev, &info);
@@ -833,7 +949,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
return rc;
if (cmd == ETHTOOL_SRXCLSRLINS &&
- copy_to_user(useraddr, &info, info_size))
+ ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL))
return -EFAULT;
return 0;
@@ -859,7 +975,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
info_size = (offsetof(struct ethtool_rxnfc, data) +
sizeof(info.data));
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
/* If FLOW_RSS was requested then user-space must be using the
@@ -867,7 +983,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
*/
if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) {
info_size = sizeof(info);
- if (copy_from_user(&info, useraddr, info_size))
+ if (ethtool_rxnfc_copy_from_user(&info, useraddr, info_size))
return -EFAULT;
/* Since malicious users may modify the original data,
* we need to check whether FLOW_RSS is still requested.
@@ -893,18 +1009,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
if (ret < 0)
goto err_out;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &info, info_size))
- goto err_out;
-
- if (rule_buf) {
- useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
- if (copy_to_user(useraddr, rule_buf,
- info.rule_cnt * sizeof(u32)))
- goto err_out;
- }
- ret = 0;
-
+ ret = ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, rule_buf);
err_out:
kfree(rule_buf);
@@ -1514,12 +1619,14 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
void __user *useraddr)
{
struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
+ struct kernel_ethtool_coalesce kernel_coalesce = {};
int ret;
if (!dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
- ret = dev->ethtool_ops->get_coalesce(dev, &coalesce);
+ ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+ NULL);
if (ret)
return ret;
@@ -1586,19 +1693,26 @@ ethtool_set_coalesce_supported(struct net_device *dev,
static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
void __user *useraddr)
{
+ struct kernel_ethtool_coalesce kernel_coalesce = {};
struct ethtool_coalesce coalesce;
int ret;
- if (!dev->ethtool_ops->set_coalesce)
+ if (!dev->ethtool_ops->set_coalesce && !dev->ethtool_ops->get_coalesce)
return -EOPNOTSUPP;
+ ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+ NULL);
+ if (ret)
+ return ret;
+
if (copy_from_user(&coalesce, useraddr, sizeof(coalesce)))
return -EFAULT;
if (!ethtool_set_coalesce_supported(dev, &coalesce))
return -EOPNOTSUPP;
- ret = dev->ethtool_ops->set_coalesce(dev, &coalesce);
+ ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
+ NULL);
if (!ret)
ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
return ret;
@@ -2581,15 +2695,14 @@ static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr)
/* The main entry point in this file. Called from net/core/dev_ioctl.c */
-int dev_ethtool(struct net *net, struct ifreq *ifr)
+int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
{
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
- void __user *useraddr = ifr->ifr_data;
u32 ethcmd, sub_cmd;
int rc;
netdev_features_t old_features;
- if (!dev || !netif_device_present(dev))
+ if (!dev)
return -ENODEV;
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
@@ -2645,10 +2758,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
return -EPERM;
}
+ if (dev->dev.parent)
+ pm_runtime_get_sync(dev->dev.parent);
+
+ if (!netif_device_present(dev)) {
+ rc = -ENODEV;
+ goto out;
+ }
+
if (dev->ethtool_ops->begin) {
rc = dev->ethtool_ops->begin(dev);
- if (rc < 0)
- return rc;
+ if (rc < 0)
+ goto out;
}
old_features = dev->features;
@@ -2867,6 +2988,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (old_features != dev->features)
netdev_features_change(dev);
+out:
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
return rc;
}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 88d8a0243f35..1797a0a90019 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -2,6 +2,7 @@
#include <net/sock.h>
#include <linux/ethtool_netlink.h>
+#include <linux/pm_runtime.h>
#include "netlink.h"
static struct genl_family ethtool_genl_family;
@@ -29,6 +30,44 @@ const struct nla_policy ethnl_header_policy_stats[] = {
ETHTOOL_FLAGS_STATS),
};
+int ethnl_ops_begin(struct net_device *dev)
+{
+ int ret;
+
+ if (!dev)
+ return -ENODEV;
+
+ if (dev->dev.parent)
+ pm_runtime_get_sync(dev->dev.parent);
+
+ if (!netif_device_present(dev)) {
+ ret = -ENODEV;
+ goto err;
+ }
+
+ if (dev->ethtool_ops->begin) {
+ ret = dev->ethtool_ops->begin(dev);
+ if (ret)
+ goto err;
+ }
+
+ return 0;
+err:
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
+
+ return ret;
+}
+
+void ethnl_ops_complete(struct net_device *dev)
+{
+ if (dev->ethtool_ops->complete)
+ dev->ethtool_ops->complete(dev);
+
+ if (dev->dev.parent)
+ pm_runtime_put(dev->dev.parent);
+}
+
/**
* ethnl_parse_header_dev_get() - parse request header
* @req_info: structure to put results into
@@ -101,12 +140,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
return -EINVAL;
}
- if (dev && !netif_device_present(dev)) {
- dev_put(dev);
- NL_SET_ERR_MSG(extack, "device not present");
- return -ENODEV;
- }
-
req_info->dev = dev;
req_info->flags = flags;
return 0;
@@ -248,6 +281,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_TSINFO_GET] = &ethnl_tsinfo_request_ops,
[ETHTOOL_MSG_MODULE_EEPROM_GET] = &ethnl_module_eeprom_request_ops,
[ETHTOOL_MSG_STATS_GET] = &ethnl_stats_request_ops,
+ [ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops,
};
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -315,9 +349,9 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
struct ethnl_req_info *req_info = NULL;
const u8 cmd = info->genlhdr->cmd;
const struct ethnl_request_ops *ops;
+ int hdr_len, reply_len;
struct sk_buff *rskb;
void *reply_payload;
- int reply_len;
int ret;
ops = ethnl_default_requests[cmd];
@@ -346,21 +380,25 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
ret = ops->reply_size(req_info, reply_data);
if (ret < 0)
goto err_cleanup;
- reply_len = ret + ethnl_reply_header_size();
+ reply_len = ret;
ret = -ENOMEM;
- rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd,
+ rskb = ethnl_reply_init(reply_len + ethnl_reply_header_size(),
+ req_info->dev, ops->reply_cmd,
ops->hdr_attr, info, &reply_payload);
if (!rskb)
goto err_cleanup;
+ hdr_len = rskb->len;
ret = ops->fill_reply(rskb, req_info, reply_data);
if (ret < 0)
goto err_msg;
+ WARN_ONCE(rskb->len - hdr_len > reply_len,
+ "ethnl cmd %d: calculated reply length %d, but consumed %d\n",
+ cmd, reply_len, rskb->len - hdr_len);
if (ops->cleanup_data)
ops->cleanup_data(reply_data);
genlmsg_end(rskb, reply_payload);
- if (req_info->dev)
- dev_put(req_info->dev);
+ dev_put(req_info->dev);
kfree(reply_data);
kfree(req_info);
return genlmsg_reply(rskb, info);
@@ -372,8 +410,7 @@ err_cleanup:
if (ops->cleanup_data)
ops->cleanup_data(reply_data);
err_dev:
- if (req_info->dev)
- dev_put(req_info->dev);
+ dev_put(req_info->dev);
kfree(reply_data);
kfree(req_info);
return ret;
@@ -953,6 +990,15 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_stats_get_policy,
.maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_phc_vclocks_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 8abcbc10796c..e8987e28036f 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -138,7 +138,7 @@ static inline void ethnl_update_bool32(u32 *dst, const struct nlattr *attr,
}
/**
- * ethnl_update_binary() - update binary data from NLA_BINARY atribute
+ * ethnl_update_binary() - update binary data from NLA_BINARY attribute
* @dst: value to update
* @len: destination buffer length
* @attr: netlink attribute with new value or null
@@ -247,19 +247,8 @@ struct ethnl_reply_data {
struct net_device *dev;
};
-static inline int ethnl_ops_begin(struct net_device *dev)
-{
- if (dev && dev->ethtool_ops->begin)
- return dev->ethtool_ops->begin(dev);
- else
- return 0;
-}
-
-static inline void ethnl_ops_complete(struct net_device *dev)
-{
- if (dev && dev->ethtool_ops->complete)
- dev->ethtool_ops->complete(dev);
-}
+int ethnl_ops_begin(struct net_device *dev);
+void ethnl_ops_complete(struct net_device *dev);
/**
* struct ethnl_request_ops - unified handling of GET requests
@@ -347,6 +336,7 @@ extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
extern const struct ethnl_request_ops ethnl_fec_request_ops;
extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
extern const struct ethnl_request_ops ethnl_stats_request_ops;
+extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -369,7 +359,7 @@ extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX + 1];
extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
-extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL + 1];
+extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_MAX + 1];
extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_HEADER + 1];
extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1];
extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1];
@@ -380,8 +370,9 @@ extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_T
extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1];
extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
-extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_DATA + 1];
+extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1];
extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
+extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1];
int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c
new file mode 100644
index 000000000000..637b2f5297d5
--- /dev/null
+++ b/net/ethtool/phc_vclocks.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 NXP
+ */
+#include "netlink.h"
+#include "common.h"
+
+struct phc_vclocks_req_info {
+ struct ethnl_req_info base;
+};
+
+struct phc_vclocks_reply_data {
+ struct ethnl_reply_data base;
+ int num;
+ int *index;
+};
+
+#define PHC_VCLOCKS_REPDATA(__reply_base) \
+ container_of(__reply_base, struct phc_vclocks_reply_data, base)
+
+const struct nla_policy ethnl_phc_vclocks_get_policy[] = {
+ [ETHTOOL_A_PHC_VCLOCKS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ int ret;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+ data->num = ethtool_get_phc_vclocks(dev, &data->index);
+ ethnl_ops_complete(dev);
+
+ return ret;
+}
+
+static int phc_vclocks_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct phc_vclocks_reply_data *data =
+ PHC_VCLOCKS_REPDATA(reply_base);
+ int len = 0;
+
+ if (data->num > 0) {
+ len += nla_total_size(sizeof(u32));
+ len += nla_total_size(sizeof(s32) * data->num);
+ }
+
+ return len;
+}
+
+static int phc_vclocks_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct phc_vclocks_reply_data *data =
+ PHC_VCLOCKS_REPDATA(reply_base);
+
+ if (data->num <= 0)
+ return 0;
+
+ if (nla_put_u32(skb, ETHTOOL_A_PHC_VCLOCKS_NUM, data->num) ||
+ nla_put(skb, ETHTOOL_A_PHC_VCLOCKS_INDEX,
+ sizeof(s32) * data->num, data->index))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static void phc_vclocks_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+ const struct phc_vclocks_reply_data *data =
+ PHC_VCLOCKS_REPDATA(reply_base);
+
+ kfree(data->index);
+}
+
+const struct ethnl_request_ops ethnl_phc_vclocks_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET,
+ .reply_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PHC_VCLOCKS_HEADER,
+ .req_info_size = sizeof(struct phc_vclocks_req_info),
+ .reply_data_size = sizeof(struct phc_vclocks_reply_data),
+
+ .prepare_data = phc_vclocks_prepare_data,
+ .reply_size = phc_vclocks_reply_size,
+ .fill_reply = phc_vclocks_fill_reply,
+ .cleanup_data = phc_vclocks_cleanup_data,
+};
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index bb1351c38397..e31949479305 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -397,7 +397,8 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
* ensures entries of restarted nodes gets pruned so that they can
* re-register and resume communications.
*/
- if (seq_nr_before(sequence_nr, node->seq_out[port->type]))
+ if (!(port->dev->features & NETIF_F_HW_HSR_TAG_RM) &&
+ seq_nr_before(sequence_nr, node->seq_out[port->type]))
return;
node->time_in[port->type] = jiffies;
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 88215b5c93aa..dd5a45f8a78a 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -340,8 +340,7 @@ nla_put_failure:
out_dev:
wpan_phy_put(phy);
out:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return rc;
}
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 0cf2374c143b..277124f206e0 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -2226,8 +2226,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) {
struct wpan_dev *wpan_dev = info->user_ptr[1];
- if (wpan_dev->netdev)
- dev_put(wpan_dev->netdev);
+ dev_put(wpan_dev->netdev);
} else {
dev_put(info->user_ptr[1]);
}
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a45a0401adc5..7bb9ef35c570 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -41,8 +41,7 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr);
rcu_read_lock();
dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
break;
case IEEE802154_ADDR_SHORT:
@@ -129,7 +128,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
int ret = -ENOIOCTLCMD;
struct net_device *dev;
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, arg))
return -EFAULT;
ifr.ifr_name[IFNAMSIZ-1] = 0;
@@ -143,7 +142,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg,
if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl)
ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd);
- if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq)))
+ if (!ret && put_user_ifreq(&ifr, arg))
ret = -EFAULT;
dev_put(dev);
@@ -984,6 +983,11 @@ static const struct proto_ops ieee802154_dgram_ops = {
.sendpage = sock_no_sendpage,
};
+static void ieee802154_sock_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
/* Create a socket. Initialise the socket, blank the addresses
* set the state.
*/
@@ -1024,7 +1028,7 @@ static int ieee802154_create(struct net *net, struct socket *sock,
sock->ops = ops;
sock_init_data(sock, sk);
- /* FIXME: sk->sk_destruct */
+ sk->sk_destruct = ieee802154_sock_destruct;
sk->sk_family = PF_IEEE802154;
/* Checksums on by default */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2f94d221c00e..1d816a5fd3eb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -318,7 +318,7 @@ lookup_protocol:
WARN_ON(!answer_prot->slab);
- err = -ENOBUFS;
+ err = -ENOMEM;
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
if (!sk)
goto out;
@@ -452,7 +452,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
- BPF_CGROUP_INET4_BIND, &flags);
+ CGROUP_INET4_BIND, &flags);
if (err)
return err;
@@ -781,7 +781,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_port = inet->inet_dport;
sin->sin_addr.s_addr = inet->inet_daddr;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET4_GETPEERNAME,
+ CGROUP_INET4_GETPEERNAME,
NULL);
} else {
__be32 addr = inet->inet_rcv_saddr;
@@ -790,7 +790,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET4_GETSOCKNAME,
+ CGROUP_INET4_GETSOCKNAME,
NULL);
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
@@ -953,10 +953,10 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCGIFNETMASK:
case SIOCGIFDSTADDR:
case SIOCGIFPFLAGS:
- if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, p))
return -EFAULT;
err = devinet_ioctl(net, cmd, &ifr);
- if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
+ if (!err && put_user_ifreq(&ifr, p))
err = -EFAULT;
break;
@@ -966,7 +966,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCSIFDSTADDR:
case SIOCSIFPFLAGS:
case SIOCSIFFLAGS:
- if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, NULL, p))
return -EFAULT;
err = devinet_ioctl(net, cmd, &ifr);
break;
@@ -1720,7 +1720,6 @@ EXPORT_SYMBOL_GPL(snmp_fold_field64);
#ifdef CONFIG_IP_MULTICAST
static const struct net_protocol igmp_protocol = {
.handler = igmp_rcv,
- .netns_ok = 1,
};
#endif
@@ -1733,7 +1732,6 @@ static struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
- .netns_ok = 1,
.icmp_strict_tag_validation = 1,
};
@@ -1746,14 +1744,12 @@ static struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
- .netns_ok = 1,
};
static const struct net_protocol icmp_protocol = {
.handler = icmp_rcv,
.err_handler = icmp_err,
.no_policy = 1,
- .netns_ok = 1,
};
static __net_init int ipv4_mib_init_net(struct net *net)
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 36ed85bf2ad5..6eea1e9e998d 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -450,6 +450,7 @@ static int ah4_err(struct sk_buff *skb, u32 info)
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return 0;
+ break;
case ICMP_REDIRECT:
break;
default:
@@ -554,7 +555,6 @@ static int ah4_rcv_cb(struct sk_buff *skb, int err)
static const struct xfrm_type ah_type =
{
- .description = "AH4",
.owner = THIS_MODULE,
.proto = IPPROTO_AH,
.flags = XFRM_TYPE_REPLAY_PROT,
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 9e41eff4a685..0dcee9df1326 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -10,6 +10,9 @@
#include <net/tcp.h>
#include <net/bpf_sk_storage.h>
+/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
+extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+
static u32 optional_ops[] = {
offsetof(struct tcp_congestion_ops, init),
offsetof(struct tcp_congestion_ops, release),
@@ -163,6 +166,19 @@ static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
.arg2_type = ARG_ANYTHING,
};
+static u32 prog_ops_moff(const struct bpf_prog *prog)
+{
+ const struct btf_member *m;
+ const struct btf_type *t;
+ u32 midx;
+
+ midx = prog->expected_attach_type;
+ t = bpf_tcp_congestion_ops.type;
+ m = &btf_type_member(t)[midx];
+
+ return btf_member_bit_offset(t, m) / 8;
+}
+
static const struct bpf_func_proto *
bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
const struct bpf_prog *prog)
@@ -174,6 +190,28 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_setsockopt:
+ /* Does not allow release() to call setsockopt.
+ * release() is called when the current bpf-tcp-cc
+ * is retiring. It is not allowed to call
+ * setsockopt() to make further changes which
+ * may potentially allocate new resources.
+ */
+ if (prog_ops_moff(prog) !=
+ offsetof(struct tcp_congestion_ops, release))
+ return &bpf_sk_setsockopt_proto;
+ return NULL;
+ case BPF_FUNC_getsockopt:
+ /* Since getsockopt and setsockopt are usually expected
+ * to be available together, disable getsockopt for
+ * release() as well, to avoid usage surprises.
+ * The bpf-tcp-cc already has a more powerful way
+ * to read tcp_sock from the PTR_TO_BTF_ID.
+ */
+ if (prog_ops_moff(prog) !=
+ offsetof(struct tcp_congestion_ops, release))
+ return &bpf_sk_getsockopt_proto;
+ return NULL;
default:
return bpf_base_func_proto(func_id);
}
@@ -286,9 +324,6 @@ static void bpf_tcp_ca_unreg(void *kdata)
tcp_unregister_congestion_control(kdata);
}
-/* Avoid sparse warning. It is only used in bpf_struct_ops.c. */
-extern struct bpf_struct_ops bpf_tcp_congestion_ops;
-
struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index e0480c6cebaa..099259fc826a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -187,8 +187,7 @@ static int __init cipso_v4_cache_init(void)
* cipso_v4_cache_invalidate - Invalidates the current CIPSO cache
*
* Description:
- * Invalidates and frees any entries in the CIPSO cache. Returns zero on
- * success and negative values on failure.
+ * Invalidates and frees any entries in the CIPSO cache.
*
*/
void cipso_v4_cache_invalidate(void)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 1c6429c353a9..f4468980b675 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -215,7 +215,7 @@ static void devinet_sysctl_unregister(struct in_device *idev)
static struct in_ifaddr *inet_alloc_ifa(void)
{
- return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
+ return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL_ACCOUNT);
}
static void inet_rcu_free_ifa(struct rcu_head *head)
@@ -1243,7 +1243,7 @@ out:
return ret;
}
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
+int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
const struct in_ifaddr *ifa;
@@ -1950,16 +1950,17 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
};
static int inet_validate_link_af(const struct net_device *dev,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack)
{
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int err, rem;
- if (dev && !__in_dev_get_rcu(dev))
+ if (dev && !__in_dev_get_rtnl(dev))
return -EAFNOSUPPORT;
err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
- inet_af_policy, NULL);
+ inet_af_policy, extack);
if (err < 0)
return err;
@@ -1981,7 +1982,7 @@ static int inet_validate_link_af(const struct net_device *dev,
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
struct netlink_ext_ack *extack)
{
- struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int rem;
@@ -2424,11 +2425,15 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
int *valp = ctl->data;
int val = *valp;
loff_t pos = *ppos;
- int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ struct net *net = ctl->extra2;
+ int ret;
- if (write && *valp != val) {
- struct net *net = ctl->extra2;
+ if (write && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
+ if (write && *valp != val) {
if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
if (!rtnl_trylock()) {
/* Restore the original values before restarting */
@@ -2762,8 +2767,6 @@ void __init devinet_init(void)
INIT_HLIST_HEAD(&inet_addr_lst[i]);
register_pernet_subsys(&devinet_ops);
-
- register_gifconf(PF_INET, inet_gifconf);
register_netdevice_notifier(&ip_netdev_notifier);
queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 35803ab7ac80..851f542928a3 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -97,7 +97,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
{
- struct esp_output_extra *extra = esp_tmp_extra(tmp);
struct crypto_aead *aead = x->data;
int extralen = 0;
u8 *iv;
@@ -105,9 +104,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
struct scatterlist *sg;
if (x->props.flags & XFRM_STATE_ESN)
- extralen += sizeof(*extra);
+ extralen += sizeof(struct esp_output_extra);
- extra = esp_tmp_extra(tmp);
iv = esp_tmp_iv(aead, tmp, extralen);
req = esp_tmp_req(aead, iv);
@@ -673,7 +671,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
@@ -982,6 +980,7 @@ static int esp4_err(struct sk_buff *skb, u32 info)
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return 0;
+ break;
case ICMP_REDIRECT:
break;
default:
@@ -1198,7 +1197,6 @@ static int esp4_rcv_cb(struct sk_buff *skb, int err)
static const struct xfrm_type esp_type =
{
- .description = "ESP4",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.flags = XFRM_TYPE_REPLAY_PROT,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 33687cf58286..8e4e9aa12130 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -33,12 +33,11 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
struct xfrm_state *x;
__be32 seq;
__be32 spi;
- int err;
if (!pskb_pull(skb, offset))
return NULL;
- if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0)
+ if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0)
goto out;
xo = xfrm_offload(skb);
@@ -343,7 +342,6 @@ static const struct net_offload esp4_offload = {
};
static const struct xfrm_type_offload esp_type_offload = {
- .description = "ESP4 OFFLOAD",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.input_tail = esp_input_tail,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 84bb707bd88d..9fe13e4f5d08 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -371,6 +371,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_proto = 0;
fl4.fl4_sport = 0;
fl4.fl4_dport = 0;
+ } else {
+ swap(fl4.fl4_sport, fl4.fl4_dport);
}
if (fib_lookup(net, &fl4, &res, 0))
@@ -1122,10 +1124,8 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
prefix, ifa->ifa_prefixlen, prim,
ifa->ifa_rt_priority);
- /* Add network specific broadcasts, when it takes a sense */
+ /* Add the network broadcast address, when it makes sense */
if (ifa->ifa_prefixlen < 31) {
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32,
- prim, 0);
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
32, prim, 0);
}
@@ -1376,7 +1376,7 @@ static void nl_fib_input(struct sk_buff *skb)
portid = NETLINK_CB(skb).portid; /* netlink portid */
NETLINK_CB(skb).portid = 0; /* from kernel */
NETLINK_CB(skb).dst_group = 0; /* unicast */
- netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT);
+ nlmsg_unicast(net->ipv4.fibnl, skb, portid);
}
static int __net_init nl_fib_lookup_init(struct net *net)
@@ -1516,6 +1516,12 @@ static int __net_init ip_fib_net_init(struct net *net)
if (err)
return err;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ /* Default to 3-tuple */
+ net->ipv4.sysctl_fib_multipath_hash_fields =
+ FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
+#endif
+
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index b58db1ca4bfb..e184bcb19943 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -25,7 +25,7 @@ struct fib_alias {
#define FA_S_ACCESSED 0x01
-/* Dont write on fa_state unless needed, to keep it shared on all cpus */
+/* Don't write on fa_state unless needed, to keep it shared on all cpus */
static inline void fib_alias_accessed(struct fib_alias *fa)
{
if (!(fa->fa_state & FA_S_ACCESSED))
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a632b66bc13a..b42c429cebbe 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -208,9 +208,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
void fib_nh_common_release(struct fib_nh_common *nhc)
{
- if (nhc->nhc_dev)
- dev_put(nhc->nhc_dev);
-
+ dev_put(nhc->nhc_dev);
lwtstate_put(nhc->nhc_lwtstate);
rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
rt_fibinfo_free(&nhc->nhc_rth_input);
@@ -260,7 +258,7 @@ EXPORT_SYMBOL_GPL(free_fib_info);
void fib_release_info(struct fib_info *fi)
{
spin_lock_bh(&fib_info_lock);
- if (fi && --fi->fib_treeref == 0) {
+ if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
hlist_del(&fi->fib_hash);
if (fi->fib_prefsrc)
hlist_del(&fi->fib_lhash);
@@ -1373,7 +1371,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
if (!cfg->fc_mx) {
fi = fib_find_info_nh(net, cfg);
if (fi) {
- fi->fib_treeref++;
+ refcount_inc(&fi->fib_treeref);
return fi;
}
}
@@ -1547,11 +1545,11 @@ link_it:
if (ofi) {
fi->fib_dead = 1;
free_fib_info(fi);
- ofi->fib_treeref++;
+ refcount_inc(&ofi->fib_treeref);
return ofi;
}
- fi->fib_treeref++;
+ refcount_set(&fi->fib_treeref, 1);
refcount_set(&fi->fib_clntref, 1);
spin_lock_bh(&fib_info_lock);
hlist_add_head(&fi->fib_hash,
@@ -1874,6 +1872,7 @@ static int call_fib_nh_notifiers(struct fib_nh *nh,
(nh->fib_nh_flags & RTNH_F_DEAD))
return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
event_type, &info.info);
+ break;
default:
break;
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 25cf387cca5b..8060524f4256 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2380,11 +2380,11 @@ void __init fib_trie_init(void)
{
fn_alias_kmem = kmem_cache_create("ip_fib_alias",
sizeof(struct fib_alias),
- 0, SLAB_PANIC, NULL);
+ 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
LEAF_SIZE,
- 0, SLAB_PANIC, NULL);
+ 0, SLAB_PANIC | SLAB_ACCOUNT, NULL);
}
struct fib_table *fib_trie_table(u32 id, struct fib_table *alias)
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index e5f69b0bf3df..8fcbc6258ec5 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -230,8 +230,8 @@ static struct sk_buff *fou_gro_receive(struct sock *sk,
struct list_head *head,
struct sk_buff *skb)
{
+ const struct net_offload __rcu **offloads;
u8 proto = fou_from_sock(sk)->protocol;
- const struct net_offload **offloads;
const struct net_offload *ops;
struct sk_buff *pp = NULL;
@@ -263,10 +263,10 @@ out_unlock:
static int fou_gro_complete(struct sock *sk, struct sk_buff *skb,
int nhoff)
{
- const struct net_offload *ops;
+ const struct net_offload __rcu **offloads;
u8 proto = fou_from_sock(sk)->protocol;
+ const struct net_offload *ops;
int err = -ENOSYS;
- const struct net_offload **offloads;
rcu_read_lock();
offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
@@ -311,7 +311,7 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
struct list_head *head,
struct sk_buff *skb)
{
- const struct net_offload **offloads;
+ const struct net_offload __rcu **offloads;
const struct net_offload *ops;
struct sk_buff *pp = NULL;
struct sk_buff *p;
@@ -457,8 +457,8 @@ out:
static int gue_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
{
- const struct net_offload **offloads;
struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
+ const struct net_offload __rcu **offloads;
const struct net_offload *ops;
unsigned int guehlen = 0;
u8 proto;
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 5d1e6fe9d838..cbb2b4bb0dfa 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -195,7 +195,6 @@ static int gre_err(struct sk_buff *skb, u32 info)
static const struct net_protocol net_gre_protocol = {
.handler = gre_rcv,
.err_handler = gre_err,
- .netns_ok = 1,
};
static int __init gre_init(void)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 752e392083e6..8b30cadff708 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -993,14 +993,8 @@ static bool icmp_redirect(struct sk_buff *skb)
static bool icmp_echo(struct sk_buff *skb)
{
- struct icmp_ext_hdr *ext_hdr, _ext_hdr;
- struct icmp_ext_echo_iio *iio, _iio;
struct icmp_bxm icmp_param;
- struct net_device *dev;
- char buff[IFNAMSIZ];
struct net *net;
- u16 ident_len;
- u8 status;
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
@@ -1013,20 +1007,46 @@ static bool icmp_echo(struct sk_buff *skb)
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
- if (icmp_param.data.icmph.type == ICMP_ECHO) {
+ if (icmp_param.data.icmph.type == ICMP_ECHO)
icmp_param.data.icmph.type = ICMP_ECHOREPLY;
- goto send_reply;
- }
- if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+ else if (!icmp_build_probe(skb, &icmp_param.data.icmph))
return true;
+
+ icmp_reply(&icmp_param, skb);
+ return true;
+}
+
+/* Helper for icmp_echo and icmpv6_echo_reply.
+ * Searches for net_device that matches PROBE interface identifier
+ * and builds PROBE reply message in icmphdr.
+ *
+ * Returns false if PROBE responses are disabled via sysctl
+ */
+
+bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
+{
+ struct icmp_ext_hdr *ext_hdr, _ext_hdr;
+ struct icmp_ext_echo_iio *iio, _iio;
+ struct net *net = dev_net(skb->dev);
+ struct net_device *dev;
+ char buff[IFNAMSIZ];
+ u16 ident_len;
+ u8 status;
+
+ if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+ return false;
+
/* We currently only support probing interfaces on the proxy node
* Check to ensure L-bit is set
*/
- if (!(ntohs(icmp_param.data.icmph.un.echo.sequence) & 1))
- return true;
+ if (!(ntohs(icmphdr->un.echo.sequence) & 1))
+ return false;
/* Clear status bits in reply message */
- icmp_param.data.icmph.un.echo.sequence &= htons(0xFF00);
- icmp_param.data.icmph.type = ICMP_EXT_ECHOREPLY;
+ icmphdr->un.echo.sequence &= htons(0xFF00);
+ if (icmphdr->type == ICMP_EXT_ECHO)
+ icmphdr->type = ICMP_EXT_ECHOREPLY;
+ else
+ icmphdr->type = ICMPV6_EXT_ECHO_REPLY;
ext_hdr = skb_header_pointer(skb, 0, sizeof(_ext_hdr), &_ext_hdr);
/* Size of iio is class_type dependent.
* Only check header here and assign length based on ctype in the switch statement
@@ -1066,7 +1086,7 @@ static bool icmp_echo(struct sk_buff *skb)
if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
sizeof(struct in_addr))
goto send_mal_query;
- dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr);
+ dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case ICMP_AFI_IP6:
@@ -1075,8 +1095,7 @@ static bool icmp_echo(struct sk_buff *skb)
sizeof(struct in6_addr))
goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
break;
#endif
default:
@@ -1087,8 +1106,8 @@ static bool icmp_echo(struct sk_buff *skb)
goto send_mal_query;
}
if (!dev) {
- icmp_param.data.icmph.code = ICMP_EXT_CODE_NO_IF;
- goto send_reply;
+ icmphdr->code = ICMP_EXT_CODE_NO_IF;
+ return true;
}
/* Fill bits in reply message */
if (dev->flags & IFF_UP)
@@ -1098,14 +1117,13 @@ static bool icmp_echo(struct sk_buff *skb)
if (!list_empty(&rcu_dereference(dev->ip6_ptr)->addr_list))
status |= ICMP_EXT_ECHOREPLY_IPV6;
dev_put(dev);
- icmp_param.data.icmph.un.echo.sequence |= htons(status);
-send_reply:
- icmp_reply(&icmp_param, skb);
- return true;
+ icmphdr->un.echo.sequence |= htons(status);
+ return true;
send_mal_query:
- icmp_param.data.icmph.code = ICMP_EXT_CODE_MAL_QUERY;
- goto send_reply;
+ icmphdr->code = ICMP_EXT_CODE_MAL_QUERY;
+ return true;
}
+EXPORT_SYMBOL_GPL(icmp_build_probe);
/*
* Handle ICMP Timestamp requests.
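The hunk above splits the PROBE handling out of icmp_echo() into icmp_build_probe() so the ICMPv6 echo path can reuse it: the helper fills the reply header in place and its return value says whether PROBE is enabled and the query is addressed to this node. A hedged caller sketch, modelled on the icmp_echo() flow above (icmp_reply() and struct icmp_bxm as used in this file; the function name is illustrative):

static bool echo_or_probe_reply(struct sk_buff *skb, struct icmp_bxm *param)
{
        if (param->data.icmph.type == ICMP_ECHO)
                param->data.icmph.type = ICMP_ECHOREPLY;    /* classic echo */
        else if (!icmp_build_probe(skb, &param->data.icmph))
                return true;        /* PROBE disabled or not addressed to us */

        icmp_reply(param, skb);     /* send whatever the helper built */
        return true;
}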
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6b3c558a4f23..d2e2b3d18c66 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t)
static void igmp_ifc_timer_expire(struct timer_list *t)
{
struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer);
+ u32 mr_ifc_count;
igmpv3_send_cr(in_dev);
- if (in_dev->mr_ifc_count) {
- in_dev->mr_ifc_count--;
+restart:
+ mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count);
+
+ if (mr_ifc_count) {
+ if (cmpxchg(&in_dev->mr_ifc_count,
+ mr_ifc_count,
+ mr_ifc_count - 1) != mr_ifc_count)
+ goto restart;
igmp_ifc_start_timer(in_dev,
unsolicited_report_interval(in_dev));
}
@@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
igmp_ifc_start_timer(in_dev, 1);
}
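The restart loop above decrements mr_ifc_count with cmpxchg(), so the decrement is only applied if the count was not changed underneath the timer by one of the WRITE_ONCE() updates introduced elsewhere in this patch. A minimal userspace analogue of the same retry pattern, using C11 atomics and illustrative names:

#include <stdatomic.h>
#include <stdint.h>

/* Decrement *counter only if it is still non-zero and unchanged since we
 * read it; on a lost race, retry (a failed compare-exchange refreshes 'old'
 * with the current value).
 */
static void dec_if_nonzero(_Atomic uint32_t *counter)
{
        uint32_t old = atomic_load(counter);

        while (old != 0 &&
               !atomic_compare_exchange_weak(counter, &old, old - 1))
                ;       /* retry with the refreshed 'old' */
}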
@@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
in_dev->mr_qri;
}
/* cancel the interface change timer */
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
/* clear deleted report items */
@@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev)
igmp_group_dropped(pmc);
#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
in_dev->mr_gq_running = 0;
@@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(pmc->interface);
@@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* else no filters; keep old mode for reports */
pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(in_dev);
@@ -2233,7 +2240,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
iml->sfmode, psf->sl_count, psf->sl_addr, 0);
RCU_INIT_POINTER(iml->sflist, NULL);
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc);
kfree_rcu(psf, rcu);
return err;
}
@@ -2382,7 +2389,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
if (psl)
count += psl->sl_max;
- newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -2393,7 +2401,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
for (i = 0; i < psl->sl_count; i++)
newpsl->sl_addr[i] = psl->sl_addr[i];
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
@@ -2468,19 +2477,22 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
goto done;
}
if (msf->imsf_numsrc) {
- newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
- GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+ msf->imsf_numsrc),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
}
newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
- memcpy(newpsl->sl_addr, msf->imsf_slist,
- msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+ memcpy(newpsl->sl_addr, msf->imsf_slist_flex,
+ flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc));
err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
- sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
+ sock_kfree_s(sk, newpsl,
+ struct_size(newpsl, sl_addr,
+ newpsl->sl_max));
goto done;
}
} else {
@@ -2493,7 +2505,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
psl->sl_count, psl->sl_addr, 0);
/* decrease mem now to avoid the memleak warning */
- atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
} else
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
@@ -2551,14 +2564,14 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
count = psl->sl_count;
}
copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
- len = copycount * sizeof(psl->sl_addr[0]);
+ len = flex_array_size(psl, sl_addr, copycount);
msf->imsf_numsrc = count;
if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
return -EFAULT;
}
if (len &&
- copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+ copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len))
return -EFAULT;
return 0;
done:
@@ -2713,6 +2726,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
rv = 1;
} else if (im) {
if (src_addr) {
+ spin_lock_bh(&im->lock);
for (psf = im->sources; psf; psf = psf->sf_next) {
if (psf->sf_inaddr == src_addr)
break;
@@ -2723,6 +2737,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
im->sfcount[MCAST_EXCLUDE];
else
rv = im->sfcount[MCAST_EXCLUDE] != 0;
+ spin_unlock_bh(&im->lock);
} else
rv = 1; /* unspecified source; tentatively allow */
}
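Throughout this file the open-coded IP_SFLSIZE() size arithmetic is replaced by struct_size() and flex_array_size(), which compute the struct-plus-flexible-array and array-only sizes while saturating instead of wrapping on overflow. A hedged sketch of the two helpers' roles (the function names are illustrative, not kernel symbols):

/* Assuming ip_sf_socklist keeps its sl_addr[] flexible array member. */
static size_t sfl_total_size(const struct ip_sf_socklist *psl, unsigned int count)
{
        /* header plus 'count' addresses, saturating on overflow */
        return struct_size(psl, sl_addr, count);
}

static size_t sfl_payload_size(const struct ip_sf_socklist *psl, unsigned int count)
{
        /* only the 'count' addresses, e.g. for a memcpy() of the list */
        return flex_array_size(psl, sl_addr, count);
}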
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fd472eae4f5c..f25d02ad4a8a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -135,10 +135,18 @@ static int inet_csk_bind_conflict(const struct sock *sk,
bool relax, bool reuseport_ok)
{
struct sock *sk2;
+ bool reuseport_cb_ok;
bool reuse = sk->sk_reuse;
bool reuseport = !!sk->sk_reuseport;
+ struct sock_reuseport *reuseport_cb;
kuid_t uid = sock_i_uid((struct sock *)sk);
+ rcu_read_lock();
+ reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
+ /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
+ reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
+ rcu_read_unlock();
+
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
@@ -156,14 +164,14 @@ static int inet_csk_bind_conflict(const struct sock *sk,
if ((!relax ||
(!reuseport_ok &&
reuseport && sk2->sk_reuseport &&
- !rcu_access_pointer(sk->sk_reuseport_cb) &&
+ reuseport_cb_ok &&
(sk2->sk_state == TCP_TIME_WAIT ||
uid_eq(uid, sock_i_uid(sk2))))) &&
inet_rcv_saddr_equal(sk, sk2, true))
break;
} else if (!reuseport_ok ||
!reuseport || !sk2->sk_reuseport ||
- rcu_access_pointer(sk->sk_reuseport_cb) ||
+ !reuseport_cb_ok ||
(sk2->sk_state != TCP_TIME_WAIT &&
!uid_eq(uid, sock_i_uid(sk2)))) {
if (inet_rcv_saddr_equal(sk, sk2, true))
@@ -526,7 +534,8 @@ out:
atomic_read(&newsk->sk_rmem_alloc));
mem_cgroup_sk_alloc(newsk);
if (newsk->sk_memcg && amt)
- mem_cgroup_charge_skmem(newsk->sk_memcg, amt);
+ mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
+ GFP_KERNEL | __GFP_NOFAIL);
release_sock(newsk);
}
@@ -687,6 +696,66 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
}
EXPORT_SYMBOL(inet_rtx_syn_ack);
+static struct request_sock *inet_reqsk_clone(struct request_sock *req,
+ struct sock *sk)
+{
+ struct sock *req_sk, *nreq_sk;
+ struct request_sock *nreq;
+
+ nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
+ if (!nreq) {
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
+ /* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
+ sock_put(sk);
+ return NULL;
+ }
+
+ req_sk = req_to_sk(req);
+ nreq_sk = req_to_sk(nreq);
+
+ memcpy(nreq_sk, req_sk,
+ offsetof(struct sock, sk_dontcopy_begin));
+ memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
+ req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
+ sk_node_init(&nreq_sk->sk_node);
+ nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
+#ifdef CONFIG_XPS
+ nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
+#endif
+ nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
+
+ nreq->rsk_listener = sk;
+
+ /* We need not acquire fastopenq->lock
+ * because the child socket is locked in inet_csk_listen_stop().
+ */
+ if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
+ rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);
+
+ return nreq;
+}
+
+static void reqsk_queue_migrated(struct request_sock_queue *queue,
+ const struct request_sock *req)
+{
+ if (req->num_timeout == 0)
+ atomic_inc(&queue->young);
+ atomic_inc(&queue->qlen);
+}
+
+static void reqsk_migrate_reset(struct request_sock *req)
+{
+ req->saved_syn = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+ inet_rsk(req)->ipv6_opt = NULL;
+ inet_rsk(req)->pktopts = NULL;
+#else
+ inet_rsk(req)->ireq_opt = NULL;
+#endif
+}
+
/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock *req)
{
@@ -727,15 +796,39 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
static void reqsk_timer_handler(struct timer_list *t)
{
struct request_sock *req = from_timer(req, t, rsk_timer);
+ struct request_sock *nreq = NULL, *oreq = req;
struct sock *sk_listener = req->rsk_listener;
- struct net *net = sock_net(sk_listener);
- struct inet_connection_sock *icsk = inet_csk(sk_listener);
- struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+ struct inet_connection_sock *icsk;
+ struct request_sock_queue *queue;
+ struct net *net;
int max_syn_ack_retries, qlen, expire = 0, resend = 0;
- if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
- goto drop;
+ if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
+ struct sock *nsk;
+
+ nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
+ if (!nsk)
+ goto drop;
+ nreq = inet_reqsk_clone(req, nsk);
+ if (!nreq)
+ goto drop;
+
+ /* The new timer for the cloned req can drop those two references
+ * by calling inet_csk_reqsk_queue_drop_and_put(), so
+ * hold another count to prevent use-after-free and
+ * call reqsk_put() just before return.
+ */
+ refcount_set(&nreq->rsk_refcnt, 2 + 1);
+ timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
+ reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
+
+ req = nreq;
+ sk_listener = nsk;
+ }
+
+ icsk = inet_csk(sk_listener);
+ net = sock_net(sk_listener);
max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
@@ -754,6 +847,7 @@ static void reqsk_timer_handler(struct timer_list *t)
* embrions; and abort old ones without pity, if old
* ones are about to clog our table.
*/
+ queue = &icsk->icsk_accept_queue;
qlen = reqsk_queue_len(queue);
if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
int young = reqsk_queue_len_young(queue) << 1;
@@ -778,10 +872,39 @@ static void reqsk_timer_handler(struct timer_list *t)
atomic_dec(&queue->young);
timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
mod_timer(&req->rsk_timer, jiffies + timeo);
+
+ if (!nreq)
+ return;
+
+ if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
+ /* delete timer */
+ inet_csk_reqsk_queue_drop(sk_listener, nreq);
+ goto no_ownership;
+ }
+
+ __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
+ reqsk_migrate_reset(oreq);
+ reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
+ reqsk_put(oreq);
+
+ reqsk_put(nreq);
return;
}
+
+ /* Even if we can clone the req, we may not need to retransmit any more
+ * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
+ * CPU may win the "own_req" race so that inet_ehash_insert() fails.
+ */
+ if (nreq) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
+no_ownership:
+ reqsk_migrate_reset(nreq);
+ reqsk_queue_removed(queue, nreq);
+ __reqsk_free(nreq);
+ }
+
drop:
- inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
+ inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
}
static void reqsk_queue_hash_req(struct request_sock *req,
@@ -997,12 +1120,42 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
struct request_sock *req, bool own_req)
{
if (own_req) {
- inet_csk_reqsk_queue_drop(sk, req);
- reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
- if (inet_csk_reqsk_queue_add(sk, req, child))
+ inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+ reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+
+ if (sk != req->rsk_listener) {
+ /* another listening sk has been selected,
+ * migrate the req to it.
+ */
+ struct request_sock *nreq;
+
+ /* hold a refcnt for the nreq->rsk_listener
+ * which is assigned in inet_reqsk_clone()
+ */
+ sock_hold(sk);
+ nreq = inet_reqsk_clone(req, sk);
+ if (!nreq) {
+ inet_child_forget(sk, req, child);
+ goto child_put;
+ }
+
+ refcount_set(&nreq->rsk_refcnt, 1);
+ if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQSUCCESS);
+ reqsk_migrate_reset(req);
+ reqsk_put(req);
+ return child;
+ }
+
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+ reqsk_migrate_reset(nreq);
+ __reqsk_free(nreq);
+ } else if (inet_csk_reqsk_queue_add(sk, req, child)) {
return child;
+ }
}
/* Too bad, another child took ownership of the request, undo. */
+child_put:
bh_unlock_sock(child);
sock_put(child);
return NULL;
@@ -1028,14 +1181,40 @@ void inet_csk_listen_stop(struct sock *sk)
* of the variants now. --ANK
*/
while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
- struct sock *child = req->sk;
+ struct sock *child = req->sk, *nsk;
+ struct request_sock *nreq;
local_bh_disable();
bh_lock_sock(child);
WARN_ON(sock_owned_by_user(child));
sock_hold(child);
+ nsk = reuseport_migrate_sock(sk, child, NULL);
+ if (nsk) {
+ nreq = inet_reqsk_clone(req, nsk);
+ if (nreq) {
+ refcount_set(&nreq->rsk_refcnt, 1);
+
+ if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
+ __NET_INC_STATS(sock_net(nsk),
+ LINUX_MIB_TCPMIGRATEREQSUCCESS);
+ reqsk_migrate_reset(req);
+ } else {
+ __NET_INC_STATS(sock_net(nsk),
+ LINUX_MIB_TCPMIGRATEREQFAILURE);
+ reqsk_migrate_reset(nreq);
+ __reqsk_free(nreq);
+ }
+
+ /* inet_csk_reqsk_queue_add() has already
+ * called inet_child_forget() on failure case.
+ */
+ goto skip_child_forget;
+ }
+ }
+
inet_child_forget(sk, req, child);
+skip_child_forget:
reqsk_put(req);
bh_unlock_sock(child);
local_bh_enable();
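The changes above let a request socket outlive its closing listener: reuseport_migrate_sock() picks another listener from the same reuseport group, inet_reqsk_clone() copies the req onto it, and reqsk_migrate_reset() clears the pointers the clone now owns so freeing the old req cannot double-free them; the TCPMigrateReqSuccess/Failure MIBs count the outcomes. A condensed restatement of the decision taken in the SYN+ACK timer above, error paths trimmed:

        if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
                struct sock *nsk;

                nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
                if (!nsk)
                        goto drop;                      /* nobody to migrate to */

                nreq = inet_reqsk_clone(req, nsk);      /* clone req onto the new listener */
                if (!nreq)
                        goto drop;

                /* 2 refs for ehash entry + timer, 1 for this handler */
                refcount_set(&nreq->rsk_refcnt, 2 + 1);
                req = nreq;
                sk_listener = nsk;
        }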
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 93474b1bea4e..ef7897226f08 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -416,7 +416,7 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
static int inet_twsk_diag_fill(struct sock *sk,
struct sk_buff *skb,
struct netlink_callback *cb,
- u16 nlmsg_flags)
+ u16 nlmsg_flags, bool net_admin)
{
struct inet_timewait_sock *tw = inet_twsk(sk);
struct inet_diag_msg *r;
@@ -444,6 +444,12 @@ static int inet_twsk_diag_fill(struct sock *sk,
r->idiag_uid = 0;
r->idiag_inode = 0;
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
+ tw->tw_mark)) {
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+ }
+
nlmsg_end(skb, nlh);
return 0;
}
@@ -494,7 +500,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
u16 nlmsg_flags, bool net_admin)
{
if (sk->sk_state == TCP_TIME_WAIT)
- return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags);
+ return inet_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
if (sk->sk_state == TCP_NEW_SYN_RECV)
return inet_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);
@@ -574,10 +580,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
nlmsg_free(rep);
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
out:
if (sk)
@@ -801,6 +804,8 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.mark = sk->sk_mark;
else if (sk->sk_state == TCP_NEW_SYN_RECV)
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+ else if (sk->sk_state == TCP_TIME_WAIT)
+ entry.mark = inet_twsk(sk)->tw_mark;
else
entry.mark = 0;
#ifdef CONFIG_SOCK_CGROUP_DATA
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c96866a53a66..80aeaf9e6e16 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -697,7 +697,7 @@ void inet_unhash(struct sock *sk)
goto unlock;
if (rcu_access_pointer(sk->sk_reuseport_cb))
- reuseport_detach_sock(sk);
+ reuseport_stop_listen_sock(sk);
if (ilb) {
inet_unhash2(hashinfo, sk);
ilb->count--;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a68bf4c6fe9b..0fe6c936dc54 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -107,6 +107,8 @@ module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
+static const struct header_ops ipgre_header_ops;
+
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
u32 id, u32 index,
@@ -364,7 +366,10 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
raw_proto, false) < 0)
goto drop;
- if (tunnel->dev->type != ARPHRD_NONE)
+ /* Special case for ipgre_header_parse(), which expects the
+ * mac_header to point to the outer IP header.
+ */
+ if (tunnel->dev->header_ops == &ipgre_header_ops)
skb_pop_mac_header(skb);
else
skb_reset_mac_header(skb);
@@ -625,15 +630,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
+ const int pull_len = tunnel->hlen + sizeof(struct iphdr);
+
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
+ if (pull_len > skb_transport_offset(skb))
+ goto free_skb;
+
/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
* to gre header.
*/
- skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ skb_pull(skb, pull_len);
skb_reset_mac_header(skb);
} else {
if (skb_cow_head(skb, dev->needed_headroom))
@@ -918,7 +928,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
.ndo_stop = ipgre_close,
#endif
.ndo_start_xmit = ipgre_xmit,
- .ndo_do_ioctl = ip_tunnel_ioctl,
+ .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
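The pull_len check above ensures the header_ops transmit path only strips the outer IP plus GRE headers it is about to hand to ip_tunnel_xmit() when those bytes are actually present in front of the transport header; otherwise the packet is dropped rather than pulling into payload. A hedged helper expressing the same bound (the name is illustrative):

static bool gre_outer_headers_present(const struct sk_buff *skb, int pull_len)
{
        /* skb_transport_offset() is where the payload starts; pulling more
         * than that would strip payload bytes as if they were headers.
         */
        return pull_len <= skb_transport_offset(skb);
}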
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c3efc7d658f6..9bca57ef8b83 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
- /* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
- struct sk_buff *skb2;
-
- skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
- if (!skb2) {
- kfree_skb(skb);
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb)
return -ENOMEM;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
@@ -446,8 +437,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
{
BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
- memcpy(&iph->saddr, &fl4->saddr,
- sizeof(fl4->saddr) + sizeof(fl4->daddr));
+
+ iph->saddr = fl4->saddr;
+ iph->daddr = fl4->daddr;
}
/* Note: skb->sk can be different from sk, in case of tunnels */
@@ -614,18 +606,6 @@ void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
}
EXPORT_SYMBOL(ip_fraglist_init);
-static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
- struct ip_fraglist_iter *iter)
-{
- struct sk_buff *to = iter->frag;
-
- /* Copy the flags to each fragment. */
- IPCB(to)->flags = IPCB(skb)->flags;
-
- if (iter->offset == 0)
- ip_options_fragment(to);
-}
-
void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
{
unsigned int hlen = iter->hlen;
@@ -671,7 +651,7 @@ void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
EXPORT_SYMBOL(ip_frag_init);
static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
- bool first_frag, struct ip_frag_state *state)
+ bool first_frag)
{
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
@@ -846,11 +826,14 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/* Everything is OK. Generate! */
ip_fraglist_init(skb, iph, hlen, &iter);
+ if (iter.frag)
+ ip_options_fragment(iter.frag);
+
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
if (iter.frag) {
- ip_fraglist_ipcb_prepare(skb, &iter);
+ IPCB(iter.frag)->flags = IPCB(skb)->flags;
ip_fraglist_prepare(skb, &iter);
}
@@ -905,7 +888,7 @@ slow_path:
err = PTR_ERR(skb2);
goto fail;
}
- ip_frag_ipcb(skb, skb2, first_frag, &state);
+ ip_frag_ipcb(skb, skb2, first_frag);
/*
* Put this fragment into the sending queue.
@@ -1054,7 +1037,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
@@ -1074,35 +1057,39 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len + 15;
+ alloc_extra += exthdrlen;
+
+ /* The last fragment gets additional space at tail.
+ * Note, with MSG_MORE we overallocate on fragments,
+ * because we have no idea what fragment will be
+ * the last.
+ */
+ if (datalen == length + fraggap)
+ alloc_extra += rt->dst.trailer_len;
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
- alloclen += exthdrlen;
-
- /* The last fragment gets additional space at tail.
- * Note, with MSG_MORE we overallocate on fragments,
- * because we have no idea what fragment will be
- * the last.
- */
- if (datalen == length + fraggap)
- alloclen += rt->dst.trailer_len;
+ alloclen += alloc_extra;
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len + 15,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len + 15,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
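__ip_append_data() now folds the link-layer headroom (hh_len + 15), exthdrlen and, for the last fragment, the trailer into alloc_extra before choosing between a linear and a paged skb, so the SKB_MAX_ALLOC comparison sees the real allocation size. A simplified model of the resulting size decision, with MSG_MORE and fraggap handling left out and illustrative names:

static unsigned int pick_alloclen(unsigned int fraglen, unsigned int alloc_extra,
                                  bool paged, bool dev_has_sg)
{
        /* the whole fragment fits in one linear allocation */
        if (!paged && (fraglen + alloc_extra < SKB_MAX_ALLOC || !dev_has_sg))
                return fraglen + alloc_extra;

        /* otherwise only MAX_HEADER is linear, the rest goes to page frags */
        return min_t(unsigned int, fraglen, MAX_HEADER) + alloc_extra;
}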
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ec6036713e2c..b297bb28556e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -663,12 +663,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
struct sockaddr_storage *group,
struct sockaddr_storage *list)
{
- int msize = IP_MSFILTER_SIZE(numsrc);
struct ip_msfilter *msf;
struct sockaddr_in *psin;
int err, i;
- msf = kmalloc(msize, GFP_KERNEL);
+ msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
if (!msf)
return -ENOBUFS;
@@ -684,7 +683,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
if (psin->sin_family != AF_INET)
goto Eaddrnotavail;
- msf->imsf_slist[i] = psin->sin_addr.s_addr;
+ msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
}
err = ip_mc_msfilter(sk, msf, ifindex);
kfree(msf);
@@ -791,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
goto out_free_gsf;
err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
- gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+ gsf->gf_fmode, &gsf->gf_group,
+ gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return err;
@@ -800,7 +800,7 @@ out_free_gsf:
static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
unsigned int n;
void *p;
@@ -814,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
p = kmalloc(optlen + 4, GFP_KERNEL);
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
err = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
@@ -827,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
goto out_free_gsf;
err = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_gsf;
/* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -835,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
- &gf32->gf_group, gf32->gf_slist);
+ &gf32->gf_group, gf32->gf_slist_flex);
out_free_gsf:
kfree(p);
return err;
@@ -1456,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname)
static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
@@ -1468,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
return -EFAULT;
num = gsf.gf_numsrc;
- err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
if (err)
return err;
if (gsf.gf_numsrc < num)
@@ -1482,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
@@ -1499,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
num = gf.gf_numsrc = gf32.gf_numsrc;
gf.gf_group = gf32.gf_group;
- err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
if (err)
return err;
if (gf.gf_numsrc < num)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index f6cc26de5ed3..fe9101d3d69e 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -317,7 +317,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
}
dev->needed_headroom = t_hlen + hlen;
- mtu -= t_hlen;
+ mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
if (mtu < IPV4_MIN_MTU)
mtu = IPV4_MIN_MTU;
@@ -348,6 +348,9 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
t_hlen = nt->hlen + sizeof(struct iphdr);
dev->min_mtu = ETH_MIN_MTU;
dev->max_mtu = IP_MAX_MTU - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ dev->max_mtu -= dev->hard_header_len;
+
ip_tunnel_add(itn, nt);
return nt;
@@ -387,7 +390,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
- skb_reset_network_header(skb);
+ skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
@@ -489,11 +492,14 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
pkt_size = skb->len - tunnel_hlen;
+ pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
- if (df)
+ if (df) {
mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
- else
+ mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
+ } else {
mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+ }
if (skb_valid_dst(skb))
skb_dst_update_pmtu_no_confirm(skb, mtu);
@@ -952,19 +958,20 @@ done:
}
EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
-int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
struct ip_tunnel_parm p;
int err;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
return -EFAULT;
err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
- if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (!err && copy_to_user(data, &p, sizeof(p)))
return -EFAULT;
return err;
}
-EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
+EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
@@ -972,6 +979,9 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
int max_mtu = IP_MAX_MTU - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ max_mtu -= dev->hard_header_len;
+
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
@@ -1149,6 +1159,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
if (tb[IFLA_MTU]) {
unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
+ if (dev->type == ARPHRD_ETHER)
+ max -= dev->hard_header_len;
+
mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
}
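Every MTU computation in this file now subtracts dev->hard_header_len for ARPHRD_ETHER tunnel devices (gretap and friends), since those carry an inner Ethernet header inside the tunnel as well. The rule, as a hedged helper (the name is illustrative):

static int tnl_overhead(const struct net_device *dev, int t_hlen)
{
        int overhead = t_hlen;                          /* tunnel header + outer IP */

        if (dev->type == ARPHRD_ETHER)
                overhead += dev->hard_header_len;       /* inner Ethernet header */

        return overhead;
}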
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 31c6c6d99d5e..efe25a0172e6 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -351,6 +351,7 @@ static int vti4_err(struct sk_buff *skb, u32 info)
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return 0;
+ break;
case ICMP_REDIRECT:
break;
default:
@@ -404,7 +405,7 @@ static const struct net_device_ops vti_netdev_ops = {
.ndo_init = vti_tunnel_init,
.ndo_uninit = ip_tunnel_uninit,
.ndo_start_xmit = vti_tunnel_xmit,
- .ndo_do_ioctl = ip_tunnel_ioctl,
+ .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index b42683212c65..366094c1ce6c 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -31,6 +31,7 @@ static int ipcomp4_err(struct sk_buff *skb, u32 info)
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return 0;
+ break;
case ICMP_REDIRECT:
break;
default:
@@ -152,7 +153,6 @@ static int ipcomp4_rcv_cb(struct sk_buff *skb, int err)
}
static const struct xfrm_type ipcomp_type = {
- .description = "IPCOMP4",
.owner = THIS_MODULE,
.proto = IPPROTO_COMP,
.init_state = ipcomp4_init_state,
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index d5bfa087c23a..3aa78ccbec3e 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -242,6 +242,8 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
if (!tun_dst)
return 0;
}
+ skb_reset_mac_header(skb);
+
return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
}
@@ -345,7 +347,7 @@ static const struct net_device_ops ipip_netdev_ops = {
.ndo_init = ipip_tunnel_init,
.ndo_uninit = ip_tunnel_uninit,
.ndo_start_xmit = ipip_tunnel_xmit,
- .ndo_do_ioctl = ip_tunnel_ioctl,
+ .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 939792a38814..2dda856ca260 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1317,7 +1317,7 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags)
}
/* called from ip_ra_control(), before an RCU grace period,
- * we dont need to call synchronize_rcu() here
+ * we don't need to call synchronize_rcu() here
*/
static void mrtsock_destruct(struct sock *sk)
{
@@ -1938,7 +1938,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
struct mfc_cache *cache_proxy;
- /* For an (*,G) entry, we only check that the incomming
+ /* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
cache_proxy = mr_mfc_find_any_parent(mrt, vif);
@@ -2119,7 +2119,7 @@ int ip_mr_input(struct sk_buff *skb)
raw_rcv(mroute_sk, skb);
return 0;
}
- }
+ }
}
/* already under rcu_read_lock() */
@@ -3007,7 +3007,6 @@ static const struct seq_operations ipmr_mfc_seq_ops = {
#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
.handler = pim_rcv,
- .netns_ok = 1,
};
#endif
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 6922612df456..3de78416ec76 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -18,15 +18,12 @@ MODULE_DESCRIPTION("arptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
(1 << NF_ARP_FORWARD))
-static int __net_init arptable_filter_table_init(struct net *net);
-
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_ARP,
.priority = NF_IP_PRI_FILTER,
- .table_init = arptable_filter_table_init,
};
/* The work comes in here from netfilter.c */
@@ -39,7 +36,7 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *arpfilter_ops __read_mostly;
-static int __net_init arptable_filter_table_init(struct net *net)
+static int arptable_filter_table_init(struct net *net)
{
struct arpt_replace *repl;
int err;
@@ -69,30 +66,32 @@ static struct pernet_operations arptable_filter_net_ops = {
static int __init arptable_filter_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_filter,
+ arptable_filter_table_init);
+
+ if (ret < 0)
+ return ret;
arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
- if (IS_ERR(arpfilter_ops))
+ if (IS_ERR(arpfilter_ops)) {
+ xt_unregister_template(&packet_filter);
return PTR_ERR(arpfilter_ops);
+ }
ret = register_pernet_subsys(&arptable_filter_net_ops);
if (ret < 0) {
+ xt_unregister_template(&packet_filter);
kfree(arpfilter_ops);
return ret;
}
- ret = arptable_filter_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&arptable_filter_net_ops);
- kfree(arpfilter_ops);
- }
-
return ret;
}
static void __exit arptable_filter_fini(void)
{
unregister_pernet_subsys(&arptable_filter_net_ops);
+ xt_unregister_template(&packet_filter);
kfree(arpfilter_ops);
}
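This is the first of several conversions in this series: instead of a .table_init callback in struct xt_table plus an eager *_table_init(&init_net) call, each table registers a template at module load and is instantiated per netns on demand, so every failure path after xt_register_template() must call xt_unregister_template(). A skeleton of the common pattern, with placeholder identifiers (my_table, my_table_init, my_hook, my_net_ops, my_ops):

static struct nf_hook_ops *my_ops __read_mostly;

static int __init my_table_module_init(void)
{
        int ret = xt_register_template(&my_table, my_table_init);

        if (ret < 0)
                return ret;

        my_ops = xt_hook_ops_alloc(&my_table, my_hook);
        if (IS_ERR(my_ops)) {
                xt_unregister_template(&my_table);
                return PTR_ERR(my_ops);
        }

        ret = register_pernet_subsys(&my_net_ops);
        if (ret < 0) {
                xt_unregister_template(&my_table);
                kfree(my_ops);
        }
        return ret;
}

static void __exit my_table_module_exit(void)
{
        unregister_pernet_subsys(&my_net_ops);
        xt_unregister_template(&my_table);
        kfree(my_ops);
}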
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 8f7ca67475b7..8fd1aba8af31 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -66,11 +66,22 @@ struct clusterip_net {
/* lock protects the configs list */
spinlock_t lock;
+ bool clusterip_deprecated_warning;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *procdir;
/* mutex protects the config->pde*/
struct mutex mutex;
#endif
+ unsigned int hook_users;
+};
+
+static unsigned int clusterip_arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state);
+
+static const struct nf_hook_ops cip_arp_ops = {
+ .hook = clusterip_arp_mangle,
+ .pf = NFPROTO_ARP,
+ .hooknum = NF_ARP_OUT,
+ .priority = -1
};
static unsigned int clusterip_net_id __read_mostly;
@@ -458,6 +469,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int clusterip_tg_check(const struct xt_tgchk_param *par)
{
struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+ struct clusterip_net *cn = clusterip_pernet(par->net);
const struct ipt_entry *e = par->entryinfo;
struct clusterip_config *config;
int ret, i;
@@ -467,6 +479,9 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return -EOPNOTSUPP;
}
+ if (cn->hook_users == UINT_MAX)
+ return -EOVERFLOW;
+
if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
@@ -517,10 +532,23 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return ret;
}
- if (!par->net->xt.clusterip_deprecated_warning) {
+ if (cn->hook_users == 0) {
+ ret = nf_register_net_hook(par->net, &cip_arp_ops);
+
+ if (ret < 0) {
+ clusterip_config_entry_put(config);
+ clusterip_config_put(config);
+ nf_ct_netns_put(par->net, par->family);
+ return ret;
+ }
+ }
+
+ cn->hook_users++;
+
+ if (!cn->clusterip_deprecated_warning) {
pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
"use xt_cluster instead\n");
- par->net->xt.clusterip_deprecated_warning = true;
+ cn->clusterip_deprecated_warning = true;
}
cipinfo->config = config;
@@ -531,6 +559,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
{
const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+ struct clusterip_net *cn = clusterip_pernet(par->net);
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
@@ -539,6 +568,10 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
clusterip_config_put(cipinfo->config);
nf_ct_netns_put(par->net, par->family);
+ cn->hook_users--;
+
+ if (cn->hook_users == 0)
+ nf_unregister_net_hook(par->net, &cip_arp_ops);
}
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
@@ -602,9 +635,8 @@ static void arp_print(struct arp_payload *payload)
#endif
static unsigned int
-arp_mangle(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+clusterip_arp_mangle(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
@@ -654,13 +686,6 @@ arp_mangle(void *priv,
return NF_ACCEPT;
}
-static const struct nf_hook_ops cip_arp_ops = {
- .hook = arp_mangle,
- .pf = NFPROTO_ARP,
- .hooknum = NF_ARP_OUT,
- .priority = -1
-};
-
/***********************************************************************
* PROC DIR HANDLING
***********************************************************************/
@@ -817,20 +842,14 @@ static const struct proc_ops clusterip_proc_ops = {
static int clusterip_net_init(struct net *net)
{
struct clusterip_net *cn = clusterip_pernet(net);
- int ret;
INIT_LIST_HEAD(&cn->configs);
spin_lock_init(&cn->lock);
- ret = nf_register_net_hook(net, &cip_arp_ops);
- if (ret < 0)
- return ret;
-
#ifdef CONFIG_PROC_FS
cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
if (!cn->procdir) {
- nf_unregister_net_hook(net, &cip_arp_ops);
pr_err("Unable to proc dir entry\n");
return -ENOMEM;
}
@@ -850,7 +869,6 @@ static void clusterip_net_exit(struct net *net)
cn->procdir = NULL;
mutex_unlock(&cn->mutex);
#endif
- nf_unregister_net_hook(net, &cip_arp_ops);
}
static struct pernet_operations clusterip_net_ops = {
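The per-netns hook_users counter above makes the ARP mangle hook lazy: it is registered when the first CLUSTERIP rule in a namespace is checked in and unregistered when the last one is destroyed, and the UINT_MAX test stops the counter from overflowing. A hedged sketch of the counting, with locking and the config plumbing omitted (the helper names are illustrative):

static int cip_arp_hook_get(struct net *net, struct clusterip_net *cn)
{
        int ret = 0;

        if (cn->hook_users == UINT_MAX)
                return -EOVERFLOW;              /* refuse to wrap the counter */
        if (cn->hook_users == 0)
                ret = nf_register_net_hook(net, &cip_arp_ops);
        if (!ret)
                cn->hook_users++;
        return ret;
}

static void cip_arp_hook_put(struct net *net, struct clusterip_net *cn)
{
        if (--cn->hook_users == 0)
                nf_unregister_net_hook(net, &cip_arp_ops);
}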
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 8272df7c6ad5..0eb0e2ab9bfc 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -19,7 +19,6 @@ MODULE_DESCRIPTION("iptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
-static int __net_init iptable_filter_table_init(struct net *net);
static const struct xt_table packet_filter = {
.name = "filter",
@@ -27,7 +26,6 @@ static const struct xt_table packet_filter = {
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_FILTER,
- .table_init = iptable_filter_table_init,
};
static unsigned int
@@ -43,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly;
static bool forward __read_mostly = true;
module_param(forward, bool, 0000);
-static int __net_init iptable_filter_table_init(struct net *net)
+static int iptable_filter_table_init(struct net *net)
{
struct ipt_replace *repl;
int err;
@@ -62,7 +60,7 @@ static int __net_init iptable_filter_table_init(struct net *net)
static int __net_init iptable_filter_net_init(struct net *net)
{
- if (net == &init_net || !forward)
+ if (!forward)
return iptable_filter_table_init(net);
return 0;
@@ -86,22 +84,32 @@ static struct pernet_operations iptable_filter_net_ops = {
static int __init iptable_filter_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_filter,
+ iptable_filter_table_init);
+
+ if (ret < 0)
+ return ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
- if (IS_ERR(filter_ops))
+ if (IS_ERR(filter_ops)) {
+ xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
+ }
ret = register_pernet_subsys(&iptable_filter_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
+ return ret;
+ }
- return ret;
+ return 0;
}
static void __exit iptable_filter_fini(void)
{
unregister_pernet_subsys(&iptable_filter_net_ops);
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
}
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 2abc3836f391..40417a3f930b 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -25,15 +25,12 @@ MODULE_DESCRIPTION("iptables mangle table");
(1 << NF_INET_LOCAL_OUT) | \
(1 << NF_INET_POST_ROUTING))
-static int __net_init iptable_mangle_table_init(struct net *net);
-
static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_MANGLE,
- .table_init = iptable_mangle_table_init,
};
static unsigned int
@@ -83,7 +80,7 @@ iptable_mangle_hook(void *priv,
}
static struct nf_hook_ops *mangle_ops __read_mostly;
-static int __net_init iptable_mangle_table_init(struct net *net)
+static int iptable_mangle_table_init(struct net *net)
{
struct ipt_replace *repl;
int ret;
@@ -113,32 +110,32 @@ static struct pernet_operations iptable_mangle_net_ops = {
static int __init iptable_mangle_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_mangler,
+ iptable_mangle_table_init);
+ if (ret < 0)
+ return ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
if (IS_ERR(mangle_ops)) {
+ xt_unregister_template(&packet_mangler);
ret = PTR_ERR(mangle_ops);
return ret;
}
ret = register_pernet_subsys(&iptable_mangle_net_ops);
if (ret < 0) {
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
return ret;
}
- ret = iptable_mangle_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&iptable_mangle_net_ops);
- kfree(mangle_ops);
- }
-
return ret;
}
static void __exit iptable_mangle_fini(void)
{
unregister_pernet_subsys(&iptable_mangle_net_ops);
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
}
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a9913842ef18..45d7e072e6a5 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -17,8 +17,6 @@ struct iptable_nat_pernet {
struct nf_hook_ops *nf_nat_ops;
};
-static int __net_init iptable_nat_table_init(struct net *net);
-
static unsigned int iptable_nat_net_id __read_mostly;
static const struct xt_table nf_nat_ipv4_table = {
@@ -29,7 +27,6 @@ static const struct xt_table nf_nat_ipv4_table = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
- .table_init = iptable_nat_table_init,
};
static unsigned int iptable_nat_do_chain(void *priv,
@@ -113,7 +110,7 @@ static void ipt_nat_unregister_lookups(struct net *net)
kfree(ops);
}
-static int __net_init iptable_nat_table_init(struct net *net)
+static int iptable_nat_table_init(struct net *net)
{
struct ipt_replace *repl;
int ret;
@@ -155,20 +152,25 @@ static struct pernet_operations iptable_nat_net_ops = {
static int __init iptable_nat_init(void)
{
- int ret = register_pernet_subsys(&iptable_nat_net_ops);
+ int ret = xt_register_template(&nf_nat_ipv4_table,
+ iptable_nat_table_init);
+
+ if (ret < 0)
+ return ret;
- if (ret)
+ ret = register_pernet_subsys(&iptable_nat_net_ops);
+ if (ret < 0) {
+ xt_unregister_template(&nf_nat_ipv4_table);
return ret;
+ }
- ret = iptable_nat_table_init(&init_net);
- if (ret)
- unregister_pernet_subsys(&iptable_nat_net_ops);
return ret;
}
static void __exit iptable_nat_exit(void)
{
unregister_pernet_subsys(&iptable_nat_net_ops);
+ xt_unregister_template(&nf_nat_ipv4_table);
}
module_init(iptable_nat_init);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index ceef397c1f5f..b88e0f36cd05 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -12,8 +12,6 @@
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
-static int __net_init iptable_raw_table_init(struct net *net);
-
static bool raw_before_defrag __read_mostly;
MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
module_param(raw_before_defrag, bool, 0000);
@@ -24,7 +22,6 @@ static const struct xt_table packet_raw = {
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_RAW,
- .table_init = iptable_raw_table_init,
};
static const struct xt_table packet_raw_before_defrag = {
@@ -33,7 +30,6 @@ static const struct xt_table packet_raw_before_defrag = {
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
- .table_init = iptable_raw_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -89,22 +85,24 @@ static int __init iptable_raw_init(void)
pr_info("Enabling raw table before defrag\n");
}
+ ret = xt_register_template(table,
+ iptable_raw_table_init);
+ if (ret < 0)
+ return ret;
+
rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
- if (IS_ERR(rawtable_ops))
+ if (IS_ERR(rawtable_ops)) {
+ xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
+ }
ret = register_pernet_subsys(&iptable_raw_net_ops);
if (ret < 0) {
+ xt_unregister_template(table);
kfree(rawtable_ops);
return ret;
}
- ret = iptable_raw_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&iptable_raw_net_ops);
- kfree(rawtable_ops);
- }
-
return ret;
}
@@ -112,6 +110,7 @@ static void __exit iptable_raw_fini(void)
{
unregister_pernet_subsys(&iptable_raw_net_ops);
kfree(rawtable_ops);
+ xt_unregister_template(&packet_raw);
}
module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 77973f5fd8f6..f519162a2fa5 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -25,15 +25,12 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
-static int __net_init iptable_security_table_init(struct net *net);
-
static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_SECURITY,
- .table_init = iptable_security_table_init,
};
static unsigned int
@@ -45,7 +42,7 @@ iptable_security_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *sectbl_ops __read_mostly;
-static int __net_init iptable_security_table_init(struct net *net)
+static int iptable_security_table_init(struct net *net)
{
struct ipt_replace *repl;
int ret;
@@ -75,24 +72,25 @@ static struct pernet_operations iptable_security_net_ops = {
static int __init iptable_security_init(void)
{
- int ret;
+ int ret = xt_register_template(&security_table,
+ iptable_security_table_init);
+
+ if (ret < 0)
+ return ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
- if (IS_ERR(sectbl_ops))
+ if (IS_ERR(sectbl_ops)) {
+ xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
+ }
ret = register_pernet_subsys(&iptable_security_net_ops);
if (ret < 0) {
+ xt_unregister_template(&security_table);
kfree(sectbl_ops);
return ret;
}
- ret = iptable_security_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&iptable_security_net_ops);
- kfree(sectbl_ops);
- }
-
return ret;
}
@@ -100,6 +98,7 @@ static void __exit iptable_security_fini(void)
{
unregister_pernet_subsys(&iptable_security_net_ops);
kfree(sectbl_ops);
+ xt_unregister_template(&security_table);
}
module_init(iptable_security_init);
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index ff437e4ed6db..55fc23a8f7a7 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,7 +27,7 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset(nft_net(pkt), pkt->xt.state->sk, pkt->skb,
+ nf_send_reset(nft_net(pkt), nft_sk(pkt), pkt->skb,
nft_hook(pkt));
break;
default:
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 4075230b14c6..75ca4b6e484f 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -2490,6 +2490,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
.fc_gw4 = cfg->gw.ipv4,
.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
.fc_flags = cfg->nh_flags,
+ .fc_nlinfo = cfg->nlinfo,
.fc_encap = cfg->nh_encap,
.fc_encap_type = cfg->nh_encap_type,
};
@@ -2528,6 +2529,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh,
.fc_ifindex = cfg->nh_ifindex,
.fc_gateway = cfg->gw.ipv6,
.fc_flags = cfg->nh_flags,
+ .fc_nlinfo = cfg->nlinfo,
.fc_encap = cfg->nh_encap,
.fc_encap_type = cfg->nh_encap_type,
.fc_is_fdb = cfg->nh_fdb,
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 95a718397fd1..1e44a43acfe2 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -573,7 +573,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info)
}
}
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
out:
sock_put(sk);
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6d46297a99f8..b0d3a09dc84e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -295,6 +295,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
+ SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS),
+ SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 9a8c0892622b..6913979948d7 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -31,12 +31,6 @@ EXPORT_SYMBOL(inet_offloads);
int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
{
- if (!prot->netns_ok) {
- pr_err("Protocol %u is not namespace aware, cannot register.\n",
- protocol);
- return -EINVAL;
- }
-
return !cmpxchg((const struct net_protocol **)&inet_protos[protocol],
NULL, prot) ? 0 : -1;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 50a73178d63a..bb446e60cf58 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -280,7 +280,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
if (inet->recverr || harderr) {
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
}
@@ -929,7 +929,7 @@ int raw_abort(struct sock *sk, int err)
lock_sock(sk);
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
__udp_disconnect(sk, 0);
release_sock(sk);
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index 1b5b8af27aaf..ccacbde30a2c 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -119,11 +119,8 @@ static int raw_diag_dump_one(struct netlink_callback *cb,
return err;
}
- err = netlink_unicast(net->diag_nlsk, rep,
- NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
return err;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6a36ac98476f..d6899ab5fb39 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -276,12 +276,13 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
struct rt_cache_stat *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
+ seq_puts(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
return 0;
}
- seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
- " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x\n",
dst_entries_get_slow(&ipv4_dst_ops),
0, /* st->in_hit */
st->in_slow_tot,
@@ -586,28 +587,35 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
}
}
-static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
+static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash)
{
- struct fib_nh_exception *fnhe, *oldest;
+ struct fib_nh_exception __rcu **fnhe_p, **oldest_p;
+ struct fib_nh_exception *fnhe, *oldest = NULL;
- oldest = rcu_dereference(hash->chain);
- for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
- fnhe = rcu_dereference(fnhe->fnhe_next)) {
- if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
+ for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) {
+ fnhe = rcu_dereference_protected(*fnhe_p,
+ lockdep_is_held(&fnhe_lock));
+ if (!fnhe)
+ break;
+ if (!oldest ||
+ time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) {
oldest = fnhe;
+ oldest_p = fnhe_p;
+ }
}
fnhe_flush_routes(oldest);
- return oldest;
+ *oldest_p = oldest->fnhe_next;
+ kfree_rcu(oldest, rcu);
}
-static inline u32 fnhe_hashfun(__be32 daddr)
+static u32 fnhe_hashfun(__be32 daddr)
{
- static u32 fnhe_hashrnd __read_mostly;
- u32 hval;
+ static siphash_key_t fnhe_hash_key __read_mostly;
+ u64 hval;
- net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
- hval = jhash_1word((__force u32)daddr, fnhe_hashrnd);
- return hash_32(hval, FNHE_HASH_SHIFT);
+ net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key));
+ hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key);
+ return hash_64(hval, FNHE_HASH_SHIFT);
}
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
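Switching fnhe_hashfun() from jhash with a bare random seed to keyed siphash makes the bucket a given destination hashes into unpredictable to remote senders, which together with the randomized reclaim depth below blunts hash-flooding and timing probes against the exception cache. The keyed bucket selection in isolation (a sketch, assuming FNHE_HASH_SHIFT buckets as in this file):

static u32 keyed_fnhe_bucket(__be32 daddr)
{
        static siphash_key_t key __read_mostly;

        net_get_random_once(&key, sizeof(key));         /* per-boot secret key */
        return hash_64(siphash_1u32((__force u32)daddr, &key), FNHE_HASH_SHIFT);
}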
@@ -676,16 +684,21 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
if (rt)
fill_route_from_fnhe(rt, fnhe);
} else {
- if (depth > FNHE_RECLAIM_DEPTH)
- fnhe = fnhe_oldest(hash);
- else {
- fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
- if (!fnhe)
- goto out_unlock;
-
- fnhe->fnhe_next = hash->chain;
- rcu_assign_pointer(hash->chain, fnhe);
+ /* Randomize the max depth to avoid some side-channel attacks. */
+ int max_depth = FNHE_RECLAIM_DEPTH +
+ prandom_u32_max(FNHE_RECLAIM_DEPTH);
+
+ while (depth > max_depth) {
+ fnhe_remove_oldest(hash);
+ depth--;
}
+
+ fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
+ if (!fnhe)
+ goto out_unlock;
+
+ fnhe->fnhe_next = hash->chain;
+
fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
@@ -693,6 +706,8 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr,
fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = max(1UL, expires);
+ rcu_assign_pointer(hash->chain, fnhe);
+
/* Exception created; mark the cached routes for the nexthop
* stale, so anyone caching it rechecks if this exception
* applies to them.
@@ -1299,25 +1314,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst)
{
- const struct rtable *rt = (const struct rtable *)dst;
- unsigned int mtu = rt->rt_pmtu;
-
- if (!mtu || time_after_eq(jiffies, rt->dst.expires))
- mtu = dst_metric_raw(dst, RTAX_MTU);
-
- if (mtu)
- return mtu;
-
- mtu = READ_ONCE(dst->dev->mtu);
-
- if (unlikely(ip_mtu_locked(dst))) {
- if (rt->rt_uses_gateway && mtu > 576)
- mtu = 576;
- }
-
- mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
-
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ return ip_dst_mtu_maybe_forward(dst, false);
}
EXPORT_INDIRECT_CALLABLE(ipv4_mtu);
@@ -1906,13 +1903,128 @@ out:
hash_keys->addrs.v4addrs.dst = key_iph->daddr;
}
+static u32 fib_multipath_custom_hash_outer(const struct net *net,
+ const struct sk_buff *skb,
+ bool *p_has_inner)
+{
+ u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ struct flow_keys keys, hash_keys;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+ hash_keys.ports.src = keys.ports.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+ hash_keys.ports.dst = keys.ports.dst;
+
+ *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
+ return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 fib_multipath_custom_hash_inner(const struct net *net,
+ const struct sk_buff *skb,
+ bool has_inner)
+{
+ u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ struct flow_keys keys, hash_keys;
+
+ /* We assume the packet carries an encapsulation, but if none was
+ * encountered during dissection of the outer flow, then there is no
+ * point in calling the flow dissector again.
+ */
+ if (!has_inner)
+ return 0;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+ if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
+ return 0;
+
+ if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+ hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+ hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
+ hash_keys.tags.flow_label = keys.tags.flow_label;
+ }
+
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
+ hash_keys.ports.src = keys.ports.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+ hash_keys.ports.dst = keys.ports.dst;
+
+ return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 fib_multipath_custom_hash_skb(const struct net *net,
+ const struct sk_buff *skb)
+{
+ u32 mhash, mhash_inner;
+ bool has_inner = true;
+
+ mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner);
+ mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner);
+
+ return jhash_2words(mhash, mhash_inner, 0);
+}
+
+static u32 fib_multipath_custom_hash_fl4(const struct net *net,
+ const struct flowi4 *fl4)
+{
+ u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ struct flow_keys hash_keys;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+ hash_keys.addrs.v4addrs.src = fl4->saddr;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+ hash_keys.addrs.v4addrs.dst = fl4->daddr;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+ hash_keys.basic.ip_proto = fl4->flowi4_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+ hash_keys.ports.src = fl4->fl4_sport;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+ hash_keys.ports.dst = fl4->fl4_dport;
+
+ return flow_hash_from_keys(&hash_keys);
+}
+
/* if skb is set it will be used and fl4 can be NULL */
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys)
{
u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0;
struct flow_keys hash_keys;
- u32 mhash;
+ u32 mhash = 0;
switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
case 0:
@@ -1924,6 +2036,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
hash_keys.addrs.v4addrs.src = fl4->saddr;
hash_keys.addrs.v4addrs.dst = fl4->daddr;
}
+ mhash = flow_hash_from_keys(&hash_keys);
break;
case 1:
/* skb is currently provided only when forwarding */
@@ -1957,6 +2070,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
hash_keys.ports.dst = fl4->fl4_dport;
hash_keys.basic.ip_proto = fl4->flowi4_proto;
}
+ mhash = flow_hash_from_keys(&hash_keys);
break;
case 2:
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -1987,9 +2101,15 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
hash_keys.addrs.v4addrs.src = fl4->saddr;
hash_keys.addrs.v4addrs.dst = fl4->daddr;
}
+ mhash = flow_hash_from_keys(&hash_keys);
+ break;
+ case 3:
+ if (skb)
+ mhash = fib_multipath_custom_hash_skb(net, skb);
+ else
+ mhash = fib_multipath_custom_hash_fl4(net, fl4);
break;
}
- mhash = flow_hash_from_keys(&hash_keys);
if (multipath_hash)
mhash = jhash_2words(mhash, multipath_hash, 0);
@@ -2707,8 +2827,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
new->output = dst_discard_out;
new->dev = net->loopback_dev;
- if (new->dev)
- dev_hold(new->dev);
+ dev_hold(new->dev);
rt->rt_is_input = ort->rt_is_input;
rt->rt_iif = ort->rt_iif;
@@ -3046,7 +3165,7 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
udph = skb_put_zero(skb, sizeof(struct udphdr));
udph->source = sport;
udph->dest = dport;
- udph->len = sizeof(struct udphdr);
+ udph->len = htons(sizeof(struct udphdr));
udph->check = 0;
break;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a62934b9f15a..6f1e64d49232 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -19,6 +19,7 @@
#include <net/snmp.h>
#include <net/icmp.h>
#include <net/ip.h>
+#include <net/ip_fib.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/udp.h>
@@ -29,6 +30,7 @@
#include <net/netevent.h>
static int two = 2;
+static int three __maybe_unused = 3;
static int four = 4;
static int thousand = 1000;
static int tcp_retr1_max = 255;
@@ -48,6 +50,8 @@ static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
+static u32 fib_multipath_hash_fields_all_mask __maybe_unused =
+ FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -461,6 +465,22 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
return ret;
}
+
+static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv4.sysctl_fib_multipath_hash_fields);
+ ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
#endif
static struct ctl_table ipv4_table[] = {
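
The new handler recovers the owning netns from the sysctl's data pointer with container_of() and, on a successful write, fires a netevent so drivers offloading the multipath hash can reprogram themselves. container_of() itself is just pointer arithmetic over offsetof(); a stand-alone illustration (structure and field names are invented for the example):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct netns_like {
	int other_state;
	unsigned int hash_fields;	/* plays the role of table->data */
};

int main(void)
{
	struct netns_like ns = { .hash_fields = 3 };
	unsigned int *data = &ns.hash_fields;
	struct netns_like *back = container_of(data, struct netns_like, hash_fields);

	printf("%u\n", back->hash_fields);	/* prints 3 */
	return 0;
}
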
@@ -941,6 +961,15 @@ static struct ctl_table ipv4_net_table[] = {
},
#endif
{
+ .procname = "tcp_migrate_req",
+ .data = &init_net.ipv4.sysctl_tcp_migrate_req,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
+ {
.procname = "tcp_reordering",
.data = &init_net.ipv4.sysctl_tcp_reordering,
.maxlen = sizeof(int),
@@ -1050,7 +1079,16 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = &three,
+ },
+ {
+ .procname = "fib_multipath_hash_fields",
+ .data = &init_net.ipv4.sysctl_fib_multipath_hash_fields,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_fib_multipath_hash_fields,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &fib_multipath_hash_fields_all_mask,
},
#endif
{
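
With the policy maximum raised from 2 to 3 and the new fib_multipath_hash_fields entry in place, the custom hash is selected per network namespace entirely from userspace. A minimal sketch of configuring it (the mask value is a placeholder; the field-to-bit mapping is documented in Documentation/networking/ip-sysctl.rst and not restated here):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_sysctl(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* 3 == custom multipath hash policy (the new maximum in this patch) */
	write_sysctl("/proc/sys/net/ipv4/fib_multipath_hash_policy", "3");
	/* Placeholder decimal mask; see ip-sysctl.rst for the bit layout. */
	write_sysctl("/proc/sys/net/ipv4/fib_multipath_hash_fields", "55");
	return 0;
}
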
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f1c1f9e3de72..e8b48df73c85 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1375,6 +1375,9 @@ new_segment:
}
pfrag->offset += copy;
} else {
+ if (!sk_wmem_schedule(sk, copy))
+ goto wait_for_space;
+
err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
if (err == -EMSGSIZE || err == -EEXIST) {
tcp_mark_push(tp, skb);
@@ -1738,8 +1741,8 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
}
EXPORT_SYMBOL(tcp_set_rcvlowat);
-static void tcp_update_recv_tstamps(struct sk_buff *skb,
- struct scm_timestamping_internal *tss)
+void tcp_update_recv_tstamps(struct sk_buff *skb,
+ struct scm_timestamping_internal *tss)
{
if (skb->tstamp)
tss->ts[0] = ktime_to_timespec64(skb->tstamp);
@@ -2024,8 +2027,6 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
}
#define TCP_VALID_ZC_MSG_FLAGS (TCP_CMSG_TS)
-static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
- struct scm_timestamping_internal *tss);
static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
struct tcp_zerocopy_receive *zc,
struct scm_timestamping_internal *tss)
@@ -2095,8 +2096,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
mmap_read_lock(current->mm);
- vma = find_vma(current->mm, address);
- if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) {
+ vma = vma_lookup(current->mm, address);
+ if (!vma || vma->vm_ops != &tcp_vm_ops) {
mmap_read_unlock(current->mm);
return -EINVAL;
}
@@ -2197,8 +2198,8 @@ out:
#endif
/* Similar to __sock_recv_timestamp, but does not require an skb */
-static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
- struct scm_timestamping_internal *tss)
+void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
+ struct scm_timestamping_internal *tss)
{
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
bool has_timestamping = false;
@@ -3061,7 +3062,7 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_frag.offset = 0;
}
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return 0;
}
EXPORT_SYMBOL(tcp_disconnect);
@@ -3337,6 +3338,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
} else {
tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
SOCK_MIN_RCVBUF / 2 : val;
+ tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
}
return 0;
}
@@ -4450,7 +4452,7 @@ int tcp_abort(struct sock *sk, int err)
sk->sk_err = err;
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
- sk->sk_error_report(sk);
+ sk_error_report(sk);
if (tcp_need_reset(sk->sk_state))
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
@@ -4511,7 +4513,9 @@ void __init tcp_init(void)
tcp_hashinfo.bind_bucket_cachep =
kmem_cache_create("tcp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT,
+ NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 6ea3dc2e4219..6274462b86b4 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -1041,7 +1041,7 @@ static void bbr_init(struct sock *sk)
bbr->prior_cwnd = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
- bbr->next_rtt_delivered = 0;
+ bbr->next_rtt_delivered = tp->delivered;
bbr->prev_ca_state = TCP_CA_Open;
bbr->packet_conservation = 0;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ad9d17923fc5..d3e9386b493e 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -163,6 +163,28 @@ static bool tcp_bpf_stream_read(const struct sock *sk)
return !empty;
}
+static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
+ long timeo)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int ret = 0;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
+ if (!timeo)
+ return ret;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ ret = sk_wait_event(sk, &timeo,
+ !list_empty(&psock->ingress_msg) ||
+ !skb_queue_empty(&sk->sk_receive_queue), &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+}
+
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -184,11 +206,11 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
msg_bytes_ready:
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
if (!copied) {
- int data, err = 0;
long timeo;
+ int data;
timeo = sock_rcvtimeo(sk, nonblock);
- data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+ data = tcp_msg_wait_data(sk, psock, timeo);
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
@@ -196,14 +218,9 @@ msg_bytes_ready:
sk_psock_put(sk, psock);
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
}
- if (err) {
- ret = err;
- goto out;
- }
copied = -EAGAIN;
}
ret = copied;
-out:
release_sock(sk);
sk_psock_put(sk, psock);
return ret;
@@ -486,7 +503,7 @@ static int __init tcp_bpf_v4_build_proto(void)
tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
return 0;
}
-core_initcall(tcp_bpf_v4_build_proto);
+late_initcall(tcp_bpf_v4_build_proto);
static int tcp_bpf_assert_proto_ops(struct proto *ops)
{
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index af2814c9342a..59412d6354a0 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -55,12 +55,7 @@ void tcp_fastopen_ctx_destroy(struct net *net)
{
struct tcp_fastopen_context *ctxt;
- spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
-
- ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
- spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+ ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL);
if (ctxt)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
@@ -89,18 +84,12 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
ctx->num = 1;
}
- spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
if (sk) {
q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
- octx = rcu_dereference_protected(q->ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(q->ctx, ctx);
+ octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx);
} else {
- octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
- lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
- rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx);
}
- spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
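
The spinlock plus rcu_dereference_protected()/rcu_assign_pointer() dance is replaced by a single xchg(): whichever old context a writer pulls out is retired exactly once via call_rcu(), while concurrent readers keep using it until their read-side sections end. A condensed kernel-style sketch of the resulting publish/retire pattern (types and function names are stand-ins, not from this patch):

struct ctx {
	struct rcu_head rcu;
	/* ... key material ... */
};

static struct ctx __rcu *current_ctx;

static void ctx_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct ctx, rcu));
}

static void publish_ctx(struct ctx *new)
{
	struct ctx *old;

	/* Atomically swap in the new context; readers see old or new, never a mix. */
	old = xchg((__force struct ctx **)&current_ctx, new);
	if (old)
		call_rcu(&old->rcu, ctx_free_rcu);
}
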
@@ -379,8 +368,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
return NULL;
}
- if (syn_data &&
- tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
+ if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
if (foc->len == 0) {
@@ -507,8 +495,18 @@ void tcp_fastopen_active_disable(struct sock *sk)
{
struct net *net = sock_net(sk);
+ if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+ return;
+
+ /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
+ WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in tcp_fastopen_active_should_disable().
+ * We want net->ipv4.tfo_active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
atomic_inc(&net->ipv4.tfo_active_disable_times);
- net->ipv4.tfo_active_disable_stamp = jiffies;
+
NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}
@@ -519,17 +517,27 @@ void tcp_fastopen_active_disable(struct sock *sk)
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
- int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
unsigned long timeout;
+ int tfo_da_times;
int multiplier;
+ if (!tfo_bh_timeout)
+ return false;
+
+ tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
if (!tfo_da_times)
return false;
- /* Limit timout to max: 2^6 * initial timeout */
+ /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */
+ smp_rmb();
+
+ /* Limit timeout to max: 2^6 * initial timeout */
multiplier = 1 << min(tfo_da_times - 1, 6);
- timeout = multiplier * tfo_bh_timeout * HZ;
- if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
+
+ /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */
+ timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) +
+ multiplier * tfo_bh_timeout * HZ;
+ if (time_before(jiffies, timeout))
return true;
/* Mark the check bit so we can detect a successful active TFO
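
Two things change here: the blackhole logic becomes a no-op when the sysctl timeout is 0, and the disable stamp is published before the counter that advertises it, with smp_mb__before_atomic() on the writer side pairing with smp_rmb() on the reader side. A stripped-down kernel-style sketch of that ordering pattern (variable names are illustrative):

static atomic_t times;
static unsigned long stamp;

static void record_event(void)
{
	WRITE_ONCE(stamp, jiffies);
	/* Make the stamp visible before the counter that advertises it. */
	smp_mb__before_atomic();
	atomic_inc(&times);
}

static bool event_recent(unsigned long timeout)
{
	if (!atomic_read(&times))
		return false;
	/* Pairs with smp_mb__before_atomic() in record_event(). */
	smp_rmb();
	return time_before(jiffies, READ_ONCE(stamp) + timeout);
}
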
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4cf4dd532d1c..3f7bd7ae7d7a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
#define FLAG_ACK_MAYBE_DELAYED 0x10000 /* Likely a delayed ACK */
+#define FLAG_DSACK_TLP 0x20000 /* DSACK for tail loss probe */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -454,11 +455,12 @@ static void tcp_sndbuf_expand(struct sock *sk)
*/
/* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int skbtruesize)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(sk, skb->truesize) >> 1;
+ int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
@@ -471,7 +473,27 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
return 0;
}
-static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+/* Even if skb appears to have a bad len/truesize ratio, TCP coalescing
+ * can play nice with us, as sk_buff and skb->head might be either
+ * freed or shared with up to MAX_SKB_FRAGS segments.
+ * Only give a boost to drivers using page frag(s) to hold the frame(s),
+ * and if no payload was pulled in skb->head before reaching us.
+ */
+static u32 truesize_adjust(bool adjust, const struct sk_buff *skb)
+{
+ u32 truesize = skb->truesize;
+
+ if (adjust && !skb_headlen(skb)) {
+ truesize -= SKB_TRUESIZE(skb_end_offset(skb));
+ /* paranoid check, some drivers might be buggy */
+ if (unlikely((int)truesize < (int)skb->len))
+ truesize = skb->truesize;
+ }
+ return truesize;
+}
+
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
+ bool adjust)
{
struct tcp_sock *tp = tcp_sk(sk);
int room;
@@ -480,15 +502,16 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #1 */
if (room > 0 && !tcp_under_memory_pressure(sk)) {
+ unsigned int truesize = truesize_adjust(adjust, skb);
int incr;
/* Check #2. Increase window, if skb with such overhead
* will fit to rcvbuf in future.
*/
- if (tcp_win_from_space(sk, skb->truesize) <= skb->len)
+ if (tcp_win_from_space(sk, truesize) <= skb->len)
incr = 2 * tp->advmss;
else
- incr = __tcp_grow_window(sk, skb);
+ incr = __tcp_grow_window(sk, skb, truesize);
if (incr) {
incr = max_t(int, incr, 2 * skb->len);
@@ -782,7 +805,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_ecn_check_ce(sk, skb);
if (skb->len >= 128)
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
}
/* Called to compute a smoothed rtt estimate. The data fed to this
@@ -969,6 +992,8 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
return 0;
if (seq_len > tp->mss_cache)
dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
+ else if (tp->tlp_high_seq && tp->tlp_high_seq == end_seq)
+ state->flag |= FLAG_DSACK_TLP;
tp->dsack_dups += dup_segs;
/* Skip the DSACK if dup segs weren't retransmitted by sender */
@@ -976,7 +1001,14 @@ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
return 0;
tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
- tp->rack.dsack_seen = 1;
+ /* We increase the RACK ordering window in rounds where we receive
+ * DSACKs that may have been due to reordering causing RACK to trigger
+ * a spurious fast recovery. Thus RACK ignores DSACKs that happen
+ * without having seen reordering, or that match TLP probes (TLP
+ * is timer-driven, not triggered by RACK).
+ */
+ if (tp->reord_seen && !(state->flag & FLAG_DSACK_TLP))
+ tp->rack.dsack_seen = 1;
state->flag |= FLAG_DSACKING_ACK;
/* A spurious retransmission is delivered */
@@ -2816,8 +2848,17 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
*rexmit = REXMIT_LOST;
}
+static bool tcp_force_fast_retransmit(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ return after(tcp_highest_sack_seq(tp),
+ tp->snd_una + tp->reordering * tp->mss_cache);
+}
+
/* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
+static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una,
+ bool *do_lost)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2842,7 +2883,9 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
tcp_undo_cwnd_reduction(sk, true);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
tcp_try_keep_open(sk);
- return true;
+ } else {
+ /* Partial ACK arrived. Force fast retransmit. */
+ *do_lost = tcp_force_fast_retransmit(sk);
}
return false;
}
@@ -2866,14 +2909,6 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
}
}
-static bool tcp_force_fast_retransmit(struct sock *sk)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- return after(tcp_highest_sack_seq(tp),
- tp->snd_una + tp->reordering * tp->mss_cache);
-}
-
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
@@ -2943,17 +2978,21 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
if (tcp_is_reno(tp))
tcp_add_reno_sack(sk, num_dupack, ece_ack);
- } else {
- if (tcp_try_undo_partial(sk, prior_snd_una))
- return;
- /* Partial ACK arrived. Force fast retransmit. */
- do_lost = tcp_force_fast_retransmit(sk);
- }
- if (tcp_try_undo_dsack(sk)) {
- tcp_try_keep_open(sk);
+ } else if (tcp_try_undo_partial(sk, prior_snd_una, &do_lost))
return;
- }
+
+ if (tcp_try_undo_dsack(sk))
+ tcp_try_keep_open(sk);
+
tcp_identify_packet_loss(sk, ack_flag);
+ if (icsk->icsk_ca_state != TCP_CA_Recovery) {
+ if (!tcp_time_to_recover(sk, flag))
+ return;
+ /* Undo reverts the recovery state. If loss is evident,
+ * start a new recovery (e.g. reordering then loss).
+ */
+ tcp_enter_recovery(sk, ece_ack);
+ }
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, num_dupack, rexmit);
@@ -3621,7 +3660,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
if (!tp->tlp_retrans) {
/* TLP of new data has been acknowledged */
tp->tlp_high_seq = 0;
- } else if (flag & FLAG_DSACKING_ACK) {
+ } else if (flag & FLAG_DSACK_TLP) {
/* This DSACK means original and TLP probe arrived; no loss */
tp->tlp_high_seq = 0;
} else if (after(ack, tp->tlp_high_seq)) {
@@ -4240,6 +4279,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
trace_tcp_receive_reset(sk);
+ /* mptcp can't tell us to ignore reset pkts,
+ * so just ignore the return value of mptcp_incoming_options().
+ */
if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb);
@@ -4263,7 +4305,7 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
tcp_done(sk);
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
/*
@@ -4759,7 +4801,7 @@ coalesce_done:
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, true);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4847,7 +4889,7 @@ end:
* and trigger fast retransmit.
*/
if (tcp_is_sack(tp))
- tcp_grow_window(sk, skb);
+ tcp_grow_window(sk, skb, false);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
@@ -4934,8 +4976,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
bool fragstolen;
int eaten;
- if (sk_is_mptcp(sk))
- mptcp_incoming_options(sk, skb);
+ /* If a subflow has been reset, the packet should not continue
+ * to be processed; drop the packet.
+ */
+ if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
+ __kfree_skb(skb);
+ return;
+ }
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb);
@@ -5368,7 +5415,7 @@ static void tcp_new_space(struct sock *sk)
tp->snd_cwnd_stamp = tcp_jiffies32;
}
- sk->sk_write_space(sk);
+ INDIRECT_CALL_1(sk->sk_write_space, sk_stream_write_space, sk);
}
static void tcp_check_space(struct sock *sk)
@@ -5885,6 +5932,7 @@ step5:
return;
csum_error:
+ trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -5914,8 +5962,8 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
tp->snd_cwnd_stamp = tcp_jiffies32;
- icsk->icsk_ca_initialized = 0;
bpf_skops_established(sk, bpf_op, skb);
+ /* Initialize congestion control unless BPF initialized it already: */
if (!icsk->icsk_ca_initialized)
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
@@ -6515,8 +6563,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING:
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- if (sk_is_mptcp(sk))
- mptcp_incoming_options(sk, skb);
+ /* If a subflow has been reset, the packet should not
+ * continue to be processed; drop the packet.
+ */
+ if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
+ goto discard;
break;
}
fallthrough;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 312184cead57..2e62e0d6373a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -342,7 +342,7 @@ void tcp_v4_mtu_reduced(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- mtu = tcp_sk(sk)->mtu_info;
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -546,7 +546,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
if (sk->sk_state == TCP_LISTEN)
goto out;
- tp->mtu_info = info;
+ WRITE_ONCE(tp->mtu_info, info);
if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
} else {
@@ -585,7 +585,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
tcp_done(sk);
} else {
@@ -613,7 +613,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
inet = inet_sk(sk);
if (!sock_owned_by_user(sk) && inet->recverr) {
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
} else { /* Only an error on timeout */
sk->sk_err_soft = err;
}
@@ -1731,6 +1731,7 @@ discard:
return 0;
csum_err:
+ trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1801,6 +1802,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
if (unlikely(tcp_checksum_complete(skb))) {
bh_unlock_sock(sk);
+ trace_tcp_bad_csum(skb);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
return true;
@@ -2000,13 +2002,21 @@ process:
goto csum_error;
}
if (unlikely(sk->sk_state != TCP_LISTEN)) {
- inet_csk_reqsk_queue_drop_and_put(sk, req);
- goto lookup;
+ nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+ if (!nsk) {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ sk = nsk;
+ /* reuseport_migrate_sock() has already held one sk_refcnt
+ * before returning.
+ */
+ } else {
+ /* We own a reference on the listener, increase it again
+ * as we might lose it too soon.
+ */
+ sock_hold(sk);
}
- /* We own a reference on the listener, increase it again
- * as we might lose it too soon.
- */
- sock_hold(sk);
refcounted = true;
nsk = NULL;
if (!tcp_filter(sk, skb)) {
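
Instead of dropping a request whose listener is no longer in TCP_LISTEN, the lookup now asks reuseport_migrate_sock() for another member of the SO_REUSEPORT group, gated by the net.ipv4.tcp_migrate_req sysctl added earlier in this series. For context, a minimal sketch of the listener group such migration operates on (ordinary socket API; error handling trimmed):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Two or more of these sharing one port form the reuseport group that
 * requests can be migrated between when one listener goes away.
 */
static int make_listener(unsigned short port)
{
	int one = 1;
	struct sockaddr_in addr;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_port = htons(port);
	addr.sin_addr.s_addr = htonl(INADDR_ANY);

	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 128) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
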
@@ -2098,6 +2108,7 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
+ trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
@@ -2266,51 +2277,72 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
-/*
- * Get next listener socket follow cur. If cur is NULL, get first socket
- * starting from bucket given in st->bucket; when st->bucket is zero the
- * very first socket in the hash table is returned.
+static unsigned short seq_file_family(const struct seq_file *seq);
+
+static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+{
+ unsigned short family = seq_file_family(seq);
+
+ /* AF_UNSPEC is used as a match all */
+ return ((family == AF_UNSPEC || family == sk->sk_family) &&
+ net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+/* Find a non-empty bucket (starting from st->bucket)
+ * and return the first sk from it.
*/
-static void *listening_get_next(struct seq_file *seq, void *cur)
+static void *listening_get_first(struct seq_file *seq)
{
- struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
- struct inet_listen_hashbucket *ilb;
- struct hlist_nulls_node *node;
- struct sock *sk = cur;
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
+ st->offset = 0;
+ for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+ struct inet_listen_hashbucket *ilb2;
+ struct inet_connection_sock *icsk;
+ struct sock *sk;
- if (!sk) {
-get_head:
- ilb = &tcp_hashinfo.listening_hash[st->bucket];
- spin_lock(&ilb->lock);
- sk = sk_nulls_head(&ilb->nulls_head);
- st->offset = 0;
- goto get_sk;
+ ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+ if (hlist_empty(&ilb2->head))
+ continue;
+
+ spin_lock(&ilb2->lock);
+ inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk))
+ return sk;
+ }
+ spin_unlock(&ilb2->lock);
}
- ilb = &tcp_hashinfo.listening_hash[st->bucket];
+
+ return NULL;
+}
+
+/* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
+ * If "cur" is the last one in the st->bucket,
+ * call listening_get_first() to return the first sk of the next
+ * non-empty bucket.
+ */
+static void *listening_get_next(struct seq_file *seq, void *cur)
+{
+ struct tcp_iter_state *st = seq->private;
+ struct inet_listen_hashbucket *ilb2;
+ struct inet_connection_sock *icsk;
+ struct sock *sk = cur;
+
++st->num;
++st->offset;
- sk = sk_nulls_next(sk);
-get_sk:
- sk_nulls_for_each_from(sk, node) {
- if (!net_eq(sock_net(sk), net))
- continue;
- if (afinfo->family == AF_UNSPEC ||
- sk->sk_family == afinfo->family)
+ icsk = inet_csk(sk);
+ inet_lhash2_for_each_icsk_continue(icsk) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk))
return sk;
}
- spin_unlock(&ilb->lock);
- st->offset = 0;
- if (++st->bucket < INET_LHTABLE_SIZE)
- goto get_head;
- return NULL;
+
+ ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+ spin_unlock(&ilb2->lock);
+ ++st->bucket;
+ return listening_get_first(seq);
}
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
@@ -2320,7 +2352,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
st->bucket = 0;
st->offset = 0;
- rc = listening_get_next(seq, NULL);
+ rc = listening_get_first(seq);
while (rc && *pos) {
rc = listening_get_next(seq, rc);
@@ -2340,15 +2372,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
*/
static void *established_get_first(struct seq_file *seq)
{
- struct tcp_seq_afinfo *afinfo;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
- void *rc = NULL;
-
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
st->offset = 0;
for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
@@ -2362,32 +2386,20 @@ static void *established_get_first(struct seq_file *seq)
spin_lock_bh(lock);
sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
- if ((afinfo->family != AF_UNSPEC &&
- sk->sk_family != afinfo->family) ||
- !net_eq(sock_net(sk), net)) {
- continue;
- }
- rc = sk;
- goto out;
+ if (seq_sk_match(seq, sk))
+ return sk;
}
spin_unlock_bh(lock);
}
-out:
- return rc;
+
+ return NULL;
}
static void *established_get_next(struct seq_file *seq, void *cur)
{
- struct tcp_seq_afinfo *afinfo;
struct sock *sk = cur;
struct hlist_nulls_node *node;
struct tcp_iter_state *st = seq->private;
- struct net *net = seq_file_net(seq);
-
- if (st->bpf_seq_afinfo)
- afinfo = st->bpf_seq_afinfo;
- else
- afinfo = PDE_DATA(file_inode(seq->file));
++st->num;
++st->offset;
@@ -2395,9 +2407,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
sk = sk_nulls_next(sk);
sk_nulls_for_each_from(sk, node) {
- if ((afinfo->family == AF_UNSPEC ||
- sk->sk_family == afinfo->family) &&
- net_eq(sock_net(sk), net))
+ if (seq_sk_match(seq, sk))
return sk;
}
@@ -2440,17 +2450,18 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
static void *tcp_seek_last_pos(struct seq_file *seq)
{
struct tcp_iter_state *st = seq->private;
+ int bucket = st->bucket;
int offset = st->offset;
int orig_num = st->num;
void *rc = NULL;
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
- if (st->bucket >= INET_LHTABLE_SIZE)
+ if (st->bucket > tcp_hashinfo.lhash2_mask)
break;
st->state = TCP_SEQ_STATE_LISTENING;
- rc = listening_get_next(seq, NULL);
- while (offset-- && rc)
+ rc = listening_get_first(seq);
+ while (offset-- && rc && bucket == st->bucket)
rc = listening_get_next(seq, rc);
if (rc)
break;
@@ -2461,7 +2472,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
if (st->bucket > tcp_hashinfo.ehash_mask)
break;
rc = established_get_first(seq);
- while (offset-- && rc)
+ while (offset-- && rc && bucket == st->bucket)
rc = established_get_next(seq, rc);
}
@@ -2531,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v)
switch (st->state) {
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
- spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
+ spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
break;
case TCP_SEQ_STATE_ESTABLISHED:
if (v)
@@ -2676,6 +2687,15 @@ out:
}
#ifdef CONFIG_BPF_SYSCALL
+struct bpf_tcp_iter_state {
+ struct tcp_iter_state state;
+ unsigned int cur_sk;
+ unsigned int end_sk;
+ unsigned int max_sk;
+ struct sock **batch;
+ bool st_bucket_done;
+};
+
struct bpf_iter__tcp {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct sock_common *, sk_common);
@@ -2694,16 +2714,204 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
return bpf_iter_run_prog(prog, &ctx);
}
+static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
+{
+ while (iter->cur_sk < iter->end_sk)
+ sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
+ unsigned int new_batch_sz)
+{
+ struct sock **new_batch;
+
+ new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+ GFP_USER | __GFP_NOWARN);
+ if (!new_batch)
+ return -ENOMEM;
+
+ bpf_iter_tcp_put_batch(iter);
+ kvfree(iter->batch);
+ iter->batch = new_batch;
+ iter->max_sk = new_batch_sz;
+
+ return 0;
+}
+
+static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
+ struct sock *start_sk)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct inet_connection_sock *icsk;
+ unsigned int expected = 1;
+ struct sock *sk;
+
+ sock_hold(start_sk);
+ iter->batch[iter->end_sk++] = start_sk;
+
+ icsk = inet_csk(start_sk);
+ inet_lhash2_for_each_icsk_continue(icsk) {
+ sk = (struct sock *)icsk;
+ if (seq_sk_match(seq, sk)) {
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+ expected++;
+ }
+ }
+ spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+
+ return expected;
+}
+
+static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
+ struct sock *start_sk)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct hlist_nulls_node *node;
+ unsigned int expected = 1;
+ struct sock *sk;
+
+ sock_hold(start_sk);
+ iter->batch[iter->end_sk++] = start_sk;
+
+ sk = sk_nulls_next(start_sk);
+ sk_nulls_for_each_from(sk, node) {
+ if (seq_sk_match(seq, sk)) {
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+ expected++;
+ }
+ }
+ spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+
+ return expected;
+}
+
+static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ unsigned int expected;
+ bool resized = false;
+ struct sock *sk;
+
+ /* The st->bucket is done. Directly advance to the next
+ * bucket instead of having tcp_seek_last_pos() skip entries
+ * one by one in the current bucket, only to find out it has
+ * to advance to the next bucket anyway.
+ */
+ if (iter->st_bucket_done) {
+ st->offset = 0;
+ st->bucket++;
+ if (st->state == TCP_SEQ_STATE_LISTENING &&
+ st->bucket > tcp_hashinfo.lhash2_mask) {
+ st->state = TCP_SEQ_STATE_ESTABLISHED;
+ st->bucket = 0;
+ }
+ }
+
+again:
+ /* Get a new batch */
+ iter->cur_sk = 0;
+ iter->end_sk = 0;
+ iter->st_bucket_done = false;
+
+ sk = tcp_seek_last_pos(seq);
+ if (!sk)
+ return NULL; /* Done */
+
+ if (st->state == TCP_SEQ_STATE_LISTENING)
+ expected = bpf_iter_tcp_listening_batch(seq, sk);
+ else
+ expected = bpf_iter_tcp_established_batch(seq, sk);
+
+ if (iter->end_sk == expected) {
+ iter->st_bucket_done = true;
+ return sk;
+ }
+
+ if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
+ resized = true;
+ goto again;
+ }
+
+ return sk;
+}
+
+static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ /* bpf iter does not support lseek, so it always
+ * continues from where it was stop()-ped.
+ */
+ if (*pos)
+ return bpf_iter_tcp_batch(seq);
+
+ return SEQ_START_TOKEN;
+}
+
+static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_tcp_iter_state *iter = seq->private;
+ struct tcp_iter_state *st = &iter->state;
+ struct sock *sk;
+
+ /* Whenever seq_next() is called, the iter->cur_sk is
+ * done with seq_show(), so advance to the next sk in
+ * the batch.
+ */
+ if (iter->cur_sk < iter->end_sk) {
+ /* Keeping st->num consistent in tcp_iter_state.
+ * bpf_iter_tcp does not use st->num.
+ * meta.seq_num is used instead.
+ */
+ st->num++;
+ /* Move st->offset to the next sk in the bucket such that
+ * the future start() will resume at st->offset in
+ * st->bucket. See tcp_seek_last_pos().
+ */
+ st->offset++;
+ sock_put(iter->batch[iter->cur_sk++]);
+ }
+
+ if (iter->cur_sk < iter->end_sk)
+ sk = iter->batch[iter->cur_sk];
+ else
+ sk = bpf_iter_tcp_batch(seq);
+
+ ++*pos;
+ /* Keeping st->last_pos consistent in tcp_iter_state.
+ * bpf iter does not do lseek, so st->last_pos always equals to *pos.
+ */
+ st->last_pos = *pos;
+ return sk;
+}
+
static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
{
struct bpf_iter_meta meta;
struct bpf_prog *prog;
struct sock *sk = v;
+ bool slow;
uid_t uid;
+ int ret;
if (v == SEQ_START_TOKEN)
return 0;
+ if (sk_fullsock(sk))
+ slow = lock_sock_fast(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+ goto unlock;
+ }
+
if (sk->sk_state == TCP_TIME_WAIT) {
uid = 0;
} else if (sk->sk_state == TCP_NEW_SYN_RECV) {
@@ -2717,11 +2925,18 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
- return tcp_prog_seq_show(prog, &meta, v, uid);
+ ret = tcp_prog_seq_show(prog, &meta, v, uid);
+
+unlock:
+ if (sk_fullsock(sk))
+ unlock_sock_fast(sk, slow);
+ return ret;
+
}
static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
{
+ struct bpf_tcp_iter_state *iter = seq->private;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
@@ -2732,16 +2947,33 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
(void)tcp_prog_seq_show(prog, &meta, v, 0);
}
- tcp_seq_stop(seq, v);
+ if (iter->cur_sk < iter->end_sk) {
+ bpf_iter_tcp_put_batch(iter);
+ iter->st_bucket_done = false;
+ }
}
static const struct seq_operations bpf_iter_tcp_seq_ops = {
.show = bpf_iter_tcp_seq_show,
- .start = tcp_seq_start,
- .next = tcp_seq_next,
+ .start = bpf_iter_tcp_seq_start,
+ .next = bpf_iter_tcp_seq_next,
.stop = bpf_iter_tcp_seq_stop,
};
#endif
+static unsigned short seq_file_family(const struct seq_file *seq)
+{
+ const struct tcp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+ /* Iterated from bpf_iter. Let the bpf prog filter instead. */
+ if (seq->op == &bpf_iter_tcp_seq_ops)
+ return AF_UNSPEC;
+#endif
+
+ /* Iterated from proc fs */
+ afinfo = PDE_DATA(file_inode(seq->file));
+ return afinfo->family;
+}
static const struct seq_operations tcp4_seq_ops = {
.show = tcp4_seq_show,
@@ -2953,8 +3185,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
- spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
- net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
+ net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0;
atomic_set(&net->ipv4.tfo_active_disable_times, 0);
/* Reno is always built in */
@@ -2992,39 +3223,55 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
struct sock_common *sk_common, uid_t uid)
+#define INIT_BATCH_SZ 16
+
static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
{
- struct tcp_iter_state *st = priv_data;
- struct tcp_seq_afinfo *afinfo;
- int ret;
+ struct bpf_tcp_iter_state *iter = priv_data;
+ int err;
- afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
- if (!afinfo)
- return -ENOMEM;
+ err = bpf_iter_init_seq_net(priv_data, aux);
+ if (err)
+ return err;
- afinfo->family = AF_UNSPEC;
- st->bpf_seq_afinfo = afinfo;
- ret = bpf_iter_init_seq_net(priv_data, aux);
- if (ret)
- kfree(afinfo);
- return ret;
+ err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
+ if (err) {
+ bpf_iter_fini_seq_net(priv_data);
+ return err;
+ }
+
+ return 0;
}
static void bpf_iter_fini_tcp(void *priv_data)
{
- struct tcp_iter_state *st = priv_data;
+ struct bpf_tcp_iter_state *iter = priv_data;
- kfree(st->bpf_seq_afinfo);
bpf_iter_fini_seq_net(priv_data);
+ kvfree(iter->batch);
}
static const struct bpf_iter_seq_info tcp_seq_info = {
.seq_ops = &bpf_iter_tcp_seq_ops,
.init_seq_private = bpf_iter_init_tcp,
.fini_seq_private = bpf_iter_fini_tcp,
- .seq_priv_size = sizeof(struct tcp_iter_state),
+ .seq_priv_size = sizeof(struct bpf_tcp_iter_state),
};
+static const struct bpf_func_proto *
+bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
+ const struct bpf_prog *prog)
+{
+ switch (func_id) {
+ case BPF_FUNC_setsockopt:
+ return &bpf_sk_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_sk_getsockopt_proto;
+ default:
+ return NULL;
+ }
+}
+
static struct bpf_iter_reg tcp_reg_info = {
.target = "tcp",
.ctx_arg_info_size = 1,
@@ -3032,6 +3279,7 @@ static struct bpf_iter_reg tcp_reg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
PTR_TO_BTF_ID_OR_NULL },
},
+ .get_func_proto = bpf_iter_tcp_get_func_proto,
.seq_info = &tcp_seq_info,
};
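
With get_func_proto wired up, tcp iterator programs may call bpf_setsockopt()/bpf_getsockopt() on the sockets they visit; the seq_show path above takes lock_sock_fast() so the socket is properly locked while the program runs. A rough sketch of such a program, modelled on the usual libbpf conventions (the macro values and helper usage are assumptions, not taken from this patch):

// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

#define SOL_TCP		6	/* assumed UAPI values; vmlinux.h carries no macros */
#define TCP_CONGESTION	13

char cc[] = "cubic";

SEC("iter/tcp")
int set_cc(struct bpf_iter__tcp *ctx)
{
	struct sock_common *skc = ctx->sk_common;
	struct tcp_sock *tp;

	if (!skc)
		return 0;

	tp = bpf_skc_to_tcp_sock(skc);	/* NULL for request/timewait sockets */
	if (!tp)
		return 0;

	/* Allowed for iter/tcp now that bpf_setsockopt() is exposed. */
	bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, cc, sizeof(cc));
	return 0;
}

char _license[] SEC("license") = "GPL";
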
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7513ba45553d..0a4f3f16140a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -775,8 +775,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
goto listen_overflow;
if (own_req && rsk_drop_req(req)) {
- reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
- inet_csk_reqsk_queue_drop_and_put(sk, req);
+ reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+ inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
return child;
}
@@ -786,6 +786,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
return inet_csk_complete_hashdance(sk, child, req, own_req);
listen_overflow:
+ if (sk != req->rsk_listener)
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
+
if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
inet_rsk(req)->acked = 1;
return NULL;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index e09147ac9a99..fc61cd3fea65 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb)
if (th->cwr)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+ if (skb->encapsulation)
+ skb->inner_transport_header = skb->transport_header;
+
return 0;
}
EXPORT_SYMBOL(tcp_gro_complete);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bde781f46b41..6d72f3ea48c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1732,6 +1732,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
return __tcp_mtu_to_mss(sk, pmtu) -
(tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
}
+EXPORT_SYMBOL(tcp_mtu_to_mss);
/* Inverse of above */
int tcp_mss_to_mtu(struct sock *sk, int mss)
@@ -3372,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- mem_cgroup_charge_skmem(sk->sk_memcg, amt);
+ mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge() | __GFP_NOFAIL);
}
/* Send a FIN. The caller locks the socket for us.
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 6f1b4ac7fe99..fd113f6226ef 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -172,7 +172,8 @@ void tcp_rack_reo_timeout(struct sock *sk)
/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries.
*
- * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded
+ * If a DSACK is received that seems like it may have been due to reordering
+ * triggering fast recovery, increment reo_wnd by min_rtt/4 (upper bounded
* by srtt), since there is possibility that spurious retransmission was
* due to reordering delay longer than reo_wnd.
*
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4ef08079ccfa..20cf4a98c69d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -68,7 +68,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
static void tcp_write_err(struct sock *sk)
{
sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
tcp_write_queue_purge(sk);
tcp_done(sk);
@@ -441,7 +441,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
* This function gets called when the kernel timer for a TCP packet
* of this socket expires.
*
- * It handles retransmission, timer adjustment and other necesarry measures.
+ * It handles retransmission, timer adjustment and other necessary measures.
*
* Returns: Nothing (void)
*/
@@ -766,7 +766,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
if (!sock_owned_by_user(sk)) {
if (tp->compressed_ack) {
/* Since we have to send one ack finally,
- * substract one from tp->compressed_ack to keep
+ * subtract one from tp->compressed_ack to keep
* LINUX_MIB_TCPACKCOMPRESSED accurate.
*/
tp->compressed_ack--;
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 3bb448761ca3..07c4c93b9fdb 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -221,7 +221,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = {
static int __init tcp_yeah_register(void)
{
- BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
+ BUILD_BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
tcp_register_congestion_control(&tcp_yeah);
return 0;
}
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index e44aaf41a138..5048c47c79b2 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -218,7 +218,6 @@ static const struct net_protocol tunnel4_protocol = {
.handler = tunnel4_rcv,
.err_handler = tunnel4_err,
.no_policy = 1,
- .netns_ok = 1,
};
#if IS_ENABLED(CONFIG_IPV6)
@@ -226,7 +225,6 @@ static const struct net_protocol tunnel64_protocol = {
.handler = tunnel64_rcv,
.err_handler = tunnel64_err,
.no_policy = 1,
- .netns_ok = 1,
};
#endif
@@ -235,7 +233,6 @@ static const struct net_protocol tunnelmpls4_protocol = {
.handler = tunnelmpls4_rcv,
.err_handler = tunnelmpls4_err,
.no_policy = 1,
- .netns_ok = 1,
};
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1307ad0d3b9e..8851c9463b4b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -645,10 +645,12 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
const struct iphdr *iph,
struct udphdr *uh,
struct udp_table *udptable,
+ struct sock *sk,
struct sk_buff *skb, u32 info)
{
+ int (*lookup)(struct sock *sk, struct sk_buff *skb);
int network_offset, transport_offset;
- struct sock *sk;
+ struct udp_sock *up;
network_offset = skb_network_offset(skb);
transport_offset = skb_transport_offset(skb);
@@ -659,18 +661,28 @@ static struct sock *__udp4_lib_err_encap(struct net *net,
/* Transport header needs to point to the UDP header */
skb_set_transport_header(skb, iph->ihl << 2);
+ if (sk) {
+ up = udp_sk(sk);
+
+ lookup = READ_ONCE(up->encap_err_lookup);
+ if (lookup && lookup(sk, skb))
+ sk = NULL;
+
+ goto out;
+ }
+
sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
iph->saddr, uh->dest, skb->dev->ifindex, 0,
udptable, NULL);
if (sk) {
- int (*lookup)(struct sock *sk, struct sk_buff *skb);
- struct udp_sock *up = udp_sk(sk);
+ up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (!lookup || lookup(sk, skb))
sk = NULL;
}
+out:
if (!sk)
sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
@@ -707,15 +719,16 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
iph->saddr, uh->source, skb->dev->ifindex,
inet_sdif(skb), udptable, NULL);
+
if (!sk || udp_sk(sk)->encap_type) {
/* No socket for error: try tunnels before discarding */
- sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udp_encap_needed_key)) {
- sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
+ sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb,
info);
if (!sk)
return 0;
- }
+ } else
+ sk = ERR_PTR(-ENOENT);
if (IS_ERR(sk)) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
@@ -776,7 +789,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
out:
return 0;
}
@@ -1102,7 +1115,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
- ipc.gso_size = up->gso_size;
+ ipc.gso_size = READ_ONCE(up->gso_size);
if (msg->msg_controllen) {
err = udp_cmsg_send(sk, msg, &ipc.gso_size);
@@ -1130,7 +1143,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rcu_read_unlock();
}
- if (cgroup_bpf_enabled(BPF_CGROUP_UDP4_SENDMSG) && !connected) {
+ if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
(struct sockaddr *)usin, &ipc.addr);
if (err)
@@ -1798,11 +1811,13 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (used <= 0) {
if (!copied)
copied = used;
+ kfree_skb(skb);
break;
} else if (used <= skb->len) {
copied += used;
}
+ kfree_skb(skb);
if (!desc->count)
break;
}
@@ -2693,7 +2708,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
case UDP_SEGMENT:
if (val < 0 || val > USHRT_MAX)
return -EINVAL;
- up->gso_size = val;
+ WRITE_ONCE(up->gso_size, val);
break;
case UDP_GRO:
@@ -2788,7 +2803,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
break;
case UDP_SEGMENT:
- val = up->gso_size;
+ val = READ_ONCE(up->gso_size);
break;
case UDP_GRO:
@@ -2867,7 +2882,7 @@ int udp_abort(struct sock *sk, int err)
goto out;
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
__udp_disconnect(sk, 0);
out:
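
up->gso_size is now accessed with READ_ONCE()/WRITE_ONCE() because the value set through the UDP_SEGMENT socket option is read locklessly on the transmit path. For reference, a minimal sender-side sketch of that option (standard UAPI; the segment size is just an example value):

#include <netinet/in.h>
#include <netinet/udp.h>
#include <sys/socket.h>

#ifndef SOL_UDP
#define SOL_UDP 17
#endif
#ifndef UDP_SEGMENT
#define UDP_SEGMENT 103		/* define locally if libc headers predate it */
#endif

static int make_gso_sender(void)
{
	int gso_size = 1400;	/* payload bytes per emitted UDP segment */
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	/* Each large send will be chopped into 1400-byte UDP segments. */
	if (setsockopt(fd, SOL_UDP, UDP_SEGMENT, &gso_size, sizeof(gso_size)) < 0)
		return -1;
	return fd;
}
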
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 954c4591a6fd..7a1d5f473878 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -21,6 +21,45 @@ static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return udp_prot.recvmsg(sk, msg, len, noblock, flags, addr_len);
}
+static bool udp_sk_has_data(struct sock *sk)
+{
+ return !skb_queue_empty(&udp_sk(sk)->reader_queue) ||
+ !skb_queue_empty(&sk->sk_receive_queue);
+}
+
+static bool psock_has_data(struct sk_psock *psock)
+{
+ return !skb_queue_empty(&psock->ingress_skb) ||
+ !sk_psock_queue_empty(psock);
+}
+
+#define udp_msg_has_data(__sk, __psock) \
+ ({ udp_sk_has_data(__sk) || psock_has_data(__psock); })
+
+static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
+ long timeo)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int ret = 0;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
+ if (!timeo)
+ return ret;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ ret = udp_msg_has_data(sk, psock);
+ if (!ret) {
+ wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ ret = udp_msg_has_data(sk, psock);
+ }
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+}
+
static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -34,8 +73,7 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (unlikely(!psock))
return sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
- lock_sock(sk);
- if (sk_psock_queue_empty(psock)) {
+ if (!psock_has_data(psock)) {
ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
goto out;
}
@@ -43,26 +81,21 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
msg_bytes_ready:
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
if (!copied) {
- int data, err = 0;
long timeo;
+ int data;
timeo = sock_rcvtimeo(sk, nonblock);
- data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
+ data = udp_msg_wait_data(sk, psock, timeo);
if (data) {
- if (!sk_psock_queue_empty(psock))
+ if (psock_has_data(psock))
goto msg_bytes_ready;
ret = sk_udp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
goto out;
}
- if (err) {
- ret = err;
- goto out;
- }
copied = -EAGAIN;
}
ret = copied;
out:
- release_sock(sk);
sk_psock_put(sk, psock);
return ret;
}
@@ -79,7 +112,6 @@ static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
*prot = *base;
- prot->unhash = sock_map_unhash;
prot->close = sock_map_close;
prot->recvmsg = udp_bpf_recvmsg;
}
@@ -101,7 +133,7 @@ static int __init udp_bpf_v4_build_proto(void)
udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot);
return 0;
}
-core_initcall(udp_bpf_v4_build_proto);
+late_initcall(udp_bpf_v4_build_proto);
int udp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index b2cee9a307d4..1ed8c4d78e5c 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -77,10 +77,8 @@ static int udp_dump_one(struct udp_table *tbl,
kfree_skb(rep);
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
out:
if (sk)
sock_put(sk);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 54e06b88af69..86d32a1e62ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -152,8 +152,8 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
bool is_ipv6)
{
+ const struct net_offload __rcu **offloads;
__be16 protocol = skb->protocol;
- const struct net_offload **offloads;
const struct net_offload *ops;
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
@@ -525,8 +525,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
(sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist)
- pp = call_gro_receive(udp_gro_receive_segment, head, skb);
- return pp;
+ return call_gro_receive(udp_gro_receive_segment, head, skb);
+
+ /* no GRO, be sure to flush the current packet */
+ goto out;
}
if (NAPI_GRO_CB(skb)->encap_mark ||
@@ -622,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb)
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+
+ if (skb->encapsulation)
+ skb->inner_transport_header = skb->transport_header;
+
return 0;
}
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index bd8773b49e72..cd1cd68adeec 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -31,7 +31,6 @@ static const struct net_protocol udplite_protocol = {
.handler = udplite_rcv,
.err_handler = udplite_err,
.no_policy = 1,
- .netns_ok = 1,
};
struct proto udplite_prot = {
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index ea595c8549c7..2fe5860c21d6 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -181,21 +181,18 @@ static const struct net_protocol esp4_protocol = {
.handler = xfrm4_esp_rcv,
.err_handler = xfrm4_esp_err,
.no_policy = 1,
- .netns_ok = 1,
};
static const struct net_protocol ah4_protocol = {
.handler = xfrm4_ah_rcv,
.err_handler = xfrm4_ah_err,
.no_policy = 1,
- .netns_ok = 1,
};
static const struct net_protocol ipcomp4_protocol = {
.handler = xfrm4_ipcomp_rcv,
.err_handler = xfrm4_ipcomp_err,
.no_policy = 1,
- .netns_ok = 1,
};
static const struct xfrm_input_afinfo xfrm4_input_afinfo = {
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index fb0648e7fb32..f4555a88f86b 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -42,7 +42,6 @@ static void ipip_destroy(struct xfrm_state *x)
}
static const struct xfrm_type ipip_type = {
- .description = "IPIP",
.owner = THIS_MODULE,
.proto = IPPROTO_IPIP,
.init_state = ipip_init_state,
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 747f56e0c636..e504204bca92 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL
If unsure, say N.
+config IPV6_IOAM6_LWTUNNEL
+ bool "IPv6: IOAM Pre-allocated Trace insertion support"
+ depends on IPV6
+ select LWTUNNEL
+ help
+ Support for the inline insertion of IOAM Pre-allocated
+ Trace Header (only on locally generated packets), using
+ the lightweight tunnels mechanism.
+
+ If unsure, say N.
+
endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index cf7b47bdb9b3..1bc7e143217b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o fib6_notifier.o rpl.o
+ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o
ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
+ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 701eb82acd1c..c6a90b7bbb70 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -89,6 +89,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/export.h>
+#include <linux/ioam6.h>
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -237,6 +238,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -293,6 +297,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -387,6 +394,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
ndev->cnf.mtu6 = dev->mtu;
+ ndev->ra_mtu = 0;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
if (!ndev->nd_parms) {
kfree(ndev);
@@ -694,8 +702,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
errout:
if (in6_dev)
in6_dev_put(in6_dev);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return err;
}
@@ -1080,7 +1087,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- ifa = kzalloc(sizeof(*ifa), gfp_flags);
+ ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
if (!ifa) {
err = -ENOBUFS;
goto out;
@@ -3085,19 +3092,22 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
}
}
-#if IS_ENABLED(CONFIG_IPV6_SIT)
-static void sit_add_v4_addrs(struct inet6_dev *idev)
+#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+static void add_v4_addrs(struct inet6_dev *idev)
{
struct in6_addr addr;
struct net_device *dev;
struct net *net = dev_net(idev->dev);
- int scope, plen;
+ int scope, plen, offset = 0;
u32 pflags = 0;
ASSERT_RTNL();
memset(&addr, 0, sizeof(struct in6_addr));
- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
+ /* in case of IP6GRE the dev_addr is an IPv6 address, so we use only its last 4 bytes */
+ if (idev->dev->addr_len == sizeof(struct in6_addr))
+ offset = sizeof(struct in6_addr) - 4;
+ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
if (idev->dev->flags&IFF_POINTOPOINT) {
addr.s6_addr32[0] = htonl(0xfe800000);
@@ -3335,8 +3345,6 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_IEEE1394) &&
(dev->type != ARPHRD_TUNNEL6) &&
(dev->type != ARPHRD_6LOWPAN) &&
- (dev->type != ARPHRD_IP6GRE) &&
- (dev->type != ARPHRD_IPGRE) &&
(dev->type != ARPHRD_TUNNEL) &&
(dev->type != ARPHRD_NONE) &&
(dev->type != ARPHRD_RAWIP)) {
@@ -3384,14 +3392,14 @@ static void addrconf_sit_config(struct net_device *dev)
return;
}
- sit_add_v4_addrs(idev);
+ add_v4_addrs(idev);
if (dev->flags&IFF_POINTOPOINT)
addrconf_add_mroute(dev);
}
#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE)
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -3404,7 +3412,13 @@ static void addrconf_gre_config(struct net_device *dev)
return;
}
- addrconf_addr_gen(idev, true);
+ if (dev->type == ARPHRD_ETHER) {
+ addrconf_addr_gen(idev, true);
+ return;
+ }
+
+ add_v4_addrs(idev);
+
if (dev->flags & IFF_POINTOPOINT)
addrconf_add_mroute(dev);
}
@@ -3580,7 +3594,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
addrconf_sit_config(dev);
break;
#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE)
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+ case ARPHRD_IP6GRE:
case ARPHRD_IPGRE:
addrconf_gre_config(dev);
break;
@@ -3843,6 +3858,7 @@ restart:
}
idev->tstamp = jiffies;
+ idev->ra_mtu = 0;
/* Last: Shot the device (if unregistered) */
if (unregister) {
@@ -5211,8 +5227,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.netnsid = -1,
.type = type,
};
- struct net *net = sock_net(skb->sk);
- struct net *tgt_net = net;
+ struct net *tgt_net = sock_net(skb->sk);
int idx, s_idx, s_ip_idx;
int h, s_h;
struct net_device *dev;
@@ -5351,7 +5366,7 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(in_skb->sk);
+ struct net *tgt_net = sock_net(in_skb->sk);
struct inet6_fill_args fillargs = {
.portid = NETLINK_CB(in_skb).portid,
.seq = nlh->nlmsg_seq,
@@ -5359,7 +5374,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
.flags = 0,
.netnsid = -1,
};
- struct net *tgt_net = net;
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *addr = NULL, *peer;
@@ -5412,8 +5426,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
errout_ifa:
in6_ifa_put(ifa);
errout:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
if (fillargs.netnsid >= 0)
put_net(tgt_net);
@@ -5526,6 +5539,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
+ array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
+ array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
+ array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
}
static inline size_t inet6_ifla6_size(void)
@@ -5537,6 +5553,7 @@ static inline size_t inet6_ifla6_size(void)
+ nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+ nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
+ nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+ + nla_total_size(4) /* IFLA_INET6_RA_MTU */
+ 0;
}
@@ -5645,6 +5662,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
goto nla_put_failure;
+ if (idev->ra_mtu &&
+ nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -5761,6 +5782,9 @@ update_lft:
static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = {
[IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 },
[IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) },
+ [IFLA_INET6_RA_MTU] = { .type = NLA_REJECT,
+ .reject_message =
+ "IFLA_INET6_RA_MTU can not be set" },
};
static int check_addr_gen_mode(int mode)
@@ -5784,7 +5808,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net,
}
static int inet6_validate_link_af(const struct net_device *dev,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFLA_INET6_MAX + 1];
struct inet6_dev *idev = NULL;
@@ -5797,7 +5822,7 @@ static int inet6_validate_link_af(const struct net_device *dev,
}
err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
- inet6_af_policy, NULL);
+ inet6_af_policy, extack);
if (err)
return err;
@@ -6540,6 +6565,7 @@ static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
static int minus_one = -1;
static const int two_five_five = 255;
+static u32 ioam6_if_id_max = U16_MAX;
static const struct ctl_table addrconf_sysctl[] = {
{
@@ -6903,10 +6929,10 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "addr_gen_mode",
- .data = &ipv6_devconf.addr_gen_mode,
- .maxlen = sizeof(int),
- .mode = 0644,
+ .procname = "addr_gen_mode",
+ .data = &ipv6_devconf.addr_gen_mode,
+ .maxlen = sizeof(int),
+ .mode = 0644,
.proc_handler = addrconf_sysctl_addr_gen_mode,
},
{
@@ -6933,6 +6959,31 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "ioam6_enabled",
+ .data = &ipv6_devconf.ioam6_enabled,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)SYSCTL_ONE,
+ },
+ {
+ .procname = "ioam6_id",
+ .data = &ipv6_devconf.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)&ioam6_if_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &ipv6_devconf.ioam6_id_wide,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2389ff702f51..b5878bb8e419 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -62,6 +62,7 @@
#include <net/rpl.h>
#include <net/compat.h>
#include <net/xfrm.h>
+#include <net/ioam6.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -454,7 +455,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
- BPF_CGROUP_INET6_BIND, &flags);
+ CGROUP_INET6_BIND, &flags);
if (err)
return err;
@@ -531,7 +532,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET6_GETPEERNAME,
+ CGROUP_INET6_GETPEERNAME,
NULL);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -540,7 +541,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET6_GETSOCKNAME,
+ CGROUP_INET6_GETSOCKNAME,
NULL);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
@@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.fib_notify_on_flag_change = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
+ net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
+ net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
+
err = ipv6_init_mibs(net);
if (err)
return err;
@@ -1191,6 +1195,10 @@ static int __init inet6_init(void)
if (err)
goto rpl_fail;
+ err = ioam6_init();
+ if (err)
+ goto ioam6_fail;
+
err = igmp6_late_init();
if (err)
goto igmp6_late_err;
@@ -1213,6 +1221,8 @@ sysctl_fail:
igmp6_late_cleanup();
#endif
igmp6_late_err:
+ ioam6_exit();
+ioam6_fail:
rpl_exit();
rpl_fail:
seg6_exit();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 20d492da725a..828e62514260 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -755,7 +755,6 @@ static int ah6_rcv_cb(struct sk_buff *skb, int err)
}
static const struct xfrm_type ah6_type = {
- .description = "AH6",
.owner = THIS_MODULE,
.proto = IPPROTO_AH,
.flags = XFRM_TYPE_REPLAY_PROT,
@@ -763,7 +762,6 @@ static const struct xfrm_type ah6_type = {
.destructor = ah6_destroy,
.input = ah6_input,
.output = ah6_output,
- .hdr_offset = xfrm6_find_1stfragopt,
};
static struct xfrm6_protocol ah6_protocol = {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 393ae2b78e7d..ed2f061b8768 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -708,7 +708,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
@@ -1243,7 +1243,6 @@ static int esp6_rcv_cb(struct sk_buff *skb, int err)
}
static const struct xfrm_type esp6_type = {
- .description = "ESP6",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.flags = XFRM_TYPE_REPLAY_PROT,
@@ -1251,7 +1250,6 @@ static const struct xfrm_type esp6_type = {
.destructor = esp6_destroy,
.input = esp6_input,
.output = esp6_output,
- .hdr_offset = xfrm6_find_1stfragopt,
};
static struct xfrm6_protocol esp6_protocol = {
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 40ed4fcf1cf4..a349d4798077 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -377,7 +377,6 @@ static const struct net_offload esp6_offload = {
};
static const struct xfrm_type_offload esp6_type_offload = {
- .description = "ESP6 OFFLOAD",
.owner = THIS_MODULE,
.proto = IPPROTO_ESP,
.input_tail = esp6_input_tail,
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 56e479d158b7..3a871a09f962 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -49,22 +49,12 @@
#include <net/seg6_hmac.h>
#endif
#include <net/rpl.h>
+#include <linux/ioam6.h>
+#include <net/ioam6.h>
+#include <net/dst_metadata.h>
#include <linux/uaccess.h>
-/*
- * Parsing tlv encoded headers.
- *
- * Parsing function "func" returns true, if parsing succeed
- * and false, if it failed.
- * It MUST NOT touch skb->h.
- */
-
-struct tlvtype_proc {
- int type;
- bool (*func)(struct sk_buff *skb, int offset);
-};
-
/*********************
Generic functions
*********************/
@@ -109,16 +99,23 @@ drop:
return false;
}
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff);
+#endif
+
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+static bool ip6_parse_tlv(bool hopbyhop,
struct sk_buff *skb,
int max_count)
{
int len = (skb_transport_header(skb)[1] + 1) << 3;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
- const struct tlvtype_proc *curr;
bool disallow_unknowns = false;
int tlv_count = 0;
int padlen = 0;
@@ -135,18 +132,23 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
len -= 2;
while (len > 0) {
- int optlen = nh[off + 1] + 2;
- int i;
+ int optlen, i;
- switch (nh[off]) {
- case IPV6_TLV_PAD1:
- optlen = 1;
+ if (nh[off] == IPV6_TLV_PAD1) {
padlen++;
if (padlen > 7)
goto bad;
- break;
+ off++;
+ len--;
+ continue;
+ }
+ if (len < 2)
+ goto bad;
+ optlen = nh[off + 1] + 2;
+ if (optlen > len)
+ goto bad;
- case IPV6_TLV_PADN:
+ if (nh[off] == IPV6_TLV_PADN) {
/* RFC 2460 states that the purpose of PadN is
* to align the containing header to multiples
* of 8. 7 is therefore the highest valid value.
@@ -163,32 +165,51 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
if (nh[off + i] != 0)
goto bad;
}
- break;
-
- default: /* Other TLV code so scan list */
- if (optlen > len)
- goto bad;
-
+ } else {
tlv_count++;
if (tlv_count > max_count)
goto bad;
- for (curr = procs; curr->type >= 0; curr++) {
- if (curr->type == nh[off]) {
- /* type specific length/alignment
- checks will be performed in the
- func(). */
- if (curr->func(skb, off) == false)
+ if (hopbyhop) {
+ switch (nh[off]) {
+ case IPV6_TLV_ROUTERALERT:
+ if (!ipv6_hop_ra(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_IOAM:
+ if (!ipv6_hop_ioam(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_JUMBO:
+ if (!ipv6_hop_jumbo(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_CALIPSO:
+ if (!ipv6_hop_calipso(skb, off))
+ return false;
+ break;
+ default:
+ if (!ip6_tlvopt_unknown(skb, off,
+ disallow_unknowns))
+ return false;
+ break;
+ }
+ } else {
+ switch (nh[off]) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPV6_TLV_HAO:
+ if (!ipv6_dest_hao(skb, off))
+ return false;
+ break;
+#endif
+ default:
+ if (!ip6_tlvopt_unknown(skb, off,
+ disallow_unknowns))
return false;
break;
}
}
- if (curr->type < 0 &&
- !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
- return false;
-
padlen = 0;
- break;
}
off += optlen;
len -= optlen;
@@ -265,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
}
#endif
-static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- {
- .type = IPV6_TLV_HAO,
- .func = ipv6_dest_hao,
- },
-#endif
- {-1, NULL}
-};
-
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
@@ -305,8 +316,7 @@ fail_and_free:
dstbuf = opt->dst1;
#endif
- if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
- init_net.ipv6.sysctl.max_dst_opts_cnt)) {
+ if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
@@ -929,6 +939,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
return false;
}
+/* IOAM */
+
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ioam6_namespace *ns;
+ struct ioam6_hdr *hdr;
+
+ /* Bad alignment (must be 4n-aligned) */
+ if (optoff & 3)
+ goto drop;
+
+ /* Ignore if IOAM is not enabled on ingress */
+ if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+ goto ignore;
+
+ /* Truncated Option header */
+ hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
+ if (hdr->opt_len < 2)
+ goto drop;
+
+ switch (hdr->type) {
+ case IOAM6_TYPE_PREALLOC:
+ /* Truncated Pre-allocated Trace header */
+ if (hdr->opt_len < 2 + sizeof(*trace))
+ goto drop;
+
+ /* Malformed Pre-allocated Trace header */
+ trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
+ if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4)
+ goto drop;
+
+ /* Ignore if the IOAM namespace is unknown */
+ ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+ if (!ns)
+ goto ignore;
+
+ if (!skb_valid_dst(skb))
+ ip6_route_input(skb);
+
+ ioam6_fill_trace_data(skb, ns, trace);
+ break;
+ default:
+ break;
+ }
+
+ignore:
+ return true;
+
+drop:
+ kfree_skb(skb);
+ return false;
+}
+
/* Jumbo payload */
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
@@ -995,22 +1059,6 @@ drop:
return false;
}
-static const struct tlvtype_proc tlvprochopopt_lst[] = {
- {
- .type = IPV6_TLV_ROUTERALERT,
- .func = ipv6_hop_ra,
- },
- {
- .type = IPV6_TLV_JUMBO,
- .func = ipv6_hop_jumbo,
- },
- {
- .type = IPV6_TLV_CALIPSO,
- .func = ipv6_hop_calipso,
- },
- { -1, }
-};
-
int ipv6_parse_hopopts(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
@@ -1036,8 +1084,7 @@ fail_and_free:
goto fail_and_free;
opt->flags |= IP6SKB_HOPBYHOP;
- if (ip6_parse_tlv(tlvprochopopt_lst, skb,
- init_net.ipv6.sysctl.max_hbh_opts_cnt)) {
+ if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
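The reworked ip6_parse_tlv() above now bounds-checks every option before dispatching on its type: Pad1 is a single byte, and any other option is a type/length/data triple that must fit in what remains of the extension header. A minimal standalone sketch of those rules follows (not kernel code; the function name, buffer handling and simplified dispatch are illustrative assumptions only):

#include <stdbool.h>
#include <stddef.h>

#define TLV_PAD1 0	/* one padding byte, no length field */

/* Sketch of the hop-by-hop/destination TLV walk: validate each
 * option's length before looking at its type, as the patched
 * parser does.
 */
static bool walk_tlvs(const unsigned char *opt, size_t len)
{
	size_t off = 0;

	while (off < len) {
		size_t optlen;

		if (opt[off] == TLV_PAD1) {
			off++;			/* Pad1 is exactly one byte */
			continue;
		}
		if (len - off < 2)		/* need type + length bytes */
			return false;
		optlen = (size_t)opt[off + 1] + 2;
		if (optlen > len - off)		/* option overruns the header */
			return false;
		/* a real parser would dispatch on opt[off] here
		 * (PadN, router alert, IOAM, jumbo, CALIPSO, ...) */
		off += optlen;
	}
	return true;
}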
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8f9a83314de7..40f3e4f9f33a 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -467,7 +467,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
static int __net_init fib6_rules_net_init(struct net *net)
{
struct fib_rules_ops *ops;
- int err = -ENOMEM;
+ int err;
ops = fib_rules_register(&fib6_rules_ops_template, net);
if (IS_ERR(ops))
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e8398ffb5e35..a7c31ab67c5d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -725,6 +725,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct ipcm6_cookie ipc6;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
bool acast;
+ u8 type;
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
@@ -740,8 +741,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
!(net->ipv6.sysctl.anycast_src_echo_reply && acast))
saddr = NULL;
+ if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
+ type = ICMPV6_EXT_ECHO_REPLY;
+ else
+ type = ICMPV6_ECHO_REPLY;
+
memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
- tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+ tmp_hdr.icmp6_type = type;
memset(&fl6, 0, sizeof(fl6));
if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
@@ -752,7 +758,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if (saddr)
fl6.saddr = *saddr;
fl6.flowi6_oif = icmp6_iif(skb);
- fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
+ fl6.fl6_icmp_type = type;
fl6.flowi6_mark = mark;
fl6.flowi6_uid = sock_net_uid(net, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
@@ -783,13 +789,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
msg.skb = skb;
msg.offset = 0;
- msg.type = ICMPV6_ECHO_REPLY;
+ msg.type = type;
ipcm6_init_sk(&ipc6, np);
ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
ipc6.sockc.mark = mark;
+ if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
+ if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
+ goto out_dst_release;
+
if (ip6_append_data(sk, icmpv6_getfrag, &msg,
skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), &ipc6, &fl6,
@@ -911,6 +921,11 @@ static int icmpv6_rcv(struct sk_buff *skb)
if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
icmpv6_echo_reply(skb);
break;
+ case ICMPV6_EXT_ECHO_REQUEST:
+ if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
+ net->ipv4.sysctl_icmp_echo_enable_probe)
+ icmpv6_echo_reply(skb);
+ break;
case ICMPV6_ECHO_REPLY:
success = ping_rcv(skb);
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
new file mode 100644
index 000000000000..5e8961004832
--- /dev/null
+++ b/net/ipv6/ioam6.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
+#include <linux/rhashtable.h>
+
+#include <net/addrconf.h>
+#include <net/genetlink.h>
+#include <net/ioam6.h>
+
+static void ioam6_ns_release(struct ioam6_namespace *ns)
+{
+ kfree_rcu(ns, rcu);
+}
+
+static void ioam6_sc_release(struct ioam6_schema *sc)
+{
+ kfree_rcu(sc, rcu);
+}
+
+static void ioam6_free_ns(void *ptr, void *arg)
+{
+ struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr;
+
+ if (ns)
+ ioam6_ns_release(ns);
+}
+
+static void ioam6_free_sc(void *ptr, void *arg)
+{
+ struct ioam6_schema *sc = (struct ioam6_schema *)ptr;
+
+ if (sc)
+ ioam6_sc_release(sc);
+}
+
+static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_namespace *ns = obj;
+
+ return (ns->id != *(__be16 *)arg->key);
+}
+
+static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_schema *sc = obj;
+
+ return (sc->id != *(u32 *)arg->key);
+}
+
+static const struct rhashtable_params rht_ns_params = {
+ .key_len = sizeof(__be16),
+ .key_offset = offsetof(struct ioam6_namespace, id),
+ .head_offset = offsetof(struct ioam6_namespace, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_ns_cmpfn,
+};
+
+static const struct rhashtable_params rht_sc_params = {
+ .key_len = sizeof(u32),
+ .key_offset = offsetof(struct ioam6_schema, id),
+ .head_offset = offsetof(struct ioam6_schema, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_sc_cmpfn,
+};
+
+static struct genl_family ioam6_genl_family;
+
+static const struct nla_policy ioam6_genl_policy_addns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 },
+ [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 },
+};
+
+static const struct nla_policy ioam6_genl_policy_delns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+};
+
+static const struct nla_policy ioam6_genl_policy_addsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY,
+ .len = IOAM6_MAX_SCHEMA_DATA_LEN },
+};
+
+static const struct nla_policy ioam6_genl_policy_delsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy ioam6_genl_policy_ns_sc[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG },
+};
+
+static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ u64 data64;
+ u32 data32;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (ns) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ ns->id = id;
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA])
+ data32 = IOAM6_U32_UNAVAILABLE;
+ else
+ data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
+ data64 = IOAM6_U64_UNAVAILABLE;
+ else
+ data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+
+ ns->data = cpu_to_be32(data32);
+ ns->data_wide = cpu_to_be64(data64);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ kfree(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ sc = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ goto out_unlock;
+
+ if (sc)
+ rcu_assign_pointer(sc->ns, NULL);
+
+ ioam6_ns_release(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns,
+ u32 portid,
+ u32 seq,
+ u32 flags,
+ struct sk_buff *skb,
+ u8 cmd)
+{
+ struct ioam6_schema *sc;
+ u64 data64;
+ u32 data32;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ data32 = be32_to_cpu(ns->data);
+ data64 = be64_to_cpu(ns->data_wide);
+
+ if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) ||
+ (data32 != IOAM6_U32_UNAVAILABLE &&
+ nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) ||
+ (data64 != IOAM6_U64_UNAVAILABLE &&
+ nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE,
+ data64, IOAM6_ATTR_PAD)))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ sc = rcu_dereference(ns->schema);
+ if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpns_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->namespaces, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_namespace *ns;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ ns = rhashtable_walk_next(iter);
+
+ if (IS_ERR(ns)) {
+ if (PTR_ERR(ns) == -EAGAIN)
+ continue;
+ err = PTR_ERR(ns);
+ goto done;
+ } else if (!ns) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpns_element(ns,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_NAMESPACES);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ int len, len_aligned, err;
+ struct ioam6_schema *sc;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (sc) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]);
+ len_aligned = ALIGN(len, 4);
+
+ sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL);
+ if (!sc) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ sc->id = id;
+ sc->len = len_aligned;
+ sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24));
+ nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto free_sc;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+free_sc:
+ kfree(sc);
+ goto out_unlock;
+}
+
+static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ int err;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto out_unlock;
+
+ if (ns)
+ rcu_assign_pointer(ns->schema, NULL);
+
+ ioam6_sc_release(sc);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc,
+ u32 portid, u32 seq, u32 flags,
+ struct sk_buff *skb, u8 cmd)
+{
+ struct ioam6_namespace *ns;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) ||
+ nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ ns = rcu_dereference(sc->ns);
+ if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpsc_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->schemas, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_schema *sc;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ sc = rhashtable_walk_next(iter);
+
+ if (IS_ERR(sc)) {
+ if (PTR_ERR(sc) == -EAGAIN)
+ continue;
+ err = PTR_ERR(sc);
+ goto done;
+ } else if (!sc) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpsc_element(sc,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_SCHEMAS);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_namespace *ns, *ns_ref;
+ struct ioam6_schema *sc, *sc_ref;
+ struct ioam6_pernet_data *nsdata;
+ __be16 ns_id;
+ u32 sc_id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID] ||
+ (!info->attrs[IOAM6_ATTR_SC_ID] &&
+ !info->attrs[IOAM6_ATTR_SC_NONE]))
+ return -EINVAL;
+
+ ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (info->attrs[IOAM6_ATTR_SC_NONE]) {
+ sc = NULL;
+ } else {
+ sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id,
+ rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+ }
+
+ sc_ref = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+ if (sc_ref)
+ rcu_assign_pointer(sc_ref->ns, NULL);
+ rcu_assign_pointer(ns->schema, sc);
+
+ if (sc) {
+ ns_ref = rcu_dereference_protected(sc->ns,
+ lockdep_is_held(&nsdata->lock));
+ if (ns_ref)
+ rcu_assign_pointer(ns_ref->schema, NULL);
+ rcu_assign_pointer(sc->ns, ns);
+ }
+
+ err = 0;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static const struct genl_ops ioam6_genl_ops[] = {
+ {
+ .cmd = IOAM6_CMD_ADD_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_NAMESPACES,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpns_start,
+ .dumpit = ioam6_genl_dumpns,
+ .done = ioam6_genl_dumpns_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_ADD_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_SCHEMAS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpsc_start,
+ .dumpit = ioam6_genl_dumpsc,
+ .done = ioam6_genl_dumpsc_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_NS_SET_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_ns_set_schema,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_ns_sc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1,
+ },
+};
+
+static struct genl_family ioam6_genl_family __ro_after_init = {
+ .name = IOAM6_GENL_NAME,
+ .version = IOAM6_GENL_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = ioam6_genl_ops,
+ .n_ops = ARRAY_SIZE(ioam6_genl_ops),
+ .module = THIS_MODULE,
+};
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+}
+
+static void __ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace,
+ struct ioam6_schema *sc,
+ u8 sclen)
+{
+ struct __kernel_sock_timeval ts;
+ u64 raw64;
+ u32 raw32;
+ u16 raw16;
+ u8 *data;
+ u8 byte;
+
+ data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4;
+
+ /* hop_lim and node_id */
+ if (trace->type.bit0) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--;
+
+ raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
+
+ *(__be32 *)data = cpu_to_be32((byte << 24) | raw32);
+ data += sizeof(__be32);
+ }
+
+ /* ingress_if_id and egress_if_id */
+ if (trace->type.bit1) {
+ if (!skb->dev)
+ raw16 = IOAM6_U16_UNAVAILABLE;
+ else
+ raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw16 = IOAM6_U16_UNAVAILABLE;
+ else
+ raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+ }
+
+ /* timestamp seconds */
+ if (trace->type.bit2) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ skb_get_new_timestamp(skb, &ts);
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* timestamp subseconds */
+ if (trace->type.bit3) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ if (!trace->type.bit2)
+ skb_get_new_timestamp(skb, &ts);
+
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* transit delay */
+ if (trace->type.bit4) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data */
+ if (trace->type.bit5) {
+ *(__be32 *)data = ns->data;
+ data += sizeof(__be32);
+ }
+
+ /* queue depth */
+ if (trace->type.bit6) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* checksum complement */
+ if (trace->type.bit7) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* hop_lim and node_id (wide) */
+ if (trace->type.bit8) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--;
+
+ raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
+
+ *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64);
+ data += sizeof(__be64);
+ }
+
+ /* ingress_if_id and egress_if_id (wide) */
+ if (trace->type.bit9) {
+ if (!skb->dev)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data (wide) */
+ if (trace->type.bit10) {
+ *(__be64 *)data = ns->data_wide;
+ data += sizeof(__be64);
+ }
+
+ /* buffer occupancy */
+ if (trace->type.bit11) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* opaque state snapshot */
+ if (trace->type.bit22) {
+ if (!sc) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8);
+ } else {
+ *(__be32 *)data = sc->hdr;
+ data += sizeof(__be32);
+
+ memcpy(data, sc->data, sc->len);
+ }
+ }
+}
+
+/* called with rcu_read_lock() */
+void ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_schema *sc;
+ u8 sclen = 0;
+
+ /* Skip if the Overflow flag is set OR
+ * if an unknown type (bits 12-21) is set
+ */
+ if (trace->overflow ||
+ trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21) {
+ return;
+ }
+
+ /* NodeLen does not include Opaque State Snapshot length. We need to
+ * take it into account if the corresponding bit is set (bit 22) and
+ * if the current IOAM namespace has an active schema attached to it
+ */
+ sc = rcu_dereference(ns->schema);
+ if (trace->type.bit22) {
+ sclen = sizeof_field(struct ioam6_schema, hdr) / 4;
+
+ if (sc)
+ sclen += sc->len / 4;
+ }
+
+ /* If there is no space remaining, we set the Overflow flag and we
+ * skip without filling the trace
+ */
+ if (!trace->remlen || trace->remlen < trace->nodelen + sclen) {
+ trace->overflow = 1;
+ return;
+ }
+
+ __ioam6_fill_trace_data(skb, ns, trace, sc, sclen);
+ trace->remlen -= trace->nodelen + sclen;
+}
+
+static int __net_init ioam6_net_init(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata;
+ int err = -ENOMEM;
+
+ nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL);
+ if (!nsdata)
+ goto out;
+
+ mutex_init(&nsdata->lock);
+ net->ipv6.ioam6_data = nsdata;
+
+ err = rhashtable_init(&nsdata->namespaces, &rht_ns_params);
+ if (err)
+ goto free_nsdata;
+
+ err = rhashtable_init(&nsdata->schemas, &rht_sc_params);
+ if (err)
+ goto free_rht_ns;
+
+out:
+ return err;
+free_rht_ns:
+ rhashtable_destroy(&nsdata->namespaces);
+free_nsdata:
+ kfree(nsdata);
+ net->ipv6.ioam6_data = NULL;
+ goto out;
+}
+
+static void __net_exit ioam6_net_exit(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL);
+ rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL);
+
+ kfree(nsdata);
+}
+
+static struct pernet_operations ioam6_net_ops = {
+ .init = ioam6_net_init,
+ .exit = ioam6_net_exit,
+};
+
+int __init ioam6_init(void)
+{
+ int err = register_pernet_subsys(&ioam6_net_ops);
+ if (err)
+ goto out;
+
+ err = genl_register_family(&ioam6_genl_family);
+ if (err)
+ goto out_unregister_pernet_subsys;
+
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ err = ioam6_iptunnel_init();
+ if (err)
+ goto out_unregister_genl;
+#endif
+
+ pr_info("In-situ OAM (IOAM) with IPv6\n");
+
+out:
+ return err;
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+out_unregister_genl:
+ genl_unregister_family(&ioam6_genl_family);
+#endif
+out_unregister_pernet_subsys:
+ unregister_pernet_subsys(&ioam6_net_ops);
+ goto out;
+}
+
+void ioam6_exit(void)
+{
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ ioam6_iptunnel_exit();
+#endif
+ genl_unregister_family(&ioam6_genl_family);
+ unregister_pernet_subsys(&ioam6_net_ops);
+}
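ioam6_fill_trace_data() above accounts for trace space in 4-octet units: a node is only recorded when remlen covers nodelen plus the optional opaque-state-snapshot length, otherwise the Overflow flag is set and the packet is left untouched. A small worked sketch of that bookkeeping, with example values that are assumptions chosen purely for illustration:

/* Sketch of the remlen accounting, all quantities in 4-octet units.
 * Assumed example: a trace with bit0 (hop_lim + node id) and bit1
 * (interface ids) set has nodelen = 2, and no schema is attached
 * so sclen = 0.
 */
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	unsigned int remlen = 3;	/* pre-allocated room left in the trace */
	unsigned int nodelen = 2;	/* per-node data for bit0 + bit1 */
	unsigned int sclen = 0;		/* no opaque state snapshot */
	bool overflow = false;

	for (int hop = 1; hop <= 2; hop++) {
		if (!remlen || remlen < nodelen + sclen) {
			overflow = true;	/* second hop no longer fits */
			break;
		}
		remlen -= nodelen + sclen;	/* first hop: 3 - 2 = 1 left */
	}
	printf("remlen=%u overflow=%d\n", remlen, overflow);	/* prints: remlen=1 overflow=1 */
	return 0;
}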
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
new file mode 100644
index 000000000000..f9ee04541c17
--- /dev/null
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM Lightweight Tunnel implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/in6.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_iptunnel.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/lwtunnel.h>
+#include <net/ioam6.h>
+
+#define IOAM6_MASK_SHORT_FIELDS 0xff100000
+#define IOAM6_MASK_WIDE_FIELDS 0xe00000
+
+struct ioam6_lwt_encap {
+ struct ipv6_hopopt_hdr eh;
+ u8 pad[2]; /* 2-octet padding for 4n-alignment */
+ struct ioam6_hdr ioamh;
+ struct ioam6_trace_hdr traceh;
+} __packed;
+
+struct ioam6_lwt {
+ struct ioam6_lwt_encap tuninfo;
+};
+
+static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
+{
+ return (struct ioam6_lwt *)lwt->data;
+}
+
+static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
+{
+ return &ioam6_lwt_state(lwt)->tuninfo;
+}
+
+static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt)
+{
+ return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
+}
+
+static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+};
+
+static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_trace_hdr *data;
+ struct nlattr *nla;
+ int len;
+
+ len = sizeof(*trace);
+
+ nla = nla_reserve(skb, attrtype, len);
+ if (!nla)
+ return -EMSGSIZE;
+
+ data = nla_data(nla);
+ memcpy(data, trace, len);
+
+ return 0;
+}
+
+static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
+{
+ u32 fields;
+
+ if (!trace->type_be32 || !trace->remlen ||
+ trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4)
+ return false;
+
+ trace->nodelen = 0;
+ fields = be32_to_cpu(trace->type_be32);
+
+ trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
+ * (sizeof(__be32) / 4);
+ trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
+ * (sizeof(__be64) / 4);
+
+ return true;
+}
+
+static int ioam6_build_state(struct net *net, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
+ struct ioam6_lwt_encap *tuninfo;
+ struct ioam6_trace_hdr *trace;
+ struct lwtunnel_state *s;
+ int len_aligned;
+ int len, err;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
+ ioam6_iptunnel_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[IOAM6_IPTUNNEL_TRACE]) {
+ NL_SET_ERR_MSG(extack, "missing trace");
+ return -EINVAL;
+ }
+
+ trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
+ if (!ioam6_validate_trace_hdr(trace)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
+ "invalid trace validation");
+ return -EINVAL;
+ }
+
+ len = sizeof(*tuninfo) + trace->remlen * 4;
+ len_aligned = ALIGN(len, 8);
+
+ s = lwtunnel_state_alloc(len_aligned);
+ if (!s)
+ return -ENOMEM;
+
+ tuninfo = ioam6_lwt_info(s);
+ tuninfo->eh.hdrlen = (len_aligned >> 3) - 1;
+ tuninfo->pad[0] = IPV6_TLV_PADN;
+ tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
+ tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
+ tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
+ + trace->remlen * 4;
+
+ memcpy(&tuninfo->traceh, trace, sizeof(*trace));
+
+ len = len_aligned - len;
+ if (len == 1) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1;
+ } else if (len > 0) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
+ tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2;
+ }
+
+ s->type = LWTUNNEL_ENCAP_IOAM6;
+ s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
+ *ts = s;
+
+ return 0;
+}
+
+static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ipv6hdr *oldhdr, *hdr;
+ struct ioam6_namespace *ns;
+ int hdrlen, err;
+
+ hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
+
+ err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ if (unlikely(err))
+ return err;
+
+ oldhdr = ipv6_hdr(skb);
+ skb_pull(skb, sizeof(*oldhdr));
+ skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));
+
+ skb_push(skb, sizeof(*oldhdr) + hdrlen);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ hdr = ipv6_hdr(skb);
+ memmove(hdr, oldhdr, sizeof(*oldhdr));
+ tuninfo->eh.nexthdr = hdr->nexthdr;
+
+ skb_set_transport_header(skb, sizeof(*hdr));
+ skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);
+
+ memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
+
+ hdr->nexthdr = NEXTHDR_HOP;
+ hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
+
+ trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
+ + sizeof(struct ipv6_hopopt_hdr) + 2
+ + sizeof(struct ioam6_hdr));
+
+ ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id);
+ if (ns)
+ ioam6_fill_trace_data(skb, ns, trace);
+
+ return 0;
+}
+
+static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate;
+ int err = -EINVAL;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ /* Only for packets we send and
+ * that do not already contain a Hop-by-Hop header
+ */
+ if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
+ goto out;
+
+ err = ioam6_do_inline(skb, ioam6_lwt_info(lwt));
+ if (unlikely(err))
+ goto drop;
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev));
+ if (unlikely(err))
+ goto drop;
+
+out:
+ return lwt->orig_output(net, sk, skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+static int ioam6_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ return nla_total_size(sizeof(*trace));
+}
+
+static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct ioam6_trace_hdr *a_hdr = ioam6_trace(a);
+ struct ioam6_trace_hdr *b_hdr = ioam6_trace(b);
+
+ return (a_hdr->namespace_id != b_hdr->namespace_id);
+}
+
+static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
+ .build_state = ioam6_build_state,
+ .output = ioam6_output,
+ .fill_encap = ioam6_fill_encap_info,
+ .get_encap_size = ioam6_encap_nlsize,
+ .cmp_encap = ioam6_encap_cmp,
+ .owner = THIS_MODULE,
+};
+
+int __init ioam6_iptunnel_init(void)
+{
+ return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
+
+void ioam6_iptunnel_exit(void)
+{
+ lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
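ioam6_validate_trace_hdr() above derives NodeLen from the trace type bitmap: each requested 4-octet field contributes one unit, each 8-octet ("wide") field contributes two. A standalone sketch of that computation follows; the example type value (bit0, bit1 and bit8 set) is an assumption chosen only for illustration, not kernel code:

/* NodeLen (in 4-octet units) from the trace type bitmap, mirroring
 * the short/wide field masks used above.
 */
#include <stdint.h>
#include <stdio.h>

#define MASK_SHORT_FIELDS 0xff100000u	/* 4-octet fields: bits 0-7 and 11 */
#define MASK_WIDE_FIELDS  0x00e00000u	/* 8-octet fields: bits 8-10 */

int main(void)
{
	/* assumed example: bit0, bit1 and bit8 requested */
	uint32_t fields = 0x80000000u | 0x40000000u | 0x00800000u;
	unsigned int nodelen;

	nodelen = __builtin_popcount(fields & MASK_SHORT_FIELDS)	/* x 1 unit  */
		+ __builtin_popcount(fields & MASK_WIDE_FIELDS) * 2;	/* x 2 units */

	/* prints: nodelen = 4 units (16 octets per node) */
	printf("nodelen = %u units (%u octets per node)\n", nodelen, nodelen * 4);
	return 0;
}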
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 679699e953f1..1bec5b22f80d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -32,6 +32,7 @@
#include <net/lwtunnel.h>
#include <net/fib_notifier.h>
+#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
@@ -1340,7 +1341,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node,
lockdep_is_held(&rt->fib6_table->tb6_lock));
- /* paired with smp_rmb() in rt6_get_cookie_safe() */
+ /* paired with smp_rmb() in fib6_get_cookie_safe() */
smp_wmb();
while (fn) {
fn->fn_sernum = sernum;
@@ -2355,6 +2356,10 @@ static int __net_init fib6_net_init(struct net *net)
if (err)
return err;
+ /* Default to 3-tuple */
+ net->ipv6.sysctl.multipath_hash_fields =
+ FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK;
+
spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
@@ -2362,7 +2367,7 @@ static int __net_init fib6_net_init(struct net *net)
net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
if (!net->ipv6.rt6_stats)
- goto out_timer;
+ goto out_notifier;
/* Avoid false sharing : Use at least a full cache line */
size = max_t(size_t, size, L1_CACHE_BYTES);
@@ -2407,7 +2412,7 @@ out_fib_table_hash:
kfree(net->ipv6.fib_table_hash);
out_rt6_stats:
kfree(net->ipv6.rt6_stats);
-out_timer:
+out_notifier:
fib6_notifier_exit(net);
return -ENOMEM;
}
@@ -2444,8 +2449,8 @@ int __init fib6_init(void)
int ret = -ENOMEM;
fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node),
- 0, SLAB_HWCACHE_ALIGN,
+ sizeof(struct fib6_node), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!fib6_node_kmem)
goto out;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index bc224f917bbd..3ad201d372d8 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1244,8 +1244,9 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
memcpy(u->name, p->name, sizeof(u->name));
}
-static int ip6gre_tunnel_ioctl(struct net_device *dev,
- struct ifreq *ifr, int cmd)
+static int ip6gre_tunnel_siocdevprivate(struct net_device *dev,
+ struct ifreq *ifr, void __user *data,
+ int cmd)
{
int err = 0;
struct ip6_tnl_parm2 p;
@@ -1259,7 +1260,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ign->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -1270,7 +1271,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
}
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
break;
@@ -1281,7 +1282,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
goto done;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
goto done;
err = -EINVAL;
@@ -1318,7 +1319,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else
err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
@@ -1331,7 +1332,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
if (dev == ign->fb_tunnel_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
goto done;
err = -ENOENT;
ip6gre_tnl_parm_from_user(&p1, &p);
@@ -1398,7 +1399,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_init = ip6gre_tunnel_init,
.ndo_uninit = ip6gre_tunnel_uninit,
.ndo_start_xmit = ip6gre_tunnel_xmit,
- .ndo_do_ioctl = ip6gre_tunnel_ioctl,
+ .ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ff4f9ebcf7f6..12f985f43bcc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -60,18 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
- const struct in6_addr *nexthop;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
+ unsigned int hh_len = LL_RESERVED_SPACE(dev);
+ const struct in6_addr *daddr, *nexthop;
+ struct ipv6hdr *hdr;
struct neighbour *neigh;
int ret;
- if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
- struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+ /* Be paranoid, rather than too clever. */
+ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+ skb = skb_expand_head(skb, hh_len);
+ if (!skb) {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ return -ENOMEM;
+ }
+ }
+ hdr = ipv6_hdr(skb);
+ daddr = &hdr->daddr;
+ if (ipv6_addr_is_multicast(daddr)) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
- ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr))) {
+ ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
@@ -82,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
- if (ipv6_hdr(skb)->hop_limit == 0) {
+ if (hdr->hop_limit == 0) {
IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
@@ -91,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
-
- if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
- IPV6_ADDR_SCOPE_NODELOCAL &&
+ if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
!(dev->flags & IFF_LOOPBACK)) {
kfree_skb(skb);
return 0;
@@ -108,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
- neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+ nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+ neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
- neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+ neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
@@ -120,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
rcu_read_unlock_bh();
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
@@ -240,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
@@ -247,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
int hlimit = -1;
u32 mtu;
- head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
- if (unlikely(skb_headroom(skb) < head_room)) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- if (!skb2) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
+ if (unlikely(head_room > skb_headroom(skb))) {
+ skb = skb_expand_head(skb, head_room);
+ if (!skb) {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOBUFS;
}
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
if (opt) {
@@ -304,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
- IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUT, skb->len);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -318,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* we promote our socket to non const
*/
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net, (struct sock *)sk, skb, NULL, dst->dev,
+ net, (struct sock *)sk, skb, NULL, dev,
dst_output);
}
- skb->dev = dst->dev;
+ skb->dev = dev;
/* ipv6_local_error() does not require socket lock,
* we promote our socket to non const
*/
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -479,7 +483,9 @@ int ip6_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ if (!net->ipv6.devconf_all->disable_policy &&
+ !idev->cnf.disable_policy &&
+ !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -519,9 +525,10 @@ int ip6_forward(struct sk_buff *skb)
if (net->ipv6.devconf_all->proxy_ndp &&
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
- if (proxied > 0)
+ if (proxied > 0) {
+ hdr->hop_limit--;
return ip6_input(skb);
- else if (proxied < 0) {
+ } else if (proxied < 0) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -577,7 +584,7 @@ int ip6_forward(struct sk_buff *skb)
}
}
- mtu = ip6_dst_mtu_forward(dst);
+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
@@ -1055,13 +1062,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* ip6_route_output will fail given src=any saddr, though, so
* that's why we try it again later.
*/
- if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+ if (ipv6_addr_any(&fl6->saddr)) {
struct fib6_info *from;
struct rt6_info *rt;
- bool had_dst = *dst != NULL;
- if (!had_dst)
- *dst = ip6_route_output(net, sk, fl6);
+ *dst = ip6_route_output(net, sk, fl6);
rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
rcu_read_lock();
@@ -1078,7 +1083,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* never existed and let the SA-enabled version take
* over.
*/
- if (!had_dst && (*dst)->error) {
+ if ((*dst)->error) {
dst_release(*dst);
*dst = NULL;
}
@@ -1555,7 +1560,7 @@ emsgsize:
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
@@ -1582,17 +1587,28 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len;
+ alloc_extra += dst_exthdrlen;
+ alloc_extra += rt->dst.trailer_len;
+
+ /* We just reserve space for fragment header.
+ * Note: this may be overallocation if the message
+ * (without MSG_MORE) fits into the MTU.
+ */
+ alloc_extra += sizeof(struct frag_hdr);
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
-
- alloclen += dst_exthdrlen;
+ alloclen += alloc_extra;
if (datalen != length + fraggap) {
/*
@@ -1602,30 +1618,21 @@ alloc_new_skb:
datalen += rt->dst.trailer_len;
}
- alloclen += rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
- /*
- * We just reserve space for fragment header.
- * Note: this may be overallocation if the message
- * (without MSG_MORE) fits into the MTU.
- */
- alloclen += sizeof(struct frag_hdr);
-
copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
}
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
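
The emsgsize/alloc_new_skb hunk above consolidates the fixed per-skb overheads (link-layer headroom, tunnel extension headers, dst trailer, and the fragment-header reserve) into a single alloc_extra that is added to the allocation size once. As a rough standalone illustration of that accounting (all names below are placeholders for the sketch, not kernel APIs):

#include <stddef.h>

struct fixed_overheads {
	size_t hh_len;          /* link-layer headroom reserve */
	size_t dst_exthdrlen;   /* tunnel extension headers */
	size_t trailer_len;     /* dst trailer */
	size_t frag_hdr_len;    /* fragment header reserve */
};

static size_t alloc_len(size_t fraglen, const struct fixed_overheads *oh)
{
	size_t alloc_extra = oh->hh_len + oh->dst_exthdrlen +
			     oh->trailer_len + oh->frag_hdr_len;

	/* may overallocate when the message fits the MTU, as the hunk notes */
	return fraglen + alloc_extra;
}
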
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 288bafded998..20a67efda47f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -837,6 +837,7 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
skb->dev = tunnel->dev;
+ skb_reset_mac_header(skb);
}
skb_reset_network_header(skb);
@@ -1239,8 +1240,6 @@ route_lookup:
if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
- skb_set_inner_ipproto(skb, proto);
-
err = ip6_tnl_encap(skb, t, &proto, fl6);
if (err)
return err;
@@ -1377,6 +1376,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
+ skb_set_inner_ipproto(skb, protocol);
+
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
protocol);
if (err != 0) {
@@ -1580,9 +1581,10 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
}
/**
- * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace
* @dev: virtual device associated with tunnel
- * @ifr: parameters passed from userspace
+ * @ifr: unused
+ * @data: parameters passed from userspace
* @cmd: command to be performed
*
* Description:
@@ -1608,7 +1610,8 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
**/
static int
-ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
int err = 0;
struct ip6_tnl_parm p;
@@ -1622,7 +1625,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -1634,9 +1637,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
memset(&p, 0, sizeof(p));
}
ip6_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
- }
break;
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
@@ -1644,7 +1646,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -EINVAL;
if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
@@ -1668,7 +1670,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!IS_ERR(t)) {
err = 0;
ip6_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else {
@@ -1682,7 +1684,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (dev == ip6n->fb_tnl_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -ENOENT;
ip6_tnl_parm_from_user(&p1, &p);
@@ -1801,7 +1803,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
.ndo_start_xmit = ip6_tnl_start_xmit,
- .ndo_do_ioctl = ip6_tnl_ioctl,
+ .ndo_siocdevprivate = ip6_tnl_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2d048e21abbb..1d8e3ffa225d 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -771,13 +771,14 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
}
/**
- * vti6_ioctl - configure vti6 tunnels from userspace
+ * vti6_siocdevprivate - configure vti6 tunnels from userspace
* @dev: virtual device associated with tunnel
- * @ifr: parameters passed from userspace
+ * @ifr: unused
+ * @data: parameters passed from userspace
* @cmd: command to be performed
*
* Description:
- * vti6_ioctl() is used for managing vti6 tunnels
+ * vti6_siocdevprivate() is used for managing vti6 tunnels
* from userspace.
*
* The possible commands are the following:
@@ -798,7 +799,7 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
* %-ENODEV if attempting to change or delete a nonexisting device
**/
static int
-vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
{
int err = 0;
struct ip6_tnl_parm2 p;
@@ -810,7 +811,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -822,7 +823,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!t)
t = netdev_priv(dev);
vti6_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
break;
case SIOCADDTUNNEL:
@@ -831,7 +832,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -EINVAL;
if (p.proto != IPPROTO_IPV6 && p.proto != 0)
@@ -852,7 +853,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (t) {
err = 0;
vti6_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else
@@ -865,7 +866,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (dev == ip6n->fb_tnl_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -ENOENT;
vti6_parm_from_user(&p1, &p);
@@ -890,7 +891,7 @@ static const struct net_device_ops vti6_netdev_ops = {
.ndo_init = vti6_dev_init,
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
- .ndo_do_ioctl = vti6_ioctl,
+ .ndo_siocdevprivate = vti6_siocdevprivate,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 06b0d2c329b9..36ed9efb8825 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
reg_dev = mrt->vif_table[reg_vif_num].dev;
- if (reg_dev)
- dev_hold(reg_dev);
+ dev_hold(reg_dev);
read_unlock(&mrt_lock);
if (!reg_dev)
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index daef890460b7..15f984be3570 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -172,14 +172,12 @@ static int ipcomp6_rcv_cb(struct sk_buff *skb, int err)
}
static const struct xfrm_type ipcomp6_type = {
- .description = "IPCOMP6",
.owner = THIS_MODULE,
.proto = IPPROTO_COMP,
.init_state = ipcomp6_init_state,
.destructor = ipcomp_destroy,
.input = ipcomp_input,
.output = ipcomp_output,
- .hdr_offset = xfrm6_find_1stfragopt,
};
static struct xfrm6_protocol ipcomp6_protocol = {
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a6804a7e34c1..e4bdb09c5586 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
goto out_free_gsf;
- ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+ ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return ret;
@@ -234,7 +234,7 @@ out_free_gsf:
static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
void *p;
int ret;
@@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
ret = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
goto out_free_p;
@@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
goto out_free_p;
ret = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_p;
ret = ip6_mc_msfilter(sk, &(struct group_filter){
.gf_interface = gf32->gf_interface,
.gf_group = gf32->gf_group,
.gf_fmode = gf32->gf_fmode,
- .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+ .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex);
out_free_p:
kfree(p);
@@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
@@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
return -EADDRNOTAVAIL;
num = gsf.gf_numsrc;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
if (!err) {
if (num > gsf.gf_numsrc)
num = gsf.gf_numsrc;
@@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
@@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
return -EADDRNOTAVAIL;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
release_sock(sk);
if (err)
return err;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d36ef9d25e73..bed8155508c8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (psl)
count += psl->sl_max;
- newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (psl) {
for (i = 0; i < psl->sl_count; i++)
newpsl->sl_addr[i] = psl->sl_addr[i];
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
psl = newpsl;
@@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
goto done;
}
if (gsf->gf_numsrc) {
- newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
- GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+ gsf->gf_numsrc),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
mutex_unlock(&idev->mc_lock);
- sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+ sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr,
+ newpsl->sl_max));
goto done;
}
mutex_unlock(&idev->mc_lock);
@@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
if (psl) {
ip6_mc_del_src(idev, group, pmc->sfmode,
psl->sl_count, psl->sl_addr, 0);
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
} else {
ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
@@ -1351,8 +1356,8 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
return 0;
}
-static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
- unsigned long *max_delay)
+static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+ unsigned long *max_delay)
{
*max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
@@ -1362,7 +1367,7 @@ static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
idev->mc_maxdelay = *max_delay;
- return 0;
+ return;
}
/* called with rcu_read_lock() */
@@ -1449,9 +1454,7 @@ static void __mld_query_work(struct sk_buff *skb)
mlh2 = (struct mld2_query *)skb_transport_header(skb);
- err = mld_process_v2(idev, mlh2, &max_delay);
- if (err < 0)
- goto out;
+ mld_process_v2(idev, mlh2, &max_delay);
if (group_type == IPV6_ADDR_ANY) { /* general query */
if (mlh2->mld2q_nsrcs)
@@ -1729,22 +1732,25 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
{
+ u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT,
+ 2, 0, 0, IPV6_TLV_PADN, 0 };
struct net_device *dev = idev->dev;
- struct net *net = dev_net(dev);
- struct sock *sk = net->ipv6.igmp_sk;
- struct sk_buff *skb;
- struct mld2_report *pmr;
- struct in6_addr addr_buf;
- const struct in6_addr *saddr;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- unsigned int size = mtu + hlen + tlen;
+ struct net *net = dev_net(dev);
+ const struct in6_addr *saddr;
+ struct in6_addr addr_buf;
+ struct mld2_report *pmr;
+ struct sk_buff *skb;
+ unsigned int size;
+ struct sock *sk;
int err;
- u8 ra[8] = { IPPROTO_ICMPV6, 0,
- IPV6_TLV_ROUTERALERT, 2, 0, 0,
- IPV6_TLV_PADN, 0 };
- /* we assume size > sizeof(ra) here */
+ sk = net->ipv6.igmp_sk;
+ /* we assume size > sizeof(ra) here
+ * Also try to not allocate high-order pages for big MTU
+ */
+ size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen;
skb = sock_alloc_send_skb(sk, size, 1, &err);
if (!skb)
return NULL;
@@ -2604,7 +2610,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
psl->sl_count, psl->sl_addr, 0);
RCU_INIT_POINTER(iml->sflist, NULL);
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 878fcec14949..aeb35d26e474 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -247,54 +247,6 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
return err;
}
-static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
- u8 **nexthdr)
-{
- u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb_tail_pointer(skb) -
- skb_network_header(skb);
- int found_rhdr = 0;
-
- *nexthdr = &ipv6_hdr(skb)->nexthdr;
-
- while (offset + 1 <= packet_len) {
-
- switch (**nexthdr) {
- case NEXTHDR_HOP:
- break;
- case NEXTHDR_ROUTING:
- found_rhdr = 1;
- break;
- case NEXTHDR_DEST:
- /*
- * HAO MUST NOT appear more than once.
- * XXX: It is better to try to find by the end of
- * XXX: packet if HAO exists.
- */
- if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
- net_dbg_ratelimited("mip6: hao exists already, override\n");
- return offset;
- }
-
- if (found_rhdr)
- return offset;
-
- break;
- default:
- return offset;
- }
-
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
- }
-
- return offset;
-}
-
static int mip6_destopt_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
@@ -324,7 +276,6 @@ static void mip6_destopt_destroy(struct xfrm_state *x)
}
static const struct xfrm_type mip6_destopt_type = {
- .description = "MIP6DESTOPT",
.owner = THIS_MODULE,
.proto = IPPROTO_DSTOPTS,
.flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_LOCAL_COADDR,
@@ -333,7 +284,6 @@ static const struct xfrm_type mip6_destopt_type = {
.input = mip6_destopt_input,
.output = mip6_destopt_output,
.reject = mip6_destopt_reject,
- .hdr_offset = mip6_destopt_offset,
};
static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
@@ -383,53 +333,6 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
-static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
- u8 **nexthdr)
-{
- u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
- const unsigned char *nh = skb_network_header(skb);
- unsigned int packet_len = skb_tail_pointer(skb) -
- skb_network_header(skb);
- int found_rhdr = 0;
-
- *nexthdr = &ipv6_hdr(skb)->nexthdr;
-
- while (offset + 1 <= packet_len) {
-
- switch (**nexthdr) {
- case NEXTHDR_HOP:
- break;
- case NEXTHDR_ROUTING:
- if (offset + 3 <= packet_len) {
- struct ipv6_rt_hdr *rt;
- rt = (struct ipv6_rt_hdr *)(nh + offset);
- if (rt->type != 0)
- return offset;
- }
- found_rhdr = 1;
- break;
- case NEXTHDR_DEST:
- if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
- return offset;
-
- if (found_rhdr)
- return offset;
-
- break;
- default:
- return offset;
- }
-
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
- }
-
- return offset;
-}
-
static int mip6_rthdr_init_state(struct xfrm_state *x)
{
if (x->id.spi) {
@@ -456,7 +359,6 @@ static void mip6_rthdr_destroy(struct xfrm_state *x)
}
static const struct xfrm_type mip6_rthdr_type = {
- .description = "MIP6RT",
.owner = THIS_MODULE,
.proto = IPPROTO_ROUTING,
.flags = XFRM_TYPE_NON_FRAGMENT | XFRM_TYPE_REMOTE_COADDR,
@@ -464,7 +366,6 @@ static const struct xfrm_type mip6_rthdr_type = {
.destructor = mip6_rthdr_destroy,
.input = mip6_rthdr_input,
.output = mip6_rthdr_output,
- .hdr_offset = mip6_rthdr_offset,
};
static int __init mip6_init(void)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c467c6419893..4b098521a44c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1391,12 +1391,6 @@ skip_defrtr:
}
}
- /*
- * Send a notify if RA changed managed/otherconf flags or timer settings
- */
- if (send_ifinfo_notify)
- inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
-
skip_linkparms:
/*
@@ -1496,6 +1490,11 @@ skip_routeinfo:
memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
mtu = ntohl(n);
+ if (in6_dev->ra_mtu != mtu) {
+ in6_dev->ra_mtu = mtu;
+ send_ifinfo_notify = true;
+ }
+
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
} else if (in6_dev->cnf.mtu6 != mtu) {
@@ -1519,6 +1518,12 @@ skip_routeinfo:
ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
+ /* Send a notify if RA changed managed/otherconf flags or
+ * timer settings or ra_mtu value
+ */
+ if (send_ifinfo_notify)
+ inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+
fib6_info_release(rt);
if (neigh)
neigh_release(neigh);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e810a23baf99..de2cf3943b91 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -51,7 +51,7 @@ ip6_packet_match(const struct sk_buff *skb,
const char *outdev,
const struct ip6t_ip6 *ip6info,
unsigned int *protoff,
- int *fragoff, bool *hotdrop)
+ u16 *fragoff, bool *hotdrop)
{
unsigned long ret;
const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index bb784ea7bbd3..727ee8097012 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -19,15 +19,12 @@ MODULE_DESCRIPTION("ip6tables filter table");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
-static int __net_init ip6table_filter_table_init(struct net *net);
-
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_FILTER,
- .table_init = ip6table_filter_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -44,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly;
static bool forward = true;
module_param(forward, bool, 0000);
-static int __net_init ip6table_filter_table_init(struct net *net)
+static int ip6table_filter_table_init(struct net *net)
{
struct ip6t_replace *repl;
int err;
@@ -63,7 +60,7 @@ static int __net_init ip6table_filter_table_init(struct net *net)
static int __net_init ip6table_filter_net_init(struct net *net)
{
- if (net == &init_net || !forward)
+ if (!forward)
return ip6table_filter_table_init(net);
return 0;
@@ -87,15 +84,24 @@ static struct pernet_operations ip6table_filter_net_ops = {
static int __init ip6table_filter_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_filter,
+ ip6table_filter_table_init);
+
+ if (ret < 0)
+ return ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
- if (IS_ERR(filter_ops))
+ if (IS_ERR(filter_ops)) {
+ xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
+ }
ret = register_pernet_subsys(&ip6table_filter_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
+ return ret;
+ }
return ret;
}
@@ -103,6 +109,7 @@ static int __init ip6table_filter_init(void)
static void __exit ip6table_filter_fini(void)
{
unregister_pernet_subsys(&ip6table_filter_net_ops);
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c76cffd63041..9b518ce37d6a 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -20,15 +20,12 @@ MODULE_DESCRIPTION("ip6tables mangle table");
(1 << NF_INET_LOCAL_OUT) | \
(1 << NF_INET_POST_ROUTING))
-static int __net_init ip6table_mangle_table_init(struct net *net);
-
static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_MANGLE,
- .table_init = ip6table_mangle_table_init,
};
static unsigned int
@@ -76,7 +73,7 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
}
static struct nf_hook_ops *mangle_ops __read_mostly;
-static int __net_init ip6table_mangle_table_init(struct net *net)
+static int ip6table_mangle_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -106,29 +103,32 @@ static struct pernet_operations ip6table_mangle_net_ops = {
static int __init ip6table_mangle_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_mangler,
+ ip6table_mangle_table_init);
+
+ if (ret < 0)
+ return ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
- if (IS_ERR(mangle_ops))
+ if (IS_ERR(mangle_ops)) {
+ xt_unregister_template(&packet_mangler);
return PTR_ERR(mangle_ops);
+ }
ret = register_pernet_subsys(&ip6table_mangle_net_ops);
if (ret < 0) {
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
return ret;
}
- ret = ip6table_mangle_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_mangle_net_ops);
- kfree(mangle_ops);
- }
return ret;
}
static void __exit ip6table_mangle_fini(void)
{
unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index b0292251e655..921c1723a01e 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -19,8 +19,6 @@ struct ip6table_nat_pernet {
struct nf_hook_ops *nf_nat_ops;
};
-static int __net_init ip6table_nat_table_init(struct net *net);
-
static unsigned int ip6table_nat_net_id __read_mostly;
static const struct xt_table nf_nat_ipv6_table = {
@@ -31,7 +29,6 @@ static const struct xt_table nf_nat_ipv6_table = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
- .table_init = ip6table_nat_table_init,
};
static unsigned int ip6table_nat_do_chain(void *priv,
@@ -115,7 +112,7 @@ static void ip6t_nat_unregister_lookups(struct net *net)
kfree(ops);
}
-static int __net_init ip6table_nat_table_init(struct net *net)
+static int ip6table_nat_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -157,20 +154,23 @@ static struct pernet_operations ip6table_nat_net_ops = {
static int __init ip6table_nat_init(void)
{
- int ret = register_pernet_subsys(&ip6table_nat_net_ops);
+ int ret = xt_register_template(&nf_nat_ipv6_table,
+ ip6table_nat_table_init);
- if (ret)
+ if (ret < 0)
return ret;
- ret = ip6table_nat_table_init(&init_net);
+ ret = register_pernet_subsys(&ip6table_nat_net_ops);
if (ret)
- unregister_pernet_subsys(&ip6table_nat_net_ops);
+ xt_unregister_template(&nf_nat_ipv6_table);
+
return ret;
}
static void __exit ip6table_nat_exit(void)
{
unregister_pernet_subsys(&ip6table_nat_net_ops);
+ xt_unregister_template(&nf_nat_ipv6_table);
}
module_init(ip6table_nat_init);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index f63c106c521e..4f2a04af71d3 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -11,8 +11,6 @@
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
-static int __net_init ip6table_raw_table_init(struct net *net);
-
static bool raw_before_defrag __read_mostly;
MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
module_param(raw_before_defrag, bool, 0000);
@@ -23,7 +21,6 @@ static const struct xt_table packet_raw = {
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_RAW,
- .table_init = ip6table_raw_table_init,
};
static const struct xt_table packet_raw_before_defrag = {
@@ -32,7 +29,6 @@ static const struct xt_table packet_raw_before_defrag = {
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
- .table_init = ip6table_raw_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -45,7 +41,7 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *rawtable_ops __read_mostly;
-static int __net_init ip6table_raw_table_init(struct net *net)
+static int ip6table_raw_table_init(struct net *net)
{
struct ip6t_replace *repl;
const struct xt_table *table = &packet_raw;
@@ -79,37 +75,39 @@ static struct pernet_operations ip6table_raw_net_ops = {
static int __init ip6table_raw_init(void)
{
- int ret;
const struct xt_table *table = &packet_raw;
+ int ret;
if (raw_before_defrag) {
table = &packet_raw_before_defrag;
-
pr_info("Enabling raw table before defrag\n");
}
+ ret = xt_register_template(table, ip6table_raw_table_init);
+ if (ret < 0)
+ return ret;
+
/* Register hooks */
rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
- if (IS_ERR(rawtable_ops))
+ if (IS_ERR(rawtable_ops)) {
+ xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
+ }
ret = register_pernet_subsys(&ip6table_raw_net_ops);
if (ret < 0) {
kfree(rawtable_ops);
+ xt_unregister_template(table);
return ret;
}
- ret = ip6table_raw_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_raw_net_ops);
- kfree(rawtable_ops);
- }
return ret;
}
static void __exit ip6table_raw_fini(void)
{
unregister_pernet_subsys(&ip6table_raw_net_ops);
+ xt_unregister_template(&packet_raw);
kfree(rawtable_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 8dc335cf450b..931674034d8b 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -24,15 +24,12 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
-static int __net_init ip6table_security_table_init(struct net *net);
-
static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_SECURITY,
- .table_init = ip6table_security_table_init,
};
static unsigned int
@@ -44,7 +41,7 @@ ip6table_security_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *sectbl_ops __read_mostly;
-static int __net_init ip6table_security_table_init(struct net *net)
+static int ip6table_security_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -74,29 +71,32 @@ static struct pernet_operations ip6table_security_net_ops = {
static int __init ip6table_security_init(void)
{
- int ret;
+ int ret = xt_register_template(&security_table,
+ ip6table_security_table_init);
+
+ if (ret < 0)
+ return ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
- if (IS_ERR(sectbl_ops))
+ if (IS_ERR(sectbl_ops)) {
+ xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
+ }
ret = register_pernet_subsys(&ip6table_security_net_ops);
if (ret < 0) {
kfree(sectbl_ops);
+ xt_unregister_template(&security_table);
return ret;
}
- ret = ip6table_security_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_security_net_ops);
- kfree(sectbl_ops);
- }
return ret;
}
static void __exit ip6table_security_fini(void)
{
unregister_pernet_subsys(&ip6table_security_net_ops);
+ xt_unregister_template(&security_table);
kfree(sectbl_ops);
}
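
The ip6table filter/mangle/nat/raw/security changes above all follow the same init/exit shape: register the xt template, allocate the hook ops, register the pernet subsys, unwind in reverse on any failure, and release all three in exit. A compact sketch of that acquire-then-unwind pattern follows; every name is a placeholder, and it uses goto-style unwinding where the patches clean up inline:

static int get_template(void) { return 0; }	/* xt_register_template() analogue */
static int get_hooks(void)    { return 0; }	/* xt_hook_ops_alloc() analogue */
static int get_pernet(void)   { return 0; }	/* register_pernet_subsys() analogue */
static void put_template(void) { }
static void put_hooks(void)    { }
static void put_pernet(void)   { }

static int example_init(void)
{
	int ret = get_template();

	if (ret < 0)
		return ret;

	ret = get_hooks();
	if (ret < 0)
		goto err_template;

	ret = get_pernet();
	if (ret < 0)
		goto err_hooks;

	return 0;

err_hooks:
	put_hooks();
err_template:
	put_template();
	return ret;
}

static void example_exit(void)
{
	/* teardown releases everything, in the reverse order of init */
	put_pernet();
	put_hooks();
	put_template();
}
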
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index 6fd54744cbc3..aa5bb8789ba0 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -99,7 +99,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
{
__be16 dport, sport;
const struct in6_addr *daddr = NULL, *saddr = NULL;
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var;
struct sk_buff *data_skb = NULL;
int doff = 0;
int thoff = 0, tproto;
@@ -129,8 +129,6 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
thoff + sizeof(*hp);
} else if (tproto == IPPROTO_ICMPV6) {
- struct ipv6hdr ipv6_var;
-
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport, &ipv6_var))
return NULL;
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 7969d1f3018d..ed69c768797e 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -28,7 +28,7 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(nft_net(pkt), pkt->xt.state->sk, pkt->skb,
+ nf_send_reset6(nft_net(pkt), nft_sk(pkt), pkt->skb,
nft_hook(pkt));
break;
default:
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index af36acc1a644..2880dc7d9a49 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -15,29 +15,11 @@ static u32 __ipv6_select_ident(struct net *net,
const struct in6_addr *dst,
const struct in6_addr *src)
{
- const struct {
- struct in6_addr dst;
- struct in6_addr src;
- } __aligned(SIPHASH_ALIGNMENT) combined = {
- .dst = *dst,
- .src = *src,
- };
- u32 hash, id;
-
- /* Note the following code is not safe, but this is okay. */
- if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
- get_random_bytes(&net->ipv4.ip_id_key,
- sizeof(net->ipv4.ip_id_key));
-
- hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
-
- /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
- * set the hight order instead thus minimizing possible future
- * collisions.
- */
- id = ip_idents_reserve(hash, 1);
- if (unlikely(!id))
- id = 1 << 31;
+ u32 id;
+
+ do {
+ id = prandom_u32();
+ } while (!id);
return id;
}
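
The output_core.c hunk above drops the siphash-based fragmentation identifier and simply draws a random value, redrawing whenever zero comes up since an ID of 0 is treated as unset. A minimal sketch of that retry-until-nonzero pattern (plain C; the generator is passed in as a stand-in for prandom_u32()):

#include <stdint.h>

static uint32_t pick_nonzero_id(uint32_t (*gen)(void))
{
	uint32_t id;

	do {
		id = gen();
	} while (!id);	/* 0 means "no identifier yet", so never return it */

	return id;
}
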
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index bf3646b57c68..60f1e4f5be5a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -354,7 +354,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
if (np->recverr || harderr) {
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d417e514bd52..dbc224023977 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -41,6 +41,7 @@
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
@@ -1484,17 +1485,24 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
static u32 rt6_exception_hash(const struct in6_addr *dst,
const struct in6_addr *src)
{
- static u32 seed __read_mostly;
- u32 val;
+ static siphash_key_t rt6_exception_key __read_mostly;
+ struct {
+ struct in6_addr dst;
+ struct in6_addr src;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .dst = *dst,
+ };
+ u64 val;
- net_get_random_once(&seed, sizeof(seed));
- val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed);
+ net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
#ifdef CONFIG_IPV6_SUBTREES
if (src)
- val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val);
+ combined.src = *src;
#endif
- return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
+ val = siphash(&combined, sizeof(combined), &rt6_exception_key);
+
+ return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}
/* Helper function to find the cached rt in the hash table
@@ -1649,6 +1657,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
struct fib6_nh *nh = res->nh;
+ int max_depth;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
@@ -1703,7 +1712,9 @@ static int rt6_insert_exception(struct rt6_info *nrt,
bucket->depth++;
net->ipv6.rt6_stats->fib_rt_cache++;
- if (bucket->depth > FIB6_MAX_DEPTH)
+ /* Randomize max depth to avoid some side channel attacks. */
+ max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH);
+ while (bucket->depth > max_depth)
rt6_exception_remove_oldest(bucket);
out:
@@ -2326,12 +2337,131 @@ out:
}
}
+static u32 rt6_multipath_custom_hash_outer(const struct net *net,
+ const struct sk_buff *skb,
+ bool *p_has_inner)
+{
+ u32 hash_fields = ip6_multipath_hash_fields(net);
+ struct flow_keys keys, hash_keys;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+ hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+ hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
+ hash_keys.tags.flow_label = keys.tags.flow_label;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+ hash_keys.ports.src = keys.ports.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+ hash_keys.ports.dst = keys.ports.dst;
+
+ *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION);
+ return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 rt6_multipath_custom_hash_inner(const struct net *net,
+ const struct sk_buff *skb,
+ bool has_inner)
+{
+ u32 hash_fields = ip6_multipath_hash_fields(net);
+ struct flow_keys keys, hash_keys;
+
+ /* We assume the packet carries an encapsulation, but if none was
+ * encountered during dissection of the outer flow, then there is no
+ * point in calling the flow dissector again.
+ */
+ if (!has_inner)
+ return 0;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ skb_flow_dissect_flow_keys(skb, &keys, 0);
+
+ if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION))
+ return 0;
+
+ if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+ hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+ hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst;
+ } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP)
+ hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP)
+ hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL)
+ hash_keys.tags.flow_label = keys.tags.flow_label;
+ }
+
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO)
+ hash_keys.basic.ip_proto = keys.basic.ip_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT)
+ hash_keys.ports.src = keys.ports.src;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT)
+ hash_keys.ports.dst = keys.ports.dst;
+
+ return flow_hash_from_keys(&hash_keys);
+}
+
+static u32 rt6_multipath_custom_hash_skb(const struct net *net,
+ const struct sk_buff *skb)
+{
+ u32 mhash, mhash_inner;
+ bool has_inner = true;
+
+ mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner);
+ mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner);
+
+ return jhash_2words(mhash, mhash_inner, 0);
+}
+
+static u32 rt6_multipath_custom_hash_fl6(const struct net *net,
+ const struct flowi6 *fl6)
+{
+ u32 hash_fields = ip6_multipath_hash_fields(net);
+ struct flow_keys hash_keys;
+
+ if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
+ return 0;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP)
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP)
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO)
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL)
+ hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT)
+ hash_keys.ports.src = fl6->fl6_sport;
+ if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT)
+ hash_keys.ports.dst = fl6->fl6_dport;
+
+ return flow_hash_from_keys(&hash_keys);
+}
+
/* if skb is set it will be used and fl6 can be NULL */
u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
- u32 mhash;
+ u32 mhash = 0;
switch (ip6_multipath_hash_policy(net)) {
case 0:
@@ -2345,6 +2475,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
+ mhash = flow_hash_from_keys(&hash_keys);
break;
case 1:
if (skb) {
@@ -2376,6 +2507,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.ports.dst = fl6->fl6_dport;
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
+ mhash = flow_hash_from_keys(&hash_keys);
break;
case 2:
memset(&hash_keys, 0, sizeof(hash_keys));
@@ -2412,9 +2544,15 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
+ mhash = flow_hash_from_keys(&hash_keys);
+ break;
+ case 3:
+ if (skb)
+ mhash = rt6_multipath_custom_hash_skb(net, skb);
+ else
+ mhash = rt6_multipath_custom_hash_fl6(net, fl6);
break;
}
- mhash = flow_hash_from_keys(&hash_keys);
return mhash >> 1;
}
@@ -3074,25 +3212,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
{
- struct inet6_dev *idev;
- unsigned int mtu;
-
- mtu = dst_metric_raw(dst, RTAX_MTU);
- if (mtu)
- goto out;
-
- mtu = IPV6_MIN_MTU;
-
- rcu_read_lock();
- idev = __in6_dev_get(dst->dev);
- if (idev)
- mtu = idev->cnf.mtu6;
- rcu_read_unlock();
-
-out:
- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
-
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ return ip6_dst_mtu_maybe_forward(dst, false);
}
EXPORT_INDIRECT_CALLABLE(ip6_mtu);
@@ -3517,8 +3637,7 @@ out:
if (err) {
lwtstate_put(fib6_nh->fib_nh_lws);
fib6_nh->fib_nh_lws = NULL;
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
return err;
@@ -3642,7 +3761,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
err = PTR_ERR(rt->fib6_metrics);
/* Do not leave garbage there. */
rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
- goto out;
+ goto out_free;
}
if (cfg->fc_flags & RTF_ADDRCONF)
@@ -6511,7 +6630,7 @@ int __init ip6_route_init(void)
ret = -ENOMEM;
ip6_dst_ops_template.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!ip6_dst_ops_template.kmem_cachep)
goto out;
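
In the rt6_insert_exception() hunk earlier in this route.c diff, the fixed bucket-depth cap becomes a per-insert randomized limit, and the oldest entries are evicted until the bucket fits under it. A minimal sketch of that eviction idea, with all names and the base value chosen only for the illustration:

#include <stdint.h>

#define BASE_DEPTH 5	/* stands in for FIB6_MAX_DEPTH */

static void trim_bucket(unsigned int *depth,
			uint32_t (*rnd_below)(uint32_t),	/* prandom_u32_max() analogue */
			void (*evict_oldest)(void))
{
	/* per-insert limit somewhere in [BASE_DEPTH, 2 * BASE_DEPTH) */
	unsigned int max_depth = BASE_DEPTH + rnd_below(BASE_DEPTH);

	while (*depth > max_depth) {
		evict_oldest();
		(*depth)--;
	}
}
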
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59c47de..3adc5d9211ad 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -26,6 +26,7 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
+#include <linux/netfilter.h>
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
@@ -295,11 +296,19 @@ static int seg6_do_srh(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
return 0;
}
-static int seg6_input(struct sk_buff *skb)
+static int seg6_input_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return dst_input(skb);
+}
+
+static int seg6_input_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@@ -337,15 +346,46 @@ static int seg6_input(struct sk_buff *skb)
if (unlikely(err))
return err;
- return dst_input(skb);
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, seg6_input_finish);
+
+ return seg6_input_finish(dev_net(skb->dev), NULL, skb);
}
-static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int seg6_input_nf(struct sk_buff *skb)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct net *net = dev_net(skb->dev);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
+ skb, NULL, dev, seg6_input_core);
+ case htons(ETH_P_IPV6):
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
+ skb, NULL, dev, seg6_input_core);
+ }
+
+ return -EINVAL;
+}
+
+static int seg6_input(struct sk_buff *skb)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return seg6_input_nf(skb);
+
+ return seg6_input_core(dev_net(skb->dev), NULL, skb);
+}
+
+static int seg6_output_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
struct seg6_lwt *slwt;
- int err = -EINVAL;
+ int err;
err = seg6_do_srh(skb);
if (unlikely(err))
@@ -387,12 +427,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
+
return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
}
+static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, dev, seg6_output_core);
+ case htons(ETH_P_IPV6):
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, dev, seg6_output_core);
+ }
+
+ return -EINVAL;
+}
+
+static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return seg6_output_nf(net, sk, skb);
+
+ return seg6_output_core(net, sk, skb);
+}
+
static int seg6_build_state(struct net *net, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 4ff38cb08f4b..2dc40b3f373e 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -30,6 +30,7 @@
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
+#include <linux/netfilter.h>
#define SEG6_F_ATTR(i) BIT(i)
@@ -87,10 +88,10 @@ struct seg6_end_dt_info {
int vrf_ifindex;
int vrf_table;
- /* tunneled packet proto and family (IPv4 or IPv6) */
- __be16 proto;
+ /* tunneled packet family (IPv4 or IPv6).
+ * Protocol and header length are inferred from family.
+ */
u16 family;
- int hdrlen;
};
struct pcpu_seg6_local_counters {
@@ -413,12 +414,33 @@ drop:
return -EINVAL;
}
+static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct in6_addr *nhaddr = NULL;
+ struct seg6_local_lwt *slwt;
+
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+
+ /* The inner packet is not associated to any local interface,
+ * so we do not call netif_rx().
+ *
+ * If slwt->nh6 is set to ::, then lookup the nexthop for the
+ * inner packet's DA. Otherwise, use the specified nexthop.
+ */
+ if (!ipv6_addr_any(&slwt->nh6))
+ nhaddr = &slwt->nh6;
+
+ seg6_lookup_nexthop(skb, nhaddr, 0);
+
+ return dst_input(skb);
+}
+
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
- struct in6_addr *nhaddr = NULL;
-
/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
*/
@@ -429,40 +451,30 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
- /* The inner packet is not associated to any local interface,
- * so we do not call netif_rx().
- *
- * If slwt->nh6 is set to ::, then lookup the nexthop for the
- * inner packet's DA. Otherwise, use the specified nexthop.
- */
-
- if (!ipv6_addr_any(&slwt->nh6))
- nhaddr = &slwt->nh6;
-
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
- seg6_lookup_nexthop(skb, nhaddr, 0);
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, input_action_end_dx6_finish);
- return dst_input(skb);
+ return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
-static int input_action_end_dx4(struct sk_buff *skb,
- struct seg6_local_lwt *slwt)
+static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
int err;
- if (!decap_and_validate(skb, IPPROTO_IPIP))
- goto drop;
-
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto drop;
-
- skb->protocol = htons(ETH_P_IP);
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
iph = ip_hdr(skb);
@@ -470,14 +482,34 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb);
- skb_set_transport_header(skb, sizeof(struct iphdr));
-
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
- if (err)
- goto drop;
+ if (err) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
return dst_input(skb);
+}
+
+static int input_action_end_dx4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb->protocol = htons(ETH_P_IP);
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+ nf_reset_ct(skb);
+
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, input_action_end_dx4_finish);
+
+ return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -521,19 +553,6 @@ static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
info->net = net;
info->vrf_ifindex = vrf_ifindex;
- switch (family) {
- case AF_INET:
- info->proto = htons(ETH_P_IP);
- info->hdrlen = sizeof(struct iphdr);
- break;
- case AF_INET6:
- info->proto = htons(ETH_P_IPV6);
- info->hdrlen = sizeof(struct ipv6hdr);
- break;
- default:
- return -EINVAL;
- }
-
info->family = family;
info->mode = DT_VRF_MODE;
@@ -622,22 +641,45 @@ error:
}
static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
- struct seg6_local_lwt *slwt)
+ struct seg6_local_lwt *slwt, u16 family)
{
struct seg6_end_dt_info *info = &slwt->dt_info;
struct net_device *vrf;
+ __be16 protocol;
+ int hdrlen;
vrf = end_dt_get_vrf_rcu(skb, info);
if (unlikely(!vrf))
goto drop;
- skb->protocol = info->proto;
+ switch (family) {
+ case AF_INET:
+ protocol = htons(ETH_P_IP);
+ hdrlen = sizeof(struct iphdr);
+ break;
+ case AF_INET6:
+ protocol = htons(ETH_P_IPV6);
+ hdrlen = sizeof(struct ipv6hdr);
+ break;
+ case AF_UNSPEC:
+ fallthrough;
+ default:
+ goto drop;
+ }
+
+ if (unlikely(info->family != AF_UNSPEC && info->family != family)) {
+ pr_warn_once("seg6local: SRv6 End.DT* family mismatch");
+ goto drop;
+ }
+
+ skb->protocol = protocol;
skb_dst_drop(skb);
- skb_set_transport_header(skb, info->hdrlen);
+ skb_set_transport_header(skb, hdrlen);
+ nf_reset_ct(skb);
- return end_dt_vrf_rcv(skb, info->family, vrf);
+ return end_dt_vrf_rcv(skb, family, vrf);
drop:
kfree_skb(skb);
@@ -656,7 +698,7 @@ static int input_action_end_dt4(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
- skb = end_dt_vrf_core(skb, slwt);
+ skb = end_dt_vrf_core(skb, slwt, AF_INET);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
@@ -739,7 +781,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
goto legacy_mode;
/* DT6_VRF_MODE */
- skb = end_dt_vrf_core(skb, slwt);
+ skb = end_dt_vrf_core(skb, slwt, AF_INET6);
if (!skb)
/* packet has been processed and consumed by the VRF */
return 0;
@@ -767,6 +809,36 @@ drop:
return -EINVAL;
}
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static int seg6_end_dt46_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ return __seg6_end_dt_vrf_build(slwt, cfg, AF_UNSPEC, extack);
+}
+
+static int input_action_end_dt46(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ unsigned int off = 0;
+ int nexthdr;
+
+ nexthdr = ipv6_find_hdr(skb, &off, -1, NULL, NULL);
+ if (unlikely(nexthdr < 0))
+ goto drop;
+
+ switch (nexthdr) {
+ case IPPROTO_IPIP:
+ return input_action_end_dt4(skb, slwt);
+ case IPPROTO_IPV6:
+ return input_action_end_dt6(skb, slwt);
+ }
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+#endif
+
/* push an SRH on top of the current one */
static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
@@ -969,6 +1041,17 @@ static struct seg6_action_desc seg6_action_table[] = {
.input = input_action_end_dt6,
},
{
+ .action = SEG6_LOCAL_ACTION_END_DT46,
+ .attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ .input = input_action_end_dt46,
+ .slwt_ops = {
+ .build_state = seg6_end_dt46_build,
+ },
+#endif
+ },
+ {
.action = SEG6_LOCAL_ACTION_END_B6,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
.optattrs = SEG6_F_LOCAL_COUNTERS,
@@ -1028,7 +1111,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
u64_stats_update_end(&pcounters->syncp);
}
-static int seg6_local_input(struct sk_buff *skb)
+static int seg6_local_input_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc;
@@ -1036,11 +1120,6 @@ static int seg6_local_input(struct sk_buff *skb)
unsigned int len = skb->len;
int rc;
- if (skb->protocol != htons(ETH_P_IPV6)) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
@@ -1054,6 +1133,21 @@ static int seg6_local_input(struct sk_buff *skb)
return rc;
}
+static int seg6_local_input(struct sk_buff *skb)
+{
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ seg6_local_input_core);
+
+ return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
+}
+
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index f7c8110ece5f..ef0c7a7c18e2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -299,9 +299,8 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
}
-static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
+static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a)
{
- struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data;
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_prl kprl, *kp;
struct ip_tunnel_prl_entry *prl;
@@ -321,7 +320,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* we try harder to allocate.
*/
kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
- kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
rcu_read_lock();
@@ -334,7 +333,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* For root users, retry allocating enough memory for
* the answer.
*/
- kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
+ __GFP_NOWARN);
if (!kp) {
ret = -ENOMEM;
goto out;
@@ -453,8 +453,8 @@ out:
return err;
}
-static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
- int cmd)
+static int ipip6_tunnel_prl_ctl(struct net_device *dev,
+ struct ip_tunnel_prl __user *data, int cmd)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_prl prl;
@@ -465,7 +465,7 @@ static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
return -EINVAL;
- if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+ if (copy_from_user(&prl, data, sizeof(prl)))
return -EFAULT;
switch (cmd) {
@@ -710,6 +710,8 @@ static int ipip6_rcv(struct sk_buff *skb)
* old iph is no longer valid
*/
iph = (const struct iphdr *)skb_mac_header(skb);
+ skb_reset_mac_header(skb);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -780,6 +782,8 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
tpi = &ipip_tpi;
if (iptunnel_pull_header(skb, 0, tpi->proto, false))
goto drop;
+ skb_reset_mac_header(skb);
+
return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
@@ -973,7 +977,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
if (df) {
mtu = dst_mtu(&rt->dst) - t_hlen;
- if (mtu < 68) {
+ if (mtu < IPV4_MIN_MTU) {
dev->stats.collisions++;
ip_rt_put(rt);
goto tx_error;
@@ -1193,14 +1197,14 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
static int
-ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
+ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_6rd ip6rd;
struct ip_tunnel_parm p;
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
return -EFAULT;
t = ipip6_tunnel_locate(t->net, &p, 0);
}
@@ -1211,13 +1215,14 @@ ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
ip6rd.relay_prefix = t->ip6rd.relay_prefix;
ip6rd.prefixlen = t->ip6rd.prefixlen;
ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd)))
+ if (copy_to_user(data, &ip6rd, sizeof(ip6rd)))
return -EFAULT;
return 0;
}
static int
-ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data,
+ int cmd)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_6rd ip6rd;
@@ -1225,7 +1230,7 @@ ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd)))
+ if (copy_from_user(&ip6rd, data, sizeof(ip6rd)))
return -EFAULT;
if (cmd != SIOCDEL6RD) {
@@ -1364,27 +1369,28 @@ ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
}
static int
-ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
switch (cmd) {
case SIOCGETTUNNEL:
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
case SIOCDELTUNNEL:
- return ip_tunnel_ioctl(dev, ifr, cmd);
+ return ip_tunnel_siocdevprivate(dev, ifr, data, cmd);
case SIOCGETPRL:
- return ipip6_tunnel_get_prl(dev, ifr);
+ return ipip6_tunnel_get_prl(dev, data);
case SIOCADDPRL:
case SIOCDELPRL:
case SIOCCHGPRL:
- return ipip6_tunnel_prl_ctl(dev, ifr, cmd);
+ return ipip6_tunnel_prl_ctl(dev, data, cmd);
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
- return ipip6_tunnel_get6rd(dev, ifr);
+ return ipip6_tunnel_get6rd(dev, data);
case SIOCADD6RD:
case SIOCCHG6RD:
case SIOCDEL6RD:
- return ipip6_tunnel_6rdctl(dev, ifr, cmd);
+ return ipip6_tunnel_6rdctl(dev, data, cmd);
#endif
default:
return -EINVAL;
@@ -1395,7 +1401,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_init = ipip6_tunnel_init,
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
- .ndo_do_ioctl = ipip6_tunnel_ioctl,
+ .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_tunnel_ctl = ipip6_tunnel_ctl,
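The sit conversion above only changes how the user pointer reaches the handlers: SIOCGETPRL, SIOCADDPRL and the 6RD ioctls are private device ioctls, so they now arrive through ndo_siocdevprivate with the ifr_data pointer already extracted instead of each handler digging it out of struct ifreq. Nothing changes for user space; a rough sketch of the existing calling convention (device name and buffer size are made up for illustration, and reasonably recent libc/kernel headers are assumed):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/if_tunnel.h>

int main(void)
{
	struct ip_tunnel_prl prl[17];	/* prl[0] = request, room for 16 results */
	struct ifreq ifr;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	memset(prl, 0, sizeof(prl));
	prl[0].addr = 0;			/* INADDR_ANY: match every PRL entry */
	prl[0].datalen = 16 * sizeof(prl[0]);	/* bytes available after prl[0] */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "sit1", IFNAMSIZ - 1);	/* hypothetical device */
	ifr.ifr_data = (char *)prl;

	if (ioctl(fd, SIOCGETPRL, &ifr) < 0)
		perror("SIOCGETPRL");
	else
		printf("PRL result bytes: %u\n", prl[0].datalen);

	close(fd);
	return 0;
}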
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 27102c3d6e1d..d53dd142bf87 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,13 +17,20 @@
#include <net/addrconf.h>
#include <net/inet_frag.h>
#include <net/netevent.h>
+#include <net/ip_fib.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+#include <linux/ioam6.h>
static int two = 2;
+static int three = 3;
static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static u32 rt6_multipath_hash_fields_all_mask =
+ FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
+static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
+static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -40,6 +47,22 @@ static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
return ret;
}
+static int
+proc_rt6_multipath_hash_fields(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_fields);
+ ret = proc_douintvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
+
static struct ctl_table ipv6_table_template[] = {
{
.procname = "bindv6only",
@@ -149,7 +172,16 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_rt6_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = &two,
+ .extra2 = &three,
+ },
+ {
+ .procname = "fib_multipath_hash_fields",
+ .data = &init_net.ipv6.sysctl.multipath_hash_fields,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_rt6_multipath_hash_fields,
+ .extra1 = SYSCTL_ONE,
+ .extra2 = &rt6_multipath_hash_fields_all_mask,
},
{
.procname = "seg6_flowlabel",
@@ -167,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
+ {
+ .procname = "ioam6_id",
+ .data = &init_net.ipv6.sysctl.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra2 = &ioam6_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &init_net.ipv6.sysctl.ioam6_id_wide,
+ .maxlen = sizeof(u64),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra2 = &ioam6_id_wide_max,
+ },
{ }
};
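The two ioam6 entries register per-netns sysctls in the usual ipv6 table, clamped to the IOAM6 default IDs as upper bounds. A tiny user-space sketch of setting one of them (the /proc path is inferred from the table registration above; adjust per network namespace as needed):

#include <stdio.h>

int main(void)
{
	/* path assumed from the ipv6 sysctl table registration */
	FILE *f = fopen("/proc/sys/net/ipv6/ioam6_id", "w");

	if (!f) {
		perror("ioam6_id");
		return 1;
	}
	fprintf(f, "%u\n", 42);	/* any value up to the IOAM6 default/max id */
	fclose(f);
	return 0;
}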
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5f47c0b6e3de..0ce52d46e4f8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -348,11 +348,20 @@ failure:
static void tcp_v6_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
+ u32 mtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
+
+ /* Drop requests trying to increase our current mss.
+ * Check done in __ip6_rt_update_pmtu() is too late.
+ */
+ if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -433,6 +442,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (type == ICMPV6_PKT_TOOBIG) {
+ u32 mtu = ntohl(info);
+
/* We are not interested in TCP_LISTEN and open_requests
* (SYN-ACKs send out by Linux are always <576bytes so
* they should go through unfragmented).
@@ -443,7 +454,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk))
goto out;
- tp->mtu_info = ntohl(info);
+ if (mtu < IPV6_MIN_MTU)
+ goto out;
+
+ WRITE_ONCE(tp->mtu_info, mtu);
+
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
@@ -467,7 +482,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
- sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
+ sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
tcp_done(sk);
} else
@@ -486,7 +501,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sock_owned_by_user(sk) && np->recverr) {
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
} else
sk->sk_err_soft = err;
@@ -540,7 +555,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
+ err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
@@ -1538,6 +1553,7 @@ discard:
kfree_skb(skb);
return 0;
csum_err:
+ trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1663,10 +1679,18 @@ process:
goto csum_error;
}
if (unlikely(sk->sk_state != TCP_LISTEN)) {
- inet_csk_reqsk_queue_drop_and_put(sk, req);
- goto lookup;
+ nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+ if (!nsk) {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ sk = nsk;
+ /* reuseport_migrate_sock() has already taken one sk_refcnt
+ * reference before returning.
+ */
+ } else {
+ sock_hold(sk);
}
- sock_hold(sk);
refcounted = true;
nsk = NULL;
if (!tcp_filter(sk, skb)) {
@@ -1754,6 +1778,7 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
+ trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3fcd86f4dfdc..ea53847b5b7e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -502,12 +502,14 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
const struct ipv6hdr *hdr, int offset,
struct udphdr *uh,
struct udp_table *udptable,
+ struct sock *sk,
struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, __be32 info)
{
+ int (*lookup)(struct sock *sk, struct sk_buff *skb);
int network_offset, transport_offset;
- struct sock *sk;
+ struct udp_sock *up;
network_offset = skb_network_offset(skb);
transport_offset = skb_transport_offset(skb);
@@ -518,18 +520,28 @@ static struct sock *__udp6_lib_err_encap(struct net *net,
/* Transport header needs to point to the UDP header */
skb_set_transport_header(skb, offset);
+ if (sk) {
+ up = udp_sk(sk);
+
+ lookup = READ_ONCE(up->encap_err_lookup);
+ if (lookup && lookup(sk, skb))
+ sk = NULL;
+
+ goto out;
+ }
+
sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
&hdr->saddr, uh->dest,
inet6_iif(skb), 0, udptable, skb);
if (sk) {
- int (*lookup)(struct sock *sk, struct sk_buff *skb);
- struct udp_sock *up = udp_sk(sk);
+ up = udp_sk(sk);
lookup = READ_ONCE(up->encap_err_lookup);
if (!lookup || lookup(sk, skb))
sk = NULL;
}
+out:
if (!sk) {
sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
offset, info));
@@ -558,16 +570,17 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
+
if (!sk || udp_sk(sk)->encap_type) {
/* No socket for error: try tunnels before discarding */
- sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udpv6_encap_needed_key)) {
sk = __udp6_lib_err_encap(net, hdr, offset, uh,
- udptable, skb,
+ udptable, sk, skb,
opt, type, code, info);
if (!sk)
return 0;
- }
+ } else
+ sk = ERR_PTR(-ENOENT);
if (IS_ERR(sk)) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
@@ -610,7 +623,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
out:
return 0;
}
@@ -1296,7 +1309,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
ipcm6_init(&ipc6);
- ipc6.gso_size = up->gso_size;
+ ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = sk->sk_tsflags;
ipc6.sockc.mark = sk->sk_mark;
@@ -1462,7 +1475,7 @@ do_udp_sendmsg:
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
- if (cgroup_bpf_enabled(BPF_CGROUP_UDP6_SENDMSG) && !connected) {
+ if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6, &fl6.saddr);
if (err)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8b84d534b19d..d0d280077721 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -16,13 +16,6 @@
#include <net/ip6_route.h>
#include <net/xfrm.h>
-int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
- u8 **prevhdr)
-{
- return ip6_find_1stfragopt(skb, prevhdr);
-}
-EXPORT_SYMBOL(xfrm6_find_1stfragopt);
-
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
{
struct flowi6 fl6;
@@ -56,7 +49,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
- int mtu;
+ unsigned int mtu;
bool toobig;
#ifdef CONFIG_NETFILTER
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f696d46e6910..2b31112c0856 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -291,7 +291,6 @@ static void xfrm6_tunnel_destroy(struct xfrm_state *x)
}
static const struct xfrm_type xfrm6_tunnel_type = {
- .description = "IP6IP6",
.owner = THIS_MODULE,
.proto = IPPROTO_IPV6,
.init_state = xfrm6_tunnel_init_state,
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 0fdb389c3390..18316ee3c692 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -44,6 +44,7 @@ static struct proto iucv_proto = {
};
static struct iucv_interface *pr_iucv;
+static struct iucv_handler af_iucv_handler;
/* special AF_IUCV IPRM messages */
static const u8 iprm_shutdown[8] =
@@ -91,28 +92,11 @@ static void iucv_sock_close(struct sock *sk);
static void afiucv_hs_callback_txnotify(struct sock *sk, enum iucv_tx_notify);
-/* Call Back functions */
-static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
-static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
-static void iucv_callback_connack(struct iucv_path *, u8 *);
-static int iucv_callback_connreq(struct iucv_path *, u8 *, u8 *);
-static void iucv_callback_connrej(struct iucv_path *, u8 *);
-static void iucv_callback_shutdown(struct iucv_path *, u8 *);
-
static struct iucv_sock_list iucv_sk_list = {
.lock = __RW_LOCK_UNLOCKED(iucv_sk_list.lock),
.autobind_name = ATOMIC_INIT(0)
};
-static struct iucv_handler af_iucv_handler = {
- .path_pending = iucv_callback_connreq,
- .path_complete = iucv_callback_connack,
- .path_severed = iucv_callback_connrej,
- .message_pending = iucv_callback_rx,
- .message_complete = iucv_callback_txdone,
- .path_quiesced = iucv_callback_shutdown,
-};
-
static inline void high_nmcpy(unsigned char *dst, char *src)
{
memcpy(dst, src, 8);
@@ -1060,7 +1044,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (err == 0) {
atomic_dec(&iucv->skbs_in_xmit);
skb_unlink(skb, &iucv->send_skb_q);
- kfree_skb(skb);
+ consume_skb(skb);
}
/* this error should never happen since the */
@@ -1309,7 +1293,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
}
}
- kfree_skb(skb);
+ consume_skb(skb);
if (iucv->transport == AF_IUCV_TRANS_HIPER) {
atomic_inc(&iucv->msg_recv);
if (atomic_read(&iucv->msg_recv) > iucv->msglimit) {
@@ -1772,7 +1756,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
spin_unlock_irqrestore(&list->lock, flags);
if (this) {
- kfree_skb(this);
+ consume_skb(this);
/* wake up any process waiting for sending */
iucv_sock_wake_msglim(sk);
}
@@ -1817,6 +1801,15 @@ static void iucv_callback_shutdown(struct iucv_path *path, u8 ipuser[16])
bh_unlock_sock(sk);
}
+static struct iucv_handler af_iucv_handler = {
+ .path_pending = iucv_callback_connreq,
+ .path_complete = iucv_callback_connack,
+ .path_severed = iucv_callback_connrej,
+ .message_pending = iucv_callback_rx,
+ .message_complete = iucv_callback_txdone,
+ .path_quiesced = iucv_callback_shutdown,
+};
+
/***************** HiperSockets transport callbacks ********************/
static void afiucv_swap_src_dest(struct sk_buff *skb)
{
@@ -1910,17 +1903,17 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
{
struct iucv_sock *iucv = iucv_sk(sk);
- if (!iucv)
- goto out;
- if (sk->sk_state != IUCV_BOUND)
- goto out;
+ if (!iucv || sk->sk_state != IUCV_BOUND) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
iucv->msglimit_peer = iucv_trans_hdr(skb)->window;
sk->sk_state = IUCV_CONNECTED;
sk->sk_state_change(sk);
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
@@ -1931,16 +1924,16 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
{
struct iucv_sock *iucv = iucv_sk(sk);
- if (!iucv)
- goto out;
- if (sk->sk_state != IUCV_BOUND)
- goto out;
+ if (!iucv || sk->sk_state != IUCV_BOUND) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
@@ -1952,16 +1945,18 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
struct iucv_sock *iucv = iucv_sk(sk);
/* other end of connection closed */
- if (!iucv)
- goto out;
+ if (!iucv) {
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
bh_lock_sock(sk);
if (sk->sk_state == IUCV_CONNECTED) {
sk->sk_state = IUCV_DISCONN;
sk->sk_state_change(sk);
}
bh_unlock_sock(sk);
-out:
- kfree_skb(skb);
+ consume_skb(skb);
return NET_RX_SUCCESS;
}
@@ -2114,7 +2109,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
case (AF_IUCV_FLAG_WIN):
err = afiucv_hs_callback_win(sk, skb);
if (skb->len == sizeof(struct af_iucv_trans_hdr)) {
- kfree_skb(skb);
+ consume_skb(skb);
break;
}
fallthrough; /* and receive non-zero length data */
@@ -2269,21 +2264,11 @@ static struct packet_type iucv_packet_type = {
.func = afiucv_hs_rcv,
};
-static int afiucv_iucv_init(void)
-{
- return pr_iucv->iucv_register(&af_iucv_handler, 0);
-}
-
-static void afiucv_iucv_exit(void)
-{
- pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-}
-
static int __init afiucv_init(void)
{
int err;
- if (MACHINE_IS_VM) {
+ if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) {
cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
if (unlikely(err)) {
WARN_ON(err);
@@ -2291,11 +2276,7 @@ static int __init afiucv_init(void)
goto out;
}
- pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
- if (!pr_iucv) {
- printk(KERN_WARNING "iucv_if lookup failed\n");
- memset(&iucv_userid, 0, sizeof(iucv_userid));
- }
+ pr_iucv = &iucv_if;
} else {
memset(&iucv_userid, 0, sizeof(iucv_userid));
pr_iucv = NULL;
@@ -2309,7 +2290,7 @@ static int __init afiucv_init(void)
goto out_proto;
if (pr_iucv) {
- err = afiucv_iucv_init();
+ err = pr_iucv->iucv_register(&af_iucv_handler, 0);
if (err)
goto out_sock;
}
@@ -2323,23 +2304,19 @@ static int __init afiucv_init(void)
out_notifier:
if (pr_iucv)
- afiucv_iucv_exit();
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
out_sock:
sock_unregister(PF_IUCV);
out_proto:
proto_unregister(&iucv_proto);
out:
- if (pr_iucv)
- symbol_put(iucv_if);
return err;
}
static void __exit afiucv_exit(void)
{
- if (pr_iucv) {
- afiucv_iucv_exit();
- symbol_put(iucv_if);
- }
+ if (pr_iucv)
+ pr_iucv->iucv_unregister(&af_iucv_handler, 0);
unregister_netdevice_notifier(&afiucv_netdev_notifier);
dev_remove_pack(&iucv_packet_type);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 349c6ac3313f..f3343a8541a5 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -286,19 +286,19 @@ static union iucv_param *iucv_param_irq[NR_CPUS];
*/
static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
{
- register unsigned long reg0 asm ("0");
- register unsigned long reg1 asm ("1");
- int ccode;
+ int cc;
- reg0 = command;
- reg1 = (unsigned long)parm;
asm volatile(
- " .long 0xb2f01000\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1)
- : "m" (*parm) : "cc");
- return ccode;
+ " lgr 0,%[reg0]\n"
+ " lgr 1,%[reg1]\n"
+ " .long 0xb2f01000\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=&d" (cc), "+m" (*parm)
+ : [reg0] "d" ((unsigned long)command),
+ [reg1] "d" ((unsigned long)parm)
+ : "cc", "0", "1");
+ return cc;
}
static inline int iucv_call_b2f0(int command, union iucv_param *parm)
@@ -319,19 +319,21 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm)
*/
static int __iucv_query_maxconn(void *param, unsigned long *max_pathid)
{
- register unsigned long reg0 asm ("0");
- register unsigned long reg1 asm ("1");
- int ccode;
+ unsigned long reg1 = (unsigned long)param;
+ int cc;
- reg0 = IUCV_QUERY;
- reg1 = (unsigned long) param;
asm volatile (
+ " lghi 0,%[cmd]\n"
+ " lgr 1,%[reg1]\n"
" .long 0xb2f01000\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc");
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ " lgr %[reg1],1\n"
+ : [cc] "=&d" (cc), [reg1] "+&d" (reg1)
+ : [cmd] "K" (IUCV_QUERY)
+ : "cc", "0", "1");
*max_pathid = reg1;
- return ccode;
+ return cc;
}
static int iucv_query_maxconn(void)
@@ -500,14 +502,14 @@ static void iucv_setmask_mp(void)
{
int cpu;
- get_online_cpus();
+ cpus_read_lock();
for_each_online_cpu(cpu)
/* Enable all cpus with a declared buffer. */
if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) &&
!cpumask_test_cpu(cpu, &iucv_irq_cpumask))
smp_call_function_single(cpu, iucv_allow_cpu,
NULL, 1);
- put_online_cpus();
+ cpus_read_unlock();
}
/**
@@ -540,7 +542,7 @@ static int iucv_enable(void)
size_t alloc_size;
int cpu, rc;
- get_online_cpus();
+ cpus_read_lock();
rc = -ENOMEM;
alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
@@ -553,12 +555,12 @@ static int iucv_enable(void)
if (cpumask_empty(&iucv_buffer_cpumask))
/* No cpu could declare an iucv buffer. */
goto out;
- put_online_cpus();
+ cpus_read_unlock();
return 0;
out:
kfree(iucv_path_table);
iucv_path_table = NULL;
- put_online_cpus();
+ cpus_read_unlock();
return rc;
}
@@ -571,11 +573,11 @@ out:
*/
static void iucv_disable(void)
{
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu(iucv_retrieve_cpu, NULL, 1);
kfree(iucv_path_table);
iucv_path_table = NULL;
- put_online_cpus();
+ cpus_read_unlock();
}
static int iucv_cpu_dead(unsigned int cpu)
@@ -784,7 +786,7 @@ static int iucv_reboot_event(struct notifier_block *this,
if (cpumask_empty(&iucv_irq_cpumask))
return NOTIFY_DONE;
- get_online_cpus();
+ cpus_read_lock();
on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1);
preempt_disable();
for (i = 0; i < iucv_max_pathid; i++) {
@@ -792,7 +794,7 @@ static int iucv_reboot_event(struct notifier_block *this,
iucv_sever_pathid(i, NULL);
}
preempt_enable();
- put_online_cpus();
+ cpus_read_unlock();
iucv_disable();
return NOTIFY_DONE;
}
@@ -1635,14 +1637,16 @@ struct iucv_message_pending {
u8 iptype;
u32 ipmsgid;
u32 iptrgcls;
- union {
- u32 iprmmsg1_u32;
- u8 iprmmsg1[4];
- } ln1msg1;
- union {
- u32 ipbfln1f;
- u8 iprmmsg2[4];
- } ln1msg2;
+ struct {
+ union {
+ u32 iprmmsg1_u32;
+ u8 iprmmsg1[4];
+ } ln1msg1;
+ union {
+ u32 ipbfln1f;
+ u8 iprmmsg2[4];
+ } ln1msg2;
+ } rmmsg;
u32 res1[3];
u32 ipbfln2f;
u8 ippollfg;
@@ -1660,10 +1664,10 @@ static void iucv_message_pending(struct iucv_irq_data *data)
msg.id = imp->ipmsgid;
msg.class = imp->iptrgcls;
if (imp->ipflags1 & IUCV_IPRMDATA) {
- memcpy(msg.rmmsg, imp->ln1msg1.iprmmsg1, 8);
+ memcpy(msg.rmmsg, &imp->rmmsg, 8);
msg.length = 8;
} else
- msg.length = imp->ln1msg2.ipbfln1f;
+ msg.length = imp->rmmsg.ln1msg2.ipbfln1f;
msg.reply_size = imp->ipbfln2f;
path->handler->message_pending(path, &msg);
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 6201965bd822..11a715d76a4f 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -47,7 +47,7 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
static void report_csk_error(struct sock *csk, int err)
{
csk->sk_err = EPIPE;
- csk->sk_error_report(csk);
+ sk_error_report(csk);
}
static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ef9b4ac03e7b..de24a7d474df 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -141,7 +141,6 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
struct sock *sk;
struct pfkey_sock *pfk;
- int err;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -150,10 +149,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
if (protocol != PF_KEY_V2)
return -EPROTONOSUPPORT;
- err = -ENOMEM;
sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern);
if (sk == NULL)
- goto out;
+ return -ENOMEM;
pfk = pfkey_sk(sk);
mutex_init(&pfk->dump_lock);
@@ -169,8 +167,6 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
pfkey_insert(sk);
return 0;
-out:
- return err;
}
static int pfkey_release(struct socket *sock)
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 97ae1255fcb6..b3edafa5fba4 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -488,7 +488,7 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
}
- /* We dont need to clone dst here, it is guaranteed to not disappear.
+ /* We don't need to clone dst here, it is guaranteed to not disappear.
* __dev_xmit_skb() might force a refcount if needed.
*/
skb_dst_set_noref(skb, &rt->dst);
@@ -635,7 +635,6 @@ static struct inet_protosw l2tp_ip_protosw = {
static struct net_protocol l2tp_ip_protocol __read_mostly = {
.handler = l2tp_ip_recv,
- .netns_ok = 1,
};
static int __init l2tp_ip_init(void)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index aea85f91f059..bf35710127dd 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -226,7 +226,7 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
/* If the first two bytes are 0xFF03, consider that it is the PPP's
* Address and Control fields and skip them. The L2TP module has always
* worked this way, although, in theory, the use of these fields should
- * be negociated and handled at the PPP layer. These fields are
+ * be negotiated and handled at the PPP layer. These fields are
* constant: 0xFF is the All-Stations Address and 0x03 the Unnumbered
* Information command with Poll/Final bit set to zero (RFC 1662).
*/
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 1078e14f1acf..0971ca48ba15 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -80,11 +80,9 @@ static void __lapb_insert_cb(struct lapb_cb *lapb)
static struct lapb_cb *__lapb_devtostruct(struct net_device *dev)
{
- struct list_head *entry;
struct lapb_cb *lapb, *use = NULL;
- list_for_each(entry, &lapb_list) {
- lapb = list_entry(entry, struct lapb_cb, node);
+ list_for_each_entry(lapb, &lapb_list, node) {
if (lapb->dev == dev) {
use = lapb;
break;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 7180979114e4..3086f4a6ae68 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
{
u8 rc = LLC_PDU_LEN_U;
- if (addr->sllc_test || addr->sllc_xid)
+ if (addr->sllc_test)
rc = LLC_PDU_LEN_U;
+ else if (addr->sllc_xid)
+ /* We need to expand the header to sizeof(struct llc_xid_info),
+ * since llc_pdu_init_as_xid_cmd() uses bytes 4, 5 and 6 of the LLC
+ * header for the XID PDU. In llc_ui_sendmsg() we reserve the header
+ * size and then fill all remaining space with user data. If we don't
+ * reserve these bytes, llc_pdu_init_as_xid_cmd() will overwrite the
+ * user data.
+ */
+ rc = LLC_PDU_LEN_U_XID;
else if (sk->sk_type == SOCK_STREAM)
rc = LLC_PDU_LEN_I;
return rc;
@@ -216,8 +224,7 @@ static int llc_ui_release(struct socket *sock)
} else {
release_sock(sk);
}
- if (llc->dev)
- dev_put(llc->dev);
+ dev_put(llc->dev);
sock_put(sk);
llc_sk_free(sk);
out:
@@ -355,8 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
} else
llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
- if (llc->dev)
- dev_hold(llc->dev);
+ dev_hold(llc->dev);
rcu_read_unlock();
if (!llc->dev)
goto out;
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index b554f26c68ee..79d1cef8f15a 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
int rc;
- llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+ llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap,
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7a99892e5aba..d69b31c20fe2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -152,6 +152,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
struct vif_params *params)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
int ret;
ret = ieee80211_if_change_type(sdata, type);
@@ -162,7 +164,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
ieee80211_check_fast_rx_iface(sdata);
} else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) {
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+ if (params->use_4addr == ifmgd->use_4addr)
+ return 0;
+
sdata->u.mgd.use_4addr = params->use_4addr;
+ if (!ifmgd->associated)
+ return 0;
+
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get(sdata, ifmgd->bssid);
+ if (sta)
+ drv_sta_set_4addr(local, sdata, &sta->sta,
+ params->use_4addr);
+ mutex_unlock(&local->sta_mtx);
+
+ if (params->use_4addr)
+ ieee80211_send_4addr_nullfunc(local, sdata);
}
if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
@@ -809,9 +828,11 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
return ret;
}
-static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
- const u8 *resp, size_t resp_len,
- const struct ieee80211_csa_settings *csa)
+static int
+ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
+ const u8 *resp, size_t resp_len,
+ const struct ieee80211_csa_settings *csa,
+ const struct ieee80211_color_change_settings *cca)
{
struct probe_resp *new, *old;
@@ -831,6 +852,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp,
csa->n_counter_offsets_presp *
sizeof(new->cntdwn_counter_offsets[0]));
+ else if (cca)
+ new->cntdwn_counter_offsets[0] = cca->counter_offset_presp;
rcu_assign_pointer(sdata->u.ap.probe_resp, new);
if (old)
@@ -936,7 +959,8 @@ static int ieee80211_set_ftm_responder_params(
static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
struct cfg80211_beacon_data *params,
- const struct ieee80211_csa_settings *csa)
+ const struct ieee80211_csa_settings *csa,
+ const struct ieee80211_color_change_settings *cca)
{
struct beacon_data *new, *old;
int new_head_len, new_tail_len;
@@ -985,6 +1009,9 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon,
csa->n_counter_offsets_beacon *
sizeof(new->cntdwn_counter_offsets[0]));
+ } else if (cca) {
+ new->cntdwn_current_counter = cca->count;
+ new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon;
}
/* copy in head */
@@ -1001,7 +1028,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
memcpy(new->tail, old->tail, new_tail_len);
err = ieee80211_set_probe_resp(sdata, params->probe_resp,
- params->probe_resp_len, csa);
+ params->probe_resp_len, csa, cca);
if (err < 0) {
kfree(new);
return err;
@@ -1156,7 +1183,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
sdata->vif.bss_conf.beacon_tx_rate = params->beacon_rate;
- err = ieee80211_assign_beacon(sdata, &params->beacon, NULL);
+ err = ieee80211_assign_beacon(sdata, &params->beacon, NULL, NULL);
if (err < 0)
goto error;
changed |= err;
@@ -1211,17 +1238,17 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
sdata_assert_lock(sdata);
- /* don't allow changing the beacon while CSA is in place - offset
+ /* don't allow changing the beacon while a countdown is in place - offset
* of channel switch counter may change
*/
- if (sdata->vif.csa_active)
+ if (sdata->vif.csa_active || sdata->vif.color_change_active)
return -EBUSY;
old = sdata_dereference(sdata->u.ap.beacon, sdata);
if (!old)
return -ENOENT;
- err = ieee80211_assign_beacon(sdata, params, NULL);
+ err = ieee80211_assign_beacon(sdata, params, NULL, NULL);
if (err < 0)
return err;
ieee80211_bss_info_change_notify(sdata, err);
@@ -1442,6 +1469,38 @@ static void sta_apply_mesh_params(struct ieee80211_local *local,
#endif
}
+static void sta_apply_airtime_params(struct ieee80211_local *local,
+ struct sta_info *sta,
+ struct station_parameters *params)
+{
+ u8 ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ struct airtime_sched_info *air_sched = &local->airtime[ac];
+ struct airtime_info *air_info = &sta->airtime[ac];
+ struct txq_info *txqi;
+ u8 tid;
+
+ spin_lock_bh(&air_sched->lock);
+ for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
+ if (air_info->weight == params->airtime_weight ||
+ !sta->sta.txq[tid] ||
+ ac != ieee80211_ac_from_tid(tid))
+ continue;
+
+ airtime_weight_set(air_info, params->airtime_weight);
+
+ txqi = to_txq_info(sta->sta.txq[tid]);
+ if (RB_EMPTY_NODE(&txqi->schedule_order))
+ continue;
+
+ ieee80211_update_airtime_weight(local, air_sched,
+ 0, true);
+ }
+ spin_unlock_bh(&air_sched->lock);
+ }
+}
+
static int sta_apply_parameters(struct ieee80211_local *local,
struct sta_info *sta,
struct station_parameters *params)
@@ -1629,7 +1688,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta_apply_mesh_params(local, sta, params);
if (params->airtime_weight)
- sta->airtime_weight = params->airtime_weight;
+ sta_apply_airtime_params(local, sta, params);
+
/* set the STA state after all sta info from usermode has been set */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
@@ -1693,15 +1753,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
test_sta_flag(sta, WLAN_STA_ASSOC))
rate_control_rate_init(sta);
- err = sta_info_insert_rcu(sta);
- if (err) {
- rcu_read_unlock();
- return err;
- }
-
- rcu_read_unlock();
-
- return 0;
+ return sta_info_insert(sta);
}
static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
@@ -3112,7 +3164,7 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
- NULL);
+ NULL, NULL);
kfree(sdata->u.ap.next_beacon);
sdata->u.ap.next_beacon = NULL;
@@ -3278,7 +3330,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
csa.n_counter_offsets_presp = params->n_counter_offsets_presp;
csa.count = params->count;
- err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa);
+ err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa, NULL);
if (err < 0) {
kfree(sdata->u.ap.next_beacon);
return err;
@@ -3367,6 +3419,15 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata)
+{
+ sdata->vif.color_change_active = false;
+ kfree(sdata->u.ap.next_beacon);
+ sdata->u.ap.next_beacon = NULL;
+
+ cfg80211_color_change_aborted_notify(sdata->dev);
+}
+
static int
__ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params)
@@ -3435,6 +3496,10 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
goto out;
}
+ /* if there is a color change in progress, abort it */
+ if (sdata->vif.color_change_active)
+ ieee80211_color_change_abort(sdata);
+
err = ieee80211_set_csa_beacon(sdata, params, &changed);
if (err) {
ieee80211_vif_unreserve_chanctx(sdata);
@@ -4086,6 +4151,196 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy,
return local->ops->set_sar_specs(&local->hw, sar);
}
+static int
+ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+ u32 *changed)
+{
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP: {
+ int ret;
+
+ ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon,
+ NULL, NULL);
+ kfree(sdata->u.ap.next_beacon);
+ sdata->u.ap.next_beacon = NULL;
+
+ if (ret < 0)
+ return ret;
+
+ *changed |= ret;
+ break;
+ }
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_color_change_settings *params,
+ u32 *changed)
+{
+ struct ieee80211_color_change_settings color_change = {};
+ int err;
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_AP:
+ sdata->u.ap.next_beacon =
+ cfg80211_beacon_dup(&params->beacon_next);
+ if (!sdata->u.ap.next_beacon)
+ return -ENOMEM;
+
+ if (params->count <= 1)
+ break;
+
+ color_change.counter_offset_beacon =
+ params->counter_offset_beacon;
+ color_change.counter_offset_presp =
+ params->counter_offset_presp;
+ color_change.count = params->count;
+
+ err = ieee80211_assign_beacon(sdata, &params->beacon_color_change,
+ NULL, &color_change);
+ if (err < 0) {
+ kfree(sdata->u.ap.next_beacon);
+ return err;
+ }
+ *changed |= err;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static void
+ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
+ u8 color, int enable, u32 changed)
+{
+ sdata->vif.bss_conf.he_bss_color.color = color;
+ sdata->vif.bss_conf.he_bss_color.enabled = enable;
+ changed |= BSS_CHANGED_HE_BSS_COLOR;
+
+ ieee80211_bss_info_change_notify(sdata, changed);
+}
+
+static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ u32 changed = 0;
+ int err;
+
+ sdata_assert_lock(sdata);
+ lockdep_assert_held(&local->mtx);
+
+ sdata->vif.color_change_active = false;
+
+ err = ieee80211_set_after_color_change_beacon(sdata, &changed);
+ if (err) {
+ cfg80211_color_change_aborted_notify(sdata->dev);
+ return err;
+ }
+
+ ieee80211_color_change_bss_config_notify(sdata,
+ sdata->vif.color_change_color,
+ 1, changed);
+ cfg80211_color_change_notify(sdata->dev);
+
+ return 0;
+}
+
+void ieee80211_color_change_finalize_work(struct work_struct *work)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ color_change_finalize_work);
+ struct ieee80211_local *local = sdata->local;
+
+ sdata_lock(sdata);
+ mutex_lock(&local->mtx);
+
+ /* AP might have been stopped while waiting for the lock. */
+ if (!sdata->vif.color_change_active)
+ goto unlock;
+
+ if (!ieee80211_sdata_running(sdata))
+ goto unlock;
+
+ ieee80211_color_change_finalize(sdata);
+
+unlock:
+ mutex_unlock(&local->mtx);
+ sdata_unlock(sdata);
+}
+
+void ieee80211_color_change_finish(struct ieee80211_vif *vif)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->color_change_finalize_work);
+}
+EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
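ieee80211_color_change_finish() is the driver-facing half of the new API: once the device has counted the color-change countdown down to zero (typically reported by firmware), the driver calls it and mac80211 queues color_change_finalize_work to swap in the next beacon and announce the new BSS color. A hypothetical driver-side sketch (the driver function name and its trigger are made up for illustration):

#include <net/mac80211.h>

/* hypothetical: called from a driver when its firmware reports that the
 * last countdown beacon for the color change has been transmitted */
static void drv_demo_color_change_done(struct ieee80211_vif *vif)
{
	/* hands control back to mac80211, which finalizes the change from a
	 * work item (see ieee80211_color_change_finalize_work above) */
	ieee80211_color_change_finish(vif);
}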
+
+void
+ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+ u64 color_bitmap)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+ if (sdata->vif.color_change_active || sdata->vif.csa_active)
+ return;
+
+ cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap);
+}
+EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify);
+
+static int
+ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
+ struct cfg80211_color_change_settings *params)
+{
+ struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
+ u32 changed = 0;
+ int err;
+
+ sdata_assert_lock(sdata);
+
+ mutex_lock(&local->mtx);
+
+ /* don't allow another color change if one is already active or if csa
+ * is active
+ */
+ if (sdata->vif.color_change_active || sdata->vif.csa_active) {
+ err = -EBUSY;
+ goto out;
+ }
+
+ err = ieee80211_set_color_change_beacon(sdata, params, &changed);
+ if (err)
+ goto out;
+
+ sdata->vif.color_change_active = true;
+ sdata->vif.color_change_color = params->color;
+
+ cfg80211_color_change_started_notify(sdata->dev, params->count);
+
+ if (changed)
+ ieee80211_color_change_bss_config_notify(sdata, 0, 0, changed);
+ else
+ /* if the beacon didn't change, we can finalize immediately */
+ ieee80211_color_change_finalize(sdata);
+
+out:
+ mutex_unlock(&local->mtx);
+
+ return err;
+}
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -4189,4 +4444,5 @@ const struct cfg80211_ops mac80211_config_ops = {
.set_tid_config = ieee80211_set_tid_config,
.reset_tid_config = ieee80211_reset_tid_config,
.set_sar_specs = ieee80211_set_sar_specs,
+ .color_change = ieee80211_color_change,
};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 907bb1f748a1..76fc36a68750 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* mac80211 - channel management
+ * Copyright 2020 - 2021 Intel Corporation
*/
#include <linux/nl80211.h>
@@ -308,8 +309,8 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
* the max of min required widths of all the interfaces bound to this
* channel context.
*/
-void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+static u32 _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
enum nl80211_chan_width max_bw;
struct cfg80211_chan_def min_def;
@@ -326,7 +327,7 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
ctx->conf.def.width == NL80211_CHAN_WIDTH_16 ||
ctx->conf.radar_enabled) {
ctx->conf.min_def = ctx->conf.def;
- return;
+ return 0;
}
max_bw = ieee80211_get_chanctx_max_required_bw(local, &ctx->conf);
@@ -337,17 +338,21 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
ieee80211_chandef_downgrade(&min_def);
if (cfg80211_chandef_identical(&ctx->conf.min_def, &min_def))
- return;
+ return 0;
ctx->conf.min_def = min_def;
if (!ctx->driver_present)
- return;
+ return 0;
- drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_MIN_WIDTH);
+ return IEEE80211_CHANCTX_CHANGE_MIN_WIDTH;
}
+/* Calling this function assumes that the station vif has already been
+ * updated to the latest changes via ieee80211_vif_update_chandef().
+ */
static void ieee80211_chan_bw_change(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+ struct ieee80211_chanctx *ctx,
+ bool narrowed)
{
struct sta_info *sta;
struct ieee80211_supported_band *sband =
@@ -366,9 +371,16 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
continue;
new_sta_bw = ieee80211_sta_cur_vht_bw(sta);
+
+ /* nothing changed */
if (new_sta_bw == sta->sta.bandwidth)
continue;
+ /* vif changed to narrow BW and narrow BW for the station wasn't
+ * requested, or vice versa */
+ if ((new_sta_bw < sta->sta.bandwidth) == !narrowed)
+ continue;
+
sta->sta.bandwidth = new_sta_bw;
rate_control_rate_update(local, sband, sta,
IEEE80211_RC_BW_CHANGED);
@@ -376,21 +388,34 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local,
rcu_read_unlock();
}
-static void ieee80211_change_chanctx(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx,
- const struct cfg80211_chan_def *chandef)
+/*
+ * recalc the min required chan width of the channel context, which is
+ * the max of min required widths of all the interfaces bound to this
+ * channel context.
+ */
+void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
- enum nl80211_chan_width width;
+ u32 changed = _ieee80211_recalc_chanctx_min_def(local, ctx);
- if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
- ieee80211_recalc_chanctx_min_def(local, ctx);
+ if (!changed)
return;
- }
- WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+ /* check whether BW was narrowed */
+ ieee80211_chan_bw_change(local, ctx, true);
- width = ctx->conf.def.width;
- ctx->conf.def = *chandef;
+ drv_change_chanctx(local, ctx, changed);
+
+ /* check whether BW was widened */
+ ieee80211_chan_bw_change(local, ctx, false);
+}
+
+static void ieee80211_change_chanctx(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx,
+ struct ieee80211_chanctx *old_ctx,
+ const struct cfg80211_chan_def *chandef)
+{
+ u32 changed;
/* expected to handle only 20/40/80/160 channel widths */
switch (chandef->width) {
@@ -405,19 +430,33 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
WARN_ON(1);
}
- if (chandef->width < width)
- ieee80211_chan_bw_change(local, ctx);
+ /* Check whether the BW was narrowed - we do this _before_ calling
+ * recalc_chanctx_min_def, because that call may return early, e.g. when
+ * a new context was just added with all parameters already up to date.
+ */
+ ieee80211_chan_bw_change(local, old_ctx, true);
+
+ if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) {
+ ieee80211_recalc_chanctx_min_def(local, ctx);
+ return;
+ }
- drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH);
- ieee80211_recalc_chanctx_min_def(local, ctx);
+ WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+
+ ctx->conf.def = *chandef;
+
+ /* check if min chanctx also changed */
+ changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
+ _ieee80211_recalc_chanctx_min_def(local, ctx);
+ drv_change_chanctx(local, ctx, changed);
if (!local->use_chanctx) {
local->_oper_chandef = *chandef;
ieee80211_hw_config(local, 0);
}
- if (chandef->width > width)
- ieee80211_chan_bw_change(local, ctx);
+ /* check whether BW was widened */
+ ieee80211_chan_bw_change(local, old_ctx, false);
}
static struct ieee80211_chanctx *
@@ -450,7 +489,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
if (!compat)
continue;
- ieee80211_change_chanctx(local, ctx, compat);
+ ieee80211_change_chanctx(local, ctx, ctx, compat);
return ctx;
}
@@ -679,7 +718,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
if (!compat)
return;
- ieee80211_change_chanctx(local, ctx, compat);
+ ieee80211_change_chanctx(local, ctx, ctx, compat);
}
static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
@@ -1107,13 +1146,12 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(!chandef))
return -EINVAL;
- if (old_ctx->conf.def.width > new_ctx->conf.def.width)
- ieee80211_chan_bw_change(local, new_ctx);
+ if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
+ changed = BSS_CHANGED_BANDWIDTH;
- ieee80211_change_chanctx(local, new_ctx, chandef);
+ ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
- if (old_ctx->conf.def.width < new_ctx->conf.def.width)
- ieee80211_chan_bw_change(local, new_ctx);
+ ieee80211_change_chanctx(local, new_ctx, old_ctx, chandef);
vif_chsw[0].vif = &sdata->vif;
vif_chsw[0].old_ctx = &old_ctx->conf;
@@ -1142,14 +1180,9 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
ieee80211_free_chanctx(local, old_ctx);
- if (sdata->vif.bss_conf.chandef.width != sdata->reserved_chandef.width)
- changed = BSS_CHANGED_BANDWIDTH;
-
- ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
-
+ ieee80211_recalc_chanctx_min_def(local, new_ctx);
ieee80211_recalc_smps_chanctx(local, new_ctx);
ieee80211_recalc_radar_chanctx(local, new_ctx);
- ieee80211_recalc_chanctx_min_def(local, new_ctx);
if (changed)
ieee80211_bss_info_change_notify(sdata, changed);
@@ -1188,7 +1221,7 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(!chandef))
return -EINVAL;
- ieee80211_change_chanctx(local, new_ctx, chandef);
+ ieee80211_change_chanctx(local, new_ctx, new_ctx, chandef);
list_del(&sdata->reserved_chanctx_list);
sdata->reserved_chanctx = NULL;
@@ -1505,7 +1538,6 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
ieee80211_recalc_smps_chanctx(local, ctx);
ieee80211_recalc_radar_chanctx(local, ctx);
ieee80211_recalc_chanctx_min_def(local, ctx);
- ieee80211_chan_bw_change(local, ctx);
list_for_each_entry_safe(sdata, sdata_tmp, &ctx->reserved_vifs,
reserved_chanctx_list) {
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index fc34ae2b604c..8dbfe325ee66 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -216,14 +216,14 @@ static ssize_t aql_txq_limit_read(struct file *file,
"VI %u %u\n"
"BE %u %u\n"
"BK %u %u\n",
- local->aql_txq_limit_low[IEEE80211_AC_VO],
- local->aql_txq_limit_high[IEEE80211_AC_VO],
- local->aql_txq_limit_low[IEEE80211_AC_VI],
- local->aql_txq_limit_high[IEEE80211_AC_VI],
- local->aql_txq_limit_low[IEEE80211_AC_BE],
- local->aql_txq_limit_high[IEEE80211_AC_BE],
- local->aql_txq_limit_low[IEEE80211_AC_BK],
- local->aql_txq_limit_high[IEEE80211_AC_BK]);
+ local->airtime[IEEE80211_AC_VO].aql_txq_limit_low,
+ local->airtime[IEEE80211_AC_VO].aql_txq_limit_high,
+ local->airtime[IEEE80211_AC_VI].aql_txq_limit_low,
+ local->airtime[IEEE80211_AC_VI].aql_txq_limit_high,
+ local->airtime[IEEE80211_AC_BE].aql_txq_limit_low,
+ local->airtime[IEEE80211_AC_BE].aql_txq_limit_high,
+ local->airtime[IEEE80211_AC_BK].aql_txq_limit_low,
+ local->airtime[IEEE80211_AC_BK].aql_txq_limit_high);
return simple_read_from_buffer(user_buf, count, ppos,
buf, len);
}
@@ -255,11 +255,11 @@ static ssize_t aql_txq_limit_write(struct file *file,
if (ac >= IEEE80211_NUM_ACS)
return -EINVAL;
- q_limit_low_old = local->aql_txq_limit_low[ac];
- q_limit_high_old = local->aql_txq_limit_high[ac];
+ q_limit_low_old = local->airtime[ac].aql_txq_limit_low;
+ q_limit_high_old = local->airtime[ac].aql_txq_limit_high;
- local->aql_txq_limit_low[ac] = q_limit_low;
- local->aql_txq_limit_high[ac] = q_limit_high;
+ local->airtime[ac].aql_txq_limit_low = q_limit_low;
+ local->airtime[ac].aql_txq_limit_high = q_limit_high;
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
@@ -382,6 +382,46 @@ static const struct file_operations force_tx_status_ops = {
.llseek = default_llseek,
};
+static ssize_t airtime_read(struct file *file,
+ char __user *user_buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct ieee80211_local *local = file->private_data;
+ char buf[200];
+ u64 v_t[IEEE80211_NUM_ACS];
+ u64 wt[IEEE80211_NUM_ACS];
+ int len = 0, ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ spin_lock_bh(&local->airtime[ac].lock);
+ v_t[ac] = local->airtime[ac].v_t;
+ wt[ac] = local->airtime[ac].weight_sum;
+ spin_unlock_bh(&local->airtime[ac].lock);
+ }
+ len = scnprintf(buf, sizeof(buf),
+ "\tVO VI BE BK\n"
+ "Virt-t\t%-10llu %-10llu %-10llu %-10llu\n"
+ "Weight\t%-10llu %-10llu %-10llu %-10llu\n",
+ v_t[0],
+ v_t[1],
+ v_t[2],
+ v_t[3],
+ wt[0],
+ wt[1],
+ wt[2],
+ wt[3]);
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ buf, len);
+}
+
+static const struct file_operations airtime_ops = {
+ .read = airtime_read,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
#ifdef CONFIG_PM
static ssize_t reset_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
@@ -632,7 +672,11 @@ void debugfs_hw_add(struct ieee80211_local *local)
if (local->ops->wake_tx_queue)
DEBUGFS_ADD_MODE(aqm, 0600);
- DEBUGFS_ADD_MODE(airtime_flags, 0600);
+ if (wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
+ DEBUGFS_ADD_MODE(airtime, 0600);
+ DEBUGFS_ADD_MODE(airtime_flags, 0600);
+ }
DEBUGFS_ADD(aql_txq_limit);
debugfs_create_u32("aql_threshold", 0600,
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 0ad3860852ff..db724fc10a5f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -57,7 +57,6 @@ static ssize_t ieee80211_if_write(
return -EFAULT;
buf[count] = '\0';
- ret = -ENODEV;
rtnl_lock();
ret = (*write)(sdata, buf, count);
rtnl_unlock();
@@ -513,6 +512,34 @@ static ssize_t ieee80211_if_fmt_aqm(
}
IEEE80211_IF_FILE_R(aqm);
+static ssize_t ieee80211_if_fmt_airtime(
+ const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_txq *txq = sdata->vif.txq;
+ struct airtime_info *air_info;
+ int len;
+
+ if (!txq)
+ return 0;
+
+ spin_lock_bh(&local->airtime[txq->ac].lock);
+ air_info = to_airtime_info(txq);
+ len = scnprintf(buf,
+ buflen,
+ "RX: %llu us\nTX: %llu us\nWeight: %u\n"
+ "Virt-T: %lld us\n",
+ air_info->rx_airtime,
+ air_info->tx_airtime,
+ air_info->weight,
+ air_info->v_t);
+ spin_unlock_bh(&local->airtime[txq->ac].lock);
+
+ return len;
+}
+
+IEEE80211_IF_FILE_R(airtime);
+
IEEE80211_IF_FILE(multicast_to_unicast, u.ap.multicast_to_unicast, HEX);
/* IBSS attributes */
@@ -658,8 +685,10 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata)
if (sdata->local->ops->wake_tx_queue &&
sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
- sdata->vif.type != NL80211_IFTYPE_NAN)
+ sdata->vif.type != NL80211_IFTYPE_NAN) {
DEBUGFS_ADD(aqm);
+ DEBUGFS_ADD(airtime);
+ }
}
static void add_sta_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 936c9dfa86c8..8be28cfd6f64 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -202,7 +202,7 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
size_t bufsz = 400;
char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
u64 rx_airtime = 0, tx_airtime = 0;
- s64 deficit[IEEE80211_NUM_ACS];
+ u64 v_t[IEEE80211_NUM_ACS];
ssize_t rv;
int ac;
@@ -210,18 +210,18 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
return -ENOMEM;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->airtime[ac].lock);
rx_airtime += sta->airtime[ac].rx_airtime;
tx_airtime += sta->airtime[ac].tx_airtime;
- deficit[ac] = sta->airtime[ac].deficit;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ v_t[ac] = sta->airtime[ac].v_t;
+ spin_unlock_bh(&local->airtime[ac].lock);
}
p += scnprintf(p, bufsz + buf - p,
"RX: %llu us\nTX: %llu us\nWeight: %u\n"
- "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
- rx_airtime, tx_airtime, sta->airtime_weight,
- deficit[0], deficit[1], deficit[2], deficit[3]);
+ "Virt-T: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+ rx_airtime, tx_airtime, sta->airtime[0].weight,
+ v_t[0], v_t[1], v_t[2], v_t[3]);
rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
kfree(buf);
@@ -236,11 +236,11 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
int ac;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->airtime[ac].lock);
sta->airtime[ac].rx_airtime = 0;
sta->airtime[ac].tx_airtime = 0;
- sta->airtime[ac].deficit = sta->airtime_weight;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ sta->airtime[ac].v_t = 0;
+ spin_unlock_bh(&local->airtime[ac].lock);
}
return count;
@@ -263,10 +263,10 @@ static ssize_t sta_aql_read(struct file *file, char __user *userbuf,
return -ENOMEM;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->active_txq_lock[ac]);
+ spin_lock_bh(&local->airtime[ac].lock);
q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ spin_unlock_bh(&local->airtime[ac].lock);
q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending);
}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 604ca59937f0..cd3731cbf6c6 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2019 Intel Corporation
+* Copyright (C) 2018 - 2019, 2021 Intel Corporation
*/
#ifndef __MAC80211_DRIVER_OPS
@@ -821,7 +821,7 @@ drv_allow_buffered_frames(struct ieee80211_local *local,
static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- u16 duration)
+ struct ieee80211_prep_tx_info *info)
{
might_sleep();
@@ -829,9 +829,27 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local,
return;
WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
- trace_drv_mgd_prepare_tx(local, sdata, duration);
+ trace_drv_mgd_prepare_tx(local, sdata, info->duration,
+ info->subtype, info->success);
if (local->ops->mgd_prepare_tx)
- local->ops->mgd_prepare_tx(&local->hw, &sdata->vif, duration);
+ local->ops->mgd_prepare_tx(&local->hw, &sdata->vif, info);
+ trace_drv_return_void(local);
+}
+
+static inline void drv_mgd_complete_tx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_prep_tx_info *info)
+{
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+ WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION);
+
+ trace_drv_mgd_complete_tx(local, sdata, info->duration,
+ info->subtype, info->success);
+ if (local->ops->mgd_complete_tx)
+ local->ops->mgd_complete_tx(&local->hw, &sdata->vif, info);
trace_drv_return_void(local);
}
@@ -1429,4 +1447,40 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline void drv_add_twt_setup(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ struct ieee80211_twt_setup *twt)
+{
+ struct ieee80211_twt_params *twt_agrt;
+
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ twt_agrt = (void *)twt->params;
+
+ trace_drv_add_twt_setup(local, sta, twt, twt_agrt);
+ local->ops->add_twt_setup(&local->hw, sta, twt);
+ trace_drv_return_void(local);
+}
+
+static inline void drv_twt_teardown_request(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta,
+ u8 flowid)
+{
+ might_sleep();
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ if (!local->ops->twt_teardown_request)
+ return;
+
+ trace_drv_twt_teardown_request(local, sta, flowid);
+ local->ops->twt_teardown_request(&local->hw, sta, flowid);
+ trace_drv_return_void(local);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 0c0b970835ce..c05af7018f79 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -111,7 +111,7 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
- struct ieee80211_sta_he_cap own_he_cap = sband->iftype_data->he_cap;
+ struct ieee80211_sta_he_cap own_he_cap;
struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie;
u8 he_ppe_size;
u8 mcs_nss_size;
@@ -120,9 +120,13 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
memset(he_cap, 0, sizeof(*he_cap));
- if (!he_cap_ie || !ieee80211_get_he_sta_cap(sband))
+ if (!he_cap_ie ||
+ !ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif)))
return;
+ own_he_cap = sband->iftype_data->he_cap;
+
/* Make sure size is OK */
mcs_nss_size = ieee80211_he_mcs_nss_size(he_cap_ie_elem);
he_ppe_size =
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 3d62a80b5790..2eb7641f5556 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright 2017 Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -555,17 +555,15 @@ void ieee80211_request_smps(struct ieee80211_vif *vif,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION &&
- vif->type != NL80211_IFTYPE_AP))
+ if (WARN_ON_ONCE(vif->type != NL80211_IFTYPE_STATION))
return;
- if (vif->type == NL80211_IFTYPE_STATION) {
- if (sdata->u.mgd.driver_smps_mode == smps_mode)
- return;
- sdata->u.mgd.driver_smps_mode = smps_mode;
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->u.mgd.request_smps_work);
- }
+ if (sdata->u.mgd.driver_smps_mode == smps_mode)
+ return;
+
+ sdata->u.mgd.driver_smps_mode = smps_mode;
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->u.mgd.request_smps_work);
}
/* this might change ... don't want non-open drivers using it */
EXPORT_SYMBOL_GPL(ieee80211_request_smps);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a7ac53a2f00d..5d6ca4c3e698 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -489,7 +489,6 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_bss_ies *ies;
u16 capability = WLAN_CAPABILITY_IBSS;
u64 tsf;
- int ret = 0;
sdata_assert_lock(sdata);
@@ -501,10 +500,8 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS,
IEEE80211_PRIVACY(ifibss->privacy));
- if (WARN_ON(!cbss)) {
- ret = -EINVAL;
- goto out;
- }
+ if (WARN_ON(!cbss))
+ return -EINVAL;
rcu_read_lock();
ies = rcu_dereference(cbss->ies);
@@ -520,18 +517,14 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.basic_rates,
capability, tsf, &ifibss->chandef,
NULL, csa_settings);
- if (!presp) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!presp)
+ return -ENOMEM;
rcu_assign_pointer(ifibss->presp, presp);
if (old_presp)
kfree_rcu(old_presp, rcu_head);
return BSS_CHANGED_BEACON;
- out:
- return ret;
}
int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 648696b49f89..159af6c3ffb0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#ifndef IEEE80211_I_H
@@ -25,6 +25,7 @@
#include <linux/leds.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
+#include <linux/rbtree.h>
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
#include <net/mac80211.h>
@@ -244,6 +245,12 @@ struct ieee80211_csa_settings {
u8 count;
};
+struct ieee80211_color_change_settings {
+ u16 counter_offset_beacon;
+ u16 counter_offset_presp;
+ u8 count;
+};
+
struct beacon_data {
u8 *head, *tail;
int head_len, tail_len;
@@ -831,17 +838,16 @@ enum txq_info_flags {
* @def_flow: used as a fallback flow when a packet destined to @tin hashes to
* a fq_flow which is already owned by a different tin
* @def_cvars: codel vars for @def_flow
- * @frags: used to keep fragments created after dequeue
* @schedule_order: used with ieee80211_local->active_txqs
- * @schedule_round: counter to prevent infinite loops on TXQ scheduling
+ * @frags: used to keep fragments created after dequeue
*/
struct txq_info {
struct fq_tin tin;
struct codel_vars def_cvars;
struct codel_stats cstats;
+ struct rb_node schedule_order;
+
struct sk_buff_head frags;
- struct list_head schedule_order;
- u16 schedule_round;
unsigned long flags;
/* keep last! */
@@ -918,10 +924,14 @@ struct ieee80211_sub_if_data {
struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
struct mac80211_qos_map __rcu *qos_map;
+ struct airtime_info airtime[IEEE80211_NUM_ACS];
+
struct work_struct csa_finalize_work;
bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */
struct cfg80211_chan_def csa_chandef;
+ struct work_struct color_change_finalize_work;
+
struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */
struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */
@@ -936,6 +946,7 @@ struct ieee80211_sub_if_data {
struct work_struct work;
struct sk_buff_head skb_queue;
+ struct sk_buff_head status_queue;
u8 needed_rx_chains;
enum ieee80211_smps_mode smps_mode;
@@ -1130,6 +1141,44 @@ enum mac80211_scan_state {
SCAN_ABORT,
};
+/**
+ * struct airtime_sched_info - state used for airtime scheduling and AQL
+ *
+ * @lock: spinlock that protects all the fields in this struct
+ * @active_txqs: rbtree of currently backlogged queues, sorted by virtual time
+ * @schedule_pos: the current position maintained while a driver walks the tree
+ * with ieee80211_next_txq()
+ * @active_list: list of struct airtime_info structs that were active within
+ * the last AIRTIME_ACTIVE_DURATION (100 ms), used to compute
+ * weight_sum
+ * @last_weight_update: used for rate limiting walking active_list
+ * @last_schedule_activity: tracks the last time a transmission was scheduled; used
+ * for catching up v_t if no stations are eligible for
+ * transmission.
+ * @v_t: global virtual time; queues with v_t < this are eligible for
+ * transmission
+ * @weight_sum: total sum of all active stations used for dividing airtime
+ * @weight_sum_reciprocal: reciprocal of weight_sum (to avoid divisions in fast
+ * path - see comment above
+ * IEEE80211_RECIPROCAL_DIVISOR_64)
+ * @aql_txq_limit_low: AQL limit when total outstanding airtime
+ * is < IEEE80211_AQL_THRESHOLD
+ * @aql_txq_limit_high: AQL limit when total outstanding airtime
+ * is > IEEE80211_AQL_THRESHOLD
+ */
+struct airtime_sched_info {
+ spinlock_t lock;
+ struct rb_root_cached active_txqs;
+ struct rb_node *schedule_pos;
+ struct list_head active_list;
+ u64 last_weight_update;
+ u64 last_schedule_activity;
+ u64 v_t;
+ u64 weight_sum;
+ u64 weight_sum_reciprocal;
+ u32 aql_txq_limit_low;
+ u32 aql_txq_limit_high;
+};
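
[Illustration only, not part of the diff above: the struct just defined is what a driver walks with ieee80211_next_txq(). A minimal sketch of that driver-side loop, assuming the existing mac80211 TXQ scheduling API (ieee80211_txq_schedule_start/ieee80211_next_txq/ieee80211_return_txq/ieee80211_tx_dequeue) and a hypothetical example_hw_tx() transmit helper:]

    /* Sketch of a driver's wake_tx_queue path draining one access category. */
    static void example_schedule_ac(struct ieee80211_hw *hw, u8 ac)
    {
            struct ieee80211_txq *txq;

            ieee80211_txq_schedule_start(hw, ac);
            while ((txq = ieee80211_next_txq(hw, ac))) {
                    struct sk_buff *skb;

                    /* hand frames to hardware; completed airtime is later
                     * reported via ieee80211_sta_register_airtime(), which
                     * advances the per-queue virtual time used by the rbtree
                     */
                    while ((skb = ieee80211_tx_dequeue(hw, txq)))
                            example_hw_tx(hw, skb);   /* hypothetical helper */

                    ieee80211_return_txq(hw, txq, false);
            }
            ieee80211_txq_schedule_end(hw, ac);
    }
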
DECLARE_STATIC_KEY_FALSE(aql_disable);
struct ieee80211_local {
@@ -1143,13 +1192,8 @@ struct ieee80211_local {
struct codel_params cparams;
/* protects active_txqs and txqi->schedule_order */
- spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
- struct list_head active_txqs[IEEE80211_NUM_ACS];
- u16 schedule_round[IEEE80211_NUM_ACS];
-
+ struct airtime_sched_info airtime[IEEE80211_NUM_ACS];
u16 airtime_flags;
- u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
- u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
u32 aql_threshold;
atomic_t aql_total_pending_airtime;
@@ -1414,10 +1458,6 @@ struct ieee80211_local {
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
-
- /* TDLS channel switch */
- struct work_struct tdls_chsw_work;
- struct sk_buff_head skb_queue_tdls_chsw;
};
static inline struct ieee80211_sub_if_data *
@@ -1494,6 +1534,7 @@ struct ieee802_11_elems {
const struct ieee80211_he_spr *he_spr;
const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
const struct ieee80211_he_6ghz_capa *he_6ghz_capa;
+ const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT];
const u8 *uora_element;
const u8 *mesh_id;
const u8 *peering;
@@ -1544,6 +1585,8 @@ struct ieee802_11_elems {
u8 perr_len;
u8 country_elem_len;
u8 bssid_index_len;
+ u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT];
+ u8 tx_pwr_env_num;
/* whether a parse error occurred while retrieving these elements */
bool parse_error;
@@ -1567,6 +1610,125 @@ static inline bool txq_has_queue(struct ieee80211_txq *txq)
return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets);
}
+static inline struct airtime_info *to_airtime_info(struct ieee80211_txq *txq)
+{
+ struct ieee80211_sub_if_data *sdata;
+ struct sta_info *sta;
+
+ if (txq->sta) {
+ sta = container_of(txq->sta, struct sta_info, sta);
+ return &sta->airtime[txq->ac];
+ }
+
+ sdata = vif_to_sdata(txq->vif);
+ return &sdata->airtime[txq->ac];
+}
+
+/* To avoid divisions in the fast path, we keep pre-computed reciprocals for
+ * airtime weight calculations. There are two different weights to keep track
+ * of: The per-station weight and the sum of weights per phy.
+ *
+ * For the per-station weights (kept in airtime_info below), we use 32-bit
+ * reciprocals with a divisor of 2^19. This lets us keep the multiplications and
+ * divisions for the station weights as 32-bit operations at the cost of a bit
+ * of rounding error for high weights; but the choice of divisor keeps rounding
+ * errors <10% for weights <2^15, assuming no more than 8ms of airtime is
+ * reported at a time.
+ *
+ * For the per-phy sum of weights the values can get higher, so we use 64-bit
+ * operations for those with a 32-bit divisor, which should avoid any
+ * significant rounding errors.
+ */
+#define IEEE80211_RECIPROCAL_DIVISOR_64 0x100000000ULL
+#define IEEE80211_RECIPROCAL_SHIFT_64 32
+#define IEEE80211_RECIPROCAL_DIVISOR_32 0x80000U
+#define IEEE80211_RECIPROCAL_SHIFT_32 19
+
+static inline void airtime_weight_set(struct airtime_info *air_info, u16 weight)
+{
+ if (air_info->weight == weight)
+ return;
+
+ air_info->weight = weight;
+ if (weight) {
+ air_info->weight_reciprocal =
+ IEEE80211_RECIPROCAL_DIVISOR_32 / weight;
+ } else {
+ air_info->weight_reciprocal = 0;
+ }
+}
+
+static inline void airtime_weight_sum_set(struct airtime_sched_info *air_sched,
+ int weight_sum)
+{
+ if (air_sched->weight_sum == weight_sum)
+ return;
+
+ air_sched->weight_sum = weight_sum;
+ if (air_sched->weight_sum) {
+ air_sched->weight_sum_reciprocal = IEEE80211_RECIPROCAL_DIVISOR_64;
+ do_div(air_sched->weight_sum_reciprocal, air_sched->weight_sum);
+ } else {
+ air_sched->weight_sum_reciprocal = 0;
+ }
+}
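
[Worked illustration, not part of the diff: the reciprocal trick described in the comment above turns a per-packet division by the station weight into a multiply and a shift. The helper name is hypothetical; the constant is the one defined above.]

    /* Sketch: weight_reciprocal == 2^19 / weight, so
     * airtime / weight ~= (airtime * weight_reciprocal) >> 19.
     * E.g. weight 256 -> reciprocal 2048; 8000 us of airtime charges
     * (8000 * 2048) >> 19 = 31 us of virtual time (exact value 31.25).
     */
    static inline u32 example_weighted_airtime(u32 airtime_usec,
                                               u32 weight_reciprocal)
    {
            return ((u64)airtime_usec * weight_reciprocal) >>
                   IEEE80211_RECIPROCAL_SHIFT_32;
    }
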
+
+/* A problem when trying to enforce airtime fairness is that we want to divide
+ * the airtime between the currently *active* stations. However, basing this on
+ * the instantaneous queue state of stations doesn't work, as queues tend to
+ * oscillate very quickly between empty and occupied, leading to the scheduler
+ * thinking only a single station is active when deciding whether to allow
+ * transmission (and thus not throttling correctly).
+ *
+ * To fix this we use a timer-based notion of activity: a station is considered
+ * active if it has been scheduled within the last 100 ms; we keep a separate
+ * list of all the stations considered active in this manner, and lazily update
+ * the total weight of active stations from this list (filtering the stations in
+ * the list by their 'last active' time).
+ *
+ * We add one additional safeguard against stations that manage to get
+ * scheduled every 100 ms but don't transmit a lot of data, and thus don't use
+ * up any airtime. Such stations would be able to get priority for an extended
+ * period of time if they do start transmitting at full capacity again, and so
+ * we add an explicit maximum for how far behind a station is allowed to fall in
+ * the virtual airtime domain. This limit is set to a relatively high value of
+ * 20 ms because the main mechanism for catching up idle stations is the active
+ * state as described above; i.e., the hard limit should only be hit in
+ * pathological cases.
+ */
+#define AIRTIME_ACTIVE_DURATION (100 * NSEC_PER_MSEC)
+#define AIRTIME_MAX_BEHIND 20000 /* 20 ms */
+
+static inline bool airtime_is_active(struct airtime_info *air_info, u64 now)
+{
+ return air_info->last_scheduled >= now - AIRTIME_ACTIVE_DURATION;
+}
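
[Compact sketch, not part of the diff, of how the two mechanisms described above combine: an idle queue is clamped to at most AIRTIME_MAX_BEHIND behind the global virtual time, and a queue remains eligible while its virtual time has not overtaken the scheduler's. The helper name is hypothetical; the real checks live in net/mac80211/tx.c.]

    /* Sketch: clamp how far an idle queue may lag, then test eligibility. */
    static inline bool example_txq_eligible(struct airtime_sched_info *air_sched,
                                            struct airtime_info *air_info, u64 now)
    {
            if (!airtime_is_active(air_info, now) &&
                air_sched->v_t > AIRTIME_MAX_BEHIND &&
                air_info->v_t < air_sched->v_t - AIRTIME_MAX_BEHIND)
                    air_info->v_t = air_sched->v_t - AIRTIME_MAX_BEHIND;

            /* queues at or below the global virtual time may transmit */
            return air_info->v_t <= air_sched->v_t;
    }
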
+
+static inline void airtime_set_active(struct airtime_sched_info *air_sched,
+ struct airtime_info *air_info, u64 now)
+{
+ air_info->last_scheduled = now;
+ air_sched->last_schedule_activity = now;
+ list_move_tail(&air_info->list, &air_sched->active_list);
+}
+
+static inline bool airtime_catchup_v_t(struct airtime_sched_info *air_sched,
+ u64 v_t, u64 now)
+{
+ air_sched->v_t = v_t;
+ return true;
+}
+
+static inline void init_airtime_info(struct airtime_info *air_info,
+ struct airtime_sched_info *air_sched)
+{
+ atomic_set(&air_info->aql_tx_pending, 0);
+ air_info->aql_limit_low = air_sched->aql_txq_limit_low;
+ air_info->aql_limit_high = air_sched->aql_txq_limit_high;
+ airtime_weight_set(air_info, IEEE80211_DEFAULT_AIRTIME_WEIGHT);
+ INIT_LIST_HEAD(&air_info->list);
+}
+
static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
{
return ether_addr_equal(raddr, addr) ||
@@ -1738,6 +1900,9 @@ void ieee80211_csa_finalize_work(struct work_struct *work);
int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params);
+/* color change handling */
+void ieee80211_color_change_finalize_work(struct work_struct *work);
+
/* interface handling */
#define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
NETIF_F_HW_CSUM | NETIF_F_SG | \
@@ -1809,6 +1974,14 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
u64 *cookie);
int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
const u8 *buf, size_t len);
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq);
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq,
+ bool purge);
+void ieee80211_update_airtime_weight(struct ieee80211_local *local,
+ struct airtime_sched_info *air_sched,
+ u64 now, bool force);
/* HT */
void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
@@ -1879,7 +2052,6 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta);
enum ieee80211_sta_rx_bandwidth
ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width);
enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta);
-void ieee80211_sta_set_rx_nss(struct sta_info *sta);
void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt);
u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
@@ -1912,6 +2084,11 @@ ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
/* S1G */
void ieee80211_s1g_sta_rate_init(struct sta_info *sta);
+bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb);
+void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
+void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
/* Spectrum management */
void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -2045,6 +2222,8 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t);
void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
bool powersave);
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr, bool ack, u16 tx_time);
@@ -2287,9 +2466,13 @@ void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy,
struct net_device *dev,
const u8 *addr);
void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata);
-void ieee80211_tdls_chsw_work(struct work_struct *wk);
void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
const u8 *peer, u16 reason);
+void
+ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
+
+
const char *ieee80211_get_reason_code_string(u16 reason_code);
u16 ieee80211_encode_usf(int val);
u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 137fa4c50e07..62c95597704b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -462,6 +462,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
sdata_unlock(sdata);
cancel_work_sync(&sdata->csa_finalize_work);
+ cancel_work_sync(&sdata->color_change_finalize_work);
cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
@@ -551,6 +552,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
*/
ieee80211_free_keys(sdata, true);
skb_queue_purge(&sdata->skb_queue);
+ skb_queue_purge(&sdata->status_queue);
}
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
@@ -983,6 +985,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
}
skb_queue_head_init(&sdata->skb_queue);
+ skb_queue_head_init(&sdata->status_queue);
INIT_WORK(&sdata->work, ieee80211_iface_work);
return 0;
@@ -1318,13 +1321,158 @@ static void ieee80211_if_setup_no_queue(struct net_device *dev)
dev->priv_flags |= IFF_NO_QUEUE;
}
+static void ieee80211_iface_process_skb(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (void *)skb->data;
+
+ if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_BACK) {
+ struct sta_info *sta;
+ int len = skb->len;
+
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+ if (sta) {
+ switch (mgmt->u.action.u.addba_req.action_code) {
+ case WLAN_ACTION_ADDBA_REQ:
+ ieee80211_process_addba_request(local, sta,
+ mgmt, len);
+ break;
+ case WLAN_ACTION_ADDBA_RESP:
+ ieee80211_process_addba_resp(local, sta,
+ mgmt, len);
+ break;
+ case WLAN_ACTION_DELBA:
+ ieee80211_process_delba(sdata, sta,
+ mgmt, len);
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ }
+ mutex_unlock(&local->sta_mtx);
+ } else if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_VHT) {
+ switch (mgmt->u.action.u.vht_group_notif.action_code) {
+ case WLAN_VHT_ACTION_OPMODE_NOTIF: {
+ struct ieee80211_rx_status *status;
+ enum nl80211_band band;
+ struct sta_info *sta;
+ u8 opmode;
+
+ status = IEEE80211_SKB_RXCB(skb);
+ band = status->band;
+ opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+
+ if (sta)
+ ieee80211_vht_handle_opmode(sdata, sta, opmode,
+ band);
+
+ mutex_unlock(&local->sta_mtx);
+ break;
+ }
+ case WLAN_VHT_ACTION_GROUPID_MGMT:
+ ieee80211_process_mu_groups(sdata, mgmt);
+ break;
+ default:
+ WARN_ON(1);
+ break;
+ }
+ } else if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_S1G) {
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_TEARDOWN:
+ case WLAN_S1G_TWT_SETUP:
+ ieee80211_s1g_rx_twt_action(sdata, skb);
+ break;
+ default:
+ break;
+ }
+ } else if (ieee80211_is_ext(mgmt->frame_control)) {
+ if (sdata->vif.type == NL80211_IFTYPE_STATION)
+ ieee80211_sta_rx_queued_ext(sdata, skb);
+ else
+ WARN_ON(1);
+ } else if (ieee80211_is_data_qos(mgmt->frame_control)) {
+ struct ieee80211_hdr *hdr = (void *)mgmt;
+ struct sta_info *sta;
+
+ /*
+ * So the frame isn't mgmt, but frame_control
+ * is at the right place anyway, of course, so
+ * the if statement is correct.
+ *
+ * Warn if we have other data frame types here,
+ * they must not get here.
+ */
+ WARN_ON(hdr->frame_control &
+ cpu_to_le16(IEEE80211_STYPE_NULLFUNC));
+ WARN_ON(!(hdr->seq_ctrl &
+ cpu_to_le16(IEEE80211_SCTL_FRAG)));
+ /*
+ * This was a fragment of a frame, received while
+ * a block-ack session was active. That cannot be
+ * right, so terminate the session.
+ */
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+ if (sta) {
+ u16 tid = ieee80211_get_tid(hdr);
+
+ __ieee80211_stop_rx_ba_session(
+ sta, tid, WLAN_BACK_RECIPIENT,
+ WLAN_REASON_QSTA_REQUIRE_SETUP,
+ true);
+ }
+ mutex_unlock(&local->sta_mtx);
+ } else switch (sdata->vif.type) {
+ case NL80211_IFTYPE_STATION:
+ ieee80211_sta_rx_queued_mgmt(sdata, skb);
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ ieee80211_ibss_rx_queued_mgmt(sdata, skb);
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+ if (!ieee80211_vif_is_mesh(&sdata->vif))
+ break;
+ ieee80211_mesh_rx_queued_mgmt(sdata, skb);
+ break;
+ default:
+ WARN(1, "frame for unexpected interface type");
+ break;
+ }
+}
+
+static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (void *)skb->data;
+
+ if (ieee80211_is_action(mgmt->frame_control) &&
+ mgmt->u.action.category == WLAN_CATEGORY_S1G) {
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_TEARDOWN:
+ case WLAN_S1G_TWT_SETUP:
+ ieee80211_s1g_status_twt_action(sdata, skb);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
static void ieee80211_iface_work(struct work_struct *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data, work);
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
- struct sta_info *sta;
if (!ieee80211_sdata_running(sdata))
return;
@@ -1337,118 +1485,24 @@ static void ieee80211_iface_work(struct work_struct *work)
/* first process frames */
while ((skb = skb_dequeue(&sdata->skb_queue))) {
- struct ieee80211_mgmt *mgmt = (void *)skb->data;
-
kcov_remote_start_common(skb_get_kcov_handle(skb));
- if (ieee80211_is_action(mgmt->frame_control) &&
- mgmt->u.action.category == WLAN_CATEGORY_BACK) {
- int len = skb->len;
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, mgmt->sa);
- if (sta) {
- switch (mgmt->u.action.u.addba_req.action_code) {
- case WLAN_ACTION_ADDBA_REQ:
- ieee80211_process_addba_request(
- local, sta, mgmt, len);
- break;
- case WLAN_ACTION_ADDBA_RESP:
- ieee80211_process_addba_resp(local, sta,
- mgmt, len);
- break;
- case WLAN_ACTION_DELBA:
- ieee80211_process_delba(sdata, sta,
- mgmt, len);
- break;
- default:
- WARN_ON(1);
- break;
- }
- }
- mutex_unlock(&local->sta_mtx);
- } else if (ieee80211_is_action(mgmt->frame_control) &&
- mgmt->u.action.category == WLAN_CATEGORY_VHT) {
- switch (mgmt->u.action.u.vht_group_notif.action_code) {
- case WLAN_VHT_ACTION_OPMODE_NOTIF: {
- struct ieee80211_rx_status *status;
- enum nl80211_band band;
- u8 opmode;
-
- status = IEEE80211_SKB_RXCB(skb);
- band = status->band;
- opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
-
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, mgmt->sa);
-
- if (sta)
- ieee80211_vht_handle_opmode(sdata, sta,
- opmode,
- band);
-
- mutex_unlock(&local->sta_mtx);
- break;
- }
- case WLAN_VHT_ACTION_GROUPID_MGMT:
- ieee80211_process_mu_groups(sdata, mgmt);
- break;
- default:
- WARN_ON(1);
- break;
- }
- } else if (ieee80211_is_ext(mgmt->frame_control)) {
- if (sdata->vif.type == NL80211_IFTYPE_STATION)
- ieee80211_sta_rx_queued_ext(sdata, skb);
- else
- WARN_ON(1);
- } else if (ieee80211_is_data_qos(mgmt->frame_control)) {
- struct ieee80211_hdr *hdr = (void *)mgmt;
- /*
- * So the frame isn't mgmt, but frame_control
- * is at the right place anyway, of course, so
- * the if statement is correct.
- *
- * Warn if we have other data frame types here,
- * they must not get here.
- */
- WARN_ON(hdr->frame_control &
- cpu_to_le16(IEEE80211_STYPE_NULLFUNC));
- WARN_ON(!(hdr->seq_ctrl &
- cpu_to_le16(IEEE80211_SCTL_FRAG)));
- /*
- * This was a fragment of a frame, received while
- * a block-ack session was active. That cannot be
- * right, so terminate the session.
- */
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, mgmt->sa);
- if (sta) {
- u16 tid = ieee80211_get_tid(hdr);
+ if (skb->protocol == cpu_to_be16(ETH_P_TDLS))
+ ieee80211_process_tdls_channel_switch(sdata, skb);
+ else
+ ieee80211_iface_process_skb(local, sdata, skb);
- __ieee80211_stop_rx_ba_session(
- sta, tid, WLAN_BACK_RECIPIENT,
- WLAN_REASON_QSTA_REQUIRE_SETUP,
- true);
- }
- mutex_unlock(&local->sta_mtx);
- } else switch (sdata->vif.type) {
- case NL80211_IFTYPE_STATION:
- ieee80211_sta_rx_queued_mgmt(sdata, skb);
- break;
- case NL80211_IFTYPE_ADHOC:
- ieee80211_ibss_rx_queued_mgmt(sdata, skb);
- break;
- case NL80211_IFTYPE_MESH_POINT:
- if (!ieee80211_vif_is_mesh(&sdata->vif))
- break;
- ieee80211_mesh_rx_queued_mgmt(sdata, skb);
- break;
- default:
- WARN(1, "frame for unexpected interface type");
- break;
- }
+ kfree_skb(skb);
+ kcov_remote_stop();
+ }
+ /* process status queue */
+ while ((skb = skb_dequeue(&sdata->status_queue))) {
+ kcov_remote_start_common(skb_get_kcov_handle(skb));
+
+ ieee80211_iface_process_status(sdata, skb);
kfree_skb(skb);
+
kcov_remote_stop();
}
@@ -1515,9 +1569,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
}
skb_queue_head_init(&sdata->skb_queue);
+ skb_queue_head_init(&sdata->status_queue);
INIT_WORK(&sdata->work, ieee80211_iface_work);
INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
+ INIT_WORK(&sdata->color_change_finalize_work, ieee80211_color_change_finalize_work);
INIT_LIST_HEAD(&sdata->assigned_chanctx_list);
INIT_LIST_HEAD(&sdata->reserved_chanctx_list);
@@ -1964,6 +2020,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
}
}
+ for (i = 0; i < IEEE80211_NUM_ACS; i++)
+ init_airtime_info(&sdata->airtime[i], &local->airtime[i]);
+
ieee80211_set_default_queues(sdata);
sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
@@ -1985,9 +2044,16 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops);
- /* MTU range: 256 - 2304 */
+ /* MTU range is normally 256 - 2304, where the upper limit is
+ * the maximum MSDU size. Monitor interfaces send and receive
+ * MPDU and A-MSDU frames which may be much larger so we do
+ * not impose an upper limit in that case.
+ */
ndev->min_mtu = 256;
- ndev->max_mtu = local->hw.max_mtu;
+ if (type == NL80211_IFTYPE_MONITOR)
+ ndev->max_mtu = 0;
+ else
+ ndev->max_mtu = local->hw.max_mtu;
ret = cfg80211_register_netdevice(ndev);
if (ret) {
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index b275c8853074..6de8d0ad5497 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -259,7 +259,6 @@ static void tpt_trig_timer(struct timer_list *t)
{
struct tpt_led_trigger *tpt_trig = from_timer(tpt_trig, t, timer);
struct ieee80211_local *local = tpt_trig->local;
- struct led_classdev *led_cdev;
unsigned long on, off, tpt;
int i;
@@ -283,10 +282,7 @@ static void tpt_trig_timer(struct timer_list *t)
}
}
- read_lock(&local->tpt_led.leddev_list_lock);
- list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
- led_blink_set(led_cdev, &on, &off);
- read_unlock(&local->tpt_led.leddev_list_lock);
+ led_trigger_blink(&local->tpt_led, &on, &off);
}
const char *
@@ -341,7 +337,6 @@ static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local)
static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
{
struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger;
- struct led_classdev *led_cdev;
if (!tpt_trig->running)
return;
@@ -349,10 +344,7 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
tpt_trig->running = false;
del_timer_sync(&tpt_trig->timer);
- read_lock(&local->tpt_led.leddev_list_lock);
- list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
- led_set_brightness(led_cdev, LED_OFF);
- read_unlock(&local->tpt_led.leddev_list_lock);
+ led_trigger_event(&local->tpt_led, LED_OFF);
}
void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index f33a3acd7f96..45fb517591ee 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -257,14 +257,15 @@ static void ieee80211_restart_work(struct work_struct *work)
/* wait for scan work complete */
flush_workqueue(local->workqueue);
flush_work(&local->sched_scan_stopped_work);
+ flush_work(&local->radar_detected_work);
+
+ rtnl_lock();
+ /* we might do interface manipulations, so need both */
+ wiphy_lock(local->hw.wiphy);
WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
"%s called with hardware scan in progress\n", __func__);
- flush_work(&local->radar_detected_work);
- /* we might do interface manipulations, so need both */
- rtnl_lock();
- wiphy_lock(local->hw.wiphy);
list_for_each_entry(sdata, &local->interfaces, list) {
/*
* XXX: there may be more work for other vif types and even
@@ -706,10 +707,13 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
spin_lock_init(&local->queue_stop_reason_lock);
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
- INIT_LIST_HEAD(&local->active_txqs[i]);
- spin_lock_init(&local->active_txq_lock[i]);
- local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
- local->aql_txq_limit_high[i] =
+ struct airtime_sched_info *air_sched = &local->airtime[i];
+
+ air_sched->active_txqs = RB_ROOT_CACHED;
+ INIT_LIST_HEAD(&air_sched->active_list);
+ spin_lock_init(&air_sched->lock);
+ air_sched->aql_txq_limit_low = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
+ air_sched->aql_txq_limit_high =
IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
}
@@ -739,8 +743,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
INIT_WORK(&local->sched_scan_stopped_work,
ieee80211_sched_scan_stopped_work);
- INIT_WORK(&local->tdls_chsw_work, ieee80211_tdls_chsw_work);
-
spin_lock_init(&local->ack_status_lock);
idr_init(&local->ack_status_frames);
@@ -757,7 +759,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
skb_queue_head_init(&local->skb_queue);
skb_queue_head_init(&local->skb_queue_unreliable);
- skb_queue_head_init(&local->skb_queue_tdls_chsw);
ieee80211_alloc_led_names(local);
@@ -1014,8 +1015,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_ht = supp_ht || sband->ht_cap.ht_supported;
supp_vht = supp_vht || sband->vht_cap.vht_supported;
- if (!supp_he)
- supp_he = !!ieee80211_get_he_sta_cap(sband);
+ for (i = 0; i < sband->n_iftype_data; i++) {
+ const struct ieee80211_sband_iftype_data *iftd;
+
+ iftd = &sband->iftype_data[i];
+
+ supp_he = supp_he || iftd->he_cap.has_he;
+ }
/* HT, VHT, HE require QoS, thus >= 4 queues */
if (WARN_ON(local->hw.queues < IEEE80211_NUM_ACS &&
@@ -1389,7 +1395,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
cancel_delayed_work_sync(&local->roc_work);
cancel_work_sync(&local->restart_work);
cancel_work_sync(&local->reconfig_filter);
- cancel_work_sync(&local->tdls_chsw_work);
flush_work(&local->sched_scan_stopped_work);
flush_work(&local->radar_detected_work);
@@ -1401,7 +1406,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
skb_queue_purge(&local->skb_queue);
skb_queue_purge(&local->skb_queue_unreliable);
- skb_queue_purge(&local->skb_queue_tdls_chsw);
wiphy_unregister(local->hw.wiphy);
destroy_workqueue(local->workqueue);
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 40492d1bd8fd..77080b4f87b8 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -134,7 +134,7 @@ struct mesh_path {
* gate's mpath may or may not be resolved and active.
* @gates_lock: protects updates to known_gates
* @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
- * @walk_head: linked list containging all mesh_path objects
+ * @walk_head: linked list containing all mesh_path objects
* @walk_lock: lock protecting walk_head
* @entries: number of entries in the table
*/
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 3db514c4c63a..a05b615deb51 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1124,7 +1124,7 @@ enddiscovery:
* forwarding information is found.
*
* Returns: 0 if the next hop was found and -ENOENT if the frame was queued.
- * skb is freeed here if no mpath could be allocated.
+ * skb is freed here if no mpath could be allocated.
*/
int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 620ecf922408..efbefcbac3ac 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -122,7 +122,7 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr,
hdr = (struct ieee80211_hdr *) skb->data;
/* we preserve the previous mesh header and only add
- * the new addreses */
+ * the new addresses */
mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
mshdr->flags = MESH_FLAGS_AE_A5_A6;
memcpy(mshdr->eaddr1, hdr->addr3, ETH_ALEN);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index aca26df7587d..a6915847d78a 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -150,7 +150,7 @@ out:
* mesh STA in a MBSS. Three HT protection modes are supported for now, non-HT
* mixed mode, 20MHz-protection and no-protection mode. non-HT mixed mode is
* selected if any non-HT peers are present in our MBSS. 20MHz-protection mode
- * is selected if all peers in our 20/40MHz MBSS support HT and atleast one
+ * is selected if all peers in our 20/40MHz MBSS support HT and at least one
* HT20 peer is present. Otherwise no-protection mode is selected.
*/
static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3f2aad2e7436..c0ea3b1aa9e1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2020 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
*/
#include <linux/delay.h>
@@ -371,7 +371,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
struct cfg80211_chan_def chandef;
u16 ht_opmode;
u32 flags;
- enum ieee80211_sta_rx_bandwidth new_sta_bw;
u32 vht_cap_info = 0;
int ret;
@@ -385,7 +384,9 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
/* don't check HE if we associated as non-HE station */
if (ifmgd->flags & IEEE80211_STA_DISABLE_HE ||
- !ieee80211_get_he_sta_cap(sband))
+ !ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif)))
+
he_oper = NULL;
if (WARN_ON_ONCE(!sta))
@@ -445,40 +446,13 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
IEEE80211_STA_DISABLE_160MHZ)) ||
!cfg80211_chandef_valid(&chandef)) {
sdata_info(sdata,
- "AP %pM changed bandwidth in a way we can't support - disconnect\n",
- ifmgd->bssid);
- return -EINVAL;
- }
-
- switch (chandef.width) {
- case NL80211_CHAN_WIDTH_20_NOHT:
- case NL80211_CHAN_WIDTH_20:
- new_sta_bw = IEEE80211_STA_RX_BW_20;
- break;
- case NL80211_CHAN_WIDTH_40:
- new_sta_bw = IEEE80211_STA_RX_BW_40;
- break;
- case NL80211_CHAN_WIDTH_80:
- new_sta_bw = IEEE80211_STA_RX_BW_80;
- break;
- case NL80211_CHAN_WIDTH_80P80:
- case NL80211_CHAN_WIDTH_160:
- new_sta_bw = IEEE80211_STA_RX_BW_160;
- break;
- default:
+ "AP %pM changed caps/bw in a way we can't support (0x%x/0x%x) - disconnect\n",
+ ifmgd->bssid, flags, ifmgd->flags);
return -EINVAL;
}
- if (new_sta_bw > sta->cur_max_bandwidth)
- new_sta_bw = sta->cur_max_bandwidth;
-
- if (new_sta_bw < sta->sta.bandwidth) {
- sta->sta.bandwidth = new_sta_bw;
- rate_control_rate_update(local, sband, sta,
- IEEE80211_RC_BW_CHANGED);
- }
-
ret = ieee80211_vif_change_bandwidth(sdata, &chandef, changed);
+
if (ret) {
sdata_info(sdata,
"AP %pM changed bandwidth to incompatible one - disconnect\n",
@@ -486,12 +460,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
return ret;
}
- if (new_sta_bw > sta->sta.bandwidth) {
- sta->sta.bandwidth = new_sta_bw;
- rate_control_rate_update(local, sband, sta,
- IEEE80211_RC_BW_CHANGED);
- }
-
return 0;
}
@@ -617,7 +585,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
/*
- * If some other vif is using the MU-MIMO capablity we cannot associate
+ * If some other vif is using the MU-MIMO capability we cannot associate
* using MU-MIMO - this will lead to contradictions in the group-id
* mechanism.
* Ownership is defined since association request, in order to avoid
@@ -676,7 +644,8 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
- he_cap = ieee80211_get_he_sta_cap(sband);
+ he_cap = ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
if (!he_cap || !reg_cap)
return;
@@ -712,6 +681,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
u32 rates = 0;
__le16 listen_int;
struct element *ext_capa = NULL;
+ enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+ const struct ieee80211_sband_iftype_data *iftd;
+ struct ieee80211_prep_tx_info info = {};
/* we know it's writable, cast away the const */
if (assoc_data->ie_len)
@@ -756,6 +728,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
}
}
+ iftd = ieee80211_get_sband_iftype_data(sband, iftype);
+
skb = alloc_skb(local->hw.extra_tx_headroom +
sizeof(*mgmt) + /* bit too much but doesn't matter */
2 + assoc_data->ssid_len + /* SSID */
@@ -770,7 +744,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
assoc_data->ie_len + /* extra IEs */
(assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
- 9, /* WMM */
+ 9 + /* WMM */
+ (iftd ? iftd->vendor_elems.len : 0),
GFP_KERNEL);
if (!skb)
return;
@@ -810,12 +785,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
mgmt->u.reassoc_req.listen_interval = listen_int;
memcpy(mgmt->u.reassoc_req.current_ap, assoc_data->prev_bssid,
ETH_ALEN);
+ info.subtype = IEEE80211_STYPE_REASSOC_REQ;
} else {
skb_put(skb, 4);
mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_ASSOC_REQ);
mgmt->u.assoc_req.capab_info = cpu_to_le16(capab);
mgmt->u.assoc_req.listen_interval = listen_int;
+ info.subtype = IEEE80211_STYPE_ASSOC_REQ;
}
/* SSID */
@@ -1043,6 +1020,9 @@ skip_rates:
ieee80211_add_s1g_capab_ie(sdata, &sband->s1g_cap, skb);
}
+ if (iftd && iftd->vendor_elems.data && iftd->vendor_elems.len)
+ skb_put_data(skb, iftd->vendor_elems.data, iftd->vendor_elems.len);
+
/* add any remaining custom (i.e. vendor specific here) IEs */
if (assoc_data->ie_len) {
noffset = assoc_data->ie_len;
@@ -1060,7 +1040,7 @@ skip_rates:
ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
ifmgd->assoc_req_ies_len = pos - ie_start;
- drv_mgd_prepare_tx(local, sdata, 0);
+ drv_mgd_prepare_tx(local, sdata, &info);
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
@@ -1094,11 +1074,6 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_hdr_3addr *nullfunc;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- /* Don't send NDPs when STA is connected HE */
- if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- !(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
- return;
-
skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif,
!ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP));
if (!skb)
@@ -1120,8 +1095,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
ieee80211_tx_skb(sdata, skb);
}
-static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
{
struct sk_buff *skb;
struct ieee80211_hdr *nullfunc;
@@ -1130,10 +1105,6 @@ static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
return;
- /* Don't send NDPs when connected HE */
- if (!(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
- return;
-
skb = dev_alloc_skb(local->hw.extra_tx_headroom + 30);
if (!skb)
return;
@@ -1183,10 +1154,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
*/
if (sdata->reserved_chanctx) {
- struct ieee80211_supported_band *sband = NULL;
- struct sta_info *mgd_sta = NULL;
- enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20;
-
/*
* with multi-vif csa driver may call ieee80211_csa_finish()
* many times while waiting for other interfaces to use their
@@ -1195,48 +1162,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
if (sdata->reserved_ready)
goto out;
- if (sdata->vif.bss_conf.chandef.width !=
- sdata->csa_chandef.width) {
- /*
- * For managed interface, we need to also update the AP
- * station bandwidth and align the rate scale algorithm
- * on the bandwidth change. Here we only consider the
- * bandwidth of the new channel definition (as channel
- * switch flow does not have the full HT/VHT/HE
- * information), assuming that if additional changes are
- * required they would be done as part of the processing
- * of the next beacon from the AP.
- */
- switch (sdata->csa_chandef.width) {
- case NL80211_CHAN_WIDTH_20_NOHT:
- case NL80211_CHAN_WIDTH_20:
- default:
- bw = IEEE80211_STA_RX_BW_20;
- break;
- case NL80211_CHAN_WIDTH_40:
- bw = IEEE80211_STA_RX_BW_40;
- break;
- case NL80211_CHAN_WIDTH_80:
- bw = IEEE80211_STA_RX_BW_80;
- break;
- case NL80211_CHAN_WIDTH_80P80:
- case NL80211_CHAN_WIDTH_160:
- bw = IEEE80211_STA_RX_BW_160;
- break;
- }
-
- mgd_sta = sta_info_get(sdata, ifmgd->bssid);
- sband =
- local->hw.wiphy->bands[sdata->csa_chandef.chan->band];
- }
-
- if (sdata->vif.bss_conf.chandef.width >
- sdata->csa_chandef.width) {
- mgd_sta->sta.bandwidth = bw;
- rate_control_rate_update(local, sband, mgd_sta,
- IEEE80211_RC_BW_CHANGED);
- }
-
ret = ieee80211_vif_use_reserved_context(sdata);
if (ret) {
sdata_info(sdata,
@@ -1247,13 +1172,6 @@ static void ieee80211_chswitch_work(struct work_struct *work)
goto out;
}
- if (sdata->vif.bss_conf.chandef.width <
- sdata->csa_chandef.width) {
- mgd_sta->sta.bandwidth = bw;
- rate_control_rate_update(local, sband, mgd_sta,
- IEEE80211_RC_BW_CHANGED);
- }
-
goto out;
}
@@ -2341,6 +2259,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
u32 changed = 0;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = stype,
+ };
sdata_assert_lock(sdata);
@@ -2390,8 +2311,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
* driver requested so.
*/
if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) &&
- !ifmgd->have_beacon)
- drv_mgd_prepare_tx(sdata->local, sdata, 0);
+ !ifmgd->have_beacon) {
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
+ }
ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid,
ifmgd->bssid, stype, reason,
@@ -2402,6 +2324,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
if (tx)
ieee80211_flush_queues(local, sdata, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
+
/* clear bssid only after building the needed mgmt frames */
eth_zero_addr(ifmgd->bssid);
@@ -2617,10 +2541,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
ifmgd->nullfunc_failed = false;
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
- ifmgd->probe_send_count--;
- else
- ieee80211_send_nullfunc(sdata->local, sdata, false);
+ ieee80211_send_nullfunc(sdata->local, sdata, false);
} else {
int ssid_len;
@@ -2952,6 +2873,9 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
u8 *pos;
struct ieee802_11_elems elems;
u32 tx_flags = 0;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_AUTH,
+ };
pos = mgmt->u.auth.variable;
ieee802_11_parse_elems(pos, len - (pos - (u8 *)mgmt), false, &elems,
@@ -2959,7 +2883,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
if (!elems.challenge)
return;
auth_data->expected_transaction = 4;
- drv_mgd_prepare_tx(sdata->local, sdata, 0);
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
IEEE80211_TX_INTFL_MLME_CONN_TX;
@@ -3012,6 +2936,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
.type = MLME_EVENT,
.u.mlme.data = AUTH_EVENT,
};
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_AUTH,
+ };
sdata_assert_lock(sdata);
@@ -3040,7 +2967,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
mgmt->sa, auth_alg, ifmgd->auth_data->algorithm,
auth_transaction,
ifmgd->auth_data->expected_transaction);
- return;
+ goto notify_driver;
}
if (status_code != WLAN_STATUS_SUCCESS) {
@@ -3051,7 +2978,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
(auth_transaction == 1 &&
(status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT ||
status_code == WLAN_STATUS_SAE_PK))))
- return;
+ goto notify_driver;
sdata_info(sdata, "%pM denied authentication (status %d)\n",
mgmt->sa, status_code);
@@ -3059,7 +2986,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
event.u.mlme.status = MLME_DENIED;
event.u.mlme.reason = status_code;
drv_event_callback(sdata->local, sdata, &event);
- return;
+ goto notify_driver;
}
switch (ifmgd->auth_data->algorithm) {
@@ -3081,10 +3008,11 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
default:
WARN_ONCE(1, "invalid auth alg %d",
ifmgd->auth_data->algorithm);
- return;
+ goto notify_driver;
}
event.u.mlme.status = MLME_SUCCESS;
+ info.success = 1;
drv_event_callback(sdata->local, sdata, &event);
if (ifmgd->auth_data->algorithm != WLAN_AUTH_SAE ||
(auth_transaction == 2 &&
@@ -3098,6 +3026,8 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
}
cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+notify_driver:
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
}
#define case_WLAN(type) \
@@ -3314,6 +3244,23 @@ static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
return 0;
}
+static bool ieee80211_twt_bcast_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_bss_conf *bss_conf,
+ struct ieee80211_supported_band *sband,
+ struct sta_info *sta)
+{
+ const struct ieee80211_sta_he_cap *own_he_cap =
+ ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+
+ return bss_conf->he_support &&
+ (sta->sta.he_cap.he_cap_elem.mac_cap_info[2] &
+ IEEE80211_HE_MAC_CAP2_BCAST_TWT) &&
+ own_he_cap &&
+ (own_he_cap->he_cap_elem.mac_cap_info[2] &
+ IEEE80211_HE_MAC_CAP2_BCAST_TWT);
+}
+
static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss,
struct ieee80211_mgmt *mgmt, size_t len,
@@ -3529,6 +3476,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
bss_conf->twt_protected = false;
}
+ bss_conf->twt_broadcast =
+ ieee80211_twt_bcast_support(sdata, bss_conf, sband, sta);
+
if (bss_conf->he_support) {
bss_conf->he_bss_color.color =
le32_get_bits(elems->he_operation->he_oper_params,
@@ -3699,6 +3649,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
.type = MLME_EVENT,
.u.mlme.data = ASSOC_EVENT,
};
+ struct ieee80211_prep_tx_info info = {};
sdata_assert_lock(sdata);
@@ -3728,6 +3679,15 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
aid = 0; /* TODO */
}
+ /*
+ * Note: this may not be perfect; the AP might misbehave. If
+ * anyone needs to rely on the complete notification carrying
+ * exactly the right subtype, then we need to track what we
+ * actually transmitted.
+ */
+ info.subtype = reassoc ? IEEE80211_STYPE_REASSOC_REQ :
+ IEEE80211_STYPE_ASSOC_REQ;
+
sdata_info(sdata,
"RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n",
reassoc ? "Rea" : "A", mgmt->sa,
@@ -3753,7 +3713,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
assoc_data->timeout_started = true;
if (ms > IEEE80211_ASSOC_TIMEOUT)
run_again(sdata, assoc_data->timeout);
- return;
+ goto notify_driver;
}
if (status_code != WLAN_STATUS_SUCCESS) {
@@ -3768,7 +3728,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
/* oops -- internal error -- send timeout for now */
ieee80211_destroy_assoc_data(sdata, false, false);
cfg80211_assoc_timeout(sdata->dev, cbss);
- return;
+ goto notify_driver;
}
event.u.mlme.status = MLME_SUCCESS;
drv_event_callback(sdata->local, sdata, &event);
@@ -3786,10 +3746,14 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
if (sdata->tx_conf[ac].uapsd)
uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
+
+ info.success = 1;
}
cfg80211_rx_assoc_resp(sdata->dev, cbss, (u8 *)mgmt, len, uapsd_queues,
ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len);
+notify_driver:
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
}
static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
@@ -4408,7 +4372,9 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
u32 tx_flags = 0;
u16 trans = 1;
u16 status = 0;
- u16 prepare_tx_duration = 0;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_AUTH,
+ };
sdata_assert_lock(sdata);
@@ -4431,10 +4397,9 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
}
if (auth_data->algorithm == WLAN_AUTH_SAE)
- prepare_tx_duration =
- jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE);
+ info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE);
- drv_mgd_prepare_tx(local, sdata, prepare_tx_duration);
+ drv_mgd_prepare_tx(local, sdata, &info);
sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
auth_data->bss->bssid, auth_data->tries,
@@ -4929,11 +4894,13 @@ static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata,
}
static bool
-ieee80211_verify_sta_he_mcs_support(struct ieee80211_supported_band *sband,
+ieee80211_verify_sta_he_mcs_support(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_supported_band *sband,
const struct ieee80211_he_operation *he_op)
{
const struct ieee80211_sta_he_cap *sta_he_cap =
- ieee80211_get_he_sta_cap(sband);
+ ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
u16 ap_min_req_set;
int i;
@@ -5027,7 +4994,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
}
- if (!ieee80211_get_he_sta_cap(sband))
+ if (!ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif)))
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
rcu_read_lock();
@@ -5085,7 +5053,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
else
he_oper = NULL;
- if (!ieee80211_verify_sta_he_mcs_support(sband, he_oper))
+ if (!ieee80211_verify_sta_he_mcs_support(sdata, sband, he_oper))
ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
}
@@ -5655,15 +5623,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2 * FILS_NONCE_LEN);
assoc_data->bss = req->bss;
-
- if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
- if (ifmgd->powersave)
- sdata->smps_mode = IEEE80211_SMPS_DYNAMIC;
- else
- sdata->smps_mode = IEEE80211_SMPS_OFF;
- } else
- sdata->smps_mode = ifmgd->req_smps;
-
assoc_data->capability = req->bss->capability;
assoc_data->supp_rates = bss->supp_rates;
assoc_data->supp_rates_len = bss->supp_rates_len;
@@ -5770,6 +5729,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
if (err)
goto err_clear;
+ if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
+ if (ifmgd->powersave)
+ sdata->smps_mode = IEEE80211_SMPS_DYNAMIC;
+ else
+ sdata->smps_mode = IEEE80211_SMPS_OFF;
+ } else {
+ sdata->smps_mode = ifmgd->req_smps;
+ }
+
rcu_read_lock();
beacon_ies = rcu_dereference(req->bss->beacon_ies);
@@ -5854,6 +5822,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
bool tx = !req->local_state_change;
+ struct ieee80211_prep_tx_info info = {
+ .subtype = IEEE80211_STYPE_DEAUTH,
+ };
if (ifmgd->auth_data &&
ether_addr_equal(ifmgd->auth_data->bss->bssid, req->bssid)) {
@@ -5862,7 +5833,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
req->bssid, req->reason_code,
ieee80211_get_reason_code_string(req->reason_code));
- drv_mgd_prepare_tx(sdata->local, sdata, 0);
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
@@ -5871,7 +5842,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
-
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
@@ -5882,7 +5853,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
req->bssid, req->reason_code,
ieee80211_get_reason_code_string(req->reason_code));
- drv_mgd_prepare_tx(sdata->local, sdata, 0);
+ drv_mgd_prepare_tx(sdata->local, sdata, &info);
ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid,
IEEE80211_STYPE_DEAUTH,
req->reason_code, tx,
@@ -5906,6 +5877,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
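The mlme.c hunks above replace the bare duration argument of drv_mgd_prepare_tx() with a struct ieee80211_prep_tx_info and pair every prepare call with a drv_mgd_complete_tx() on all exit paths (the new notify_driver labels). A minimal sketch of the resulting call pattern, using only the fields this patch touches (subtype, duration, success); the sketch is illustrative and not part of the patch:

/* Illustrative only: how the MLME code now brackets a management
 * frame exchange for the driver.
 */
static void example_auth_exchange(struct ieee80211_local *local,
				  struct ieee80211_sub_if_data *sdata)
{
	struct ieee80211_prep_tx_info info = {
		.subtype  = IEEE80211_STYPE_AUTH,
		.duration = 0,	/* e.g. the SAE timeout in ms, if known */
	};

	drv_mgd_prepare_tx(local, sdata, &info);	/* before TX */

	/* ... transmit the frame and handle the response ... */

	info.success = 1;				/* only on success */
	drv_mgd_complete_tx(local, sdata, &info);	/* on every exit path */
}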
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 63652c39c8e0..e5935e3d7a07 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -297,15 +297,11 @@ void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata)
static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc)
{
struct sk_buff *skb = txrc->skb;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- __le16 fc;
-
- fc = hdr->frame_control;
return (info->flags & (IEEE80211_TX_CTL_NO_ACK |
IEEE80211_TX_CTL_USE_MINRATE)) ||
- !ieee80211_is_data(fc);
+ !ieee80211_is_tx_data(skb);
}
static void rc_send_low_basicrate(struct ieee80211_tx_rate *rate,
@@ -396,6 +392,10 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta,
int mcast_rate;
bool use_basicrate = false;
+ if (ieee80211_is_tx_data(txrc->skb) &&
+ info->flags & IEEE80211_TX_CTL_NO_ACK)
+ return false;
+
if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
__rate_control_send_low(txrc->hw, sband, pubsta, info,
txrc->rate_idx_mask);
@@ -870,7 +870,6 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
int max_rates)
{
struct ieee80211_sub_if_data *sdata;
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_supported_band *sband;
@@ -882,7 +881,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
sdata = vif_to_sdata(vif);
sband = sdata->local->hw.wiphy->bands[info->band];
- if (ieee80211_is_data(hdr->frame_control))
+ if (ieee80211_is_tx_data(skb))
rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
if (dest[0].idx < 0)
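Both rate.c hunks switch from inspecting the 802.11 header to the ieee80211_is_tx_data() helper, which also treats hardware-encapsulated (ethernet-format) frames as data. The helper itself is defined elsewhere in this series and is not shown in this excerpt; a sketch of what it is expected to check:

/* Sketch of the assumed helper: a frame counts as TX data either when
 * it uses offloaded 802.3 encapsulation or when the 802.11 header
 * says it is a data frame.
 */
static inline bool example_is_tx_data(struct sk_buff *skb)
{
	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
	struct ieee80211_hdr *hdr = (void *)skb->data;

	return info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP ||
	       ieee80211_is_data(hdr->frame_control);
}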
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index a6f3fb4a9197..72b44d4c42d0 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -434,7 +434,7 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
unsigned int nsecs = 0, overhead = mi->overhead;
unsigned int ampdu_len = 1;
- /* do not account throughput if sucess prob is below 10% */
+ /* do not account throughput if success prob is below 10% */
if (prob_avg < MINSTREL_FRAC(10, 100))
return 0;
@@ -1176,29 +1176,6 @@ minstrel_downgrade_rate(struct minstrel_ht_sta *mi, u16 *idx, bool primary)
}
static void
-minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
-{
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
- u16 tid;
-
- if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO)
- return;
-
- if (unlikely(!ieee80211_is_data_qos(hdr->frame_control)))
- return;
-
- if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
- return;
-
- tid = ieee80211_get_tid(hdr);
- if (likely(sta->ampdu_mlme.tid_tx[tid]))
- return;
-
- ieee80211_start_tx_ba_session(pubsta, tid, 0);
-}
-
-static void
minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
void *priv_sta, struct ieee80211_tx_status *st)
{
@@ -1211,6 +1188,10 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
bool last, update = false;
int i;
+ /* Ignore packets that were sent with the no-ack flag */
+ if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+ return;
+
/* This packet was aggregated but doesn't carry status info */
if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
!(info->flags & IEEE80211_TX_STAT_AMPDU))
@@ -1498,10 +1479,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
struct minstrel_priv *mp = priv;
u16 sample_idx;
- if (!(info->flags & IEEE80211_TX_CTL_AMPDU) &&
- !minstrel_ht_is_legacy_group(MI_RATE_GROUP(mi->max_prob_rate)))
- minstrel_aggr_check(sta, txrc->skb);
-
info->flags |= mi->tx_flags;
#ifdef CONFIG_MAC80211_DEBUGFS
@@ -1907,6 +1884,7 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
static const struct rate_control_ops mac80211_minstrel_ht = {
.name = "minstrel_ht",
+ .capa = RATE_CTRL_CAPA_AMPDU_TRIGGER,
.tx_status_ext = minstrel_ht_tx_status,
.get_rate = minstrel_ht_get_rate,
.rate_init = minstrel_ht_rate_init,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index af0ef456eb0f..99ed68f7dc36 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -214,6 +214,24 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
return len;
}
+static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ skb_queue_tail(&sdata->skb_queue, skb);
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ if (sta)
+ sta->rx_stats.packets++;
+}
+
+static void ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ skb->protocol = 0;
+ __ieee80211_queue_skb_to_iface(sdata, sta, skb);
+}
+
static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
int rtap_space)
@@ -254,8 +272,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
if (!skb)
return;
- skb_queue_tail(&sdata->skb_queue, skb);
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ ieee80211_queue_skb_to_iface(sdata, NULL, skb);
}
/*
@@ -342,7 +359,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
put_unaligned_le32(it_present_val, it_present);
- pos = (void *)(it_present + 1);
+ /* This references through an offset into it_optional[] rather
+ * than via it_present, because otherwise later uses of pos
+ * would cause the compiler to think we have walked past the
+ * end of the struct member.
+ */
+ pos = (void *)&rthdr->it_optional[it_present - rthdr->it_optional];
/* the order of the following fields is important */
@@ -355,7 +377,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
ieee80211_calculate_rx_timestamp(local, status,
mpdulen, 0),
pos);
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_TSFT));
pos += 8;
}
@@ -379,7 +401,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*pos = 0;
} else {
int shift = 0;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE));
if (status->bw == RATE_INFO_BW_10)
shift = 1;
else if (status->bw == RATE_INFO_BW_5)
@@ -416,7 +438,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
*pos = status->signal;
rthdr->it_present |=
- cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL));
pos++;
}
@@ -442,7 +464,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
if (status->encoding == RX_ENC_HT) {
unsigned int stbc;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
*pos++ = local->hw.radiotap_mcs_details;
*pos = 0;
if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
@@ -466,7 +488,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
while ((pos - (u8 *)rthdr) & 3)
pos++;
rthdr->it_present |=
- cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS);
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_AMPDU_STATUS));
put_unaligned_le32(status->ampdu_reference, pos);
pos += 4;
if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN)
@@ -493,7 +515,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
if (status->encoding == RX_ENC_VHT) {
u16 known = local->hw.radiotap_vht_details;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
put_unaligned_le16(known, pos);
pos += 2;
/* flags */
@@ -537,7 +559,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT;
rthdr->it_present |=
- cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP);
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_TIMESTAMP));
/* ensure 8 byte alignment */
while ((pos - (u8 *)rthdr) & 7)
@@ -625,7 +647,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
/* ensure 2 byte alignment */
while ((pos - (u8 *)rthdr) & 1)
pos++;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE));
memcpy(pos, &he, sizeof(he));
pos += sizeof(he);
}
@@ -635,14 +657,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
/* ensure 2 byte alignment */
while ((pos - (u8 *)rthdr) & 1)
pos++;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE_MU);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE_MU));
memcpy(pos, &he_mu, sizeof(he_mu));
pos += sizeof(he_mu);
}
if (status->flag & RX_FLAG_NO_PSDU) {
rthdr->it_present |=
- cpu_to_le32(1 << IEEE80211_RADIOTAP_ZERO_LEN_PSDU);
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_ZERO_LEN_PSDU));
*pos++ = status->zero_length_psdu_type;
}
@@ -650,7 +672,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
/* ensure 2 byte alignment */
while ((pos - (u8 *)rthdr) & 1)
pos++;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_LSIG);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_LSIG));
memcpy(pos, &lsig, sizeof(lsig));
pos += sizeof(lsig);
}
@@ -713,7 +735,8 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local,
* Need to make a copy and possibly remove radiotap header
* and FCS from the original.
*/
- skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC);
+ skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD,
+ 0, GFP_ATOMIC);
if (!skb)
return NULL;
@@ -1339,7 +1362,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
struct sk_buff_head *frames)
{
struct sk_buff *skb = rx->skb;
- struct ieee80211_local *local = rx->local;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct sta_info *sta = rx->sta;
struct tid_ampdu_rx *tid_agg_rx;
@@ -1391,8 +1413,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
/* if this mpdu is fragmented - terminate rx aggregation session */
sc = le16_to_cpu(hdr->seq_ctrl);
if (sc & IEEE80211_SCTL_FRAG) {
- skb_queue_tail(&rx->sdata->skb_queue, skb);
- ieee80211_queue_work(&local->hw, &rx->sdata->work);
+ ieee80211_queue_skb_to_iface(rx->sdata, NULL, skb);
return;
}
@@ -1563,12 +1584,8 @@ static void sta_ps_start(struct sta_info *sta)
for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) {
struct ieee80211_txq *txq = sta->sta.txq[tid];
- struct txq_info *txqi = to_txq_info(txq);
- spin_lock(&local->active_txq_lock[txq->ac]);
- if (!list_empty(&txqi->schedule_order))
- list_del_init(&txqi->schedule_order);
- spin_unlock(&local->active_txq_lock[txq->ac]);
+ ieee80211_unschedule_txq(&local->hw, txq, false);
if (txq_has_queue(txq))
set_bit(tid, &sta->txq_buffered_tids);
@@ -3009,11 +3026,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
tf->category == WLAN_CATEGORY_TDLS &&
(tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_REQUEST ||
tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_RESPONSE)) {
- skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb);
- schedule_work(&local->tdls_chsw_work);
- if (rx->sta)
- rx->sta->rx_stats.packets++;
-
+ rx->skb->protocol = cpu_to_be16(ETH_P_TDLS);
+ __ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
return RX_QUEUED;
}
}
@@ -3198,6 +3212,68 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
}
+static bool
+ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx)
+{
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)rx->skb->data;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
+ struct ieee80211_sub_if_data *sdata = rx->sdata;
+ const struct ieee80211_sta_he_cap *hecap;
+ struct ieee80211_supported_band *sband;
+
+ /* TWT actions are only supported in AP mode for the moment */
+ if (sdata->vif.type != NL80211_IFTYPE_AP)
+ return false;
+
+ if (!rx->local->ops->add_twt_setup)
+ return false;
+
+ sband = rx->local->hw.wiphy->bands[status->band];
+ hecap = ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ if (!hecap)
+ return false;
+
+ if (!(hecap->he_cap_elem.mac_cap_info[0] &
+ IEEE80211_HE_MAC_CAP0_TWT_RES))
+ return false;
+
+ if (!rx->sta)
+ return false;
+
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_SETUP: {
+ struct ieee80211_twt_setup *twt;
+
+ if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE +
+ 1 + /* action code */
+ sizeof(struct ieee80211_twt_setup) +
+ 2 /* TWT req_type agrt */)
+ break;
+
+ twt = (void *)mgmt->u.action.u.s1g.variable;
+ if (twt->element_id != WLAN_EID_S1G_TWT)
+ break;
+
+ if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE +
+ 4 + /* action code + token + tlv */
+ twt->length)
+ break;
+
+ return true; /* queue the frame */
+ }
+ case WLAN_S1G_TWT_TEARDOWN:
+ if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + 2)
+ break;
+
+ return true; /* queue the frame */
+ default:
+ break;
+ }
+
+ return false;
+}
+
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
{
@@ -3477,6 +3553,17 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
!mesh_path_sel_is_hwmp(sdata))
break;
goto queue;
+ case WLAN_CATEGORY_S1G:
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_SETUP:
+ case WLAN_S1G_TWT_TEARDOWN:
+ if (ieee80211_process_rx_twt_action(rx))
+ goto queue;
+ break;
+ default:
+ break;
+ }
+ break;
}
return RX_CONTINUE;
@@ -3493,10 +3580,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
return RX_QUEUED;
queue:
- skb_queue_tail(&sdata->skb_queue, rx->skb);
- ieee80211_queue_work(&local->hw, &sdata->work);
- if (rx->sta)
- rx->sta->rx_stats.packets++;
+ ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
return RX_QUEUED;
}
@@ -3644,10 +3728,7 @@ ieee80211_rx_h_ext(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
/* for now only beacons are ext, so queue them */
- skb_queue_tail(&sdata->skb_queue, rx->skb);
- ieee80211_queue_work(&rx->local->hw, &sdata->work);
- if (rx->sta)
- rx->sta->rx_stats.packets++;
+ ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
return RX_QUEUED;
}
@@ -3704,11 +3785,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
}
- /* queue up frame and kick off work to process it */
- skb_queue_tail(&sdata->skb_queue, rx->skb);
- ieee80211_queue_work(&rx->local->hw, &sdata->work);
- if (rx->sta)
- rx->sta->rx_stats.packets++;
+ ieee80211_queue_skb_to_iface(sdata, rx->sta, rx->skb);
return RX_QUEUED;
}
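For reference, the length checks in ieee80211_process_rx_twt_action() above add up as follows; this breakdown restates the patch's own comments and assumes the usual layout of struct ieee80211_twt_setup (dialog token, element ID, length, control):

/* Minimum TWT Setup action frame accepted above:
 *
 *   IEEE80211_MIN_ACTION_SIZE           mgmt header + category byte
 * + 1                                   S1G action code
 * + sizeof(struct ieee80211_twt_setup)  dialog token, element ID,
 *                                       length, control
 * + 2                                   TWT request type of the agreement
 *
 * and once the element header has been read, the whole element must fit:
 *
 *   IEEE80211_MIN_ACTION_SIZE + 4 + twt->length
 *   (4 = action code + dialog token + element ID + element length)
 */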
diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c
index c33f332b049a..7e35ab5b6166 100644
--- a/net/mac80211/s1g.c
+++ b/net/mac80211/s1g.c
@@ -6,6 +6,7 @@
#include <linux/ieee80211.h>
#include <net/mac80211.h>
#include "ieee80211_i.h"
+#include "driver-ops.h"
void ieee80211_s1g_sta_rate_init(struct sta_info *sta)
{
@@ -14,3 +15,182 @@ void ieee80211_s1g_sta_rate_init(struct sta_info *sta)
sta->rx_stats.last_rate =
STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G);
}
+
+bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+
+ if (likely(!ieee80211_is_action(mgmt->frame_control)))
+ return false;
+
+ if (likely(mgmt->u.action.category != WLAN_CATEGORY_S1G))
+ return false;
+
+ return mgmt->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP;
+}
+
+static void
+ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da,
+ const u8 *bssid, struct ieee80211_twt_setup *twt)
+{
+ int len = IEEE80211_MIN_ACTION_SIZE + 4 + twt->length;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgmt *mgmt;
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom + len);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+ mgmt = skb_put_zero(skb, len);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ memcpy(mgmt->da, da, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, bssid, ETH_ALEN);
+
+ mgmt->u.action.category = WLAN_CATEGORY_S1G;
+ mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_SETUP;
+ memcpy(mgmt->u.action.u.s1g.variable, twt, 3 + twt->length);
+
+ IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
+ IEEE80211_TX_INTFL_MLME_CONN_TX |
+ IEEE80211_TX_CTL_REQ_TX_STATUS;
+ ieee80211_tx_skb(sdata, skb);
+}
+
+static void
+ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata,
+ const u8 *da, const u8 *bssid, u8 flowid)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_mgmt *mgmt;
+ struct sk_buff *skb;
+ u8 *id;
+
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ IEEE80211_MIN_ACTION_SIZE + 2);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+ mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE + 2);
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ IEEE80211_STYPE_ACTION);
+ memcpy(mgmt->da, da, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, bssid, ETH_ALEN);
+
+ mgmt->u.action.category = WLAN_CATEGORY_S1G;
+ mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_TEARDOWN;
+ id = (u8 *)mgmt->u.action.u.s1g.variable;
+ *id = flowid;
+
+ IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
+ IEEE80211_TX_CTL_REQ_TX_STATUS;
+ ieee80211_tx_skb(sdata, skb);
+}
+
+static void
+ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (void *)skb->data;
+ struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable;
+ struct ieee80211_twt_params *twt_agrt = (void *)twt->params;
+
+ twt_agrt->req_type &= cpu_to_le16(~IEEE80211_TWT_REQTYPE_REQUEST);
+
+ /* broadcast TWT not supported yet */
+ if (twt->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) {
+ le16p_replace_bits(&twt_agrt->req_type,
+ TWT_SETUP_CMD_REJECT,
+ IEEE80211_TWT_REQTYPE_SETUP_CMD);
+ goto out;
+ }
+
+ drv_add_twt_setup(sdata->local, sdata, &sta->sta, twt);
+out:
+ ieee80211_s1g_send_twt_setup(sdata, mgmt->sa, sdata->vif.addr, twt);
+}
+
+static void
+ieee80211_s1g_rx_twt_teardown(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+
+ drv_twt_teardown_request(sdata->local, sdata, &sta->sta,
+ mgmt->u.action.u.s1g.variable[0]);
+}
+
+static void
+ieee80211_s1g_tx_twt_setup_fail(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+ struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable;
+ struct ieee80211_twt_params *twt_agrt = (void *)twt->params;
+ u8 flowid = le16_get_bits(twt_agrt->req_type,
+ IEEE80211_TWT_REQTYPE_FLOWID);
+
+ drv_twt_teardown_request(sdata->local, sdata, &sta->sta, flowid);
+
+ ieee80211_s1g_send_twt_teardown(sdata, mgmt->sa, sdata->vif.addr,
+ flowid);
+}
+
+void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+
+ mutex_lock(&local->sta_mtx);
+
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+ if (!sta)
+ goto out;
+
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_SETUP:
+ ieee80211_s1g_rx_twt_setup(sdata, sta, skb);
+ break;
+ case WLAN_S1G_TWT_TEARDOWN:
+ ieee80211_s1g_rx_twt_teardown(sdata, sta, skb);
+ break;
+ default:
+ break;
+ }
+
+out:
+ mutex_unlock(&local->sta_mtx);
+}
+
+void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data;
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+
+ mutex_lock(&local->sta_mtx);
+
+ sta = sta_info_get_bss(sdata, mgmt->da);
+ if (!sta)
+ goto out;
+
+ switch (mgmt->u.action.u.s1g.action_code) {
+ case WLAN_S1G_TWT_SETUP:
+ /* process failed twt setup frames */
+ ieee80211_s1g_tx_twt_setup_fail(sdata, sta, skb);
+ break;
+ default:
+ break;
+ }
+
+out:
+ mutex_unlock(&local->sta_mtx);
+}
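The setup path above hands the parsed agreement to the driver via drv_add_twt_setup() and then echoes the (possibly modified) element back to the station, so a responder driver is expected to edit the agreement in place. A hypothetical driver-side sketch, with the op prototype assumed from the drv_add_twt_setup() call above and the setup-command constant taken from the ieee80211_twt_setup_cmd enum:

/* Hypothetical responder implementation of the add_twt_setup op. */
static void example_add_twt_setup(struct ieee80211_hw *hw,
				  struct ieee80211_sta *sta,
				  struct ieee80211_twt_setup *twt)
{
	struct ieee80211_twt_params *twt_agrt = (void *)twt->params;

	/* ... program the agreement into the hardware here ... */

	/* accept (or reject/alternate) the proposed agreement */
	le16p_replace_bits(&twt_agrt->req_type, TWT_SETUP_CMD_ACCEPT,
			   IEEE80211_TWT_REQTYPE_SETUP_CMD);
}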
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index f2fb69da9b6e..2b5acb37587f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -425,15 +425,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
if (sta_prepare_rate_control(local, sta, gfp))
goto free_txq;
- sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
skb_queue_head_init(&sta->ps_tx_buf[i]);
skb_queue_head_init(&sta->tx_filtered[i]);
- sta->airtime[i].deficit = sta->airtime_weight;
- atomic_set(&sta->airtime[i].aql_tx_pending, 0);
- sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i];
- sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i];
+ init_airtime_info(&sta->airtime[i], &local->airtime[i]);
}
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -547,7 +543,7 @@ static int sta_info_insert_check(struct sta_info *sta)
return -ENETDOWN;
if (WARN_ON(ether_addr_equal(sta->sta.addr, sdata->vif.addr) ||
- is_multicast_ether_addr(sta->sta.addr)))
+ !is_valid_ether_addr(sta->sta.addr)))
return -EINVAL;
/* The RCU read lock is required by rhashtable due to
@@ -1398,11 +1394,6 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
struct ieee80211_tx_info *info;
struct ieee80211_chanctx_conf *chanctx_conf;
- /* Don't send NDPs when STA is connected HE */
- if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
- return;
-
if (qos) {
fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
IEEE80211_STYPE_QOS_NULLFUNC |
@@ -1897,24 +1888,59 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
}
EXPORT_SYMBOL(ieee80211_sta_set_buffered);
-void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
- u32 tx_airtime, u32 rx_airtime)
+void ieee80211_register_airtime(struct ieee80211_txq *txq,
+ u32 tx_airtime, u32 rx_airtime)
{
- struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
- struct ieee80211_local *local = sta->sdata->local;
- u8 ac = ieee80211_ac_from_tid(tid);
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
+ struct ieee80211_local *local = sdata->local;
+ u64 weight_sum, weight_sum_reciprocal;
+ struct airtime_sched_info *air_sched;
+ struct airtime_info *air_info;
u32 airtime = 0;
- if (sta->local->airtime_flags & AIRTIME_USE_TX)
+ air_sched = &local->airtime[txq->ac];
+ air_info = to_airtime_info(txq);
+
+ if (local->airtime_flags & AIRTIME_USE_TX)
airtime += tx_airtime;
- if (sta->local->airtime_flags & AIRTIME_USE_RX)
+ if (local->airtime_flags & AIRTIME_USE_RX)
airtime += rx_airtime;
- spin_lock_bh(&local->active_txq_lock[ac]);
- sta->airtime[ac].tx_airtime += tx_airtime;
- sta->airtime[ac].rx_airtime += rx_airtime;
- sta->airtime[ac].deficit -= airtime;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ /* Weights are scaled so that the unit weight is 256 */
+ airtime <<= 8;
+
+ spin_lock_bh(&air_sched->lock);
+
+ air_info->tx_airtime += tx_airtime;
+ air_info->rx_airtime += rx_airtime;
+
+ if (air_sched->weight_sum) {
+ weight_sum = air_sched->weight_sum;
+ weight_sum_reciprocal = air_sched->weight_sum_reciprocal;
+ } else {
+ weight_sum = air_info->weight;
+ weight_sum_reciprocal = air_info->weight_reciprocal;
+ }
+
+ /* Round the calculation of global vt */
+ air_sched->v_t += (u64)((airtime + (weight_sum >> 1)) *
+ weight_sum_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_64;
+ air_info->v_t += (u32)((airtime + (air_info->weight >> 1)) *
+ air_info->weight_reciprocal) >> IEEE80211_RECIPROCAL_SHIFT_32;
+ ieee80211_resort_txq(&local->hw, txq);
+
+ spin_unlock_bh(&air_sched->lock);
+}
+
+void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
+ u32 tx_airtime, u32 rx_airtime)
+{
+ struct ieee80211_txq *txq = pubsta->txq[tid];
+
+ if (!txq)
+ return;
+
+ ieee80211_register_airtime(txq, tx_airtime, rx_airtime);
}
EXPORT_SYMBOL(ieee80211_sta_register_airtime);
@@ -2093,10 +2119,9 @@ static struct ieee80211_sta_rx_stats *
sta_get_last_rx_stats(struct sta_info *sta)
{
struct ieee80211_sta_rx_stats *stats = &sta->rx_stats;
- struct ieee80211_local *local = sta->local;
int cpu;
- if (!ieee80211_hw_check(&local->hw, USES_RSS))
+ if (!sta->pcpu_rx_stats)
return stats;
for_each_possible_cpu(cpu) {
@@ -2196,9 +2221,7 @@ static void sta_set_tidstats(struct sta_info *sta,
int cpu;
if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) {
- if (!ieee80211_hw_check(&local->hw, USES_RSS))
- tidstats->rx_msdu +=
- sta_get_tidstats_msdu(&sta->rx_stats, tid);
+ tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->rx_stats, tid);
if (sta->pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
@@ -2277,7 +2300,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->rx_beacon = sdata->u.mgd.count_beacon_signal;
drv_sta_statistics(local, sdata, &sta->sta, sinfo);
-
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) |
BIT_ULL(NL80211_STA_INFO_STA_FLAGS) |
BIT_ULL(NL80211_STA_INFO_BSS_PARAM) |
@@ -2312,8 +2334,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) |
BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) {
- if (!ieee80211_hw_check(&local->hw, USES_RSS))
- sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
+ sinfo->rx_bytes += sta_get_stats_bytes(&sta->rx_stats);
if (sta->pcpu_rx_stats) {
for_each_possible_cpu(cpu) {
@@ -2363,7 +2384,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
- sinfo->airtime_weight = sta->airtime_weight;
+ sinfo->airtime_weight = sta->airtime[0].weight;
sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
}
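The virtual-time update in ieee80211_register_airtime() above avoids a per-packet division by using a pre-computed fixed-point reciprocal of the weight; the "+ weight/2" term rounds to nearest. A small sketch of the arithmetic, assuming the reciprocal is prepared once as (1 << SHIFT) / weight (the helper that fills weight_reciprocal is not part of this excerpt):

/* Division-free weighted airtime, as used by the scheduler above:
 * v_t += airtime / weight is replaced by a multiply and shift with a
 * pre-computed reciprocal. EXAMPLE_SHIFT stands in for the
 * IEEE80211_RECIPROCAL_SHIFT_* constants.
 */
#define EXAMPLE_SHIFT	16

static u32 example_weighted_airtime(u32 airtime, u16 weight)
{
	u32 reciprocal = (1U << EXAMPLE_SHIFT) / weight;	/* done once */

	return (u32)(((u64)airtime + (weight >> 1)) * reciprocal >>
		     EXAMPLE_SHIFT);
}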
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 0333072ebd98..ba2796782008 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -135,18 +135,25 @@ enum ieee80211_agg_stop_reason {
#define AIRTIME_USE_TX BIT(0)
#define AIRTIME_USE_RX BIT(1)
+
struct airtime_info {
u64 rx_airtime;
u64 tx_airtime;
- s64 deficit;
+ u64 v_t;
+ u64 last_scheduled;
+ struct list_head list;
atomic_t aql_tx_pending; /* Estimated airtime for frames pending */
u32 aql_limit_low;
u32 aql_limit_high;
+ u32 weight_reciprocal;
+ u16 weight;
};
void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
struct sta_info *sta, u8 ac,
u16 tx_airtime, bool tx_completed);
+void ieee80211_register_airtime(struct ieee80211_txq *txq,
+ u32 tx_airtime, u32 rx_airtime);
struct sta_info;
@@ -515,7 +522,6 @@ struct ieee80211_fragment_cache {
* @tid_seq: per-TID sequence numbers for sending to this STA
* @airtime: per-AC struct airtime_info describing airtime statistics for this
* station
- * @airtime_weight: station weight for airtime fairness calculation purposes
* @ampdu_mlme: A-MPDU state machine state
* @mesh: mesh STA information
* @debugfs_dir: debug filesystem directory dentry
@@ -646,7 +652,6 @@ struct sta_info {
u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
struct airtime_info airtime[IEEE80211_NUM_ACS];
- u16 airtime_weight;
/*
* Aggregation information, locked with lock.
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 9baf185ee4c7..f6f63a0b1b72 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -305,8 +305,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
memset(rthdr, 0, rtap_len);
rthdr->it_len = cpu_to_le16(rtap_len);
rthdr->it_present =
- cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) |
- (1 << IEEE80211_RADIOTAP_DATA_RETRIES));
+ cpu_to_le32(BIT(IEEE80211_RADIOTAP_TX_FLAGS) |
+ BIT(IEEE80211_RADIOTAP_DATA_RETRIES));
pos = (unsigned char *)(rthdr + 1);
/*
@@ -331,7 +331,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
sband->bitrates[info->status.rates[0].idx].bitrate;
if (legacy_rate) {
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE));
*pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift));
/* padding for tx flags */
pos += 2;
@@ -358,7 +358,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
if (status && status->rate &&
(status->rate->flags & RATE_INFO_FLAGS_MCS)) {
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
IEEE80211_RADIOTAP_MCS_HAVE_GI |
IEEE80211_RADIOTAP_MCS_HAVE_BW;
@@ -374,7 +374,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
(IEEE80211_RADIOTAP_VHT_KNOWN_GI |
IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
/* required alignment from rthdr */
pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -419,7 +419,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
(status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) {
struct ieee80211_radiotap_he *he;
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE));
/* required alignment from rthdr */
pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -495,7 +495,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
/* IEEE80211_RADIOTAP_MCS
* IEEE80211_RADIOTAP_VHT */
if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) {
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS));
pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
IEEE80211_RADIOTAP_MCS_HAVE_GI |
IEEE80211_RADIOTAP_MCS_HAVE_BW;
@@ -512,7 +512,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
(IEEE80211_RADIOTAP_VHT_KNOWN_GI |
IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
- rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+ rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT));
/* required alignment from rthdr */
pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
@@ -705,13 +705,26 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
/* Check to see if packet is a TDLS teardown packet */
if (ieee80211_is_data(hdr->frame_control) &&
(ieee80211_get_tdls_action(skb, hdr_size) ==
- WLAN_TDLS_TEARDOWN))
+ WLAN_TDLS_TEARDOWN)) {
ieee80211_tdls_td_tx_handle(local, sdata, skb,
info->flags);
- else
+ } else if (ieee80211_s1g_is_twt_setup(skb)) {
+ if (!acked) {
+ struct sk_buff *qskb;
+
+ qskb = skb_clone(skb, GFP_ATOMIC);
+ if (qskb) {
+ skb_queue_tail(&sdata->status_queue,
+ qskb);
+ ieee80211_queue_work(&local->hw,
+ &sdata->work);
+ }
+ }
+ } else {
ieee80211_mgd_conn_tx_status(sdata,
hdr->frame_control,
acked);
+ }
}
rcu_read_unlock();
@@ -970,6 +983,25 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
if (!(info->flags & IEEE80211_TX_CTL_INJECTED) && acked)
ieee80211_frame_acked(sta, skb);
+ } else if (wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) {
+ struct ieee80211_sub_if_data *sdata;
+ struct ieee80211_txq *txq;
+ u32 airtime;
+
+ /* Account airtime to multicast queue */
+ sdata = ieee80211_sdata_from_skb(local, skb);
+
+ if (sdata && (txq = sdata->vif.txq)) {
+ airtime = info->status.tx_time ?:
+ ieee80211_calc_expected_tx_airtime(hw,
+ &sdata->vif,
+ NULL,
+ skb->len,
+ false);
+
+ ieee80211_register_airtime(txq, airtime, 0);
+ }
}
/* SNMP counters
@@ -1006,12 +1038,11 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
local->ps_sdata && !(local->scanning)) {
- if (info->flags & IEEE80211_TX_STAT_ACK) {
+ if (info->flags & IEEE80211_TX_STAT_ACK)
local->ps_sdata->u.mgd.flags |=
IEEE80211_STA_NULLFUNC_ACKED;
- } else
- mod_timer(&local->dynamic_ps_timer, jiffies +
- msecs_to_jiffies(10));
+ mod_timer(&local->dynamic_ps_timer,
+ jiffies + msecs_to_jiffies(10));
}
ieee80211_report_used_skb(local, skb, false);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index f91d02b81b92..45e532ad1215 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1920,7 +1920,7 @@ out:
return ret;
}
-static void
+void
ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
@@ -1971,32 +1971,6 @@ void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
rcu_read_unlock();
}
-void ieee80211_tdls_chsw_work(struct work_struct *wk)
-{
- struct ieee80211_local *local =
- container_of(wk, struct ieee80211_local, tdls_chsw_work);
- struct ieee80211_sub_if_data *sdata;
- struct sk_buff *skb;
- struct ieee80211_tdls_data *tf;
-
- wiphy_lock(local->hw.wiphy);
- while ((skb = skb_dequeue(&local->skb_queue_tdls_chsw))) {
- tf = (struct ieee80211_tdls_data *)skb->data;
- list_for_each_entry(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata) ||
- sdata->vif.type != NL80211_IFTYPE_STATION ||
- !ether_addr_equal(tf->da, sdata->vif.addr))
- continue;
-
- ieee80211_process_tdls_channel_switch(sdata, skb);
- break;
- }
-
- kfree_skb(skb);
- }
- wiphy_unlock(local->hw.wiphy);
-}
-
void ieee80211_tdls_handle_disconnect(struct ieee80211_sub_if_data *sdata,
const u8 *peer, u16 reason)
{
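With ieee80211_tdls_chsw_work() removed, the channel-switch frames tagged with ETH_P_TDLS in the rx.c hunk above are expected to be handled from the per-interface work instead, which is also why ieee80211_process_tdls_channel_switch() loses its static qualifier above. The dispatch itself lives in iface.c and is not part of this excerpt; a sketch of the assumed shape:

/* Assumed dispatch on the interface-work side (not shown in this
 * excerpt): frames queued via __ieee80211_queue_skb_to_iface() with
 * skb->protocol set to ETH_P_TDLS are handed to the now-exported
 * ieee80211_process_tdls_channel_switch().
 */
static void example_iface_process_skb(struct ieee80211_sub_if_data *sdata,
				      struct sk_buff *skb)
{
	if (skb->protocol == cpu_to_be16(ETH_P_TDLS))
		ieee80211_process_tdls_channel_switch(sdata, skb);
	/* ... other queued frame types (mgmt, TWT TX status, ...) ... */
}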
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 8fcc39056402..9e8381bef7ed 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016-2017 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2020 Intel Corporation
+* Copyright (C) 2018 - 2021 Intel Corporation
*/
#if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -1461,31 +1461,52 @@ DEFINE_EVENT(release_evt, drv_allow_buffered_frames,
TP_ARGS(local, sta, tids, num_frames, reason, more_data)
);
-TRACE_EVENT(drv_mgd_prepare_tx,
+DECLARE_EVENT_CLASS(mgd_prepare_complete_tx_evt,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- u16 duration),
+ u16 duration, u16 subtype, bool success),
- TP_ARGS(local, sdata, duration),
+ TP_ARGS(local, sdata, duration, subtype, success),
TP_STRUCT__entry(
LOCAL_ENTRY
VIF_ENTRY
__field(u32, duration)
+ __field(u16, subtype)
+ __field(u8, success)
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
__entry->duration = duration;
+ __entry->subtype = subtype;
+ __entry->success = success;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT " duration: %u",
- LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration
+ LOCAL_PR_FMT VIF_PR_FMT " duration: %u, subtype:0x%x, success:%d",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->duration,
+ __entry->subtype, __entry->success
)
);
+DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_prepare_tx,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 duration, u16 subtype, bool success),
+
+ TP_ARGS(local, sdata, duration, subtype, success)
+);
+
+DEFINE_EVENT(mgd_prepare_complete_tx_evt, drv_mgd_complete_tx,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 duration, u16 subtype, bool success),
+
+ TP_ARGS(local, sdata, duration, subtype, success)
+);
+
DEFINE_EVENT(local_sdata_evt, drv_mgd_protect_tdls_discover,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata),
@@ -2804,6 +2825,73 @@ DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload,
TP_ARGS(local, sdata, sta, enabled)
);
+TRACE_EVENT(drv_add_twt_setup,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sta *sta,
+ struct ieee80211_twt_setup *twt,
+ struct ieee80211_twt_params *twt_agrt),
+
+ TP_ARGS(local, sta, twt, twt_agrt),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ STA_ENTRY
+ __field(u8, dialog_token)
+ __field(u8, control)
+ __field(__le16, req_type)
+ __field(__le64, twt)
+ __field(u8, duration)
+ __field(__le16, mantissa)
+ __field(u8, channel)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ STA_ASSIGN;
+ __entry->dialog_token = twt->dialog_token;
+ __entry->control = twt->control;
+ __entry->req_type = twt_agrt->req_type;
+ __entry->twt = twt_agrt->twt;
+ __entry->duration = twt_agrt->min_twt_dur;
+ __entry->mantissa = twt_agrt->mantissa;
+ __entry->channel = twt_agrt->channel;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT STA_PR_FMT
+ " token:%d control:0x%02x req_type:0x%04x"
+ " twt:%llu duration:%d mantissa:%d channel:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->dialog_token,
+ __entry->control, le16_to_cpu(__entry->req_type),
+ le64_to_cpu(__entry->twt), __entry->duration,
+ le16_to_cpu(__entry->mantissa), __entry->channel
+ )
+);
+
+TRACE_EVENT(drv_twt_teardown_request,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sta *sta, u8 flowid),
+
+ TP_ARGS(local, sta, flowid),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ STA_ENTRY
+ __field(u8, flowid)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ STA_ASSIGN;
+ __entry->flowid = flowid;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT STA_PR_FMT " flowid:%d",
+ LOCAL_PR_ARG, STA_PR_ARG, __entry->flowid
+ )
+);
+
#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2651498d05e8..2d1193ed3eb5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -18,6 +18,7 @@
#include <linux/bitmap.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
+#include <linux/timekeeping.h>
#include <net/net_namespace.h>
#include <net/ieee80211_radiotap.h>
#include <net/cfg80211.h>
@@ -666,6 +667,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
u32 len;
struct ieee80211_tx_rate_control txrc;
struct ieee80211_sta_rates *ratetbl = NULL;
+ bool encap = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP;
bool assoc = false;
memset(&txrc, 0, sizeof(txrc));
@@ -707,7 +709,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
* just wants a probe response.
*/
if (tx->sdata->vif.bss_conf.use_short_preamble &&
- (ieee80211_is_data(hdr->frame_control) ||
+ (ieee80211_is_tx_data(tx->skb) ||
(tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE))))
txrc.short_preamble = true;
@@ -729,7 +731,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
"%s: Dropped data frame as no usable bitrate found while "
"scanning and associated. Target station: "
"%pM on %d GHz band\n",
- tx->sdata->name, hdr->addr1,
+ tx->sdata->name,
+ encap ? ((struct ethhdr *)hdr)->h_dest : hdr->addr1,
info->band ? 5 : 2))
return TX_DROP;
@@ -763,7 +766,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
if (txrc.reported_rate.idx < 0) {
txrc.reported_rate = tx->rate;
- if (tx->sta && ieee80211_is_data(hdr->frame_control))
+ if (tx->sta && ieee80211_is_tx_data(tx->skb))
tx->sta->tx_stats.last_rate = txrc.reported_rate;
} else if (tx->sta)
tx->sta->tx_stats.last_rate = txrc.reported_rate;
@@ -1144,6 +1147,29 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
return queued;
}
+static void
+ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ struct sk_buff *skb)
+{
+ struct rate_control_ref *ref = sdata->local->rate_ctrl;
+ u16 tid;
+
+ if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
+ return;
+
+ if (!sta || !sta->sta.ht_cap.ht_supported ||
+ !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
+ skb->protocol == sdata->control_port_protocol)
+ return;
+
+ tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+ if (likely(sta->ampdu_mlme.tid_tx[tid]))
+ return;
+
+ ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
+}
+
/*
* initialises @tx
* pass %NULL for the station if unknown, a valid pointer if known
@@ -1157,6 +1183,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_hdr *hdr;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ bool aggr_check = false;
int tid;
memset(tx, 0, sizeof(*tx));
@@ -1185,8 +1212,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
} else if (tx->sdata->control_port_protocol == tx->skb->protocol) {
tx->sta = sta_info_get_bss(sdata, hdr->addr1);
}
- if (!tx->sta && !is_multicast_ether_addr(hdr->addr1))
+ if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) {
tx->sta = sta_info_get(sdata, hdr->addr1);
+ aggr_check = true;
+ }
}
if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
@@ -1196,8 +1225,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
struct tid_ampdu_tx *tid_tx;
tid = ieee80211_get_tid(hdr);
-
tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+ if (!tid_tx && aggr_check) {
+ ieee80211_aggr_check(sdata, tx->sta, skb);
+ tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+ }
+
if (tid_tx) {
bool queued;
@@ -1447,7 +1480,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
codel_vars_init(&txqi->def_cvars);
codel_stats_init(&txqi->cstats);
__skb_queue_head_init(&txqi->frags);
- INIT_LIST_HEAD(&txqi->schedule_order);
+ RB_CLEAR_NODE(&txqi->schedule_order);
txqi->txq.vif = &sdata->vif;
@@ -1491,9 +1524,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
spin_unlock_bh(&fq->lock);
- spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
- list_del_init(&txqi->schedule_order);
- spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
+ ieee80211_unschedule_txq(&local->hw, &txqi->txq, true);
}
void ieee80211_txq_set_params(struct ieee80211_local *local)
@@ -1768,8 +1799,6 @@ static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx)
CALL_TXH(ieee80211_tx_h_ps_buf);
CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
CALL_TXH(ieee80211_tx_h_select_key);
- if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
- CALL_TXH(ieee80211_tx_h_rate_ctrl);
txh_done:
if (unlikely(res == TX_DROP)) {
@@ -1802,6 +1831,9 @@ static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
goto txh_done;
}
+ if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
+ CALL_TXH(ieee80211_tx_h_rate_ctrl);
+
CALL_TXH(ieee80211_tx_h_michael_mic_add);
CALL_TXH(ieee80211_tx_h_sequence);
CALL_TXH(ieee80211_tx_h_fragment);
@@ -3210,7 +3242,9 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
return true;
- if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr)))
+ if (!ieee80211_amsdu_realloc_pad(local, skb,
+ sizeof(*amsdu_hdr) +
+ local->hw.extra_tx_headroom))
return false;
data = skb_push(skb, sizeof(*amsdu_hdr));
@@ -3284,6 +3318,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
return false;
+ if (sdata->vif.offload_flags & IEEE80211_OFFLOAD_ENCAP_ENABLED)
+ return false;
+
if (skb_is_gso(skb))
return false;
@@ -3389,15 +3426,21 @@ out:
* Can be called while the sta lock is held. Anything that can cause packets to
* be generated will cause deadlock!
*/
-static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
- struct sta_info *sta, u8 pn_offs,
- struct ieee80211_key *key,
- struct sk_buff *skb)
+static ieee80211_tx_result
+ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta, u8 pn_offs,
+ struct ieee80211_key *key,
+ struct ieee80211_tx_data *tx)
{
+ struct sk_buff *skb = tx->skb;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (void *)skb->data;
u8 tid = IEEE80211_NUM_TIDS;
+ if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL) &&
+ ieee80211_tx_h_rate_ctrl(tx) != TX_CONTINUE)
+ return TX_DROP;
+
if (key)
info->control.hw_key = &key->conf;
@@ -3446,6 +3489,8 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
break;
}
}
+
+ return TX_CONTINUE;
}
static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
@@ -3549,24 +3594,17 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
tx.sta = sta;
tx.key = fast_tx->key;
- if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
- tx.skb = skb;
- r = ieee80211_tx_h_rate_ctrl(&tx);
- skb = tx.skb;
- tx.skb = NULL;
-
- if (r != TX_CONTINUE) {
- if (r != TX_QUEUED)
- kfree_skb(skb);
- return true;
- }
- }
-
if (ieee80211_queue_skb(local, sdata, sta, skb))
return true;
- ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
- fast_tx->key, skb);
+ tx.skb = skb;
+ r = ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
+ fast_tx->key, &tx);
+ tx.skb = NULL;
+ if (r == TX_DROP) {
+ kfree_skb(skb);
+ return true;
+ }
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
sdata = container_of(sdata->bss,
@@ -3671,8 +3709,16 @@ begin:
else
info->flags &= ~IEEE80211_TX_CTL_AMPDU;
- if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP)
+ if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
+ if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
+ r = ieee80211_tx_h_rate_ctrl(&tx);
+ if (r != TX_CONTINUE) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
+ }
goto encap_out;
+ }
if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
struct sta_info *sta = container_of(txq->sta, struct sta_info,
@@ -3683,8 +3729,12 @@ begin:
(tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
pn_offs = ieee80211_hdrlen(hdr->frame_control);
- ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
- tx.key, skb);
+ r = ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
+ tx.key, &tx);
+ if (r != TX_CONTINUE) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ }
} else {
if (invoke_tx_handlers_late(&tx))
goto begin;
@@ -3764,102 +3814,259 @@ EXPORT_SYMBOL(ieee80211_tx_dequeue);
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct airtime_sched_info *air_sched;
+ u64 now = ktime_get_boottime_ns();
struct ieee80211_txq *ret = NULL;
- struct txq_info *txqi = NULL, *head = NULL;
- bool found_eligible_txq = false;
+ struct airtime_info *air_info;
+ struct txq_info *txqi = NULL;
+ struct rb_node *node;
+ bool first = false;
- spin_lock_bh(&local->active_txq_lock[ac]);
+ air_sched = &local->airtime[ac];
+ spin_lock_bh(&air_sched->lock);
- begin:
- txqi = list_first_entry_or_null(&local->active_txqs[ac],
- struct txq_info,
- schedule_order);
- if (!txqi)
+ node = air_sched->schedule_pos;
+
+begin:
+ if (!node) {
+ node = rb_first_cached(&air_sched->active_txqs);
+ first = true;
+ } else {
+ node = rb_next(node);
+ }
+
+ if (!node)
goto out;
- if (txqi == head) {
- if (!found_eligible_txq)
- goto out;
- else
- found_eligible_txq = false;
+ txqi = container_of(node, struct txq_info, schedule_order);
+ air_info = to_airtime_info(&txqi->txq);
+
+ if (air_info->v_t > air_sched->v_t &&
+ (!first || !airtime_catchup_v_t(air_sched, air_info->v_t, now)))
+ goto out;
+
+ if (!ieee80211_txq_airtime_check(hw, &txqi->txq)) {
+ first = false;
+ goto begin;
}
- if (!head)
- head = txqi;
+ air_sched->schedule_pos = node;
+ air_sched->last_schedule_activity = now;
+ ret = &txqi->txq;
+out:
+ spin_unlock_bh(&air_sched->lock);
+ return ret;
+}
+EXPORT_SYMBOL(ieee80211_next_txq);
- if (txqi->txq.sta) {
- struct sta_info *sta = container_of(txqi->txq.sta,
- struct sta_info, sta);
- bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
- s64 deficit = sta->airtime[txqi->txq.ac].deficit;
+static void __ieee80211_insert_txq(struct rb_root_cached *root,
+ struct txq_info *txqi)
+{
+ struct rb_node **new = &root->rb_root.rb_node;
+ struct airtime_info *old_air, *new_air;
+ struct rb_node *parent = NULL;
+ struct txq_info *__txqi;
+ bool leftmost = true;
+
+ while (*new) {
+ parent = *new;
+ __txqi = rb_entry(parent, struct txq_info, schedule_order);
+ old_air = to_airtime_info(&__txqi->txq);
+ new_air = to_airtime_info(&txqi->txq);
+
+ if (new_air->v_t <= old_air->v_t) {
+ new = &parent->rb_left;
+ } else {
+ new = &parent->rb_right;
+ leftmost = false;
+ }
+ }
- if (aql_check)
- found_eligible_txq = true;
+ rb_link_node(&txqi->schedule_order, parent, new);
+ rb_insert_color_cached(&txqi->schedule_order, root, leftmost);
+}
- if (deficit < 0)
- sta->airtime[txqi->txq.ac].deficit +=
- sta->airtime_weight;
+void ieee80211_resort_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
+{
+ struct airtime_info *air_info = to_airtime_info(txq);
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct txq_info *txqi = to_txq_info(txq);
+ struct airtime_sched_info *air_sched;
- if (deficit < 0 || !aql_check) {
- list_move_tail(&txqi->schedule_order,
- &local->active_txqs[txqi->txq.ac]);
- goto begin;
+ air_sched = &local->airtime[txq->ac];
+
+ lockdep_assert_held(&air_sched->lock);
+
+ if (!RB_EMPTY_NODE(&txqi->schedule_order)) {
+ struct airtime_info *a_prev = NULL, *a_next = NULL;
+ struct txq_info *t_prev, *t_next;
+ struct rb_node *n_prev, *n_next;
+
+ /* Erasing a node can cause an expensive rebalancing operation,
+ * so we check the previous and next nodes first and only remove
+ * and re-insert if the current node is not already in the
+ * correct position.
+ */
+ if ((n_prev = rb_prev(&txqi->schedule_order)) != NULL) {
+ t_prev = container_of(n_prev, struct txq_info,
+ schedule_order);
+ a_prev = to_airtime_info(&t_prev->txq);
}
+
+ if ((n_next = rb_next(&txqi->schedule_order)) != NULL) {
+ t_next = container_of(n_next, struct txq_info,
+ schedule_order);
+ a_next = to_airtime_info(&t_next->txq);
+ }
+
+ if ((!a_prev || a_prev->v_t <= air_info->v_t) &&
+ (!a_next || a_next->v_t > air_info->v_t))
+ return;
+
+ if (air_sched->schedule_pos == &txqi->schedule_order)
+ air_sched->schedule_pos = n_prev;
+
+ rb_erase_cached(&txqi->schedule_order,
+ &air_sched->active_txqs);
+ RB_CLEAR_NODE(&txqi->schedule_order);
+ __ieee80211_insert_txq(&air_sched->active_txqs, txqi);
+ }
+}
+
+void ieee80211_update_airtime_weight(struct ieee80211_local *local,
+ struct airtime_sched_info *air_sched,
+ u64 now, bool force)
+{
+ struct airtime_info *air_info, *tmp;
+ u64 weight_sum = 0;
+
+ if (unlikely(!now))
+ now = ktime_get_boottime_ns();
+
+ lockdep_assert_held(&air_sched->lock);
+
+ if (!force && (air_sched->last_weight_update <
+ now - AIRTIME_ACTIVE_DURATION))
+ return;
+
+ list_for_each_entry_safe(air_info, tmp,
+ &air_sched->active_list, list) {
+ if (airtime_is_active(air_info, now))
+ weight_sum += air_info->weight;
+ else
+ list_del_init(&air_info->list);
}
+ airtime_weight_sum_set(air_sched, weight_sum);
+ air_sched->last_weight_update = now;
+}
+
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
+ __acquires(txq_lock) __releases(txq_lock)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct txq_info *txqi = to_txq_info(txq);
+ struct airtime_sched_info *air_sched;
+ u64 now = ktime_get_boottime_ns();
+ struct airtime_info *air_info;
+ u8 ac = txq->ac;
+ bool was_active;
+
+ air_sched = &local->airtime[ac];
+ air_info = to_airtime_info(txq);
+ spin_lock_bh(&air_sched->lock);
+ was_active = airtime_is_active(air_info, now);
+ airtime_set_active(air_sched, air_info, now);
- if (txqi->schedule_round == local->schedule_round[ac])
+ if (!RB_EMPTY_NODE(&txqi->schedule_order))
goto out;
- list_del_init(&txqi->schedule_order);
- txqi->schedule_round = local->schedule_round[ac];
- ret = &txqi->txq;
+ /* If the station has been inactive for a while, catch up its v_t so it
+ * doesn't get indefinite priority; see comment above the definition of
+ * AIRTIME_MAX_BEHIND.
+ */
+ if ((!was_active && air_info->v_t < air_sched->v_t) ||
+ air_info->v_t < air_sched->v_t - AIRTIME_MAX_BEHIND)
+ air_info->v_t = air_sched->v_t;
+
+ ieee80211_update_airtime_weight(local, air_sched, now, !was_active);
+ __ieee80211_insert_txq(&air_sched->active_txqs, txqi);
out:
- spin_unlock_bh(&local->active_txq_lock[ac]);
- return ret;
+ spin_unlock_bh(&air_sched->lock);
}
-EXPORT_SYMBOL(ieee80211_next_txq);
+EXPORT_SYMBOL(ieee80211_schedule_txq);
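A hedged restatement of the catch-up rule in ieee80211_schedule_txq(): a txq that was inactive, or whose virtual time has fallen more than AIRTIME_MAX_BEHIND behind the per-AC scheduler clock, resumes at the scheduler's current virtual time. The threshold value below is only an example; the real constant lives in the mac80211 headers.

#include <stdbool.h>
#include <stdint.h>

#define AIRTIME_MAX_BEHIND 16000ULL	/* example threshold, not the real constant */

/* returns the virtual time the station should resume at */
static uint64_t resume_v_t(uint64_t sta_v_t, uint64_t sched_v_t, bool was_active)
{
	/* inactive, or too far behind: jump forward to the scheduler clock */
	if ((!was_active && sta_v_t < sched_v_t) ||
	    sta_v_t < sched_v_t - AIRTIME_MAX_BEHIND)
		return sched_v_t;
	return sta_v_t;
}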
-void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
- struct ieee80211_txq *txq,
- bool force)
+static void __ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq,
+ bool purge)
{
struct ieee80211_local *local = hw_to_local(hw);
struct txq_info *txqi = to_txq_info(txq);
+ struct airtime_sched_info *air_sched;
+ struct airtime_info *air_info;
- spin_lock_bh(&local->active_txq_lock[txq->ac]);
-
- if (list_empty(&txqi->schedule_order) &&
- (force || !skb_queue_empty(&txqi->frags) ||
- txqi->tin.backlog_packets)) {
- /* If airtime accounting is active, always enqueue STAs at the
- * head of the list to ensure that they only get moved to the
- * back by the airtime DRR scheduler once they have a negative
- * deficit. A station that already has a negative deficit will
- * get immediately moved to the back of the list on the next
- * call to ieee80211_next_txq().
- */
- if (txqi->txq.sta && local->airtime_flags &&
- wiphy_ext_feature_isset(local->hw.wiphy,
- NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
- list_add(&txqi->schedule_order,
- &local->active_txqs[txq->ac]);
- else
- list_add_tail(&txqi->schedule_order,
- &local->active_txqs[txq->ac]);
+ air_sched = &local->airtime[txq->ac];
+ air_info = to_airtime_info(&txqi->txq);
+
+ lockdep_assert_held(&air_sched->lock);
+
+ if (purge) {
+ list_del_init(&air_info->list);
+ ieee80211_update_airtime_weight(local, air_sched, 0, true);
}
- spin_unlock_bh(&local->active_txq_lock[txq->ac]);
+ if (RB_EMPTY_NODE(&txqi->schedule_order))
+ return;
+
+ if (air_sched->schedule_pos == &txqi->schedule_order)
+ air_sched->schedule_pos = rb_prev(&txqi->schedule_order);
+
+ if (!purge)
+ airtime_set_active(air_sched, air_info,
+ ktime_get_boottime_ns());
+
+ rb_erase_cached(&txqi->schedule_order,
+ &air_sched->active_txqs);
+ RB_CLEAR_NODE(&txqi->schedule_order);
}
-EXPORT_SYMBOL(__ieee80211_schedule_txq);
+
+void ieee80211_unschedule_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq,
+ bool purge)
+ __acquires(txq_lock) __releases(txq_lock)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ spin_lock_bh(&local->airtime[txq->ac].lock);
+ __ieee80211_unschedule_txq(hw, txq, purge);
+ spin_unlock_bh(&local->airtime[txq->ac].lock);
+}
+
+void ieee80211_return_txq(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq, bool force)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct txq_info *txqi = to_txq_info(txq);
+
+ spin_lock_bh(&local->airtime[txq->ac].lock);
+
+ if (!RB_EMPTY_NODE(&txqi->schedule_order) && !force &&
+ !txq_has_queue(txq))
+ __ieee80211_unschedule_txq(hw, txq, false);
+
+ spin_unlock_bh(&local->airtime[txq->ac].lock);
+}
+EXPORT_SYMBOL(ieee80211_return_txq);
DEFINE_STATIC_KEY_FALSE(aql_disable);
bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
- struct sta_info *sta;
+ struct airtime_info *air_info = to_airtime_info(txq);
struct ieee80211_local *local = hw_to_local(hw);
if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
@@ -3874,15 +4081,12 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
if (unlikely(txq->tid == IEEE80211_NUM_TIDS))
return true;
- sta = container_of(txq->sta, struct sta_info, sta);
- if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
- sta->airtime[txq->ac].aql_limit_low)
+ if (atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_low)
return true;
if (atomic_read(&local->aql_total_pending_airtime) <
local->aql_threshold &&
- atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
- sta->airtime[txq->ac].aql_limit_high)
+ atomic_read(&air_info->aql_tx_pending) < air_info->aql_limit_high)
return true;
return false;
@@ -3892,60 +4096,59 @@ EXPORT_SYMBOL(ieee80211_txq_airtime_check);
bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
+ struct txq_info *first_txqi = NULL, *txqi = to_txq_info(txq);
struct ieee80211_local *local = hw_to_local(hw);
- struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
- struct sta_info *sta;
- u8 ac = txq->ac;
+ struct airtime_sched_info *air_sched;
+ struct airtime_info *air_info;
+ struct rb_node *node = NULL;
+ bool ret = false;
+ u64 now;
- spin_lock_bh(&local->active_txq_lock[ac]);
- if (!txqi->txq.sta)
- goto out;
+ if (!ieee80211_txq_airtime_check(hw, txq))
+ return false;
- if (list_empty(&txqi->schedule_order))
+ air_sched = &local->airtime[txq->ac];
+ spin_lock_bh(&air_sched->lock);
+
+ if (RB_EMPTY_NODE(&txqi->schedule_order))
goto out;
- list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
- schedule_order) {
- if (iter == txqi)
- break;
+ now = ktime_get_boottime_ns();
- if (!iter->txq.sta) {
- list_move_tail(&iter->schedule_order,
- &local->active_txqs[ac]);
- continue;
- }
- sta = container_of(iter->txq.sta, struct sta_info, sta);
- if (sta->airtime[ac].deficit < 0)
- sta->airtime[ac].deficit += sta->airtime_weight;
- list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
- }
+ /* Like in ieee80211_next_txq(), make sure the first station in the
+ * scheduling order is eligible for transmission to avoid starvation.
+ */
+ node = rb_first_cached(&air_sched->active_txqs);
+ if (node) {
+ first_txqi = container_of(node, struct txq_info,
+ schedule_order);
+ air_info = to_airtime_info(&first_txqi->txq);
- sta = container_of(txqi->txq.sta, struct sta_info, sta);
- if (sta->airtime[ac].deficit >= 0)
- goto out;
+ if (air_sched->v_t < air_info->v_t)
+ airtime_catchup_v_t(air_sched, air_info->v_t, now);
+ }
- sta->airtime[ac].deficit += sta->airtime_weight;
- list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ air_info = to_airtime_info(&txqi->txq);
+ if (air_info->v_t <= air_sched->v_t) {
+ air_sched->last_schedule_activity = now;
+ ret = true;
+ }
- return false;
out:
- if (!list_empty(&txqi->schedule_order))
- list_del_init(&txqi->schedule_order);
- spin_unlock_bh(&local->active_txq_lock[ac]);
-
- return true;
+ spin_unlock_bh(&air_sched->lock);
+ return ret;
}
EXPORT_SYMBOL(ieee80211_txq_may_transmit);
void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
+ struct airtime_sched_info *air_sched = &local->airtime[ac];
- spin_lock_bh(&local->active_txq_lock[ac]);
- local->schedule_round[ac]++;
- spin_unlock_bh(&local->active_txq_lock[ac]);
+ spin_lock_bh(&air_sched->lock);
+ air_sched->schedule_pos = NULL;
+ spin_unlock_bh(&air_sched->lock);
}
EXPORT_SYMBOL(ieee80211_txq_schedule_start);
@@ -3979,6 +4182,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
skb_get_hash(skb);
}
+ ieee80211_aggr_check(sdata, sta, skb);
+
if (sta) {
struct ieee80211_fast_tx *fast_tx;
@@ -4242,6 +4447,8 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
memset(info, 0, sizeof(*info));
+ ieee80211_aggr_check(sdata, sta, skb);
+
tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
if (tid_tx) {
@@ -4577,11 +4784,11 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
struct beacon_data *beacon)
{
+ u8 *beacon_data, count, max_count = 1;
struct probe_resp *resp;
- u8 *beacon_data;
size_t beacon_data_len;
+ u16 *bcn_offsets;
int i;
- u8 count = beacon->cntdwn_current_counter;
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP:
@@ -4601,21 +4808,27 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata,
}
rcu_read_lock();
- for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; ++i) {
- resp = rcu_dereference(sdata->u.ap.probe_resp);
+ resp = rcu_dereference(sdata->u.ap.probe_resp);
- if (beacon->cntdwn_counter_offsets[i]) {
- if (WARN_ON_ONCE(beacon->cntdwn_counter_offsets[i] >=
- beacon_data_len)) {
+ bcn_offsets = beacon->cntdwn_counter_offsets;
+ count = beacon->cntdwn_current_counter;
+ if (sdata->vif.csa_active)
+ max_count = IEEE80211_MAX_CNTDWN_COUNTERS_NUM;
+
+ for (i = 0; i < max_count; ++i) {
+ if (bcn_offsets[i]) {
+ if (WARN_ON_ONCE(bcn_offsets[i] >= beacon_data_len)) {
rcu_read_unlock();
return;
}
-
- beacon_data[beacon->cntdwn_counter_offsets[i]] = count;
+ beacon_data[bcn_offsets[i]] = count;
}
- if (sdata->vif.type == NL80211_IFTYPE_AP && resp)
- resp->data[resp->cntdwn_counter_offsets[i]] = count;
+ if (sdata->vif.type == NL80211_IFTYPE_AP && resp) {
+ u16 *resp_offsets = resp->cntdwn_counter_offsets;
+
+ resp->data[resp_offsets[i]] = count;
+ }
}
rcu_read_unlock();
}
@@ -4825,6 +5038,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (offs) {
offs->tim_offset = beacon->head_len;
offs->tim_length = skb->len - beacon->head_len;
+ offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0];
/* for AP the csa offsets are from tail */
csa_off_base = skb->len;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 060059ef9668..49cb96d25169 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -6,7 +6,7 @@
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*
* utilities for mac80211
*/
@@ -1336,6 +1336,18 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elems->rsnx = pos;
elems->rsnx_len = elen;
break;
+ case WLAN_EID_TX_POWER_ENVELOPE:
+ if (elen < 1 ||
+ elen > sizeof(struct ieee80211_tx_pwr_env))
+ break;
+
+ if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env))
+ break;
+
+ elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos;
+ elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen;
+ elems->tx_pwr_env_num++;
+ break;
case WLAN_EID_EXTENSION:
ieee80211_parse_extension_element(calc_crc ?
&crc : NULL,
@@ -1693,7 +1705,10 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) {
mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
err = ieee80211_wep_encrypt(local, skb, key, key_len, key_idx);
- WARN_ON(err);
+ if (WARN_ON(err)) {
+ kfree_skb(skb);
+ return;
+ }
}
IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
@@ -1934,13 +1949,26 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
*offset = noffset;
}
- he_cap = ieee80211_get_he_sta_cap(sband);
- if (he_cap) {
+ he_cap = ieee80211_get_he_iftype_cap(sband,
+ ieee80211_vif_type_p2p(&sdata->vif));
+ if (he_cap &&
+ cfg80211_any_usable_channels(local->hw.wiphy, BIT(sband->band),
+ IEEE80211_CHAN_NO_HE)) {
pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
if (!pos)
goto out_err;
+ }
+
+ if (cfg80211_any_usable_channels(local->hw.wiphy,
+ BIT(NL80211_BAND_6GHZ),
+ IEEE80211_CHAN_NO_HE)) {
+ struct ieee80211_supported_band *sband6;
+
+ sband6 = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
+ he_cap = ieee80211_get_he_iftype_cap(sband6,
+ ieee80211_vif_type_p2p(&sdata->vif));
- if (sband->band == NL80211_BAND_6GHZ) {
+ if (he_cap) {
enum nl80211_iftype iftype =
ieee80211_vif_type_p2p(&sdata->vif);
__le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype);
@@ -2944,12 +2972,15 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
u8 *pos;
u16 cap;
- sband = ieee80211_get_sband(sdata);
- if (!sband)
+ if (!cfg80211_any_usable_channels(sdata->local->hw.wiphy,
+ BIT(NL80211_BAND_6GHZ),
+ IEEE80211_CHAN_NO_HE))
return;
+ sband = sdata->local->hw.wiphy->bands[NL80211_BAND_6GHZ];
+
iftd = ieee80211_get_sband_iftype_data(sband, iftype);
- if (WARN_ON(!iftd))
+ if (!iftd)
return;
/* Check for device HE 6 GHz capability before adding element */
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 1cf5ac09edcb..323d3d2d986f 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -617,7 +617,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
{
struct net_device *ndev = NULL;
struct ieee802154_sub_if_data *sdata = NULL;
- int ret = -ENOMEM;
+ int ret;
ASSERT_RTNL();
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
new file mode 100644
index 000000000000..2cdf3d0a28c9
--- /dev/null
+++ b/net/mctp/Kconfig
@@ -0,0 +1,13 @@
+
+menuconfig MCTP
+ depends on NET
+ tristate "MCTP core protocol support"
+ help
+ Management Component Transport Protocol (MCTP) is an in-system
+ protocol for communicating between management controllers and
+ their managed devices (peripherals, host processors, etc.). The
+ protocol is defined by DMTF specification DSP0236.
+
+ This option enables core MCTP support. For communicating with other
+ devices, you'll want to enable a driver for a specific hardware
+ channel.
diff --git a/net/mctp/Makefile b/net/mctp/Makefile
new file mode 100644
index 000000000000..0171333384d7
--- /dev/null
+++ b/net/mctp/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MCTP) += mctp.o
+mctp-objs := af_mctp.o device.o route.o neigh.o
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
new file mode 100644
index 000000000000..a9526ac29dff
--- /dev/null
+++ b/net/mctp/af_mctp.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP)
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_arp.h>
+#include <linux/net.h>
+#include <linux/mctp.h>
+#include <linux/module.h>
+#include <linux/socket.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+/* socket implementation */
+
+static int mctp_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (sk) {
+ sock->sk = NULL;
+ sk->sk_prot->close(sk, 0);
+ }
+
+ return 0;
+}
+
+static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
+{
+ struct sock *sk = sock->sk;
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct sockaddr_mctp *smctp;
+ int rc;
+
+ if (addrlen < sizeof(*smctp))
+ return -EINVAL;
+
+ if (addr->sa_family != AF_MCTP)
+ return -EAFNOSUPPORT;
+
+ if (!capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
+
+ /* it's a valid sockaddr for MCTP, cast and do protocol checks */
+ smctp = (struct sockaddr_mctp *)addr;
+
+ lock_sock(sk);
+
+ /* TODO: allow rebind */
+ if (sk_hashed(sk)) {
+ rc = -EADDRINUSE;
+ goto out_release;
+ }
+ msk->bind_net = smctp->smctp_network;
+ msk->bind_addr = smctp->smctp_addr.s_addr;
+ msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */
+
+ rc = sk->sk_prot->hash(sk);
+
+out_release:
+ release_sock(sk);
+
+ return rc;
+}
+
+static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+ DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+ const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
+ int rc, addrlen = msg->msg_namelen;
+ struct sock *sk = sock->sk;
+ struct mctp_skb_cb *cb;
+ struct mctp_route *rt;
+ struct sk_buff *skb;
+
+ if (addr) {
+ if (addrlen < sizeof(struct sockaddr_mctp))
+ return -EINVAL;
+ if (addr->smctp_family != AF_MCTP)
+ return -EINVAL;
+ if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER))
+ return -EINVAL;
+
+ } else {
+ /* TODO: connect()ed sockets */
+ return -EDESTADDRREQ;
+ }
+
+ if (!capable(CAP_NET_RAW))
+ return -EACCES;
+
+ if (addr->smctp_network == MCTP_NET_ANY)
+ addr->smctp_network = mctp_default_net(sock_net(sk));
+
+ rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+ addr->smctp_addr.s_addr);
+ if (!rt)
+ return -EHOSTUNREACH;
+
+ skb = sock_alloc_send_skb(sk, hlen + 1 + len,
+ msg->msg_flags & MSG_DONTWAIT, &rc);
+ if (!skb)
+ return rc;
+
+ skb_reserve(skb, hlen);
+
+ /* set type as first byte in payload */
+ *(u8 *)skb_put(skb, 1) = addr->smctp_type;
+
+ rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
+ if (rc < 0) {
+ kfree_skb(skb);
+ return rc;
+ }
+
+ /* set up cb */
+ cb = __mctp_cb(skb);
+ cb->net = addr->smctp_network;
+
+ rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
+ addr->smctp_tag);
+
+ return rc ? : len;
+}
+
+static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+ DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
+ struct sock *sk = sock->sk;
+ struct sk_buff *skb;
+ size_t msglen;
+ u8 type;
+ int rc;
+
+ if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
+ return -EOPNOTSUPP;
+
+ skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc);
+ if (!skb)
+ return rc;
+
+ if (!skb->len) {
+ rc = 0;
+ goto out_free;
+ }
+
+ /* extract message type, remove from data */
+ type = *((u8 *)skb->data);
+ msglen = skb->len - 1;
+
+ if (len < msglen)
+ msg->msg_flags |= MSG_TRUNC;
+ else
+ len = msglen;
+
+ rc = skb_copy_datagram_msg(skb, 1, msg, len);
+ if (rc < 0)
+ goto out_free;
+
+ sock_recv_ts_and_drops(msg, sk, skb);
+
+ if (addr) {
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ /* TODO: expand mctp_skb_cb for header fields? */
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+
+ addr = msg->msg_name;
+ addr->smctp_family = AF_MCTP;
+ addr->smctp_network = cb->net;
+ addr->smctp_addr.s_addr = hdr->src;
+ addr->smctp_type = type;
+ addr->smctp_tag = hdr->flags_seq_tag &
+ (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+ msg->msg_namelen = sizeof(*addr);
+ }
+
+ rc = len;
+
+ if (flags & MSG_TRUNC)
+ rc = msglen;
+
+out_free:
+ skb_free_datagram(sk, skb);
+ return rc;
+}
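For context, a hedged userspace sketch of the request/response flow these sendmsg/recvmsg handlers implement. It assumes the AF_MCTP uapi from <linux/mctp.h> (struct sockaddr_mctp, MCTP_NET_ANY, MCTP_TAG_OWNER); the destination EID 8 and message type 0x01 are made up for the example, and sending requires CAP_NET_RAW as checked above.

#include <linux/mctp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_mctp addr = { 0 };
	unsigned char req[2] = { 0xaa, 0x55 }, rsp[64];
	socklen_t addrlen = sizeof(addr);
	int sd;

	sd = socket(AF_MCTP, SOCK_DGRAM, 0);
	if (sd < 0)
		return 1;

	addr.smctp_family = AF_MCTP;
	addr.smctp_network = MCTP_NET_ANY;	/* resolved to the default net */
	addr.smctp_addr.s_addr = 8;		/* example destination EID */
	addr.smctp_type = 0x01;			/* example message type */
	addr.smctp_tag = MCTP_TAG_OWNER;	/* ask the kernel to allocate a tag */

	/* the kernel prepends the type byte; req is just the message body */
	if (sendto(sd, req, sizeof(req), 0,
		   (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* the reply's source EID, type and tag come back via msg_name */
	if (recvfrom(sd, rsp, sizeof(rsp), 0,
		     (struct sockaddr *)&addr, &addrlen) < 0)
		return 1;

	printf("reply from EID %d, type 0x%02x\n",
	       addr.smctp_addr.s_addr, addr.smctp_type);
	close(sd);
	return 0;
}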
+
+static int mctp_setsockopt(struct socket *sock, int level, int optname,
+ sockptr_t optval, unsigned int optlen)
+{
+ return -EINVAL;
+}
+
+static int mctp_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ return -EINVAL;
+}
+
+static const struct proto_ops mctp_dgram_ops = {
+ .family = PF_MCTP,
+ .release = mctp_release,
+ .bind = mctp_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = sock_no_ioctl,
+ .gettstamp = sock_gettstamp,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = mctp_setsockopt,
+ .getsockopt = mctp_getsockopt,
+ .sendmsg = mctp_sendmsg,
+ .recvmsg = mctp_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static int mctp_sk_init(struct sock *sk)
+{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+ INIT_HLIST_HEAD(&msk->keys);
+ return 0;
+}
+
+static void mctp_sk_close(struct sock *sk, long timeout)
+{
+ sk_common_release(sk);
+}
+
+static int mctp_sk_hash(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+
+ mutex_lock(&net->mctp.bind_lock);
+ sk_add_node_rcu(sk, &net->mctp.binds);
+ mutex_unlock(&net->mctp.bind_lock);
+
+ return 0;
+}
+
+static void mctp_sk_unhash(struct sock *sk)
+{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct net *net = sock_net(sk);
+ struct mctp_sk_key *key;
+ struct hlist_node *tmp;
+ unsigned long flags;
+
+ /* remove from any type-based binds */
+ mutex_lock(&net->mctp.bind_lock);
+ sk_del_node_init_rcu(sk);
+ mutex_unlock(&net->mctp.bind_lock);
+
+ /* remove tag allocations */
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) {
+ hlist_del_rcu(&key->sklist);
+ hlist_del_rcu(&key->hlist);
+
+ spin_lock(&key->reasm_lock);
+ if (key->reasm_head)
+ kfree_skb(key->reasm_head);
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ spin_unlock(&key->reasm_lock);
+
+ kfree_rcu(key, rcu);
+ }
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ synchronize_rcu();
+}
+
+static struct proto mctp_proto = {
+ .name = "MCTP",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct mctp_sock),
+ .init = mctp_sk_init,
+ .close = mctp_sk_close,
+ .hash = mctp_sk_hash,
+ .unhash = mctp_sk_unhash,
+};
+
+static int mctp_pf_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
+{
+ const struct proto_ops *ops;
+ struct proto *proto;
+ struct sock *sk;
+ int rc;
+
+ if (protocol)
+ return -EPROTONOSUPPORT;
+
+ /* only datagram sockets are supported */
+ if (sock->type != SOCK_DGRAM)
+ return -ESOCKTNOSUPPORT;
+
+ proto = &mctp_proto;
+ ops = &mctp_dgram_ops;
+
+ sock->state = SS_UNCONNECTED;
+ sock->ops = ops;
+
+ sk = sk_alloc(net, PF_MCTP, GFP_KERNEL, proto, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ sock_init_data(sock, sk);
+
+ rc = 0;
+ if (sk->sk_prot->init)
+ rc = sk->sk_prot->init(sk);
+
+ if (rc)
+ goto err_sk_put;
+
+ return 0;
+
+err_sk_put:
+ sock_orphan(sk);
+ sock_put(sk);
+ return rc;
+}
+
+static struct net_proto_family mctp_pf = {
+ .family = PF_MCTP,
+ .create = mctp_pf_create,
+ .owner = THIS_MODULE,
+};
+
+static __init int mctp_init(void)
+{
+ int rc;
+
+ /* ensure our uapi tag definitions match the header format */
+ BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO);
+ BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK);
+
+ pr_info("mctp: management component transport protocol core\n");
+
+ rc = sock_register(&mctp_pf);
+ if (rc)
+ return rc;
+
+ rc = proto_register(&mctp_proto, 0);
+ if (rc)
+ goto err_unreg_sock;
+
+ rc = mctp_routes_init();
+ if (rc)
+ goto err_unreg_proto;
+
+ rc = mctp_neigh_init();
+ if (rc)
+ goto err_unreg_proto;
+
+ mctp_device_init();
+
+ return 0;
+
+err_unreg_proto:
+ proto_unregister(&mctp_proto);
+err_unreg_sock:
+ sock_unregister(PF_MCTP);
+
+ return rc;
+}
+
+static __exit void mctp_exit(void)
+{
+ mctp_device_exit();
+ mctp_neigh_exit();
+ mctp_routes_exit();
+ proto_unregister(&mctp_proto);
+ sock_unregister(PF_MCTP);
+}
+
+module_init(mctp_init);
+module_exit(mctp_exit);
+
+MODULE_DESCRIPTION("MCTP core");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jeremy Kerr <jk@codeconstruct.com.au>");
+
+MODULE_ALIAS_NETPROTO(PF_MCTP);
diff --git a/net/mctp/device.c b/net/mctp/device.c
new file mode 100644
index 000000000000..b9f38e765f61
--- /dev/null
+++ b/net/mctp/device.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - device implementation.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/if_link.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+
+#include <net/addrconf.h>
+#include <net/netlink.h>
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/sock.h>
+
+struct mctp_dump_cb {
+ int h;
+ int idx;
+ size_t a_idx;
+};
+
+/* unlocked: caller must hold rcu_read_lock */
+struct mctp_dev *__mctp_dev_get(const struct net_device *dev)
+{
+ return rcu_dereference(dev->mctp_ptr);
+}
+
+struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev)
+{
+ return rtnl_dereference(dev->mctp_ptr);
+}
+
+static void mctp_dev_destroy(struct mctp_dev *mdev)
+{
+ struct net_device *dev = mdev->dev;
+
+ dev_put(dev);
+ kfree_rcu(mdev, rcu);
+}
+
+static int mctp_fill_addrinfo(struct sk_buff *skb, struct netlink_callback *cb,
+ struct mctp_dev *mdev, mctp_eid_t eid)
+{
+ struct ifaddrmsg *hdr;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ RTM_NEWADDR, sizeof(*hdr), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ifa_family = AF_MCTP;
+ hdr->ifa_prefixlen = 0;
+ hdr->ifa_flags = 0;
+ hdr->ifa_scope = 0;
+ hdr->ifa_index = mdev->dev->ifindex;
+
+ if (nla_put_u8(skb, IFA_LOCAL, eid))
+ goto cancel;
+
+ if (nla_put_u8(skb, IFA_ADDRESS, eid))
+ goto cancel;
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_dump_dev_addrinfo(struct mctp_dev *mdev, struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct mctp_dump_cb *mcb = (void *)cb->ctx;
+ int rc = 0;
+
+ for (; mcb->a_idx < mdev->num_addrs; mcb->a_idx++) {
+ rc = mctp_fill_addrinfo(skb, cb, mdev, mdev->addrs[mcb->a_idx]);
+ if (rc < 0)
+ break;
+ }
+
+ return rc;
+}
+
+static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct mctp_dump_cb *mcb = (void *)cb->ctx;
+ struct net *net = sock_net(skb->sk);
+ struct hlist_head *head;
+ struct net_device *dev;
+ struct ifaddrmsg *hdr;
+ struct mctp_dev *mdev;
+ int ifindex;
+ int idx, rc;
+
+ hdr = nlmsg_data(cb->nlh);
+ // filter by ifindex if requested
+ ifindex = hdr->ifa_index;
+
+ rcu_read_lock();
+ for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[mcb->h];
+ hlist_for_each_entry_rcu(dev, head, index_hlist) {
+ if (idx >= mcb->idx &&
+ (ifindex == 0 || ifindex == dev->ifindex)) {
+ mdev = __mctp_dev_get(dev);
+ if (mdev) {
+ rc = mctp_dump_dev_addrinfo(mdev,
+ skb, cb);
+ // Error indicates full buffer, this
+ // callback will get retried.
+ if (rc < 0)
+ goto out;
+ }
+ }
+ idx++;
+ // reset for next iteration
+ mcb->a_idx = 0;
+ }
+ }
+out:
+ rcu_read_unlock();
+ mcb->idx = idx;
+
+ return skb->len;
+}
+
+static const struct nla_policy ifa_mctp_policy[IFA_MAX + 1] = {
+ [IFA_ADDRESS] = { .type = NLA_U8 },
+ [IFA_LOCAL] = { .type = NLA_U8 },
+};
+
+static int mctp_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[IFA_MAX + 1];
+ struct net_device *dev;
+ struct mctp_addr *addr;
+ struct mctp_dev *mdev;
+ struct ifaddrmsg *ifm;
+ unsigned long flags;
+ u8 *tmp_addrs;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+ extack);
+ if (rc < 0)
+ return rc;
+
+ ifm = nlmsg_data(nlh);
+
+ if (tb[IFA_LOCAL])
+ addr = nla_data(tb[IFA_LOCAL]);
+ else if (tb[IFA_ADDRESS])
+ addr = nla_data(tb[IFA_ADDRESS]);
+ else
+ return -EINVAL;
+
+ /* find device */
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ if (!mctp_address_ok(addr->s_addr))
+ return -EINVAL;
+
+ /* Prevent duplicates. Under RTNL so don't need to lock for reading */
+ if (memchr(mdev->addrs, addr->s_addr, mdev->num_addrs))
+ return -EEXIST;
+
+ tmp_addrs = kmalloc(mdev->num_addrs + 1, GFP_KERNEL);
+ if (!tmp_addrs)
+ return -ENOMEM;
+ memcpy(tmp_addrs, mdev->addrs, mdev->num_addrs);
+ tmp_addrs[mdev->num_addrs] = addr->s_addr;
+
+ /* Lock to write */
+ spin_lock_irqsave(&mdev->addrs_lock, flags);
+ mdev->num_addrs++;
+ swap(mdev->addrs, tmp_addrs);
+ spin_unlock_irqrestore(&mdev->addrs_lock, flags);
+
+ kfree(tmp_addrs);
+
+ mctp_route_add_local(mdev, addr->s_addr);
+
+ return 0;
+}
+
+static int mctp_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[IFA_MAX + 1];
+ struct net_device *dev;
+ struct mctp_addr *addr;
+ struct mctp_dev *mdev;
+ struct ifaddrmsg *ifm;
+ unsigned long flags;
+ u8 *pos;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_mctp_policy,
+ extack);
+ if (rc < 0)
+ return rc;
+
+ ifm = nlmsg_data(nlh);
+
+ if (tb[IFA_LOCAL])
+ addr = nla_data(tb[IFA_LOCAL]);
+ else if (tb[IFA_ADDRESS])
+ addr = nla_data(tb[IFA_ADDRESS]);
+ else
+ return -EINVAL;
+
+ /* find device */
+ dev = __dev_get_by_index(net, ifm->ifa_index);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ pos = memchr(mdev->addrs, addr->s_addr, mdev->num_addrs);
+ if (!pos)
+ return -ENOENT;
+
+ rc = mctp_route_remove_local(mdev, addr->s_addr);
+ // we can ignore -ENOENT in case the route was already removed
+ if (rc < 0 && rc != -ENOENT)
+ return rc;
+
+ spin_lock_irqsave(&mdev->addrs_lock, flags);
+ memmove(pos, pos + 1, mdev->num_addrs - 1 - (pos - mdev->addrs));
+ mdev->num_addrs--;
+ spin_unlock_irqrestore(&mdev->addrs_lock, flags);
+
+ return 0;
+}
+
+static struct mctp_dev *mctp_add_dev(struct net_device *dev)
+{
+ struct mctp_dev *mdev;
+
+ ASSERT_RTNL();
+
+ mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
+ if (!mdev)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&mdev->addrs_lock);
+
+ mdev->net = mctp_default_net(dev_net(dev));
+
+ /* associate to net_device */
+ rcu_assign_pointer(dev->mctp_ptr, mdev);
+ dev_hold(dev);
+ mdev->dev = dev;
+
+ return mdev;
+}
+
+static int mctp_fill_link_af(struct sk_buff *skb,
+ const struct net_device *dev, u32 ext_filter_mask)
+{
+ struct mctp_dev *mdev;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODATA;
+ if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static size_t mctp_get_link_af_size(const struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ struct mctp_dev *mdev;
+ unsigned int ret;
+
+ /* caller holds RCU */
+ mdev = __mctp_dev_get(dev);
+ if (!mdev)
+ return 0;
+ ret = nla_total_size(4); /* IFLA_MCTP_NET */
+ return ret;
+}
+
+static const struct nla_policy ifla_af_mctp_policy[IFLA_MCTP_MAX + 1] = {
+ [IFLA_MCTP_NET] = { .type = NLA_U32 },
+};
+
+static int mctp_set_link_af(struct net_device *dev, const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_MCTP_MAX + 1];
+ struct mctp_dev *mdev;
+ int rc;
+
+ rc = nla_parse_nested(tb, IFLA_MCTP_MAX, attr, ifla_af_mctp_policy,
+ NULL);
+ if (rc)
+ return rc;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return 0;
+
+ if (tb[IFLA_MCTP_NET])
+ WRITE_ONCE(mdev->net, nla_get_u32(tb[IFLA_MCTP_NET]));
+
+ return 0;
+}
+
+static void mctp_unregister(struct net_device *dev)
+{
+ struct mctp_dev *mdev;
+
+ mdev = mctp_dev_get_rtnl(dev);
+
+ if (!mdev)
+ return;
+
+ RCU_INIT_POINTER(mdev->dev->mctp_ptr, NULL);
+
+ mctp_route_remove_dev(mdev);
+ mctp_neigh_remove_dev(mdev);
+ kfree(mdev->addrs);
+
+ mctp_dev_destroy(mdev);
+}
+
+static int mctp_register(struct net_device *dev)
+{
+ struct mctp_dev *mdev;
+
+ /* Already registered? */
+ if (rtnl_dereference(dev->mctp_ptr))
+ return 0;
+
+ /* only register specific types; MCTP-specific and loopback for now */
+ if (dev->type != ARPHRD_MCTP && dev->type != ARPHRD_LOOPBACK)
+ return 0;
+
+ mdev = mctp_add_dev(dev);
+ if (IS_ERR(mdev))
+ return PTR_ERR(mdev);
+
+ return 0;
+}
+
+static int mctp_dev_notify(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ int rc;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ rc = mctp_register(dev);
+ if (rc)
+ return notifier_from_errno(rc);
+ break;
+ case NETDEV_UNREGISTER:
+ mctp_unregister(dev);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct rtnl_af_ops mctp_af_ops = {
+ .family = AF_MCTP,
+ .fill_link_af = mctp_fill_link_af,
+ .get_link_af_size = mctp_get_link_af_size,
+ .set_link_af = mctp_set_link_af,
+};
+
+static struct notifier_block mctp_dev_nb = {
+ .notifier_call = mctp_dev_notify,
+ .priority = ADDRCONF_NOTIFY_PRIORITY,
+};
+
+void __init mctp_device_init(void)
+{
+ register_netdevice_notifier(&mctp_dev_nb);
+
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR,
+ NULL, mctp_dump_addrinfo, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR,
+ mctp_rtm_newaddr, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR,
+ mctp_rtm_deladdr, NULL, 0);
+ rtnl_af_register(&mctp_af_ops);
+}
+
+void __exit mctp_device_exit(void)
+{
+ rtnl_af_unregister(&mctp_af_ops);
+ rtnl_unregister(PF_MCTP, RTM_DELADDR);
+ rtnl_unregister(PF_MCTP, RTM_NEWADDR);
+ rtnl_unregister(PF_MCTP, RTM_GETADDR);
+
+ unregister_netdevice_notifier(&mctp_dev_nb);
+}
diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c
new file mode 100644
index 000000000000..90ed2f02d1fb
--- /dev/null
+++ b/net/mctp/neigh.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - neighbour
+ * implementation.
+ *
+ * This is currently based on a simple linear list of neighbours, with no
+ * cache. The number of neighbours should stay fairly small, so the lookup
+ * cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+static int mctp_neigh_add(struct mctp_dev *mdev, mctp_eid_t eid,
+ enum mctp_neigh_source source,
+ size_t lladdr_len, const void *lladdr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh;
+ int rc;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ if (mctp_neigh_lookup(mdev, eid, NULL) == 0) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ if (lladdr_len > sizeof(neigh->ha)) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ neigh = kzalloc(sizeof(*neigh), GFP_KERNEL);
+ if (!neigh) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ INIT_LIST_HEAD(&neigh->list);
+ neigh->dev = mdev;
+ dev_hold(neigh->dev->dev);
+ neigh->eid = eid;
+ neigh->source = source;
+ memcpy(neigh->ha, lladdr, lladdr_len);
+
+ list_add_rcu(&neigh->list, &net->mctp.neighbours);
+ rc = 0;
+out:
+ mutex_unlock(&net->mctp.neigh_lock);
+ return rc;
+}
+
+static void __mctp_neigh_free(struct rcu_head *rcu)
+{
+ struct mctp_neigh *neigh = container_of(rcu, struct mctp_neigh, rcu);
+
+ dev_put(neigh->dev->dev);
+ kfree(neigh);
+}
+
+/* Removes all neighbour entries referring to a device */
+void mctp_neigh_remove_dev(struct mctp_dev *mdev)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh, *tmp;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+ if (neigh->dev == mdev) {
+ list_del_rcu(&neigh->list);
+ /* TODO: immediate RTM_DELNEIGH */
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+ }
+ }
+
+ mutex_unlock(&net->mctp.neigh_lock);
+}
+
+// TODO: add a "source" flag so netlink can only delete static neighbours?
+static int mctp_neigh_remove(struct mctp_dev *mdev, mctp_eid_t eid)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh, *tmp;
+ bool dropped = false;
+
+ mutex_lock(&net->mctp.neigh_lock);
+ list_for_each_entry_safe(neigh, tmp, &net->mctp.neighbours, list) {
+ if (neigh->dev == mdev && neigh->eid == eid) {
+ list_del_rcu(&neigh->list);
+ /* TODO: immediate RTM_DELNEIGH */
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+ dropped = true;
+ }
+ }
+
+ mutex_unlock(&net->mctp.neigh_lock);
+ return dropped ? 0 : -ENOENT;
+}
+
+static const struct nla_policy nd_mctp_policy[NDA_MAX + 1] = {
+ [NDA_DST] = { .type = NLA_U8 },
+ [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+};
+
+static int mctp_rtm_newneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct ndmsg *ndm;
+ struct nlattr *tb[NDA_MAX + 1];
+ int rc;
+ mctp_eid_t eid;
+ void *lladdr;
+ int lladdr_len;
+
+ rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+ extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "lladdr too large?");
+ return rc;
+ }
+
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+ return -EINVAL;
+ }
+
+ if (!tb[NDA_LLADDR]) {
+ NL_SET_ERR_MSG(extack, "Neighbour lladdr must be specified");
+ return -EINVAL;
+ }
+
+ eid = nla_get_u8(tb[NDA_DST]);
+ if (!mctp_address_ok(eid)) {
+ NL_SET_ERR_MSG(extack, "Invalid neighbour EID");
+ return -EINVAL;
+ }
+
+ lladdr = nla_data(tb[NDA_LLADDR]);
+ lladdr_len = nla_len(tb[NDA_LLADDR]);
+
+ ndm = nlmsg_data(nlh);
+
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ if (lladdr_len != dev->addr_len) {
+ NL_SET_ERR_MSG(extack, "Wrong lladdr length");
+ return -EINVAL;
+ }
+
+ return mctp_neigh_add(mdev, eid, MCTP_NEIGH_STATIC,
+ lladdr_len, lladdr);
+}
+
+static int mctp_rtm_delneigh(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NDA_MAX + 1];
+ struct net_device *dev;
+ struct mctp_dev *mdev;
+ struct ndmsg *ndm;
+ int rc;
+ mctp_eid_t eid;
+
+ rc = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nd_mctp_policy,
+ extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect format");
+ return rc;
+ }
+
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Neighbour EID must be specified");
+ return -EINVAL;
+ }
+ eid = nla_get_u8(tb[NDA_DST]);
+
+ ndm = nlmsg_data(nlh);
+ dev = __dev_get_by_index(net, ndm->ndm_ifindex);
+ if (!dev)
+ return -ENODEV;
+
+ mdev = mctp_dev_get_rtnl(dev);
+ if (!mdev)
+ return -ENODEV;
+
+ return mctp_neigh_remove(mdev, eid);
+}
+
+static int mctp_fill_neigh(struct sk_buff *skb, u32 portid, u32 seq, int event,
+ unsigned int flags, struct mctp_neigh *neigh)
+{
+ struct net_device *dev = neigh->dev->dev;
+ struct nlmsghdr *nlh;
+ struct ndmsg *hdr;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->ndm_family = AF_MCTP;
+ hdr->ndm_ifindex = dev->ifindex;
+ hdr->ndm_state = 0; // TODO other state bits?
+ if (neigh->source == MCTP_NEIGH_STATIC)
+ hdr->ndm_state |= NUD_PERMANENT;
+ hdr->ndm_flags = 0;
+ hdr->ndm_type = RTN_UNICAST; // TODO: is loopback RTN_LOCAL?
+
+ if (nla_put_u8(skb, NDA_DST, neigh->eid))
+ goto cancel;
+
+ if (nla_put(skb, NDA_LLADDR, dev->addr_len, neigh->ha))
+ goto cancel;
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_rtm_getneigh(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int rc, idx, req_ifindex;
+ struct mctp_neigh *neigh;
+ struct ndmsg *ndmsg;
+ struct {
+ int idx;
+ } *cbctx = (void *)cb->ctx;
+
+ ndmsg = nlmsg_data(cb->nlh);
+ req_ifindex = ndmsg->ndm_ifindex;
+
+ idx = 0;
+ rcu_read_lock();
+ list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+ if (idx < cbctx->idx)
+ goto cont;
+
+ rc = 0;
+ if (req_ifindex == 0 || req_ifindex == neigh->dev->dev->ifindex)
+ rc = mctp_fill_neigh(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH, NLM_F_MULTI, neigh);
+
+ if (rc)
+ break;
+cont:
+ idx++;
+ }
+ rcu_read_unlock();
+
+ cbctx->idx = idx;
+ return skb->len;
+}
+
+int mctp_neigh_lookup(struct mctp_dev *mdev, mctp_eid_t eid, void *ret_hwaddr)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_neigh *neigh;
+ int rc = -EHOSTUNREACH; // TODO: or ENOENT?
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(neigh, &net->mctp.neighbours, list) {
+ if (mdev == neigh->dev && eid == neigh->eid) {
+ if (ret_hwaddr)
+ memcpy(ret_hwaddr, neigh->ha,
+ sizeof(neigh->ha));
+ rc = 0;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return rc;
+}
+
+/* namespace registration */
+static int __net_init mctp_neigh_net_init(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+
+ INIT_LIST_HEAD(&ns->neighbours);
+ mutex_init(&ns->neigh_lock);
+ return 0;
+}
+
+static void __net_exit mctp_neigh_net_exit(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+ struct mctp_neigh *neigh;
+
+ list_for_each_entry(neigh, &ns->neighbours, list)
+ call_rcu(&neigh->rcu, __mctp_neigh_free);
+}
+
+/* net namespace implementation */
+
+static struct pernet_operations mctp_net_ops = {
+ .init = mctp_neigh_net_init,
+ .exit = mctp_neigh_net_exit,
+};
+
+int __init mctp_neigh_init(void)
+{
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH,
+ mctp_rtm_newneigh, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH,
+ mctp_rtm_delneigh, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH,
+ NULL, mctp_rtm_getneigh, 0);
+
+ return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_neigh_exit(void)
+{
+ unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_GETNEIGH);
+ rtnl_unregister(PF_MCTP, RTM_DELNEIGH);
+ rtnl_unregister(PF_MCTP, RTM_NEWNEIGH);
+}
diff --git a/net/mctp/route.c b/net/mctp/route.c
new file mode 100644
index 000000000000..5265525011ad
--- /dev/null
+++ b/net/mctp/route.c
@@ -0,0 +1,1116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Component Transport Protocol (MCTP) - routing
+ * implementation.
+ *
+ * This is currently based on a simple routing table, with no dst cache. The
+ * number of routes should stay fairly small, so the lookup cost is small.
+ *
+ * Copyright (c) 2021 Code Construct
+ * Copyright (c) 2021 Google
+ */
+
+#include <linux/idr.h>
+#include <linux/mctp.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <uapi/linux/if_arp.h>
+
+#include <net/mctp.h>
+#include <net/mctpdevice.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+
+static const unsigned int mctp_message_maxlen = 64 * 1024;
+
+/* route output callbacks */
+static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb)
+{
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ struct mctp_hdr *mh;
+ struct sock *sk;
+ u8 type;
+
+ WARN_ON(!rcu_read_lock_held());
+
+ /* TODO: look up in skb->cb? */
+ mh = mctp_hdr(skb);
+
+ if (!skb_headlen(skb))
+ return NULL;
+
+ type = (*(u8 *)skb->data) & 0x7f;
+
+ sk_for_each_rcu(sk, &net->mctp.binds) {
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+
+ if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net)
+ continue;
+
+ if (msk->bind_type != type)
+ continue;
+
+ if (msk->bind_addr != MCTP_ADDR_ANY &&
+ msk->bind_addr != mh->dest)
+ continue;
+
+ return msk;
+ }
+
+ return NULL;
+}
+
+static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local,
+ mctp_eid_t peer, u8 tag)
+{
+ if (key->local_addr != local)
+ return false;
+
+ if (key->peer_addr != peer)
+ return false;
+
+ if (key->tag != tag)
+ return false;
+
+ return true;
+}
+
+static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb,
+ mctp_eid_t peer)
+{
+ struct mctp_sk_key *key, *ret;
+ struct mctp_hdr *mh;
+ u8 tag;
+
+ WARN_ON(!rcu_read_lock_held());
+
+ mh = mctp_hdr(skb);
+ tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+ ret = NULL;
+
+ hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) {
+ if (mctp_key_match(key, mh->dest, peer, tag)) {
+ ret = key;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
+ mctp_eid_t local, mctp_eid_t peer,
+ u8 tag, gfp_t gfp)
+{
+ struct mctp_sk_key *key;
+
+ key = kzalloc(sizeof(*key), gfp);
+ if (!key)
+ return NULL;
+
+ key->peer_addr = peer;
+ key->local_addr = local;
+ key->tag = tag;
+ key->sk = &msk->sk;
+ spin_lock_init(&key->reasm_lock);
+
+ return key;
+}
+
+static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct mctp_sk_key *tmp;
+ unsigned long flags;
+ int rc = 0;
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+
+ hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
+ if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
+ key->tag)) {
+ rc = -EEXIST;
+ break;
+ }
+ }
+
+ if (!rc) {
+ hlist_add_head(&key->hlist, &net->mctp.keys);
+ hlist_add_head(&key->sklist, &msk->keys);
+ }
+
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ return rc;
+}
+
+/* Must be called with key->reasm_lock, which it will release. Will schedule
+ * the key for an RCU free.
+ */
+static void __mctp_key_unlock_drop(struct mctp_sk_key *key, struct net *net,
+ unsigned long flags)
+ __releases(&key->reasm_lock)
+{
+ struct sk_buff *skb;
+
+ skb = key->reasm_head;
+ key->reasm_head = NULL;
+ key->reasm_dead = true;
+ spin_unlock_irqrestore(&key->reasm_lock, flags);
+
+ spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ hlist_del_rcu(&key->hlist);
+ hlist_del_rcu(&key->sklist);
+ spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+ kfree_rcu(key, rcu);
+
+ if (skb)
+ kfree_skb(skb);
+}
+
+static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
+{
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+ u8 exp_seq, this_seq;
+
+ this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT)
+ & MCTP_HDR_SEQ_MASK;
+
+ if (!key->reasm_head) {
+ key->reasm_head = skb;
+ key->reasm_tailp = &(skb_shinfo(skb)->frag_list);
+ key->last_seq = this_seq;
+ return 0;
+ }
+
+ exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK;
+
+ if (this_seq != exp_seq)
+ return -EINVAL;
+
+ if (key->reasm_head->len + skb->len > mctp_message_maxlen)
+ return -EINVAL;
+
+ skb->next = NULL;
+ skb->sk = NULL;
+ *key->reasm_tailp = skb;
+ key->reasm_tailp = &skb->next;
+
+ key->last_seq = this_seq;
+
+ key->reasm_head->data_len += skb->len;
+ key->reasm_head->len += skb->len;
+ key->reasm_head->truesize += skb->truesize;
+
+ return 0;
+}
+
+static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct mctp_sk_key *key;
+ struct mctp_sock *msk;
+ struct mctp_hdr *mh;
+ unsigned long f;
+ u8 tag, flags;
+ int rc;
+
+ msk = NULL;
+ rc = -EINVAL;
+
+ /* we may be receiving a locally-routed packet; drop source sk
+ * accounting
+ */
+ skb_orphan(skb);
+
+ /* ensure we have enough data for a header and a type */
+ if (skb->len < sizeof(struct mctp_hdr) + 1)
+ goto out;
+
+ /* grab header, advance data ptr */
+ mh = mctp_hdr(skb);
+ skb_pull(skb, sizeof(struct mctp_hdr));
+
+ if (mh->ver != 1)
+ goto out;
+
+ flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM);
+ tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+ rcu_read_lock();
+
+ /* lookup socket / reasm context, exactly matching (src,dest,tag) */
+ key = mctp_lookup_key(net, skb, mh->src);
+
+ if (flags & MCTP_HDR_FLAG_SOM) {
+ if (key) {
+ msk = container_of(key->sk, struct mctp_sock, sk);
+ } else {
+ /* first response to a broadcast? do a more general
+ * key lookup to find the socket, but don't use this
+ * key for reassembly - we'll create a more specific
+ * one for future packets if required (ie, !EOM).
+ */
+ key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY);
+ if (key) {
+ msk = container_of(key->sk,
+ struct mctp_sock, sk);
+ key = NULL;
+ }
+ }
+
+ if (!key && !msk && (tag & MCTP_HDR_FLAG_TO))
+ msk = mctp_lookup_bind(net, skb);
+
+ if (!msk) {
+ rc = -ENOENT;
+ goto out_unlock;
+ }
+
+ /* single-packet message? deliver to socket, clean up any
+ * pending key.
+ */
+ if (flags & MCTP_HDR_FLAG_EOM) {
+ sock_queue_rcv_skb(&msk->sk, skb);
+ if (key) {
+ spin_lock_irqsave(&key->reasm_lock, f);
+ /* we've hit a pending reassembly; not much we
+ * can do but drop it
+ */
+ __mctp_key_unlock_drop(key, net, f);
+ }
+ rc = 0;
+ goto out_unlock;
+ }
+
+ /* broadcast response or a bind() - create a key for further
+ * packets for this message
+ */
+ if (!key) {
+ key = mctp_key_alloc(msk, mh->dest, mh->src,
+ tag, GFP_ATOMIC);
+ if (!key) {
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /* we can queue without the reasm lock here, as the
+ * key isn't observable yet
+ */
+ mctp_frag_queue(key, skb);
+
+ /* if the key_add fails, we've raced with another
+ * SOM packet with the same src, dest and tag. There's
+ * no way to distinguish future packets, so all we
+ * can do is drop; we'll free the skb on exit from
+ * this function.
+ */
+ rc = mctp_key_add(key, msk);
+ if (rc)
+ kfree(key);
+
+ } else {
+ /* existing key: start reassembly */
+ spin_lock_irqsave(&key->reasm_lock, f);
+
+ if (key->reasm_head || key->reasm_dead) {
+ /* duplicate start? drop everything */
+ __mctp_key_unlock_drop(key, net, f);
+ rc = -EEXIST;
+ } else {
+ rc = mctp_frag_queue(key, skb);
+ spin_unlock_irqrestore(&key->reasm_lock, f);
+ }
+ }
+
+ } else if (key) {
+ /* this packet continues a previous message; reassemble
+ * using the message-specific key
+ */
+
+ spin_lock_irqsave(&key->reasm_lock, f);
+
+ /* we need to be continuing an existing reassembly... */
+ if (!key->reasm_head)
+ rc = -EINVAL;
+ else
+ rc = mctp_frag_queue(key, skb);
+
+ /* end of message? deliver to socket, and we're done with
+ * the reassembly/response key
+ */
+ if (!rc && flags & MCTP_HDR_FLAG_EOM) {
+ sock_queue_rcv_skb(key->sk, key->reasm_head);
+ key->reasm_head = NULL;
+ __mctp_key_unlock_drop(key, net, f);
+ } else {
+ spin_unlock_irqrestore(&key->reasm_lock, f);
+ }
+
+ } else {
+ /* not a start, no matching key */
+ rc = -ENOENT;
+ }
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ if (rc)
+ kfree_skb(skb);
+ return rc;
+}
+
+static unsigned int mctp_route_mtu(struct mctp_route *rt)
+{
+ return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu);
+}
+
+static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
+{
+ struct mctp_hdr *hdr = mctp_hdr(skb);
+ char daddr_buf[MAX_ADDR_LEN];
+ char *daddr = NULL;
+ unsigned int mtu;
+ int rc;
+
+ skb->protocol = htons(ETH_P_MCTP);
+
+ mtu = READ_ONCE(skb->dev->mtu);
+ if (skb->len > mtu) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ /* If lookup fails let the device handle daddr==NULL */
+ if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+ daddr = daddr_buf;
+
+ rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
+ daddr, skb->dev->dev_addr, skb->len);
+ if (rc) {
+ kfree_skb(skb);
+ return -EHOSTUNREACH;
+ }
+
+ rc = dev_queue_xmit(skb);
+ if (rc)
+ rc = net_xmit_errno(rc);
+
+ return rc;
+}
+
+/* route alloc/release */
+static void mctp_route_release(struct mctp_route *rt)
+{
+ if (refcount_dec_and_test(&rt->refs)) {
+ dev_put(rt->dev->dev);
+ kfree_rcu(rt, rcu);
+ }
+}
+
+/* returns a route with the refcount at 1 */
+static struct mctp_route *mctp_route_alloc(void)
+{
+ struct mctp_route *rt;
+
+ rt = kzalloc(sizeof(*rt), GFP_KERNEL);
+ if (!rt)
+ return NULL;
+
+ INIT_LIST_HEAD(&rt->list);
+ refcount_set(&rt->refs, 1);
+ rt->output = mctp_route_discard;
+
+ return rt;
+}
+
+unsigned int mctp_default_net(struct net *net)
+{
+ return READ_ONCE(net->mctp.default_net);
+}
+
+int mctp_default_net_set(struct net *net, unsigned int index)
+{
+ if (index == 0)
+ return -EINVAL;
+ WRITE_ONCE(net->mctp.default_net, index);
+ return 0;
+}
+
+/* tag management */
+static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key,
+ struct mctp_sock *msk)
+{
+ struct netns_mctp *mns = &net->mctp;
+
+ lockdep_assert_held(&mns->keys_lock);
+
+ /* we hold net->mctp.keys_lock here, allowing updates to both
+ * the net and sk lists
+ */
+ hlist_add_head_rcu(&key->hlist, &mns->keys);
+ hlist_add_head_rcu(&key->sklist, &msk->keys);
+}
+
+/* Allocate a locally-owned tag value for (saddr, daddr), and reserve
+ * it for the socket msk
+ */
+static int mctp_alloc_local_tag(struct mctp_sock *msk,
+ mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp)
+{
+ struct net *net = sock_net(&msk->sk);
+ struct netns_mctp *mns = &net->mctp;
+ struct mctp_sk_key *key, *tmp;
+ unsigned long flags;
+ int rc = -EAGAIN;
+ u8 tagbits;
+
+ /* be optimistic, alloc now */
+ key = mctp_key_alloc(msk, saddr, daddr, 0, GFP_KERNEL);
+ if (!key)
+ return -ENOMEM;
+
+ /* 8 possible tag values */
+ tagbits = 0xff;
+
+ spin_lock_irqsave(&mns->keys_lock, flags);
+
+ /* Walk through the existing keys, looking for potential conflicting
+ * tags. If we find a conflict, clear that bit from tagbits
+ */
+ hlist_for_each_entry(tmp, &mns->keys, hlist) {
+ /* if we don't own the tag, it can't conflict */
+ if (tmp->tag & MCTP_HDR_FLAG_TO)
+ continue;
+
+ if ((tmp->peer_addr == daddr ||
+ tmp->peer_addr == MCTP_ADDR_ANY) &&
+ tmp->local_addr == saddr)
+ tagbits &= ~(1 << tmp->tag);
+
+ if (!tagbits)
+ break;
+ }
+
+ if (tagbits) {
+ key->tag = __ffs(tagbits);
+ mctp_reserve_tag(net, key, msk);
+ *tagp = key->tag;
+ rc = 0;
+ }
+
+ spin_unlock_irqrestore(&mns->keys_lock, flags);
+
+ if (!tagbits)
+ kfree(key);
+
+ return rc;
+}
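The allocator above treats the eight possible tag values as a free bitmask: clear the bit of every key that could conflict, then pick the lowest free bit. A standalone sketch of that bookkeeping (illustrative names, not the kernel API):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

static int alloc_tag(const uint8_t *used_tags, unsigned int n_used)
{
	uint8_t tagbits = 0xff;		/* 8 possible tag values */
	unsigned int i;

	for (i = 0; i < n_used; i++)
		tagbits &= ~(1u << used_tags[i]);

	if (!tagbits)
		return -1;		/* all tags in use; caller would retry */

	return ffs(tagbits) - 1;	/* lowest free tag, like __ffs() */
}

int main(void)
{
	uint8_t used[] = { 0, 1, 3 };

	printf("allocated tag %d\n", alloc_tag(used, 3));	/* 2 */
	return 0;
}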
+
+/* routing lookups */
+static bool mctp_rt_match_eid(struct mctp_route *rt,
+ unsigned int net, mctp_eid_t eid)
+{
+ return READ_ONCE(rt->dev->net) == net &&
+ rt->min <= eid && rt->max >= eid;
+}
+
+/* compares routes for an exact match; used for duplicate prevention */
+static bool mctp_rt_compare_exact(struct mctp_route *rt1,
+ struct mctp_route *rt2)
+{
+ ASSERT_RTNL();
+ return rt1->dev->net == rt2->dev->net &&
+ rt1->min == rt2->min &&
+ rt1->max == rt2->max;
+}
+
+struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
+ mctp_eid_t daddr)
+{
+ struct mctp_route *tmp, *rt = NULL;
+
+ list_for_each_entry_rcu(tmp, &net->mctp.routes, list) {
+ /* TODO: add metrics */
+ if (mctp_rt_match_eid(tmp, dnet, daddr)) {
+ if (refcount_inc_not_zero(&tmp->refs)) {
+ rt = tmp;
+ break;
+ }
+ }
+ }
+
+ return rt;
+}
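Each route covers an inclusive EID range [min, max] within one MCTP network, and lookup simply returns the first route whose range contains the destination. A trivial sketch of the match predicate, with made-up types:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct example_route {
	unsigned int net;
	uint8_t min, max;	/* inclusive EID range */
};

static bool route_matches(const struct example_route *rt,
			  unsigned int net, uint8_t eid)
{
	return rt->net == net && rt->min <= eid && rt->max >= eid;
}

int main(void)
{
	struct example_route rt = { .net = 1, .min = 8, .max = 15 };

	printf("%d %d\n", route_matches(&rt, 1, 10), route_matches(&rt, 1, 20));
	return 0;	/* prints "1 0" */
}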
+
+/* sends a skb to rt and releases the route. */
+int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
+{
+ int rc;
+
+ rc = rt->output(rt, skb);
+ mctp_route_release(rt);
+ return rc;
+}
+
+static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
+ unsigned int mtu, u8 tag)
+{
+ const unsigned int hlen = sizeof(struct mctp_hdr);
+ struct mctp_hdr *hdr, *hdr2;
+ unsigned int pos, size;
+ struct sk_buff *skb2;
+ int rc;
+ u8 seq;
+
+ hdr = mctp_hdr(skb);
+ seq = 0;
+ rc = 0;
+
+ if (mtu < hlen + 1) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ /* we've got the header */
+ skb_pull(skb, hlen);
+
+ for (pos = 0; pos < skb->len;) {
+ /* size of message payload */
+ size = min(mtu - hlen, skb->len - pos);
+
+ skb2 = alloc_skb(MCTP_HEADER_MAXLEN + hlen + size, GFP_KERNEL);
+ if (!skb2) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ /* generic skb copy */
+ skb2->protocol = skb->protocol;
+ skb2->priority = skb->priority;
+ skb2->dev = skb->dev;
+ memcpy(skb2->cb, skb->cb, sizeof(skb2->cb));
+
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+
+ /* establish packet */
+ skb_reserve(skb2, MCTP_HEADER_MAXLEN);
+ skb_reset_network_header(skb2);
+ skb_put(skb2, hlen + size);
+ skb2->transport_header = skb2->network_header + hlen;
+
+ /* copy header fields, calculate SOM/EOM flags & seq */
+ hdr2 = mctp_hdr(skb2);
+ hdr2->ver = hdr->ver;
+ hdr2->dest = hdr->dest;
+ hdr2->src = hdr->src;
+ hdr2->flags_seq_tag = tag &
+ (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
+
+ if (pos == 0)
+ hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM;
+
+ if (pos + size == skb->len)
+ hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM;
+
+ hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT;
+
+ /* copy message payload */
+ skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
+
+ /* do route, but don't drop the rt reference */
+ rc = rt->output(rt, skb2);
+ if (rc)
+ break;
+
+ seq = (seq + 1) & MCTP_HDR_SEQ_MASK;
+ pos += size;
+ }
+
+ mctp_route_release(rt);
+ consume_skb(skb);
+ return rc;
+}
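A worked example of the fragmentation arithmetic above: the header (assumed 4 bytes here) is emitted once per fragment, the payload is sliced into (mtu - hlen) sized chunks, SOM marks the first chunk, EOM the last, and the 2-bit sequence number wraps. This is a standalone sketch, not the kernel code:

#include <stdio.h>

#define HLEN 4		/* assumed sizeof(struct mctp_hdr) */

int main(void)
{
	unsigned int mtu = 68, payload = 200;	/* example sizes */
	unsigned int pos, size, seq = 0;

	for (pos = 0; pos < payload; pos += size, seq = (seq + 1) & 0x3) {
		size = mtu - HLEN;
		if (size > payload - pos)
			size = payload - pos;

		/* prints fragments of 68, 68, 68 and 12 bytes, seq 0..3 */
		printf("frag seq=%u len=%u%s%s\n", seq, HLEN + size,
		       pos == 0 ? " SOM" : "",
		       pos + size == payload ? " EOM" : "");
	}
	return 0;
}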
+
+int mctp_local_output(struct sock *sk, struct mctp_route *rt,
+ struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag)
+{
+ struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
+ struct mctp_skb_cb *cb = mctp_cb(skb);
+ struct mctp_hdr *hdr;
+ unsigned long flags;
+ unsigned int mtu;
+ mctp_eid_t saddr;
+ int rc;
+ u8 tag;
+
+ if (WARN_ON(!rt->dev))
+ return -EINVAL;
+
+ spin_lock_irqsave(&rt->dev->addrs_lock, flags);
+ if (rt->dev->num_addrs == 0) {
+ rc = -EHOSTUNREACH;
+ } else {
+ /* use the outbound interface's first address as our source */
+ saddr = rt->dev->addrs[0];
+ rc = 0;
+ }
+ spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
+
+ if (rc)
+ return rc;
+
+ if (req_tag & MCTP_HDR_FLAG_TO) {
+ rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
+ if (rc)
+ return rc;
+ tag |= MCTP_HDR_FLAG_TO;
+ } else {
+ tag = req_tag;
+ }
+
+ skb->protocol = htons(ETH_P_MCTP);
+ skb->priority = 0;
+ skb_reset_transport_header(skb);
+ skb_push(skb, sizeof(struct mctp_hdr));
+ skb_reset_network_header(skb);
+ skb->dev = rt->dev->dev;
+
+ /* cb->net will have been set on initial ingress */
+ cb->src = saddr;
+
+ /* set up common header fields */
+ hdr = mctp_hdr(skb);
+ hdr->ver = 1;
+ hdr->dest = daddr;
+ hdr->src = saddr;
+
+ mtu = mctp_route_mtu(rt);
+
+ if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
+ hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
+ tag;
+ return mctp_do_route(rt, skb);
+ } else {
+ return mctp_do_fragment_route(rt, skb, mtu, tag);
+ }
+}
+
+/* route management */
+static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent, unsigned int mtu,
+ unsigned char type)
+{
+ int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb);
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *ert;
+
+ if (!mctp_address_ok(daddr_start))
+ return -EINVAL;
+
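+ /* the route covers EIDs [daddr_start, daddr_start + daddr_extent];
+ * the whole range must stay below the broadcast EID (0xff)
+ */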
+ if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ return -EINVAL;
+
+ switch (type) {
+ case RTN_LOCAL:
+ rtfn = mctp_route_input;
+ break;
+ case RTN_UNICAST:
+ rtfn = mctp_route_output;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rt = mctp_route_alloc();
+ if (!rt)
+ return -ENOMEM;
+
+ rt->min = daddr_start;
+ rt->max = daddr_start + daddr_extent;
+ rt->mtu = mtu;
+ rt->dev = mdev;
+ dev_hold(rt->dev->dev);
+ rt->type = type;
+ rt->output = rtfn;
+
+ ASSERT_RTNL();
+ /* Prevent duplicate identical routes. */
+ list_for_each_entry(ert, &net->mctp.routes, list) {
+ if (mctp_rt_compare_exact(rt, ert)) {
+ mctp_route_release(rt);
+ return -EEXIST;
+ }
+ }
+
+ list_add_rcu(&rt->list, &net->mctp.routes);
+
+ return 0;
+}
+
+static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
+ unsigned int daddr_extent)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+ mctp_eid_t daddr_end;
+ bool dropped;
+
+ if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255)
+ return -EINVAL;
+
+ daddr_end = daddr_start + daddr_extent;
+ dropped = false;
+
+ ASSERT_RTNL();
+
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev &&
+ rt->min == daddr_start && rt->max == daddr_end) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+ dropped = true;
+ }
+ }
+
+ return dropped ? 0 : -ENOENT;
+}
+
+int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ return mctp_route_add(mdev, addr, 0, 0, RTN_LOCAL);
+}
+
+int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
+{
+ return mctp_route_remove(mdev, addr, 0);
+}
+
+/* removes all entries for a given device */
+void mctp_route_remove_dev(struct mctp_dev *mdev)
+{
+ struct net *net = dev_net(mdev->dev);
+ struct mctp_route *rt, *tmp;
+
+ ASSERT_RTNL();
+ list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
+ if (rt->dev == mdev) {
+ list_del_rcu(&rt->list);
+ /* TODO: immediate RTM_DELROUTE */
+ mctp_route_release(rt);
+ }
+ }
+}
+
+/* Incoming packet-handling */
+
+static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct net *net = dev_net(dev);
+ struct mctp_skb_cb *cb;
+ struct mctp_route *rt;
+ struct mctp_hdr *mh;
+
+ /* basic non-data sanity checks */
+ if (dev->type != ARPHRD_MCTP)
+ goto err_drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct mctp_hdr)))
+ goto err_drop;
+
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+
+ /* We have enough for a header; decode and route */
+ mh = mctp_hdr(skb);
+ if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
+ goto err_drop;
+
+ cb = __mctp_cb(skb);
+ rcu_read_lock();
+ cb->net = READ_ONCE(__mctp_dev_get(dev)->net);
+ rcu_read_unlock();
+
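+ /* look up a route for the destination EID; no route means drop */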
+ rt = mctp_route_lookup(net, cb->net, mh->dest);
+ if (!rt)
+ goto err_drop;
+
+ mctp_do_route(rt, skb);
+
+ return NET_RX_SUCCESS;
+
+err_drop:
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+
+static struct packet_type mctp_packet_type = {
+ .type = cpu_to_be16(ETH_P_MCTP),
+ .func = mctp_pkttype_receive,
+};
+
+/* netlink interface */
+
+static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = {
+ [RTA_DST] = { .type = NLA_U8 },
+ [RTA_METRICS] = { .type = NLA_NESTED },
+ [RTA_OIF] = { .type = NLA_U32 },
+};
+
+/* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing.
+ * tb must hold RTA_MAX+1 elements.
+ */
+static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack,
+ struct nlattr **tb, struct rtmsg **rtm,
+ struct mctp_dev **mdev, mctp_eid_t *daddr_start)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ unsigned int ifindex;
+ int rc;
+
+ rc = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX,
+ rta_mctp_policy, extack);
+ if (rc < 0) {
+ NL_SET_ERR_MSG(extack, "incorrect format");
+ return rc;
+ }
+
+ if (!tb[RTA_DST]) {
+ NL_SET_ERR_MSG(extack, "dst EID missing");
+ return -EINVAL;
+ }
+ *daddr_start = nla_get_u8(tb[RTA_DST]);
+
+ if (!tb[RTA_OIF]) {
+ NL_SET_ERR_MSG(extack, "ifindex missing");
+ return -EINVAL;
+ }
+ ifindex = nla_get_u32(tb[RTA_OIF]);
+
+ *rtm = nlmsg_data(nlh);
+ if ((*rtm)->rtm_family != AF_MCTP) {
+ NL_SET_ERR_MSG(extack, "route family must be AF_MCTP");
+ return -EINVAL;
+ }
+
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "bad ifindex");
+ return -ENODEV;
+ }
+ *mdev = mctp_dev_get_rtnl(dev);
+ if (!*mdev)
+ return -ENODEV;
+
+ if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack, "no routes to loopback");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RTA_MAX + 1];
+ mctp_eid_t daddr_start;
+ struct mctp_dev *mdev;
+ struct rtmsg *rtm;
+ unsigned int mtu;
+ int rc;
+
+ rc = mctp_route_nlparse(skb, nlh, extack, tb,
+ &rtm, &mdev, &daddr_start);
+ if (rc < 0)
+ return rc;
+
+ if (rtm->rtm_type != RTN_UNICAST) {
+ NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST");
+ return -EINVAL;
+ }
+
+ /* TODO: parse mtu from nlparse */
+ mtu = 0;
+
+ rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
+ rtm->rtm_type);
+ return rc;
+}
+
+static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[RTA_MAX + 1];
+ mctp_eid_t daddr_start;
+ struct mctp_dev *mdev;
+ struct rtmsg *rtm;
+ int rc;
+
+ rc = mctp_route_nlparse(skb, nlh, extack, tb,
+ &rtm, &mdev, &daddr_start);
+ if (rc < 0)
+ return rc;
+
+ /* we only have unicast routes */
+ if (rtm->rtm_type != RTN_UNICAST)
+ return -EINVAL;
+
+ rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
+ return rc;
+}
+
+static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt,
+ u32 portid, u32 seq, int event, unsigned int flags)
+{
+ struct nlmsghdr *nlh;
+ struct rtmsg *hdr;
+ void *metrics;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ hdr = nlmsg_data(nlh);
+ hdr->rtm_family = AF_MCTP;
+
+ /* we use the _len fields as a number of EIDs, rather than
+ * a number of bits in the address
+ */
+ hdr->rtm_dst_len = rt->max - rt->min;
+ hdr->rtm_src_len = 0;
+ hdr->rtm_tos = 0;
+ hdr->rtm_table = RT_TABLE_DEFAULT;
+ hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
+ hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */
+ hdr->rtm_type = rt->type;
+
+ if (nla_put_u8(skb, RTA_DST, rt->min))
+ goto cancel;
+
+ metrics = nla_nest_start_noflag(skb, RTA_METRICS);
+ if (!metrics)
+ goto cancel;
+
+ if (rt->mtu) {
+ if (nla_put_u32(skb, RTAX_MTU, rt->mtu))
+ goto cancel;
+ }
+
+ nla_nest_end(skb, metrics);
+
+ if (rt->dev) {
+ if (nla_put_u32(skb, RTA_OIF, rt->dev->dev->ifindex))
+ goto cancel;
+ }
+
+ /* TODO: conditional neighbour physaddr? */
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct mctp_route *rt;
+ int s_idx, idx;
+
+ /* TODO: allow filtering on route data, possibly under
+ * cb->strict_check
+ */
+
+ /* TODO: change to struct overlay */
+ s_idx = cb->args[0];
+ idx = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list) {
+ if (idx++ < s_idx)
+ continue;
+ if (mctp_fill_rtinfo(skb, rt,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWROUTE, NLM_F_MULTI) < 0)
+ break;
+ }
+
+ rcu_read_unlock();
+ cb->args[0] = idx;
+
+ return skb->len;
+}
+
+/* net namespace implementation */
+static int __net_init mctp_routes_net_init(struct net *net)
+{
+ struct netns_mctp *ns = &net->mctp;
+
+ INIT_LIST_HEAD(&ns->routes);
+ INIT_HLIST_HEAD(&ns->binds);
+ mutex_init(&ns->bind_lock);
+ INIT_HLIST_HEAD(&ns->keys);
+ spin_lock_init(&ns->keys_lock);
+ WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET));
+ return 0;
+}
+
+static void __net_exit mctp_routes_net_exit(struct net *net)
+{
+ struct mctp_route *rt;
+
+ list_for_each_entry_rcu(rt, &net->mctp.routes, list)
+ mctp_route_release(rt);
+}
+
+static struct pernet_operations mctp_net_ops = {
+ .init = mctp_routes_net_init,
+ .exit = mctp_routes_net_exit,
+};
+
+int __init mctp_routes_init(void)
+{
+ dev_add_pack(&mctp_packet_type);
+
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE,
+ NULL, mctp_dump_rtinfo, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE,
+ mctp_newroute, NULL, 0);
+ rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE,
+ mctp_delroute, NULL, 0);
+
+ return register_pernet_subsys(&mctp_net_ops);
+}
+
+void __exit mctp_routes_exit(void)
+{
+ unregister_pernet_subsys(&mctp_net_ops);
+ rtnl_unregister(PF_MCTP, RTM_DELROUTE);
+ rtnl_unregister(PF_MCTP, RTM_NEWROUTE);
+ rtnl_unregister(PF_MCTP, RTM_GETROUTE);
+ dev_remove_pack(&mctp_packet_type);
+}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 05a21dd072df..ffeb2df8be7a 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -407,7 +407,6 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
/* Verify ttl is valid */
if (dec.ttl <= 1)
goto err;
- dec.ttl -= 1;
/* Find the output device */
out_dev = rcu_dereference(nh->nh_dev);
@@ -431,6 +430,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
skb->dev = out_dev;
skb->protocol = htons(ETH_P_MPLS_UC);
+ dec.ttl -= 1;
if (unlikely(!new_header_size && dec.bos)) {
/* Penultimate hop popping */
if (!mpls_egress(dev_net(out_dev), rt, skb, dec))
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 96ba616f59bf..8b235468c88f 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -4,7 +4,9 @@
* Copyright (c) 2019, Tessares SA.
*/
+#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
+#endif
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -15,36 +17,68 @@
static int mptcp_pernet_id;
struct mptcp_pernet {
+#ifdef CONFIG_SYSCTL
struct ctl_table_header *ctl_table_hdr;
+#endif
- int mptcp_enabled;
unsigned int add_addr_timeout;
+ unsigned int stale_loss_cnt;
+ u8 mptcp_enabled;
+ u8 checksum_enabled;
+ u8 allow_join_initial_addr_port;
};
-static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
+static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
{
return net_generic(net, mptcp_pernet_id);
}
-int mptcp_is_enabled(struct net *net)
+int mptcp_is_enabled(const struct net *net)
{
return mptcp_get_pernet(net)->mptcp_enabled;
}
-unsigned int mptcp_get_add_addr_timeout(struct net *net)
+unsigned int mptcp_get_add_addr_timeout(const struct net *net)
{
return mptcp_get_pernet(net)->add_addr_timeout;
}
+int mptcp_is_checksum_enabled(const struct net *net)
+{
+ return mptcp_get_pernet(net)->checksum_enabled;
+}
+
+int mptcp_allow_join_id0(const struct net *net)
+{
+ return mptcp_get_pernet(net)->allow_join_initial_addr_port;
+}
+
+unsigned int mptcp_stale_loss_cnt(const struct net *net)
+{
+ return mptcp_get_pernet(net)->stale_loss_cnt;
+}
+
+static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
+{
+ pernet->mptcp_enabled = 1;
+ pernet->add_addr_timeout = TCP_RTO_MAX;
+ pernet->checksum_enabled = 0;
+ pernet->allow_join_initial_addr_port = 1;
+ pernet->stale_loss_cnt = 4;
+}
+
+#ifdef CONFIG_SYSCTL
static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "enabled",
- .maxlen = sizeof(int),
+ .maxlen = sizeof(u8),
.mode = 0644,
/* users with CAP_NET_ADMIN or root (not and) can change this
* value, same as other sysctl or the 'net' tree.
*/
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "add_addr_timeout",
@@ -52,15 +86,31 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "checksum_enabled",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
+ {
+ .procname = "allow_join_initial_addr_port",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
+ },
+ {
+ .procname = "stale_loss_cnt",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ },
{}
};
-static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
-{
- pernet->mptcp_enabled = 1;
- pernet->add_addr_timeout = TCP_RTO_MAX;
-}
-
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
struct ctl_table_header *hdr;
@@ -75,6 +125,9 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[0].data = &pernet->mptcp_enabled;
table[1].data = &pernet->add_addr_timeout;
+ table[2].data = &pernet->checksum_enabled;
+ table[3].data = &pernet->allow_join_initial_addr_port;
+ table[4].data = &pernet->stale_loss_cnt;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
@@ -100,6 +153,17 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
kfree(table);
}
+#else
+
+static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
+{
+ return 0;
+}
+
+static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
+
+#endif /* CONFIG_SYSCTL */
+
static int __net_init mptcp_net_init(struct net *net)
{
struct mptcp_pernet *pernet = mptcp_get_pernet(net);
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index eb2dc6dbe212..b21ff9be04c6 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -25,6 +25,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
+ SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
+ SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL),
SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE),
SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE),
@@ -42,6 +44,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
+ SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX),
+ SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX),
+ SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
+ SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
+ SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index f0da4f060fe1..ecd3d8b117e0 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -18,6 +18,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
+ MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
+ MPTCP_MIB_DATACSUMERR, /* The data checksum failed */
MPTCP_MIB_OFOQUEUETAIL, /* Segments inserted into OoO queue tail */
MPTCP_MIB_OFOQUEUE, /* Segments inserted into OoO queue */
MPTCP_MIB_OFOMERGE, /* Segments merged in OoO queue */
@@ -35,6 +37,11 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
+ MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */
+ MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */
+ MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
+ MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
+ MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f16d9b5ee978..f48eb6315bbb 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -57,10 +57,8 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
kfree_skb(rep);
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
out:
sock_put(sk);
@@ -144,6 +142,7 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
+ info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
unlock_sock_fast(sk, slow);
}
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 9b263f27ce9b..c41273cefc51 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -44,7 +44,20 @@ static void mptcp_parse_option(const struct sk_buff *skb,
else
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
}
- if (opsize != expected_opsize)
+
+ /* Cf. RFC 8684 Section 3.3.0:
+ * If a checksum is present but its use had
+ * not been negotiated in the MP_CAPABLE handshake, the receiver MUST
+ * close the subflow with a RST, as it is not behaving as negotiated.
+ * If a checksum is not present when its use has been negotiated, the
+ * receiver MUST close the subflow with a RST, as it is considered
+ * broken.
+ * We parse options even with a mismatching csum presence, so that
+ * the reset can be triggered later from subflow_data_ready.
+ */
+ if (opsize != expected_opsize &&
+ (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA ||
+ opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM))
break;
/* try to be gentle vs future versions on the initial syn */
@@ -66,18 +79,13 @@ static void mptcp_parse_option(const struct sk_buff *skb,
* host requires the use of checksums, checksums MUST be used.
* In other words, the only way for checksums not to be used
* is if both hosts in their SYNs set A=0."
- *
- * Section 3.3.0:
- * "If a checksum is not present when its use has been
- * negotiated, the receiver MUST close the subflow with a RST as
- * it is considered broken."
- *
- * We don't implement DSS checksum - fall back to TCP.
*/
if (flags & MPTCP_CAP_CHECKSUM_REQD)
- break;
+ mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
+
+ mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0);
- mp_opt->mp_capable = 1;
+ mp_opt->suboptions |= OPTIONS_MPTCP_MPC;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
ptr += 8;
@@ -86,25 +94,30 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
}
- if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
+ if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) {
/* Section 3.1.:
* "the data parameters in a MP_CAPABLE are semantically
* equivalent to those in a DSS option and can be used
* interchangeably."
*/
- mp_opt->dss = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_DSS;
mp_opt->use_map = 1;
mp_opt->mpc_map = 1;
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
}
- pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
+ if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) {
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
+ ptr += 2;
+ }
+ pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u",
version, flags, opsize, mp_opt->sndr_key,
- mp_opt->rcvr_key, mp_opt->data_len);
+ mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum);
break;
case MPTCPOPT_MP_JOIN:
- mp_opt->mp_join = 1;
+ mp_opt->suboptions |= OPTIONS_MPTCP_MPJ;
if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
@@ -130,7 +143,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
pr_debug("MP_JOIN hmac");
} else {
- mp_opt->mp_join = 0;
+ mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ;
}
break;
@@ -171,17 +184,14 @@ static void mptcp_parse_option(const struct sk_buff *skb,
expected_opsize += TCPOLEN_MPTCP_DSS_MAP32;
}
- /* RFC 6824, Section 3.3:
- * If a checksum is present, but its use had
- * not been negotiated in the MP_CAPABLE handshake,
- * the checksum field MUST be ignored.
+ /* Always parse any csum presence combination; the RFC 8684
+ * Section 3.3.0 checks are enforced later in subflow_data_ready
*/
if (opsize != expected_opsize &&
opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM)
break;
- mp_opt->dss = 1;
-
+ mp_opt->suboptions |= OPTION_MPTCP_DSS;
if (mp_opt->use_ack) {
if (mp_opt->ack64) {
mp_opt->data_ack = get_unaligned_be64(ptr);
@@ -209,9 +219,16 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->data_len = get_unaligned_be16(ptr);
ptr += 2;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u",
+ if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) {
+ mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD;
+ mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr);
+ ptr += 2;
+ }
+
+ pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
mp_opt->data_seq, mp_opt->subflow_seq,
- mp_opt->data_len);
+ mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD),
+ mp_opt->csum);
}
break;
@@ -242,8 +259,10 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
}
- mp_opt->add_addr = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_ADD_ADDR;
mp_opt->addr.id = *ptr++;
+ mp_opt->addr.port = 0;
+ mp_opt->ahmac = 0;
if (mp_opt->addr.family == AF_INET) {
memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4);
ptr += 4;
@@ -280,7 +299,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr++;
- mp_opt->rm_addr = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_RM_ADDR;
mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE;
for (i = 0; i < mp_opt->rm_list.nr; i++)
mp_opt->rm_list.ids[i] = *ptr++;
@@ -291,7 +310,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize != TCPOLEN_MPTCP_PRIO)
break;
- mp_opt->mp_prio = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_PRIO;
mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP;
pr_debug("MP_PRIO: prio=%d", mp_opt->backup);
break;
@@ -303,7 +322,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
ptr += 2;
mp_opt->rcvr_key = get_unaligned_be64(ptr);
ptr += 8;
- mp_opt->fastclose = 1;
+ mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE;
break;
case MPTCPOPT_RST:
@@ -312,18 +331,30 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
break;
- mp_opt->reset = 1;
+
+ mp_opt->suboptions |= OPTION_MPTCP_RST;
flags = *ptr++;
mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT;
mp_opt->reset_reason = *ptr;
break;
+ case MPTCPOPT_MP_FAIL:
+ if (opsize != TCPOLEN_MPTCP_FAIL)
+ break;
+
+ ptr += 2;
+ mp_opt->suboptions |= OPTION_MPTCP_FAIL;
+ mp_opt->fail_seq = get_unaligned_be64(ptr);
+ pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq);
+ break;
+
default:
break;
}
}
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
{
const struct tcphdr *th = tcp_hdr(skb);
@@ -331,16 +362,7 @@ void mptcp_get_options(const struct sk_buff *skb,
int length;
/* initialize option status */
- mp_opt->mp_capable = 0;
- mp_opt->mp_join = 0;
- mp_opt->add_addr = 0;
- mp_opt->ahmac = 0;
- mp_opt->fastclose = 0;
- mp_opt->addr.port = 0;
- mp_opt->rm_addr = 0;
- mp_opt->dss = 0;
- mp_opt->mp_prio = 0;
- mp_opt->reset = 0;
+ mp_opt->suboptions = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@@ -382,6 +404,8 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN;
+ opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
+ opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@@ -437,8 +461,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_ext *mpext;
unsigned int data_len;
+ u8 len;
/* When skb is not available, we better over-estimate the emitted
* options len. A full DSS option (28 bytes) is longer than
@@ -467,16 +493,27 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
+ opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
/* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
* packets that start the first subflow of an MPTCP connection,
* as well as the first packet that carries data
*/
- if (data_len > 0)
- *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
- else
+ if (data_len > 0) {
+ len = TCPOLEN_MPTCP_MPC_ACK_DATA;
+ if (opts->csum_reqd) {
+ /* we need to propagate more info to csum the pseudo hdr */
+ opts->ext_copy.data_seq = mpext->data_seq;
+ opts->ext_copy.subflow_seq = mpext->subflow_seq;
+ opts->ext_copy.csum = mpext->csum;
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ }
+ *size = ALIGN(len, 4);
+ } else {
*size = TCPOLEN_MPTCP_MPC_ACK;
+ }
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
subflow, subflow->local_key, subflow->remote_key,
@@ -537,20 +574,24 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
bool ret = false;
u64 ack_seq;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
- unsigned int map_size;
+ unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
- map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
+ if (mpext) {
+ if (opts->csum_reqd)
+ map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
- remaining -= map_size;
- dss_size = map_size;
- if (mpext)
opts->ext_copy = *mpext;
+ }
+ remaining -= map_size;
+ dss_size = map_size;
if (skb && snd_data_fin_enable)
mptcp_write_data_fin(subflow, skb, &opts->ext_copy);
+ opts->suboptions = OPTION_MPTCP_DSS;
ret = true;
}
@@ -574,6 +615,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
+ opts->suboptions = OPTION_MPTCP_DSS;
WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk));
/* Add kind/length/subtype/flag overhead if mapping is not populated */
@@ -626,29 +668,34 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
bool port;
int len;
- if ((mptcp_pm_should_add_signal_ipv6(msk) ||
- mptcp_pm_should_add_signal_port(msk) ||
- mptcp_pm_should_add_signal_echo(msk)) &&
- skb && skb_is_tcp_pure_ack(skb)) {
- pr_debug("drop other suboptions");
- opts->suboptions = 0;
- opts->ext_copy.use_ack = 0;
- opts->ext_copy.use_map = 0;
- remaining += opt_size;
- drop_other_suboptions = true;
- }
-
+ /* add addr will strip the existing options, be sure to avoid breaking
+ * MPC/MPJ handshakes
+ */
if (!mptcp_pm_should_add_signal(msk) ||
- !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port)))
+ (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) ||
+ !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr,
+ &echo, &port, &drop_other_suboptions))
return false;
+ if (drop_other_suboptions)
+ remaining += opt_size;
len = mptcp_add_addr_len(opts->addr.family, echo, port);
if (remaining < len)
return false;
*size = len;
- if (drop_other_suboptions)
+ if (drop_other_suboptions) {
+ pr_debug("drop other suboptions");
+ opts->suboptions = 0;
+
+ /* note that e.g. DSS could have written into the memory
+ * aliased by ahmac; we must reset the field here to avoid
+ * appending the hmac even for ADD_ADDR echo options
+ */
+ opts->ahmac = 0;
*size -= opt_size;
+ }
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
opts->ahmac = add_addr_generate_hmac(msk->local_key,
@@ -698,7 +745,12 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- if (!subflow->send_mp_prio)
+ /* can't send MP_PRIO with MPC, as they share the same option
+ * space (the 'backup' field); sending both makes no sense anyway
+ */
+ if (!subflow->send_mp_prio ||
+ ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+ OPTION_MPTCP_MPC_ACK) & opts->suboptions))
return false;
/* account for the trailing 'nop' option */
@@ -714,7 +766,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk,
return true;
}
-static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
+static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
@@ -722,12 +774,36 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu
const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
if (remaining < TCPOLEN_MPTCP_RST)
- return;
+ return false;
*size = TCPOLEN_MPTCP_RST;
opts->suboptions |= OPTION_MPTCP_RST;
opts->reset_transient = subflow->reset_transient;
opts->reset_reason = subflow->reset_reason;
+
+ return true;
+}
+
+static bool mptcp_established_options_mp_fail(struct sock *sk,
+ unsigned int *size,
+ unsigned int remaining,
+ struct mptcp_out_options *opts)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+
+ if (likely(!subflow->send_mp_fail))
+ return false;
+
+ if (remaining < TCPOLEN_MPTCP_FAIL)
+ return false;
+
+ *size = TCPOLEN_MPTCP_FAIL;
+ opts->suboptions |= OPTION_MPTCP_FAIL;
+ opts->fail_seq = subflow->map_seq;
+
+ pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+
+ return true;
}
bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
@@ -746,15 +822,28 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
- mptcp_established_options_rst(sk, skb, size, remaining, opts);
+ if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ }
+ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ }
return true;
}
snd_data_fin = mptcp_data_fin_enabled(msk);
if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) {
ret = true;
+ if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
+ *size += opt_size;
+ remaining -= opt_size;
+ return true;
+ }
+ }
/* we reserved enough space for the above options, and exceeding the
* TCP option space would be fatal
@@ -791,6 +880,8 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
if (subflow_req->mp_capable) {
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key;
+ opts->csum_reqd = subflow_req->csum_reqd;
+ opts->allow_join_id0 = subflow_req->allow_join_id0;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
@@ -825,7 +916,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
*/
if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
- subflow->mp_join && mp_opt->mp_join &&
+ subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
READ_ONCE(msk->pm.server_side))
tcp_send_ack(ssk);
goto fully_established;
@@ -842,25 +933,21 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return subflow->mp_capable;
}
- if (mp_opt->dss && mp_opt->use_ack) {
+ if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) ||
+ ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) {
/* subflows are fully established as soon as we get any
- * additional ack.
+ * additional ack, including ADD_ADDR.
*/
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
goto fully_established;
}
- if (mp_opt->add_addr) {
- WRITE_ONCE(msk->fully_established, true);
- return true;
- }
-
/* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP. Fallback scenarios requires a reset for
* MP_JOIN subflows.
*/
- if (!mp_opt->mp_capable) {
+ if (!(mp_opt->suboptions & OPTIONS_MPTCP_MPC)) {
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0;
@@ -869,6 +956,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return false;
}
+ if (mp_opt->deny_join_id0)
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
+
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
@@ -896,19 +986,20 @@ reset:
return false;
}
-static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)
+u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
{
- u32 old_ack32, cur_ack32;
-
- if (use_64bit)
- return cur_ack;
-
- old_ack32 = (u32)old_ack;
- cur_ack32 = (u32)cur_ack;
- cur_ack = (old_ack & GENMASK_ULL(63, 32)) + cur_ack32;
- if (unlikely(before(cur_ack32, old_ack32)))
- return cur_ack + (1LL << 32);
- return cur_ack;
+ u32 old_seq32, cur_seq32;
+
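+ /* expand the 32-bit cur_seq into 64 bits relative to old_seq,
+ * accounting for wrap-around in either direction
+ */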
+ old_seq32 = (u32)old_seq;
+ cur_seq32 = (u32)cur_seq;
+ cur_seq = (old_seq & GENMASK_ULL(63, 32)) + cur_seq32;
+ if (unlikely(cur_seq32 < old_seq32 && before(old_seq32, cur_seq32)))
+ return cur_seq + (1LL << 32);
+
+ /* reverse wrap could happen, too */
+ if (unlikely(cur_seq32 > old_seq32 && after(old_seq32, cur_seq32)))
+ return cur_seq - (1LL << 32);
+ return cur_seq;
}
static void ack_update_msk(struct mptcp_sock *msk,
@@ -926,11 +1017,13 @@ static void ack_update_msk(struct mptcp_sock *msk,
* more dangerous than missing an ack
*/
old_snd_una = msk->snd_una;
- new_snd_una = expand_ack(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
+ new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64);
- /* ACK for data not even sent yet? Ignore. */
- if (after64(new_snd_una, snd_nxt))
- new_snd_una = old_snd_una;
+ /* ACK for data not even sent yet, and above the recovery bound? Ignore. */
+ if (unlikely(after64(new_snd_una, snd_nxt))) {
+ if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt))
+ new_snd_una = old_snd_una;
+ }
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
@@ -963,7 +1056,7 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool us
return false;
WRITE_ONCE(msk->rcv_data_fin_seq,
- expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
+ mptcp_expand_seq(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
WRITE_ONCE(msk->rcv_data_fin, 1);
return true;
@@ -988,7 +1081,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
return hmac == mp_opt->ahmac;
}
-void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
+/* Return false if a subflow has been reset, else return true */
+bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
@@ -1006,55 +1100,62 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
__mptcp_check_push(subflow->conn, sk);
__mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn);
- return;
+ return true;
}
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
+
+ /* The subflow can be in close state only if check_fully_established()
+ * just sent a reset. If so, tell the caller to ignore the current packet.
+ */
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
- return;
+ return sk->sk_state != TCP_CLOSE;
- if (mp_opt.fastclose &&
- msk->local_key == mp_opt.rcvr_key) {
- WRITE_ONCE(msk->rcv_fastclose, true);
- mptcp_schedule_work((struct sock *)msk);
- }
+ if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
+ if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
+ msk->local_key == mp_opt.rcvr_key) {
+ WRITE_ONCE(msk->rcv_fastclose, true);
+ mptcp_schedule_work((struct sock *)msk);
+ }
- if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) {
- if (!mp_opt.echo) {
- mptcp_pm_add_addr_received(msk, &mp_opt.addr);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
- } else {
- mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
- mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
+ if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
+ add_addr_hmac_valid(msk, &mp_opt)) {
+ if (!mp_opt.echo) {
+ mptcp_pm_add_addr_received(msk, &mp_opt.addr);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
+ } else {
+ mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
+ mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
+ }
+
+ if (mp_opt.addr.port)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
}
- if (mp_opt.addr.port)
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD);
+ if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR)
+ mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
- mp_opt.add_addr = 0;
- }
+ if (mp_opt.suboptions & OPTION_MPTCP_PRIO) {
+ mptcp_pm_mp_prio_received(sk, mp_opt.backup);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
+ }
- if (mp_opt.rm_addr) {
- mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list);
- mp_opt.rm_addr = 0;
- }
+ if (mp_opt.suboptions & OPTION_MPTCP_FAIL) {
+ mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX);
+ }
- if (mp_opt.mp_prio) {
- mptcp_pm_mp_prio_received(sk, mp_opt.backup);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX);
- mp_opt.mp_prio = 0;
- }
+ if (mp_opt.suboptions & OPTION_MPTCP_RST) {
+ subflow->reset_seen = 1;
+ subflow->reset_reason = mp_opt.reset_reason;
+ subflow->reset_transient = mp_opt.reset_transient;
+ }
- if (mp_opt.reset) {
- subflow->reset_seen = 1;
- subflow->reset_reason = mp_opt.reset_reason;
- subflow->reset_transient = mp_opt.reset_transient;
+ if (!(mp_opt.suboptions & OPTION_MPTCP_DSS))
+ return true;
}
- if (!mp_opt.dss)
- return;
-
/* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will get stuck
*/
@@ -1072,16 +1173,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
schedule_work(&msk->work))
sock_hold(subflow->conn);
- return;
+ return true;
}
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext)
- return;
+ return true;
memset(mpext, 0, sizeof(*mpext));
- if (mp_opt.use_map) {
+ if (likely(mp_opt.use_map)) {
if (mp_opt.mpc_map) {
/* this is an MP_CAPABLE carrying MPTCP data
* we know this map the first chunk of data
@@ -1101,7 +1202,13 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
mpext->data_len = mp_opt.data_len;
mpext->use_map = 1;
+ mpext->csum_reqd = !!(mp_opt.suboptions & OPTION_MPTCP_CSUMREQD);
+
+ if (mpext->csum_reqd)
+ mpext->csum = mp_opt.csum;
}
+
+ return true;
}
static void mptcp_set_rwin(const struct tcp_sock *tp)
@@ -1120,25 +1227,133 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+ struct csum_pseudo_header header;
+ __wsum csum;
+
+ /* cf. RFC 8684 Section 3.3.1:
+ * the data sequence number used in the pseudo-header is
+ * always the 64-bit value, irrespective of what length is used in the
+ * DSS option itself.
+ */
+ header.data_seq = cpu_to_be64(mpext->data_seq);
+ header.subflow_seq = htonl(mpext->subflow_seq);
+ header.data_len = htons(mpext->data_len);
+ header.csum = 0;
+
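+ /* fold the DSS pseudo-header into the data checksum carried in
+ * the mptcp extension
+ */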
+ csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+ return (__force u16)csum_fold(csum);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
- if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
- OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
- u8 len;
+ if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_fail = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL,
+ TCPOLEN_MPTCP_FAIL,
+ 0, 0);
+ put_unaligned_be64(opts->fail_seq, ptr);
+ ptr += 2;
+ }
+
+ /* RST is mutually exclusive with everything else */
+ if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) {
+ *ptr++ = mptcp_option(MPTCPOPT_RST,
+ TCPOLEN_MPTCP_RST,
+ opts->reset_transient,
+ opts->reset_reason);
+ return;
+ }
+
+ /* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see
+ * mptcp_established_options*()
+ */
+ if (likely(OPTION_MPTCP_DSS & opts->suboptions)) {
+ struct mptcp_ext *mpext = &opts->ext_copy;
+ u8 len = TCPOLEN_MPTCP_DSS_BASE;
+ u8 flags = 0;
+
+ if (mpext->use_ack) {
+ flags = MPTCP_DSS_HAS_ACK;
+ if (mpext->ack64) {
+ len += TCPOLEN_MPTCP_DSS_ACK64;
+ flags |= MPTCP_DSS_ACK64;
+ } else {
+ len += TCPOLEN_MPTCP_DSS_ACK32;
+ }
+ }
+
+ if (mpext->use_map) {
+ len += TCPOLEN_MPTCP_DSS_MAP64;
+
+ /* Use only 64-bit mapping flags for now, add
+ * support for optional 32-bit mappings later.
+ */
+ flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
+ if (mpext->data_fin)
+ flags |= MPTCP_DSS_DATA_FIN;
+
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ }
+
+ *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
- if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
+ if (mpext->use_ack) {
+ if (mpext->ack64) {
+ put_unaligned_be64(mpext->data_ack, ptr);
+ ptr += 2;
+ } else {
+ put_unaligned_be32(mpext->data_ack32, ptr);
+ ptr += 1;
+ }
+ }
+
+ if (mpext->use_map) {
+ put_unaligned_be64(mpext->data_seq, ptr);
+ ptr += 2;
+ put_unaligned_be32(mpext->subflow_seq, ptr);
+ ptr += 1;
+ if (opts->csum_reqd) {
+ put_unaligned_be32(mpext->data_len << 16 |
+ mptcp_make_csum(mpext), ptr);
+ } else {
+ put_unaligned_be32(mpext->data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
+ }
+ } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
+ OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
+ u8 len, flag = MPTCP_CAP_HMAC_SHA256;
+
+ if (OPTION_MPTCP_MPC_SYN & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYN;
- else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
+ } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK;
- else if (opts->ext_copy.data_len)
+ } else if (opts->ext_copy.data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
- else
+ if (opts->csum_reqd)
+ len += TCPOLEN_MPTCP_DSS_CHECKSUM;
+ } else {
len = TCPOLEN_MPTCP_MPC_ACK;
+ }
+
+ if (opts->csum_reqd)
+ flag |= MPTCP_CAP_CHECKSUM_REQD;
+
+ if (!opts->allow_join_id0)
+ flag |= MPTCP_CAP_DENY_JOIN_ID0;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION,
- MPTCP_CAP_HMAC_SHA256);
+ flag);
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
opts->suboptions))
@@ -1154,13 +1369,39 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (!opts->ext_copy.data_len)
goto mp_capable_done;
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ if (opts->csum_reqd) {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ mptcp_make_csum(&opts->ext_copy), ptr);
+ } else {
+ put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+ }
ptr += 1;
- }
-mp_capable_done:
- if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
+ /* MPC is additionally mutually exclusive with MP_PRIO */
+ goto mp_capable_done;
+ } else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYN,
+ opts->backup, opts->join_id);
+ put_unaligned_be32(opts->token, ptr);
+ ptr += 1;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_SYNACK,
+ opts->backup, opts->join_id);
+ put_unaligned_be64(opts->thmac, ptr);
+ ptr += 2;
+ put_unaligned_be32(opts->nonce, ptr);
+ ptr += 1;
+ } else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
+ *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
+ TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
+ memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
+ ptr += 5;
+ } else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) {
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
u8 echo = MPTCP_ADDR_ECHO;
@@ -1218,6 +1459,19 @@ mp_capable_done:
}
}
+ if (OPTION_MPTCP_PRIO & opts->suboptions) {
+ const struct sock *ssk = (const struct sock *)tp;
+ struct mptcp_subflow_context *subflow;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ subflow->send_mp_prio = 0;
+
+ *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
+ TCPOLEN_MPTCP_PRIO,
+ opts->backup, TCPOPT_NOP);
+ }
+
+mp_capable_done:
if (OPTION_MPTCP_RM_ADDR & opts->suboptions) {
u8 i = 1;
@@ -1238,99 +1492,6 @@ mp_capable_done:
}
}
- if (OPTION_MPTCP_PRIO & opts->suboptions) {
- const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
-
- subflow = mptcp_subflow_ctx(ssk);
- subflow->send_mp_prio = 0;
-
- *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
- TCPOLEN_MPTCP_PRIO,
- opts->backup, TCPOPT_NOP);
- }
-
- if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYN,
- opts->backup, opts->join_id);
- put_unaligned_be32(opts->token, ptr);
- ptr += 1;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- }
-
- if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_SYNACK,
- opts->backup, opts->join_id);
- put_unaligned_be64(opts->thmac, ptr);
- ptr += 2;
- put_unaligned_be32(opts->nonce, ptr);
- ptr += 1;
- }
-
- if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) {
- *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN,
- TCPOLEN_MPTCP_MPJ_ACK, 0, 0);
- memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN);
- ptr += 5;
- }
-
- if (OPTION_MPTCP_RST & opts->suboptions)
- *ptr++ = mptcp_option(MPTCPOPT_RST,
- TCPOLEN_MPTCP_RST,
- opts->reset_transient,
- opts->reset_reason);
-
- if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
- struct mptcp_ext *mpext = &opts->ext_copy;
- u8 len = TCPOLEN_MPTCP_DSS_BASE;
- u8 flags = 0;
-
- if (mpext->use_ack) {
- flags = MPTCP_DSS_HAS_ACK;
- if (mpext->ack64) {
- len += TCPOLEN_MPTCP_DSS_ACK64;
- flags |= MPTCP_DSS_ACK64;
- } else {
- len += TCPOLEN_MPTCP_DSS_ACK32;
- }
- }
-
- if (mpext->use_map) {
- len += TCPOLEN_MPTCP_DSS_MAP64;
-
- /* Use only 64-bit mapping flags for now, add
- * support for optional 32-bit mappings later.
- */
- flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
- if (mpext->data_fin)
- flags |= MPTCP_DSS_DATA_FIN;
- }
-
- *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
-
- if (mpext->use_ack) {
- if (mpext->ack64) {
- put_unaligned_be64(mpext->data_ack, ptr);
- ptr += 2;
- } else {
- put_unaligned_be32(mpext->data_ack32, ptr);
- ptr += 1;
- }
- }
-
- if (mpext->use_map) {
- put_unaligned_be64(mpext->data_seq, ptr);
- ptr += 2;
- put_unaligned_be32(mpext->subflow_seq, ptr);
- ptr += 1;
- put_unaligned_be32(mpext->data_len << 16 |
- TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
- }
- }
-
if (tp)
mptcp_set_rwin(tp);
}
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 9d00fa6d22e9..6ab386ff3294 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -10,6 +10,8 @@
#include <net/mptcp.h>
#include "protocol.h"
+#include "mib.h"
+
/* path manager command handlers */
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
@@ -18,23 +20,23 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
{
u8 add_addr = READ_ONCE(msk->pm.addr_signal);
- pr_debug("msk=%p, local_id=%d", msk, addr->id);
+ pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo);
lockdep_assert_held(&msk->pm.lock);
- if (add_addr) {
- pr_warn("addr_signal error, add_addr=%d", add_addr);
+ if (add_addr &
+ (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
+ pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
return -EINVAL;
}
- msk->pm.local = *addr;
- add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
- if (echo)
+ if (echo) {
+ msk->pm.remote = *addr;
add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
- if (addr->family == AF_INET6)
- add_addr |= BIT(MPTCP_ADD_ADDR_IPV6);
- if (addr->port)
- add_addr |= BIT(MPTCP_ADD_ADDR_PORT);
+ } else {
+ msk->pm.local = *addr;
+ add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
+ }
WRITE_ONCE(msk->pm.addr_signal, add_addr);
return 0;
}
@@ -247,12 +249,21 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
+{
+ pr_debug("fail_seq=%llu", fail_seq);
+}
+
/* path manager helpers */
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
- struct mptcp_addr_info *saddr, bool *echo, bool *port)
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+ unsigned int opt_size, unsigned int remaining,
+ struct mptcp_addr_info *addr, bool *echo,
+ bool *port, bool *drop_other_suboptions)
{
int ret = false;
+ u8 add_addr;
+ u8 family;
spin_lock_bh(&msk->pm.lock);
@@ -260,14 +271,30 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
if (!mptcp_pm_should_add_signal(msk))
goto out_unlock;
+ /* always drop all other options for a pure-ack ADD_ADDR; this is a
+ * plain dup-ack from the TCP perspective. Any other MPTCP-relevant
+ * info will be carried by the 'original' TCP ack
+ */
+ if (skb && skb_is_tcp_pure_ack(skb)) {
+ remaining += opt_size;
+ *drop_other_suboptions = true;
+ }
+
*echo = mptcp_pm_should_add_signal_echo(msk);
- *port = mptcp_pm_should_add_signal_port(msk);
+ *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
- if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo, *port))
+ family = *echo ? msk->pm.remote.family : msk->pm.local.family;
+ if (remaining < mptcp_add_addr_len(family, *echo, *port))
goto out_unlock;
- *saddr = msk->pm.local;
- WRITE_ONCE(msk->pm.addr_signal, 0);
+ if (*echo) {
+ *addr = msk->pm.remote;
+ add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
+ } else {
+ *addr = msk->pm.local;
+ add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
+ }
+ WRITE_ONCE(msk->pm.addr_signal, add_addr);
ret = true;
out_unlock:
@@ -279,6 +306,7 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list)
{
int ret = false, len;
+ u8 rm_addr;
spin_lock_bh(&msk->pm.lock);
@@ -286,16 +314,17 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
if (!mptcp_pm_should_rm_signal(msk))
goto out_unlock;
+ rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
if (len < 0) {
- WRITE_ONCE(msk->pm.addr_signal, 0);
+ WRITE_ONCE(msk->pm.addr_signal, rm_addr);
goto out_unlock;
}
if (remaining < len)
goto out_unlock;
*rm_list = msk->pm.rm_list_tx;
- WRITE_ONCE(msk->pm.addr_signal, 0);
+ WRITE_ONCE(msk->pm.addr_signal, rm_addr);
ret = true;
out_unlock:
@@ -308,6 +337,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return mptcp_pm_nl_get_local_id(msk, skc);
}
+void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);
+
+ /* keep track of rtx periods with no progress */
+ if (!subflow->stale_count) {
+ subflow->stale_rcv_tstamp = rcv_tstamp;
+ subflow->stale_count++;
+ } else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
+ if (subflow->stale_count < U8_MAX)
+ subflow->stale_count++;
+ mptcp_pm_nl_subflow_chk_stale(msk, ssk);
+ } else {
+ subflow->stale_count = 0;
+ mptcp_subflow_set_active(subflow);
+ }
+}
+
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
msk->pm.add_addr_signaled = 0;
@@ -320,6 +368,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.addr_signal, 0);
WRITE_ONCE(msk->pm.accept_addr, false);
WRITE_ONCE(msk->pm.accept_subflow, false);
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;
spin_lock_init(&msk->pm.lock);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2469e06a3a9d..c4f9a5ce3815 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry {
struct mptcp_addr_info addr;
u8 flags;
int ifindex;
- struct rcu_head rcu;
struct socket *lsk;
};
@@ -47,6 +46,7 @@ struct pm_nl_pernet {
spinlock_t lock;
struct list_head local_addr_list;
unsigned int addrs;
+ unsigned int stale_loss_cnt;
unsigned int add_addr_signal_max;
unsigned int add_addr_accept_max;
unsigned int local_addr_max;
@@ -317,14 +317,14 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
if (!entry->addr.id)
return;
- if (mptcp_pm_should_add_signal(msk)) {
+ if (mptcp_pm_should_add_signal_addr(msk)) {
sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
goto out;
}
spin_lock_bh(&msk->pm.lock);
- if (!mptcp_pm_should_add_signal(msk)) {
+ if (!mptcp_pm_should_add_signal_addr(msk)) {
pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
mptcp_pm_announce_addr(msk, &entry->addr, false);
mptcp_pm_add_addr_send_ack(msk);
@@ -410,6 +410,55 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
+static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr,
+ struct mptcp_addr_info *addr)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ if (addresses_equal(&addrs[i], addr, addr->port))
+ return true;
+ }
+
+ return false;
+}
+
+/* Fill all the remote addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+ struct mptcp_addr_info *addrs)
+{
+ struct sock *sk = (struct sock *)msk, *ssk;
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_addr_info remote = { 0 };
+ unsigned int subflows_max;
+ int i = 0;
+
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ /* Non-fullmesh endpoint, fill in the single entry
+ * corresponding to the primary MPC subflow remote address
+ */
+ if (!fullmesh) {
+ remote_address((struct sock_common *)sk, &remote);
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ } else {
+ mptcp_for_each_subflow(msk, subflow) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ remote_address((struct sock_common *)ssk, &remote);
+ if (!lookup_address_in_vec(addrs, i, &remote) &&
+ msk->pm.subflows < subflows_max) {
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ }
+ }
+ }
+
+ return i;
+}
+
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@@ -451,18 +500,20 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check if should create a new subflow */
if (msk->pm.local_addr_used < local_addr_max &&
- msk->pm.subflows < subflows_max) {
+ msk->pm.subflows < subflows_max &&
+ !READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
- struct mptcp_addr_info remote = { 0 };
+ bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ int i, nr;
msk->pm.local_addr_used++;
- msk->pm.subflows++;
check_work_pending(msk);
- remote_address((struct sock_common *)sk, &remote);
+ nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local->addr, &remote,
- local->flags, local->ifindex);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
return;
}
@@ -483,13 +534,67 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
mptcp_pm_create_subflow_or_signal_addr(msk);
}
+/* Fill all the local addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addrs)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_addr_info local;
+ struct pm_nl_pernet *pernet;
+ unsigned int subflows_max;
+ int i = 0;
+
+ pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ rcu_read_lock();
+ __mptcp_flush_join_list(msk);
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
+ continue;
+
+ if (entry->addr.family != sk->sk_family) {
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if ((entry->addr.family == AF_INET &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
+ (sk->sk_family == AF_INET &&
+ !ipv6_addr_v4mapped(&entry->addr.addr6)))
+#endif
+ continue;
+ }
+
+ if (msk->pm.subflows < subflows_max) {
+ msk->pm.subflows++;
+ addrs[i++] = entry->addr;
+ }
+ }
+ rcu_read_unlock();
+
+ /* If the array is empty, fill in the single
+ * 'IPADDRANY' local address
+ */
+ if (!i) {
+ memset(&local, 0, sizeof(local));
+ local.family = msk->pm.remote.family;
+
+ msk->pm.subflows++;
+ addrs[i++] = local;
+ }
+
+ return i;
+}
+
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
- struct mptcp_addr_info local;
unsigned int subflows_max;
+ int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
@@ -501,23 +606,22 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
goto add_addr_echo;
- msk->pm.add_addr_accepted++;
- msk->pm.subflows++;
- if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
- msk->pm.subflows >= subflows_max)
- WRITE_ONCE(msk->pm.accept_addr, false);
-
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
remote = msk->pm.remote;
if (!remote.port)
remote.port = sk->sk_dport;
- memset(&local, 0, sizeof(local));
- local.family = remote.family;
+ nr = fill_local_addresses_vec(msk, addrs);
+
+ msk->pm.add_addr_accepted++;
+ if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+ msk->pm.subflows >= subflows_max)
+ WRITE_ONCE(msk->pm.accept_addr, false);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local, &remote, 0, 0);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
add_addr_echo:
@@ -542,14 +646,10 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
spin_unlock_bh(&msk->pm.lock);
- pr_debug("send ack for %s%s%s",
- mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr",
- mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
- mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
-
- lock_sock(ssk);
- tcp_send_ack(ssk);
- release_sock(ssk);
+ pr_debug("send ack for %s",
+ mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
+
+ mptcp_subflow_send_ack(ssk);
spin_lock_bh(&msk->pm.lock);
}
}
@@ -578,9 +678,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for mp_prio");
- lock_sock(ssk);
- tcp_send_ack(ssk);
- release_sock(ssk);
+ mptcp_subflow_send_ack(ssk);
spin_lock_bh(&msk->pm.lock);
return 0;
@@ -897,6 +995,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
[MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
};
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = (struct sock *)msk;
+ unsigned int active_max_loss_cnt;
+ struct net *net = sock_net(sk);
+ unsigned int stale_loss_cnt;
+ bool slow;
+
+ stale_loss_cnt = mptcp_stale_loss_cnt(net);
+ if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
+ return;
+
+ /* look for another available subflow not in loss state */
+ active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
+ mptcp_for_each_subflow(msk, iter) {
+ if (iter != subflow && mptcp_subflow_active(iter) &&
+ iter->stale_count < active_max_loss_cnt) {
+ /* we have some alternatives, try to mark this subflow as idle ...*/
+ slow = lock_sock_fast(ssk);
+ if (!tcp_rtx_and_write_queues_empty(ssk)) {
+ subflow->stale = 1;
+ __mptcp_retransmit_pending_data(sk);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
+ }
+ unlock_sock_fast(ssk, slow);
+
+ /* always try to push the pending data regardless of re-injections:
+ * we can possibly use backup subflows now, and subflow selection
+ * is cheap under the msk socket lock
+ */
+ __mptcp_push_pending(sk, 0);
+ return;
+ }
+ }
+}
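
A rough standalone model of the staleness decision above: a subflow is only marked stale once its loss count passes the configured threshold and at least one alternative subflow is still healthy. Field and helper names are invented stand-ins for the kernel state.

#include <stdbool.h>
#include <stdio.h>

struct subflow {
	unsigned int stale_count;	/* consecutive loss events */
	bool stale;
	bool active;
	bool has_queued_data;
};

/* mark @idx stale only if its loss count crossed the threshold and
 * some other active subflow is still making progress
 */
static bool maybe_mark_stale(struct subflow *sf, unsigned int nr,
			     unsigned int idx, unsigned int stale_loss_cnt)
{
	unsigned int active_max_loss_cnt;

	if (sf[idx].stale || !stale_loss_cnt ||
	    sf[idx].stale_count <= stale_loss_cnt)
		return false;

	active_max_loss_cnt = stale_loss_cnt > 1 ? stale_loss_cnt - 1 : 1;
	for (unsigned int i = 0; i < nr; i++) {
		if (i != idx && sf[i].active &&
		    sf[i].stale_count < active_max_loss_cnt) {
			/* only stall it if it still has data to move */
			if (sf[idx].has_queued_data)
				sf[idx].stale = true;
			return sf[idx].stale;
		}
	}
	return false;
}

int main(void)
{
	struct subflow sf[2] = {
		{ .stale_count = 6, .active = true, .has_queued_data = true },
		{ .stale_count = 0, .active = true },
	};

	printf("subflow 0 stale: %d\n", maybe_mark_stale(sf, 2, 0, 4)); /* 1 */
	return 0;
}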
+
static int mptcp_pm_family_to_addr(int family)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -971,8 +1106,14 @@ skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
- if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT]) {
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, attr,
+ "flags must have signal when using port");
+ return -EINVAL;
+ }
entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
+ }
return 0;
}
@@ -1059,6 +1200,27 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
return NULL;
}
+int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ *flags = 0;
+ *ifindex = 0;
+
+ if (id) {
+ rcu_read_lock();
+ entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+ if (entry) {
+ *flags = entry->flags;
+ *ifindex = entry->ifindex;
+ }
+ rcu_read_unlock();
+ }
+
+ return 0;
+}
+
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr)
{
@@ -1127,36 +1289,12 @@ next:
return 0;
}
-struct addr_entry_release_work {
- struct rcu_work rwork;
- struct mptcp_pm_addr_entry *entry;
-};
-
-static void mptcp_pm_release_addr_entry(struct work_struct *work)
-{
- struct addr_entry_release_work *w;
- struct mptcp_pm_addr_entry *entry;
-
- w = container_of(to_rcu_work(work), struct addr_entry_release_work, rwork);
- entry = w->entry;
- if (entry) {
- if (entry->lsk)
- sock_release(entry->lsk);
- kfree(entry);
- }
- kfree(w);
-}
-
-static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry)
+/* caller must ensure the RCU grace period is already elapsed */
+static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
{
- struct addr_entry_release_work *w;
-
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (w) {
- INIT_RCU_WORK(&w->rwork, mptcp_pm_release_addr_entry);
- w->entry = entry;
- queue_rcu_work(system_wq, &w->rwork);
- }
+ if (entry->lsk)
+ sock_release(entry->lsk);
+ kfree(entry);
}
static int mptcp_nl_remove_id_zero_address(struct net *net,
@@ -1236,7 +1374,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr);
- mptcp_pm_free_addr_entry(entry);
+ synchronize_rcu();
+ __mptcp_pm_release_addr_entry(entry);
return ret;
}
@@ -1289,6 +1428,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
}
}
+/* caller must ensure the RCU grace period is already elapsed */
static void __flush_addrs(struct list_head *list)
{
while (!list_empty(list)) {
@@ -1297,7 +1437,7 @@ static void __flush_addrs(struct list_head *list)
cur = list_entry(list->next,
struct mptcp_pm_addr_entry, list);
list_del_rcu(&cur->list);
- mptcp_pm_free_addr_entry(cur);
+ __mptcp_pm_release_addr_entry(cur);
}
}
@@ -1321,6 +1461,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
spin_unlock_bh(&pernet->lock);
mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ synchronize_rcu();
__flush_addrs(&free_list);
return 0;
}
@@ -1913,10 +2054,14 @@ static int __net_init pm_nl_init_net(struct net *net)
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
- __reset_counters(pernet);
pernet->next_id = 1;
- bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
+ pernet->stale_loss_cnt = 4;
spin_lock_init(&pernet->lock);
+
+ /* No need to initialize other pernet fields, the struct is zeroed at
+ * allocation time.
+ */
+
return 0;
}
@@ -1928,7 +2073,8 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
/* net is removed from namespace list, can't race with
- * other modifiers
+ * other modifiers, also netns core already waited for a
+ * RCU grace period.
*/
__flush_addrs(&pernet->local_addr_list);
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 632350018fb6..2602f1386160 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -39,10 +39,15 @@ struct mptcp_skb_cb {
u64 map_seq;
u64 end_seq;
u32 offset;
+ u8 has_rxtstamp:1;
};
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
+enum {
+ MPTCP_CMSG_TS = BIT(0),
+};
+
static struct percpu_counter mptcp_sockets_allocated;
static void __mptcp_destroy_sock(struct sock *sk);
@@ -272,6 +277,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = (struct sock *)msk;
struct sk_buff *tail;
+ bool has_rxtstamp;
__skb_unlink(skb, &ssk->sk_receive_queue);
@@ -289,6 +295,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
sk->sk_forward_alloc += amount;
}
+ has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+
/* the skb map_seq accounts for the skb offset:
* mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq
* value
@@ -296,6 +304,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow);
MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len;
MPTCP_SKB_CB(skb)->offset = offset;
+ MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
@@ -402,78 +411,93 @@ static void mptcp_set_datafin_timeout(const struct sock *sk)
TCP_RTO_MIN << icsk->icsk_retransmits);
}
-static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
+static void __mptcp_set_timeout(struct sock *sk, long tout)
{
- long tout = ssk && inet_csk(ssk)->icsk_pending ?
- inet_csk(ssk)->icsk_timeout - jiffies : 0;
-
- if (tout <= 0)
- tout = mptcp_sk(sk)->timer_ival;
mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN;
}
+static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subflow)
+{
+ const struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ return inet_csk(ssk)->icsk_pending && !subflow->stale_count ?
+ inet_csk(ssk)->icsk_timeout - jiffies : 0;
+}
+
+static void mptcp_set_timeout(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ long tout = 0;
+
+ mptcp_for_each_subflow(mptcp_sk(sk), subflow)
+ tout = max(tout, mptcp_timeout_from_subflow(subflow));
+ __mptcp_set_timeout(sk, tout);
+}
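
The MPTCP-level retransmit timeout is now derived from the subflows: take the largest pending RTO residual across non-stale subflows and fall back to the minimum RTO when nothing is armed. A hedged sketch with invented names and millisecond times:

#include <stdio.h>

#define RTO_MIN_MS 200

struct subflow_timer {
	int rto_pending;	/* is an RTO timer armed on this subflow? */
	long expires_ms;	/* absolute expiry time */
	int stale_count;	/* stale subflows don't contribute */
};

static long timeout_from_subflow(const struct subflow_timer *sf, long now_ms)
{
	return sf->rto_pending && !sf->stale_count ? sf->expires_ms - now_ms : 0;
}

/* msk timeout = max residual across subflows, clamped to RTO_MIN_MS */
static long mptcp_timeout(const struct subflow_timer *sf, unsigned int nr, long now_ms)
{
	long tout = 0;

	for (unsigned int i = 0; i < nr; i++) {
		long t = timeout_from_subflow(&sf[i], now_ms);

		if (t > tout)
			tout = t;
	}
	return tout > 0 ? tout : RTO_MIN_MS;
}

int main(void)
{
	struct subflow_timer sf[2] = {
		{ .rto_pending = 1, .expires_ms = 1500 },
		{ .rto_pending = 1, .expires_ms = 1900, .stale_count = 3 },
	};

	printf("timer_ival = %ld ms\n", mptcp_timeout(sf, 2, 1000)); /* 500 */
	return 0;
}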
+
static bool tcp_can_send_ack(const struct sock *ssk)
{
return !((1 << inet_sk_state_load(ssk)) &
(TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN));
}
+void mptcp_subflow_send_ack(struct sock *ssk)
+{
+ bool slow;
+
+ slow = lock_sock_fast(ssk);
+ if (tcp_can_send_ack(ssk))
+ tcp_send_ack(ssk);
+ unlock_sock_fast(ssk, slow);
+}
+
static void mptcp_send_ack(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- lock_sock(ssk);
- if (tcp_can_send_ack(ssk))
- tcp_send_ack(ssk);
- release_sock(ssk);
- }
+ mptcp_for_each_subflow(msk, subflow)
+ mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow));
}
-static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
- int ret;
+ bool slow;
- lock_sock(ssk);
- ret = tcp_can_send_ack(ssk);
- if (ret)
+ slow = lock_sock_fast(ssk);
+ if (tcp_can_send_ack(ssk))
tcp_cleanup_rbuf(ssk, 1);
- release_sock(ssk);
- return ret;
+ unlock_sock_fast(ssk, slow);
+}
+
+static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
+{
+ const struct inet_connection_sock *icsk = inet_csk(ssk);
+ u8 ack_pending = READ_ONCE(icsk->icsk_ack.pending);
+ const struct tcp_sock *tp = tcp_sk(ssk);
+
+ return (ack_pending & ICSK_ACK_SCHED) &&
+ ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
+ READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
+ (rx_empty && ack_pending &
+ (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
- struct sock *ack_hint = READ_ONCE(msk->ack_hint);
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
- bool cleanup;
+ int space = __mptcp_space(sk);
+ bool cleanup, rx_empty;
- /* this is a simple superset of what tcp_cleanup_rbuf() implements
- * so that we don't have to acquire the ssk socket lock most of the time
- * to do actually nothing
- */
- cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
- if (!cleanup)
- return;
+ cleanup = (space > 0) && (space >= (old_space << 1));
+ rx_empty = !__mptcp_rmem(sk);
- /* if the hinted ssk is still active, try to use it */
- if (likely(ack_hint)) {
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
- return;
- }
+ if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
+ mptcp_subflow_cleanup_rbuf(ssk);
}
-
- /* otherwise pick the first active subflow */
- mptcp_for_each_subflow(msk, subflow)
- if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
- return;
}
static bool mptcp_check_data_fin(struct sock *sk)
@@ -523,7 +547,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
}
ret = true;
- mptcp_set_timeout(sk, NULL);
mptcp_send_ack(msk);
mptcp_close_wake_up(sk);
}
@@ -618,7 +641,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
break;
}
} while (more_data_avail);
- WRITE_ONCE(msk->ack_hint, ssk);
*bytes += moved;
return done;
@@ -675,9 +697,6 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
struct sock *sk = (struct sock *)msk;
unsigned int moved = 0;
- if (inet_sk_state_load(sk) == TCP_CLOSE)
- return false;
-
__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
__mptcp_ofo_queue(msk);
if (unlikely(ssk->sk_err)) {
@@ -716,8 +735,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
sk_rbuf = ssk_rbuf;
/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
- if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
+ if (__mptcp_rmem(sk) > sk_rbuf) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
return;
+ }
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
@@ -785,10 +806,7 @@ static void mptcp_reset_timer(struct sock *sk)
if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
return;
- /* should never be called with mptcp level timer cleared */
- tout = READ_ONCE(mptcp_sk(sk)->timer_ival);
- if (WARN_ON_ONCE(!tout))
- tout = TCP_RTO_MIN;
+ tout = mptcp_sk(sk)->timer_ival;
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout);
}
@@ -893,22 +911,14 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
df->data_seq + df->data_len == msk->write_seq;
}
-static int mptcp_wmem_with_overhead(struct sock *sk, int size)
+static int mptcp_wmem_with_overhead(int size)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
- int ret, skbs;
-
- ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
- skbs = (msk->tx_pending_data + size) / msk->size_goal_cache;
- if (skbs < msk->skb_tx_cache.qlen)
- return ret;
-
- return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER);
+ return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
}
static void __mptcp_wmem_reserve(struct sock *sk, int size)
{
- int amount = mptcp_wmem_with_overhead(sk, size);
+ int amount = mptcp_wmem_with_overhead(size);
struct mptcp_sock *msk = mptcp_sk(sk);
WARN_ON_ONCE(msk->wmem_reserved);
@@ -996,6 +1006,13 @@ static void mptcp_wmem_uncharge(struct sock *sk, int size)
msk->wmem_reserved += size;
}
+static void __mptcp_mem_reclaim_partial(struct sock *sk)
+{
+ lockdep_assert_held_once(&sk->sk_lock.slock);
+ __mptcp_update_wmem(sk);
+ sk_mem_reclaim_partial(sk);
+}
+
static void mptcp_mem_reclaim_partial(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1048,8 +1065,14 @@ static void __mptcp_clean_una(struct sock *sk)
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
break;
- if (WARN_ON_ONCE(dfrag == msk->first_pending))
- break;
+ if (unlikely(dfrag == msk->first_pending)) {
+ /* in recovery mode can see ack after the current snd head */
+ if (WARN_ON_ONCE(!msk->recovery))
+ break;
+
+ WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+ }
+
dfrag_clear(sk, dfrag);
cleaned = true;
}
@@ -1058,8 +1081,14 @@ static void __mptcp_clean_una(struct sock *sk)
if (dfrag && after64(snd_una, dfrag->data_seq)) {
u64 delta = snd_una - dfrag->data_seq;
- if (WARN_ON_ONCE(delta > dfrag->already_sent))
- goto out;
+ /* prevent wrap around in recovery mode */
+ if (unlikely(delta > dfrag->already_sent)) {
+ if (WARN_ON_ONCE(!msk->recovery))
+ goto out;
+ if (WARN_ON_ONCE(delta > dfrag->data_len))
+ goto out;
+ dfrag->already_sent += delta - dfrag->already_sent;
+ }
dfrag->data_seq += delta;
dfrag->offset += delta;
@@ -1070,16 +1099,16 @@ static void __mptcp_clean_una(struct sock *sk)
cleaned = true;
}
+ /* all retransmitted data acked, recovery completed */
+ if (unlikely(msk->recovery) && after64(msk->snd_una, msk->recovery_snd_nxt))
+ msk->recovery = false;
+
out:
- if (cleaned) {
- if (tcp_under_memory_pressure(sk)) {
- __mptcp_update_wmem(sk);
- sk_mem_reclaim_partial(sk);
- }
- }
+ if (cleaned && tcp_under_memory_pressure(sk))
+ __mptcp_mem_reclaim_partial(sk);
- if (snd_una == READ_ONCE(msk->snd_nxt)) {
- if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
+ if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
+ if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
mptcp_stop_timer(sk);
} else {
mptcp_reset_timer(sk);
@@ -1156,6 +1185,7 @@ struct mptcp_sendmsg_info {
u16 limit;
u16 sent;
unsigned int flags;
+ bool data_lock_held;
};
static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq,
@@ -1203,49 +1233,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
return NULL;
}
-static bool mptcp_tx_cache_refill(struct sock *sk, int size,
- struct sk_buff_head *skbs, int *total_ts)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct sk_buff *skb;
- int space_needed;
-
- if (unlikely(tcp_under_memory_pressure(sk))) {
- mptcp_mem_reclaim_partial(sk);
-
- /* under pressure pre-allocate at most a single skb */
- if (msk->skb_tx_cache.qlen)
- return true;
- space_needed = msk->size_goal_cache;
- } else {
- space_needed = msk->tx_pending_data + size -
- msk->skb_tx_cache.qlen * msk->size_goal_cache;
- }
-
- while (space_needed > 0) {
- skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation);
- if (unlikely(!skb)) {
- /* under memory pressure, try to pass the caller a
- * single skb to allow forward progress
- */
- while (skbs->qlen > 1) {
- skb = __skb_dequeue_tail(skbs);
- *total_ts -= skb->truesize;
- __kfree_skb(skb);
- }
- return skbs->qlen > 0;
- }
-
- *total_ts += skb->truesize;
- __skb_queue_tail(skbs, skb);
- space_needed -= msk->size_goal_cache;
- }
- return true;
-}
-
static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
struct sk_buff *skb;
if (ssk->sk_tx_skb_cache) {
@@ -1256,22 +1245,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
return true;
}
- skb = skb_peek(&msk->skb_tx_cache);
- if (skb) {
- if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
- skb = __skb_dequeue(&msk->skb_tx_cache);
- if (WARN_ON_ONCE(!skb))
- return false;
-
- mptcp_wmem_uncharge(sk, skb->truesize);
- ssk->sk_tx_skb_cache = skb;
- return true;
- }
-
- /* over memory limit, no point to try to allocate a new skb */
- return false;
- }
-
skb = __mptcp_do_alloc_tx_skb(sk, gfp);
if (!skb)
return false;
@@ -1284,18 +1257,29 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
return false;
}
-static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
+static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
{
- return !ssk->sk_tx_skb_cache &&
- !skb_peek(&mptcp_sk(sk)->skb_tx_cache) &&
- tcp_under_memory_pressure(sk);
+ gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
+
+ if (unlikely(tcp_under_memory_pressure(sk))) {
+ if (data_lock_held)
+ __mptcp_mem_reclaim_partial(sk);
+ else
+ mptcp_mem_reclaim_partial(sk);
+ }
+ return __mptcp_alloc_tx_skb(sk, ssk, gfp);
}
-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
+/* note: this always recomputes the csum on the whole skb, even
+ * if we just appended a single frag. More status info needed
+ */
+static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
{
- if (unlikely(mptcp_must_reclaim_memory(sk, ssk)))
- mptcp_mem_reclaim_partial(sk);
- return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
+ struct mptcp_ext *mpext = mptcp_get_ext(skb);
+ __wsum csum = ~csum_unfold(mpext->csum);
+ int offset = skb->len - added;
+
+ mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
}
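
For reference, a self-contained sketch of the one's-complement arithmetic behind extending a folded 16-bit checksum with newly appended bytes: unfold the stored value, add the partial sum of the new region, then fold again. The helpers below are generic userspace stand-ins, not the kernel's csum_* routines, and odd-offset rotation is left out for brevity.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* one's-complement sum of a byte range, accumulated into a 32-bit partial sum */
static uint32_t csum_partial(const uint8_t *data, size_t len, uint32_t sum)
{
	for (size_t i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)data[i] << 8 | data[i + 1];
	if (len & 1)
		sum += (uint32_t)data[len - 1] << 8;
	return sum;
}

/* fold a partial sum into a 16-bit checksum */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t payload[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	/* checksum of the first 4 bytes, stored folded */
	uint16_t stored = csum_fold(csum_partial(payload, 4, 0));

	/* append 4 more bytes: unfold, block-add the new region, re-fold.
	 * The new region starts at an even offset, so no rotation is needed.
	 */
	uint32_t unfolded = (uint16_t)~stored;
	uint16_t updated = csum_fold(csum_partial(payload + 4, 4, unfolded));

	/* equals the checksum computed over all 8 bytes in one pass */
	uint16_t full = csum_fold(csum_partial(payload, 8, 0));
	printf("incremental=0x%04x full=0x%04x\n", updated, full);
	return 0;
}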
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
@@ -1307,7 +1291,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
bool zero_window_probe = false;
struct mptcp_ext *mpext = NULL;
struct sk_buff *skb, *tail;
- bool can_collapse = false;
+ bool must_collapse = false;
int size_bias = 0;
int avail_size;
size_t ret = 0;
@@ -1318,7 +1302,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
/* compute send limit */
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
avail_size = info->size_goal;
- msk->size_goal_cache = info->size_goal;
skb = tcp_write_queue_tail(ssk);
if (skb) {
/* Limit the write to the size available in the
@@ -1328,16 +1311,24 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
* SSN association set here
*/
mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
- can_collapse = (info->size_goal - skb->len > 0) &&
- mptcp_skb_can_collapse_to(data_seq, skb, mpext);
- if (!can_collapse) {
+ if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
- } else {
+ goto alloc_skb;
+ }
+
+ must_collapse = (info->size_goal - skb->len > 0) &&
+ (skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
+ if (must_collapse) {
size_bias = skb->len;
avail_size = info->size_goal - skb->len;
}
}
+alloc_skb:
+ if (!must_collapse && !ssk->sk_tx_skb_cache &&
+ !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
+ return 0;
+
/* Zero window and all data acked? Probe. */
avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
if (avail_size == 0) {
@@ -1367,7 +1358,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (skb == tail) {
TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
mpext->data_len += ret;
- WARN_ON_ONCE(!can_collapse);
WARN_ON_ONCE(zero_window_probe);
goto out;
}
@@ -1392,10 +1382,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (zero_window_probe) {
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
mpext->frozen = 1;
- ret = 0;
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
tcp_push_pending_frames(ssk);
+ return 0;
}
out:
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
return ret;
}
@@ -1411,16 +1405,44 @@ struct subflow_send_info {
u64 ratio;
};
+void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow)
+{
+ if (!subflow->stale)
+ return;
+
+ subflow->stale = 0;
+ MPTCP_INC_STATS(sock_net(mptcp_subflow_tcp_sock(subflow)), MPTCP_MIB_SUBFLOWRECOVER);
+}
+
+bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+{
+ if (unlikely(subflow->stale)) {
+ u32 rcv_tstamp = READ_ONCE(tcp_sk(mptcp_subflow_tcp_sock(subflow))->rcv_tstamp);
+
+ if (subflow->stale_rcv_tstamp == rcv_tstamp)
+ return false;
+
+ mptcp_subflow_set_active(subflow);
+ }
+ return __mptcp_subflow_active(subflow);
+}
+
+/* implement the mptcp packet scheduler;
+ * returns the subflow that will transmit the next DSS
+ * additionally updates the rtx timeout
+ */
static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
{
struct subflow_send_info send_info[2];
struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
int i, nr_active = 0;
struct sock *ssk;
+ long tout = 0;
u64 ratio;
u32 pace;
- sock_owned_by_me((struct sock *)msk);
+ sock_owned_by_me(sk);
if (__mptcp_check_fallback(msk)) {
if (!msk->first)
@@ -1431,8 +1453,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
/* re-use last subflow, if the burst allow that */
if (msk->last_snd && msk->snd_burst > 0 &&
sk_stream_memory_free(msk->last_snd) &&
- mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd)))
+ mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) {
+ mptcp_set_timeout(sk);
return msk->last_snd;
+ }
/* pick the subflow with the lower wmem/wspace ratio */
for (i = 0; i < 2; ++i) {
@@ -1445,6 +1469,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
if (!mptcp_subflow_active(subflow))
continue;
+ tout = max(tout, mptcp_timeout_from_subflow(subflow));
nr_active += !subflow->backup;
if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd)
continue;
@@ -1460,6 +1485,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
send_info[subflow->backup].ratio = ratio;
}
}
+ __mptcp_set_timeout(sk, tout);
/* pick the best backup if no other subflow is active */
if (!nr_active)
@@ -1478,12 +1504,11 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
static void mptcp_push_release(struct sock *sk, struct sock *ssk,
struct mptcp_sendmsg_info *info)
{
- mptcp_set_timeout(sk, ssk);
tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
release_sock(ssk);
}
-static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
+void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1504,25 +1529,20 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
mptcp_flush_join_list(msk);
ssk = mptcp_subflow_get_send(msk);
- /* try to keep the subflow socket lock across
- * consecutive xmit on the same socket
+ /* First check. If the ssk has changed since
+ * the last round, release prev_ssk
*/
if (ssk != prev_ssk && prev_ssk)
mptcp_push_release(sk, prev_ssk, &info);
if (!ssk)
goto out;
- if (ssk != prev_ssk || !prev_ssk)
- lock_sock(ssk);
-
- /* keep it simple and always provide a new skb for the
- * subflow, even if we will not use it when collapsing
- * on the pending one
+ /* Need to lock the new subflow only if different
+ * from the previous one, otherwise we are still
+ * holding the relevant lock
*/
- if (!mptcp_alloc_tx_skb(sk, ssk)) {
- mptcp_push_release(sk, ssk, &info);
- goto out;
- }
+ if (ssk != prev_ssk)
+ lock_sock(ssk);
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0) {
@@ -1546,18 +1566,19 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
mptcp_push_release(sk, ssk, &info);
out:
- if (copied) {
- /* start the timer, if it's not pending */
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ /* ensure the rtx timer is running */
+ if (!mptcp_timer_pending(sk))
+ mptcp_reset_timer(sk);
+ if (copied)
__mptcp_check_send_data_fin(sk);
- }
}
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct mptcp_sendmsg_info info;
+ struct mptcp_sendmsg_info info = {
+ .data_lock_held = true,
+ };
struct mptcp_data_frag *dfrag;
struct sock *xmit_ssk;
int len, copied = 0;
@@ -1583,13 +1604,6 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
goto out;
}
- if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) {
- __mptcp_update_wmem(sk);
- sk_mem_reclaim_partial(sk);
- }
- if (!__mptcp_alloc_tx_skb(sk, ssk, GFP_ATOMIC))
- goto out;
-
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
goto out;
@@ -1612,7 +1626,6 @@ out:
*/
__mptcp_update_wmem(sk);
if (copied) {
- mptcp_set_timeout(sk, ssk);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
if (!mptcp_timer_pending(sk))
@@ -1663,7 +1676,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
while (msg_data_left(msg)) {
int total_ts, frag_truesize = 0;
struct mptcp_data_frag *dfrag;
- struct sk_buff_head skbs;
bool dfrag_collapsed;
size_t psize, offset;
@@ -1696,16 +1708,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
psize = pfrag->size - offset;
psize = min_t(size_t, psize, msg_data_left(msg));
total_ts = psize + frag_truesize;
- __skb_queue_head_init(&skbs);
- if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts))
- goto wait_for_memory;
- if (!mptcp_wmem_alloc(sk, total_ts)) {
- __skb_queue_purge(&skbs);
+ if (!mptcp_wmem_alloc(sk, total_ts))
goto wait_for_memory;
- }
- skb_queue_splice_tail(&skbs, &msk->skb_tx_cache);
if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
mptcp_wmem_uncharge(sk, psize + frag_truesize);
@@ -1762,7 +1768,7 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, timeo,
- test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait);
+ test_bit(MPTCP_DATA_READY, &msk->flags), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
@@ -1770,7 +1776,9 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
struct msghdr *msg,
- size_t len, int flags)
+ size_t len, int flags,
+ struct scm_timestamping_internal *tss,
+ int *cmsg_flags)
{
struct sk_buff *skb, *tmp;
int copied = 0;
@@ -1790,6 +1798,11 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
}
}
+ if (MPTCP_SKB_CB(skb)->has_rxtstamp) {
+ tcp_update_recv_tstamps(skb, tss);
+ *cmsg_flags |= MPTCP_CMSG_TS;
+ }
+
copied += count;
if (count < data_len) {
@@ -1801,7 +1814,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
if (!(flags & MSG_PEEK)) {
/* we will bulk release the skb memory later */
skb->destructor = NULL;
- msk->rmem_released += skb->truesize;
+ WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
__skb_unlink(skb, &msk->receive_queue);
__kfree_skb(skb);
}
@@ -1920,7 +1933,7 @@ static void __mptcp_update_rmem(struct sock *sk)
atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
sk_mem_uncharge(sk, msk->rmem_released);
- msk->rmem_released = 0;
+ WRITE_ONCE(msk->rmem_released, 0);
}
static void __mptcp_splice_receive_queue(struct sock *sk)
@@ -1953,7 +1966,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
__mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
- tcp_cleanup_rbuf(ssk, moved);
if (unlikely(ssk->sk_err))
__mptcp_error_report(sk);
@@ -1969,7 +1981,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
- mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@@ -1980,7 +1991,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int copied = 0;
+ struct scm_timestamping_internal tss;
+ int copied = 0, cmsg_flags = 0;
int target;
long timeo;
@@ -2002,7 +2014,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
while (copied < len) {
int bytes_read;
- bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags);
+ bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
if (!copied)
copied = bytes_read;
@@ -2078,11 +2090,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
*/
if (unlikely(__mptcp_move_skbs(msk)))
set_bit(MPTCP_DATA_READY, &msk->flags);
- } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
- /* data to read but mptcp_wait_data() cleared DATA_READY */
- set_bit(MPTCP_DATA_READY, &msk->flags);
}
+
out_err:
+ if (cmsg_flags && copied >= 0) {
+ if (cmsg_flags & MPTCP_CMSG_TS)
+ tcp_recv_timestamp(msg, sk, &tss);
+ }
+
pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
msk, test_bit(MPTCP_DATA_READY, &msk->flags),
skb_queue_empty_lockless(&sk->sk_receive_queue), copied);
@@ -2126,10 +2141,11 @@ static void mptcp_timeout_timer(struct timer_list *t)
*
* A backup subflow is returned only if that is the only kind available.
*/
-static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
+static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
{
+ struct sock *backup = NULL, *pick = NULL;
struct mptcp_subflow_context *subflow;
- struct sock *backup = NULL;
+ int min_stale_count = INT_MAX;
sock_owned_by_me((const struct sock *)msk);
@@ -2139,14 +2155,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- if (!mptcp_subflow_active(subflow))
+ if (!__mptcp_subflow_active(subflow))
continue;
- /* still data outstanding at TCP level? Don't retransmit. */
- if (!tcp_write_queue_empty(ssk)) {
- if (inet_csk(ssk)->icsk_ca_state >= TCP_CA_Loss)
- continue;
- return NULL;
+ /* still data outstanding at TCP level? skip this */
+ if (!tcp_rtx_and_write_queues_empty(ssk)) {
+ mptcp_pm_subflow_chk_stale(msk, ssk);
+ min_stale_count = min_t(int, min_stale_count, subflow->stale_count);
+ continue;
}
if (subflow->backup) {
@@ -2155,10 +2171,15 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
continue;
}
- return ssk;
+ if (!pick)
+ pick = ssk;
}
- return backup;
+ if (pick)
+ return pick;
+
+ /* use backup only if there is no progress anywhere */
+ return min_stale_count > 1 ? backup : NULL;
}
static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
@@ -2169,6 +2190,50 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
}
}
+bool __mptcp_retransmit_pending_data(struct sock *sk)
+{
+ struct mptcp_data_frag *cur, *rtx_head;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ if (__mptcp_check_fallback(mptcp_sk(sk)))
+ return false;
+
+ if (tcp_rtx_and_write_queues_empty(sk))
+ return false;
+
+ /* the closing socket has some data untransmitted and/or unacked:
+ * some data in the mptcp rtx queue has not really been xmitted yet.
+ * keep it simple and re-inject the whole mptcp level rtx queue
+ */
+ mptcp_data_lock(sk);
+ __mptcp_clean_una_wakeup(sk);
+ rtx_head = mptcp_rtx_head(sk);
+ if (!rtx_head) {
+ mptcp_data_unlock(sk);
+ return false;
+ }
+
+ /* will accept ack for re-injected data before re-sending them */
+ if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt))
+ msk->recovery_snd_nxt = msk->snd_nxt;
+ msk->recovery = true;
+ mptcp_data_unlock(sk);
+
+ msk->first_pending = rtx_head;
+ msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq;
+ msk->snd_nxt = rtx_head->data_seq;
+ msk->snd_burst = 0;
+
+ /* be sure to clear the "sent status" on all re-injected fragments */
+ list_for_each_entry(cur, &msk->rtx_queue, list) {
+ if (!cur->already_sent)
+ break;
+ cur->already_sent = 0;
+ }
+
+ return true;
+}
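
Recovery re-injects the whole MPTCP-level rtx queue: the send head is rewound to the rtx head and every fragment's sent status is cleared so it becomes eligible for transmission again. A small array-based model of that rewind, with invented field names:

#include <stdio.h>

struct data_frag {
	unsigned long long data_seq;
	unsigned int data_len;
	unsigned int already_sent;
};

/* rewind the send state so every queued fragment is re-sent from scratch */
static unsigned long long reinject_rtx_queue(struct data_frag *q, unsigned int nr,
					     unsigned long long snd_nxt)
{
	if (!nr)
		return snd_nxt;

	/* clear the "sent" status on all re-injected fragments */
	for (unsigned int i = 0; i < nr; i++) {
		if (!q[i].already_sent)
			break;
		q[i].already_sent = 0;
	}

	/* the new send head is the first unacked fragment */
	return q[0].data_seq;
}

int main(void)
{
	struct data_frag q[2] = {
		{ .data_seq = 1000, .data_len = 500, .already_sent = 500 },
		{ .data_seq = 1500, .data_len = 500, .already_sent = 200 },
	};
	unsigned long long snd_nxt = 1700;

	snd_nxt = reinject_rtx_queue(q, 2, snd_nxt);
	printf("snd_nxt rewound to %llu\n", snd_nxt); /* 1000 */
	return 0;
}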
+
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@@ -2181,6 +2246,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ bool need_push;
list_del(&subflow->node);
@@ -2192,6 +2258,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk->sk_socket)
sock_orphan(ssk);
+ need_push = __mptcp_retransmit_pending_data(sk);
subflow->disposable = 1;
/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2214,14 +2281,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk == msk->last_snd)
msk->last_snd = NULL;
- if (ssk == msk->ack_hint)
- msk->ack_hint = NULL;
-
if (ssk == msk->first)
msk->first = NULL;
if (msk->subflow && ssk == msk->subflow->sk)
mptcp_dispose_initial_subflow(msk);
+
+ if (need_push)
+ __mptcp_push_pending(sk, 0);
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2288,13 +2355,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
- lock_sock(tcp_sk);
+ slow = lock_sock_fast(tcp_sk);
if (tcp_sk->sk_state != TCP_CLOSE) {
tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
tcp_set_state(tcp_sk, TCP_CLOSE);
}
- release_sock(tcp_sk);
+ unlock_sock_fast(tcp_sk, slow);
}
inet_sk_state_store(sk, TCP_CLOSE);
@@ -2339,11 +2407,8 @@ static void __mptcp_retrans(struct sock *sk)
/* limit retransmission to the bytes already sent on some subflows */
info.sent = 0;
- info.limit = dfrag->already_sent;
- while (info.sent < dfrag->already_sent) {
- if (!mptcp_alloc_tx_skb(sk, ssk))
- break;
-
+ info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
+ while (info.sent < info.limit) {
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
break;
@@ -2352,11 +2417,12 @@ static void __mptcp_retrans(struct sock *sk)
copied += ret;
info.sent += ret;
}
- if (copied)
+ if (copied) {
+ dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ }
- mptcp_set_timeout(sk, ssk);
release_sock(ssk);
reset_timer:
@@ -2422,17 +2488,17 @@ static int __mptcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&msk->rtx_queue);
INIT_WORK(&msk->work, mptcp_worker);
__skb_queue_head_init(&msk->receive_queue);
- __skb_queue_head_init(&msk->skb_tx_cache);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
msk->wmem_reserved = 0;
- msk->rmem_released = 0;
+ WRITE_ONCE(msk->rmem_released, 0);
msk->tx_pending_data = 0;
- msk->size_goal_cache = TCP_BASE_MSS;
+ msk->timer_ival = TCP_RTO_MIN;
- msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
+ WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ msk->recovery = false;
mptcp_pm_data_init(msk);
@@ -2484,15 +2550,10 @@ static void __mptcp_clear_xmit(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- struct sk_buff *skb;
WRITE_ONCE(msk->first_pending, NULL);
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
dfrag_clear(sk, dfrag);
- while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) {
- sk->sk_forward_alloc += skb->truesize;
- kfree_skb(skb);
- }
}
static void mptcp_cancel_work(struct sock *sk)
@@ -2522,7 +2583,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
tcp_shutdown(ssk, how);
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
- mptcp_set_timeout(sk, ssk);
tcp_send_ack(ssk);
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
@@ -2773,6 +2833,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token;
msk->subflow = NULL;
WRITE_ONCE(msk->fully_established, false);
+ if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
+ WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
@@ -2780,7 +2842,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
- if (mp_opt->mp_capable) {
+ if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) {
msk->can_ack = true;
msk->remote_key = mp_opt->sndr_key;
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
@@ -2946,6 +3008,11 @@ static void mptcp_release_cb(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
}
+ /* be sure to set the current sk state before taking actions
+ * depending on sk_state
+ */
+ if (test_and_clear_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags))
+ __mptcp_set_connected(sk);
if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
__mptcp_clean_una_wakeup(sk);
if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 385796f0ef19..d3e6fd1615f1 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -26,6 +26,15 @@
#define OPTION_MPTCP_FASTCLOSE BIT(8)
#define OPTION_MPTCP_PRIO BIT(9)
#define OPTION_MPTCP_RST BIT(10)
+#define OPTION_MPTCP_DSS BIT(11)
+#define OPTION_MPTCP_FAIL BIT(12)
+
+#define OPTION_MPTCP_CSUMREQD BIT(13)
+
+#define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
+ OPTION_MPTCP_MPC_ACK)
+#define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
+ OPTION_MPTCP_MPJ_ACK)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -67,6 +76,9 @@
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
#define TCPOLEN_MPTCP_FASTCLOSE 12
#define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_FAIL 12
+
+#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA)
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
@@ -77,8 +89,9 @@
#define MPTCP_VERSION_MASK (0x0F)
#define MPTCP_CAP_CHECKSUM_REQD BIT(7)
#define MPTCP_CAP_EXTENSIBILITY BIT(6)
+#define MPTCP_CAP_DENY_JOIN_ID0 BIT(5)
#define MPTCP_CAP_HMAC_SHA256 BIT(0)
-#define MPTCP_CAP_FLAG_MASK (0x3F)
+#define MPTCP_CAP_FLAG_MASK (0x1F)
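
The new MPTCP_CAP_DENY_JOIN_ID0 bit is carved out of the reserved flags, which is why the strict-check mask shrinks from 0x3F to 0x1F. A tiny sketch of parsing the MP_CAPABLE flags byte under these definitions; the parsing helper is invented and only approximates the kernel's checks.

#include <stdbool.h>
#include <stdio.h>

#define MPTCP_CAP_CHECKSUM_REQD  (1 << 7)
#define MPTCP_CAP_EXTENSIBILITY  (1 << 6)
#define MPTCP_CAP_DENY_JOIN_ID0  (1 << 5)
#define MPTCP_CAP_HMAC_SHA256    (1 << 0)
#define MPTCP_CAP_FLAG_MASK      (0x1F)

struct mpc_flags {
	bool csum_reqd;
	bool deny_join_id0;
};

static bool parse_mpc_flags(unsigned char flags, struct mpc_flags *out)
{
	/* unknown bits among the strictly checked flags: be conservative */
	if ((flags & MPTCP_CAP_FLAG_MASK) != MPTCP_CAP_HMAC_SHA256)
		return false;

	out->csum_reqd = flags & MPTCP_CAP_CHECKSUM_REQD;
	out->deny_join_id0 = flags & MPTCP_CAP_DENY_JOIN_ID0;
	return true;
}

int main(void)
{
	struct mpc_flags f;
	unsigned char wire = MPTCP_CAP_HMAC_SHA256 | MPTCP_CAP_DENY_JOIN_ID0;

	if (parse_mpc_flags(wire, &f))
		printf("csum=%d deny_join_id0=%d\n", f.csum_reqd, f.deny_join_id0);
	return 0;
}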
/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN BIT(4)
@@ -109,6 +122,7 @@
#define MPTCP_ERROR_REPORT 8
#define MPTCP_RETRANSMIT 9
#define MPTCP_WORK_SYNC_SETSOCKOPT 10
+#define MPTCP_CONNECTED 11
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -124,33 +138,29 @@ struct mptcp_options_received {
u64 data_seq;
u32 subflow_seq;
u16 data_len;
- u16 mp_capable : 1,
- mp_join : 1,
- fastclose : 1,
- reset : 1,
- dss : 1,
- add_addr : 1,
- rm_addr : 1,
- mp_prio : 1,
- echo : 1,
- backup : 1;
+ __sum16 csum;
+ u16 suboptions;
u32 token;
u32 nonce;
- u64 thmac;
- u8 hmac[MPTCPOPT_HMAC_LEN];
- u8 join_id;
- u8 use_map:1,
+ u16 use_map:1,
dsn64:1,
data_fin:1,
use_ack:1,
ack64:1,
mpc_map:1,
+ reset_reason:4,
+ reset_transient:1,
+ echo:1,
+ backup:1,
+ deny_join_id0:1,
__unused:2;
+ u8 join_id;
+ u64 thmac;
+ u8 hmac[MPTCPOPT_HMAC_LEN];
struct mptcp_addr_info addr;
struct mptcp_rm_list rm_list;
u64 ahmac;
- u8 reset_reason:4;
- u8 reset_transient:1;
+ u64 fail_seq;
};
static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
@@ -171,8 +181,6 @@ enum mptcp_pm_status {
enum mptcp_addr_signal_status {
MPTCP_ADD_ADDR_SIGNAL,
MPTCP_ADD_ADDR_ECHO,
- MPTCP_ADD_ADDR_IPV6,
- MPTCP_ADD_ADDR_PORT,
MPTCP_RM_ADDR_SIGNAL,
};
@@ -188,6 +196,7 @@ struct mptcp_pm_data {
bool work_pending;
bool accept_addr;
bool accept_subflow;
+ bool remote_deny_join_id0;
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
@@ -222,27 +231,30 @@ struct mptcp_sock {
struct sock *last_snd;
int snd_burst;
int old_wspace;
+ u64 recovery_snd_nxt; /* in recovery mode accept up to this seq;
+ * recovery related fields are under data_lock
+ * protection
+ */
u64 snd_una;
u64 wnd_end;
unsigned long timer_ival;
u32 token;
int rmem_released;
unsigned long flags;
+ bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
bool rcv_data_fin;
bool snd_data_fin_enable;
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
+ bool csum_enabled;
spinlock_t join_list_lock;
- struct sock *ack_hint;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
struct sk_buff_head receive_queue;
- struct sk_buff_head skb_tx_cache; /* this is wmem accounted */
int tx_pending_data;
- int size_goal_cache;
struct list_head conn_list;
struct list_head rtx_queue;
struct mptcp_data_frag *first_pending;
@@ -290,9 +302,17 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
return (struct mptcp_sock *)sk;
}
+/* the msk socket doesn't use the backlog, also account for the bulk
+ * free memory
+ */
+static inline int __mptcp_rmem(const struct sock *sk)
+{
+ return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@@ -335,11 +355,20 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
+struct csum_pseudo_header {
+ __be64 data_seq;
+ __be32 subflow_seq;
+ __be16 data_len;
+ __sum16 csum;
+};
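
For orientation, a userspace sketch of how a DSS checksum over the pseudo-header declared above plus the payload could be computed, assuming the RFC 8684 style Internet checksum in network byte order. The summing helpers are invented and this is not the kernel routine.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <arpa/inet.h>

struct csum_pseudo_header {
	uint64_t data_seq;	/* network byte order */
	uint32_t subflow_seq;
	uint16_t data_len;
	uint16_t csum;		/* zero while computing */
};

static uint32_t sum16(const uint8_t *p, size_t len, uint32_t sum)
{
	for (size_t i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)p[i] << 8 | p[i + 1];
	if (len & 1)
		sum += (uint32_t)p[len - 1] << 8;
	return sum;
}

static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	const uint8_t payload[] = "hello";
	struct csum_pseudo_header ph = {
		.data_seq = 0,			/* 64-bit byte swap omitted for a zero value */
		.subflow_seq = htonl(1),
		.data_len = htons(sizeof(payload) - 1),
		.csum = 0,
	};
	uint32_t sum;

	sum = sum16((const uint8_t *)&ph, sizeof(ph), 0);
	sum = sum16(payload, sizeof(payload) - 1, sum);
	printf("DSS checksum: 0x%04x\n", fold(sum));
	return 0;
}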
+
struct mptcp_subflow_request_sock {
struct tcp_request_sock sk;
u16 mp_capable : 1,
mp_join : 1,
- backup : 1;
+ backup : 1,
+ csum_reqd : 1,
+ allow_join_id0 : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
@@ -386,6 +415,8 @@ struct mptcp_subflow_context {
u32 map_subflow_seq;
u32 ssn_offset;
u32 map_data_len;
+ __wsum map_data_csum;
+ u32 map_csum_len;
u32 request_mptcp : 1, /* send MP_CAPABLE */
request_join : 1, /* send MP_JOIN */
request_bkup : 1,
@@ -395,12 +426,16 @@ struct mptcp_subflow_context {
pm_notified : 1, /* PM hook called for established status */
conn_finished : 1,
map_valid : 1,
+ map_csum_reqd : 1,
+ map_data_fin : 1,
mpc_map : 1,
backup : 1,
send_mp_prio : 1,
+ send_mp_fail : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote key */
- disposable : 1; /* ctx can be free at ulp release time */
+ disposable : 1, /* ctx can be free at ulp release time */
+ stale : 1; /* unable to snd/rcv data, do not use for xmit */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
u64 thmac;
@@ -412,11 +447,13 @@ struct mptcp_subflow_context {
u8 reset_seen:1;
u8 reset_transient:1;
u8 reset_reason:4;
+ u8 stale_count;
long delegated_status;
struct list_head delegated_node; /* link into delegated_action, protected by local BH */
- u32 setsockopt_seq;
+ u32 setsockopt_seq;
+ u32 stale_rcv_tstamp;
struct sock *tcp_sock; /* tcp sk backpointer */
struct sock *conn; /* parent mptcp_sock */
@@ -522,29 +559,34 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
}
-int mptcp_is_enabled(struct net *net);
-unsigned int mptcp_get_add_addr_timeout(struct net *net);
+int mptcp_is_enabled(const struct net *net);
+unsigned int mptcp_get_add_addr_timeout(const struct net *net);
+int mptcp_is_checksum_enabled(const struct net *net);
+int mptcp_allow_join_id0(const struct net *net);
+unsigned int mptcp_stale_loss_cnt(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
+bool __mptcp_retransmit_pending_data(struct sock *sk);
+void __mptcp_push_pending(struct sock *sk, unsigned int flags);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
+void mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
- const struct mptcp_addr_info *remote,
- u8 flags, int ifindex);
+ const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
-static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
+static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow)
{
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -556,6 +598,10 @@ static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT));
}
+void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
+
+bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
+
static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
struct mptcp_subflow_context *ctx)
{
@@ -567,6 +613,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
+static inline bool mptcp_has_another_subflow(struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ mptcp_for_each_subflow(msk, tmp) {
+ if (tmp != subflow)
+ return true;
+ }
+
+ return false;
+}
+
void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
@@ -575,10 +634,12 @@ int __init mptcp_proto_v6_init(void);
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct request_sock *req);
-void mptcp_get_options(const struct sk_buff *skb,
+void mptcp_get_options(const struct sock *sk,
+ const struct sk_buff *skb,
struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk);
+void __mptcp_set_connected(struct sock *sk);
static inline bool mptcp_is_fully_established(struct sock *sk)
{
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
@@ -593,6 +654,14 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
int mptcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *option);
+u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq);
+static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
+{
+ if (use_64bit)
+ return cur_seq;
+
+ return __mptcp_expand_seq(old_seq, cur_seq);
+}
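
A plausible standalone take on the 32-to-64-bit expansion that __mptcp_expand_seq() performs when only the short DSN was received: rebuild the candidate in the current epoch and shift it by one epoch when it lands implausibly far from the last known sequence. This is an assumption about the general technique, not a transcription of the kernel arithmetic.

#include <stdint.h>
#include <stdio.h>

/* expand a 32-bit sequence number to 64 bits, choosing the candidate
 * closest to @old_seq (the last fully expanded value we have seen)
 */
static uint64_t expand_seq(uint64_t old_seq, uint32_t cur_seq32)
{
	uint64_t cur_seq = (old_seq & ~0xffffffffULL) | cur_seq32;

	/* wrapped past the 32-bit boundary since old_seq? */
	if (cur_seq + (1ULL << 31) < old_seq)
		return cur_seq + (1ULL << 32);
	/* cur_seq32 from before the wrap while old_seq is already past it? */
	if (cur_seq > old_seq + (1ULL << 31) && (old_seq & 0xffffffff00000000ULL))
		return cur_seq - (1ULL << 32);
	return cur_seq;
}

int main(void)
{
	uint64_t old_seq = 0x00000001fffffff0ULL;

	/* a small 32-bit value after the wrap expands into the next epoch */
	printf("%#llx\n", (unsigned long long)expand_seq(old_seq, 0x10));
	/* a value just below old_seq stays in the current epoch */
	printf("%#llx\n", (unsigned long long)expand_seq(old_seq, 0xffffffe0));
	return 0;
}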
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
@@ -626,6 +695,8 @@ static inline void mptcp_write_space(struct sock *sk)
void mptcp_destroy_common(struct mptcp_sock *msk);
+#define MPTCP_TOKEN_MAX_RETRIES 4
+
void __init mptcp_token_init(void);
static inline void mptcp_token_init_request(struct request_sock *req)
{
@@ -649,6 +720,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
+void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
@@ -667,6 +740,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
u8 bkup);
+void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
@@ -675,6 +749,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr);
+int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+ u8 *flags, int *ifindex);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
@@ -689,22 +765,18 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
- return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
+ return READ_ONCE(msk->pm.addr_signal) &
+ (BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO));
}
-static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
-{
- return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
-}
-
-static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk)
+static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk)
{
- return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6);
+ return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
}
-static inline bool mptcp_pm_should_add_signal_port(struct mptcp_sock *msk)
+static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk)
{
- return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_PORT);
+ return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO);
}
static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
@@ -735,8 +807,10 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list)
return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1;
}
-bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
- struct mptcp_addr_info *saddr, bool *echo, bool *port);
+bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb,
+ unsigned int opt_size, unsigned int remaining,
+ struct mptcp_addr_info *addr, bool *echo,
+ bool *port, bool *drop_other_suboptions);
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
@@ -752,9 +826,6 @@ unsigned int mptcp_pm_get_add_addr_accept_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
-int mptcp_setsockopt(struct sock *sk, int level, int optname,
- sockptr_t optval, unsigned int optlen);
-
void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index a79798189599..8c03afac5ca0 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -140,8 +140,34 @@ static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
}
+static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
+{
+ sockptr_t optval = KERNEL_SOCKPTR(&val);
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ int ret;
+
+ ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
+ optval, sizeof(val));
+ if (ret)
+ return ret;
+
+ lock_sock(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow = lock_sock_fast(ssk);
+
+ sock_set_timestamp(sk, optname, !!val);
+ unlock_sock_fast(ssk, slow);
+ }
+
+ release_sock(sk);
+ return 0;
+}
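
As a usage illustration: with this change a timestamp option set on the MPTCP socket is mirrored on every subflow, so a plain setsockopt() on the parent socket is enough and the resulting rx timestamps are delivered by recvmsg() as cmsgs. A minimal sketch, with error handling reduced and the IPPROTO_MPTCP value assumed when the headers lack it.

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif

int main(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);

	if (fd < 0) {
		perror("socket");	/* kernel without MPTCP support */
		return 1;
	}

	/* request SCM_TIMESTAMPNS cmsgs; the msk propagates this to subflows */
	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS, &one, sizeof(one)) < 0)
		perror("setsockopt(SO_TIMESTAMPNS)");

	close(fd);
	return 0;
}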
+
static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
- sockptr_t optval, unsigned int optlen)
+ sockptr_t optval,
+ unsigned int optlen)
{
int val, ret;
@@ -164,11 +190,60 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
case SO_INCOMING_CPU:
mptcp_so_incoming_cpu(msk, val);
return 0;
+ case SO_TIMESTAMP_OLD:
+ case SO_TIMESTAMP_NEW:
+ case SO_TIMESTAMPNS_OLD:
+ case SO_TIMESTAMPNS_NEW:
+ return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
}
return -ENOPROTOOPT;
}
+static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
+ int optname,
+ sockptr_t optval,
+ unsigned int optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ struct so_timestamping timestamping;
+ int ret;
+
+ if (optlen == sizeof(timestamping)) {
+ if (copy_from_sockptr(&timestamping, optval,
+ sizeof(timestamping)))
+ return -EFAULT;
+ } else if (optlen == sizeof(int)) {
+ memset(&timestamping, 0, sizeof(timestamping));
+
+ if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
+ return -EFAULT;
+ } else {
+ return -EINVAL;
+ }
+
+ ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
+ KERNEL_SOCKPTR(&timestamping),
+ sizeof(timestamping));
+ if (ret)
+ return ret;
+
+ lock_sock(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow = lock_sock_fast(ssk);
+
+ sock_set_timestamping(sk, optname, timestamping);
+ unlock_sock_fast(ssk, slow);
+ }
+
+ release_sock(sk);
+
+ return 0;
+}
+
static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
unsigned int optlen)
{
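
The two new handlers above simply forward the classic timestamping requests to every established subflow. For reference, a minimal userspace sketch of how such a request reaches this path (hedged: it assumes IPPROTO_MPTCP is available, defines it to the current uapi value 262 otherwise, and uses the integer-flags form of SO_TIMESTAMPING, which the handler above accepts alongside struct so_timestamping; the SO_*_NEW constants come from recent socket headers):

	#include <stdio.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* flags */

	#ifndef IPPROTO_MPTCP
	#define IPPROTO_MPTCP 262
	#endif

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
		int one = 1;
		int flags = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;

		if (fd < 0) {
			perror("socket");
			return 1;
		}

		/* ns-resolution receive timestamps, propagated to each subflow */
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &one, sizeof(one)) < 0)
			perror("SO_TIMESTAMPNS_NEW");

		/* the richer SO_TIMESTAMPING interface, integer-flags form */
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING_NEW, &flags, sizeof(flags)) < 0)
			perror("SO_TIMESTAMPING_NEW");

		return 0;
	}

Either call lands in mptcp_setsockopt_sol_socket(), which applies the option on the MPTCP socket via sock_setsockopt() and then walks the subflow list under the msk lock.
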
@@ -251,9 +326,26 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_MARK:
case SO_INCOMING_CPU:
case SO_DEBUG:
- return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
+ case SO_TIMESTAMP_OLD:
+ case SO_TIMESTAMP_NEW:
+ case SO_TIMESTAMPNS_OLD:
+ case SO_TIMESTAMPNS_NEW:
+ return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
+ optlen);
+ case SO_TIMESTAMPING_OLD:
+ case SO_TIMESTAMPING_NEW:
+ return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
+ optval, optlen);
case SO_LINGER:
return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
+ case SO_RCVLOWAT:
+ case SO_RCVTIMEO_OLD:
+ case SO_RCVTIMEO_NEW:
+ case SO_BUSY_POLL:
+ case SO_PREFER_BUSY_POLL:
+ case SO_BUSY_POLL_BUDGET:
+ /* No need to copy: only relevant for msk */
+ return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
case SO_NO_CHECK:
case SO_DONTROUTE:
case SO_BROADCAST:
@@ -267,7 +359,24 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
return 0;
}
- return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
+ /* SO_OOBINLINE is not supported, let's avoid the related mess.
+ * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
+ * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER:
+ * we must be careful with subflows.
+ *
+ * SO_ATTACH_REUSEPORT_EBPF is not supported, as it checks
+ * explicitly the sk_protocol field
+ *
+ * SO_PEEK_OFF is unsupported, as it is for plain TCP
+ * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows
+ * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
+ * but likely needs careful design
+ *
+ * SO_ZEROCOPY is currently unsupported, TODO in sndmsg
+ * SO_TXTIME is currently unsupported
+ */
+
+ return -EOPNOTSUPP;
}
static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
@@ -299,72 +408,6 @@ static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
static bool mptcp_supported_sockopt(int level, int optname)
{
- if (level == SOL_SOCKET) {
- switch (optname) {
- case SO_DEBUG:
- case SO_REUSEPORT:
- case SO_REUSEADDR:
-
- /* the following ones need a better implementation,
- * but are quite common we want to preserve them
- */
- case SO_BINDTODEVICE:
- case SO_SNDBUF:
- case SO_SNDBUFFORCE:
- case SO_RCVBUF:
- case SO_RCVBUFFORCE:
- case SO_KEEPALIVE:
- case SO_PRIORITY:
- case SO_LINGER:
- case SO_TIMESTAMP_OLD:
- case SO_TIMESTAMP_NEW:
- case SO_TIMESTAMPNS_OLD:
- case SO_TIMESTAMPNS_NEW:
- case SO_TIMESTAMPING_OLD:
- case SO_TIMESTAMPING_NEW:
- case SO_RCVLOWAT:
- case SO_RCVTIMEO_OLD:
- case SO_RCVTIMEO_NEW:
- case SO_SNDTIMEO_OLD:
- case SO_SNDTIMEO_NEW:
- case SO_MARK:
- case SO_INCOMING_CPU:
- case SO_BINDTOIFINDEX:
- case SO_BUSY_POLL:
- case SO_PREFER_BUSY_POLL:
- case SO_BUSY_POLL_BUDGET:
-
- /* next ones are no-op for plain TCP */
- case SO_NO_CHECK:
- case SO_DONTROUTE:
- case SO_BROADCAST:
- case SO_BSDCOMPAT:
- case SO_PASSCRED:
- case SO_PASSSEC:
- case SO_RXQ_OVFL:
- case SO_WIFI_STATUS:
- case SO_NOFCS:
- case SO_SELECT_ERR_QUEUE:
- return true;
- }
-
- /* SO_OOBINLINE is not supported, let's avoid the related mess */
- /* SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
- * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER,
- * we must be careful with subflows
- */
- /* SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks
- * explicitly the sk_protocol field
- */
- /* SO_PEEK_OFF is unsupported, as it is for plain TCP */
- /* SO_MAX_PACING_RATE is unsupported, we must be careful with subflows */
- /* SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
- * but likely needs careful design
- */
- /* SO_ZEROCOPY is currently unsupported, TODO in sndmsg */
- /* SO_TXTIME is currently unsupported */
- return false;
- }
if (level == SOL_IP) {
switch (optname) {
/* should work fine */
@@ -574,12 +617,12 @@ int mptcp_setsockopt(struct sock *sk, int level, int optname,
pr_debug("msk=%p", msk);
- if (!mptcp_supported_sockopt(level, optname))
- return -ENOPROTOOPT;
-
if (level == SOL_SOCKET)
return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
+ if (!mptcp_supported_sockopt(level, optname))
+ return -ENOPROTOOPT;
+
/* @@ the meaning of setsockopt() when the socket is connected and
* there are multiple subflows is not yet defined. It is up to the
* MPTCP-level socket to configure the subflows until the subflow
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index be1de4084196..1de7ce883c37 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -108,6 +108,8 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
+ subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
+ subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener));
subflow_req->msk = NULL;
mptcp_token_init_request(req);
}
@@ -139,6 +141,7 @@ static int subflow_check_req(struct request_sock *req,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
+ bool opt_mp_capable, opt_mp_join;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
@@ -150,19 +153,21 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL;
#endif
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
- if (mp_opt.mp_capable) {
+ opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
+ opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ if (opt_mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
- if (mp_opt.mp_join)
+ if (opt_mp_join)
return 0;
- } else if (mp_opt.mp_join) {
+ } else if (opt_mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
}
- if (mp_opt.mp_capable && listener->request_mptcp) {
- int err, retries = 4;
+ if (opt_mp_capable && listener->request_mptcp) {
+ int err, retries = MPTCP_TOKEN_MAX_RETRIES;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
@@ -192,7 +197,7 @@ again:
else
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT);
- } else if (mp_opt.mp_join && listener->request_mptcp) {
+ } else if (opt_mp_join && listener->request_mptcp) {
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
subflow_req->mp_join = 1;
subflow_req->backup = mp_opt.backup;
@@ -212,11 +217,6 @@ again:
ntohs(inet_sk(sk_listener)->inet_sport),
ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
- sock_put((struct sock *)subflow_req->msk);
- mptcp_token_destroy_request(req);
- tcp_request_sock_ops.destructor(req);
- subflow_req->msk = NULL;
- subflow_req->mp_join = 0;
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
return -EPERM;
}
@@ -228,6 +228,8 @@ again:
if (unlikely(req->syncookie)) {
if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_init_req_cookie_join_save(subflow_req, skb);
+ else
+ return -EPERM;
}
pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
@@ -244,15 +246,18 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
+ bool opt_mp_capable, opt_mp_join;
int err;
subflow_init_req(req, sk_listener);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
- if (mp_opt.mp_capable && mp_opt.mp_join)
+ opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
+ opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ if (opt_mp_capable && opt_mp_join)
return -EINVAL;
- if (mp_opt.mp_capable && listener->request_mptcp) {
+ if (opt_mp_capable && listener->request_mptcp) {
if (mp_opt.sndr_key == 0)
return -EINVAL;
@@ -263,13 +268,11 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
subflow_req->mp_capable = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
- } else if (mp_opt.mp_join && listener->request_mptcp) {
+ } else if (opt_mp_join && listener->request_mptcp) {
if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
return -EINVAL;
- if (mptcp_can_accept_new_subflow(subflow_req->msk))
- subflow_req->mp_join = 1;
-
+ subflow_req->mp_join = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
}
@@ -371,6 +374,24 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport;
}
+void __mptcp_set_connected(struct sock *sk)
+{
+ if (sk->sk_state == TCP_SYN_SENT) {
+ inet_sk_state_store(sk, TCP_ESTABLISHED);
+ sk->sk_state_change(sk);
+ }
+}
+
+static void mptcp_set_connected(struct sock *sk)
+{
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_set_connected(sk);
+ else
+ set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags);
+ mptcp_data_unlock(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -379,11 +400,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);
- if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
- inet_sk_state_store(parent, TCP_ESTABLISHED);
- parent->sk_state_change(parent);
- }
-
/* be sure no special action on any packet other than syn-ack */
if (subflow->conn_finished)
return;
@@ -394,9 +410,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (subflow->request_mptcp) {
- if (!mp_opt.mp_capable) {
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
@@ -404,6 +420,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto fallback;
}
+ if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD)
+ WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
+ if (mp_opt.deny_join_id0)
+ WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
@@ -411,18 +431,21 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->remote_key);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
+ mptcp_set_connected(parent);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
- if (!mp_opt.mp_join) {
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) {
subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
}
+ subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
- subflow->thmac, subflow->remote_nonce);
+ pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
+ subflow, subflow->thmac, subflow->remote_nonce,
+ subflow->backup);
if (!subflow_thmac_valid(subflow)) {
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
@@ -430,15 +453,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto do_reset;
}
+ if (!mptcp_finish_join(sk))
+ goto do_reset;
+
subflow_generate_hmac(subflow->local_key, subflow->remote_key,
subflow->local_nonce,
subflow->remote_nonce,
hmac);
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (!mptcp_finish_join(sk))
- goto do_reset;
-
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
@@ -451,6 +474,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
} else if (mptcp_check_fallback(sk)) {
fallback:
mptcp_rcv_space_init(mptcp_sk(parent), sk);
+ mptcp_set_connected(parent);
}
return;
@@ -558,6 +582,7 @@ static void mptcp_sock_destruct(struct sock *sk)
static void mptcp_force_close(struct sock *sk)
{
+ /* the msk is not yet exposed to user-space */
inet_sk_state_store(sk, TCP_CLOSE);
sk_common_release(sk);
}
@@ -616,10 +641,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
- /* After child creation we must look for 'mp_capable' even when options
+ /* After child creation we must look for MPC even when options
* are not parsed
*/
- mp_opt.mp_capable = 0;
+ mp_opt.suboptions = 0;
/* hopefully temporary handling for MP_JOIN+syncookie */
subflow_req = mptcp_subflow_rsk(req);
@@ -638,8 +663,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* reordered MPC will cause fallback, but we don't have other
* options.
*/
- mptcp_get_options(skb, &mp_opt);
- if (!mp_opt.mp_capable) {
+ mptcp_get_options(sk, skb, &mp_opt);
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
fallback = true;
goto create_child;
}
@@ -648,8 +673,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- mptcp_get_options(skb, &mp_opt);
- if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
+ mptcp_get_options(sk, skb, &mp_opt);
+ if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
+ !subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
fallback = true;
@@ -706,7 +732,7 @@ create_child:
/* with OoO packets we can reach here without ingress
* mpc option
*/
- if (mp_opt.mp_capable)
+ if (mp_opt.suboptions & OPTIONS_MPTCP_MPC)
mptcp_subflow_fully_established(ctx, &mp_opt);
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
@@ -775,15 +801,6 @@ enum mapping_status {
MAPPING_DUMMY
};
-static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
-{
- if ((u32)seq == (u32)old_seq)
- return old_seq;
-
- /* Assume map covers data not mapped yet. */
- return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
-}
-
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
@@ -824,10 +841,94 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
return true;
}
+static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb,
+ bool csum_reqd)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct csum_pseudo_header header;
+ u32 offset, seq, delta;
+ __wsum csum;
+ int len;
+
+ if (!csum_reqd)
+ return MAPPING_OK;
+
+ /* mapping already validated on previous traversal */
+ if (subflow->map_csum_len == subflow->map_data_len)
+ return MAPPING_OK;
+
+ /* traverse the receive queue, ensuring it contains a full
+ * DSS mapping and accumulating the related csum.
+ * Preserve the accumulated csum across multiple calls, to compute
+ * the csum only once
+ */
+ delta = subflow->map_data_len - subflow->map_csum_len;
+ for (;;) {
+ seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len;
+ offset = seq - TCP_SKB_CB(skb)->seq;
+
+ /* if the current skb has not been accounted yet, csum its contents
+ * up to the amount covered by the current DSS
+ */
+ if (offset < skb->len) {
+ __wsum csum;
+
+ len = min(skb->len - offset, delta);
+ csum = skb_checksum(skb, offset, len, 0);
+ subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum,
+ subflow->map_csum_len);
+
+ delta -= len;
+ subflow->map_csum_len += len;
+ }
+ if (delta == 0)
+ break;
+
+ if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) {
+ /* if this subflow is closed, the partial mapping
+ * will be never completed; flush the pending skbs, so
+ * that subflow_sched_work_if_closed() can kick in
+ */
+ if (unlikely(ssk->sk_state == TCP_CLOSE))
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+
+ /* not enough data to validate the csum */
+ return MAPPING_EMPTY;
+ }
+
+ /* the DSS mapping for the next skbs will be validated later,
+ * when a get_mapping_status call processes them
+ */
+ skb = skb->next;
+ }
+
+ /* note that 'map_data_len' accounts only for the carried data and does
+ * not include the possible seq increment due to the data fin,
+ * while the pseudo header requires the original DSS data len,
+ * including that
+ */
+ header.data_seq = cpu_to_be64(subflow->map_seq);
+ header.subflow_seq = htonl(subflow->map_subflow_seq);
+ header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
+ if (unlikely(csum_fold(csum))) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
+ return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ }
+
+ return MAPPING_OK;
+}
+
static enum mapping_status get_mapping_status(struct sock *ssk,
struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ bool csum_reqd = READ_ONCE(msk->csum_enabled);
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
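
For reference, the value checked here is the plain Internet checksum over the RFC 8684 DSS pseudo-header (64-bit data sequence number, 32-bit subflow sequence number, 16-bit data-level length, zeroed checksum field) followed by the mapped data; the kernel accumulates it with csum_partial()/csum_fold() across possibly many skbs, as in validate_data_csum() above. A standalone, userspace-style sketch of the sender-side computation (hedged: helper names are made up, and it assumes no DATA_FIN, so the data-level length equals the summed payload length):

	#include <stdint.h>
	#include <stddef.h>

	/* 16-bit one's complement accumulation, as used by the Internet checksum */
	static uint32_t csum_add(uint32_t sum, const uint8_t *buf, size_t len)
	{
		while (len > 1) {
			sum += ((uint32_t)buf[0] << 8) | buf[1];
			buf += 2;
			len -= 2;
		}
		if (len)			/* odd trailing byte, zero-padded */
			sum += (uint32_t)buf[0] << 8;
		return sum;
	}

	static uint16_t csum_finish(uint32_t sum)
	{
		while (sum >> 16)
			sum = (sum & 0xffff) + (sum >> 16);
		return (uint16_t)~sum;
	}

	/* DSS checksum: pseudo-header (data_seq, subflow_seq, data_len, csum=0)
	 * in network byte order, followed by the mapped payload
	 */
	static uint16_t dss_csum(uint64_t data_seq, uint32_t subflow_seq,
				 uint16_t data_len, const uint8_t *data)
	{
		uint8_t hdr[16];
		int i;

		for (i = 0; i < 8; i++)
			hdr[i] = (uint8_t)(data_seq >> (56 - 8 * i));
		for (i = 0; i < 4; i++)
			hdr[8 + i] = (uint8_t)(subflow_seq >> (24 - 8 * i));
		hdr[12] = (uint8_t)(data_len >> 8);
		hdr[13] = (uint8_t)(data_len & 0xff);
		hdr[14] = 0;
		hdr[15] = 0;		/* checksum field is zero while computing */

		return csum_finish(csum_add(csum_add(0, hdr, sizeof(hdr)),
					    data, data_len));
	}

The receiver folds the received checksum into the same running sum (that is what seeding map_data_csum with csum_unfold(mpext->csum) does above) and accepts the mapping only if the final fold is zero.
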
@@ -907,22 +1008,17 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len--;
}
- if (!mpext->dsn64) {
- map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
- mpext->data_seq);
- pr_debug("expanded seq=%llu", subflow->map_seq);
- } else {
- map_seq = mpext->data_seq;
- }
+ map_seq = mptcp_expand_seq(READ_ONCE(msk->ack_seq), mpext->data_seq, mpext->dsn64);
WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);
if (subflow->map_valid) {
/* Allow replacing only with an identical map */
if (subflow->map_seq == map_seq &&
subflow->map_subflow_seq == mpext->subflow_seq &&
- subflow->map_data_len == data_len) {
+ subflow->map_data_len == data_len &&
+ subflow->map_csum_reqd == mpext->csum_reqd) {
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+ goto validate_csum;
}
/* If this skb data are fully covered by the current mapping,
@@ -934,27 +1030,41 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
}
/* will validate the next map after consuming the current one */
- return MAPPING_OK;
+ goto validate_csum;
}
subflow->map_seq = map_seq;
subflow->map_subflow_seq = mpext->subflow_seq;
subflow->map_data_len = data_len;
subflow->map_valid = 1;
+ subflow->map_data_fin = mpext->data_fin;
subflow->mpc_map = mpext->mpc_map;
- pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
+ subflow->map_csum_reqd = mpext->csum_reqd;
+ subflow->map_csum_len = 0;
+ subflow->map_data_csum = csum_unfold(mpext->csum);
+
+ /* Cfr RFC 8684 Section 3.3.0 */
+ if (unlikely(subflow->map_csum_reqd != csum_reqd))
+ return MAPPING_INVALID;
+
+ pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
subflow->map_seq, subflow->map_subflow_seq,
- subflow->map_data_len);
+ subflow->map_data_len, subflow->map_csum_reqd,
+ subflow->map_data_csum);
validate_seq:
/* we revalidate valid mapping on new skb, because we must ensure
* the current skb is completely covered by the available mapping
*/
- if (!validate_mapping(ssk, skb))
+ if (!validate_mapping(ssk, skb)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH);
return MAPPING_INVALID;
+ }
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+
+validate_csum:
+ return validate_data_csum(ssk, skb, csum_reqd);
}
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
@@ -1055,6 +1165,20 @@ no_data:
fallback:
/* RFC 8684 section 3.7. */
+ if (subflow->send_mp_fail) {
+ if (mptcp_has_another_subflow(ssk)) {
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+ }
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, 0);
+ return true;
+ }
+
if (subflow->mp_join || subflow->fully_established) {
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
@@ -1137,7 +1261,7 @@ void __mptcp_error_report(struct sock *sk)
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
smp_wmb();
- sk->sk_error_report(sk);
+ sk_error_report(sk);
break;
}
}
@@ -1253,8 +1377,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
}
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
- const struct mptcp_addr_info *remote,
- u8 flags, int ifindex)
+ const struct mptcp_addr_info *remote)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
@@ -1265,6 +1388,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
struct sock *ssk;
u32 remote_token;
int addrlen;
+ int ifindex;
+ u8 flags;
int err;
if (!mptcp_is_fully_established(sk))
@@ -1288,6 +1413,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
local_id = err;
}
+ mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
+ &flags, &ifindex);
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
@@ -1489,10 +1616,7 @@ static void subflow_state_change(struct sock *sk)
mptcp_rcv_space_init(mptcp_sk(parent), sk);
pr_fallback(mptcp_sk(parent));
subflow->conn_finished = 1;
- if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
- inet_sk_state_store(parent, TCP_ESTABLISHED);
- parent->sk_state_change(parent);
- }
+ mptcp_set_connected(parent);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c
index abe0fd099746..37127781aee9 100644
--- a/net/mptcp/syncookies.c
+++ b/net/mptcp/syncookies.c
@@ -37,7 +37,21 @@ static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp
static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net)
{
- u32 i = skb_get_hash(skb) ^ net_hash_mix(net);
+ static u32 mptcp_join_hash_secret __read_mostly;
+ struct tcphdr *th = tcp_hdr(skb);
+ u32 seq, i;
+
+ net_get_random_once(&mptcp_join_hash_secret,
+ sizeof(mptcp_join_hash_secret));
+
+ if (th->syn)
+ seq = TCP_SKB_CB(skb)->seq;
+ else
+ seq = TCP_SKB_CB(skb)->seq - 1;
+
+ i = jhash_3words(seq, net_hash_mix(net),
+ (__force __u32)th->source << 16 | (__force __u32)th->dest,
+ mptcp_join_hash_secret);
return i % ARRAY_SIZE(join_entries);
}
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 8f0270a780ce..a98e554b034f 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -33,7 +33,6 @@
#include <net/mptcp.h>
#include "protocol.h"
-#define TOKEN_MAX_RETRIES 4
#define TOKEN_MAX_CHAIN_LEN 4
struct token_bucket {
@@ -153,12 +152,9 @@ int mptcp_token_new_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- int retries = TOKEN_MAX_RETRIES;
+ int retries = MPTCP_TOKEN_MAX_RETRIES;
struct token_bucket *bucket;
- pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
- sk, subflow->local_key, subflow->token, subflow->idsn);
-
again:
mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
&subflow->idsn);
@@ -172,6 +168,9 @@ again:
goto again;
}
+ pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
+ sk, subflow->local_key, subflow->token, subflow->idsn);
+
WRITE_ONCE(msk->token, subflow->token);
__sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
bucket->chain_len++;
diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig
index 93309081f5a4..ea1dd32b6b1f 100644
--- a/net/ncsi/Kconfig
+++ b/net/ncsi/Kconfig
@@ -17,3 +17,9 @@ config NCSI_OEM_CMD_GET_MAC
help
This allows to get MAC address from NCSI firmware and set them back to
controller.
+config NCSI_OEM_CMD_KEEP_PHY
+ bool "Keep PHY Link up"
+ depends on NET_NCSI
+ help
+ This allows keeping the PHY link up and prevents any channel resets
+ during host load.
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 49031f804276..03757e76bb6b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -78,6 +78,10 @@ enum {
/* OEM Vendor Manufacture ID */
#define NCSI_OEM_MFR_MLX_ID 0x8119
#define NCSI_OEM_MFR_BCM_ID 0x113d
+#define NCSI_OEM_MFR_INTEL_ID 0x157
+/* Intel specific OEM command */
+#define NCSI_OEM_INTEL_CMD_GMA 0x06 /* CMD ID for Get MAC */
+#define NCSI_OEM_INTEL_CMD_KEEP_PHY 0x20 /* CMD ID for Keep PHY up */
/* Broadcom specific OEM Command */
#define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */
/* Mellanox specific OEM Command */
@@ -86,6 +90,8 @@ enum {
#define NCSI_OEM_MLX_CMD_SMAF 0x01 /* CMD ID for Set MC Affinity */
#define NCSI_OEM_MLX_CMD_SMAF_PARAM 0x07 /* Parameter for SMAF */
/* OEM Command payload lengths*/
+#define NCSI_OEM_INTEL_CMD_GMA_LEN 5
+#define NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN 7
#define NCSI_OEM_BCM_CMD_GMA_LEN 12
#define NCSI_OEM_MLX_CMD_GMA_LEN 8
#define NCSI_OEM_MLX_CMD_SMAF_LEN 60
@@ -95,6 +101,7 @@ enum {
/* Mac address offset in OEM response */
#define BCM_MAC_ADDR_OFFSET 28
#define MLX_MAC_ADDR_OFFSET 8
+#define INTEL_MAC_ADDR_OFFSET 1
struct ncsi_channel_version {
@@ -238,7 +245,7 @@ struct ncsi_package {
struct ncsi_dev_priv *ndp; /* NCSI device */
spinlock_t lock; /* Protect the package */
unsigned int channel_num; /* Number of channels */
- struct list_head channels; /* List of chanels */
+ struct list_head channels; /* List of channels */
struct list_head node; /* Form list of packages */
bool multi_channel; /* Enable multiple channels */
@@ -271,6 +278,7 @@ enum {
ncsi_dev_state_probe_mlx_gma,
ncsi_dev_state_probe_mlx_smaf,
ncsi_dev_state_probe_cis,
+ ncsi_dev_state_probe_keep_phy,
ncsi_dev_state_probe_gvi,
ncsi_dev_state_probe_gc,
ncsi_dev_state_probe_gls,
@@ -339,7 +347,7 @@ struct ncsi_cmd_arg {
unsigned char type; /* Command in the NCSI packet */
unsigned char id; /* Request ID (sequence number) */
unsigned char package; /* Destination package ID */
- unsigned char channel; /* Detination channel ID or 0x1f */
+ unsigned char channel; /* Destination channel ID or 0x1f */
unsigned short payload; /* Command packet payload length */
unsigned int req_flags; /* NCSI request properties */
union {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index ffff8da707b8..7121ce2a47c0 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -627,7 +627,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return 0;
}
-/* Find an outstanding VLAN tag and constuct a "Set VLAN Filter - Enable"
+/* Find an outstanding VLAN tag and construct a "Set VLAN Filter - Enable"
* packet.
*/
static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
@@ -689,6 +689,35 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return 0;
}
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
+
+static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
+{
+ unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN];
+ int ret = 0;
+
+ nca->payload = NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN;
+
+ memset(data, 0, NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN);
+ *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID);
+
+ data[4] = NCSI_OEM_INTEL_CMD_KEEP_PHY;
+
+ /* PHY Link up attribute */
+ data[6] = 0x1;
+
+ nca->data = data;
+
+ ret = ncsi_xmit_cmd(nca);
+ if (ret)
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during configure\n",
+ nca->type);
+ return ret;
+}
+
+#endif
+
#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
/* NCSI OEM Command APIs */
@@ -700,7 +729,7 @@ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN;
memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN);
- *(unsigned int *)data = ntohl(NCSI_OEM_MFR_BCM_ID);
+ *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_BCM_ID);
data[5] = NCSI_OEM_BCM_CMD_GMA;
nca->data = data;
@@ -724,7 +753,7 @@ static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca)
nca->payload = NCSI_OEM_MLX_CMD_GMA_LEN;
memset(&u, 0, sizeof(u));
- u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID);
+ u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID);
u.data_u8[5] = NCSI_OEM_MLX_CMD_GMA;
u.data_u8[6] = NCSI_OEM_MLX_CMD_GMA_PARAM;
@@ -747,7 +776,7 @@ static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca)
int ret = 0;
memset(&u, 0, sizeof(u));
- u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID);
+ u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID);
u.data_u8[5] = NCSI_OEM_MLX_CMD_SMAF;
u.data_u8[6] = NCSI_OEM_MLX_CMD_SMAF_PARAM;
memcpy(&u.data_u8[MLX_SMAF_MAC_ADDR_OFFSET],
@@ -766,13 +795,36 @@ static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca)
return ret;
}
+static int ncsi_oem_gma_handler_intel(struct ncsi_cmd_arg *nca)
+{
+ unsigned char data[NCSI_OEM_INTEL_CMD_GMA_LEN];
+ int ret = 0;
+
+ nca->payload = NCSI_OEM_INTEL_CMD_GMA_LEN;
+
+ memset(data, 0, NCSI_OEM_INTEL_CMD_GMA_LEN);
+ *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID);
+ data[4] = NCSI_OEM_INTEL_CMD_GMA;
+
+ nca->data = data;
+
+ ret = ncsi_xmit_cmd(nca);
+ if (ret)
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during configure\n",
+ nca->type);
+
+ return ret;
+}
+
/* OEM Command handlers initialization */
static struct ncsi_oem_gma_handler {
unsigned int mfr_id;
int (*handler)(struct ncsi_cmd_arg *nca);
} ncsi_oem_gma_handlers[] = {
{ NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm },
- { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }
+ { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx },
+ { NCSI_OEM_MFR_INTEL_ID, ncsi_oem_gma_handler_intel }
};
static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
@@ -1392,7 +1444,23 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
}
nd->state = ncsi_dev_state_probe_gvi;
+ if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
+ nd->state = ncsi_dev_state_probe_keep_phy;
+ break;
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
+ case ncsi_dev_state_probe_keep_phy:
+ ndp->pending_req_num = 1;
+
+ nca.type = NCSI_PKT_CMD_OEM;
+ nca.package = ndp->active_package->id;
+ nca.channel = 0;
+ ret = ncsi_oem_keep_phy_intel(&nca);
+ if (ret)
+ goto error;
+
+ nd->state = ncsi_dev_state_probe_gvi;
break;
+#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */
case ncsi_dev_state_probe_gvi:
case ncsi_dev_state_probe_gc:
case ncsi_dev_state_probe_gls:
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 80938b338fee..ba66c7dc3a21 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -178,6 +178,12 @@ struct ncsi_rsp_oem_bcm_pkt {
unsigned char data[]; /* Cmd specific Data */
};
+/* Intel Response Data */
+struct ncsi_rsp_oem_intel_pkt {
+ unsigned char cmd; /* OEM Command ID */
+ unsigned char data[]; /* Cmd specific Data */
+};
+
/* Get Link Status */
struct ncsi_rsp_gls_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 888ccc2d4e34..6447a09932f5 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -403,7 +403,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
/* Update to VLAN mode */
cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
ncm->enable = 1;
- ncm->data[0] = ntohl(cmd->mode);
+ ncm->data[0] = ntohl((__force __be32)cmd->mode);
return 0;
}
@@ -699,12 +699,61 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
return 0;
}
+/* Response handler for Intel command Get Mac Address */
+static int ncsi_rsp_handler_oem_intel_gma(struct ncsi_request *nr)
+{
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct net_device *ndev = ndp->ndev.dev;
+ const struct net_device_ops *ops = ndev->netdev_ops;
+ struct ncsi_rsp_oem_pkt *rsp;
+ struct sockaddr saddr;
+ int ret = 0;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+
+ saddr.sa_family = ndev->type;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ memcpy(saddr.sa_data, &rsp->data[INTEL_MAC_ADDR_OFFSET], ETH_ALEN);
+ /* Increase mac address by 1 for BMC's address */
+ eth_addr_inc((u8 *)saddr.sa_data);
+ if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+ return -ENXIO;
+
+ /* Set the flag for GMA command which should only be called once */
+ ndp->gma_flag = 1;
+
+ ret = ops->ndo_set_mac_address(ndev, &saddr);
+ if (ret < 0)
+ netdev_warn(ndev,
+ "NCSI: 'Writing mac address to device failed\n");
+
+ return ret;
+}
+
+/* Response handler for Intel card */
+static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_oem_intel_pkt *intel;
+ struct ncsi_rsp_oem_pkt *rsp;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+ intel = (struct ncsi_rsp_oem_intel_pkt *)(rsp->data);
+
+ if (intel->cmd == NCSI_OEM_INTEL_CMD_GMA)
+ return ncsi_rsp_handler_oem_intel_gma(nr);
+
+ return 0;
+}
+
static struct ncsi_rsp_oem_handler {
unsigned int mfr_id;
int (*handler)(struct ncsi_request *nr);
} ncsi_rsp_oem_handlers[] = {
{ NCSI_OEM_MFR_MLX_ID, ncsi_rsp_handler_oem_mlx },
- { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm }
+ { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm },
+ { NCSI_OEM_MFR_INTEL_ID, ncsi_rsp_handler_oem_intel }
};
/* Response handler for OEM command */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56a2531a3402..54395266339d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -19,6 +19,16 @@ config NETFILTER_FAMILY_BRIDGE
config NETFILTER_FAMILY_ARP
bool
+config NETFILTER_NETLINK_HOOK
+ tristate "Netfilter base hook dump support"
+ depends on NETFILTER_ADVANCED
+ depends on NF_TABLES
+ select NETFILTER_NETLINK
+ help
+ If this option is enabled, the kernel will include support
+ to list the base netfilter hooks via NFNETLINK.
+ This is helpful for debugging.
+
config NETFILTER_NETLINK_ACCT
tristate "Netfilter NFACCT over NFNETLINK interface"
depends on NETFILTER_ADVANCED
@@ -816,7 +826,7 @@ config NETFILTER_XT_TARGET_CLASSIFY
the priority of a packet. Some qdiscs can use this value for
classification, among these are:
- atm, cbq, dsmark, pfifo_fast, htb, prio
+ atm, cbq, dsmark, pfifo_fast, htb, prio
To compile it as a module, choose M here. If unsure, say N.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e80e010354b1..aab20e575ecd 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
obj-$(CONFIG_NETFILTER_NETLINK_OSF) += nfnetlink_osf.o
+obj-$(CONFIG_NETFILTER_NETLINK_HOOK) += nfnetlink_hook.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
@@ -73,7 +74,7 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o
nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
- nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o \
+ nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \
nft_chain_route.o nf_tables_offload.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o
@@ -211,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/
# IPVS
obj-$(CONFIG_IP_VS) += ipvs/
+
+# lwtunnel
+obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index de2d20c37cda..16ae92054baa 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1685,8 +1685,8 @@ static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
};
static int
-call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
- struct nlattr *tb[], enum ipset_adt adt,
+call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
+ struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt,
u32 flags, bool use_lineno)
{
int ret;
@@ -1738,8 +1738,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
*errline = lineno;
- netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
+ nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
/* Signal netlink not to send its ACK/errmsg. */
return -EINTR;
}
@@ -1783,7 +1782,7 @@ static int ip_set_ad(struct net *net, struct sock *ctnl,
attr[IPSET_ATTR_DATA],
set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
- ret = call_ad(ctnl, skb, set, tb, adt, flags,
+ ret = call_ad(net, ctnl, skb, set, tb, adt, flags,
use_lineno);
} else {
int nla_rem;
@@ -1794,7 +1793,7 @@ static int ip_set_ad(struct net *net, struct sock *ctnl,
nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
- ret = call_ad(ctnl, skb, set, tb, adt,
+ ret = call_ad(net, ctnl, skb, set, tb, adt,
flags, use_lineno);
if (ret < 0)
return ret;
@@ -1859,7 +1858,6 @@ static int ip_set_header(struct sk_buff *skb, const struct nfnl_info *info,
const struct ip_set *set;
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
- int ret = 0;
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME]))
@@ -1885,12 +1883,7 @@ static int ip_set_header(struct sk_buff *skb, const struct nfnl_info *info,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -1945,12 +1938,7 @@ static int ip_set_type(struct sk_buff *skb, const struct nfnl_info *info,
nlmsg_end(skb2, nlh2);
pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -1971,7 +1959,6 @@ static int ip_set_protocol(struct sk_buff *skb, const struct nfnl_info *info,
{
struct sk_buff *skb2;
struct nlmsghdr *nlh2;
- int ret = 0;
if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
return -IPSET_ERR_PROTOCOL;
@@ -1990,12 +1977,7 @@ static int ip_set_protocol(struct sk_buff *skb, const struct nfnl_info *info,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -2014,7 +1996,6 @@ static int ip_set_byname(struct sk_buff *skb, const struct nfnl_info *info,
struct nlmsghdr *nlh2;
ip_set_id_t id = IPSET_INVALID_ID;
const struct ip_set *set;
- int ret = 0;
if (unlikely(protocol_failed(attr) ||
!attr[IPSET_ATTR_SETNAME]))
@@ -2038,12 +2019,7 @@ static int ip_set_byname(struct sk_buff *skb, const struct nfnl_info *info,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
@@ -2065,7 +2041,6 @@ static int ip_set_byindex(struct sk_buff *skb, const struct nfnl_info *info,
struct nlmsghdr *nlh2;
ip_set_id_t id = IPSET_INVALID_ID;
const struct ip_set *set;
- int ret = 0;
if (unlikely(protocol_failed(attr) ||
!attr[IPSET_ATTR_INDEX]))
@@ -2091,12 +2066,7 @@ static int ip_set_byindex(struct sk_buff *skb, const struct nfnl_info *info,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret < 0)
- return ret;
-
- return 0;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
nla_put_failure:
nlmsg_cancel(skb2, nlh2);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index d1bef23fd4f5..dd30c03d5a23 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
+ /* 64bit division is not allowed on 32bit */
+ if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried) {
ip = ntohl(h->next.ip);
e.ip = htonl(ip);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 18346d18aa16..153de3457423 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
+ if (e.mark == 0 && e.ip == 0)
+ return -IPSET_ERR_HASH_ELEM;
if (adt == IPSET_TEST ||
!(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) {
@@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to)
+ if (ip > ip_to) {
+ if (e.mark == 0 && ip_to == 0)
+ return -IPSET_ERR_HASH_ELEM;
swap(ip, ip_to);
+ }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
+ if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index e1ca11196515..7303138e46be 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index ab179e064597..334fb1ad0e86 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 8f075b44cf64..7df94f437f60 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
+ if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
+
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index c1a11f041ac6..1422739d9aa2 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
+
if (retried)
ip = ntohl(h->next.ip);
do {
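
The loop added above walks the range once with ip_set_range_to_cidr() only to count how many CIDR blocks it expands to, and rejects anything above IPSET_MAX_RANGE with -ERANGE before the insertion loop runs. A standalone sketch of the same greedy counting (hedged: count_cidr_blocks() is a made-up name and only approximates what the kernel helper does):

	#include <stdint.h>

	/* Count the CIDR blocks needed to cover the inclusive range [ip, ip_to],
	 * both in host byte order: repeatedly take the largest block that starts
	 * aligned at 'ip' and still ends at or before 'ip_to'.
	 */
	static unsigned int count_cidr_blocks(uint32_t ip, uint32_t ip_to)
	{
		unsigned int n = 0;

		while (ip <= ip_to) {
			/* largest block allowed by the alignment of 'ip' */
			uint64_t size = ip ? ((uint64_t)ip & -(uint64_t)ip)
					   : ((uint64_t)1 << 32);

			/* shrink it until it also fits inside the range */
			while (size > (uint64_t)ip_to - ip + 1)
				size >>= 1;
			n++;

			if ((uint64_t)ip + size > UINT32_MAX)	/* reached 255.255.255.255 */
				break;
			ip += (uint32_t)size;
		}
		return n;
	}

For example, 192.168.0.3-192.168.0.10 expands to four blocks (a /32, a /30, a /31 and a /32), so n would be 4.
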
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index ddd51c2e1cb3..9810f5bf63f5 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0;
+ u32 ip = 0, ip_to = 0, ipn, n = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 6532f0505e66..3d09eefe998a 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
+ u64 n = 0, m = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index ec1564a1cb5a..09cf72eb37f8 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
+ u64 n = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
+ n++;
+ } while (ipn++ < ip_to);
+
+ if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 0e91d1e82f1c..19bcdb3141f6 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
+ u64 n = 0, m = 0;
bool with_ports = false;
int ret;
@@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
+ ipn = ip;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+ n++;
+ } while (ipn++ < ip_to);
+ ipn = ip2_from;
+ do {
+ ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+ m++;
+ } while (ipn++ < ip2_to);
+
+ if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
+ return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index d61886874940..271da8447b29 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -318,7 +318,7 @@ config IP_VS_MH_TAB_INDEX
comment 'IPVS application helper'
config IP_VS_FTP
- tristate "FTP protocol helper"
+ tristate "FTP protocol helper"
depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \
NF_CONNTRACK_FTP
select IP_VS_NFCT
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e0befcf8113a..94e18fb9690d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -21,7 +21,6 @@
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
-#include <linux/jhash.h>
#include <linux/siphash.h>
#include <linux/err.h>
#include <linux/percpu.h>
@@ -55,8 +54,6 @@
#include "nf_internals.h"
-extern unsigned int nf_conntrack_net_id;
-
__cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
EXPORT_SYMBOL_GPL(nf_conntrack_locks);
@@ -68,26 +65,21 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
- u32 last_bucket;
+ u32 next_bucket;
bool exiting;
bool early_drop;
- long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
-/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV 128u
-/* upper bound of full table scan */
-#define GC_MAX_SCAN_JIFFIES (16u * HZ)
-/* desired ratio of entries found to be expired */
-#define GC_EVICT_RATIO 50u
+#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
-static struct conntrack_gc_work conntrack_gc_work;
+#define MAX_CHAINLEN 64u
-extern unsigned int nf_conntrack_net_id;
+static struct conntrack_gc_work conntrack_gc_work;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
@@ -153,7 +145,15 @@ static void nf_conntrack_all_lock(void)
spin_lock(&nf_conntrack_locks_all_lock);
- nf_conntrack_locks_all = true;
+ /* For nf_conntrack_locks_all, only the latest time when another
+ * CPU will see an update is controlled by the "release" of the
+ * spin_lock below.
+ * The earliest time is not controlled, and thus KCSAN could detect
+ * a race when nf_conntrack_lock() reads the variable.
+ * WRITE_ONCE() is used to ensure the compiler will not
+ * optimize the write.
+ */
+ WRITE_ONCE(nf_conntrack_locks_all, true);
for (i = 0; i < CONNTRACK_LOCKS; i++) {
spin_lock(&nf_conntrack_locks[i]);
@@ -185,25 +185,31 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
seqcount_spinlock_t nf_conntrack_generation __read_mostly;
-static unsigned int nf_conntrack_hash_rnd __read_mostly;
+static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
const struct net *net)
{
- unsigned int n;
- u32 seed;
+ struct {
+ struct nf_conntrack_man src;
+ union nf_inet_addr dst_addr;
+ u32 net_mix;
+ u16 dport;
+ u16 proto;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
- /* The direction must be ignored, so we hash everything up to the
- * destination ports (which is a multiple of 4) and treat the last
- * three bytes manually.
- */
- seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
- n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
- return jhash2((u32 *)tuple, n, seed ^
- (((__force __u16)tuple->dst.u.all << 16) |
- tuple->dst.protonum));
+ memset(&combined, 0, sizeof(combined));
+
+ /* The direction must be ignored, so handle usable members manually. */
+ combined.src = tuple->src;
+ combined.dst_addr = tuple->dst.u3;
+ combined.net_mix = net_hash_mix(net);
+ combined.dport = (__force __u16)tuple->dst.u.all;
+ combined.proto = tuple->dst.protonum;
+
+ return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd);
}
static u32 scale_hash(u32 hash)
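
The rewritten hash function above replaces jhash over the raw tuple with siphash over an explicitly laid-out, SIPHASH_ALIGNMENT-aligned key, so neither uninitialised padding nor the reply-direction fields leak into the hash, and the secret becomes a proper 128-bit siphash key. The same pattern in isolation (hedged sketch: struct flow_key and flow_hash() are invented names; siphash() and get_random_once() are the in-kernel APIs used above):

	#include <linux/siphash.h>
	#include <linux/random.h>
	#include <linux/cache.h>
	#include <linux/types.h>

	struct flow_key {
		__be32	saddr;
		__be32	daddr;
		__be16	sport;
		__be16	dport;
		u32	net_mix;	/* e.g. net_hash_mix(net), keeps netns apart */
		u8	proto;
	} __aligned(SIPHASH_ALIGNMENT);

	static siphash_key_t flow_hash_key __read_mostly;

	static u32 flow_hash(const struct flow_key *key)
	{
		get_random_once(&flow_hash_key, sizeof(flow_hash_key));

		/* the caller must zero the whole struct before filling it, so
		 * that compiler-inserted padding hashes the same for equal keys
		 */
		return (u32)siphash(key, sizeof(*key), &flow_hash_key);
	}
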
@@ -666,8 +672,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
return false;
tstamp = nf_conn_tstamp_find(ct);
- if (tstamp && tstamp->stop == 0)
+ if (tstamp) {
+ s32 timeout = ct->timeout - nfct_time_stamp;
+
tstamp->stop = ktime_get_real_ns();
+ if (timeout < 0)
+ tstamp->stop -= jiffies_to_nsecs(-timeout);
+ }
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
@@ -831,7 +842,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
+ unsigned int chainlen = 0;
unsigned int sequence;
+ int err = -EEXIST;
zone = nf_ct_zone(ct);
@@ -845,15 +858,24 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* See if there's one in the list already, including reverse */
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+ if (chainlen++ > MAX_CHAINLEN)
+ goto chaintoolong;
+ }
+
+ chainlen = 0;
+
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
+ if (chainlen++ > MAX_CHAINLEN)
+ goto chaintoolong;
+ }
smp_wmb();
/* The caller holds a reference to this object */
@@ -863,11 +885,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
NF_CT_STAT_INC(net, insert);
local_bh_enable();
return 0;
-
+chaintoolong:
+ NF_CT_STAT_INC(net, chaintoolong);
+ err = -ENOSPC;
out:
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
- return -EEXIST;
+ return err;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
@@ -1080,6 +1104,7 @@ int
__nf_conntrack_confirm(struct sk_buff *skb)
{
const struct nf_conntrack_zone *zone;
+ unsigned int chainlen = 0, sequence;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -1087,7 +1112,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
- unsigned int sequence;
int ret = NF_DROP;
ct = nf_ct_get(skb, &ctinfo);
@@ -1147,15 +1171,28 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
+ if (chainlen++ > MAX_CHAINLEN)
+ goto chaintoolong;
+ }
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+ chainlen = 0;
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
+ if (chainlen++ > MAX_CHAINLEN) {
+chaintoolong:
+ nf_ct_add_to_dying_list(ct);
+ NF_CT_STAT_INC(net, chaintoolong);
+ NF_CT_STAT_INC(net, insert_failed);
+ ret = NF_DROP;
+ goto dying;
+ }
+ }
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
@@ -1354,17 +1391,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
- unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned int nf_conntrack_max95 = 0;
+ unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
+ unsigned int i, hashsz, nf_conntrack_max95 = 0;
+ unsigned long next_run = GC_SCAN_INTERVAL;
struct conntrack_gc_work *gc_work;
- unsigned int ratio, scanned = 0;
- unsigned long next_run;
-
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
- i = gc_work->last_bucket;
+ i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
@@ -1372,15 +1405,15 @@ static void gc_worker(struct work_struct *work)
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int hashsz;
struct nf_conn *tmp;
- i++;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hashsz);
- if (i >= hashsz)
- i = 0;
+ if (i >= hashsz) {
+ rcu_read_unlock();
+ break;
+ }
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct nf_conntrack_net *cnet;
@@ -1388,7 +1421,6 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
- scanned++;
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
continue;
@@ -1396,7 +1428,6 @@ static void gc_worker(struct work_struct *work)
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
- expired_count++;
continue;
}
@@ -1404,7 +1435,7 @@ static void gc_worker(struct work_struct *work)
continue;
net = nf_ct_net(tmp);
- cnet = net_generic(net, nf_conntrack_net_id);
+ cnet = nf_ct_pernet(net);
if (atomic_read(&cnet->count) < nf_conntrack_max95)
continue;
@@ -1429,7 +1460,14 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched();
- } while (++buckets < goal);
+ i++;
+
+ if (time_after(jiffies, end_time) && i < hashsz) {
+ gc_work->next_bucket = i;
+ next_run = 0;
+ break;
+ }
+ } while (i < hashsz);
if (gc_work->exiting)
return;
@@ -1440,40 +1478,17 @@ static void gc_worker(struct work_struct *work)
*
* This worker is only here to reap expired entries when system went
* idle after a busy period.
- *
- * The heuristics below are supposed to balance conflicting goals:
- *
- * 1. Minimize time until we notice a stale entry
- * 2. Maximize scan intervals to not waste cycles
- *
- * Normally, expire ratio will be close to 0.
- *
- * As soon as a sizeable fraction of the entries have expired
- * increase scan frequency.
*/
- ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio > GC_EVICT_RATIO) {
- gc_work->next_gc_run = min_interval;
- } else {
- unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
-
- BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
-
- gc_work->next_gc_run += min_interval;
- if (gc_work->next_gc_run > max)
- gc_work->next_gc_run = max;
+ if (next_run) {
+ gc_work->early_drop = false;
+ gc_work->next_bucket = 0;
}
-
- next_run = gc_work->next_gc_run;
- gc_work->last_bucket = i;
- gc_work->early_drop = false;
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
- gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1484,7 +1499,7 @@ __nf_conntrack_alloc(struct net *net,
const struct nf_conntrack_tuple *repl,
gfp_t gfp, u32 hash)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
unsigned int ct_count;
struct nf_conn *ct;
@@ -1556,7 +1571,7 @@ void nf_conntrack_free(struct nf_conn *ct)
nf_ct_ext_destroy(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
- cnet = net_generic(net, nf_conntrack_net_id);
+ cnet = nf_ct_pernet(net);
smp_mb__before_atomic();
atomic_dec(&cnet->count);
@@ -1614,7 +1629,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
GFP_ATOMIC);
local_bh_disable();
- cnet = net_generic(net, nf_conntrack_net_id);
+ cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
spin_lock(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple);
@@ -2317,7 +2332,7 @@ __nf_ct_unconfirmed_destroy(struct net *net)
void nf_ct_unconfirmed_destroy(struct net *net)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
might_sleep();
@@ -2333,7 +2348,7 @@ void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct iter_data d;
might_sleep();
@@ -2367,7 +2382,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
down_read(&net_rwsem);
for_each_net(net) {
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
if (atomic_read(&cnet->count) == 0)
continue;
@@ -2449,7 +2464,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
nf_ct_iterate_cleanup(kill_all, net, 0, 0);
if (atomic_read(&cnet->count) != 0)
@@ -2461,7 +2476,6 @@ i_see_dead_people:
}
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_conntrack_proto_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
free_percpu(net->ct.stat);
@@ -2613,26 +2627,24 @@ int nf_conntrack_init_start(void)
spin_lock_init(&nf_conntrack_locks[i]);
if (!nf_conntrack_htable_size) {
- /* Idea from tcp.c: use 1/16384 of memory.
- * On i386: 32MB machine has 512 buckets.
- * >= 1GB machines have 16384 buckets.
- * >= 4GB machines have 65536 buckets.
- */
nf_conntrack_htable_size
= (((nr_pages << PAGE_SHIFT) / 16384)
/ sizeof(struct hlist_head));
- if (nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
- nf_conntrack_htable_size = 65536;
+ if (BITS_PER_LONG >= 64 &&
+ nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
+ nf_conntrack_htable_size = 262144;
else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
- nf_conntrack_htable_size = 16384;
- if (nf_conntrack_htable_size < 32)
- nf_conntrack_htable_size = 32;
+ nf_conntrack_htable_size = 65536;
- /* Use a max. factor of four by default to get the same max as
- * with the old struct list_heads. When a table size is given
- * we use the old value of 8 to avoid reducing the max.
- * entries. */
- max_factor = 4;
+ if (nf_conntrack_htable_size < 1024)
+ nf_conntrack_htable_size = 1024;
+ /* Use a max. factor of one by default to keep the average
+ * hash chain length at 2 entries. Each entry has to be added
+ * twice (once for original direction, once for reply).
+ * When a table size is given we use the old value of 8 to
+ * avoid implicit reduction of the max entries setting.
+ */
+ max_factor = 1;
}
nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
@@ -2733,7 +2745,7 @@ void nf_conntrack_init_end(void)
int nf_conntrack_init_net(struct net *net)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
int ret = -ENOMEM;
int cpu;
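
The gc_worker() rework above replaces the scan-ratio heuristic with a fixed time budget (GC_SCAN_MAX_DURATION) and a resumable cursor (gc_work->next_bucket): when the budget runs out mid-table, the worker records where it stopped and reschedules itself immediately (next_run = 0). A minimal userspace sketch of that pattern, with hypothetical scan_bucket()/NBUCKETS standing in for the conntrack hash walk:

#include <stdio.h>
#include <time.h>

#define NBUCKETS 1024

static unsigned int next_bucket;        /* persists across runs, like gc_work->next_bucket */

static void scan_bucket(unsigned int i)
{
        (void)i;                        /* stand-in for evicting expired entries in bucket i */
}

/* Walk buckets until the table is done or the time budget is spent.
 * Returns 1 after a full pass, 0 when the caller should re-run at once
 * and resume from next_bucket. */
static int scan_with_budget(double budget_sec)
{
        clock_t deadline = clock() + (clock_t)(budget_sec * CLOCKS_PER_SEC);
        unsigned int i = next_bucket;

        while (i < NBUCKETS) {
                scan_bucket(i);
                i++;
                if (clock() > deadline && i < NBUCKETS) {
                        next_bucket = i;        /* resume here on the next run */
                        return 0;
                }
        }
        next_bucket = 0;                        /* full pass done, start over next time */
        return 1;
}

int main(void)
{
        while (!scan_with_budget(0.0005))
                ;                               /* immediate re-run models next_run = 0 */
        printf("scan finished\n");
        return 0;
}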
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 759d87aef95f..41768ff19464 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -27,8 +27,6 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_extend.h>
-extern unsigned int nf_conntrack_net_id;
-
static DEFINE_MUTEX(nf_ct_ecache_mutex);
#define ECACHE_RETRY_WAIT (HZ/10)
@@ -132,58 +130,77 @@ static void ecache_work(struct work_struct *work)
schedule_delayed_work(&cnet->ecache_dwork, delay);
}
-int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
- u32 portid, int report)
+static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
+ const unsigned int events,
+ const unsigned long missed,
+ const struct nf_ct_event *item)
{
- int ret = 0;
- struct net *net = nf_ct_net(ct);
+ struct nf_conn *ct = item->ct;
+ struct net *net = nf_ct_net(item->ct);
struct nf_ct_event_notifier *notify;
- struct nf_conntrack_ecache *e;
+ int ret;
+
+ if (!((events | missed) & e->ctmask))
+ return 0;
rcu_read_lock();
+
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
- if (!notify)
- goto out_unlock;
+ if (!notify) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ ret = notify->ct_event(events | missed, item);
+ rcu_read_unlock();
+
+ if (likely(ret >= 0 && missed == 0))
+ return 0;
+
+ spin_lock_bh(&ct->lock);
+ if (ret < 0)
+ e->missed |= events;
+ else
+ e->missed &= ~missed;
+ spin_unlock_bh(&ct->lock);
+
+ return ret;
+}
+
+int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
+ u32 portid, int report)
+{
+ struct nf_conntrack_ecache *e;
+ struct nf_ct_event item;
+ unsigned long missed;
+ int ret;
+
+ if (!nf_ct_is_confirmed(ct))
+ return 0;
e = nf_ct_ecache_find(ct);
if (!e)
- goto out_unlock;
+ return 0;
- if (nf_ct_is_confirmed(ct)) {
- struct nf_ct_event item = {
- .ct = ct,
- .portid = e->portid ? e->portid : portid,
- .report = report
- };
- /* This is a resent of a destroy event? If so, skip missed */
- unsigned long missed = e->portid ? 0 : e->missed;
-
- if (!((eventmask | missed) & e->ctmask))
- goto out_unlock;
-
- ret = notify->fcn(eventmask | missed, &item);
- if (unlikely(ret < 0 || missed)) {
- spin_lock_bh(&ct->lock);
- if (ret < 0) {
- /* This is a destroy event that has been
- * triggered by a process, we store the PORTID
- * to include it in the retransmission.
- */
- if (eventmask & (1 << IPCT_DESTROY)) {
- if (e->portid == 0 && portid != 0)
- e->portid = portid;
- e->state = NFCT_ECACHE_DESTROY_FAIL;
- } else {
- e->missed |= eventmask;
- }
- } else {
- e->missed &= ~missed;
- }
- spin_unlock_bh(&ct->lock);
- }
+ memset(&item, 0, sizeof(item));
+
+ item.ct = ct;
+ item.portid = e->portid ? e->portid : portid;
+ item.report = report;
+
+	/* Is this a resend of a destroy event? If so, skip missed */
+ missed = e->portid ? 0 : e->missed;
+
+ ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
+ if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
+ /* This is a destroy event that has been triggered by a process,
+ * we store the PORTID to include it in the retransmission.
+ */
+ if (e->portid == 0 && portid != 0)
+ e->portid = portid;
+ e->state = NFCT_ECACHE_DESTROY_FAIL;
}
-out_unlock:
- rcu_read_unlock();
+
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
@@ -192,53 +209,28 @@ EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
* disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
- struct net *net = nf_ct_net(ct);
- unsigned long events, missed;
- struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
- int ret;
-
- rcu_read_lock();
- notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
- if (notify == NULL)
- goto out_unlock;
+ unsigned long events;
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
- goto out_unlock;
+ return;
e = nf_ct_ecache_find(ct);
if (e == NULL)
- goto out_unlock;
+ return;
events = xchg(&e->cache, 0);
- /* We make a copy of the missed event cache without taking
- * the lock, thus we may send missed events twice. However,
- * this does not harm and it happens very rarely. */
- missed = e->missed;
-
- if (!((events | missed) & e->ctmask))
- goto out_unlock;
-
item.ct = ct;
item.portid = 0;
item.report = 0;
- ret = notify->fcn(events | missed, &item);
-
- if (likely(ret == 0 && !missed))
- goto out_unlock;
-
- spin_lock_bh(&ct->lock);
- if (ret < 0)
- e->missed |= events;
- else
- e->missed &= ~missed;
- spin_unlock_bh(&ct->lock);
-
-out_unlock:
- rcu_read_unlock();
+ /* We make a copy of the missed event cache without taking
+ * the lock, thus we may send missed events twice. However,
+ * this does not harm and it happens very rarely.
+ */
+ __nf_conntrack_eventmask_report(e, events, e->missed, &item);
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
@@ -248,11 +240,11 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
{
struct net *net = nf_ct_exp_net(exp);
- struct nf_exp_event_notifier *notify;
+ struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
rcu_read_lock();
- notify = rcu_dereference(net->ct.nf_expect_event_cb);
+ notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
goto out_unlock;
@@ -266,89 +258,38 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
.portid = portid,
.report = report
};
- notify->fcn(1 << event, &item);
+ notify->exp_event(1 << event, &item);
}
out_unlock:
rcu_read_unlock();
}
-int nf_conntrack_register_notifier(struct net *net,
- struct nf_ct_event_notifier *new)
+void nf_conntrack_register_notifier(struct net *net,
+ const struct nf_ct_event_notifier *new)
{
- int ret;
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
- if (notify != NULL) {
- ret = -EBUSY;
- goto out_unlock;
- }
+ WARN_ON_ONCE(notify);
rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
- ret = 0;
-
-out_unlock:
mutex_unlock(&nf_ct_ecache_mutex);
- return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-void nf_conntrack_unregister_notifier(struct net *net,
- struct nf_ct_event_notifier *new)
+void nf_conntrack_unregister_notifier(struct net *net)
{
- struct nf_ct_event_notifier *notify;
-
mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
- lockdep_is_held(&nf_ct_ecache_mutex));
- BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
- /* synchronize_rcu() is called from ctnetlink_exit. */
+ /* synchronize_rcu() is called after netns pre_exit */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
-int nf_ct_expect_register_notifier(struct net *net,
- struct nf_exp_event_notifier *new)
-{
- int ret;
- struct nf_exp_event_notifier *notify;
-
- mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
- lockdep_is_held(&nf_ct_ecache_mutex));
- if (notify != NULL) {
- ret = -EBUSY;
- goto out_unlock;
- }
- rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
- ret = 0;
-
-out_unlock:
- mutex_unlock(&nf_ct_ecache_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-
-void nf_ct_expect_unregister_notifier(struct net *net,
- struct nf_exp_event_notifier *new)
-{
- struct nf_exp_event_notifier *notify;
-
- mutex_lock(&nf_ct_ecache_mutex);
- notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
- lockdep_is_held(&nf_ct_ecache_mutex));
- BUG_ON(notify != new);
- RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
- mutex_unlock(&nf_ct_ecache_mutex);
- /* synchronize_rcu() is called from ctnetlink_exit. */
-}
-EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
-
void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
if (state == NFCT_ECACHE_DESTROY_FAIL &&
!delayed_work_pending(&cnet->ecache_dwork)) {
@@ -371,7 +312,7 @@ static const struct nf_ct_ext_type event_extend = {
void nf_conntrack_ecache_pernet_init(struct net *net)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
net->ct.sysctl_events = nf_ct_events;
cnet->ct_net = &net->ct;
@@ -380,7 +321,7 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
void nf_conntrack_ecache_pernet_fini(struct net *net)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
cancel_delayed_work_sync(&cnet->ecache_dwork);
}
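
The eventmask rework above funnels both report paths through __nf_conntrack_eventmask_report(), which owns the missed-event bookkeeping: a failed delivery adds the current events to e->missed, a successful one clears whatever backlog was retransmitted alongside them. A standalone sketch of that accounting, with a hypothetical deliver() that fails once to model a full receiver:

#include <stdio.h>

static unsigned long missed;            /* per-conntrack backlog, e->missed in the patch */
static int fail_next = 1;

/* Hypothetical notifier: the first delivery fails (e.g. receiver overrun),
 * later ones succeed. */
static int deliver(unsigned long events)
{
        (void)events;
        if (fail_next) {
                fail_next = 0;
                return -1;
        }
        return 0;
}

static int report(unsigned long events)
{
        unsigned long backlog = missed;
        int ret;

        if (!(events | backlog))
                return 0;

        ret = deliver(events | backlog);
        if (ret >= 0 && backlog == 0)
                return 0;                       /* common case: nothing pending, delivery worked */

        if (ret < 0)
                missed |= events;               /* remember what could not be sent */
        else
                missed &= ~backlog;             /* the retransmitted backlog is out, clear it */
        return ret;
}

int main(void)
{
        report(0x4UL);                          /* fails: 0x4 joins the backlog */
        report(0x1UL);                          /* succeeds: flushes and clears 0x4 */
        printf("missed = %#lx\n", missed);      /* backlog fully drained */
        return 0;
}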
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index efdd391b3f72..f562eeef4234 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -17,7 +17,7 @@
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>
@@ -41,9 +41,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
-static unsigned int nf_ct_expect_hashrnd __read_mostly;
-
-extern unsigned int nf_conntrack_net_id;
+static siphash_key_t nf_ct_expect_hashrnd __read_mostly;
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
@@ -58,7 +56,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
hlist_del_rcu(&exp->hnode);
- cnet = net_generic(net, nf_conntrack_net_id);
+ cnet = nf_ct_pernet(net);
cnet->expect_count--;
hlist_del_rcu(&exp->lnode);
@@ -83,15 +81,26 @@ static void nf_ct_expectation_timed_out(struct timer_list *t)
static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- unsigned int hash, seed;
+ struct {
+ union nf_inet_addr dst_addr;
+ u32 net_mix;
+ u16 dport;
+ u8 l3num;
+ u8 protonum;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
+ u32 hash;
get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
- seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
+ memset(&combined, 0, sizeof(combined));
+
+ combined.dst_addr = tuple->dst.u3;
+ combined.net_mix = net_hash_mix(n);
+ combined.dport = (__force __u16)tuple->dst.u.all;
+ combined.l3num = tuple->src.l3num;
+ combined.protonum = tuple->dst.protonum;
- hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
- (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
- (__force __u16)tuple->dst.u.all) ^ seed);
+ hash = siphash(&combined, sizeof(combined), &nf_ct_expect_hashrnd);
return reciprocal_scale(hash, nf_ct_expect_hsize);
}
@@ -123,7 +132,7 @@ __nf_ct_expect_find(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i;
unsigned int h;
@@ -164,7 +173,7 @@ nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL;
unsigned int h;
@@ -397,7 +406,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
master_help->expecting[exp->class]++;
hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
- cnet = net_generic(net, nf_conntrack_net_id);
+ cnet = nf_ct_pernet(net);
cnet->expect_count++;
NF_CT_STAT_INC(net, expect_create);
@@ -468,7 +477,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
}
}
- cnet = net_generic(net, nf_conntrack_net_id);
+ cnet = nf_ct_pernet(net);
if (cnet->expect_count >= nf_ct_expect_max) {
net_warn_ratelimited("nf_conntrack: expectation table full\n");
ret = -EMFILE;
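
nf_ct_expect_dst_hash() above switches from jhash over the raw address words to siphash over a zero-initialized, fixed-layout struct, so every hashed byte (including padding) is defined and net_hash_mix() becomes part of the hashed data instead of an xor'd seed. A userspace sketch of the same shape; keyed_hash() below is a deliberately simplified stand-in for SipHash (an illustrative assumption, not the real primitive), and scale() mirrors what reciprocal_scale() computes:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Placeholder keyed hash: key-mixed FNV-1a, NOT SipHash. */
static uint32_t keyed_hash(const void *data, size_t len, const uint8_t key[16])
{
        const uint8_t *p = data;
        uint64_t h = 1469598103934665603ULL;
        size_t i;

        for (i = 0; i < 16; i++)
                h = (h ^ key[i]) * 1099511628211ULL;
        for (i = 0; i < len; i++)
                h = (h ^ p[i]) * 1099511628211ULL;
        return (uint32_t)(h >> 32);
}

/* Same mapping as reciprocal_scale(): project a 32-bit hash onto [0, buckets). */
static uint32_t scale(uint32_t hash, uint32_t buckets)
{
        return (uint32_t)(((uint64_t)hash * buckets) >> 32);
}

struct key_fields {             /* fixed layout, memset to zero so padding cannot leak */
        uint32_t dst_addr[4];
        uint32_t net_mix;
        uint16_t dport;
        uint8_t  l3num;
        uint8_t  protonum;
};

int main(void)
{
        uint8_t key[16] = { 1, 2, 3 }; /* the kernel fills this via get_random_once() */
        struct key_fields k;

        memset(&k, 0, sizeof(k));
        k.dst_addr[0] = 0x0a000001;    /* 10.0.0.1, illustrative */
        k.dport      = 80;
        k.l3num      = 2;              /* AF_INET */
        k.protonum   = 6;              /* TCP */

        printf("bucket %u\n", scale(keyed_hash(&k, sizeof(k), key), 256));
        return 0;
}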
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index aafaff00baf1..2eb31ffb3d14 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -194,7 +194,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
if (tcpdatalen == 4) { /* Separate TPKT header */
/* Netmeeting sends TPKT header and data separately */
pr_debug("nf_ct_h323: separate TPKT header indicates "
- "there will be TPKT data of %hu bytes\n",
+ "there will be TPKT data of %d bytes\n",
tpktlen - 4);
info->tpkt_len[dir] = tpktlen - 4;
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index ac396cc8bfae..ae4488a13c70 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -43,8 +43,6 @@ MODULE_PARM_DESC(nf_conntrack_helper,
static DEFINE_MUTEX(nf_ct_nat_helpers_mutex);
static struct list_head nf_ct_nat_helpers __read_mostly;
-extern unsigned int nf_conntrack_net_id;
-
/* Stupid hash, but collision free for the default registrations of the
* helpers currently in the kernel. */
static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
@@ -214,7 +212,7 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
static struct nf_conntrack_helper *
nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
if (!cnet->sysctl_auto_assign_helper) {
if (cnet->auto_assign_helper_warned)
@@ -560,7 +558,7 @@ static const struct nf_ct_ext_type helper_extend = {
void nf_conntrack_helper_pernet_init(struct net *net)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
cnet->sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 8690fc07030f..f1e5443fe7c7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -218,6 +218,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
if (!help)
return 0;
+ rcu_read_lock();
helper = rcu_dereference(help->helper);
if (!helper)
goto out;
@@ -233,9 +234,11 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
nla_nest_end(skb, nest_helper);
out:
+ rcu_read_unlock();
return 0;
nla_put_failure:
+ rcu_read_unlock();
return -1;
}
@@ -703,7 +706,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
}
static int
-ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
+ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
{
const struct nf_conntrack_zone *zone;
struct net *net;
@@ -849,6 +852,11 @@ static int ctnetlink_done(struct netlink_callback *cb)
return 0;
}
+struct ctnetlink_filter_u32 {
+ u32 val;
+ u32 mask;
+};
+
struct ctnetlink_filter {
u8 family;
@@ -859,10 +867,8 @@ struct ctnetlink_filter {
struct nf_conntrack_tuple reply;
struct nf_conntrack_zone zone;
- struct {
- u_int32_t val;
- u_int32_t mask;
- } mark;
+ struct ctnetlink_filter_u32 mark;
+ struct ctnetlink_filter_u32 status;
};
static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
@@ -904,6 +910,46 @@ static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
struct nf_conntrack_zone *zone,
u_int32_t flags);
+static int ctnetlink_filter_parse_mark(struct ctnetlink_filter_u32 *mark,
+ const struct nlattr * const cda[])
+{
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (cda[CTA_MARK]) {
+ mark->val = ntohl(nla_get_be32(cda[CTA_MARK]));
+
+ if (cda[CTA_MARK_MASK])
+ mark->mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+ else
+ mark->mask = 0xffffffff;
+ } else if (cda[CTA_MARK_MASK]) {
+ return -EINVAL;
+ }
+#endif
+ return 0;
+}
+
+static int ctnetlink_filter_parse_status(struct ctnetlink_filter_u32 *status,
+ const struct nlattr * const cda[])
+{
+ if (cda[CTA_STATUS]) {
+ status->val = ntohl(nla_get_be32(cda[CTA_STATUS]));
+ if (cda[CTA_STATUS_MASK])
+ status->mask = ntohl(nla_get_be32(cda[CTA_STATUS_MASK]));
+ else
+ status->mask = status->val;
+
+ /* status->val == 0? always true, else always false. */
+ if (status->mask == 0)
+ return -EINVAL;
+ } else if (cda[CTA_STATUS_MASK]) {
+ return -EINVAL;
+ }
+
+ /* CTA_STATUS is NLA_U32, if this fires UAPI needs to be extended */
+ BUILD_BUG_ON(__IPS_MAX_BIT >= 32);
+ return 0;
+}
+
static struct ctnetlink_filter *
ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
{
@@ -921,18 +967,14 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
filter->family = family;
-#ifdef CONFIG_NF_CONNTRACK_MARK
- if (cda[CTA_MARK]) {
- filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
- if (cda[CTA_MARK_MASK])
- filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
- else
- filter->mark.mask = 0xffffffff;
- } else if (cda[CTA_MARK_MASK]) {
- err = -EINVAL;
+ err = ctnetlink_filter_parse_mark(&filter->mark, cda);
+ if (err)
goto err_filter;
- }
-#endif
+
+ err = ctnetlink_filter_parse_status(&filter->status, cda);
+ if (err)
+ goto err_filter;
+
if (!cda[CTA_FILTER])
return filter;
@@ -986,7 +1028,7 @@ err_filter:
static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
{
- return family || cda[CTA_MARK] || cda[CTA_FILTER];
+ return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS];
}
static int ctnetlink_start(struct netlink_callback *cb)
@@ -1079,6 +1121,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
{
struct ctnetlink_filter *filter = data;
struct nf_conntrack_tuple *tuple;
+ u32 status;
if (filter == NULL)
goto out;
@@ -1110,6 +1153,9 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
if ((ct->mark & filter->mark.mask) != filter->mark.val)
goto ignore_entry;
#endif
+ status = (u32)READ_ONCE(ct->status);
+ if ((status & filter->status.mask) != filter->status.val)
+ goto ignore_entry;
out:
return 1;
@@ -1492,6 +1538,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
[CTA_LABELS_MASK] = { .type = NLA_BINARY,
.len = NF_CT_LABELS_MAX_SIZE },
[CTA_FILTER] = { .type = NLA_NESTED },
+ [CTA_STATUS_MASK] = { .type = NLA_U32 },
};
static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
@@ -1528,7 +1575,7 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
+ u8 family = info->nfmsg->nfgen_family;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone;
@@ -1541,12 +1588,12 @@ static int ctnetlink_del_conntrack(struct sk_buff *skb,
if (cda[CTA_TUPLE_ORIG])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
- nfmsg->nfgen_family, &zone);
+ family, &zone);
else if (cda[CTA_TUPLE_REPLY])
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
- nfmsg->nfgen_family, &zone);
+ family, &zone);
else {
- u_int8_t u3 = nfmsg->version ? nfmsg->nfgen_family : AF_UNSPEC;
+ u_int8_t u3 = info->nfmsg->version ? family : AF_UNSPEC;
return ctnetlink_flush_conntrack(info->net, cda,
NETLINK_CB(skb).portid,
@@ -1586,8 +1633,7 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
+ u_int8_t u3 = info->nfmsg->nfgen_family;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone;
@@ -1628,9 +1674,8 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
ct = nf_ct_tuplehash_to_ctrack(h);
- err = -ENOMEM;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL) {
+ if (!skb2) {
nf_ct_put(ct);
return -ENOMEM;
}
@@ -1640,21 +1685,12 @@ static int ctnetlink_get_conntrack(struct sk_buff *skb,
NFNL_MSG_TYPE(info->nlh->nlmsg_type), ct,
true, 0);
nf_ct_put(ct);
- if (err <= 0)
- goto free;
-
- err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (err < 0)
- goto out;
-
- return 0;
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+ }
-free:
- kfree_skb(skb2);
-out:
- /* this avoids a loop in nfnetlink. */
- return err == -EAGAIN ? -ENOBUFS : err;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
}
static int ctnetlink_done_list(struct netlink_callback *cb)
@@ -2373,10 +2409,9 @@ static int ctnetlink_new_conntrack(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
- u_int8_t u3 = nfmsg->nfgen_family;
+ u_int8_t u3 = info->nfmsg->nfgen_family;
struct nf_conntrack_zone zone;
struct nf_conn *ct;
int err;
@@ -2493,7 +2528,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
htonl(st->search_restart)) ||
nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE,
- htonl(st->clash_resolve)))
+ htonl(st->clash_resolve)) ||
+ nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG,
+ htonl(st->chaintoolong)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -2590,21 +2627,12 @@ static int ctnetlink_stat_ct(struct sk_buff *skb, const struct nfnl_info *info,
info->nlh->nlmsg_seq,
NFNL_MSG_TYPE(info->nlh->nlmsg_type),
sock_net(skb->sk));
- if (err <= 0)
- goto free;
-
- err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (err < 0)
- goto out;
-
- return 0;
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+ }
-free:
- kfree_skb(skb2);
-out:
- /* this avoids a loop in nfnetlink. */
- return err == -EAGAIN ? -ENOBUFS : err;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
}
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
@@ -2643,6 +2671,8 @@ ctnetlink_glue_build_size(const struct nf_conn *ct)
+ nla_total_size(0) /* CTA_HELP */
+ nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ ctnetlink_secctx_size(ct)
+ + ctnetlink_acct_size(ct)
+ + ctnetlink_timestamp_size(ct)
#if IS_ENABLED(CONFIG_NF_NAT)
+ 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
@@ -2700,6 +2730,10 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
if (ctnetlink_dump_protoinfo(skb, ct, false) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 ||
+ ctnetlink_dump_timestamp(skb, ct) < 0)
+ goto nla_put_failure;
+
if (ctnetlink_dump_helpinfo(skb, ct) < 0)
goto nla_put_failure;
@@ -3078,7 +3112,7 @@ nla_put_failure:
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static int
-ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
+ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item)
{
struct nf_conntrack_expect *exp = item->exp;
struct net *net = nf_ct_exp_net(exp);
@@ -3278,8 +3312,7 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
+ u_int8_t u3 = info->nfmsg->nfgen_family;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct nf_conntrack_zone zone;
@@ -3329,11 +3362,10 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
}
}
- err = -ENOMEM;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL) {
+ if (!skb2) {
nf_ct_expect_put(exp);
- goto out;
+ return -ENOMEM;
}
rcu_read_lock();
@@ -3342,21 +3374,12 @@ static int ctnetlink_get_expect(struct sk_buff *skb,
exp);
rcu_read_unlock();
nf_ct_expect_put(exp);
- if (err <= 0)
- goto free;
-
- err = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (err < 0)
- goto out;
-
- return 0;
+ if (err <= 0) {
+ kfree_skb(skb2);
+ return -ENOMEM;
+ }
-free:
- kfree_skb(skb2);
-out:
- /* this avoids a loop in nfnetlink. */
- return err == -EAGAIN ? -ENOBUFS : err;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
}
static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data)
@@ -3378,8 +3401,7 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
+ u_int8_t u3 = info->nfmsg->nfgen_family;
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone;
@@ -3630,8 +3652,7 @@ static int ctnetlink_new_expect(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const cda[])
{
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
- u_int8_t u3 = nfmsg->nfgen_family;
+ u_int8_t u3 = info->nfmsg->nfgen_family;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct nf_conntrack_zone zone;
@@ -3742,11 +3763,8 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static struct nf_ct_event_notifier ctnl_notifier = {
- .fcn = ctnetlink_conntrack_event,
-};
-
-static struct nf_exp_event_notifier ctnl_notifier_exp = {
- .fcn = ctnetlink_expect_event,
+ .ct_event = ctnetlink_conntrack_event,
+ .exp_event = ctnetlink_expect_event,
};
#endif
@@ -3839,52 +3857,21 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
static int __net_init ctnetlink_net_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- int ret;
-
- ret = nf_conntrack_register_notifier(net, &ctnl_notifier);
- if (ret < 0) {
- pr_err("ctnetlink_init: cannot register notifier.\n");
- goto err_out;
- }
-
- ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp);
- if (ret < 0) {
- pr_err("ctnetlink_init: cannot expect register notifier.\n");
- goto err_unreg_notifier;
- }
+ nf_conntrack_register_notifier(net, &ctnl_notifier);
#endif
return 0;
-
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
- nf_conntrack_unregister_notifier(net, &ctnl_notifier);
-err_out:
- return ret;
-#endif
}
-static void ctnetlink_net_exit(struct net *net)
+static void ctnetlink_net_pre_exit(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
- nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp);
- nf_conntrack_unregister_notifier(net, &ctnl_notifier);
+ nf_conntrack_unregister_notifier(net);
#endif
}
-static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
-{
- struct net *net;
-
- list_for_each_entry(net, net_exit_list, exit_list)
- ctnetlink_net_exit(net);
-
- /* wait for other cpus until they are done with ctnl_notifiers */
- synchronize_rcu();
-}
-
static struct pernet_operations ctnetlink_net_ops = {
.init = ctnetlink_net_init,
- .exit_batch = ctnetlink_net_exit_batch,
+ .pre_exit = ctnetlink_net_pre_exit,
};
static int __init ctnetlink_init(void)
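
The CTA_STATUS/CTA_STATUS_MASK filter added above follows the same val/mask convention as the existing mark filter: a dumped entry is skipped unless its selected status bits equal the requested value; a request without an explicit mask defaults the mask to the value, and a zero mask is rejected because the comparison would be degenerate (always true or always false). A minimal sketch of the match; the 0x4 bit is an arbitrary illustrative flag, not a specific IPS_* constant:

#include <stdint.h>
#include <stdio.h>

struct u32_filter {
        uint32_t val;
        uint32_t mask;
};

/* Mirrors the check added to ctnetlink_filter_match(). */
static int status_matches(uint32_t status, const struct u32_filter *f)
{
        return (status & f->mask) == f->val;
}

int main(void)
{
        struct u32_filter f = { .val = 0x4, .mask = 0x4 };      /* "bit 2 must be set" */

        printf("%d %d\n", status_matches(0x6, &f), status_matches(0x2, &f)); /* 1 0 */
        return 0;
}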
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index dc9ca12b0489..8f7a9837349c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -42,17 +42,16 @@
#include <net/ipv6.h>
#include <net/inet_frag.h>
-extern unsigned int nf_conntrack_net_id;
-
static DEFINE_MUTEX(nf_ct_proto_mutex);
#ifdef CONFIG_SYSCTL
-__printf(5, 6)
+__printf(4, 5)
void nf_l4proto_log_invalid(const struct sk_buff *skb,
- struct net *net,
- u16 pf, u8 protonum,
+ const struct nf_hook_state *state,
+ u8 protonum,
const char *fmt, ...)
{
+ struct net *net = state->net;
struct va_format vaf;
va_list args;
@@ -64,15 +63,16 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
vaf.fmt = fmt;
vaf.va = &args;
- nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
- "nf_ct_proto_%d: %pV ", protonum, &vaf);
+ nf_log_packet(net, state->pf, 0, skb, state->in, state->out,
+ NULL, "nf_ct_proto_%d: %pV ", protonum, &vaf);
va_end(args);
}
EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
-__printf(3, 4)
+__printf(4, 5)
void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
const struct nf_conn *ct,
+ const struct nf_hook_state *state,
const char *fmt, ...)
{
struct va_format vaf;
@@ -87,7 +87,7 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
vaf.fmt = fmt;
vaf.va = &args;
- nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
+ nf_l4proto_log_invalid(skb, state,
nf_ct_protonum(ct), "%pV", &vaf);
va_end(args);
}
@@ -446,7 +446,7 @@ static struct nf_ct_bridge_info *nf_ct_bridge_info;
static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
bool fixup_needed = false, retry = true;
int err = 0;
retry:
@@ -531,7 +531,7 @@ retry:
static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
mutex_lock(&nf_ct_proto_mutex);
switch (nfproto) {
@@ -697,13 +697,6 @@ void nf_conntrack_proto_pernet_init(struct net *net)
#endif
}
-void nf_conntrack_proto_pernet_fini(struct net *net)
-{
-#ifdef CONFIG_NF_CT_PROTO_GRE
- nf_ct_gre_keymap_flush(net);
-#endif
-}
-
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 4f33307fa3cf..c1557d47ccd1 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -382,7 +382,8 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
static noinline bool
dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
- const struct dccp_hdr *dh)
+ const struct dccp_hdr *dh,
+ const struct nf_hook_state *hook_state)
{
struct net *net = nf_ct_net(ct);
struct nf_dccp_net *dn;
@@ -414,7 +415,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
out_invalid:
- nf_ct_l4proto_log_invalid(skb, ct, "%s", msg);
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state, "%s", msg);
return false;
}
@@ -464,8 +465,7 @@ static bool dccp_error(const struct dccp_hdr *dh,
}
return false;
out_invalid:
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_DCCP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
return true;
}
@@ -488,7 +488,7 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
return -NF_ACCEPT;
type = dh->dccph_type;
- if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh))
+ if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
return -NF_ACCEPT;
if (type == DCCP_PKT_RESET &&
@@ -543,11 +543,11 @@ int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
ct->proto.dccp.last_pkt = type;
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid packet");
return NF_ACCEPT;
case CT_DCCP_INVALID:
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "%s", "invalid state transition");
return -NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index db11e403d818..728eeb0aea87 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -55,19 +55,6 @@ static inline struct nf_gre_net *gre_pernet(struct net *net)
return &net->ct.nf_ct_proto.gre;
}
-void nf_ct_gre_keymap_flush(struct net *net)
-{
- struct nf_gre_net *net_gre = gre_pernet(net);
- struct nf_ct_gre_keymap *km, *tmp;
-
- spin_lock_bh(&keymap_lock);
- list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
- list_del_rcu(&km->list);
- kfree_rcu(km, rcu);
- }
- spin_unlock_bh(&keymap_lock);
-}
-
static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
const struct nf_conntrack_tuple *t)
{
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 4efd8741c105..b38b7164acd5 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -170,12 +170,12 @@ int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
if (state->pf == AF_INET) {
- nf_l4proto_log_invalid(skb, state->net, state->pf,
+ nf_l4proto_log_invalid(skb, state,
l4proto,
"outer daddr %pI4 != inner %pI4",
&outer_daddr->ip, &ct_daddr->ip);
} else if (state->pf == AF_INET6) {
- nf_l4proto_log_invalid(skb, state->net, state->pf,
+ nf_l4proto_log_invalid(skb, state,
l4proto,
"outer daddr %pI6 != inner %pI6",
&outer_daddr->ip6, &ct_daddr->ip6);
@@ -197,8 +197,7 @@ static void icmp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_ICMP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMP, "%s", msg);
}
/* Small and modified version of icmp_rcv */
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index facd8c64ec4e..61e3b05cf02c 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -126,8 +126,7 @@ static void icmpv6_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_ICMPV6, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_ICMPV6, "%s", msg);
}
int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fb8dc02e502f..2394238d01c9 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -351,7 +351,7 @@ static bool sctp_error(struct sk_buff *skb,
}
return false;
out_invalid:
- nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_SCTP, "%s", logmsg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_SCTP, "%s", logmsg);
return true;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 34e22416a721..af5115e127cf 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -446,14 +446,15 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
}
-static bool tcp_in_window(const struct nf_conn *ct,
- struct ip_ct_tcp *state,
+static bool tcp_in_window(struct nf_conn *ct,
enum ip_conntrack_dir dir,
unsigned int index,
const struct sk_buff *skb,
unsigned int dataoff,
- const struct tcphdr *tcph)
+ const struct tcphdr *tcph,
+ const struct nf_hook_state *hook_state)
{
+ struct ip_ct_tcp *state = &ct->proto.tcp;
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = nf_tcp_pernet(net);
struct ip_ct_tcp_state *sender = &state->seen[dir];
@@ -670,7 +671,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
tn->tcp_be_liberal)
res = true;
if (!res) {
- nf_ct_l4proto_log_invalid(skb, ct,
+ nf_ct_l4proto_log_invalid(skb, ct, hook_state,
"%s",
before(seq, sender->td_maxend + 1) ?
in_recv_win ?
@@ -710,7 +711,7 @@ static void tcp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf, IPPROTO_TCP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, "%s", msg);
}
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
@@ -822,6 +823,22 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
+static bool tcp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.tcp.state) {
+ case TCP_CONNTRACK_FIN_WAIT:
+ case TCP_CONNTRACK_LAST_ACK:
+ case TCP_CONNTRACK_TIME_WAIT:
+ case TCP_CONNTRACK_CLOSE:
+ case TCP_CONNTRACK_CLOSE_WAIT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -970,7 +987,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
IP_CT_EXP_CHALLENGE_ACK;
}
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct,
+ nf_ct_l4proto_log_invalid(skb, ct, state,
"packet (index %d) in dir %d ignored, state %s",
index, dir,
tcp_conntrack_names[old_state]);
@@ -995,7 +1012,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "invalid state");
return -NF_ACCEPT;
case TCP_CONNTRACK_TIME_WAIT:
/* RFC5961 compliance cause stack to send "challenge-ACK"
@@ -1010,7 +1027,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
/* Detected RFC5961 challenge ACK */
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "challenge-ack ignored");
return NF_ACCEPT; /* Don't change state */
}
break;
@@ -1029,13 +1046,33 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
if (index != TCP_RST_SET)
break;
- if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
+ /* If we are closing, tuple might have been re-used already.
+ * last_index, last_ack, and all other ct fields used for
+ * sequence/window validation are outdated in that case.
+ *
+ * As the conntrack can already be expired by GC under pressure,
+ * just skip validation checks.
+ */
+ if (tcp_can_early_drop(ct))
+ goto in_window;
+
+ /* td_maxack might be outdated if we let a SYN through earlier */
+ if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) &&
+ ct->proto.tcp.last_index != TCP_SYN_SET) {
u32 seq = ntohl(th->seq);
- if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
+ /* If we are not in established state and SEQ=0 this is most
+ * likely an answer to a SYN we let go through above (last_index
+ * can be updated due to out-of-order ACKs).
+ */
+ if (seq == 0 && !nf_conntrack_tcp_established(ct))
+ break;
+
+ if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) &&
+ !tn->tcp_ignore_invalid_rst) {
/* Invalid RST */
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
+ nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst");
return -NF_ACCEPT;
}
@@ -1079,8 +1116,8 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
break;
}
- if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
- skb, dataoff, th)) {
+ if (!tcp_in_window(ct, dir, index,
+ skb, dataoff, th, state)) {
spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
}
@@ -1133,6 +1170,16 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_ACCEPT;
}
+
+ if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) {
+ /* do not renew timeout on SYN retransmit.
+ *
+ * Else port reuse by client or NAT middlebox can keep
+ * entry alive indefinitely (including nat info).
+ */
+ return NF_ACCEPT;
+ }
+
/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
* pickup with loose=1. Avoid large ESTABLISHED timeout.
*/
@@ -1154,22 +1201,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-static bool tcp_can_early_drop(const struct nf_conn *ct)
-{
- switch (ct->proto.tcp.state) {
- case TCP_CONNTRACK_FIN_WAIT:
- case TCP_CONNTRACK_LAST_ACK:
- case TCP_CONNTRACK_TIME_WAIT:
- case TCP_CONNTRACK_CLOSE:
- case TCP_CONNTRACK_CLOSE_WAIT:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1436,11 +1467,18 @@ void nf_conntrack_tcp_init_net(struct net *net)
*/
tn->tcp_be_liberal = 0;
+ /* If it's non-zero, we turn off RST sequence number check */
+ tn->tcp_ignore_invalid_rst = 0;
+
/* Max number of the retransmitted packets without receiving an (acceptable)
* ACK from the destination. If this number is reached, a shorter timer
* will be started.
*/
tn->tcp_max_retrans = 3;
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ tn->offload_timeout = 30 * HZ;
+#endif
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
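
Among the TCP tracker changes above, the invalid-RST check now also consults the new nf_conntrack_tcp_ignore_invalid_rst sysctl: a reset whose sequence number lies below the highest ACK seen from the other direction is only discarded while that knob is off. A compact sketch of just this decision; before() is the usual serial-number comparison:

#include <stdint.h>
#include <stdio.h>

static int before(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) < 0;
}

/* Returns 0 when the RST should be treated as invalid, 1 when it may pass. */
static int rst_acceptable(uint32_t seq, uint32_t td_maxack, int ignore_invalid_rst)
{
        if (before(seq, td_maxack) && !ignore_invalid_rst)
                return 0;
        return 1;
}

int main(void)
{
        printf("%d %d\n",
               rst_acceptable(1000, 2000, 0),   /* 0: dropped as invalid */
               rst_acceptable(1000, 2000, 1));  /* 1: accepted with the sysctl set */
        return 0;
}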
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index af402f458ee0..f8e3c0d2602f 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -38,8 +38,7 @@ static void udp_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_UDP, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_UDP, "%s", msg);
}
static bool udp_error(struct sk_buff *skb,
@@ -130,8 +129,7 @@ static void udplite_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
- nf_l4proto_log_invalid(skb, state->net, state->pf,
- IPPROTO_UDPLITE, "%s", msg);
+ nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg);
}
static bool udplite_error(struct sk_buff *skb,
@@ -270,6 +268,10 @@ void nf_conntrack_udp_init_net(struct net *net)
for (i = 0; i < UDP_CT_MAX; i++)
un->timeouts[i] = udp_timeouts[i];
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ un->offload_timeout = 30 * HZ;
+#endif
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index aaa55246d0ca..80f675d884b2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -22,6 +22,9 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
+#ifdef CONFIG_LWTUNNEL
+#include <net/netfilter/nf_hooks_lwtunnel.h>
+#endif
#include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly;
@@ -429,7 +432,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
unsigned int nr_conntracks;
if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
+ seq_puts(seq, "entries clashres found new invalid ignore delete chainlength insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
return 0;
}
@@ -444,7 +447,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
st->invalid,
0,
0,
- 0,
+ st->chaintoolong,
st->insert,
st->insert_failed,
st->drop,
@@ -512,9 +515,7 @@ static void nf_conntrack_standalone_fini_proc(struct net *net)
u32 nf_conntrack_count(const struct net *net)
{
- const struct nf_conntrack_net *cnet;
-
- cnet = net_generic(net, nf_conntrack_net_id);
+ const struct nf_conntrack_net *cnet = nf_ct_pernet(net);
return atomic_read(&cnet->count);
}
@@ -575,11 +576,18 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
+#endif
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+ NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST,
NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
+#endif
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
#ifdef CONFIG_NF_CT_PROTO_SCTP
@@ -607,6 +615,9 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif
+#ifdef CONFIG_LWTUNNEL
+ NF_SYSCTL_CT_LWTUNNEL,
+#endif
__NF_SYSCTL_CT_LAST_SYSCTL,
};
@@ -762,6 +773,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
+ .procname = "nf_flowtable_tcp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
.procname = "nf_conntrack_tcp_loose",
.maxlen = sizeof(u8),
@@ -778,6 +797,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = {
+ .procname = "nf_conntrack_tcp_ignore_invalid_rst",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
[NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
.procname = "nf_conntrack_tcp_max_retrans",
.maxlen = sizeof(u8),
@@ -796,6 +823,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
+ .procname = "nf_flowtable_udp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
.procname = "nf_conntrack_icmp_timeout",
.maxlen = sizeof(unsigned int),
@@ -930,6 +965,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
+#ifdef CONFIG_LWTUNNEL
+ [NF_SYSCTL_CT_LWTUNNEL] = {
+ .procname = "nf_hooks_lwtunnel",
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = nf_hooks_lwtunnel_sysctl_handler,
+ },
+#endif
{}
};
@@ -970,7 +1014,13 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
XASSIGN(LOOSE, &tn->tcp_loose);
XASSIGN(LIBERAL, &tn->tcp_be_liberal);
XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
+ XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst);
#undef XASSIGN
+
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
+#endif
+
}
static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
@@ -1032,7 +1082,7 @@ static void nf_conntrack_standalone_init_gre_sysctl(struct net *net,
static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_udp_net *un = nf_udp_pernet(net);
struct ctl_table *table;
@@ -1059,6 +1109,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
+#endif
nf_conntrack_standalone_init_tcp_sysctl(net, table);
nf_conntrack_standalone_init_sctp_sysctl(net, table);
@@ -1085,7 +1138,7 @@ out_unregister_netfilter:
static void nf_conntrack_standalone_fini_sysctl(struct net *net)
{
- struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
+ struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct ctl_table *table;
table = cnet->sysctl_header->ctl_table_arg;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 1d02650dd715..87a7388b6c89 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -99,7 +99,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
break;
case NFPROTO_IPV6:
- flow_tuple->mtu = ip6_dst_mtu_forward(dst);
+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
break;
}
@@ -178,25 +178,28 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
tcp->seen[1].td_maxwin = 0;
}
-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
-
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
{
- const struct nf_conntrack_l4proto *l4proto;
+ struct net *net = nf_ct_net(ct);
int l4num = nf_ct_protonum(ct);
- unsigned int timeout;
+ s32 timeout;
- l4proto = nf_ct_l4proto_find(l4num);
- if (!l4proto)
- return;
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
- if (l4num == IPPROTO_TCP)
- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
- else if (l4num == IPPROTO_UDP)
- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
- else
+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
+ timeout -= tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+
+ timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout -= tn->offload_timeout;
+ } else {
return;
+ }
+
+ if (timeout < 0)
+ timeout = 0;
if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
ct->timeout = nfct_time_stamp + timeout;
@@ -268,11 +271,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
.automatic_shrinking = true,
};
+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
+{
+ unsigned long timeout = NF_FLOW_TIMEOUT;
+ struct net *net = nf_ct_net(flow->ct);
+ int l4num = nf_ct_protonum(flow->ct);
+
+ if (l4num == IPPROTO_TCP) {
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+ timeout = tn->offload_timeout;
+ } else if (l4num == IPPROTO_UDP) {
+ struct nf_udp_net *tn = nf_udp_pernet(net);
+
+ timeout = tn->offload_timeout;
+ }
+
+ return timeout;
+}
+
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
int err;
- flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
err = rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[0].node,
@@ -304,7 +326,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
void flow_offload_refresh(struct nf_flowtable *flow_table,
struct flow_offload *flow)
{
- flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
+ u32 timeout;
+
+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ if (READ_ONCE(flow->timeout) != timeout)
+ WRITE_ONCE(flow->timeout, timeout);
if (likely(!nf_flowtable_hw_offload(flow_table)))
return;
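
flow_offload_fixup_ct_timeout() above now derives the conntrack pickup timeout from the per-netns protocol timeout minus the sysctl-tunable offload timeout, clamped at zero, instead of the removed fixed 120s/30s constants. The arithmetic in standalone form; HZ and the 5-day TCP established default are used only as example numbers:

#include <stdio.h>

#define HZ 100

/* Remaining lifetime granted to a flow that falls back from the flowtable. */
static long pickup_timeout(long proto_timeout, long offload_timeout)
{
        long t = proto_timeout - offload_timeout;

        return t < 0 ? 0 : t;
}

int main(void)
{
        long established = 5L * 86400 * HZ;     /* e.g. nf_conntrack_tcp_timeout_established */
        long offload     = 30L * HZ;            /* e.g. nf_flowtable_tcp_timeout */

        printf("%ld jiffies\n", pickup_timeout(established, offload));
        return 0;
}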
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 528b2f172684..d6bf1b2cd541 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -251,8 +251,7 @@ static int flow_offload_eth_src(struct net *net,
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
&val, &mask);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return 0;
}
@@ -937,7 +936,7 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
offload->flow->timeout = max_t(u64, offload->flow->timeout,
- lastused + NF_FLOW_TIMEOUT);
+ lastused + flow_offload_get_timeout(offload->flow));
if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
if (stats[0].pkts)
@@ -1041,7 +1040,7 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
__s32 delta;
delta = nf_flow_timeout_delta(flow->timeout);
- if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10))
+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
return;
offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
@@ -1097,6 +1096,7 @@ static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
bo->command = cmd;
bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo->extack = extack;
+ bo->cb_list_head = &flowtable->flow_block.cb_list;
INIT_LIST_HEAD(&bo->cb_list);
}
diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c
new file mode 100644
index 000000000000..00e89ffd78f6
--- /dev/null
+++ b/net/netfilter/nf_hooks_lwtunnel.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/sysctl.h>
+#include <net/lwtunnel.h>
+#include <net/netfilter/nf_hooks_lwtunnel.h>
+
+static inline int nf_hooks_lwtunnel_get(void)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return 1;
+ else
+ return 0;
+}
+
+static inline int nf_hooks_lwtunnel_set(int enable)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) {
+ if (!enable)
+ return -EBUSY;
+ } else if (enable) {
+ static_branch_enable(&nf_hooks_lwtunnel_enabled);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_SYSCTL
+int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int proc_nf_hooks_lwtunnel_enabled = 0;
+ struct ctl_table tmp = {
+ .procname = table->procname,
+ .data = &proc_nf_hooks_lwtunnel_enabled,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ };
+ int ret;
+
+ if (!write)
+ proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get();
+
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+
+ if (write && ret == 0)
+ ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
+#endif /* CONFIG_SYSCTL */
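The new handler above acts as a one-way latch: writing 1 enables the nf_hooks_lwtunnel static branch, while a later write of 0 is rejected with -EBUSY, and reads go through a temporary ctl_table so the static key never backs the sysctl storage directly. A tiny userspace model of the latch behaviour, not the kernel static-key machinery (the sysctl registration itself lives outside this file; in mainline it is exposed as net.netfilter.nf_hooks_lwtunnel):

#include <errno.h>
#include <stdio.h>

static int enabled;	/* stands in for nf_hooks_lwtunnel_enabled */

static int latch_set(int enable)
{
	if (enabled) {
		if (!enable)
			return -EBUSY;	/* cannot be switched off again */
	} else if (enable) {
		enabled = 1;
	}
	return 0;
}

int main(void)
{
	printf("%d\n", latch_set(1));	/* 0 */
	printf("%d\n", latch_set(0));	/* -EBUSY */
	return 0;
}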
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..7008961f5cb0 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -13,7 +13,7 @@
#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <net/xfrm.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
@@ -34,7 +34,7 @@ static unsigned int nat_net_id __read_mostly;
static struct hlist_head *nf_nat_bysource __read_mostly;
static unsigned int nf_nat_htable_size __read_mostly;
-static unsigned int nf_nat_hash_rnd __read_mostly;
+static siphash_key_t nf_nat_hash_rnd __read_mostly;
struct nf_nat_lookup_hook_priv {
struct nf_hook_entries __rcu *entries;
@@ -153,12 +153,22 @@ static unsigned int
hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
+ struct {
+ struct nf_conntrack_man src;
+ u32 net_mix;
+ u32 protonum;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
+ memset(&combined, 0, sizeof(combined));
+
/* Original src, to ensure we map it consistently if poss. */
- hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
+ combined.src = tuple->src;
+ combined.net_mix = net_hash_mix(n);
+ combined.protonum = tuple->dst.protonum;
+
+ hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
return reciprocal_scale(hash, nf_nat_htable_size);
}
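hash_by_src() now feeds a locally assembled struct into siphash with a key initialised once at first use, instead of jhash over the raw tuple. The memset() of the combined struct is not cosmetic: the struct can contain compiler-inserted padding whose bytes are indeterminate, and hashing them unzeroed would make the hash unstable for identical field values. A small standalone illustration of the padding issue in plain C, unrelated to the kernel types:

#include <stdio.h>
#include <string.h>

struct combined {
	unsigned short port;	/* 2 bytes, usually followed by 2 padding bytes */
	unsigned int addr;
};

int main(void)
{
	struct combined a, b;

	/* without these memsets the padding bytes are indeterminate and a
	 * byte-wise hash of &a and &b could differ despite equal fields */
	memset(&a, 0, sizeof(a));
	memset(&b, 0, sizeof(b));
	a.port = b.port = 53;
	a.addr = b.addr = 0x0a000001;

	printf("bytewise equal: %d\n", memcmp(&a, &b, sizeof(a)) == 0);
	return 0;
}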
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index bbd1209694b8..6d12afabfe8a 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -21,6 +21,8 @@
#include "nf_internals.h"
+static const struct nf_queue_handler __rcu *nf_queue_handler;
+
/*
* Hook for nfnetlink_queue to register its queue handler.
* We do this so that most of the NFQUEUE code can be modular.
@@ -29,20 +31,18 @@
* receives, no matter what.
*/
-/* return EBUSY when somebody else is registered, return EEXIST if the
- * same handler is registered, return 0 in case of success. */
-void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh)
+void nf_register_queue_handler(const struct nf_queue_handler *qh)
{
/* should never happen, we only have one queueing backend in kernel */
- WARN_ON(rcu_access_pointer(net->nf.queue_handler));
- rcu_assign_pointer(net->nf.queue_handler, qh);
+ WARN_ON(rcu_access_pointer(nf_queue_handler));
+ rcu_assign_pointer(nf_queue_handler, qh);
}
EXPORT_SYMBOL(nf_register_queue_handler);
/* The caller must flush their queue before this */
-void nf_unregister_queue_handler(struct net *net)
+void nf_unregister_queue_handler(void)
{
- RCU_INIT_POINTER(net->nf.queue_handler, NULL);
+ RCU_INIT_POINTER(nf_queue_handler, NULL);
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
@@ -51,18 +51,14 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
struct nf_hook_state *state = &entry->state;
/* Release those devices we held, or Alexey will kill me. */
- if (state->in)
- dev_put(state->in);
- if (state->out)
- dev_put(state->out);
+ dev_put(state->in);
+ dev_put(state->out);
if (state->sk)
sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (entry->physin)
- dev_put(entry->physin);
- if (entry->physout)
- dev_put(entry->physout);
+ dev_put(entry->physin);
+ dev_put(entry->physout);
#endif
}
@@ -95,18 +91,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
- if (state->in)
- dev_hold(state->in);
- if (state->out)
- dev_hold(state->out);
+ dev_hold(state->in);
+ dev_hold(state->out);
if (state->sk)
sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (entry->physin)
- dev_hold(entry->physin);
- if (entry->physout)
- dev_hold(entry->physout);
+ dev_hold(entry->physin);
+ dev_hold(entry->physout);
#endif
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -116,7 +108,7 @@ void nf_queue_nf_hook_drop(struct net *net)
const struct nf_queue_handler *qh;
rcu_read_lock();
- qh = rcu_dereference(net->nf.queue_handler);
+ qh = rcu_dereference(nf_queue_handler);
if (qh)
qh->nf_hook_drop(net);
rcu_read_unlock();
@@ -157,12 +149,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
{
struct nf_queue_entry *entry = NULL;
const struct nf_queue_handler *qh;
- struct net *net = state->net;
unsigned int route_key_size;
int status;
/* QUEUE == DROP if no one is waiting, to be safe. */
- qh = rcu_dereference(net->nf.queue_handler);
+ qh = rcu_dereference(nf_queue_handler);
if (!qh)
return -ESRCH;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index bf4d6ec9fc55..081437dd75b7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -571,7 +571,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
table->family == family &&
nft_active_genmask(table, genmask)) {
if (nft_table_has_owner(table) &&
- table->nlpid != nlpid)
+ nlpid && table->nlpid != nlpid)
return ERR_PTR(-EPERM);
return table;
@@ -583,7 +583,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
const struct nlattr *nla,
- u8 genmask)
+ u8 genmask, u32 nlpid)
{
struct nftables_pernet *nft_net;
struct nft_table *table;
@@ -591,8 +591,13 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
nft_net = nft_pernet(net);
list_for_each_entry(table, &nft_net->tables, list) {
if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
- nft_active_genmask(table, genmask))
+ nft_active_genmask(table, genmask)) {
+ if (nft_table_has_owner(table) &&
+ nlpid && table->nlpid != nlpid)
+ return ERR_PTR(-EPERM);
+
return table;
+ }
}
return ERR_PTR(-ENOENT);
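Both table lookups above now receive the requesting socket's netlink portid and only enforce table ownership when that portid is non-zero, so callers that pass 0 (such as the set dump path later in this diff) keep working while a foreign socket gets -EPERM on an owned table. A compact standalone restatement of that predicate, with made-up portid values:

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int owner_check(bool has_owner, unsigned int table_nlpid, unsigned int nlpid)
{
	/* nlpid == 0 means "no requesting socket" and skips the ownership test */
	if (has_owner && nlpid && table_nlpid != nlpid)
		return -EPERM;
	return 0;
}

int main(void)
{
	printf("%d\n", owner_check(true, 1234, 0));	/* 0: no portid, no check */
	printf("%d\n", owner_check(true, 1234, 1234));	/* 0: owner itself */
	printf("%d\n", owner_check(true, 1234, 999));	/* -EPERM: foreign socket */
	return 0;
}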
@@ -862,10 +867,9 @@ static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
const struct nft_table *table;
struct net *net = info->net;
struct sk_buff *skb2;
@@ -1068,10 +1072,9 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
@@ -1263,10 +1266,9 @@ out:
static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
@@ -1279,7 +1281,8 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_TABLE_HANDLE]) {
attr = nla[NFTA_TABLE_HANDLE];
- table = nft_table_lookup_byhandle(net, attr, genmask);
+ table = nft_table_lookup_byhandle(net, attr, genmask,
+ NETLINK_CB(skb).portid);
} else {
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask,
@@ -1636,10 +1639,9 @@ done:
static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
const struct nft_chain *chain;
struct net *net = info->net;
struct nft_table *table;
@@ -2015,11 +2017,12 @@ static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
const struct nft_chain_hook *hook,
struct nft_chain *chain)
{
- ops->pf = family;
- ops->hooknum = hook->num;
- ops->priority = hook->priority;
- ops->priv = chain;
- ops->hook = hook->type->hooks[ops->hooknum];
+ ops->pf = family;
+ ops->hooknum = hook->num;
+ ops->priority = hook->priority;
+ ops->priv = chain;
+ ops->hook = hook->type->hooks[ops->hooknum];
+ ops->hook_ops_type = NF_HOOK_OP_NF_TABLES;
}
static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
@@ -2371,10 +2374,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
struct nft_chain *chain = NULL;
struct net *net = info->net;
const struct nlattr *attr;
@@ -2469,10 +2471,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
@@ -3096,10 +3097,9 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
const struct nft_chain *chain;
const struct nft_rule *rule;
struct net *net = info->net;
@@ -3237,15 +3237,14 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
struct nftables_pernet *nft_net = nft_pernet(info->net);
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
unsigned int size, i, n, ulen = 0, usize = 0;
u8 genmask = nft_genmask_next(info->net);
struct nft_rule *rule, *old_rule = NULL;
struct nft_expr_info *expr_info = NULL;
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
+ struct nft_flow_rule *flow = NULL;
struct net *net = info->net;
- struct nft_flow_rule *flow;
struct nft_userdata *udata;
struct nft_table *table;
struct nft_chain *chain;
@@ -3340,13 +3339,13 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
err = -EINVAL;
if (nla_type(tmp) != NFTA_LIST_ELEM)
- goto err1;
+ goto err_release_expr;
if (n == NFT_RULE_MAXEXPRS)
- goto err1;
+ goto err_release_expr;
err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
if (err < 0) {
NL_SET_BAD_ATTR(extack, tmp);
- goto err1;
+ goto err_release_expr;
}
size += expr_info[n].ops->size;
n++;
@@ -3355,7 +3354,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
/* Check for overflow of dlen field */
err = -EFBIG;
if (size >= 1 << 12)
- goto err1;
+ goto err_release_expr;
if (nla[NFTA_RULE_USERDATA]) {
ulen = nla_len(nla[NFTA_RULE_USERDATA]);
@@ -3366,7 +3365,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
err = -ENOMEM;
rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
if (rule == NULL)
- goto err1;
+ goto err_release_expr;
nft_activate_next(net, rule);
@@ -3385,7 +3384,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
err = nf_tables_newexpr(&ctx, &expr_info[i], expr);
if (err < 0) {
NL_SET_BAD_ATTR(extack, expr_info[i].attr);
- goto err2;
+ goto err_release_rule;
}
if (expr_info[i].ops->validate)
@@ -3395,16 +3394,24 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
expr = nft_expr_next(expr);
}
+ if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+ flow = nft_flow_rule_create(net, rule);
+ if (IS_ERR(flow)) {
+ err = PTR_ERR(flow);
+ goto err_release_rule;
+ }
+ }
+
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
- goto err2;
+ goto err_destroy_flow_rule;
}
err = nft_delrule(&ctx, old_rule);
if (err < 0) {
nft_trans_destroy(trans);
- goto err2;
+ goto err_destroy_flow_rule;
}
list_add_tail_rcu(&rule->list, &old_rule->list);
@@ -3412,7 +3419,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (!trans) {
err = -ENOMEM;
- goto err2;
+ goto err_destroy_flow_rule;
}
if (info->nlh->nlmsg_flags & NLM_F_APPEND) {
@@ -3430,21 +3437,20 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
kvfree(expr_info);
chain->use++;
+ if (flow)
+ nft_trans_flow_rule(trans) = flow;
+
if (nft_net->validate_state == NFT_VALIDATE_DO)
return nft_table_validate(net, table);
- if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
- flow = nft_flow_rule_create(net, rule);
- if (IS_ERR(flow))
- return PTR_ERR(flow);
-
- nft_trans_flow_rule(trans) = flow;
- }
-
return 0;
-err2:
+
+err_destroy_flow_rule:
+ if (flow)
+ nft_flow_rule_destroy(flow);
+err_release_rule:
nf_tables_rule_release(&ctx, rule);
-err1:
+err_release_expr:
for (i = 0; i < n; i++) {
if (expr_info[i].ops) {
module_put(expr_info[i].ops->type->owner);
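The renamed labels above replace err1/err2 with names that say what gets undone, and the hardware-offload flow rule is now created before the transaction is queued so a failure can destroy it on the same unwind path. A stripped-down standalone mirror of that goto-unwind idiom (hypothetical allocations, not the nftables objects):

#include <errno.h>
#include <stdlib.h>

static int build_rule_sketch(void)
{
	void *exprs, *rule, *flow;

	exprs = malloc(32);
	if (!exprs)
		return -ENOMEM;

	rule = malloc(32);
	if (!rule)
		goto err_release_expr;

	flow = malloc(32);	/* created up front, destroyed first on failure */
	if (!flow)
		goto err_release_rule;

	/* in the real code ownership now passes to the transaction; freed
	 * here only to keep the sketch leak-free */
	free(flow);
	free(rule);
	free(exprs);
	return 0;

err_release_rule:
	free(rule);
err_release_expr:
	free(exprs);
	return -ENOMEM;
}

int main(void)
{
	return build_rule_sketch() ? 1 : 0;
}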
@@ -3477,15 +3483,15 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
- int family = nfmsg->nfgen_family, err = 0;
u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
struct nft_chain *chain = NULL;
struct net *net = info->net;
struct nft_table *table;
struct nft_rule *rule;
struct nft_ctx ctx;
+ int err = 0;
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask,
NETLINK_CB(skb).portid);
@@ -3665,30 +3671,6 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED },
};
-static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net,
- const struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack,
- u8 genmask, u32 nlpid)
-{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- int family = nfmsg->nfgen_family;
- struct nft_table *table = NULL;
-
- if (nla[NFTA_SET_TABLE] != NULL) {
- table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
- genmask, nlpid);
- if (IS_ERR(table)) {
- NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
- return PTR_ERR(table);
- }
- }
-
- nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
- return 0;
-}
-
static struct nft_set *nft_set_lookup(const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
@@ -4068,20 +4050,26 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
+ struct nft_table *table = NULL;
struct net *net = info->net;
const struct nft_set *set;
struct sk_buff *skb2;
struct nft_ctx ctx;
int err;
- /* Verify existence before starting dump */
- err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack,
- genmask, 0);
- if (err < 0)
- return err;
+ if (nla[NFTA_SET_TABLE]) {
+ table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
+ genmask, 0);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
+ return PTR_ERR(table);
+ }
+ }
+
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -4096,12 +4084,12 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
}
/* Only accept unspec with dump */
- if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+ if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC)
return -EAFNOSUPPORT;
if (!nla[NFTA_SET_TABLE])
return -EINVAL;
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask);
+ set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
@@ -4189,11 +4177,10 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
u32 ktype, dtype, flags, policy, gc_int, objtype;
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
const struct nft_set_ops *ops;
struct nft_expr *expr = NULL;
struct net *net = info->net;
@@ -4494,31 +4481,31 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
+ struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
- int err;
- if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+ if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC)
return -EAFNOSUPPORT;
- if (nla[NFTA_SET_TABLE] == NULL)
- return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack,
- genmask, NETLINK_CB(skb).portid);
- if (err < 0)
- return err;
+ table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]);
+ return PTR_ERR(table);
+ }
if (nla[NFTA_SET_HANDLE]) {
attr = nla[NFTA_SET_HANDLE];
- set = nft_set_lookup_byhandle(ctx.table, attr, genmask);
+ set = nft_set_lookup_byhandle(table, attr, genmask);
} else {
attr = nla[NFTA_SET_NAME];
- set = nft_set_lookup(ctx.table, attr, genmask);
+ set = nft_set_lookup(table, attr, genmask);
}
if (IS_ERR(set)) {
@@ -4532,6 +4519,8 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
return -EBUSY;
}
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
return nft_delset(&ctx, set);
}
@@ -4733,28 +4722,6 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
[NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
};
-static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net,
- const struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- const struct nlattr * const nla[],
- struct netlink_ext_ack *extack,
- u8 genmask, u32 nlpid)
-{
- const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
- int family = nfmsg->nfgen_family;
- struct nft_table *table;
-
- table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
- genmask, nlpid);
- if (IS_ERR(table)) {
- NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
- return PTR_ERR(table);
- }
-
- nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla);
- return 0;
-}
-
static int nft_set_elem_expr_dump(struct sk_buff *skb,
const struct nft_set *set,
const struct nft_set_ext *ext)
@@ -5212,21 +5179,27 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
+ u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
+ struct nft_table *table;
struct nft_set *set;
struct nlattr *attr;
struct nft_ctx ctx;
int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
- genmask, NETLINK_CB(skb).portid);
- if (err < 0)
- return err;
+ table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ return PTR_ERR(table);
+ }
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = nf_tables_dump_set_start,
@@ -5995,8 +5968,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
struct nftables_pernet *nft_net = nft_pernet(info->net);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
+ struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
int rem, err;
@@ -6004,12 +5979,14 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
- genmask, NETLINK_CB(skb).portid);
- if (err < 0)
- return err;
+ table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ return PTR_ERR(table);
+ }
- set = nft_set_lookup_global(net, ctx.table, nla[NFTA_SET_ELEM_LIST_SET],
+ set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET],
nla[NFTA_SET_ELEM_LIST_SET_ID], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
@@ -6017,6 +5994,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags);
if (err < 0)
@@ -6024,7 +6003,7 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
}
if (nft_net->validate_state == NFT_VALIDATE_DO)
- return nft_table_validate(net, ctx.table);
+ return nft_table_validate(net, table);
return 0;
}
@@ -6262,23 +6241,29 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
+ struct nft_table *table;
struct nft_set *set;
struct nft_ctx ctx;
int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack,
- genmask, NETLINK_CB(skb).portid);
- if (err < 0)
- return err;
+ table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
+ genmask, NETLINK_CB(skb).portid);
+ if (IS_ERR(table)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
+ return PTR_ERR(table);
+ }
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
return -EBUSY;
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+
if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
return nft_set_flush(&ctx, set, genmask);
@@ -6546,11 +6531,10 @@ err_free_trans:
static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
const struct nft_object_type *type;
- int family = nfmsg->nfgen_family;
struct net *net = info->net;
struct nft_table *table;
struct nft_object *obj;
@@ -6802,10 +6786,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
const struct nft_table *table;
struct net *net = info->net;
struct nft_object *obj;
@@ -6892,10 +6875,9 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
struct net *net = info->net;
const struct nlattr *attr;
struct nft_table *table;
@@ -7323,12 +7305,11 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
struct nft_flowtable_hook flowtable_hook;
u8 genmask = nft_genmask_next(info->net);
+ u8 family = info->nfmsg->nfgen_family;
const struct nf_flowtable_type *type;
- int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
struct nft_hook *hook, *next;
struct net *net = info->net;
@@ -7512,10 +7493,9 @@ static int nf_tables_delflowtable(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
struct net *net = info->net;
const struct nlattr *attr;
@@ -7707,9 +7687,8 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
u8 genmask = nft_genmask_cur(info->net);
- int family = nfmsg->nfgen_family;
+ u8 family = info->nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
const struct nft_table *table;
struct net *net = info->net;
@@ -8466,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl,
return 0;
}
+static void nf_tables_commit_audit_free(struct list_head *adl)
+{
+ struct nft_audit_data *adp, *adn;
+
+ list_for_each_entry_safe(adp, adn, adl, list) {
+ list_del(&adp->list);
+ kfree(adp);
+ }
+}
+
static void nf_tables_commit_audit_collect(struct list_head *adl,
struct nft_table *table, u32 op)
{
@@ -8530,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
if (ret) {
nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
return ret;
}
if (trans->msg_type == NFT_MSG_NEWRULE ||
@@ -8539,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
ret = nf_tables_commit_chain_prepare(net, chain);
if (ret < 0) {
nf_tables_commit_chain_prepare_cancel(net);
+ nf_tables_commit_audit_free(&adl);
return ret;
}
}
@@ -8839,11 +8830,16 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
NFT_TRANS_ABORT);
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_DELRULE:
trans->ctx.chain->use++;
nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index dbc2e945c98e..866cfba04d6c 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -81,7 +81,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
else {
if (!pkt->tprot_set)
return false;
- ptr = skb_network_header(skb) + pkt->xt.thoff;
+ ptr = skb_network_header(skb) + nft_thoff(pkt);
}
ptr += priv->offset;
@@ -268,6 +268,7 @@ static struct nft_expr_type *nft_basic_types[] = {
&nft_meta_type,
&nft_rt_type,
&nft_exthdr_type,
+ &nft_last_type,
};
static struct nft_object_type *nft_basic_objects[] = {
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index a48c5fd53a80..9656c1646222 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -54,15 +54,10 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
struct nft_flow_rule *flow)
{
struct nft_flow_match *match = &flow->match;
- struct nft_offload_ethertype ethertype;
-
- if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL) &&
- match->key.basic.n_proto != htons(ETH_P_8021Q) &&
- match->key.basic.n_proto != htons(ETH_P_8021AD))
- return;
-
- ethertype.value = match->key.basic.n_proto;
- ethertype.mask = match->mask.basic.n_proto;
+ struct nft_offload_ethertype ethertype = {
+ .value = match->key.basic.n_proto,
+ .mask = match->mask.basic.n_proto,
+ };
if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_VLAN) &&
(match->key.vlan.vlan_tpid == htons(ETH_P_8021Q) ||
@@ -76,7 +71,9 @@ static void nft_flow_rule_transfer_vlan(struct nft_offload_ctx *ctx,
match->dissector.offset[FLOW_DISSECTOR_KEY_CVLAN] =
offsetof(struct nft_flow_key, cvlan);
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CVLAN);
- } else {
+ } else if (match->dissector.used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC) &&
+ (match->key.basic.n_proto == htons(ETH_P_8021Q) ||
+ match->key.basic.n_proto == htons(ETH_P_8021AD))) {
match->key.basic.n_proto = match->key.vlan.vlan_tpid;
match->mask.basic.n_proto = match->mask.vlan.vlan_tpid;
match->key.vlan.vlan_tpid = ethertype.value;
@@ -356,6 +353,7 @@ static void nft_flow_block_offload_init(struct flow_block_offload *bo,
bo->command = cmd;
bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo->extack = extack;
+ bo->cb_list_head = &basechain->flow_block.cb_list;
INIT_LIST_HEAD(&bo->cb_list);
}
@@ -594,23 +592,6 @@ int nft_flow_rule_offload_commit(struct net *net)
}
}
- list_for_each_entry(trans, &nft_net->commit_list, list) {
- if (trans->ctx.family != NFPROTO_NETDEV)
- continue;
-
- switch (trans->msg_type) {
- case NFT_MSG_NEWRULE:
- case NFT_MSG_DELRULE:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
- continue;
-
- nft_flow_rule_destroy(nft_trans_flow_rule(trans));
- break;
- default:
- break;
- }
- }
-
return err;
}
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 0cf3278007ba..e4fe2f0780eb 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -113,17 +113,17 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
int off = skb_network_offset(skb);
unsigned int len, nh_end;
- nh_end = pkt->tprot_set ? pkt->xt.thoff : skb->len;
+ nh_end = pkt->tprot_set ? nft_thoff(pkt) : skb->len;
len = min_t(unsigned int, nh_end - skb_network_offset(skb),
NFT_TRACETYPE_NETWORK_HSIZE);
if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
return -1;
if (pkt->tprot_set) {
- len = min_t(unsigned int, skb->len - pkt->xt.thoff,
+ len = min_t(unsigned int, skb->len - nft_thoff(pkt),
NFT_TRACETYPE_TRANSPORT_HSIZE);
if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
- pkt->xt.thoff, len))
+ nft_thoff(pkt), len))
return -1;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e8dbd8379027..7e2c8dd01408 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -68,6 +68,7 @@ static const char *const nfnl_lockdep_names[NFNL_SUBSYS_COUNT] = {
[NFNL_SUBSYS_CTHELPER] = "nfnl_subsys_cthelper",
[NFNL_SUBSYS_NFTABLES] = "nfnl_subsys_nftables",
[NFNL_SUBSYS_NFT_COMPAT] = "nfnl_subsys_nftcompat",
+ [NFNL_SUBSYS_HOOK] = "nfnl_subsys_hook",
};
static const int nfnl_group2type[NFNLGRP_MAX+1] = {
@@ -256,6 +257,7 @@ replay:
.net = net,
.sk = nfnlnet->nfnl,
.nlh = nlh,
+ .nfmsg = nlmsg_data(nlh),
.extack = extack,
};
@@ -491,6 +493,7 @@ replay_abort:
.net = net,
.sk = nfnlnet->nfnl,
.nlh = nlh,
+ .nfmsg = nlmsg_data(nlh),
.extack = &extack,
};
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 3c8cf8748cfb..505f46a32173 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -314,14 +314,11 @@ static int nfnl_acct_get(struct sk_buff *skb, const struct nfnl_info *info,
kfree_skb(skb2);
break;
}
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
- /* this avoids a loop in nfnetlink. */
- return ret == -EAGAIN ? -ENOBUFS : ret;
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+ break;
}
+
return ret;
}
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 752b10cae524..5c622f55c9d6 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -667,14 +667,10 @@ static int nfnl_cthelper_get(struct sk_buff *skb, const struct nfnl_info *info,
break;
}
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
-
- /* this avoids a loop in nfnetlink. */
- return ret == -EAGAIN ? -ENOBUFS : ret;
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+ break;
}
+
return ret;
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 38848ad68899..c57673d499be 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -287,14 +287,11 @@ static int cttimeout_get_timeout(struct sk_buff *skb,
kfree_skb(skb2);
break;
}
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
- /* this avoids a loop in nfnetlink. */
- return ret == -EAGAIN ? -ENOBUFS : ret;
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
+ break;
}
+
return ret;
}
@@ -427,9 +424,9 @@ static int cttimeout_default_get(struct sk_buff *skb,
const struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts = NULL;
struct sk_buff *skb2;
- int ret, err;
__u16 l3num;
__u8 l4num;
+ int ret;
if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO])
return -EINVAL;
@@ -438,9 +435,8 @@ static int cttimeout_default_get(struct sk_buff *skb,
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
l4proto = nf_ct_l4proto_find(l4num);
- err = -EOPNOTSUPP;
if (l4proto->l4proto != l4num)
- goto err;
+ return -EOPNOTSUPP;
switch (l4proto->l4proto) {
case IPPROTO_ICMP:
@@ -480,13 +476,11 @@ static int cttimeout_default_get(struct sk_buff *skb,
}
if (!timeouts)
- goto err;
+ return -EOPNOTSUPP;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL) {
- err = -ENOMEM;
- goto err;
- }
+ if (!skb2)
+ return -ENOMEM;
ret = cttimeout_default_fill_info(info->net, skb2,
NETLINK_CB(skb).portid,
@@ -496,18 +490,10 @@ static int cttimeout_default_get(struct sk_buff *skb,
l3num, l4proto, timeouts);
if (ret <= 0) {
kfree_skb(skb2);
- err = -ENOMEM;
- goto err;
+ return -ENOMEM;
}
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
- /* this avoids a loop in nfnetlink. */
- return ret == -EAGAIN ? -ENOBUFS : ret;
-err:
- return err;
+ return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
}
static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
new file mode 100644
index 000000000000..f554e2ea32ee
--- /dev/null
+++ b/net/netfilter/nfnetlink_hook.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2021 Red Hat GmbH
+ *
+ * Author: Florian Westphal <fw@strlen.de>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_hook.h>
+
+#include <net/netfilter/nf_tables.h>
+#include <net/sock.h>
+
+static const struct nla_policy nfnl_hook_nla_policy[NFNLA_HOOK_MAX + 1] = {
+ [NFNLA_HOOK_HOOKNUM] = { .type = NLA_U32 },
+ [NFNLA_HOOK_PRIORITY] = { .type = NLA_U32 },
+ [NFNLA_HOOK_DEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ - 1 },
+ [NFNLA_HOOK_FUNCTION_NAME] = { .type = NLA_NUL_STRING,
+ .len = KSYM_NAME_LEN, },
+ [NFNLA_HOOK_MODULE_NAME] = { .type = NLA_NUL_STRING,
+ .len = MODULE_NAME_LEN, },
+ [NFNLA_HOOK_CHAIN_INFO] = { .type = NLA_NESTED, },
+};
+
+static int nf_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *c)
+{
+ int err;
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
+ rcu_read_unlock();
+ err = netlink_dump_start(nlsk, skb, nlh, c);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ return err;
+}
+
+struct nfnl_dump_hook_data {
+ char devname[IFNAMSIZ];
+ unsigned long headv;
+ u8 hook;
+};
+
+static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb,
+ const struct nfnl_dump_hook_data *ctx,
+ unsigned int seq,
+ const struct nf_hook_ops *ops)
+{
+ struct net *net = sock_net(nlskb->sk);
+ struct nlattr *nest, *nest2;
+ struct nft_chain *chain;
+ int ret = 0;
+
+ if (ops->hook_ops_type != NF_HOOK_OP_NF_TABLES)
+ return 0;
+
+ chain = ops->priv;
+ if (WARN_ON_ONCE(!chain))
+ return 0;
+
+ if (!nft_is_active(net, chain))
+ return 0;
+
+ nest = nla_nest_start(nlskb, NFNLA_HOOK_CHAIN_INFO);
+ if (!nest)
+ return -EMSGSIZE;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_INFO_TYPE,
+ htonl(NFNL_HOOK_TYPE_NFTABLES));
+ if (ret)
+ goto cancel_nest;
+
+ nest2 = nla_nest_start(nlskb, NFNLA_HOOK_INFO_DESC);
+ if (!nest2)
+ goto cancel_nest;
+
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name);
+ if (ret)
+ goto cancel_nest;
+
+ ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name);
+ if (ret)
+ goto cancel_nest;
+
+ ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family);
+ if (ret)
+ goto cancel_nest;
+
+ nla_nest_end(nlskb, nest2);
+ nla_nest_end(nlskb, nest);
+ return ret;
+
+cancel_nest:
+ nla_nest_cancel(nlskb, nest);
+ return -EMSGSIZE;
+}
+
+static int nfnl_hook_dump_one(struct sk_buff *nlskb,
+ const struct nfnl_dump_hook_data *ctx,
+ const struct nf_hook_ops *ops,
+ int family, unsigned int seq)
+{
+ u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET);
+ unsigned int portid = NETLINK_CB(nlskb).portid;
+ struct nlmsghdr *nlh;
+ int ret = -EMSGSIZE;
+ u32 hooknum;
+#ifdef CONFIG_KALLSYMS
+ char sym[KSYM_SYMBOL_LEN];
+ char *module_name;
+#endif
+ nlh = nfnl_msg_put(nlskb, portid, seq, event,
+ NLM_F_MULTI, family, NFNETLINK_V0, 0);
+ if (!nlh)
+ goto nla_put_failure;
+
+#ifdef CONFIG_KALLSYMS
+ ret = snprintf(sym, sizeof(sym), "%ps", ops->hook);
+ if (ret >= sizeof(sym)) {
+ ret = -EINVAL;
+ goto nla_put_failure;
+ }
+
+ module_name = strstr(sym, " [");
+ if (module_name) {
+ char *end;
+
+ *module_name = '\0';
+ module_name += 2;
+ end = strchr(module_name, ']');
+ if (end) {
+ *end = 0;
+
+ ret = nla_put_string(nlskb, NFNLA_HOOK_MODULE_NAME, module_name);
+ if (ret)
+ goto nla_put_failure;
+ }
+ }
+
+ ret = nla_put_string(nlskb, NFNLA_HOOK_FUNCTION_NAME, sym);
+ if (ret)
+ goto nla_put_failure;
+#endif
+
+ if (ops->pf == NFPROTO_INET && ops->hooknum == NF_INET_INGRESS)
+ hooknum = NF_NETDEV_INGRESS;
+ else
+ hooknum = ops->hooknum;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(hooknum));
+ if (ret)
+ goto nla_put_failure;
+
+ ret = nla_put_be32(nlskb, NFNLA_HOOK_PRIORITY, htonl(ops->priority));
+ if (ret)
+ goto nla_put_failure;
+
+ ret = nfnl_hook_put_nft_chain_info(nlskb, ctx, seq, ops);
+ if (ret)
+ goto nla_put_failure;
+
+ nlmsg_end(nlskb, nlh);
+ return 0;
+nla_put_failure:
+ nlmsg_trim(nlskb, nlh);
+ return ret;
+}
+
+static const struct nf_hook_entries *
+nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
+{
+ const struct nf_hook_entries *hook_head = NULL;
+#ifdef CONFIG_NETFILTER_INGRESS
+ struct net_device *netdev;
+#endif
+
+ switch (pf) {
+ case NFPROTO_IPV4:
+ if (hook >= ARRAY_SIZE(net->nf.hooks_ipv4))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
+ break;
+ case NFPROTO_IPV6:
+ if (hook >= ARRAY_SIZE(net->nf.hooks_ipv6))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
+ break;
+ case NFPROTO_ARP:
+#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ if (hook >= ARRAY_SIZE(net->nf.hooks_arp))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
+#endif
+ break;
+ case NFPROTO_BRIDGE:
+#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ if (hook >= ARRAY_SIZE(net->nf.hooks_bridge))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
+#endif
+ break;
+#if IS_ENABLED(CONFIG_DECNET)
+ case NFPROTO_DECNET:
+ if (hook >= ARRAY_SIZE(net->nf.hooks_decnet))
+ return ERR_PTR(-EINVAL);
+ hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
+ break;
+#endif
+#ifdef CONFIG_NETFILTER_INGRESS
+ case NFPROTO_NETDEV:
+ if (hook != NF_NETDEV_INGRESS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ netdev = dev_get_by_name_rcu(net, dev);
+ if (!netdev)
+ return ERR_PTR(-ENODEV);
+
+ return rcu_dereference(netdev->nf_hooks_ingress);
+#endif
+ default:
+ return ERR_PTR(-EPROTONOSUPPORT);
+ }
+
+ return hook_head;
+}
+
+static int nfnl_hook_dump(struct sk_buff *nlskb,
+ struct netlink_callback *cb)
+{
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nfnl_dump_hook_data *ctx = cb->data;
+ int err, family = nfmsg->nfgen_family;
+ struct net *net = sock_net(nlskb->sk);
+ struct nf_hook_ops * const *ops;
+ const struct nf_hook_entries *e;
+ unsigned int i = cb->args[0];
+
+ rcu_read_lock();
+
+ e = nfnl_hook_entries_head(family, ctx->hook, net, ctx->devname);
+ if (!e)
+ goto done;
+
+ if (IS_ERR(e)) {
+ cb->seq++;
+ goto done;
+ }
+
+ if ((unsigned long)e != ctx->headv || i >= e->num_hook_entries)
+ cb->seq++;
+
+ ops = nf_hook_entries_get_hook_ops(e);
+
+ for (; i < e->num_hook_entries; i++) {
+ err = nfnl_hook_dump_one(nlskb, ctx, ops[i], family,
+ cb->nlh->nlmsg_seq);
+ if (err)
+ break;
+ }
+
+done:
+ nl_dump_check_consistent(cb, nlmsg_hdr(nlskb));
+ rcu_read_unlock();
+ cb->args[0] = i;
+ return nlskb->len;
+}
+
+static int nfnl_hook_dump_start(struct netlink_callback *cb)
+{
+ const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nlattr * const *nla = cb->data;
+ struct nfnl_dump_hook_data *ctx = NULL;
+ struct net *net = sock_net(cb->skb->sk);
+ u8 family = nfmsg->nfgen_family;
+ char name[IFNAMSIZ] = "";
+ const void *head;
+ u32 hooknum;
+
+ hooknum = ntohl(nla_get_be32(nla[NFNLA_HOOK_HOOKNUM]));
+ if (hooknum > 255)
+ return -EINVAL;
+
+ if (family == NFPROTO_NETDEV) {
+ if (!nla[NFNLA_HOOK_DEV])
+ return -EINVAL;
+
+ nla_strscpy(name, nla[NFNLA_HOOK_DEV], sizeof(name));
+ }
+
+ rcu_read_lock();
+ /* Not dereferenced; for consistency check only */
+ head = nfnl_hook_entries_head(family, hooknum, net, name);
+ rcu_read_unlock();
+
+ if (head && IS_ERR(head))
+ return PTR_ERR(head);
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ strscpy(ctx->devname, name, sizeof(ctx->devname));
+ ctx->headv = (unsigned long)head;
+ ctx->hook = hooknum;
+
+ cb->seq = 1;
+ cb->data = ctx;
+
+ return 0;
+}
+
+static int nfnl_hook_dump_stop(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+ return 0;
+}
+
+static int nfnl_hook_get(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ if (!nla[NFNLA_HOOK_HOOKNUM])
+ return -EINVAL;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nfnl_hook_dump_start,
+ .done = nfnl_hook_dump_stop,
+ .dump = nfnl_hook_dump,
+ .module = THIS_MODULE,
+ .data = (void *)nla,
+ };
+
+ return nf_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct nfnl_callback nfnl_hook_cb[NFNL_MSG_HOOK_MAX] = {
+ [NFNL_MSG_HOOK_GET] = {
+ .call = nfnl_hook_get,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFNLA_HOOK_MAX,
+ .policy = nfnl_hook_nla_policy
+ },
+};
+
+static const struct nfnetlink_subsystem nfhook_subsys = {
+ .name = "nfhook",
+ .subsys_id = NFNL_SUBSYS_HOOK,
+ .cb_count = NFNL_MSG_HOOK_MAX,
+ .cb = nfnl_hook_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_HOOK);
+
+static int __init nfnetlink_hook_init(void)
+{
+ return nfnetlink_subsys_register(&nfhook_subsys);
+}
+
+static void __exit nfnetlink_hook_exit(void)
+{
+ nfnetlink_subsys_unregister(&nfhook_subsys);
+}
+
+module_init(nfnetlink_hook_init);
+module_exit(nfnetlink_hook_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("nfnetlink_hook: list registered netfilter hooks");
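nfnl_hook_dump_one() above leans on the kallsyms "%ps" format, which yields "function_name [module_name]" for module symbols; the strstr()/strchr() sequence splits that back into the two netlink attributes. A standalone illustration of the same parse on a sample string (the symbol and module names here are made up):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char sym[64] = "example_hook_fn [example_module]";
	char *module_name = strstr(sym, " [");

	if (module_name) {
		char *end;

		*module_name = '\0';	/* terminate the function name */
		module_name += 2;	/* skip " [" */
		end = strchr(module_name, ']');
		if (end)
			*end = '\0';
		printf("function=%s module=%s\n", sym, module_name);
	} else {
		printf("function=%s (built-in)\n", sym);
	}
	return 0;
}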
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 587086b18c36..691ef4cffdd9 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -871,15 +871,14 @@ static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nfula[])
{
struct nfnl_log_net *log = nfnl_log_pernet(info->net);
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
- u_int16_t group_num = ntohs(nfmsg->res_id);
+ u_int16_t group_num = ntohs(info->nfmsg->res_id);
struct nfulnl_msg_config_cmd *cmd = NULL;
struct nfulnl_instance *inst;
u16 flags = 0;
int ret = 0;
if (nfula[NFULA_CFG_CMD]) {
- u_int8_t pf = nfmsg->nfgen_family;
+ u_int8_t pf = info->nfmsg->nfgen_family;
cmd = nla_data(nfula[NFULA_CFG_CMD]);
/* Commands without queue context */
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f37a575ebd7f..4c3fbaaeb103 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -951,6 +951,16 @@ static void nfqnl_nf_hook_drop(struct net *net)
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
int i;
+ /* This function is also called on net namespace error unwind,
+ * when pernet_ops->init() failed and ->exit() functions of the
+ * previous pernet_ops get called.
+ *
+ * This may result in a call to nfqnl_nf_hook_drop() before
+ * struct nfnl_queue_net was allocated.
+ */
+ if (!q)
+ return;
+
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct nfqnl_instance *inst;
struct hlist_head *head = &q->instance_table[i];
@@ -1051,8 +1061,7 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
const struct nlattr * const nfqa[])
{
struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
- u16 queue_num = ntohs(nfmsg->res_id);
+ u16 queue_num = ntohs(info->nfmsg->res_id);
struct nf_queue_entry *entry, *tmp;
struct nfqnl_msg_verdict_hdr *vhdr;
struct nfqnl_instance *queue;
@@ -1160,8 +1169,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nfqa[])
{
struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
- u_int16_t queue_num = ntohs(nfmsg->res_id);
+ u_int16_t queue_num = ntohs(info->nfmsg->res_id);
struct nfqnl_msg_verdict_hdr *vhdr;
enum ip_conntrack_info ctinfo;
struct nfqnl_instance *queue;
@@ -1243,8 +1251,7 @@ static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nfqa[])
{
struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
- struct nfgenmsg *nfmsg = nlmsg_data(info->nlh);
- u_int16_t queue_num = ntohs(nfmsg->res_id);
+ u_int16_t queue_num = ntohs(info->nfmsg->res_id);
struct nfqnl_msg_config_cmd *cmd = NULL;
struct nfqnl_instance *queue;
__u32 flags = 0, mask = 0;
@@ -1505,7 +1512,6 @@ static int __net_init nfnl_queue_net_init(struct net *net)
&nfqnl_seq_ops, sizeof(struct iter_state)))
return -ENOMEM;
#endif
- nf_register_queue_handler(net, &nfqh);
return 0;
}
@@ -1514,7 +1520,6 @@ static void __net_exit nfnl_queue_net_exit(struct net *net)
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
unsigned int i;
- nf_unregister_queue_handler(net);
#ifdef CONFIG_PROC_FS
remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
#endif
@@ -1558,6 +1563,8 @@ static int __init nfnetlink_queue_init(void)
goto cleanup_netlink_subsys;
}
+ nf_register_queue_handler(&nfqh);
+
return status;
cleanup_netlink_subsys:
@@ -1571,6 +1578,7 @@ out:
static void __exit nfnetlink_queue_fini(void)
{
+ nf_unregister_queue_handler();
unregister_netdevice_notifier(&nfqnl_dev_notifier);
nfnetlink_subsys_unregister(&nfqnl_subsys);
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 363bdd7044ec..5b02408a920b 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -18,7 +18,7 @@ static unsigned int nft_do_chain_ipv4(void *priv,
struct nft_pktinfo pkt;
nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv4(&pkt, skb);
+ nft_set_pktinfo_ipv4(&pkt);
return nft_do_chain(&pkt, priv);
}
@@ -62,7 +62,7 @@ static unsigned int nft_do_chain_arp(void *priv, struct sk_buff *skb,
struct nft_pktinfo pkt;
nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_unspec(&pkt, skb);
+ nft_set_pktinfo_unspec(&pkt);
return nft_do_chain(&pkt, priv);
}
@@ -102,7 +102,7 @@ static unsigned int nft_do_chain_ipv6(void *priv,
struct nft_pktinfo pkt;
nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
+ nft_set_pktinfo_ipv6(&pkt);
return nft_do_chain(&pkt, priv);
}
@@ -149,10 +149,10 @@ static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
switch (state->pf) {
case NFPROTO_IPV4:
- nft_set_pktinfo_ipv4(&pkt, skb);
+ nft_set_pktinfo_ipv4(&pkt);
break;
case NFPROTO_IPV6:
- nft_set_pktinfo_ipv6(&pkt, skb);
+ nft_set_pktinfo_ipv6(&pkt);
break;
default:
break;
@@ -174,7 +174,7 @@ static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb,
ingress_state.hook = NF_INET_INGRESS;
nft_set_pktinfo(&pkt, skb, &ingress_state);
- if (nft_set_pktinfo_ipv4_ingress(&pkt, skb) < 0)
+ if (nft_set_pktinfo_ipv4_ingress(&pkt) < 0)
return NF_DROP;
break;
case htons(ETH_P_IPV6):
@@ -182,7 +182,7 @@ static unsigned int nft_do_chain_inet_ingress(void *priv, struct sk_buff *skb,
ingress_state.hook = NF_INET_INGRESS;
nft_set_pktinfo(&pkt, skb, &ingress_state);
- if (nft_set_pktinfo_ipv6_ingress(&pkt, skb) < 0)
+ if (nft_set_pktinfo_ipv6_ingress(&pkt) < 0)
return NF_DROP;
break;
default:
@@ -238,13 +238,13 @@ nft_do_chain_bridge(void *priv,
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb);
+ nft_set_pktinfo_ipv4_validate(&pkt);
break;
case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb);
+ nft_set_pktinfo_ipv6_validate(&pkt);
break;
default:
- nft_set_pktinfo_unspec(&pkt, skb);
+ nft_set_pktinfo_unspec(&pkt);
break;
}
@@ -293,13 +293,13 @@ static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb,
switch (skb->protocol) {
case htons(ETH_P_IP):
- nft_set_pktinfo_ipv4_validate(&pkt, skb);
+ nft_set_pktinfo_ipv4_validate(&pkt);
break;
case htons(ETH_P_IPV6):
- nft_set_pktinfo_ipv6_validate(&pkt, skb);
+ nft_set_pktinfo_ipv6_validate(&pkt);
break;
default:
- nft_set_pktinfo_unspec(&pkt, skb);
+ nft_set_pktinfo_unspec(&pkt);
break;
}
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index eac4a901233f..98e4946100c5 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -17,12 +17,12 @@ static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb,
switch (state->pf) {
#ifdef CONFIG_NF_TABLES_IPV4
case NFPROTO_IPV4:
- nft_set_pktinfo_ipv4(&pkt, skb);
+ nft_set_pktinfo_ipv4(&pkt);
break;
#endif
#ifdef CONFIG_NF_TABLES_IPV6
case NFPROTO_IPV6:
- nft_set_pktinfo_ipv6(&pkt, skb);
+ nft_set_pktinfo_ipv6(&pkt);
break;
#endif
default:
diff --git a/net/netfilter/nft_chain_route.c b/net/netfilter/nft_chain_route.c
index edd02cda57fc..925db0dce48d 100644
--- a/net/netfilter/nft_chain_route.c
+++ b/net/netfilter/nft_chain_route.c
@@ -26,7 +26,7 @@ static unsigned int nf_route_table_hook4(void *priv,
u8 tos;
nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv4(&pkt, skb);
+ nft_set_pktinfo_ipv4(&pkt);
mark = skb->mark;
iph = ip_hdr(skb);
@@ -74,7 +74,7 @@ static unsigned int nf_route_table_hook6(void *priv,
int err;
nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
+ nft_set_pktinfo_ipv6(&pkt);
/* save source/dest address, mark, hoplimit, flowlabel, priority */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 5415ab14400d..272bcdb1392d 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -57,8 +57,13 @@ union nft_entry {
};
static inline void
-nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
+nft_compat_set_par(struct xt_action_param *par,
+ const struct nft_pktinfo *pkt,
+ const void *xt, const void *xt_info)
{
+ par->state = pkt->state;
+ par->thoff = nft_thoff(pkt);
+ par->fragoff = pkt->fragoff;
par->target = xt;
par->targinfo = xt_info;
par->hotdrop = false;
@@ -71,13 +76,14 @@ static void nft_target_eval_xt(const struct nft_expr *expr,
void *info = nft_expr_priv(expr);
struct xt_target *target = expr->ops->data;
struct sk_buff *skb = pkt->skb;
+ struct xt_action_param xt;
int ret;
- nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+ nft_compat_set_par(&xt, pkt, target, info);
- ret = target->target(skb, &pkt->xt);
+ ret = target->target(skb, &xt);
- if (pkt->xt.hotdrop)
+ if (xt.hotdrop)
ret = NF_DROP;
switch (ret) {
@@ -97,13 +103,14 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
void *info = nft_expr_priv(expr);
struct xt_target *target = expr->ops->data;
struct sk_buff *skb = pkt->skb;
+ struct xt_action_param xt;
int ret;
- nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
+ nft_compat_set_par(&xt, pkt, target, info);
- ret = target->target(skb, &pkt->xt);
+ ret = target->target(skb, &xt);
- if (pkt->xt.hotdrop)
+ if (xt.hotdrop)
ret = NF_DROP;
switch (ret) {
@@ -350,13 +357,14 @@ static void __nft_match_eval(const struct nft_expr *expr,
{
struct xt_match *match = expr->ops->data;
struct sk_buff *skb = pkt->skb;
+ struct xt_action_param xt;
bool ret;
- nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info);
+ nft_compat_set_par(&xt, pkt, match, info);
- ret = match->match(skb, (struct xt_action_param *)&pkt->xt);
+ ret = match->match(skb, &xt);
- if (pkt->xt.hotdrop) {
+ if (xt.hotdrop) {
regs->verdict.code = NF_DROP;
return;
}
@@ -617,7 +625,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
const struct nfnl_info *info,
const struct nlattr * const tb[])
{
- struct nfgenmsg *nfmsg;
+ u8 family = info->nfmsg->nfgen_family;
const char *name, *fmt;
struct sk_buff *skb2;
int ret = 0, target;
@@ -632,9 +640,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
- nfmsg = nlmsg_data(info->nlh);
-
- switch(nfmsg->nfgen_family) {
+ switch(family) {
case AF_INET:
fmt = "ipt_%s";
break;
@@ -648,8 +654,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
fmt = "arpt_%s";
break;
default:
- pr_err("nft_compat: unsupported protocol %d\n",
- nfmsg->nfgen_family);
+ pr_err("nft_compat: unsupported protocol %d\n", family);
return -EINVAL;
}
@@ -657,9 +662,8 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
return -EINVAL;
rcu_read_unlock();
- try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name,
- rev, target, &ret),
- fmt, name);
+ try_then_request_module(xt_find_revision(family, name, rev, target, &ret),
+ fmt, name);
if (ret < 0)
goto out_put;
@@ -674,20 +678,17 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
info->nlh->nlmsg_seq,
NFNL_MSG_TYPE(info->nlh->nlmsg_type),
NFNL_MSG_COMPAT_GET,
- nfmsg->nfgen_family,
- name, ret, target) <= 0) {
+ family, name, ret, target) <= 0) {
kfree_skb(skb2);
goto out_put;
}
- ret = netlink_unicast(info->sk, skb2, NETLINK_CB(skb).portid,
- MSG_DONTWAIT);
- if (ret > 0)
- ret = 0;
+ ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid);
out_put:
rcu_read_lock();
module_put(THIS_MODULE);
- return ret == -EAGAIN ? -ENOBUFS : ret;
+
+ return ret;
}
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 337e22d8b40b..99b1de14ff7e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -41,6 +41,7 @@ struct nft_ct_helper_obj {
#ifdef CONFIG_NF_CONNTRACK_ZONES
static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
+static DEFINE_MUTEX(nft_ct_pcpu_mutex);
#endif
static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
@@ -525,8 +526,10 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
case NFT_CT_ZONE:
+ mutex_lock(&nft_ct_pcpu_mutex);
if (--nft_ct_pcpu_template_refcnt == 0)
nft_ct_tmpl_put_pcpu();
+ mutex_unlock(&nft_ct_pcpu_mutex);
break;
#endif
default:
@@ -564,9 +567,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
case NFT_CT_ZONE:
- if (!nft_ct_tmpl_alloc_pcpu())
+ mutex_lock(&nft_ct_pcpu_mutex);
+ if (!nft_ct_tmpl_alloc_pcpu()) {
+ mutex_unlock(&nft_ct_pcpu_mutex);
return -ENOMEM;
+ }
nft_ct_pcpu_template_refcnt++;
+ mutex_unlock(&nft_ct_pcpu_mutex);
len = sizeof(u16);
break;
#endif
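Note on the hunks above: the per-cpu conntrack template used by NFT_CT_ZONE is a lazily allocated, refcounted singleton shared by every such expression, and its alloc/free paths plus the refcount were previously unserialized; nft_ct_pcpu_mutex now brackets both the init and destroy sides. The following is only a generic illustration of that get/put pattern in plain C with pthreads — the object, sizes and names are hypothetical, not the kernel code.

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t tmpl_mutex = PTHREAD_MUTEX_INITIALIZER;
static void *tmpl;                      /* shared template object */
static unsigned int tmpl_refcnt;

static void *tmpl_get(void)
{
        void *t;

        pthread_mutex_lock(&tmpl_mutex);
        if (!tmpl_refcnt) {
                tmpl = malloc(64);      /* allocate on first user */
                if (!tmpl) {
                        pthread_mutex_unlock(&tmpl_mutex);
                        return NULL;
                }
        }
        tmpl_refcnt++;
        t = tmpl;
        pthread_mutex_unlock(&tmpl_mutex);
        return t;
}

static void tmpl_put(void)
{
        pthread_mutex_lock(&tmpl_mutex);
        if (--tmpl_refcnt == 0) {       /* last user frees the template */
                free(tmpl);
                tmpl = NULL;
        }
        pthread_mutex_unlock(&tmpl_mutex);
}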
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index f64f0017e9a5..af4ee874a067 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -10,8 +10,10 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/sctp.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+#include <net/sctp/sctp.h>
#include <net/tcp.h>
struct nft_exthdr {
@@ -42,6 +44,9 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
unsigned int offset = 0;
int err;
+ if (pkt->skb->protocol != htons(ETH_P_IPV6))
+ goto err;
+
err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
nft_reg_store8(dest, err >= 0);
@@ -162,10 +167,10 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
{
struct tcphdr *tcph;
- if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP)
+ if (pkt->tprot != IPPROTO_TCP)
return NULL;
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer);
+ tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
if (!tcph)
return NULL;
@@ -173,7 +178,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
return NULL;
- return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer);
+ return skb_header_pointer(pkt->skb, nft_thoff(pkt), *tcphdr_len, buffer);
}
static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
@@ -249,7 +254,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
return;
if (skb_ensure_writable(pkt->skb,
- pkt->xt.thoff + i + priv->len))
+ nft_thoff(pkt) + i + priv->len))
return;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
@@ -300,6 +305,48 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
}
}
+static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ unsigned int offset = nft_thoff(pkt) + sizeof(struct sctphdr);
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ const struct sctp_chunkhdr *sch;
+ struct sctp_chunkhdr _sch;
+
+ if (pkt->tprot != IPPROTO_SCTP)
+ goto err;
+
+ do {
+ sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
+ if (!sch || !sch->length)
+ break;
+
+ if (sch->type == priv->type) {
+ if (priv->flags & NFT_EXTHDR_F_PRESENT) {
+ nft_reg_store8(dest, true);
+ return;
+ }
+ if (priv->offset + priv->len > ntohs(sch->length) ||
+ offset + ntohs(sch->length) > pkt->skb->len)
+ break;
+
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (skb_copy_bits(pkt->skb, offset + priv->offset,
+ dest, priv->len) < 0)
+ break;
+ return;
+ }
+ offset += SCTP_PAD4(ntohs(sch->length));
+ } while (offset < pkt->skb->len);
+err:
+ if (priv->flags & NFT_EXTHDR_F_PRESENT)
+ nft_reg_store8(dest, false);
+ else
+ regs->verdict.code = NFT_BREAK;
+}
+
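Note on nft_exthdr_sctp_eval() above: SCTP option matching walks the chunk list that follows the 12-byte common header, advancing by each chunk's length rounded up to a multiple of 4 (SCTP_PAD4). Below is a minimal stand-alone sketch of that same walk over a raw transport payload, assuming a flat buffer; the struct and function names are illustrative, not kernel code.

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

struct sctp_chunk_hdr {
        uint8_t  type;
        uint8_t  flags;
        uint16_t length;        /* network byte order, includes this header */
};

static const struct sctp_chunk_hdr *
find_sctp_chunk(const uint8_t *pkt, size_t pkt_len, uint8_t wanted_type)
{
        size_t off = 12;        /* common header: ports, vtag, checksum */

        while (off + sizeof(struct sctp_chunk_hdr) <= pkt_len) {
                const struct sctp_chunk_hdr *ch =
                        (const struct sctp_chunk_hdr *)(pkt + off);
                size_t len = ntohs(ch->length);

                if (len < sizeof(*ch))
                        break;                          /* malformed chunk */
                if (ch->type == wanted_type)
                        return ch;
                off += (len + 3) & ~(size_t)3;          /* SCTP_PAD4() equivalent */
        }
        return NULL;
}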
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@@ -499,6 +546,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
.dump = nft_exthdr_dump_set,
};
+static const struct nft_expr_ops nft_exthdr_sctp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_sctp_eval,
+ .init = nft_exthdr_init,
+ .dump = nft_exthdr_dump,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -529,6 +584,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
return &nft_exthdr_ipv4_ops;
}
break;
+ case NFT_EXTHDR_OP_SCTP:
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_sctp_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 4843dd2b410c..0af34ad41479 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -291,7 +291,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
case IPPROTO_TCP:
- tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff,
+ tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt),
sizeof(_tcph), &_tcph);
if (unlikely(!tcph || tcph->fin || tcph->rst))
goto out;
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
new file mode 100644
index 000000000000..304e33cbed9b
--- /dev/null
+++ b/net/netfilter/nft_last.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_last_priv {
+ unsigned long last_jiffies;
+ unsigned int last_set;
+};
+
+static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = {
+ [NFTA_LAST_SET] = { .type = NLA_U32 },
+ [NFTA_LAST_MSECS] = { .type = NLA_U64 },
+};
+
+static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ u64 last_jiffies;
+ u32 last_set = 0;
+ int err;
+
+ if (tb[NFTA_LAST_SET]) {
+ last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
+ if (last_set == 1)
+ priv->last_set = 1;
+ }
+
+ if (last_set && tb[NFTA_LAST_MSECS]) {
+ err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
+ if (err < 0)
+ return err;
+
+ priv->last_jiffies = jiffies - (unsigned long)last_jiffies;
+ }
+
+ return 0;
+}
+
+static void nft_last_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+
+ if (READ_ONCE(priv->last_jiffies) != jiffies)
+ WRITE_ONCE(priv->last_jiffies, jiffies);
+ if (READ_ONCE(priv->last_set) == 0)
+ WRITE_ONCE(priv->last_set, 1);
+}
+
+static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+ unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
+ u32 last_set = READ_ONCE(priv->last_set);
+ __be64 msecs;
+
+ if (time_before(jiffies, last_jiffies)) {
+ WRITE_ONCE(priv->last_set, 0);
+ last_set = 0;
+ }
+
+ if (last_set)
+ msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies);
+ else
+ msecs = 0;
+
+ if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) ||
+ nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_expr_ops nft_last_ops = {
+ .type = &nft_last_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)),
+ .eval = nft_last_eval,
+ .init = nft_last_init,
+ .dump = nft_last_dump,
+};
+
+struct nft_expr_type nft_last_type __read_mostly = {
+ .name = "last",
+ .ops = &nft_last_ops,
+ .policy = nft_last_policy,
+ .maxattr = NFTA_LAST_MAX,
+ .flags = NFT_EXPR_STATEFUL,
+ .owner = THIS_MODULE,
+};
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index a479f8a1270c..90becbf5bff3 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -23,6 +23,37 @@ struct nft_lookup {
struct nft_set_binding binding;
};
+#ifdef CONFIG_RETPOLINE
+bool nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
+{
+ if (set->ops == &nft_set_hash_fast_type.ops)
+ return nft_hash_lookup_fast(net, set, key, ext);
+ if (set->ops == &nft_set_hash_type.ops)
+ return nft_hash_lookup(net, set, key, ext);
+
+ if (set->ops == &nft_set_rhash_type.ops)
+ return nft_rhash_lookup(net, set, key, ext);
+
+ if (set->ops == &nft_set_bitmap_type.ops)
+ return nft_bitmap_lookup(net, set, key, ext);
+
+ if (set->ops == &nft_set_pipapo_type.ops)
+ return nft_pipapo_lookup(net, set, key, ext);
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
+ if (set->ops == &nft_set_pipapo_avx2_type.ops)
+ return nft_pipapo_avx2_lookup(net, set, key, ext);
+#endif
+
+ if (set->ops == &nft_set_rbtree_type.ops)
+ return nft_rbtree_lookup(net, set, key, ext);
+
+ WARN_ON_ONCE(1);
+ return set->ops->lookup(net, set, key, ext);
+}
+EXPORT_SYMBOL_GPL(nft_set_do_lookup);
+#endif
+
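Note on nft_set_do_lookup() above: with CONFIG_RETPOLINE, every indirect call through set->ops->lookup goes via a retpoline thunk, so the hot lookup path is devirtualized by comparing the ops pointer against the known set backends and calling them directly, keeping the indirect call only as a fallback. A minimal sketch of that pattern outside the kernel, with all names hypothetical:

struct backend_ops { int (*lookup)(int key); };

extern const struct backend_ops hash_ops, rbtree_ops;
int hash_lookup(int key);
int rbtree_lookup(int key);

static int do_lookup(const struct backend_ops *ops, int key)
{
        if (ops == &hash_ops)
                return hash_lookup(key);        /* direct call, no retpoline thunk */
        if (ops == &rbtree_ops)
                return rbtree_lookup(key);
        return ops->lookup(key);                /* fallback: indirect call */
}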
void nft_lookup_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -33,8 +64,8 @@ void nft_lookup_eval(const struct nft_expr *expr,
const struct net *net = nft_net(pkt);
bool found;
- found = set->ops->lookup(net, set, &regs->data[priv->sreg], &ext) ^
- priv->invert;
+ found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext) ^
+ priv->invert;
if (!found) {
ext = nft_set_catchall_lookup(net, set);
if (!ext) {
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 0840c635b752..be1595d6979d 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
break;
default:
- return -EAFNOSUPPORT;
+ if (tb[NFTA_NAT_REG_ADDR_MIN])
+ return -EAFNOSUPPORT;
+ break;
}
priv->family = family;
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 7e47edee88ee..94b2327e71dc 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -9,7 +9,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr))
@@ -110,7 +110,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr,
struct nft_object *obj;
bool found;
- found = set->ops->lookup(net, set, &regs->data[priv->sreg], &ext);
+ found = nft_set_do_lookup(net, set, &regs->data[priv->sreg], &ext);
if (!found) {
ext = nft_set_catchall_lookup(net, set);
if (!ext) {
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index ac61f708b82d..d82677e83400 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -28,6 +28,11 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct nf_osf_data data;
struct tcphdr _tcph;
+ if (pkt->tprot != IPPROTO_TCP) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
tcp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(struct tcphdr), &_tcph);
if (!tcp) {
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 501c5b24cc39..a44b14f6c0dc 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -110,7 +110,7 @@ void nft_payload_eval(const struct nft_expr *expr,
case NFT_PAYLOAD_TRANSPORT_HEADER:
if (!pkt->tprot_set)
goto err;
- offset = pkt->xt.thoff;
+ offset = nft_thoff(pkt);
break;
default:
BUG();
@@ -507,7 +507,7 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
*l4csum_offset = offsetof(struct tcphdr, check);
break;
case IPPROTO_UDP:
- if (!nft_payload_udp_checksum(skb, pkt->xt.thoff))
+ if (!nft_payload_udp_checksum(skb, nft_thoff(pkt)))
return -1;
fallthrough;
case IPPROTO_UDPLITE:
@@ -520,7 +520,7 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
return -1;
}
- *l4csum_offset += pkt->xt.thoff;
+ *l4csum_offset += nft_thoff(pkt);
return 0;
}
@@ -612,7 +612,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
case NFT_PAYLOAD_TRANSPORT_HEADER:
if (!pkt->tprot_set)
goto err;
- offset = pkt->xt.thoff;
+ offset = nft_thoff(pkt);
break;
default:
BUG();
@@ -643,7 +643,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP &&
pkt->tprot == IPPROTO_SCTP &&
skb->ip_summed != CHECKSUM_PARTIAL) {
- if (nft_payload_csum_sctp(skb, pkt->xt.thoff))
+ if (nft_payload_csum_sctp(skb, nft_thoff(pkt)))
goto err;
}
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 95090186ee90..554caf967baa 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -28,7 +28,7 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset(nft_net(pkt), pkt->xt.state->sk,
+ nf_send_reset(nft_net(pkt), nft_sk(pkt),
pkt->skb, nft_hook(pkt));
break;
case NFT_REJECT_ICMPX_UNREACH:
@@ -45,7 +45,7 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
priv->icmp_code, nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(nft_net(pkt), pkt->xt.state->sk,
+ nf_send_reset6(nft_net(pkt), nft_sk(pkt),
pkt->skb, nft_hook(pkt));
break;
case NFT_REJECT_ICMPX_UNREACH:
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 2a81ea421819..e7ae5914971e 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -73,8 +73,9 @@ nft_bitmap_active(const u8 *bitmap, u32 idx, u32 off, u8 genmask)
return (bitmap[idx] & (0x3 << off)) & (genmask << off);
}
-static bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
const struct nft_bitmap *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 7b3d0a78c569..df40314de21f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -74,8 +74,9 @@ static const struct rhashtable_params nft_rhash_params = {
.automatic_shrinking = true,
};
-static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
struct nft_rhash *priv = nft_set_priv(set);
const struct nft_rhash_elem *he;
@@ -446,8 +447,9 @@ struct nft_hash_elem {
struct nft_set_ext ext;
};
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
@@ -484,9 +486,10 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set,
return ERR_PTR(-ENOENT);
}
-static bool nft_hash_lookup_fast(const struct net *net,
- const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_hash_lookup_fast(const struct net *net,
+ const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
struct nft_hash *priv = nft_set_priv(set);
u8 genmask = nft_genmask_cur(net);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index d84afb8fa79a..25a75591583e 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -178,8 +178,6 @@ struct nft_pipapo_elem {
int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
union nft_pipapo_map_bucket *mt, bool match_only);
-bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext);
/**
* pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index eabdb8d552ee..e517663e0cd1 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -142,7 +142,6 @@ static void nft_pipapo_avx2_fill(unsigned long *data, int start, int len)
* @map: Bitmap to be scanned for set bits
* @dst: Destination bitmap
* @mt: Mapping table containing bit set specifiers
- * @len: Length of bitmap in longs
* @last: Return index of first set bit, if this is the last field
*
* This is an alternative implementation of pipapo_refill() suitable for usage
@@ -1109,7 +1108,7 @@ bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
* nft_pipapo_avx2_lookup() - Lookup function for AVX2 implementation
* @net: Network namespace
* @set: nftables API set representation
- * @elem: nftables API element representation containing key data
+ * @key: nftables API element representation containing key data
* @ext: nftables API extension pointer, filled with matching reference
*
* For more details, see DOC: Theory of Operation in nft_set_pipapo.c.
@@ -1136,8 +1135,13 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
m = rcu_dereference(priv->match);
- /* This also protects access to all data related to scratch maps */
- kernel_fpu_begin();
+ /* This also protects access to all data related to scratch maps.
+ *
+ * Note that we don't need a valid MXCSR state for any of the
+ * operations we use here, so pass 0 as mask and spare a LDMXCSR
+ * instruction.
+ */
+ kernel_fpu_begin_mask(0);
scratch = *raw_cpu_ptr(m->scratch_aligned);
if (unlikely(!scratch)) {
diff --git a/net/netfilter/nft_set_pipapo_avx2.h b/net/netfilter/nft_set_pipapo_avx2.h
index 394bcb704db7..dbb6aaca8a7a 100644
--- a/net/netfilter/nft_set_pipapo_avx2.h
+++ b/net/netfilter/nft_set_pipapo_avx2.h
@@ -5,8 +5,6 @@
#include <asm/fpu/xstate.h>
#define NFT_PIPAPO_ALIGN (XSAVE_YMM_SIZE / BITS_PER_BYTE)
-bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext);
bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
struct nft_set_estimate *est);
#endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 9e36eb4a7429..d600a566da32 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -107,8 +107,9 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
return false;
}
-static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key, const struct nft_set_ext **ext)
+INDIRECT_CALLABLE_SCOPE
+bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
struct nft_rbtree *priv = nft_set_priv(set);
unsigned int seq = read_seqcount_begin(&priv->count);
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 4fda8b3f1762..a0109fa1e92d 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -109,7 +109,7 @@ static void nft_synproxy_do_eval(const struct nft_synproxy *priv,
{
struct synproxy_options opts = {};
struct sk_buff *skb = pkt->skb;
- int thoff = pkt->xt.thoff;
+ int thoff = nft_thoff(pkt);
const struct tcphdr *tcp;
struct tcphdr _tcph;
@@ -123,7 +123,7 @@ static void nft_synproxy_do_eval(const struct nft_synproxy *priv,
return;
}
- tcp = skb_header_pointer(skb, pkt->xt.thoff,
+ tcp = skb_header_pointer(skb, thoff,
sizeof(struct tcphdr),
&_tcph);
if (!tcp) {
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index accef672088c..b5b09a902c7a 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -30,6 +30,12 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr,
__be16 tport = 0;
struct sock *sk;
+ if (pkt->tprot != IPPROTO_TCP &&
+ pkt->tprot != IPPROTO_UDP) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
if (!hp) {
regs->verdict.code = NFT_BREAK;
@@ -82,16 +88,17 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
const struct nft_tproxy *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct in6_addr taddr;
- int thoff = pkt->xt.thoff;
+ int thoff = nft_thoff(pkt);
struct udphdr _hdr, *hp;
+ struct in6_addr taddr;
__be16 tport = 0;
struct sock *sk;
int l4proto;
memset(&taddr, 0, sizeof(taddr));
- if (!pkt->tprot_set) {
+ if (pkt->tprot != IPPROTO_TCP &&
+ pkt->tprot != IPPROTO_UDP) {
regs->verdict.code = NFT_BREAK;
return;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 84e58ee501a4..25524e393349 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -39,6 +39,20 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
#define XT_PCPU_BLOCK_SIZE 4096
#define XT_MAX_TABLE_SIZE (512 * 1024 * 1024)
+struct xt_template {
+ struct list_head list;
+
+ /* called when table is needed in the given netns */
+ int (*table_init)(struct net *net);
+
+ struct module *me;
+
+ /* A unique name... */
+ char name[XT_TABLE_MAXNAMELEN];
+};
+
+static struct list_head xt_templates[NFPROTO_NUMPROTO];
+
struct xt_pernet {
struct list_head tables[NFPROTO_NUMPROTO];
};
@@ -1221,48 +1235,43 @@ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
{
struct xt_pernet *xt_net = net_generic(net, xt_pernet_id);
- struct xt_table *t, *found = NULL;
+ struct module *owner = NULL;
+ struct xt_template *tmpl;
+ struct xt_table *t;
mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt_net->tables[af], list)
if (strcmp(t->name, name) == 0 && try_module_get(t->me))
return t;
- if (net == &init_net)
- goto out;
-
- /* Table doesn't exist in this netns, re-try init */
- xt_net = net_generic(&init_net, xt_pernet_id);
- list_for_each_entry(t, &xt_net->tables[af], list) {
+ /* Table doesn't exist in this netns, check larval list */
+ list_for_each_entry(tmpl, &xt_templates[af], list) {
int err;
- if (strcmp(t->name, name))
+ if (strcmp(tmpl->name, name))
continue;
- if (!try_module_get(t->me))
+ if (!try_module_get(tmpl->me))
goto out;
+
+ owner = tmpl->me;
+
mutex_unlock(&xt[af].mutex);
- err = t->table_init(net);
+ err = tmpl->table_init(net);
if (err < 0) {
- module_put(t->me);
+ module_put(owner);
return ERR_PTR(err);
}
- found = t;
-
mutex_lock(&xt[af].mutex);
break;
}
- if (!found)
- goto out;
-
- xt_net = net_generic(net, xt_pernet_id);
/* and once again: */
list_for_each_entry(t, &xt_net->tables[af], list)
if (strcmp(t->name, name) == 0)
return t;
- module_put(found->me);
+ module_put(owner);
out:
mutex_unlock(&xt[af].mutex);
return ERR_PTR(-ENOENT);
@@ -1749,6 +1758,58 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
}
EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
+int xt_register_template(const struct xt_table *table,
+ int (*table_init)(struct net *net))
+{
+ int ret = -EEXIST, af = table->af;
+ struct xt_template *t;
+
+ mutex_lock(&xt[af].mutex);
+
+ list_for_each_entry(t, &xt_templates[af], list) {
+ if (WARN_ON_ONCE(strcmp(table->name, t->name) == 0))
+ goto out_unlock;
+ }
+
+ ret = -ENOMEM;
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ goto out_unlock;
+
+ BUILD_BUG_ON(sizeof(t->name) != sizeof(table->name));
+
+ strscpy(t->name, table->name, sizeof(t->name));
+ t->table_init = table_init;
+ t->me = table->me;
+ list_add(&t->list, &xt_templates[af]);
+ ret = 0;
+out_unlock:
+ mutex_unlock(&xt[af].mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xt_register_template);
+
+void xt_unregister_template(const struct xt_table *table)
+{
+ struct xt_template *t;
+ int af = table->af;
+
+ mutex_lock(&xt[af].mutex);
+ list_for_each_entry(t, &xt_templates[af], list) {
+ if (strcmp(table->name, t->name))
+ continue;
+
+ list_del(&t->list);
+ mutex_unlock(&xt[af].mutex);
+ kfree(t);
+ return;
+ }
+
+ mutex_unlock(&xt[af].mutex);
+ WARN_ON_ONCE(1);
+}
+EXPORT_SYMBOL_GPL(xt_unregister_template);
+
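Note on the template API above: xt_register_template() publishes a table name together with a per-netns init callback on a "larval" list, so xt_find_table_lock() can instantiate the table on demand in whatever namespace asks for it, instead of re-running the copy registered in init_net. A sketch of how a table module might use it; only xt_register_template()/xt_unregister_template() come from this patch, the table definition and init routine are placeholders.

static const struct xt_table packet_filter = {
        .name   = "filter",
        .af     = NFPROTO_IPV4,
        .me     = THIS_MODULE,
        /* ... valid_hooks, priority, ... */
};

static int __net_init filter_table_init(struct net *net)
{
        /* allocate and register this netns' copy of the table */
        return 0;
}

static int __init filter_init(void)
{
        /* publish name + init callback; the table itself is created lazily
         * when a netns first looks it up via xt_find_table_lock()
         */
        return xt_register_template(&packet_filter, filter_table_init);
}

static void __exit filter_fini(void)
{
        xt_unregister_template(&packet_filter);
}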
int xt_proto_init(struct net *net, u_int8_t af)
{
#ifdef CONFIG_PROC_FS
@@ -1937,6 +1998,7 @@ static int __init xt_init(void)
#endif
INIT_LIST_HEAD(&xt[i].target);
INIT_LIST_HEAD(&xt[i].match);
+ INIT_LIST_HEAD(&xt_templates[i]);
}
rv = register_pernet_subsys(&xt_net_ops);
if (rv < 0)
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index 9cdc16b0d0d8..b6a015aee0ce 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -117,7 +117,7 @@ static int audit_tg_check(const struct xt_tgchk_param *par)
const struct xt_audit_info *info = par->targinfo;
if (info->type > XT_AUDIT_TYPE_MAX) {
- pr_info_ratelimited("Audit type out of range (valid range: 0..%hhu)\n",
+ pr_info_ratelimited("Audit type out of range (valid range: 0..%u)\n",
XT_AUDIT_TYPE_MAX);
return -ERANGE;
}
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index d4deee39158b..0a913ce07425 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -172,7 +172,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
goto err2;
}
- ret = 0;
if ((info->ct_events || info->exp_events) &&
!nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
GFP_KERNEL)) {
@@ -352,21 +351,10 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static int notrack_chk(const struct xt_tgchk_param *par)
-{
- if (!par->net->xt.notrack_deprecated_warning) {
- pr_info("netfilter: NOTRACK target is deprecated, "
- "use CT instead or upgrade iptables\n");
- par->net->xt.notrack_deprecated_warning = true;
- }
- return 0;
-}
-
static struct xt_target notrack_tg_reg __read_mostly = {
.name = "NOTRACK",
.revision = 0,
.family = NFPROTO_UNSPEC,
- .checkentry = notrack_chk,
.target = notrack_tg,
.table = "raw",
.me = THIS_MODULE,
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 13cf3f9b5938..849ac552a154 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -90,7 +90,7 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
- return BPF_PROG_RUN(info->filter, skb);
+ return bpf_prog_run(info->filter, skb);
}
static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 24d4afb9988d..8b4fd27857f2 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -8,16 +8,14 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_limit.h>
struct xt_limit_priv {
- spinlock_t lock;
unsigned long prev;
- uint32_t credit;
+ u32 credit;
};
MODULE_LICENSE("GPL");
@@ -66,22 +64,31 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_rateinfo *r = par->matchinfo;
struct xt_limit_priv *priv = r->master;
- unsigned long now = jiffies;
-
- spin_lock_bh(&priv->lock);
- priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
- if (priv->credit > r->credit_cap)
- priv->credit = r->credit_cap;
-
- if (priv->credit >= r->cost) {
- /* We're not limited. */
- priv->credit -= r->cost;
- spin_unlock_bh(&priv->lock);
- return true;
- }
-
- spin_unlock_bh(&priv->lock);
- return false;
+ unsigned long now;
+ u32 old_credit, new_credit, credit_increase = 0;
+ bool ret;
+
+ /* fastpath if there is nothing to update */
+ if ((READ_ONCE(priv->credit) < r->cost) && (READ_ONCE(priv->prev) == jiffies))
+ return false;
+
+ do {
+ now = jiffies;
+ credit_increase += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
+ old_credit = READ_ONCE(priv->credit);
+ new_credit = old_credit;
+ new_credit += credit_increase;
+ if (new_credit > r->credit_cap)
+ new_credit = r->credit_cap;
+ if (new_credit >= r->cost) {
+ ret = true;
+ new_credit -= r->cost;
+ } else {
+ ret = false;
+ }
+ } while (cmpxchg(&priv->credit, old_credit, new_credit) != old_credit);
+
+ return ret;
}
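Note on limit_mt() above: the per-rule spinlock is gone and the token-bucket credit is now updated with a cmpxchg retry loop, so concurrent CPUs race on a single word instead of serializing on a lock. Below is a stand-alone sketch of the same lock-free token bucket using C11 atomics instead of the kernel's cmpxchg(); the struct, names and time source are illustrative, and wraparound handling is omitted.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct bucket {
        _Atomic uint32_t credit;
        uint32_t credit_cap;
        uint32_t cost;
        uint32_t credits_per_tick;
        _Atomic uint64_t prev_tick;
};

static bool bucket_allow(struct bucket *b, uint64_t now_tick)
{
        uint32_t old_credit, new_credit;
        bool ok;

        do {
                /* consume the elapsed time exactly once per attempt */
                uint64_t prev = atomic_exchange(&b->prev_tick, now_tick);
                uint64_t gained = (now_tick - prev) * b->credits_per_tick;

                old_credit = atomic_load(&b->credit);
                new_credit = old_credit + (uint32_t)gained;
                if (new_credit > b->credit_cap)
                        new_credit = b->credit_cap;

                ok = new_credit >= b->cost;
                if (ok)
                        new_credit -= b->cost; /* spend tokens for this packet */
        } while (!atomic_compare_exchange_weak(&b->credit, &old_credit,
                                               new_credit));

        return ok;
}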
/* Precision saver. */
@@ -122,7 +129,6 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
r->credit_cap = priv->credit; /* Credits full. */
r->cost = user2credits(r->avg);
}
- spin_lock_init(&priv->lock);
return 0;
}
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index f28c8947c730..91a19c3ea1a3 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -105,7 +105,7 @@ static int netlbl_calipso_add(struct sk_buff *skb, struct genl_info *info)
!info->attrs[NLBL_CALIPSO_A_MTYPE])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
switch (nla_get_u32(info->attrs[NLBL_CALIPSO_A_MTYPE])) {
case CALIPSO_MAP_PASS:
ret_val = netlbl_calipso_add_pass(info, &audit_info);
@@ -287,7 +287,7 @@ static int netlbl_calipso_remove(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NLBL_CALIPSO_A_DOI])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
cb_arg.doi = nla_get_u32(info->attrs[NLBL_CALIPSO_A_DOI]);
cb_arg.audit_info = &audit_info;
ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 4f50a64315cf..894e6b8f1a86 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -144,8 +144,8 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
return -ENOMEM;
doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
if (doi_def->map.std == NULL) {
- ret_val = -ENOMEM;
- goto add_std_failure;
+ kfree(doi_def);
+ return -ENOMEM;
}
doi_def->type = CIPSO_V4_MAP_TRANS;
@@ -187,14 +187,14 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
}
doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->lvl.local == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
}
doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->lvl.cipso == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -263,7 +263,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
doi_def->map.std->cat.local = kcalloc(
doi_def->map.std->cat.local_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->cat.local == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -271,7 +271,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
doi_def->map.std->cat.cipso = kcalloc(
doi_def->map.std->cat.cipso_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->cat.cipso == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -410,7 +410,7 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
!info->attrs[NLBL_CIPSOV4_A_MTYPE])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
switch (nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE])) {
case CIPSO_V4_MAP_TRANS:
ret_val = netlbl_cipsov4_add_std(info, &audit_info);
@@ -709,7 +709,7 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NLBL_CIPSOV4_A_DOI])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
cb_arg.doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
cb_arg.audit_info = &audit_info;
ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index dc8c39f51f7d..8158a25972b4 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -929,7 +929,7 @@ struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain,
* @cb_arg: argument for the callback function
*
* Description:
- * Interate over the domain mapping hash table, skipping the first @skip_bkt
+ * Iterate over the domain mapping hash table, skipping the first @skip_bkt
* buckets and @skip_chain entries. For each entry in the table call
* @callback, if @callback returns a negative value stop 'walking' through the
* table and return. Updates the values in @skip_bkt and @skip_chain on
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 5e1239cef000..beb0e573266d 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -719,7 +719,7 @@ int netlbl_catmap_walkrng(struct netlbl_lsm_catmap *catmap, u32 offset)
* it in @bitmap. The @offset must be aligned to an unsigned long and will be
* updated on return if different from what was requested; if the catmap is
* empty at the requested offset and beyond, the @offset is set to (u32)-1.
- * Returns zero on sucess, negative values on failure.
+ * Returns zero on success, negative values on failure.
*
*/
int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap,
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index ca52f5085989..032b7d7b32c7 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -76,6 +76,7 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
static int netlbl_mgmt_add_common(struct genl_info *info,
struct netlbl_audit *audit_info)
{
+ void *pmap = NULL;
int ret_val = -EINVAL;
struct netlbl_domaddr_map *addrmap = NULL;
struct cipso_v4_doi *cipsov4 = NULL;
@@ -175,6 +176,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = -ENOMEM;
goto add_free_addrmap;
}
+ pmap = map;
map->list.addr = addr->s_addr & mask->s_addr;
map->list.mask = mask->s_addr;
map->list.valid = 1;
@@ -183,10 +185,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
map->def.cipso = cipsov4;
ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
- if (ret_val != 0) {
- kfree(map);
- goto add_free_addrmap;
- }
+ if (ret_val != 0)
+ goto add_free_map;
entry->family = AF_INET;
entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
@@ -223,6 +223,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = -ENOMEM;
goto add_free_addrmap;
}
+ pmap = map;
map->list.addr = *addr;
map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
@@ -235,10 +236,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
map->def.calipso = calipso;
ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
- if (ret_val != 0) {
- kfree(map);
- goto add_free_addrmap;
- }
+ if (ret_val != 0)
+ goto add_free_map;
entry->family = AF_INET6;
entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
@@ -248,10 +247,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = netlbl_domhsh_add(entry, audit_info);
if (ret_val != 0)
- goto add_free_addrmap;
+ goto add_free_map;
return 0;
+add_free_map:
+ kfree(pmap);
add_free_addrmap:
kfree(addrmap);
add_doi_put_def:
@@ -434,7 +435,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
(info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
return netlbl_mgmt_add_common(info, &audit_info);
}
@@ -457,7 +458,7 @@ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NLBL_MGMT_A_DOMAIN])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]);
return netlbl_domhsh_remove(domain, AF_UNSPEC, &audit_info);
@@ -557,7 +558,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
(info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
return netlbl_mgmt_add_common(info, &audit_info);
}
@@ -576,7 +577,7 @@ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
{
struct netlbl_audit audit_info;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
return netlbl_domhsh_remove_default(AF_UNSPEC, &audit_info);
}
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 3e6ac9b790b1..566ba4397ee4 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -492,8 +492,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
netlbl_af4list_audit_addr(audit_buf, 1,
(dev != NULL ? dev->name : NULL),
addr->s_addr, mask->s_addr);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (entry != NULL &&
security_secid_to_secctx(entry->secid,
&secctx, &secctx_len) == 0) {
@@ -553,8 +552,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
netlbl_af6list_audit_addr(audit_buf, 1,
(dev != NULL ? dev->name : NULL),
addr, mask);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (entry != NULL &&
security_secid_to_secctx(entry->secid,
&secctx, &secctx_len) == 0) {
@@ -814,7 +812,7 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) {
value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]);
if (value == 1 || value == 0) {
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
netlbl_unlabel_acceptflg_set(value, &audit_info);
return 0;
}
@@ -897,7 +895,7 @@ static int netlbl_unlabel_staticadd(struct sk_buff *skb,
!info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
if (ret_val != 0)
@@ -947,7 +945,7 @@ static int netlbl_unlabel_staticadddef(struct sk_buff *skb,
!info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
if (ret_val != 0)
@@ -994,7 +992,7 @@ static int netlbl_unlabel_staticremove(struct sk_buff *skb,
!info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
if (ret_val != 0)
@@ -1034,7 +1032,7 @@ static int netlbl_unlabel_staticremovedef(struct sk_buff *skb,
!info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
if (ret_val != 0)
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index b9ba8112b3c5..6190cbf94bf0 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -28,11 +28,9 @@
/**
* netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg
- * @skb: the packet
* @audit_info: NetLabel audit information
*/
-static inline void netlbl_netlink_auditinfo(struct sk_buff *skb,
- struct netlbl_audit *audit_info)
+static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info)
{
security_task_getsecid_subj(current, &audit_info->secid);
audit_info->loginuid = audit_get_loginuid(current);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6133e412b948..24b7cf447bc5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -351,7 +351,7 @@ static void netlink_overrun(struct sock *sk)
if (!test_and_set_bit(NETLINK_S_CONGESTED,
&nlk_sk(sk)->state)) {
sk->sk_err = ENOBUFS;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
}
atomic_inc(&sk->sk_drops);
@@ -1576,7 +1576,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
}
sk->sk_err = p->code;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
out:
return ret;
}
@@ -2012,7 +2012,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
ret = netlink_dump(sk);
if (ret) {
sk->sk_err = -ret;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
}
@@ -2439,7 +2439,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
if (!skb) {
NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
- NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
+ sk_error_report(NETLINK_CB(in_skb).sk);
return;
}
@@ -2471,7 +2471,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
nlmsg_end(skb, rep);
- netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
+ nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid);
}
EXPORT_SYMBOL(netlink_ack);
@@ -2545,13 +2545,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
/* errors reported via destination sk->sk_err, but propagate
* delivery errors if NETLINK_BROADCAST_ERROR flag is set */
err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
+ if (err == -ESRCH)
+ err = 0;
}
if (report) {
int err2;
err2 = nlmsg_unicast(sk, skb, portid);
- if (!err || err == -ESRCH)
+ if (!err)
err = err2;
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2d6fdf40df66..1afca2a6c2ac 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -40,14 +40,6 @@ void genl_unlock(void)
}
EXPORT_SYMBOL(genl_unlock);
-#ifdef CONFIG_LOCKDEP
-bool lockdep_genl_is_held(void)
-{
- return lockdep_is_held(&genl_mutex);
-}
-EXPORT_SYMBOL(lockdep_genl_is_held);
-#endif
-
static void genl_lock_all(void)
{
down_write(&cb_lock);
@@ -1485,6 +1477,7 @@ int genlmsg_multicast_allns(const struct genl_family *family,
{
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return -EINVAL;
+
group = family->mcgrp_offset + group;
return genlmsg_mcast(skb, portid, group, flags);
}
@@ -1495,14 +1488,12 @@ void genl_notify(const struct genl_family *family, struct sk_buff *skb,
{
struct net *net = genl_info_net(info);
struct sock *sk = net->genl_sock;
- int report = 0;
-
- if (info->nlhdr)
- report = nlmsg_report(info->nlhdr);
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return;
+
group = family->mcgrp_offset + group;
- nlmsg_notify(sk, skb, info->snd_portid, group, report, flags);
+ nlmsg_notify(sk, skb, info->snd_portid, group,
+ nlmsg_report(info->nlhdr), flags);
}
EXPORT_SYMBOL(genl_notify);
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index a880dd33e901..511819fbfa67 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -59,8 +59,7 @@ static void nr_loopback_timer(struct timer_list *unused)
if (dev == NULL || nr_rx_frame(skb, dev) == 0)
kfree_skb(skb);
- if (dev != NULL)
- dev_put(dev);
+ dev_put(dev);
if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running())
mod_timer(&loopback_timer, jiffies + 10);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 78da5eab252a..ddd5cbd455e3 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -266,6 +266,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
fallthrough;
case 2:
re_sort_routes(nr_node, 0, 1);
+ break;
case 1:
break;
}
@@ -359,6 +360,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
fallthrough;
case 1:
nr_node->routes[1] = nr_node->routes[2];
+ fallthrough;
case 2:
break;
}
@@ -482,6 +484,7 @@ static int nr_dec_obs(void)
fallthrough;
case 1:
s->routes[1] = s->routes[2];
+ break;
case 2:
break;
}
@@ -529,6 +532,7 @@ void nr_rt_device_down(struct net_device *dev)
fallthrough;
case 1:
t->routes[1] = t->routes[2];
+ break;
case 2:
break;
}
@@ -578,8 +582,7 @@ struct net_device *nr_dev_first(void)
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
}
- if (first)
- dev_hold(first);
+ dev_hold(first);
rcu_read_unlock();
return first;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 9115f8a7dd45..a8da88db7893 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -121,11 +121,9 @@ static void nr_heartbeat_expiry(struct timer_list *t)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
- sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
- sock_put(sk);
- return;
+ goto out;
}
break;
@@ -146,6 +144,8 @@ static void nr_heartbeat_expiry(struct timer_list *t)
nr_start_heartbeat(sk);
bh_unlock_sock(sk);
+out:
+ sock_put(sk);
}
static void nr_t2timer_expiry(struct timer_list *t)
@@ -159,6 +159,7 @@ static void nr_t2timer_expiry(struct timer_list *t)
nr_enquiry_response(sk);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_t4timer_expiry(struct timer_list *t)
@@ -169,6 +170,7 @@ static void nr_t4timer_expiry(struct timer_list *t)
bh_lock_sock(sk);
nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_idletimer_expiry(struct timer_list *t)
@@ -197,6 +199,7 @@ static void nr_idletimer_expiry(struct timer_list *t)
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_t1timer_expiry(struct timer_list *t)
@@ -209,8 +212,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_1:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_write_internal(sk, NR_CONNREQ);
@@ -220,8 +222,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_2:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_write_internal(sk, NR_DISCREQ);
@@ -231,8 +232,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_3:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_requeue_frames(sk);
@@ -241,5 +241,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
}
nr_start_t1timer(sk);
+out:
bh_unlock_sock(sk);
+ sock_put(sk);
}
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 4a9e72073564..6024fad905ff 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -79,7 +79,7 @@ int __init af_nfc_init(void)
return sock_register(&nfc_sock_family_ops);
}
-void af_nfc_exit(void)
+void __exit af_nfc_exit(void)
{
sock_unregister(PF_NFC);
}
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 573c80c6ff7a..3c645c1d99c9 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -636,7 +636,7 @@ error:
return rc;
}
-int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
{
pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len);
@@ -665,7 +665,7 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb)
EXPORT_SYMBOL(nfc_tm_data_received);
int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode,
- u8 *gb, size_t gb_len)
+ const u8 *gb, size_t gb_len)
{
int rc;
@@ -824,7 +824,7 @@ EXPORT_SYMBOL(nfc_targets_found);
*/
int nfc_target_lost(struct nfc_dev *dev, u32 target_idx)
{
- struct nfc_target *tg;
+ const struct nfc_target *tg;
int i;
pr_debug("dev_name %s n_target %d\n", dev_name(&dev->dev), target_idx);
@@ -1048,7 +1048,7 @@ struct nfc_dev *nfc_get_device(unsigned int idx)
* @tx_headroom: reserved space at beginning of skb
* @tx_tailroom: reserved space at end of skb
*/
-struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
+struct nfc_dev *nfc_allocate_device(const struct nfc_ops *ops,
u32 supported_protocols,
int tx_headroom, int tx_tailroom)
{
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index 5044c7db577e..fefc03674f4f 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -732,7 +732,7 @@ exit:
return rc;
}
-static struct nfc_ops digital_nfc_ops = {
+static const struct nfc_ops digital_nfc_ops = {
.dev_up = digital_dev_up,
.dev_down = digital_dev_down,
.start_poll = digital_start_poll,
@@ -745,7 +745,7 @@ static struct nfc_ops digital_nfc_ops = {
.im_transceive = digital_in_send,
};
-struct nfc_digital_dev *nfc_digital_allocate_device(struct nfc_digital_ops *ops,
+struct nfc_digital_dev *nfc_digital_allocate_device(const struct nfc_digital_ops *ops,
__u32 supported_protocols,
__u32 driver_capabilities,
int tx_headroom, int tx_tailroom)
diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index e02b9befce0b..3a89bd9b89fc 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c
@@ -34,7 +34,7 @@ static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
* HCI command execution completion callback.
* err will be a standard linux error (may be converted from HCI response)
* skb contains the response data and must be disposed, or may be NULL if
- * an error occured
+ * an error occurred
*/
static void nfc_hci_execute_cb(void *context, struct sk_buff *skb, int err)
{
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 43811b5219b5..ceb87db57cdb 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -128,7 +128,7 @@ static void nfc_hci_msg_rx_work(struct work_struct *work)
struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev,
msg_rx_work);
struct sk_buff *skb;
- struct hcp_message *message;
+ const struct hcp_message *message;
u8 pipe;
u8 type;
u8 instruction;
@@ -182,9 +182,9 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
struct sk_buff *skb)
{
u8 status = NFC_HCI_ANY_OK;
- struct hci_create_pipe_resp *create_info;
- struct hci_delete_pipe_noti *delete_info;
- struct hci_all_pipe_cleared_noti *cleared_info;
+ const struct hci_create_pipe_resp *create_info;
+ const struct hci_delete_pipe_noti *delete_info;
+ const struct hci_all_pipe_cleared_noti *cleared_info;
u8 gate;
pr_debug("from pipe %x cmd %x\n", pipe, cmd);
@@ -447,7 +447,7 @@ static void nfc_hci_cmd_timeout(struct timer_list *t)
}
static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count,
- struct nfc_hci_gate *gates)
+ const struct nfc_hci_gate *gates)
{
int r;
while (gate_count--) {
@@ -705,7 +705,7 @@ static void hci_transceive_cb(void *context, struct sk_buff *skb, int err)
/*
* TODO: Check RF Error indicator to make sure data is valid.
* It seems that HCI cmd can complete without error, but data
- * can be invalid if an RF error occured? Ignore for now.
+ * can be invalid if an RF error occurred? Ignore for now.
*/
if (err == 0)
skb_trim(skb, skb->len - 1); /* RF Err ind */
@@ -928,7 +928,7 @@ static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
return hdev->ops->fw_download(hdev, firmware_name);
}
-static struct nfc_ops hci_nfc_ops = {
+static const struct nfc_ops hci_nfc_ops = {
.dev_up = hci_dev_up,
.dev_down = hci_dev_down,
.start_poll = hci_start_poll,
@@ -947,7 +947,7 @@ static struct nfc_ops hci_nfc_ops = {
.se_io = hci_se_io,
};
-struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
+struct nfc_hci_dev *nfc_hci_allocate_device(const struct nfc_hci_ops *ops,
struct nfc_hci_init_data *init_data,
unsigned long quirks,
u32 protocols,
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 6ab40ea17662..2140f6724644 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -11,7 +11,7 @@
static LIST_HEAD(llc_engines);
-int nfc_llc_init(void)
+int __init nfc_llc_init(void)
{
int r;
@@ -41,7 +41,7 @@ void nfc_llc_exit(void)
}
}
-int nfc_llc_register(const char *name, struct nfc_llc_ops *ops)
+int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops)
{
struct nfc_llc_engine *llc_engine;
diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h
index 823ddb621e5d..d66271d211a5 100644
--- a/net/nfc/hci/llc.h
+++ b/net/nfc/hci/llc.h
@@ -26,20 +26,20 @@ struct nfc_llc_ops {
struct nfc_llc_engine {
const char *name;
- struct nfc_llc_ops *ops;
+ const struct nfc_llc_ops *ops;
struct list_head entry;
};
struct nfc_llc {
void *data;
- struct nfc_llc_ops *ops;
+ const struct nfc_llc_ops *ops;
int rx_headroom;
int rx_tailroom;
};
void *nfc_llc_get_data(struct nfc_llc *llc);
-int nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+int nfc_llc_register(const char *name, const struct nfc_llc_ops *ops);
void nfc_llc_unregister(const char *name);
int nfc_llc_nop_register(void);
diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c
index a42852f36f2e..a58716f16954 100644
--- a/net/nfc/hci/llc_nop.c
+++ b/net/nfc/hci/llc_nop.c
@@ -71,7 +71,7 @@ static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
return llc_nop->xmit_to_drv(llc_nop->hdev, skb);
}
-static struct nfc_llc_ops llc_nop_ops = {
+static const struct nfc_llc_ops llc_nop_ops = {
.init = llc_nop_init,
.deinit = llc_nop_deinit,
.start = llc_nop_start,
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index c0c8fea3a186..aef750d7787c 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -123,7 +123,7 @@ static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
return ((y >= x) || (y < z)) ? true : false;
}
-static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
+static struct sk_buff *llc_shdlc_alloc_skb(const struct llc_shdlc *shdlc,
int payload_len)
{
struct sk_buff *skb;
@@ -137,7 +137,7 @@ static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
}
/* immediately sends an S frame. */
-static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
+static int llc_shdlc_send_s_frame(const struct llc_shdlc *shdlc,
enum sframe_type sframe_type, int nr)
{
int r;
@@ -159,7 +159,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
}
/* immediately sends an U frame. skb may contain optional payload */
-static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc,
+static int llc_shdlc_send_u_frame(const struct llc_shdlc *shdlc,
struct sk_buff *skb,
enum uframe_modifier uframe_modifier)
{
@@ -361,7 +361,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r)
wake_up(shdlc->connect_wq);
}
-static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
+static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
@@ -377,7 +377,7 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
}
-static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc)
+static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
@@ -406,7 +406,7 @@ static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc,
case SHDLC_NEGOTIATING:
case SHDLC_CONNECTING:
/*
- * We sent RSET, but chip wants to negociate or we
+ * We sent RSET, but chip wants to negotiate or we
* got RSET before we managed to send out our.
*/
if (skb->len > 0)
@@ -820,7 +820,7 @@ static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
return 0;
}
-static struct nfc_llc_ops llc_shdlc_ops = {
+static const struct nfc_llc_ops llc_shdlc_ops = {
.init = llc_shdlc_init,
.deinit = llc_shdlc_deinit,
.start = llc_shdlc_start,
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index 97853c9cefc7..d49d4bf2e37c 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -221,15 +221,15 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock);
/* TLV API */
int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
- u8 *tlv_array, u16 tlv_array_len);
+ const u8 *tlv_array, u16 tlv_array_len);
int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
- u8 *tlv_array, u16 tlv_array_len);
+ const u8 *tlv_array, u16 tlv_array_len);
/* Commands API */
void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
-u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
+u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length);
struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap);
-struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri,
size_t uri_len);
void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 475061c79c44..3c4172a5aeb5 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -15,7 +15,7 @@
#include "nfc.h"
#include "llcp.h"
-static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
+static const u8 llcp_tlv_length[LLCP_TLV_MAX] = {
0,
1, /* VERSION */
2, /* MIUX */
@@ -29,7 +29,7 @@ static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
};
-static u8 llcp_tlv8(u8 *tlv, u8 type)
+static u8 llcp_tlv8(const u8 *tlv, u8 type)
{
if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
return 0;
@@ -37,7 +37,7 @@ static u8 llcp_tlv8(u8 *tlv, u8 type)
return tlv[2];
}
-static u16 llcp_tlv16(u8 *tlv, u8 type)
+static u16 llcp_tlv16(const u8 *tlv, u8 type)
{
if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
return 0;
@@ -46,37 +46,37 @@ static u16 llcp_tlv16(u8 *tlv, u8 type)
}
-static u8 llcp_tlv_version(u8 *tlv)
+static u8 llcp_tlv_version(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_VERSION);
}
-static u16 llcp_tlv_miux(u8 *tlv)
+static u16 llcp_tlv_miux(const u8 *tlv)
{
return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff;
}
-static u16 llcp_tlv_wks(u8 *tlv)
+static u16 llcp_tlv_wks(const u8 *tlv)
{
return llcp_tlv16(tlv, LLCP_TLV_WKS);
}
-static u16 llcp_tlv_lto(u8 *tlv)
+static u16 llcp_tlv_lto(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_LTO);
}
-static u8 llcp_tlv_opt(u8 *tlv)
+static u8 llcp_tlv_opt(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_OPT);
}
-static u8 llcp_tlv_rw(u8 *tlv)
+static u8 llcp_tlv_rw(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf;
}
-u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
+u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length)
{
u8 *tlv, length;
@@ -130,7 +130,7 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap)
return sdres;
}
-struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri,
size_t uri_len)
{
struct nfc_llcp_sdp_tlv *sdreq;
@@ -190,9 +190,10 @@ void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head)
}
int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
- u8 *tlv_array, u16 tlv_array_len)
+ const u8 *tlv_array, u16 tlv_array_len)
{
- u8 *tlv = tlv_array, type, length, offset = 0;
+ const u8 *tlv = tlv_array;
+ u8 type, length, offset = 0;
pr_debug("TLV array length %d\n", tlv_array_len);
@@ -239,9 +240,10 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
}
int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
- u8 *tlv_array, u16 tlv_array_len)
+ const u8 *tlv_array, u16 tlv_array_len)
{
- u8 *tlv = tlv_array, type, length, offset = 0;
+ const u8 *tlv = tlv_array;
+ u8 type, length, offset = 0;
pr_debug("TLV array length %d\n", tlv_array_len);
@@ -295,7 +297,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu,
return pdu;
}
-static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv,
+static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv,
u8 tlv_length)
{
/* XXX Add an skb length check */
@@ -389,9 +391,10 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
{
struct nfc_llcp_local *local;
struct sk_buff *skb;
- u8 *service_name_tlv = NULL, service_name_tlv_length;
- u8 *miux_tlv = NULL, miux_tlv_length;
- u8 *rw_tlv = NULL, rw_tlv_length, rw;
+ const u8 *service_name_tlv = NULL;
+ const u8 *miux_tlv = NULL;
+ const u8 *rw_tlv = NULL;
+ u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw;
int err;
u16 size = 0;
__be16 miux;
@@ -465,8 +468,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
{
struct nfc_llcp_local *local;
struct sk_buff *skb;
- u8 *miux_tlv = NULL, miux_tlv_length;
- u8 *rw_tlv = NULL, rw_tlv_length, rw;
+ const u8 *miux_tlv = NULL;
+ const u8 *rw_tlv = NULL;
+ u8 miux_tlv_length, rw_tlv_length, rw;
int err;
u16 size = 0;
__be16 miux;
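For reference, the parse helpers constified above walk a type-length-value array through the now-const pointer; a minimal sketch of that loop, with the bounds handling that the diff context elides:

	const u8 *tlv = tlv_array;
	u8 type, length, offset = 0;

	while (offset < tlv_array_len) {
		type = tlv[0];			/* first octet: TLV type */
		length = tlv[1];		/* second octet: value length */
		/* ... consume the value bytes starting at &tlv[2] ... */
		offset += length + 2;		/* skip type, length and value octets */
		tlv += length + 2;
	}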
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index cc997518f79d..eaeb2b1cfa6a 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -301,7 +301,7 @@ static char *wks[] = {
"urn:nfc:sn:snep",
};
-static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
+static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len)
{
int sap, num_wks;
@@ -325,7 +325,7 @@ static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
static
struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
- u8 *sn, size_t sn_len)
+ const u8 *sn, size_t sn_len)
{
struct sock *sk;
struct nfc_llcp_sock *llcp_sock, *tmp_sock;
@@ -522,7 +522,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
{
u8 *gb_cur, version, version_length;
u8 lto_length, wks_length, miux_length;
- u8 *version_tlv = NULL, *lto_tlv = NULL,
+ const u8 *version_tlv = NULL, *lto_tlv = NULL,
*wks_tlv = NULL, *miux_tlv = NULL;
__be16 wks = cpu_to_be16(local->local_wks);
u8 gb_len = 0;
@@ -612,7 +612,7 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len)
return local->gb;
}
-int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
{
struct nfc_llcp_local *local;
@@ -639,27 +639,27 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
local->remote_gb_len - 3);
}
-static u8 nfc_llcp_dsap(struct sk_buff *pdu)
+static u8 nfc_llcp_dsap(const struct sk_buff *pdu)
{
return (pdu->data[0] & 0xfc) >> 2;
}
-static u8 nfc_llcp_ptype(struct sk_buff *pdu)
+static u8 nfc_llcp_ptype(const struct sk_buff *pdu)
{
return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6);
}
-static u8 nfc_llcp_ssap(struct sk_buff *pdu)
+static u8 nfc_llcp_ssap(const struct sk_buff *pdu)
{
return pdu->data[1] & 0x3f;
}
-static u8 nfc_llcp_ns(struct sk_buff *pdu)
+static u8 nfc_llcp_ns(const struct sk_buff *pdu)
{
return pdu->data[2] >> 4;
}
-static u8 nfc_llcp_nr(struct sk_buff *pdu)
+static u8 nfc_llcp_nr(const struct sk_buff *pdu)
{
return pdu->data[2] & 0xf;
}
@@ -801,7 +801,7 @@ out:
}
static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local,
- u8 *sn, size_t sn_len)
+ const u8 *sn, size_t sn_len)
{
struct nfc_llcp_sock *llcp_sock;
@@ -815,9 +815,10 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local,
return llcp_sock;
}
-static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len)
+static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len)
{
- u8 *tlv = &skb->data[2], type, length;
+ u8 type, length;
+ const u8 *tlv = &skb->data[2];
size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0;
while (offset < tlv_array_len) {
@@ -875,7 +876,7 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,
}
static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct sock *new_sk, *parent;
struct nfc_llcp_sock *sock, *new_sock;
@@ -893,7 +894,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
goto fail;
}
} else {
- u8 *sn;
+ const u8 *sn;
size_t sn_len;
sn = nfc_llcp_connect_sn(skb, &sn_len);
@@ -1112,7 +1113,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local,
}
static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1155,7 +1156,8 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
nfc_llcp_sock_put(llcp_sock);
}
-static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb)
+static void nfc_llcp_recv_cc(struct nfc_llcp_local *local,
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1188,7 +1190,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb)
nfc_llcp_sock_put(llcp_sock);
}
-static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb)
+static void nfc_llcp_recv_dm(struct nfc_llcp_local *local,
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1226,12 +1229,13 @@ static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb)
}
static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
- u8 dsap, ssap, *tlv, type, length, tid, sap;
+ u8 dsap, ssap, type, length, tid, sap;
+ const u8 *tlv;
u16 tlv_len, offset;
- char *service_name;
+ const char *service_name;
size_t service_name_len;
struct nfc_llcp_sdp_tlv *sdp;
HLIST_HEAD(llc_sdres_list);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index da7fe9db1b00..82ab39d80726 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -53,9 +53,9 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev,
}
int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type,
- struct dest_spec_params *params)
+ const struct dest_spec_params *params)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
list_for_each_entry(conn_info, &ndev->conn_info_list, list) {
if (conn_info->dest_type == dest_type) {
@@ -95,8 +95,8 @@ static void nci_req_cancel(struct nci_dev *ndev, int err)
/* Execute request and wait for completion. */
static int __nci_request(struct nci_dev *ndev,
- void (*req)(struct nci_dev *ndev, unsigned long opt),
- unsigned long opt, __u32 timeout)
+ void (*req)(struct nci_dev *ndev, const void *opt),
+ const void *opt, __u32 timeout)
{
int rc = 0;
long completion_rc;
@@ -139,8 +139,8 @@ static int __nci_request(struct nci_dev *ndev,
inline int nci_request(struct nci_dev *ndev,
void (*req)(struct nci_dev *ndev,
- unsigned long opt),
- unsigned long opt, __u32 timeout)
+ const void *opt),
+ const void *opt, __u32 timeout)
{
int rc;
@@ -155,7 +155,7 @@ inline int nci_request(struct nci_dev *ndev,
return rc;
}
-static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_reset_req(struct nci_dev *ndev, const void *opt)
{
struct nci_core_reset_cmd cmd;
@@ -163,17 +163,17 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd);
}
-static void nci_init_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_req(struct nci_dev *ndev, const void *opt)
{
u8 plen = 0;
if (opt)
plen = sizeof(struct nci_core_init_v2_cmd);
- nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt);
+ nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt);
}
-static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_complete_req(struct nci_dev *ndev, const void *opt)
{
struct nci_rf_disc_map_cmd cmd;
struct disc_map_config *cfg = cmd.mapping_configs;
@@ -210,14 +210,14 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
}
struct nci_set_config_param {
- __u8 id;
- size_t len;
- __u8 *val;
+ __u8 id;
+ size_t len;
+ const __u8 *val;
};
-static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_set_config_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_set_config_param *param = (struct nci_set_config_param *)opt;
+ const struct nci_set_config_param *param = opt;
struct nci_core_set_config_cmd cmd;
BUG_ON(param->len > NCI_MAX_PARAM_LEN);
@@ -235,10 +235,9 @@ struct nci_rf_discover_param {
__u32 tm_protocols;
};
-static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_rf_discover_param *param =
- (struct nci_rf_discover_param *)opt;
+ const struct nci_rf_discover_param *param = opt;
struct nci_rf_disc_cmd cmd;
cmd.num_disc_configs = 0;
@@ -301,10 +300,9 @@ struct nci_rf_discover_select_param {
__u8 rf_protocol;
};
-static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_rf_discover_select_param *param =
- (struct nci_rf_discover_select_param *)opt;
+ const struct nci_rf_discover_select_param *param = opt;
struct nci_rf_discover_select_cmd cmd;
cmd.rf_discovery_id = param->rf_discovery_id;
@@ -328,11 +326,11 @@ static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
sizeof(struct nci_rf_discover_select_cmd), &cmd);
}
-static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt)
{
struct nci_rf_deactivate_cmd cmd;
- cmd.type = opt;
+ cmd.type = (unsigned long)opt;
nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD,
sizeof(struct nci_rf_deactivate_cmd), &cmd);
@@ -341,18 +339,17 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
struct nci_cmd_param {
__u16 opcode;
size_t len;
- __u8 *payload;
+ const __u8 *payload;
};
-static void nci_generic_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_generic_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_cmd_param *param =
- (struct nci_cmd_param *)opt;
+ const struct nci_cmd_param *param = opt;
nci_send_cmd(ndev, param->opcode, param->len, param->payload);
}
-int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
+int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload)
{
struct nci_cmd_param param;
@@ -360,12 +357,13 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload)
param.len = len;
param.payload = payload;
- return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+ return __nci_request(ndev, nci_generic_req, &param,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_prop_cmd);
-int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload)
+int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
+ const __u8 *payload)
{
struct nci_cmd_param param;
@@ -373,21 +371,21 @@ int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload)
param.len = len;
param.payload = payload;
- return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+ return __nci_request(ndev, nci_generic_req, &param,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_cmd);
int nci_core_reset(struct nci_dev *ndev)
{
- return __nci_request(ndev, nci_reset_req, 0,
+ return __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_reset);
int nci_core_init(struct nci_dev *ndev)
{
- return __nci_request(ndev, nci_init_req, 0,
+ return __nci_request(ndev, nci_init_req, (void *)0,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_init);
@@ -397,9 +395,9 @@ struct nci_loopback_data {
struct sk_buff *data;
};
-static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_send_data_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_loopback_data *data = (struct nci_loopback_data *)opt;
+ const struct nci_loopback_data *data = opt;
nci_send_data(ndev, data->conn_id, data->data);
}
@@ -407,7 +405,7 @@ static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err)
{
struct nci_dev *ndev = (struct nci_dev *)context;
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id);
if (!conn_info) {
@@ -420,7 +418,7 @@ static void nci_nfcc_loopback_cb(void *context, struct sk_buff *skb, int err)
nci_req_complete(ndev, NCI_STATUS_OK);
}
-int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
+int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len,
struct sk_buff **resp)
{
int r;
@@ -460,7 +458,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
loopback_data.data = skb;
ndev->cur_conn_id = conn_id;
- r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data,
+ r = nci_request(ndev, nci_send_data_req, &loopback_data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK && resp)
*resp = conn_info->rx_skb;
@@ -493,7 +491,7 @@ static int nci_open_device(struct nci_dev *ndev)
rc = ndev->ops->init(ndev);
if (!rc) {
- rc = __nci_request(ndev, nci_reset_req, 0,
+ rc = __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
}
@@ -506,10 +504,10 @@ static int nci_open_device(struct nci_dev *ndev)
.feature1 = NCI_FEATURE_DISABLE,
.feature2 = NCI_FEATURE_DISABLE
};
- unsigned long opt = 0;
+ const void *opt = NULL;
if (ndev->nci_ver & NCI_VER_2_MASK)
- opt = (unsigned long)&nci_init_v2_cmd;
+ opt = &nci_init_v2_cmd;
rc = __nci_request(ndev, nci_init_req, opt,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
@@ -519,7 +517,7 @@ static int nci_open_device(struct nci_dev *ndev)
rc = ndev->ops->post_setup(ndev);
if (!rc) {
- rc = __nci_request(ndev, nci_init_complete_req, 0,
+ rc = __nci_request(ndev, nci_init_complete_req, (void *)0,
msecs_to_jiffies(NCI_INIT_TIMEOUT));
}
@@ -569,7 +567,7 @@ static int nci_close_device(struct nci_dev *ndev)
atomic_set(&ndev->cmd_cnt, 1);
set_bit(NCI_INIT, &ndev->flags);
- __nci_request(ndev, nci_reset_req, 0,
+ __nci_request(ndev, nci_reset_req, (void *)0,
msecs_to_jiffies(NCI_RESET_TIMEOUT));
/* After this point our queues are empty
@@ -624,7 +622,7 @@ static int nci_dev_down(struct nfc_dev *nfc_dev)
return nci_close_device(ndev);
}
-int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val)
+int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val)
{
struct nci_set_config_param param;
@@ -635,15 +633,15 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val)
param.len = len;
param.val = val;
- return __nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+ return __nci_request(ndev, nci_set_config_req, &param,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
}
EXPORT_SYMBOL(nci_set_config);
-static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt)
{
struct nci_nfcee_discover_cmd cmd;
- __u8 action = opt;
+ __u8 action = (unsigned long)opt;
cmd.discovery_action = action;
@@ -652,15 +650,16 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
int nci_nfcee_discover(struct nci_dev *ndev, u8 action)
{
- return __nci_request(ndev, nci_nfcee_discover_req, action,
+ unsigned long opt = action;
+
+ return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_nfcee_discover);
-static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_nfcee_mode_set_cmd *cmd =
- (struct nci_nfcee_mode_set_cmd *)opt;
+ const struct nci_nfcee_mode_set_cmd *cmd = opt;
nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD,
sizeof(struct nci_nfcee_mode_set_cmd), cmd);
@@ -673,16 +672,14 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode)
cmd.nfcee_id = nfcee_id;
cmd.nfcee_mode = nfcee_mode;
- return __nci_request(ndev, nci_nfcee_mode_set_req,
- (unsigned long)&cmd,
+ return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_nfcee_mode_set);
-static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt)
{
- struct core_conn_create_data *data =
- (struct core_conn_create_data *)opt;
+ const struct core_conn_create_data *data = opt;
nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd);
}
@@ -690,7 +687,7 @@ static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
u8 number_destination_params,
size_t params_len,
- struct core_conn_create_dest_spec_params *params)
+ const struct core_conn_create_dest_spec_params *params)
{
int r;
struct nci_core_conn_create_cmd *cmd;
@@ -719,24 +716,26 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
}
ndev->cur_dest_type = destination_type;
- r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data,
+ r = __nci_request(ndev, nci_core_conn_create_req, &data,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
kfree(cmd);
return r;
}
EXPORT_SYMBOL(nci_core_conn_create);
-static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt)
{
- __u8 conn_id = opt;
+ __u8 conn_id = (unsigned long)opt;
nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id);
}
int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
{
+ unsigned long opt = conn_id;
+
ndev->cur_conn_id = conn_id;
- return __nci_request(ndev, nci_core_conn_close_req, conn_id,
+ return __nci_request(ndev, nci_core_conn_close_req, (void *)opt,
msecs_to_jiffies(NCI_CMD_TIMEOUT));
}
EXPORT_SYMBOL(nci_core_conn_close);
@@ -756,14 +755,14 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
param.id = NCI_PN_ATR_REQ_GEN_BYTES;
- rc = nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+ rc = nci_request(ndev, nci_set_config_req, &param,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
if (rc)
return rc;
param.id = NCI_LN_ATR_RES_GEN_BYTES;
- return nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+ return nci_request(ndev, nci_set_config_req, &param,
msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
}
@@ -813,7 +812,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
pr_debug("target active or w4 select, implicitly deactivate\n");
rc = nci_request(ndev, nci_rf_deactivate_req,
- NCI_DEACTIVATE_TYPE_IDLE_MODE,
+ (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
if (rc)
return -EBUSY;
@@ -835,7 +834,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
param.im_protocols = im_protocols;
param.tm_protocols = tm_protocols;
- rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)&param,
+ rc = nci_request(ndev, nci_rf_discover_req, &param,
msecs_to_jiffies(NCI_RF_DISC_TIMEOUT));
if (!rc)
@@ -854,7 +853,8 @@ static void nci_stop_poll(struct nfc_dev *nfc_dev)
return;
}
- nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE,
+ nci_request(ndev, nci_rf_deactivate_req,
+ (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
@@ -863,7 +863,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
struct nci_rf_discover_select_param param;
- struct nfc_target *nci_target = NULL;
+ const struct nfc_target *nci_target = NULL;
int i;
int rc = 0;
@@ -913,8 +913,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
else
param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP;
- rc = nci_request(ndev, nci_rf_discover_select_req,
- (unsigned long)&param,
+ rc = nci_request(ndev, nci_rf_discover_select_req, &param,
msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT));
}
@@ -929,7 +928,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
__u8 mode)
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
- u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
+ unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
pr_debug("entry\n");
@@ -947,7 +946,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
}
if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
- nci_request(ndev, nci_rf_deactivate_req, nci_mode,
+ nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode,
msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
}
@@ -985,8 +984,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev)
} else {
if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE ||
atomic_read(&ndev->state) == NCI_DISCOVERY) {
- nci_request(ndev, nci_rf_deactivate_req, 0,
- msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
+ nci_request(ndev, nci_rf_deactivate_req, (void *)0,
+ msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
}
rc = nfc_tm_deactivated(nfc_dev);
@@ -1004,7 +1003,7 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
int rc;
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
conn_info = ndev->rf_conn_info;
if (!conn_info)
@@ -1102,7 +1101,7 @@ static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
return ndev->ops->fw_download(ndev, firmware_name);
}
-static struct nfc_ops nci_nfc_ops = {
+static const struct nfc_ops nci_nfc_ops = {
.dev_up = nci_dev_up,
.dev_down = nci_dev_down,
.start_poll = nci_start_poll,
@@ -1129,7 +1128,7 @@ static struct nfc_ops nci_nfc_ops = {
* @tx_headroom: Reserved space at beginning of skb
* @tx_tailroom: Reserved space at end of skb
*/
-struct nci_dev *nci_allocate_device(struct nci_ops *ops,
+struct nci_dev *nci_allocate_device(const struct nci_ops *ops,
__u32 supported_protocols,
int tx_headroom, int tx_tailroom)
{
@@ -1152,8 +1151,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
if (ops->n_prop_ops > NCI_MAX_PROPRIETARY_CMD) {
pr_err("Too many proprietary commands: %zd\n",
ops->n_prop_ops);
- ops->prop_ops = NULL;
- ops->n_prop_ops = 0;
+ goto free_nci;
}
ndev->tx_headroom = tx_headroom;
@@ -1270,7 +1268,7 @@ EXPORT_SYMBOL(nci_register_device);
*/
void nci_unregister_device(struct nci_dev *ndev)
{
- struct nci_conn_info *conn_info, *n;
+ struct nci_conn_info *conn_info, *n;
nci_close_device(ndev);
@@ -1332,7 +1330,7 @@ int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb)
EXPORT_SYMBOL(nci_send_frame);
/* Send NCI command */
-int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
+int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload)
{
struct nci_ctrl_hdr *hdr;
struct sk_buff *skb;
@@ -1364,12 +1362,12 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload)
EXPORT_SYMBOL(nci_send_cmd);
/* Proprietary commands API */
-static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops,
- size_t n_ops,
- __u16 opcode)
+static const struct nci_driver_ops *ops_cmd_lookup(const struct nci_driver_ops *ops,
+ size_t n_ops,
+ __u16 opcode)
{
size_t i;
- struct nci_driver_ops *op;
+ const struct nci_driver_ops *op;
if (!ops || !n_ops)
return NULL;
@@ -1384,10 +1382,10 @@ static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops,
}
static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
- struct sk_buff *skb, struct nci_driver_ops *ops,
+ struct sk_buff *skb, const struct nci_driver_ops *ops,
size_t n_ops)
{
- struct nci_driver_ops *op;
+ const struct nci_driver_ops *op;
op = ops_cmd_lookup(ops, n_ops, rsp_opcode);
if (!op || !op->rsp)
@@ -1397,10 +1395,10 @@ static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode,
}
static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode,
- struct sk_buff *skb, struct nci_driver_ops *ops,
+ struct sk_buff *skb, const struct nci_driver_ops *ops,
size_t n_ops)
{
- struct nci_driver_ops *op;
+ const struct nci_driver_ops *op;
op = ops_cmd_lookup(ops, n_ops, ntf_opcode);
if (!op || !op->ntf)
@@ -1442,7 +1440,7 @@ int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode,
static void nci_tx_work(struct work_struct *work)
{
struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work);
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
struct sk_buff *skb;
conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id);
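A minimal sketch of the two calling conventions that follow from the new "const void *opt" request signature, mirroring nci_core_cmd() and nci_core_conn_close() in the hunks above (fragment for illustration only, not part of the patch):

	/* Structured parameters travel by address ... */
	struct nci_cmd_param param = {
		.opcode  = opcode,
		.len     = len,
		.payload = payload,
	};

	__nci_request(ndev, nci_generic_req, &param,
		      msecs_to_jiffies(NCI_CMD_TIMEOUT));

	/* ... while small scalars are cast through unsigned long. */
	unsigned long opt = conn_id;

	__nci_request(ndev, nci_core_conn_close_req, (void *)opt,
		      msecs_to_jiffies(NCI_CMD_TIMEOUT));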
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index ce3382be937f..6055dc9a82aa 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -26,7 +26,7 @@
void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb,
__u8 conn_id, int err)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
data_exchange_cb_t cb;
void *cb_context;
@@ -80,7 +80,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev,
int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id);
if (!conn_info)
@@ -93,9 +93,9 @@ EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size);
static int nci_queue_tx_data_frags(struct nci_dev *ndev,
__u8 conn_id,
struct sk_buff *skb) {
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
int total_len = skb->len;
- unsigned char *data = skb->data;
+ const unsigned char *data = skb->data;
unsigned long flags;
struct sk_buff_head frags_q;
struct sk_buff *skb_frag;
@@ -166,7 +166,7 @@ exit:
/* Send NCI data */
int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
int rc = 0;
pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len);
@@ -269,7 +269,7 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb)
__u8 pbf = nci_pbf(skb->data);
__u8 status = 0;
__u8 conn_id = nci_conn_id(skb->data);
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
pr_debug("len %d\n", skb->len);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 96865142104f..e199912ee1e5 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -16,11 +16,11 @@
#include <linux/nfc.h>
struct nci_data {
- u8 conn_id;
- u8 pipe;
- u8 cmd;
- const u8 *data;
- u32 data_len;
+ u8 conn_id;
+ u8 pipe;
+ u8 cmd;
+ const u8 *data;
+ u32 data_len;
} __packed;
struct nci_hci_create_pipe_params {
@@ -142,7 +142,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
const u8 data_type, const u8 *data,
size_t data_len)
{
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
struct sk_buff *skb;
int len, i, r;
u8 cb = pipe;
@@ -161,8 +161,6 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
*(u8 *)skb_push(skb, 1) = data_type;
do {
- len = conn_info->max_pkt_payload_len;
-
/* If last packet add NCI_HFP_NO_CHAINING */
if (i + conn_info->max_pkt_payload_len -
(skb->len + 1) >= data_len) {
@@ -197,9 +195,9 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
return i;
}
-static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt)
{
- struct nci_data *data = (struct nci_data *)opt;
+ const struct nci_data *data = opt;
nci_hci_send_data(ndev, data->pipe, data->cmd,
data->data, data->data_len);
@@ -223,8 +221,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
const u8 *param, size_t param_len,
struct sk_buff **skb)
{
- struct nci_hcp_message *message;
- struct nci_conn_info *conn_info;
+ const struct nci_hcp_message *message;
+ const struct nci_conn_info *conn_info;
struct nci_data data;
int r;
u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -242,7 +240,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
data.data = param;
data.data_len = param_len;
- r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -365,7 +363,7 @@ exit:
static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
conn_info = ndev->hci_dev->conn_info;
if (!conn_info)
@@ -408,7 +406,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work)
struct nci_hci_dev *hdev =
container_of(work, struct nci_hci_dev, msg_rx_work);
struct sk_buff *skb;
- struct nci_hcp_message *message;
+ const struct nci_hcp_message *message;
u8 pipe, type, instruction;
while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) {
@@ -500,7 +498,7 @@ void nci_hci_data_received_cb(void *context,
int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
{
struct nci_data data;
- struct nci_conn_info *conn_info;
+ const struct nci_conn_info *conn_info;
conn_info = ndev->hci_dev->conn_info;
if (!conn_info)
@@ -513,9 +511,8 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
data.data = NULL;
data.data_len = 0;
- return nci_request(ndev, nci_hci_send_data_req,
- (unsigned long)&data,
- msecs_to_jiffies(NCI_DATA_TIMEOUT));
+ return nci_request(ndev, nci_hci_send_data_req, &data,
+ msecs_to_jiffies(NCI_DATA_TIMEOUT));
}
EXPORT_SYMBOL(nci_hci_open_pipe);
@@ -525,7 +522,7 @@ static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host,
u8 pipe;
struct sk_buff *skb;
struct nci_hci_create_pipe_params params;
- struct nci_hci_create_pipe_resp *resp;
+ const struct nci_hci_create_pipe_resp *resp;
pr_debug("gate=%d\n", dest_gate);
@@ -559,8 +556,8 @@ static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe)
int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
const u8 *param, size_t param_len)
{
- struct nci_hcp_message *message;
- struct nci_conn_info *conn_info;
+ const struct nci_hcp_message *message;
+ const struct nci_conn_info *conn_info;
struct nci_data data;
int r;
u8 *tmp;
@@ -589,8 +586,7 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
data.data = tmp;
data.data_len = param_len + 1;
- r = nci_request(ndev, nci_hci_send_data_req,
- (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -607,8 +603,8 @@ EXPORT_SYMBOL(nci_hci_set_param);
int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
struct sk_buff **skb)
{
- struct nci_hcp_message *message;
- struct nci_conn_info *conn_info;
+ const struct nci_hcp_message *message;
+ const struct nci_conn_info *conn_info;
struct nci_data data;
int r;
u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -629,7 +625,7 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
data.data = &idx;
data.data_len = 1;
- r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+ r = nci_request(ndev, nci_hci_send_data_req, &data,
msecs_to_jiffies(NCI_DATA_TIMEOUT));
if (r == NCI_STATUS_OK) {
@@ -699,7 +695,7 @@ EXPORT_SYMBOL(nci_hci_connect_gate);
static int nci_hci_dev_connect_gates(struct nci_dev *ndev,
u8 gate_count,
- struct nci_hci_gate *gates)
+ const struct nci_hci_gate *gates)
{
int r;
@@ -716,7 +712,7 @@ static int nci_hci_dev_connect_gates(struct nci_dev *ndev,
int nci_hci_dev_session_init(struct nci_dev *ndev)
{
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
struct sk_buff *skb;
int r;
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 98af04c86b2c..c5eacaac41ae 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -28,10 +28,10 @@
/* Handle NCI Notification packets */
static void nci_core_reset_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
/* Handle NCI 2.x core reset notification */
- struct nci_core_reset_ntf *ntf = (void *)skb->data;
+ const struct nci_core_reset_ntf *ntf = (void *)skb->data;
ndev->nci_ver = ntf->nci_ver;
pr_debug("nci_ver 0x%x, config_status 0x%x\n",
@@ -48,7 +48,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev,
struct sk_buff *skb)
{
struct nci_core_conn_credit_ntf *ntf = (void *) skb->data;
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
int i;
pr_debug("num_entries %d\n", ntf->num_entries);
@@ -80,7 +80,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev,
}
static void nci_core_generic_error_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -107,9 +107,10 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev,
nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO);
}
-static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfca_poll *nfca_poll,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfca_poll *nfca_poll,
+ const __u8 *data)
{
nfca_poll->sens_res = __le16_to_cpu(*((__le16 *)data));
data += 2;
@@ -134,9 +135,10 @@ static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev,
return data;
}
-static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfcb_poll *nfcb_poll,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcb_poll *nfcb_poll,
+ const __u8 *data)
{
nfcb_poll->sensb_res_len = min_t(__u8, *data++, NFC_SENSB_RES_MAXSIZE);
@@ -148,9 +150,10 @@ static __u8 *nci_extract_rf_params_nfcb_passive_poll(struct nci_dev *ndev,
return data;
}
-static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfcf_poll *nfcf_poll,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcf_poll *nfcf_poll,
+ const __u8 *data)
{
nfcf_poll->bit_rate = *data++;
nfcf_poll->sensf_res_len = min_t(__u8, *data++, NFC_SENSF_RES_MAXSIZE);
@@ -164,9 +167,10 @@ static __u8 *nci_extract_rf_params_nfcf_passive_poll(struct nci_dev *ndev,
return data;
}
-static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfcv_poll *nfcv_poll,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcv_poll *nfcv_poll,
+ const __u8 *data)
{
++data;
nfcv_poll->dsfid = *data++;
@@ -175,9 +179,10 @@ static __u8 *nci_extract_rf_params_nfcv_passive_poll(struct nci_dev *ndev,
return data;
}
-static __u8 *nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev,
- struct rf_tech_specific_params_nfcf_listen *nfcf_listen,
- __u8 *data)
+static const __u8 *
+nci_extract_rf_params_nfcf_passive_listen(struct nci_dev *ndev,
+ struct rf_tech_specific_params_nfcf_listen *nfcf_listen,
+ const __u8 *data)
{
nfcf_listen->local_nfcid2_len = min_t(__u8, *data++,
NFC_NFCID2_MAXSIZE);
@@ -198,12 +203,12 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
struct nfc_target *target,
__u8 rf_protocol,
__u8 rf_tech_and_mode,
- void *params)
+ const void *params)
{
- struct rf_tech_specific_params_nfca_poll *nfca_poll;
- struct rf_tech_specific_params_nfcb_poll *nfcb_poll;
- struct rf_tech_specific_params_nfcf_poll *nfcf_poll;
- struct rf_tech_specific_params_nfcv_poll *nfcv_poll;
+ const struct rf_tech_specific_params_nfca_poll *nfca_poll;
+ const struct rf_tech_specific_params_nfcb_poll *nfcb_poll;
+ const struct rf_tech_specific_params_nfcf_poll *nfcf_poll;
+ const struct rf_tech_specific_params_nfcv_poll *nfcv_poll;
__u32 protocol;
if (rf_protocol == NCI_RF_PROTOCOL_T1T)
@@ -274,7 +279,7 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
}
static void nci_add_new_target(struct nci_dev *ndev,
- struct nci_rf_discover_ntf *ntf)
+ const struct nci_rf_discover_ntf *ntf)
{
struct nfc_target *target;
int i, rc;
@@ -319,10 +324,10 @@ void nci_clear_target_list(struct nci_dev *ndev)
}
static void nci_rf_discover_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nci_rf_discover_ntf ntf;
- __u8 *data = skb->data;
+ const __u8 *data = skb->data;
bool add_target = true;
ntf.rf_discovery_id = *data++;
@@ -382,7 +387,8 @@ static void nci_rf_discover_ntf_packet(struct nci_dev *ndev,
}
static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
- struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
+ struct nci_rf_intf_activated_ntf *ntf,
+ const __u8 *data)
{
struct activation_params_nfca_poll_iso_dep *nfca_poll;
struct activation_params_nfcb_poll_iso_dep *nfcb_poll;
@@ -418,7 +424,8 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
}
static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
- struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
+ struct nci_rf_intf_activated_ntf *ntf,
+ const __u8 *data)
{
struct activation_params_poll_nfc_dep *poll;
struct activation_params_listen_nfc_dep *listen;
@@ -454,7 +461,7 @@ static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
}
static void nci_target_auto_activated(struct nci_dev *ndev,
- struct nci_rf_intf_activated_ntf *ntf)
+ const struct nci_rf_intf_activated_ntf *ntf)
{
struct nfc_target *target;
int rc;
@@ -477,7 +484,7 @@ static void nci_target_auto_activated(struct nci_dev *ndev,
}
static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev,
- struct nci_rf_intf_activated_ntf *ntf)
+ const struct nci_rf_intf_activated_ntf *ntf)
{
ndev->remote_gb_len = 0;
@@ -519,11 +526,11 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev,
}
static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
struct nci_rf_intf_activated_ntf ntf;
- __u8 *data = skb->data;
+ const __u8 *data = skb->data;
int err = NCI_STATUS_OK;
ntf.rf_discovery_id = *data++;
@@ -681,10 +688,10 @@ listen:
}
static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
- struct nci_rf_deactivate_ntf *ntf = (void *) skb->data;
+ const struct nci_conn_info *conn_info;
+ const struct nci_rf_deactivate_ntf *ntf = (void *)skb->data;
pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason);
@@ -725,10 +732,10 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev,
}
static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
u8 status = NCI_STATUS_OK;
- struct nci_nfcee_discover_ntf *nfcee_ntf =
+ const struct nci_nfcee_discover_ntf *nfcee_ntf =
(struct nci_nfcee_discover_ntf *)skb->data;
pr_debug("\n");
@@ -745,7 +752,7 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev,
}
static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
pr_debug("\n");
}
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index e9605922a322..a2e72c003805 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -25,9 +25,10 @@
/* Handle NCI Response packets */
-static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_core_reset_rsp_packet(struct nci_dev *ndev,
+ const struct sk_buff *skb)
{
- struct nci_core_reset_rsp *rsp = (void *) skb->data;
+ const struct nci_core_reset_rsp *rsp = (void *)skb->data;
pr_debug("status 0x%x\n", rsp->status);
@@ -43,10 +44,11 @@ static void nci_core_reset_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
}
}
-static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb)
+static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev,
+ const struct sk_buff *skb)
{
- struct nci_core_init_rsp_1 *rsp_1 = (void *) skb->data;
- struct nci_core_init_rsp_2 *rsp_2;
+ const struct nci_core_init_rsp_1 *rsp_1 = (void *)skb->data;
+ const struct nci_core_init_rsp_2 *rsp_2;
pr_debug("status 0x%x\n", rsp_1->status);
@@ -81,10 +83,11 @@ static u8 nci_core_init_rsp_packet_v1(struct nci_dev *ndev, struct sk_buff *skb)
return NCI_STATUS_OK;
}
-static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb)
+static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev,
+ const struct sk_buff *skb)
{
- struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data;
- u8 *supported_rf_interface = rsp->supported_rf_interfaces;
+ const struct nci_core_init_rsp_nci_ver2 *rsp = (void *)skb->data;
+ const u8 *supported_rf_interface = rsp->supported_rf_interfaces;
u8 rf_interface_idx = 0;
u8 rf_extension_cnt = 0;
@@ -118,7 +121,7 @@ static u8 nci_core_init_rsp_packet_v2(struct nci_dev *ndev, struct sk_buff *skb)
return NCI_STATUS_OK;
}
-static void nci_core_init_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_core_init_rsp_packet(struct nci_dev *ndev, const struct sk_buff *skb)
{
u8 status = 0;
@@ -160,9 +163,9 @@ exit:
}
static void nci_core_set_config_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
- struct nci_core_set_config_rsp *rsp = (void *) skb->data;
+ const struct nci_core_set_config_rsp *rsp = (void *)skb->data;
pr_debug("status 0x%x\n", rsp->status);
@@ -170,7 +173,7 @@ static void nci_core_set_config_rsp_packet(struct nci_dev *ndev,
}
static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -179,9 +182,10 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev,
nci_req_complete(ndev, status);
}
-static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
+static void nci_rf_disc_rsp_packet(struct nci_dev *ndev,
+ const struct sk_buff *skb)
{
- struct nci_conn_info *conn_info;
+ struct nci_conn_info *conn_info;
__u8 status = skb->data[0];
pr_debug("status 0x%x\n", status);
@@ -210,7 +214,7 @@ exit:
}
static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -222,7 +226,7 @@ static void nci_rf_disc_select_rsp_packet(struct nci_dev *ndev,
}
static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -238,9 +242,9 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev,
}
static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
- struct nci_nfcee_discover_rsp *discover_rsp;
+ const struct nci_nfcee_discover_rsp *discover_rsp;
if (skb->len != 2) {
nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR);
@@ -255,7 +259,7 @@ static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev,
}
static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
@@ -264,11 +268,11 @@ static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev,
}
static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
__u8 status = skb->data[0];
struct nci_conn_info *conn_info = NULL;
- struct nci_core_conn_create_rsp *rsp;
+ const struct nci_core_conn_create_rsp *rsp;
pr_debug("status 0x%x\n", status);
@@ -319,7 +323,7 @@ exit:
}
static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nci_conn_info *conn_info;
__u8 status = skb->data[0];
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index 7d8e10e27c20..0935527d1d12 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -27,7 +27,7 @@
#define CRC_INIT 0xFFFF
-static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb,
+static int __nci_spi_send(struct nci_spi *nspi, const struct sk_buff *skb,
int cs_change)
{
struct spi_message m;
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 1248faf4d6df..502e7a3f8948 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -308,7 +308,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
* Return Value: None
*/
static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data,
- char *flags, int count)
+ const char *flags, int count)
{
struct nci_uart *nu = (void *)tty->disc_data;
@@ -442,6 +442,7 @@ EXPORT_SYMBOL_GPL(nci_uart_set_config);
static struct tty_ldisc_ops nci_uart_ldisc = {
.owner = THIS_MODULE,
+ .num = N_NCI,
.name = "n_nci",
.open = nci_uart_tty_open,
.close = nci_uart_tty_close,
@@ -456,12 +457,12 @@ static struct tty_ldisc_ops nci_uart_ldisc = {
static int __init nci_uart_init(void)
{
- return tty_register_ldisc(N_NCI, &nci_uart_ldisc);
+ return tty_register_ldisc(&nci_uart_ldisc);
}
static void __exit nci_uart_exit(void)
{
- tty_unregister_ldisc(N_NCI);
+ tty_unregister_ldisc(&nci_uart_ldisc);
}
module_init(nci_uart_init);
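The hunk above tracks the tty core change where the line-discipline number moved into struct tty_ldisc_ops and registration happens by ops pointer; in sketch form, for a hypothetical discipline:

static struct tty_ldisc_ops example_ldisc = {
	.owner = THIS_MODULE,
	.num   = N_NCI,		/* discipline number now lives in the ops */
	.name  = "n_example",
};

static int __init example_init(void)
{
	return tty_register_ldisc(&example_ldisc);	/* no number argument */
}

static void __exit example_exit(void)
{
	tty_unregister_ldisc(&example_ldisc);		/* unregister by ops pointer too */
}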
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 722f7ef891e1..49089c50872e 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -530,7 +530,7 @@ free_msg:
int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx)
{
- struct nfc_se *se;
+ const struct nfc_se *se;
struct sk_buff *msg;
void *hdr;
@@ -1531,7 +1531,7 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
struct genl_info *info)
{
struct nfc_dev *dev;
- struct nfc_vendor_cmd *cmd;
+ const struct nfc_vendor_cmd *cmd;
u32 dev_idx, vid, subcmd;
u8 *data;
size_t data_len;
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 889fefd64e56..de2ec66d7e83 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -48,7 +48,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
u8 comm_mode, u8 rf_mode);
int nfc_llcp_register_device(struct nfc_dev *dev);
void nfc_llcp_unregister_device(struct nfc_dev *dev);
-int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len);
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len);
u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len);
int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 5f1d438a0a23..0ca214ab5aef 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -49,7 +49,7 @@ static void rawsock_report_error(struct sock *sk, int err)
sk->sk_shutdown = SHUTDOWN_MASK;
sk->sk_err = -err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
rawsock_write_queue_purge(sk);
}
@@ -140,7 +140,7 @@ static void rawsock_data_exchange_complete(void *context, struct sk_buff *skb,
{
struct sock *sk = (struct sock *) context;
- BUG_ON(in_irq());
+ BUG_ON(in_hardirq());
pr_debug("sk=%p err=%d\n", sk, err);
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 41109c326f3a..28982630bef3 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -13,6 +13,7 @@ openvswitch-y := \
flow_netlink.o \
flow_table.o \
meter.o \
+ openvswitch_trace.o \
vport.o \
vport-internal_dev.o \
vport-netdev.o
@@ -24,3 +25,5 @@ endif
obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
+
+CFLAGS_openvswitch_trace.o = -I$(src)
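The -I$(src) flag lets the new translation unit find its trace header by name when it instantiates the tracepoints; conventionally that file is little more than the following (a sketch, the real openvswitch_trace.c may carry additional guards or exports):

#define CREATE_TRACE_POINTS
#include "openvswitch_trace.h"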
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 77d924ab8cdb..076774034bb9 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -30,6 +30,7 @@
#include "conntrack.h"
#include "vport.h"
#include "flow_netlink.h"
+#include "openvswitch_trace.h"
struct deferred_action {
struct sk_buff *skb;
@@ -923,7 +924,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_USERSPACE_ATTR_PID:
- upcall.portid = nla_get_u32(a);
+ if (dp->user_features &
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ upcall.portid =
+ ovs_dp_get_upcall_portid(dp,
+ smp_processor_id());
+ else
+ upcall.portid = nla_get_u32(a);
break;
case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
@@ -1242,6 +1249,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
a = nla_next(a, &rem)) {
int err = 0;
+ if (trace_ovs_do_execute_action_enabled())
+ trace_ovs_do_execute_action(dp, skb, key, a, rem);
+
switch (nla_type(a)) {
case OVS_ACTION_ATTR_OUTPUT: {
int port = nla_get_u32(a);
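The new calls above use the standard tracepoint idiom: for a tracepoint declared in the included trace header, the generated trace_<name>_enabled() helper is a static-branch test, so argument marshalling is skipped entirely while tracing is off. Schematically, with a hypothetical tracepoint name:

	if (trace_example_enabled())	/* static key: near-zero cost when disabled */
		trace_example(dp, skb, key, a, rem);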
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index cadb6a29b285..1b5eae57bc90 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -967,8 +967,7 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
/* Associate skb with specified zone. */
if (tmpl) {
- if (skb_nfct(skb))
- nf_conntrack_put(skb_nfct(skb));
+ nf_conntrack_put(skb_nfct(skb));
nf_conntrack_get(&tmpl->ct_general);
nf_ct_set(skb, tmpl, IP_CT_NEW);
}
@@ -1329,11 +1328,9 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
{
- if (skb_nfct(skb)) {
- nf_conntrack_put(skb_nfct(skb));
- nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- ovs_ct_fill_key(skb, key, false);
- }
+ nf_conntrack_put(skb_nfct(skb));
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+ ovs_ct_fill_key(skb, key, false);
return 0;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9d6ef6cb9b26..67ad08320886 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -43,6 +43,7 @@
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
+#include "openvswitch_trace.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
@@ -132,6 +133,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
static void ovs_dp_masks_rebalance(struct work_struct *work);
+static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
+
/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
@@ -165,6 +168,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
free_percpu(dp->stats_percpu);
kfree(dp->ports);
ovs_meters_exit(dp);
+ kfree(rcu_dereference_raw(dp->upcall_portids));
kfree(dp);
}
@@ -238,7 +242,13 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
- upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
+ if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+ upcall.portid =
+ ovs_dp_get_upcall_portid(dp, smp_processor_id());
+ else
+ upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
if (unlikely(error))
@@ -275,6 +285,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
struct dp_stats_percpu *stats;
int err;
+ if (trace_ovs_dp_upcall_enabled())
+ trace_ovs_dp_upcall(dp, skb, key, upcall_info);
+
if (upcall_info->portid == 0) {
err = -ENOTCONN;
goto err;
@@ -1590,16 +1603,70 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+static int ovs_dp_set_upcall_portids(struct datapath *dp,
+ const struct nlattr *ids)
+{
+ struct dp_nlsk_pids *old, *dp_nlsk_pids;
+
+ if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
+ return -EINVAL;
+
+ old = ovsl_dereference(dp->upcall_portids);
+
+ dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
+ GFP_KERNEL);
+ if (!dp_nlsk_pids)
+ return -ENOMEM;
+
+ dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
+ nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
+
+ rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
+
+ kfree_rcu(old, rcu);
+
+ return 0;
+}
+
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
+{
+ struct dp_nlsk_pids *dp_nlsk_pids;
+
+ dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
+
+ if (dp_nlsk_pids) {
+ if (cpu_id < dp_nlsk_pids->n_pids) {
+ return dp_nlsk_pids->pids[cpu_id];
+ } else if (dp_nlsk_pids->n_pids > 0 &&
+ cpu_id >= dp_nlsk_pids->n_pids) {
+ /* If the number of netlink PIDs does not match the number of
+ * CPUs seen by the kernel, log the mismatch and fall back to
+ * the socket selected by cpu_id modulo n_pids so that packets
+ * are not dropped.
+ */
+ pr_info_ratelimited("cpu_id mismatch with handler threads");
+ return dp_nlsk_pids->pids[cpu_id %
+ dp_nlsk_pids->n_pids];
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+}
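When fewer handler PIDs are registered than there are CPUs, the modulo fallback still gives a deterministic CPU-to-socket mapping. A self-contained user-space sketch of the same indexing, with made-up PID values, purely for illustration:

/* Hedged sketch: reproduce the CPU -> netlink portid selection above in
 * plain C.  pids[] and n_pids mirror struct dp_nlsk_pids; the values are
 * invented for the example.
 */
#include <stdio.h>

static unsigned int pick_portid(const unsigned int *pids, unsigned int n_pids,
				unsigned int cpu_id)
{
	if (!n_pids)
		return 0;			/* no sockets registered */
	if (cpu_id < n_pids)
		return pids[cpu_id];		/* 1:1 mapping */
	return pids[cpu_id % n_pids];		/* fallback: wrap around */
}

int main(void)
{
	unsigned int pids[] = { 101, 102, 103, 104 };	/* 4 handler threads */

	for (unsigned int cpu = 0; cpu < 8; cpu++)	/* pretend 8 CPUs */
		printf("cpu %u -> portid %u\n", cpu, pick_portid(pids, 4, cpu));
	return 0;
}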
+
static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
u32 user_features = 0;
+ int err;
if (a[OVS_DP_ATTR_USER_FEATURES]) {
user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
if (user_features & ~(OVS_DP_F_VPORT_PIDS |
OVS_DP_F_UNALIGNED |
- OVS_DP_F_TC_RECIRC_SHARING))
+ OVS_DP_F_TC_RECIRC_SHARING |
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
return -EOPNOTSUPP;
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
@@ -1620,6 +1687,15 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
dp->user_features = user_features;
+ if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
+ a[OVS_DP_ATTR_PER_CPU_PIDS]) {
+ /* Upcall Netlink Port IDs have been updated */
+ err = ovs_dp_set_upcall_portids(dp,
+ a[OVS_DP_ATTR_PER_CPU_PIDS]);
+ if (err)
+ return err;
+ }
+
if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
static_branch_enable(&tc_recirc_sharing_support);
else
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 38f7d3e66ca6..fcfe6cb46441 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -51,6 +51,21 @@ struct dp_stats_percpu {
};
/**
+ * struct dp_nlsk_pids - array of netlink portids for a datapath.
+ *                       Used when OVS_DP_F_DISPATCH_UPCALL_PER_CPU is
+ *                       enabled; access must be protected by RCU.
+ * @rcu: RCU callback head for deferred destruction.
+ * @n_pids: Size of @pids array.
+ * @pids: Array storing the Netlink socket PIDs indexed by CPU ID for packets
+ * that miss the flow table.
+ */
+struct dp_nlsk_pids {
+ struct rcu_head rcu;
+ u32 n_pids;
+ u32 pids[];
+};
+
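The trailing pids[] is a flexible array member, which is why ovs_dp_set_upcall_portids() sizes its allocation as sizeof(*dp_nlsk_pids) + nla_len(ids). A small stand-alone sketch of that sizing pattern (user-space, names hypothetical):

/* Hedged sketch: allocate and fill a flexible-array struct the way
 * dp_nlsk_pids is built.  pid_array_new() is an invented helper.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct pid_array {
	uint32_t n_pids;
	uint32_t pids[];	/* flexible array member */
};

static struct pid_array *pid_array_new(const uint32_t *src, uint32_t n)
{
	struct pid_array *p = malloc(sizeof(*p) + n * sizeof(uint32_t));

	if (!p)
		return NULL;
	p->n_pids = n;
	memcpy(p->pids, src, n * sizeof(uint32_t));	/* like nla_memcpy() above */
	return p;
}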
+/**
* struct datapath - datapath for flow-based packet switching
* @rcu: RCU callback head for deferred destruction.
* @list_node: Element in global 'dps' list.
@@ -61,6 +76,7 @@ struct dp_stats_percpu {
* @net: Reference to net namespace.
* @max_headroom: the maximum headroom of all vports in this datapath; it will
* be used by all the internal vports in this dp.
+ * @upcall_portids: RCU-protected 'struct dp_nlsk_pids'.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
@@ -87,6 +103,8 @@ struct datapath {
/* Switch meters. */
struct dp_meter_table meter_tbl;
+
+ struct dp_nlsk_pids __rcu *upcall_portids;
};
/**
@@ -243,6 +261,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *,
const struct sw_flow_key *, const struct dp_upcall_info *,
uint32_t cutlen);
+u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id);
+
const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
u32 portid, u32 seq, u8 cmd);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e586424d8b04..9713035b89e3 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -293,14 +293,14 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
}
/**
- * Parse vlan tag from vlan header.
+ * parse_vlan_tag - Parse vlan tag from vlan header.
* @skb: skb containing frame to parse
* @key_vh: pointer to parsed vlan tag
* @untag_vlan: should the vlan header be removed from the frame
*
- * Returns ERROR on memory error.
- * Returns 0 if it encounters a non-vlan or incomplete packet.
- * Returns 1 after successfully parsing vlan tag.
+ * Return: a negative errno on memory error.
+ * %0 if it encounters a non-vlan or incomplete packet.
+ * %1 after successfully parsing vlan tag.
*/
static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh,
bool untag_vlan)
@@ -532,6 +532,7 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
* L3 header
* @key: output flow key
*
+ * Return: %0 if successful, otherwise a negative errno value.
*/
static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
{
@@ -748,8 +749,6 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
- * Returns 0 if successful, otherwise a negative errno value.
- *
* Initializes @skb header fields as follows:
*
* - skb->mac_header: the L2 header.
@@ -764,6 +763,8 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
*
* - skb->protocol: the type of the data starting at skb->network_header.
* Equals to key->eth.type.
+ *
+ * Return: %0 if successful, otherwise a negative errno value.
*/
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index c89c8da99f1a..d4a2db0b2299 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -670,13 +670,13 @@ static bool cmp_key(const struct sw_flow_key *key1,
{
const long *cp1 = (const long *)((const u8 *)key1 + key_start);
const long *cp2 = (const long *)((const u8 *)key2 + key_start);
- long diffs = 0;
int i;
for (i = key_start; i < key_end; i += sizeof(long))
- diffs |= *cp1++ ^ *cp2++;
+ if (*cp1++ ^ *cp2++)
+ return false;
- return diffs == 0;
+ return true;
}
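The early return works because key_start/key_end are long-aligned offsets into the flow key, so the scan can stride in word-sized steps and stop at the first difference. A self-contained sketch of the same word-wise compare (illustrative only, assumes the same alignment guarantee):

/* Hedged sketch: word-wise comparison with early exit, mirroring cmp_key().
 * start and end must be multiples of sizeof(long), as the flow-key layout
 * guarantees in the kernel.
 */
#include <stdbool.h>
#include <stddef.h>

static bool range_equal(const void *a, const void *b, size_t start, size_t end)
{
	const long *pa = (const long *)((const char *)a + start);
	const long *pb = (const long *)((const char *)b + start);

	for (size_t off = start; off < end; off += sizeof(long))
		if (*pa++ ^ *pb++)
			return false;	/* first differing word ends the scan */
	return true;
}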
static bool flow_cmp_masked_key(const struct sw_flow *flow,
diff --git a/net/openvswitch/openvswitch_trace.c b/net/openvswitch/openvswitch_trace.c
new file mode 100644
index 000000000000..62c5f7d6f023
--- /dev/null
+++ b/net/openvswitch/openvswitch_trace.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/* bug in tracepoint.h, it should include this */
+#include <linux/module.h>
+
+/* sparse isn't too happy with all macros... */
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "openvswitch_trace.h"
+
+#endif
diff --git a/net/openvswitch/openvswitch_trace.h b/net/openvswitch/openvswitch_trace.h
new file mode 100644
index 000000000000..3eb35d9eb700
--- /dev/null
+++ b/net/openvswitch/openvswitch_trace.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM openvswitch
+
+#if !defined(_TRACE_OPENVSWITCH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_OPENVSWITCH_H
+
+#include <linux/tracepoint.h>
+
+#include "datapath.h"
+
+TRACE_EVENT(ovs_do_execute_action,
+
+ TP_PROTO(struct datapath *dp, struct sk_buff *skb,
+ struct sw_flow_key *key, const struct nlattr *a, int rem),
+
+ TP_ARGS(dp, skb, key, a, rem),
+
+ TP_STRUCT__entry(
+ __field( void *, dpaddr )
+ __string( dp_name, ovs_dp_name(dp) )
+ __string( dev_name, skb->dev->name )
+ __field( void *, skbaddr )
+ __field( unsigned int, len )
+ __field( unsigned int, data_len )
+ __field( unsigned int, truesize )
+ __field( u8, nr_frags )
+ __field( u16, gso_size )
+ __field( u16, gso_type )
+ __field( u32, ovs_flow_hash )
+ __field( u32, recirc_id )
+ __field( void *, keyaddr )
+ __field( u16, key_eth_type )
+ __field( u8, key_ct_state )
+ __field( u8, key_ct_orig_proto )
+ __field( u16, key_ct_zone )
+ __field( unsigned int, flow_key_valid )
+ __field( u8, action_type )
+ __field( unsigned int, action_len )
+ __field( void *, action_data )
+ __field( u8, is_last )
+ ),
+
+ TP_fast_assign(
+ __entry->dpaddr = dp;
+ __assign_str(dp_name, ovs_dp_name(dp));
+ __assign_str(dev_name, skb->dev->name);
+ __entry->skbaddr = skb;
+ __entry->len = skb->len;
+ __entry->data_len = skb->data_len;
+ __entry->truesize = skb->truesize;
+ __entry->nr_frags = skb_shinfo(skb)->nr_frags;
+ __entry->gso_size = skb_shinfo(skb)->gso_size;
+ __entry->gso_type = skb_shinfo(skb)->gso_type;
+ __entry->ovs_flow_hash = key->ovs_flow_hash;
+ __entry->recirc_id = key->recirc_id;
+ __entry->keyaddr = key;
+ __entry->key_eth_type = key->eth.type;
+ __entry->key_ct_state = key->ct_state;
+ __entry->key_ct_orig_proto = key->ct_orig_proto;
+ __entry->key_ct_zone = key->ct_zone;
+ __entry->flow_key_valid = !(key->mac_proto & SW_FLOW_KEY_INVALID);
+ __entry->action_type = nla_type(a);
+ __entry->action_len = nla_len(a);
+ __entry->action_data = nla_data(a);
+ __entry->is_last = nla_is_last(a, rem);
+ ),
+
+ TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_Zone=%04x flow_key_valid=%d action_type=%u action_len=%u action_data=%p is_last=%d",
+ __entry->dpaddr, __get_str(dp_name), __get_str(dev_name),
+ __entry->skbaddr, __entry->len, __entry->data_len,
+ __entry->truesize, __entry->nr_frags, __entry->gso_size,
+ __entry->gso_type, __entry->ovs_flow_hash,
+ __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type,
+ __entry->key_ct_state, __entry->key_ct_orig_proto,
+ __entry->key_ct_zone,
+ __entry->flow_key_valid,
+ __entry->action_type, __entry->action_len,
+ __entry->action_data, __entry->is_last)
+);
+
+TRACE_EVENT(ovs_dp_upcall,
+
+ TP_PROTO(struct datapath *dp, struct sk_buff *skb,
+ const struct sw_flow_key *key,
+ const struct dp_upcall_info *upcall_info),
+
+ TP_ARGS(dp, skb, key, upcall_info),
+
+ TP_STRUCT__entry(
+ __field( void *, dpaddr )
+ __string( dp_name, ovs_dp_name(dp) )
+ __string( dev_name, skb->dev->name )
+ __field( void *, skbaddr )
+ __field( unsigned int, len )
+ __field( unsigned int, data_len )
+ __field( unsigned int, truesize )
+ __field( u8, nr_frags )
+ __field( u16, gso_size )
+ __field( u16, gso_type )
+ __field( u32, ovs_flow_hash )
+ __field( u32, recirc_id )
+ __field( const void *, keyaddr )
+ __field( u16, key_eth_type )
+ __field( u8, key_ct_state )
+ __field( u8, key_ct_orig_proto )
+ __field( u16, key_ct_zone )
+ __field( unsigned int, flow_key_valid )
+ __field( u8, upcall_cmd )
+ __field( u32, upcall_port )
+ __field( u16, upcall_mru )
+ ),
+
+ TP_fast_assign(
+ __entry->dpaddr = dp;
+ __assign_str(dp_name, ovs_dp_name(dp));
+ __assign_str(dev_name, skb->dev->name);
+ __entry->skbaddr = skb;
+ __entry->len = skb->len;
+ __entry->data_len = skb->data_len;
+ __entry->truesize = skb->truesize;
+ __entry->nr_frags = skb_shinfo(skb)->nr_frags;
+ __entry->gso_size = skb_shinfo(skb)->gso_size;
+ __entry->gso_type = skb_shinfo(skb)->gso_type;
+ __entry->ovs_flow_hash = key->ovs_flow_hash;
+ __entry->recirc_id = key->recirc_id;
+ __entry->keyaddr = key;
+ __entry->key_eth_type = key->eth.type;
+ __entry->key_ct_state = key->ct_state;
+ __entry->key_ct_orig_proto = key->ct_orig_proto;
+ __entry->key_ct_zone = key->ct_zone;
+ __entry->flow_key_valid = !(key->mac_proto & SW_FLOW_KEY_INVALID);
+ __entry->upcall_cmd = upcall_info->cmd;
+ __entry->upcall_port = upcall_info->portid;
+ __entry->upcall_mru = upcall_info->mru;
+ ),
+
+ TP_printk("dpaddr=%p dp_name=%s dev=%s skbaddr=%p len=%u data_len=%u truesize=%u nr_frags=%d gso_size=%d gso_type=%#x ovs_flow_hash=0x%08x recirc_id=0x%08x keyaddr=%p eth_type=0x%04x ct_state=%02x ct_orig_proto=%02x ct_zone=%04x flow_key_valid=%d upcall_cmd=%u upcall_port=%u upcall_mru=%u",
+ __entry->dpaddr, __get_str(dp_name), __get_str(dev_name),
+ __entry->skbaddr, __entry->len, __entry->data_len,
+ __entry->truesize, __entry->nr_frags, __entry->gso_size,
+ __entry->gso_type, __entry->ovs_flow_hash,
+ __entry->recirc_id, __entry->keyaddr, __entry->key_eth_type,
+ __entry->key_ct_state, __entry->key_ct_orig_proto,
+ __entry->key_ct_zone,
+ __entry->flow_key_valid,
+ __entry->upcall_cmd, __entry->upcall_port,
+ __entry->upcall_mru)
+);
+
+#endif /* _TRACE_OPENVSWITCH_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE openvswitch_trace
+#include <trace/define_trace.h>
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 88deb5b41429..cf2ce5812489 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -507,6 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
}
skb->dev = vport->dev;
+ skb->tstamp = 0;
vport->ops->send(skb);
return;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 330ba68828e7..543365f58e97 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -250,8 +250,7 @@ static struct net_device *packet_cached_dev_get(struct packet_sock *po)
rcu_read_lock();
dev = rcu_dereference(po->cached_dev);
- if (likely(dev))
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
@@ -1656,6 +1655,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
case PACKET_FANOUT_ROLLOVER:
if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
return -EINVAL;
+ break;
case PACKET_FANOUT_HASH:
case PACKET_FANOUT_LB:
case PACKET_FANOUT_CPU:
@@ -3023,8 +3023,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
out_free:
kfree_skb(skb);
out_unlock:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
out:
return err;
}
@@ -3157,8 +3156,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}
}
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
proto_curr = po->prot_hook.type;
dev_curr = po->prot_hook.dev;
@@ -3195,8 +3193,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
packet_cached_dev_assign(po, dev);
}
}
- if (dev_curr)
- dev_put(dev_curr);
+ dev_put(dev_curr);
if (proto == 0 || !need_rehook)
goto out_unlock;
@@ -3206,7 +3203,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
} else {
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
out_unlock:
@@ -3934,12 +3931,9 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
return -EFAULT;
lock_sock(sk);
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
- ret = -EBUSY;
- } else {
+ if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec)
po->tp_tx_has_off = !!val;
- ret = 0;
- }
+
release_sock(sk);
return 0;
}
@@ -4106,13 +4100,12 @@ static int packet_notifier(struct notifier_block *this,
__unregister_prot_hook(sk, false);
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
WRITE_ONCE(po->ifindex, -1);
- if (po->prot_hook.dev)
- dev_put(po->prot_hook.dev);
+ dev_put(po->prot_hook.dev);
po->prot_hook.dev = NULL;
}
spin_unlock(&po->bind_lock);
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index ca6ae4c59433..65218b7ce9f9 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -275,8 +275,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
drop:
kfree_skb(skb);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return err;
}
EXPORT_SYMBOL(pn_skb_send);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index ac0fae06cc15..cde671d29d5d 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -122,8 +122,7 @@ struct net_device *phonet_device_get(struct net *net)
break;
dev = NULL;
}
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
return dev;
}
@@ -233,11 +232,11 @@ static int phonet_device_autoconf(struct net_device *dev)
struct if_phonet_req req;
int ret;
- if (!dev->netdev_ops->ndo_do_ioctl)
+ if (!dev->netdev_ops->ndo_siocdevprivate)
return -EOPNOTSUPP;
- ret = dev->netdev_ops->ndo_do_ioctl(dev, (struct ifreq *)&req,
- SIOCPNGAUTOCONF);
+ ret = dev->netdev_ops->ndo_siocdevprivate(dev, (struct ifreq *)&req,
+ NULL, SIOCPNGAUTOCONF);
if (ret < 0)
return ret;
@@ -411,8 +410,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr)
daddr >>= 2;
rcu_read_lock();
dev = rcu_dereference(routes->table[daddr]);
- if (dev)
- dev_hold(dev);
+ dev_hold(dev);
rcu_read_unlock();
if (!dev)
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 2599235d592e..71e2caf6ab85 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -379,8 +379,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
saddr = PN_NO_ADDR;
release_sock(sk);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
if (saddr == PN_NO_ADDR)
return -EHOSTUNREACH;
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 8d00dfe8139e..1990d496fcfc 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -775,8 +775,10 @@ int qrtr_ns_init(void)
}
qrtr_ns.workqueue = alloc_workqueue("qrtr_ns_handler", WQ_UNBOUND, 1);
- if (!qrtr_ns.workqueue)
+ if (!qrtr_ns.workqueue) {
+ ret = -ENOMEM;
goto err_sock;
+ }
qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready;
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index f2efaa4225f9..ec2322529727 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -493,7 +493,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
goto err;
}
- if (len != ALIGN(size, 4) + hdrlen)
+ if (!size || len != ALIGN(size, 4) + hdrlen)
goto err;
if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
@@ -506,8 +506,12 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
if (cb->type == QRTR_TYPE_NEW_SERVER) {
/* Remote node endpoint can bridge other distant nodes */
- const struct qrtr_ctrl_pkt *pkt = data + hdrlen;
+ const struct qrtr_ctrl_pkt *pkt;
+ if (size < sizeof(*pkt))
+ goto err;
+
+ pkt = data + hdrlen;
qrtr_node_assign(node, le32_to_cpu(pkt->server.node));
}
@@ -518,8 +522,10 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
if (!ipc)
goto err;
- if (sock_queue_rcv_skb(&ipc->sk, skb))
+ if (sock_queue_rcv_skb(&ipc->sk, skb)) {
+ qrtr_port_put(ipc);
goto err;
+ }
qrtr_port_put(ipc);
}
@@ -751,7 +757,7 @@ static void qrtr_reset_ports(void)
xa_for_each_start(&qrtr_ports, index, ipc, 1) {
sock_hold(&ipc->sk);
ipc->sk.sk_err = ENETRESET;
- ipc->sk.sk_error_report(&ipc->sk);
+ sk_error_report(&ipc->sk);
sock_put(&ipc->sk);
}
rcu_read_unlock();
@@ -839,6 +845,8 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
ipc = qrtr_port_lookup(to->sq_port);
if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
+ if (ipc)
+ qrtr_port_put(ipc);
kfree_skb(skb);
return -ENODEV;
}
@@ -1153,14 +1161,14 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = put_user(len, (int __user *)argp);
break;
case SIOCGIFADDR:
- if (copy_from_user(&ifr, argp, sizeof(ifr))) {
+ if (get_user_ifreq(&ifr, NULL, argp)) {
rc = -EFAULT;
break;
}
sq = (struct sockaddr_qrtr *)&ifr.ifr_addr;
*sq = ipc->us;
- if (copy_to_user(argp, &ifr, sizeof(ifr))) {
+ if (put_user_ifreq(&ifr, argp)) {
rc = -EFAULT;
break;
}
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 9b6ffff72f2d..28c1b0022178 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -131,9 +131,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
cpu_relax();
}
- ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len,
+ ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
&off, PAGE_SIZE);
- if (unlikely(ret != ibmr->sg_len))
+ if (unlikely(ret != ibmr->sg_dma_len))
return ret < 0 ? ret : -EINVAL;
if (cmpxchg(&frmr->fr_state,
diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c
index ff97e8eda858..006b2e441418 100644
--- a/net/rds/ib_ring.c
+++ b/net/rds/ib_ring.c
@@ -141,7 +141,7 @@ int rds_ib_ring_low(struct rds_ib_work_ring *ring)
}
/*
- * returns the oldest alloced ring entry. This will be the next one
+ * returns the oldest allocated ring entry. This will be the next one
* freed. This can't be called if there are none allocated.
*/
u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring)
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 4e64598176b0..5461d77fff4f 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -78,6 +78,7 @@ void rds_tcp_state_change(struct sock *sk)
case TCP_CLOSE_WAIT:
case TCP_CLOSE:
rds_conn_path_drop(cp, false);
+ break;
default:
break;
}
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 42c5ff1eda95..f4ee13da90c7 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -177,7 +177,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
goto out;
}
tc->t_tinc = tinc;
- rdsdebug("alloced tinc %p\n", tinc);
+ rdsdebug("allocated tinc %p\n", tinc);
rds_inc_path_init(&tinc->ti_inc, cp,
&cp->cp_conn->c_faddr);
tinc->ti_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] =
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 32dc50f0a303..1f424cbfcbb4 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -208,6 +208,7 @@ void rds_send_worker(struct work_struct *work)
case -ENOMEM:
rds_stats_inc(s_send_delayed_retry);
queue_delayed_work(rds_wq, &cp->cp_send_w, 2);
+ break;
default:
break;
}
@@ -232,6 +233,7 @@ void rds_recv_worker(struct work_struct *work)
case -ENOMEM:
rds_stats_inc(s_recv_delayed_retry);
queue_delayed_work(rds_wq, &cp->cp_recv_w, 2);
+ break;
default:
break;
}
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 0885b22e5c0e..accd35c05577 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -21,6 +21,8 @@ config AF_RXRPC
See Documentation/networking/rxrpc.rst.
+if AF_RXRPC
+
config AF_RXRPC_IPV6
bool "IPv6 support for RxRPC"
depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
@@ -30,7 +32,6 @@ config AF_RXRPC_IPV6
config AF_RXRPC_INJECT_LOSS
bool "Inject packet loss into RxRPC packet stream"
- depends on AF_RXRPC
help
Say Y here to inject packet loss by discarding some received and some
transmitted packets.
@@ -38,7 +39,6 @@ config AF_RXRPC_INJECT_LOSS
config AF_RXRPC_DEBUG
bool "RxRPC dynamic debugging"
- depends on AF_RXRPC
help
Say Y here to make runtime controllable debugging messages appear.
@@ -47,7 +47,6 @@ config AF_RXRPC_DEBUG
config RXKAD
bool "RxRPC Kerberos security"
- depends on AF_RXRPC
select CRYPTO
select CRYPTO_MANAGER
select CRYPTO_SKCIPHER
@@ -58,3 +57,5 @@ config RXKAD
through the use of the key retention service.
See Documentation/networking/rxrpc.rst.
+
+endif
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 41671af6b33f..2b5f89713e36 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -471,6 +471,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
switch (rx->sk.sk_state) {
case RXRPC_UNBOUND:
rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
+ break;
case RXRPC_CLIENT_UNBOUND:
case RXRPC_CLIENT_BOUND:
break;
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 3ce6d628cd75..19e929c7c38b 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -77,7 +77,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
}
/*
- * Process event packets targetted at a local endpoint.
+ * Process event packets targeted at a local endpoint.
*/
void rxrpc_process_local_events(struct rxrpc_local *local)
{
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f6d5755d669e..7dd3a2dc5fa4 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -381,7 +381,8 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
}
mutex_unlock(&idrinfo->lock);
- if (nla_put_u32(skb, TCA_FCNT, n_i))
+ ret = nla_put_u32(skb, TCA_FCNT, n_i);
+ if (ret)
goto nla_put_failure;
nla_nest_end(skb, nest);
@@ -494,7 +495,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
p->tcfa_tm.install = jiffies;
p->tcfa_tm.lastuse = jiffies;
p->tcfa_tm.firstuse = 0;
- p->tcfa_flags = flags;
+ p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK;
if (est) {
err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
&p->tcfa_rate_est,
@@ -940,7 +941,7 @@ void tcf_idr_insert_many(struct tc_action *actions[])
}
}
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
bool rtnl_held,
struct netlink_ext_ack *extack)
{
@@ -950,7 +951,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
struct nlattr *kind;
int err;
- if (name == NULL) {
+ if (!police) {
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
tcf_action_policy, extack);
if (err < 0)
@@ -966,7 +967,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
return ERR_PTR(err);
}
} else {
- if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+ if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
NL_SET_ERR_MSG(extack, "TC action name too long");
return ERR_PTR(-EINVAL);
}
@@ -1003,12 +1004,11 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
- char *name, int ovr, int bind,
struct tc_action_ops *a_o, int *init_res,
- bool rtnl_held,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
- struct nla_bitfield32 flags = { 0, 0 };
+ bool police = flags & TCA_ACT_FLAGS_POLICE;
+ struct nla_bitfield32 userflags = { 0, 0 };
u8 hw_stats = TCA_ACT_HW_STATS_ANY;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_cookie *cookie = NULL;
@@ -1016,7 +1016,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
int err;
/* backward compatibility for policer */
- if (name == NULL) {
+ if (!police) {
err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
tcf_action_policy, extack);
if (err < 0)
@@ -1031,22 +1031,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
}
hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
if (tb[TCA_ACT_FLAGS])
- flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+ userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
- err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- rtnl_held, tp, flags.value, extack);
+ err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp,
+ userflags.value | flags, extack);
} else {
- err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
- tp, flags.value, extack);
+ err = a_o->init(net, nla, est, &a, tp, userflags.value | flags,
+ extack);
}
if (err < 0)
goto err_out;
*init_res = err;
- if (!name && tb[TCA_ACT_COOKIE])
+ if (!police && tb[TCA_ACT_COOKIE])
tcf_set_action_cookie(&a->act_cookie, cookie);
- if (!name)
+ if (!police)
a->hw_stats = hw_stats;
return a;
@@ -1062,9 +1062,9 @@ err_out:
/* Returns numbers of initialized actions or negative error. */
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
- struct nlattr *est, char *name, int ovr, int bind,
- struct tc_action *actions[], int init_res[], size_t *attr_size,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr *est, struct tc_action *actions[],
+ int init_res[], size_t *attr_size, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -1081,7 +1081,9 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+ a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL),
+ extack);
if (IS_ERR(a_o)) {
err = PTR_ERR(a_o);
goto err_mod;
@@ -1090,9 +1092,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
- ops[i - 1], &init_res[i - 1], rtnl_held,
- extack);
+ act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1],
+ &init_res[i - 1], flags, extack);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
@@ -1112,7 +1113,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
goto err_mod;
err:
- tcf_action_destroy(actions, bind);
+ tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
err_mod:
for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
if (ops[i])
@@ -1350,8 +1351,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
module_put(ops->owner);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- return 0;
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");
@@ -1422,8 +1421,6 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (ret > 0)
- return 0;
return ret;
}
@@ -1480,7 +1477,6 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
- int err = 0;
skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
GFP_KERNEL);
@@ -1494,15 +1490,12 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return -EINVAL;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
- return err;
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 portid, int ovr,
+ struct nlmsghdr *n, u32 portid, u32 flags,
struct netlink_ext_ack *extack)
{
size_t attr_size = 0;
@@ -1511,8 +1504,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
int init_res[TCA_ACT_MAX_PRIO] = {};
for (loop = 0; loop < 10; loop++) {
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
- actions, init_res, &attr_size, true, extack);
+ ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res,
+ &attr_size, flags, extack);
if (ret != -EAGAIN)
break;
}
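Throughout this series the former ovr/bind/rtnl_held arguments are carried as bits of a single u32 flags word (TCA_ACT_FLAGS_REPLACE, TCA_ACT_FLAGS_BIND, TCA_ACT_FLAGS_NO_RTNL, TCA_ACT_FLAGS_POLICE, as used in the hunks above). A minimal sketch of how a caller could encode the old booleans; the helper name is made up:

/* Hedged sketch: fold the old boolean arguments into the flags word the
 * reworked tcf_action_init() expects.  encode_act_flags() is hypothetical;
 * the TCA_ACT_FLAGS_* bits are the ones referenced in this patch.
 */
static u32 encode_act_flags(bool replace, bool bind, bool rtnl_held, bool police)
{
	u32 flags = 0;

	if (replace)
		flags |= TCA_ACT_FLAGS_REPLACE;
	if (bind)
		flags |= TCA_ACT_FLAGS_BIND;
	if (!rtnl_held)
		flags |= TCA_ACT_FLAGS_NO_RTNL;
	if (police)
		flags |= TCA_ACT_FLAGS_POLICE;
	return flags;
}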
@@ -1542,7 +1535,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ROOT_MAX + 1];
u32 portid = NETLINK_CB(skb).portid;
- int ret = 0, ovr = 0;
+ u32 flags = 0;
+ int ret = 0;
if ((n->nlmsg_type != RTM_GETACTION) &&
!netlink_capable(skb, CAP_NET_ADMIN))
@@ -1568,8 +1562,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
* is zero) then just set this
*/
if (n->nlmsg_flags & NLM_F_REPLACE)
- ovr = 1;
- ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+ flags = TCA_ACT_FLAGS_REPLACE;
+ ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags,
extack);
break;
case RTM_DELACTION:
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index e48e980c3b93..5c36013339e1 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -43,20 +43,18 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
tcf_lastuse_update(&prog->tcf_tm);
bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
- rcu_read_lock();
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(filter, skb);
+ filter_res = bpf_prog_run(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(filter, skb);
+ filter_res = bpf_prog_run(filter, skb);
}
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
skb_orphan(skb);
- rcu_read_unlock();
/* A BPF program may overwrite the default action opcode.
* Similarly as in cls_bpf, if filter_res == -1 we use the
@@ -277,11 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
- int replace, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tcf_bpf_cfg cfg, old;
@@ -319,7 +317,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (bind)
return 0;
- if (!replace) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*act, bind);
return -EEXIST;
}
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e19885d7fe2c..94e78ac7a748 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -96,12 +96,12 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
struct tcf_connmark_info *ci;
struct tc_connmark *parm;
@@ -144,7 +144,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ci = to_connmark(*a);
if (bind)
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 4fa4fcb842ba..a15ec95e69c3 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -41,11 +41,12 @@ static unsigned int csum_net_id;
static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_csum_params *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -78,7 +79,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
} else if (err > 0) {
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index a656baa321fe..ad9df0cb4b98 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -322,11 +322,22 @@ err_alloc:
static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
{
+ struct flow_block_cb *block_cb, *tmp_cb;
struct tcf_ct_flow_table *ct_ft;
+ struct flow_block *block;
ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
rwork);
nf_flow_table_free(&ct_ft->nf_ft);
+
+ /* Remove any remaining callbacks before cleanup */
+ block = &ct_ft->nf_ft.flow_block;
+ down_write(&ct_ft->nf_ft.flow_block_lock);
+ list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
+ list_del(&block_cb->list);
+ flow_block_cb_free(block_cb);
+ }
+ up_write(&ct_ft->nf_ft.flow_block_lock);
kfree(ct_ft);
module_put(THIS_MODULE);
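Freeing entries while walking the callback list requires the _safe iterator, which records the next pointer before the current node is released. A plain-C equivalent of that pattern, stripped of the kernel list API (illustrative only):

/* Hedged sketch: delete-while-iterating needs the next pointer saved
 * before the current node is freed, which is what the _safe variant of
 * the kernel list iterator does.  Types here are simplified stand-ins.
 */
#include <stdlib.h>

struct cb_node {
	struct cb_node *next;
	/* ... callback payload ... */
};

static void drain_list(struct cb_node **head)
{
	struct cb_node *cur = *head, *tmp;

	while (cur) {
		tmp = cur->next;	/* save before freeing */
		free(cur);
		cur = tmp;
	}
	*head = NULL;
}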
@@ -1026,7 +1037,8 @@ do_nat:
/* This will take care of sending queued events
* even if the connection is already confirmed.
*/
- nf_conntrack_confirm(skb);
+ if (nf_conntrack_confirm(skb) != NF_ACCEPT)
+ goto drop;
}
if (!skip_add)
@@ -1223,11 +1235,11 @@ static int tcf_ct_fill_params(struct net *net,
static int tcf_ct_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int replace, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ct_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_ct_params *params = NULL;
struct nlattr *tb[TCA_CT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -1267,7 +1279,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
if (bind)
return 0;
- if (!replace) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index b20c8ce59905..549374a2d008 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -154,11 +154,11 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
u32 dscpmask = 0, dscpstatemask, index;
struct nlattr *tb[TCA_CTINFO_MAX + 1];
struct tcf_ctinfo_params *cp_new;
@@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
} else if (err > 0) {
if (bind) /* don't override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 73c3926358a0..d8dce173df37 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -52,11 +52,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GACT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_gact *parm;
@@ -109,7 +109,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
} else if (err > 0) {
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index a78cb7965718..7df72a4197a3 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -295,12 +295,12 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gate_net_id);
enum tk_offsets tk_offset = TK_OFFS_TAI;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_GATE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
u64 cycletime = 0, basetime = 0;
@@ -364,7 +364,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index a2ddea04183a..7064a365a1a9 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -479,11 +479,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -532,7 +532,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
kfree(p);
return err;
}
- err = load_metalist(tb2, rtnl_held);
+ err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL));
if (err) {
kfree(p);
return err;
@@ -560,7 +560,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
kfree(p);
return -EEXIST;
@@ -600,7 +600,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
if (tb[TCA_IFE_METALST]) {
- err = populate_metalist(ife, tb2, exists, rtnl_held);
+ err = populate_metalist(ife, tb2, exists,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL));
if (err)
goto metadata_parse_err;
} else {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index ac7297f42355..265b1443e252 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -94,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- const struct tc_action_ops *ops, int ovr, int bind,
+ const struct tc_action_ops *ops,
struct tcf_proto *tp, u32 flags)
{
struct tc_action_net *tn = net_generic(net, id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_IPT_MAX + 1];
struct tcf_ipt *ipt;
struct xt_entry_target *td, *t;
@@ -154,7 +155,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
if (bind)/* dont override defaults */
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
@@ -201,21 +202,21 @@ err1:
}
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
- bind, tp, flags);
+ return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops,
+ tp, flags);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool unlocked, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
- return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
- bind, tp, flags);
+ return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops,
+ tp, flags);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7153c67f641e..d64b0eeccbe4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -78,8 +78,7 @@ static void tcf_mirred_release(struct tc_action *a)
/* last reference to action, no need to lock */
dev = rcu_dereference_protected(m->tcfm_dev, 1);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -91,11 +90,11 @@ static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_MIRRED_MAX + 1];
struct tcf_chain *goto_ch = NULL;
bool mac_header_xmit = false;
@@ -155,7 +154,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
@@ -180,8 +179,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
mac_header_xmit = dev_is_mac_header_xmit(dev);
dev = rcu_replace_pointer(m->tcfm_dev, dev,
lockdep_is_held(&m->tcf_lock));
- if (dev)
- dev_put(dev);
+ dev_put(dev);
m->tcfm_mac_header_xmit = mac_header_xmit;
}
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
@@ -273,6 +271,9 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
+ /* All mirred/redirected skbs should clear previous ct info */
+ nf_reset_ct(skb2);
+
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
expects_nh = want_ingress || !m_mac_header_xmit;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index d1486ea496a2..e4529b428cf4 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -152,11 +152,11 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
static int tcf_mpls_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mpls_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_MPLS_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tcf_mpls_params *p;
@@ -255,7 +255,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 1ebd2a86d980..7dd6b586ba7f 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -34,11 +34,11 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
};
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
- struct tc_action **a, int ovr, int bind,
- bool rtnl_held, struct tcf_proto *tp,
+ struct tc_action **a, struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
@@ -70,7 +70,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
} else if (err > 0) {
if (bind)
return 0;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b45304446e13..c6c862c459cc 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -136,11 +136,11 @@ nla_failure:
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_pedit_key *keys = NULL;
@@ -198,7 +198,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
} else if (err > 0) {
if (bind)
goto out_free;
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
ret = -EEXIST;
goto out_release;
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0fab8de176d2..832157a840fc 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -48,11 +48,11 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
int ret = 0, tcfp_result = TC_ACT_OK, err, size;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_POLICE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_police *parm;
@@ -97,7 +97,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
spin_lock_init(&(to_police(*a)->tcfp_lock));
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 6a0c16e4351d..230501eb9e06 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -34,11 +34,12 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
};
static int tcf_sample_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held, struct tcf_proto *tp,
+ struct nlattr *est, struct tc_action **a,
+ struct tcf_proto *tp,
u32 flags, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
u32 psample_group_num, rate, index;
@@ -75,7 +76,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 726cc956d06f..cbbe1861d3a2 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -85,11 +85,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_DEF_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_defact *parm;
@@ -147,7 +147,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ret = ACT_P_CREATED;
} else {
- if (!ovr) {
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
err = -EEXIST;
goto release_idr;
}
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index e5f3fb8b00e3..605418538347 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -96,11 +96,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+ bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct tcf_skbedit_params *params_new;
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
@@ -186,7 +186,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else {
d = to_skbedit(*a);
- if (!ovr) {
+ if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 81a1c67335be..ecb9ee666095 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -6,10 +6,12 @@
*/
#include <linux/module.h>
+#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <net/inet_ecn.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -20,33 +22,49 @@
static unsigned int skbmod_net_id;
static struct tc_action_ops act_skbmod_ops;
-#define MAX_EDIT_LEN ETH_HLEN
static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_skbmod *d = to_skbmod(a);
- int action;
+ int action, max_edit_len, err;
struct tcf_skbmod_params *p;
u64 flags;
- int err;
tcf_lastuse_update(&d->tcf_tm);
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
- /* XXX: if you are going to edit more fields beyond ethernet header
- * (example when you add IP header replacement or vlan swap)
- * then MAX_EDIT_LEN needs to change appropriately
- */
- err = skb_ensure_writable(skb, MAX_EDIT_LEN);
- if (unlikely(err)) /* best policy is to drop on the floor */
- goto drop;
-
action = READ_ONCE(d->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
goto drop;
+ max_edit_len = skb_mac_header_len(skb);
p = rcu_dereference_bh(d->skbmod_p);
flags = p->flags;
+
+ /* tcf_skbmod_init() guarantees "flags" to be one of the following:
+ * 1. a combination of SKBMOD_F_{DMAC,SMAC,ETYPE}
+ * 2. SKBMOD_F_SWAPMAC
+ * 3. SKBMOD_F_ECN
+ * SKBMOD_F_ECN only works with IP packets; all other flags only work with Ethernet
+ * packets.
+ */
+ if (flags == SKBMOD_F_ECN) {
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
+ case cpu_to_be16(ETH_P_IPV6):
+ max_edit_len += skb_network_header_len(skb);
+ break;
+ default:
+ goto out;
+ }
+ } else if (!skb->dev || skb->dev->type != ARPHRD_ETHER) {
+ goto out;
+ }
+
+ err = skb_ensure_writable(skb, max_edit_len);
+ if (unlikely(err)) /* best policy is to drop on the floor */
+ goto drop;
+
if (flags & SKBMOD_F_DMAC)
ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
if (flags & SKBMOD_F_SMAC)
@@ -62,6 +80,10 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
}
+ if (flags & SKBMOD_F_ECN)
+ INET_ECN_set_ce(skb);
+
+out:
return action;
drop:
@@ -78,11 +100,12 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+ bool ovr = flags & TCA_ACT_FLAGS_REPLACE;
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
struct tcf_skbmod_params *p, *p_old;
struct tcf_chain *goto_ch = NULL;
@@ -125,6 +148,8 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
index = parm->index;
if (parm->flags & SKBMOD_F_SWAPMAC)
lflags = SKBMOD_F_SWAPMAC;
+ if (parm->flags & SKBMOD_F_ECN)
+ lflags = SKBMOD_F_ECN;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (err < 0)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 85c0d0d5b9da..d9cd174eecb7 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -355,11 +355,11 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 act_flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+ bool bind = act_flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
struct metadata_dst *metadata = NULL;
@@ -504,7 +504,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
NL_SET_ERR_MSG(extack, "TC IDR already exists");
ret = -EEXIST;
goto release_tun_meta;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 1cac3c6fbb49..e4dc5a555bd8 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -70,7 +70,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
/* replace the vid */
tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid;
/* replace prio bits, if tcfv_push_prio specified */
- if (p->tcfv_push_prio) {
+ if (p->tcfv_push_prio_exists) {
tci &= ~VLAN_PRIO_MASK;
tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT;
}
@@ -114,13 +114,14 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- int ovr, int bind, bool rtnl_held,
struct tcf_proto *tp, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
+ bool bind = flags & TCA_ACT_FLAGS_BIND;
struct nlattr *tb[TCA_VLAN_MAX + 1];
struct tcf_chain *goto_ch = NULL;
+ bool push_prio_exists = false;
struct tcf_vlan_params *p;
struct tc_vlan *parm;
struct tcf_vlan *v;
@@ -189,7 +190,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
push_proto = htons(ETH_P_8021Q);
}
- if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY])
+ push_prio_exists = !!tb[TCA_VLAN_PUSH_VLAN_PRIORITY];
+ if (push_prio_exists)
push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]);
break;
case TCA_VLAN_ACT_POP_ETH:
@@ -221,7 +223,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
- } else if (!ovr) {
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
}
@@ -241,6 +243,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
p->tcfv_action = action;
p->tcfv_push_vid = push_vid;
p->tcfv_push_prio = push_prio;
+ p->tcfv_push_prio_exists = push_prio_exists || action == TCA_VLAN_ACT_PUSH;
p->tcfv_push_proto = push_proto;
if (action == TCA_VLAN_ACT_PUSH_ETH) {
@@ -304,8 +307,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a,
(nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) ||
nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
p->tcfv_push_proto) ||
- (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
- p->tcfv_push_prio))))
+ (p->tcfv_push_prio_exists &&
+ nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, p->tcfv_push_prio))))
goto nla_put_failure;
if (p->tcfv_action == TCA_VLAN_ACT_PUSH_ETH) {
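Each of the act_* conversions above follows the same shape: the old ovr/bind (and rtnl_held) arguments disappear and the same information now travels in the u32 flags word. A minimal sketch of the decode, using only the TCA_ACT_FLAGS_* bits seen in this series (the surrounding names are illustrative, not taken from the patch):

	/* Illustrative fragment, not from the patch: how an action ->init()
	 * now derives the values that used to arrive as separate arguments.
	 * TCA_ACT_FLAGS_REPLACE carries the old 'ovr', TCA_ACT_FLAGS_BIND the
	 * old 'bind', and TCA_ACT_FLAGS_NO_RTNL the (inverted) 'rtnl_held'.
	 */
	bool replace   = flags & TCA_ACT_FLAGS_REPLACE;
	bool bind      = flags & TCA_ACT_FLAGS_BIND;
	bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);

	if (exists && !replace) {	/* 'exists': tcf_idr_check_alloc() found the index */
		tcf_idr_release(*a, bind);
		return -EEXIST;
	}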
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 279f9e2a2319..2ef8f5a6205a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -634,6 +634,7 @@ static void tcf_block_offload_init(struct flow_block_offload *bo,
bo->block_shared = shared;
bo->extack = extack;
bo->sch = sch;
+ bo->cb_list_head = &flow_block->cb_list;
INIT_LIST_HEAD(&bo->cb_list);
}
@@ -1531,7 +1532,7 @@ static inline int __tcf_classify(struct sk_buff *skb,
u32 *last_executed_chain)
{
#ifdef CONFIG_NET_CLS_ACT
- const int max_reclassify_loop = 4;
+ const int max_reclassify_loop = 16;
const struct tcf_proto *first_tp;
int limit = 0;
@@ -1577,21 +1578,11 @@ reset:
#endif
}
-int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+int tcf_classify(struct sk_buff *skb,
+ const struct tcf_block *block,
+ const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
- u32 last_executed_chain = 0;
-
- return __tcf_classify(skb, tp, tp, res, compat_mode,
- &last_executed_chain);
-}
-EXPORT_SYMBOL(tcf_classify);
-
-int tcf_classify_ingress(struct sk_buff *skb,
- const struct tcf_block *ingress_block,
- const struct tcf_proto *tp,
- struct tcf_result *res, bool compat_mode)
-{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
u32 last_executed_chain = 0;
@@ -1603,20 +1594,22 @@ int tcf_classify_ingress(struct sk_buff *skb,
struct tc_skb_ext *ext;
int ret;
- ext = skb_ext_find(skb, TC_SKB_EXT);
+ if (block) {
+ ext = skb_ext_find(skb, TC_SKB_EXT);
- if (ext && ext->chain) {
- struct tcf_chain *fchain;
+ if (ext && ext->chain) {
+ struct tcf_chain *fchain;
- fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
- if (!fchain)
- return TC_ACT_SHOT;
+ fchain = tcf_chain_lookup_rcu(block, ext->chain);
+ if (!fchain)
+ return TC_ACT_SHOT;
- /* Consume, so cloned/redirect skbs won't inherit ext */
- skb_ext_del(skb, TC_SKB_EXT);
+ /* Consume, so cloned/redirect skbs won't inherit ext */
+ skb_ext_del(skb, TC_SKB_EXT);
- tp = rcu_dereference_bh(fchain->filter_chain);
- last_executed_chain = fchain->index;
+ tp = rcu_dereference_bh(fchain->filter_chain);
+ last_executed_chain = fchain->index;
+ }
}
ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
@@ -1635,7 +1628,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
return ret;
#endif
}
-EXPORT_SYMBOL(tcf_classify_ingress);
+EXPORT_SYMBOL(tcf_classify);
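With tcf_classify() and tcf_classify_ingress() folded into one export, the extra block argument only matters for the TC_SKB_EXT chain-restart path, so non-ingress callers simply pass NULL, as the sch_* hunks later in this diff do. A minimal sketch of a caller under the new signature (the qdisc field name is a typical example, not mandated by the patch):

	/* Illustrative caller of the merged API: pass the ingress block when
	 * chain restart via TC_SKB_EXT is wanted, or NULL otherwise.
	 */
	struct tcf_result res;
	struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);

	switch (tcf_classify(skb, NULL, fl, &res, false)) {
	case TC_ACT_SHOT:
		return NULL;
	default:
		break;
	}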
struct tcf_chain_info {
struct tcf_proto __rcu **pprev;
@@ -1870,13 +1863,10 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
-
- if (err > 0)
- err = 0;
return err;
}
@@ -1909,15 +1899,13 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
- if (err > 0)
- err = 0;
return err;
}
@@ -1962,6 +1950,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
int err;
int tp_created;
bool rtnl_held = false;
+ u32 flags;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1982,6 +1971,7 @@ replay:
tp = NULL;
cl = 0;
block = NULL;
+ flags = 0;
if (prio == 0) {
/* If no priority is provided by the user,
@@ -2125,9 +2115,12 @@ replay:
goto errout;
}
+ if (!(n->nlmsg_flags & NLM_F_CREATE))
+ flags |= TCA_ACT_FLAGS_REPLACE;
+ if (!rtnl_held)
+ flags |= TCA_ACT_FLAGS_NO_RTNL;
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
- n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
- rtnl_held, extack);
+ flags, extack);
if (err == 0) {
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held);
@@ -2711,13 +2704,11 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
}
if (unicast)
- err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = rtnl_unicast(skb, net, portid);
else
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
return err;
}
@@ -2741,7 +2732,7 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
}
if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ return rtnl_unicast(skb, net, portid);
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
@@ -2904,7 +2895,7 @@ replay:
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
- n->nlmsg_seq, n->nlmsg_type, true);
+ n->nlmsg_flags, n->nlmsg_type, true);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
break;
@@ -3035,8 +3026,8 @@ void tcf_exts_destroy(struct tcf_exts *exts)
EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
- struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr *rate_tlv, struct tcf_exts *exts,
+ u32 flags, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -3047,13 +3038,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
if (exts->police && tb[exts->police]) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
+ a_o = tc_action_load_ops(tb[exts->police], true,
+ !(flags & TCA_ACT_FLAGS_NO_RTNL),
+ extack);
if (IS_ERR(a_o))
return PTR_ERR(a_o);
+ flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
act = tcf_action_init_1(net, tp, tb[exts->police],
- rate_tlv, "police", ovr,
- TCA_ACT_BIND, a_o, init_res,
- rtnl_held, extack);
+ rate_tlv, a_o, init_res, flags,
+ extack);
module_put(a_o->owner);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -3065,10 +3058,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
} else if (exts->action && tb[exts->action]) {
int err;
+ flags |= TCA_ACT_FLAGS_BIND;
err = tcf_action_init(net, tp, tb[exts->action],
- rate_tlv, NULL, ovr, TCA_ACT_BIND,
- exts->actions, init_res,
- &attr_size, rtnl_held, extack);
+ rate_tlv, exts->actions, init_res,
+ &attr_size, flags, extack);
if (err < 0)
return err;
exts->nr_actions = err;
@@ -3832,7 +3825,7 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru
fl = rcu_dereference_bh(qe->filter_chain);
- switch (tcf_classify(skb, fl, &cl_res, false)) {
+ switch (tcf_classify(skb, NULL, fl, &cl_res, false)) {
case TC_ACT_SHOT:
qdisc_qstats_drop(sch);
__qdisc_drop(skb, to_free);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index f256a7c69093..8158fc9ee1ab 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -145,12 +145,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
static int basic_set_parms(struct net *net, struct tcf_proto *tp,
struct basic_filter *f, unsigned long base,
struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
@@ -169,8 +169,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg,
+ u32 flags, struct netlink_ext_ack *extack)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
@@ -216,7 +216,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
goto errout;
}
- err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
+ err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags,
extack);
if (err < 0) {
if (!fold)
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 6e3e63db0e01..df19a847829e 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -85,8 +85,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_bpf_prog *prog;
int ret = -1;
- /* Needed here for accessing maps. */
- rcu_read_lock();
list_for_each_entry_rcu(prog, &head->plist, link) {
int filter_res;
@@ -98,11 +96,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = bpf_prog_run(prog->filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = bpf_prog_run(prog->filter, skb);
}
if (prog->exts_integrated) {
@@ -131,7 +129,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
break;
}
- rcu_read_unlock();
return ret;
}
@@ -407,7 +404,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_bpf_prog *prog, unsigned long base,
- struct nlattr **tb, struct nlattr *est, bool ovr,
+ struct nlattr **tb, struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
bool is_bpf, is_ebpf, have_exts = false;
@@ -419,7 +416,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
extack);
if (ret < 0)
return ret;
@@ -458,7 +455,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -503,7 +500,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
prog->handle = handle;
- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
+ ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
extack);
if (ret < 0)
goto errout_idr;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index fb881144fa01..ed00001b528a 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -76,7 +76,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -108,8 +108,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags,
+ extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 87398af2715a..972303aa8edd 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -387,7 +387,7 @@ static void flow_destroy_filter_work(struct work_struct *work)
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
@@ -442,8 +442,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err2;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags,
+ extack);
if (err < 0)
goto err2;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index d7869a984881..23b21253b3c3 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1915,23 +1915,22 @@ errout_cleanup:
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr,
- struct fl_flow_tmplt *tmplt, bool rtnl_held,
+ struct nlattr *est,
+ struct fl_flow_tmplt *tmplt, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
- extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
if (tb[TCA_FLOWER_CLASSID]) {
f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
- if (!rtnl_held)
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
rtnl_lock();
tcf_bind_filter(tp, &f->res, base);
- if (!rtnl_held)
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
rtnl_unlock();
}
@@ -1975,10 +1974,11 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = fl_head_dereference(tp);
+ bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
struct cls_fl_filter *fold = *arg;
struct cls_fl_filter *fnew;
struct fl_flow_mask *mask;
@@ -2034,8 +2034,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
}
- err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
- tp->chain->tmplt_priv, rtnl_held, extack);
+ err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
+ tp->chain->tmplt_priv, flags, extack);
if (err)
goto errout;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index ec945294626a..8654b0ce997c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -198,15 +198,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
static int fw_set_parms(struct net *net, struct tcf_proto *tp,
struct fw_filter *f, struct nlattr **tb,
- struct nlattr **tca, unsigned long base, bool ovr,
+ struct nlattr **tca, unsigned long base, u32 flags,
struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
u32 mask;
int err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
- true, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags,
+ extack);
if (err < 0)
return err;
@@ -237,8 +237,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
- bool ovr, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
@@ -277,7 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return err;
}
- err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
+ err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack);
if (err < 0) {
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
@@ -326,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
f->id = handle;
f->tp = tp;
- err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
+ err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index cafb84480bab..24f0046ce0b3 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
static int mall_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long base, struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
- extack);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack);
if (err < 0)
return err;
@@ -183,13 +182,13 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
struct cls_mall_head *new;
- u32 flags = 0;
+ u32 userflags = 0;
int err;
if (!tca[TCA_OPTIONS])
@@ -204,8 +203,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
return err;
if (tb[TCA_MATCHALL_FLAGS]) {
- flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
- if (!tc_flags_valid(flags))
+ userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
+ if (!tc_flags_valid(userflags))
return -EINVAL;
}
@@ -220,14 +219,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (!handle)
handle = 1;
new->handle = handle;
- new->flags = flags;
+ new->flags = userflags;
new->pf = alloc_percpu(struct tc_matchall_pcnt);
if (!new->pf) {
err = -ENOMEM;
goto err_alloc_percpu;
}
- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
+ err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags,
extack);
if (err)
goto err_set_parms;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 5efa3e7ace15..a35ab8c27866 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -382,7 +382,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base, struct route4_filter *f,
u32 handle, struct route4_head *head,
struct nlattr **tb, struct nlattr *est, int new,
- bool ovr, struct netlink_ext_ack *extack)
+ u32 flags, struct netlink_ext_ack *extack)
{
u32 id = 0, to = 0, nhandle = 0x8000;
struct route4_filter *fp;
@@ -390,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
if (err < 0)
return err;
@@ -464,8 +464,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -510,7 +510,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
}
err = route4_set_parms(net, tp, base, f, handle, head, tb,
- tca[TCA_RATE], new, ovr, extack);
+ tca[TCA_RATE], new, flags, extack);
if (err < 0)
goto errout;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 2e288f88ff02..5cd9d6b143c4 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -7,7 +7,7 @@
/*
Comparing to general packet classification problem,
- RSVP needs only sevaral relatively simple rules:
+ RSVP needs only several relatively simple rules:
* (dst, protocol) are always specified,
so that we are able to hash them.
@@ -470,9 +470,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
- u32 handle,
- struct nlattr **tca,
- void **arg, bool ovr, bool rtnl_held,
+ u32 handle, struct nlattr **tca,
+ void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
@@ -499,7 +498,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
extack);
if (err < 0)
goto errout2;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index c4007b9cd16d..742c7d49a958 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -278,6 +278,8 @@ static int tcindex_filter_result_init(struct tcindex_filter_result *r,
TCA_TCINDEX_POLICE);
}
+static void tcindex_free_perfect_hash(struct tcindex_data *cp);
+
static void tcindex_partial_destroy_work(struct work_struct *work)
{
struct tcindex_data *p = container_of(to_rcu_work(work),
@@ -285,7 +287,8 @@ static void tcindex_partial_destroy_work(struct work_struct *work)
rwork);
rtnl_lock();
- kfree(p->perfect);
+ if (p->perfect)
+ tcindex_free_perfect_hash(p);
kfree(p);
rtnl_unlock();
}
@@ -304,7 +307,7 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
int i, err = 0;
cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (!cp->perfect)
return -ENOMEM;
@@ -327,7 +330,7 @@ static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
+ struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_data *cp = NULL, *oldp;
@@ -339,7 +342,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
if (err < 0)
goto errout;
@@ -526,8 +529,8 @@ errout:
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ struct nlattr **tca, void **arg, u32 flags,
+ struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -548,7 +551,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE], ovr, extack);
+ tca[TCA_RATE], flags, extack);
}
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 6e1abe805448..4272814487f0 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -709,12 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
unsigned long base,
struct tc_u_knode *n, struct nlattr **tb,
- struct nlattr *est, bool ovr,
+ struct nlattr *est, u32 flags,
struct netlink_ext_ack *extack)
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack);
if (err < 0)
return err;
@@ -840,7 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
+ struct nlattr **tca, void **arg, u32 flags,
struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
@@ -849,7 +849,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tc_u32_sel *s;
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_U32_MAX + 1];
- u32 htid, flags = 0;
+ u32 htid, userflags = 0;
size_t sel_size;
int err;
@@ -868,8 +868,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
if (tb[TCA_U32_FLAGS]) {
- flags = nla_get_u32(tb[TCA_U32_FLAGS]);
- if (!tc_flags_valid(flags)) {
+ userflags = nla_get_u32(tb[TCA_U32_FLAGS]);
+ if (!tc_flags_valid(userflags)) {
NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
return -EINVAL;
}
@@ -884,7 +884,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- if ((n->flags ^ flags) &
+ if ((n->flags ^ userflags) &
~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
return -EINVAL;
@@ -895,7 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -ENOMEM;
err = u32_set_parms(net, tp, base, new, tb,
- tca[TCA_RATE], ovr, extack);
+ tca[TCA_RATE], flags, extack);
if (err) {
u32_destroy_key(new, false);
@@ -955,9 +955,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->handle = handle;
ht->prio = tp->prio;
idr_init(&ht->handle_idr);
- ht->flags = flags;
+ ht->flags = userflags;
- err = u32_replace_hw_hnode(tp, ht, flags, extack);
+ err = u32_replace_hw_hnode(tp, ht, userflags, extack);
if (err) {
idr_remove(&tp_c->handle_idr, handle);
kfree(ht);
@@ -1038,7 +1038,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
RCU_INIT_POINTER(n->ht_up, ht);
n->handle = handle;
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
- n->flags = flags;
+ n->flags = userflags;
err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
if (err < 0)
@@ -1060,7 +1060,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
+ err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags,
extack);
if (err == 0) {
struct tc_u_knode __rcu **ins;
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index f885bea5b452..4ce681361851 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -141,7 +141,7 @@ errout:
EXPORT_SYMBOL(tcf_em_register);
/**
- * tcf_em_unregister - unregster and extended match
+ * tcf_em_unregister - unregister an extended match
*
* @ops: ematch operations lookup table
*
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f87d07736a14..5e90e9b160e3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1845,7 +1845,6 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
- int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
@@ -1856,11 +1855,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
return -EINVAL;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
- return err;
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int tclass_del_notify(struct net *net,
@@ -1894,8 +1890,6 @@ static int tclass_del_notify(struct net *net,
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
- if (err > 0)
- err = 0;
return err;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index d0c9a57398fc..7d8518176b45 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -394,7 +394,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
list_for_each_entry(flow, &p->flows, list) {
fl = rcu_dereference_bh(flow->filter_list);
if (fl) {
- result = tcf_classify(skb, fl, &res, true);
+ result = tcf_classify(skb, NULL, fl, &res, true);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 951542843cab..3c2300d14468 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -720,7 +720,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
skip_hash:
if (flow_override)
flow_hash = flow_override - 1;
- else if (use_skbhash)
+ else if (use_skbhash && (flow_mode & CAKE_FLOW_FLOWS))
flow_hash = skb->hash;
if (host_override) {
dsthost_hash = host_override - 1;
@@ -1665,7 +1665,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
goto hash;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tcf_classify(skb, filter, &res, false);
+ result = tcf_classify(skb, NULL, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index b79a7e27bb31..e0da15530f0e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -228,7 +228,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
/*
* Step 2+n. Apply classifier.
*/
- result = tcf_classify(skb, fl, &res, true);
+ result = tcf_classify(skb, NULL, fl, &res, true);
if (!fl || result < 0)
goto fallback;
@@ -1614,7 +1614,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
- return err;
+ goto failure;
}
if (tca[TCA_RATE]) {
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index fc1e47069593..642cd179b7a7 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -317,7 +317,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tcf_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index d320bcfb2da2..4c100d105269 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -242,7 +242,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
else {
struct tcf_result res;
struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tcf_classify(skb, fl, &res, false);
+ int result = tcf_classify(skb, NULL, fl, &res, false);
pr_debug("result %d class 0x%04x\n", result, res.classid);
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index c1e84d1eeaba..1f857ffd1ac2 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -390,7 +390,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tcf_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, NULL, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
@@ -660,6 +660,13 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
q->nbands = nbands;
+ for (i = nstrict; i < q->nstrict; i++) {
+ INIT_LIST_HEAD(&q->classes[i].alist);
+ if (q->classes[i].qdisc->q.qlen) {
+ list_add_tail(&q->classes[i].alist, &q->active);
+ q->classes[i].deficit = quanta[i];
+ }
+ }
q->nstrict = nstrict;
memcpy(q->prio2band, priomap, sizeof(priomap));
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index bbd5f8753600..bb0cd6d3d2c2 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -91,7 +91,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tcf_classify(skb, filter, &res, false);
+ result = tcf_classify(skb, NULL, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -369,6 +369,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
+ u32 quantum = 0;
int err;
if (!opt)
@@ -386,6 +387,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
q->flows_cnt > 65536)
return -EINVAL;
}
+ if (tb[TCA_FQ_CODEL_QUANTUM]) {
+ quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+ if (quantum > FQ_CODEL_QUANTUM_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid quantum");
+ return -EINVAL;
+ }
+ }
sch_tree_lock(sch);
if (tb[TCA_FQ_CODEL_TARGET]) {
@@ -412,8 +420,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_CODEL_ECN])
q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
- if (tb[TCA_FQ_CODEL_QUANTUM])
- q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+ if (quantum)
+ q->quantum = quantum;
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
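The quantum handling above moves the parse and range check ahead of sch_tree_lock(), so an invalid value is rejected with an extack message before any qdisc state changes, and only the pre-validated value is committed under the lock. Reduced to its core, the validate-then-commit shape looks roughly like this (FQ_CODEL_QUANTUM_MAX comes from headers touched elsewhere in this series; the snippet is a sketch, not the final code):

	u32 quantum = 0;

	if (tb[TCA_FQ_CODEL_QUANTUM]) {
		quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
		if (quantum > FQ_CODEL_QUANTUM_MAX) {
			NL_SET_ERR_MSG(extack, "Invalid quantum");
			return -EINVAL;		/* reject before locking */
		}
	}

	sch_tree_lock(sch);
	if (quantum)
		q->quantum = quantum;	/* commit only the validated value */
	sch_tree_unlock(sch);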
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index cac684952edc..830f3559f727 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -94,7 +94,7 @@ static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_pie_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tcf_classify(skb, filter, &res, false);
+ result = tcf_classify(skb, NULL, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index fc8b56bcabf3..a8dd06c74e31 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q,
*/
if (!netif_xmit_frozen_or_stopped(txq))
set_bit(__QDISC_STATE_MISSED, &q->state);
+ else
+ set_bit(__QDISC_STATE_DRAINING, &q->state);
}
/* Main transmission queue. */
@@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
skb = next;
}
- if (lock)
+
+ if (lock) {
spin_unlock(lock);
- __netif_schedule(q);
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ } else {
+ __netif_schedule(q);
+ }
}
static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q)
while (qdisc_restart(q, &packets)) {
quota -= packets;
if (quota <= 0) {
- __netif_schedule(q);
+ if (q->flags & TCQ_F_NOLOCK)
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ else
+ __netif_schedule(q);
+
break;
}
}
@@ -540,6 +550,24 @@ void netif_carrier_off(struct net_device *dev)
}
EXPORT_SYMBOL(netif_carrier_off);
+/**
+ * netif_carrier_event - report carrier state event
+ * @dev: network device
+ *
+ * Device has detected a carrier event but the carrier state wasn't changed.
+ * Use in drivers when querying carrier state asynchronously, to avoid missing
+ * events (link flaps) if link recovers before it's queried.
+ */
+void netif_carrier_event(struct net_device *dev)
+{
+ if (dev->reg_state == NETREG_UNINITIALIZED)
+ return;
+ atomic_inc(&dev->carrier_up_count);
+ atomic_inc(&dev->carrier_down_count);
+ linkwatch_fire_event(dev);
+}
+EXPORT_SYMBOL_GPL(netif_carrier_event);
+
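Per the kernel-doc above, netif_carrier_event() is meant for drivers that learn about link flaps asynchronously and may find the carrier already restored by the time they query it: it bumps both carrier counters and fires a linkwatch event without toggling carrier state. A hedged sketch of such a call site (the handler and flag names are invented for illustration):

	/* Illustrative driver-side link handler: if the link is already back
	 * up but the device reported a flap, surface the event so the
	 * carrier_up/down counters still move.
	 */
	static void example_link_change(struct net_device *dev, bool link_up,
					bool flapped)
	{
		if (link_up && netif_carrier_ok(dev)) {
			if (flapped)
				netif_carrier_event(dev);
			return;
		}

		if (link_up)
			netif_carrier_on(dev);
		else
			netif_carrier_off(dev);
	}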
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
under all circumstances. It is difficult to invent anything faster or
cheaper.
@@ -680,13 +708,14 @@ retry:
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
} else if (need_retry &&
- test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+ READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
/* Delay clearing the STATE_MISSED here to reduce
* the overhead of the second spin_trylock() in
* qdisc_run_begin() and __netif_schedule() calling
* in qdisc_run_end().
*/
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
/* Make sure dequeuing happens after clearing
* STATE_MISSED.
@@ -696,8 +725,6 @@ retry:
need_retry = false;
goto retry;
- } else {
- WRITE_ONCE(qdisc->empty, true);
}
return skb;
@@ -886,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
- lockdep_set_class(&sch->busylock,
+ lockdep_set_class(&sch->seqlock,
dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
seqcount_init(&sch->running);
@@ -898,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
- sch->empty = true;
dev_hold(dev);
refcount_set(&sch->refcnt, 1);
@@ -1204,6 +1230,7 @@ static void dev_reset_queue(struct net_device *dev,
spin_unlock_bh(qdisc_lock(qdisc));
if (nolock) {
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
spin_unlock_bh(&qdisc->seqlock);
}
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index f4132dc25ac0..621dc6afde8f 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -6,7 +6,7 @@
*
* 991129: - Bug fix with grio mode
* - a better sing. AvgQ mode with Grio(WRED)
- * - A finer grained VQ dequeue based on sugestion
+ * - A finer grained VQ dequeue based on suggestion
* from Ren Liu
* - More error checks
*
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index bf0034c66e35..b7ac30cca035 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1130,7 +1130,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
tcf = rcu_dereference_bh(q->root.filter_list);
- while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
+ while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 8827987ba903..5067a6e5d4fd 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -52,7 +52,7 @@
*/
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
-#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */
+#define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */
#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
@@ -125,6 +125,7 @@ struct htb_class {
struct htb_class_leaf {
int deficit[TC_HTB_MAXDEPTH];
struct Qdisc *q;
+ struct netdev_queue *offload_queue;
} leaf;
struct htb_class_inner {
struct htb_prio clprio[TC_HTB_NUMPRIO];
@@ -238,7 +239,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
+ while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
@@ -273,6 +274,9 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
/**
* htb_add_to_id_tree - adds class to the round robin list
+ * @root: the root of the tree
+ * @cl: the class to add
+ * @prio: the given prio in class
*
* Routine adds class to the list (actually tree) sorted by classid.
* Make sure that class is not already on such list for given prio.
@@ -298,6 +302,9 @@ static void htb_add_to_id_tree(struct rb_root *root,
/**
* htb_add_to_wait_tree - adds class to the event queue with delay
+ * @q: the priority event queue
+ * @cl: the class to add
+ * @delay: delay in microseconds
*
* The class is added to priority event queue to indicate that class will
* change its mode in cl->pq_key microseconds. Make sure that class is not
@@ -331,6 +338,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
/**
* htb_next_rb_node - finds next node in binary tree
+ * @n: the current node in binary tree
*
* When we are past last key we return NULL.
* Average complexity is 2 steps per call.
@@ -342,6 +350,9 @@ static inline void htb_next_rb_node(struct rb_node **n)
/**
* htb_add_class_to_row - add class to its row
+ * @q: the priority event queue
+ * @cl: the class to add
+ * @mask: the given priorities in class in bitmap
*
* The class is added to row at priorities marked in mask.
* It does nothing if mask == 0.
@@ -371,6 +382,9 @@ static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
/**
* htb_remove_class_from_row - removes class from its row
+ * @q: the priority event queue
+ * @cl: the class to add
+ * @mask: the given priorities in class in bitmap
*
* The class is removed from row at priorities marked in mask.
* It does nothing if mask == 0.
@@ -398,6 +412,8 @@ static inline void htb_remove_class_from_row(struct htb_sched *q,
/**
* htb_activate_prios - creates active classe's feed chain
+ * @q: the priority event queue
+ * @cl: the class to activate
*
* The class is connected to ancestors and/or appropriate rows
* for priorities it is participating on. cl->cmode must be new
@@ -433,6 +449,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
/**
* htb_deactivate_prios - remove class from feed chain
+ * @q: the priority event queue
+ * @cl: the class to deactivate
*
* cl->cmode must represent old mode (before deactivation). It does
* nothing if cl->prio_activity == 0. Class is removed from all feed
@@ -493,6 +511,8 @@ static inline s64 htb_hiwater(const struct htb_class *cl)
/**
* htb_class_mode - computes and returns current class mode
+ * @cl: the target class
+ * @diff: diff time in microseconds
*
* It computes cl's mode at time cl->t_c+diff and returns it. If mode
* is not HTB_CAN_SEND then cl->pq_key is updated to time difference
@@ -521,9 +541,12 @@ htb_class_mode(struct htb_class *cl, s64 *diff)
/**
* htb_change_class_mode - changes classe's mode
+ * @q: the priority event queue
+ * @cl: the target class
+ * @diff: diff time in microseconds
*
* This should be the only way how to change classe's mode under normal
- * cirsumstances. Routine will update feed lists linkage, change mode
+ * circumstances. Routine will update feed lists linkage, change mode
* and add class to the wait event queue if appropriate. New mode should
* be different from old one and cl->pq_key has to be valid if changing
* to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
@@ -553,6 +576,8 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
/**
* htb_activate - inserts leaf cl into appropriate active feeds
+ * @q: the priority event queue
+ * @cl: the target class
*
* Routine learns (new) priority of leaf and activates feed chain
* for the prio. It can be called on already active leaf safely.
@@ -570,6 +595,8 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
/**
* htb_deactivate - remove leaf cl from active feeds
+ * @q: the priority event queue
+ * @cl: the target class
*
* Make sure that leaf is active. In the other words it can't be called
* with non-active leaf. It also removes class from the drop list.
@@ -649,6 +676,10 @@ static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
/**
* htb_charge_class - charges amount "bytes" to leaf and ancestors
+ * @q: the priority event queue
+ * @cl: the class to start iterate
+ * @level: the minimum level to account
+ * @skb: the socket buffer
*
* Routine assumes that packet "bytes" long was dequeued from leaf cl
* borrowing from "level". It accounts bytes to ceil leaky bucket for
@@ -698,6 +729,9 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
/**
* htb_do_events - make mode changes to classes at the level
+ * @q: the priority event queue
+ * @level: which wait_pq in 'q->hlevel'
+ * @start: start jiffies
*
* Scans event queue for pending events and applies them. Returns time of
* next pending event (0 for no event in pq, q->now for too many events).
@@ -766,6 +800,8 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
/**
* htb_lookup_leaf - returns next leaf class in DRR order
+ * @hprio: the current one
+ * @prio: which prio in class
*
* Find leaf where current feed pointers points to.
*/
@@ -1376,24 +1412,47 @@ htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
return old_q;
}
-static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new)
+static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
+{
+ struct netdev_queue *queue;
+
+ queue = cl->leaf.offload_queue;
+ if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
+ WARN_ON(cl->leaf.q->dev_queue != queue);
+
+ return queue;
+}
+
+static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
+ struct htb_class *cl_new, bool destroying)
{
struct netdev_queue *queue_old, *queue_new;
struct net_device *dev = qdisc_dev(sch);
- struct Qdisc *qdisc;
- queue_old = netdev_get_tx_queue(dev, qid_old);
- queue_new = netdev_get_tx_queue(dev, qid_new);
+ queue_old = htb_offload_get_queue(cl_old);
+ queue_new = htb_offload_get_queue(cl_new);
- if (dev->flags & IFF_UP)
- dev_deactivate(dev);
- qdisc = dev_graft_qdisc(queue_old, NULL);
- qdisc->dev_queue = queue_new;
- qdisc = dev_graft_qdisc(queue_new, qdisc);
- if (dev->flags & IFF_UP)
- dev_activate(dev);
+ if (!destroying) {
+ struct Qdisc *qdisc;
- WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+ qdisc = dev_graft_qdisc(queue_old, NULL);
+ WARN_ON(qdisc != cl_old->leaf.q);
+ }
+
+ if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
+ cl_old->leaf.q->dev_queue = queue_new;
+ cl_old->leaf.offload_queue = queue_new;
+
+ if (!destroying) {
+ struct Qdisc *qdisc;
+
+ qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+ WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
+ }
}
static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
@@ -1407,10 +1466,8 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (cl->level)
return -EINVAL;
- if (q->offload) {
- dev_queue = new->dev_queue;
- WARN_ON(dev_queue != cl->leaf.q->dev_queue);
- }
+ if (q->offload)
+ dev_queue = htb_offload_get_queue(cl);
if (!new) {
new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
@@ -1479,6 +1536,8 @@ static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
parent->ctokens = parent->cbuffer;
parent->t_c = ktime_get_ns();
parent->cmode = HTB_CAN_SEND;
+ if (q->offload)
+ parent->leaf.offload_queue = cl->leaf.offload_queue;
}
static void htb_parent_to_leaf_offload(struct Qdisc *sch,
@@ -1499,6 +1558,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
struct netlink_ext_ack *extack)
{
struct tc_htb_qopt_offload offload_opt;
+ struct netdev_queue *dev_queue;
struct Qdisc *q = cl->leaf.q;
struct Qdisc *old = NULL;
int err;
@@ -1507,16 +1567,15 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
return -EINVAL;
WARN_ON(!q);
- if (!destroying) {
- /* On destroy of HTB, two cases are possible:
- * 1. q is a normal qdisc, but q->dev_queue has noop qdisc.
- * 2. q is a noop qdisc (for nodes that were inner),
- * q->dev_queue is noop_netdev_queue.
+ dev_queue = htb_offload_get_queue(cl);
+ old = htb_graft_helper(dev_queue, NULL);
+ if (destroying)
+ /* Before HTB is destroyed, the kernel grafts noop_qdisc to
+ * all queues.
*/
- old = htb_graft_helper(q->dev_queue, NULL);
- WARN_ON(!old);
+ WARN_ON(!(old->flags & TCQ_F_BUILTIN));
+ else
WARN_ON(old != q);
- }
if (cl->parent) {
cl->parent->bstats_bias.bytes += q->bstats.bytes;
@@ -1535,18 +1594,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
if (!err || destroying)
qdisc_put(old);
else
- htb_graft_helper(q->dev_queue, old);
+ htb_graft_helper(dev_queue, old);
if (last_child)
return err;
- if (!err && offload_opt.moved_qid != 0) {
- if (destroying)
- q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch),
- offload_opt.qid);
- else
- htb_offload_move_qdisc(sch, offload_opt.moved_qid,
- offload_opt.qid);
+ if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
+ u32 classid = TC_H_MAJ(sch->handle) |
+ TC_H_MIN(offload_opt.classid);
+ struct htb_class *moved_cl = htb_find(classid, sch);
+
+ htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
}
return err;
@@ -1669,9 +1727,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
}
if (last_child) {
- struct netdev_queue *dev_queue;
+ struct netdev_queue *dev_queue = sch->dev_queue;
+
+ if (q->offload)
+ dev_queue = htb_offload_get_queue(cl);
- dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue;
new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
cl->parent->common.classid,
NULL);
@@ -1843,7 +1903,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
}
dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
} else { /* First child. */
- dev_queue = parent->leaf.q->dev_queue;
+ dev_queue = htb_offload_get_queue(parent);
old_q = htb_graft_helper(dev_queue, NULL);
WARN_ON(old_q != parent->leaf.q);
offload_opt = (struct tc_htb_qopt_offload) {
@@ -1900,6 +1960,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* leaf (we) needs elementary qdisc */
cl->leaf.q = new_q ? new_q : &noop_qdisc;
+ if (q->offload)
+ cl->leaf.offload_queue = dev_queue;
cl->parent = parent;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 5c27b4270b90..e282e7382117 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -36,7 +36,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tcf_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, NULL, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3eabb871a1d5..03fdf31ccb6a 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -39,7 +39,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tcf_classify(skb, fl, &res, false);
+ err = tcf_classify(skb, NULL, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 1db9d4a2ef5e..58a9d42b52b8 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -485,11 +485,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl->qdisc != &noop_qdisc)
qdisc_hash_add(cl->qdisc, true);
- sch_tree_lock(sch);
- qdisc_class_hash_insert(&q->clhash, &cl->common);
- sch_tree_unlock(sch);
-
- qdisc_class_hash_grow(sch, &q->clhash);
set_change_agg:
sch_tree_lock(sch);
@@ -507,8 +502,11 @@ set_change_agg:
}
if (existing)
qfq_deact_rm_from_agg(q, cl);
+ else
+ qdisc_class_hash_insert(&q->clhash, &cl->common);
qfq_add_to_agg(q, new_agg, cl);
sch_tree_unlock(sch);
+ qdisc_class_hash_grow(sch, &q->clhash);
*arg = (unsigned long)cl;
return 0;
@@ -692,7 +690,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tcf_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index dde829d4b9f8..3d061a13d7ed 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -257,7 +257,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
struct tcf_result res;
int result;
- result = tcf_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 066754a18569..f8e569f79f13 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -178,7 +178,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return sfq_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tcf_classify(skb, fl, &res, false);
+ result = tcf_classify(skb, NULL, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 5c91df52b8c2..1ab2fc933a21 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -114,9 +114,6 @@ static void taprio_free_sched_cb(struct rcu_head *head)
struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
struct sched_entry *entry, *n;
- if (!sched)
- return;
-
list_for_each_entry_safe(entry, n, &sched->entries, list) {
list_del(&entry->list);
kfree(entry);
@@ -438,6 +435,11 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct Qdisc *child;
int queue;
+ if (unlikely(FULL_OFFLOAD_IS_ENABLED(q->flags))) {
+ WARN_ONCE(1, "Trying to enqueue skb into the root of a taprio qdisc configured with full offload\n");
+ return qdisc_drop(skb, sch, to_free);
+ }
+
queue = skb_get_queue_mapping(skb);
child = q->qdiscs[queue];
@@ -529,23 +531,7 @@ static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
{
- struct taprio_sched *q = qdisc_priv(sch);
- struct net_device *dev = qdisc_dev(sch);
- struct sk_buff *skb;
- int i;
-
- for (i = 0; i < dev->num_tx_queues; i++) {
- struct Qdisc *child = q->qdiscs[i];
-
- if (unlikely(!child))
- continue;
-
- skb = child->ops->peek(child);
- if (!skb)
- continue;
-
- return skb;
- }
+ WARN_ONCE(1, "Trying to peek into the root of a taprio qdisc configured with full offload\n");
return NULL;
}
@@ -578,7 +564,7 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
/* if there's no entry, it means that the schedule didn't
* start yet, so force all gates to be open, this is in
* accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
- * "AdminGateSates"
+ * "AdminGateStates"
*/
gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
@@ -654,27 +640,7 @@ done:
static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
{
- struct taprio_sched *q = qdisc_priv(sch);
- struct net_device *dev = qdisc_dev(sch);
- struct sk_buff *skb;
- int i;
-
- for (i = 0; i < dev->num_tx_queues; i++) {
- struct Qdisc *child = q->qdiscs[i];
-
- if (unlikely(!child))
- continue;
-
- skb = child->ops->dequeue(child);
- if (unlikely(!skb))
- continue;
-
- qdisc_bstats_update(sch, skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
-
- return skb;
- }
+ WARN_ONCE(1, "Trying to dequeue from the root of a taprio qdisc configured with full offload\n");
return NULL;
}
@@ -1547,7 +1513,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
taprio_set_picos_per_byte(dev, q);
if (mqprio) {
- netdev_set_num_tc(dev, mqprio->num_tc);
+ err = netdev_set_num_tc(dev, mqprio->num_tc);
+ if (err)
+ goto free_sched;
for (i = 0; i < mqprio->num_tc; i++)
netdev_set_tc_queue(dev, i,
mqprio->count[i],
@@ -1759,6 +1727,35 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
return taprio_change(sch, opt, extack);
}
+static void taprio_attach(struct Qdisc *sch)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ unsigned int ntx;
+
+ /* Attach underlying qdisc */
+ for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+ struct Qdisc *qdisc = q->qdiscs[ntx];
+ struct Qdisc *old;
+
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
+ old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+ } else {
+ old = dev_graft_qdisc(qdisc->dev_queue, sch);
+ qdisc_refcount_inc(sch);
+ }
+ if (old)
+ qdisc_put(old);
+ }
+
+ /* access to the child qdiscs is not needed in offload mode */
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ kfree(q->qdiscs);
+ q->qdiscs = NULL;
+ }
+}
+
static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
unsigned long cl)
{
@@ -1785,8 +1782,12 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl,
if (dev->flags & IFF_UP)
dev_deactivate(dev);
- *old = q->qdiscs[cl - 1];
- q->qdiscs[cl - 1] = new;
+ if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ *old = dev_graft_qdisc(dev_queue, new);
+ } else {
+ *old = q->qdiscs[cl - 1];
+ q->qdiscs[cl - 1] = new;
+ }
if (new)
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
@@ -2020,6 +2021,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
.change = taprio_change,
.destroy = taprio_destroy,
.reset = taprio_reset,
+ .attach = taprio_attach,
.peek = taprio_peek,
.dequeue = taprio_dequeue,
.enqueue = taprio_enqueue,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 336df4b36655..be29da09cc7a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -98,6 +98,7 @@ static struct sctp_association *sctp_association_init(
* sock configured value.
*/
asoc->hbinterval = msecs_to_jiffies(sp->hbinterval);
+ asoc->probe_interval = msecs_to_jiffies(sp->probe_interval);
asoc->encap_port = sp->encap_port;
@@ -625,6 +626,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
* association configured value.
*/
peer->hbinterval = asoc->hbinterval;
+ peer->probe_interval = asoc->probe_interval;
peer->encap_port = asoc->encap_port;
@@ -714,6 +716,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
return NULL;
}
+ sctp_transport_pl_reset(peer);
+
/* Attach the remote transport to our asoc. */
list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
asoc->peer.transport_count++;
@@ -812,6 +816,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
spc_state = SCTP_ADDR_CONFIRMED;
transport->state = SCTP_ACTIVE;
+ sctp_transport_pl_reset(transport);
break;
case SCTP_TRANSPORT_DOWN:
@@ -821,6 +826,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
*/
if (transport->state != SCTP_UNCONFIRMED) {
transport->state = SCTP_INACTIVE;
+ sctp_transport_pl_reset(transport);
spc_state = SCTP_ADDR_UNREACHABLE;
} else {
sctp_transport_dst_release(transport);
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 6f8319b828b0..db6b7373d16c 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -857,12 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
cur_key->key = key;
- if (replace) {
- list_del_init(&shkey->key_list);
- sctp_auth_shkey_release(shkey);
+ if (!replace) {
+ list_add(&cur_key->key_list, sh_keys);
+ return 0;
}
+
+ list_del_init(&shkey->key_list);
+ sctp_auth_shkey_release(shkey);
list_add(&cur_key->key_list, sh_keys);
+ if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
+ sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+
return 0;
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 53e5ed79f63f..59e653b528b1 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -270,22 +270,19 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
rawaddr = (union sctp_addr_param *)raw_addr_list;
af = sctp_get_af_specific(param_type2af(param->type));
- if (unlikely(!af)) {
+ if (unlikely(!af) ||
+ !af->from_addr_param(&addr, rawaddr, htons(port), 0)) {
retval = -EINVAL;
- sctp_bind_addr_clean(bp);
- break;
+ goto out_err;
}
- af->from_addr_param(&addr, rawaddr, htons(port), 0);
if (sctp_bind_addr_state(bp, &addr) != -1)
goto next;
retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
SCTP_ADDR_SRC, gfp);
- if (retval) {
+ if (retval)
/* Can't finish building the list, clean up. */
- sctp_bind_addr_clean(bp);
- break;
- }
+ goto out_err;
next:
len = ntohs(param->length);
@@ -294,6 +291,12 @@ next:
}
return retval;
+
+out_err:
+ if (retval)
+ sctp_bind_addr_clean(bp);
+
+ return retval;
}
/********************************************************************
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index c4d9c7feffb9..ccd773e4c371 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -154,6 +154,7 @@ static const char *const sctp_timer_tbl[] = {
"TIMEOUT_T5_SHUTDOWN_GUARD",
"TIMEOUT_HEARTBEAT",
"TIMEOUT_RECONF",
+ "TIMEOUT_PROBE",
"TIMEOUT_SACK",
"TIMEOUT_AUTOCLOSE",
};
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 493fc01e5d2b..760b367644c1 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -284,10 +284,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
out:
return err;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index d508f6f3dd08..5ef86fdb1176 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -385,7 +385,9 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
struct sctp_transport *t, __u32 pmtu)
{
- if (!t || (t->pathmtu <= pmtu))
+ if (!t ||
+ (t->pathmtu <= pmtu &&
+ t->pl.probe_size + sctp_transport_pl_hlen(t) <= pmtu))
return;
if (sock_owned_by_user(sk)) {
@@ -554,6 +556,50 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
sctp_transport_put(t);
}
+static void sctp_v4_err_handle(struct sctp_transport *t, struct sk_buff *skb,
+ __u8 type, __u8 code, __u32 info)
+{
+ struct sctp_association *asoc = t->asoc;
+ struct sock *sk = asoc->base.sk;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_PARAMETERPROB:
+ err = EPROTO;
+ break;
+ case ICMP_DEST_UNREACH:
+ if (code > NR_ICMP_UNREACH)
+ return;
+ if (code == ICMP_FRAG_NEEDED) {
+ sctp_icmp_frag_needed(sk, asoc, t, SCTP_TRUNC4(info));
+ return;
+ }
+ if (code == ICMP_PROT_UNREACH) {
+ sctp_icmp_proto_unreachable(sk, asoc, t);
+ return;
+ }
+ err = icmp_err_convert[code].errno;
+ break;
+ case ICMP_TIME_EXCEEDED:
+ if (code == ICMP_EXC_FRAGTIME)
+ return;
+
+ err = EHOSTUNREACH;
+ break;
+ case ICMP_REDIRECT:
+ sctp_icmp_redirect(sk, t, skb);
+ return;
+ default:
+ return;
+ }
+ if (!sock_owned_by_user(sk) && inet_sk(sk)->recverr) {
+ sk->sk_err = err;
+ sk_error_report(sk);
+ } else { /* Only an error on timeout */
+ sk->sk_err_soft = err;
+ }
+}
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -572,22 +618,19 @@ void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
int sctp_v4_err(struct sk_buff *skb, __u32 info)
{
const struct iphdr *iph = (const struct iphdr *)skb->data;
- const int ihlen = iph->ihl * 4;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
- struct sock *sk;
- struct sctp_association *asoc = NULL;
+ struct net *net = dev_net(skb->dev);
struct sctp_transport *transport;
- struct inet_sock *inet;
+ struct sctp_association *asoc;
__u16 saveip, savesctp;
- int err;
- struct net *net = dev_net(skb->dev);
+ struct sock *sk;
/* Fix up skb to look at the embedded net header. */
saveip = skb->network_header;
savesctp = skb->transport_header;
skb_reset_network_header(skb);
- skb_set_transport_header(skb, ihlen);
+ skb_set_transport_header(skb, iph->ihl * 4);
sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back, the original values. */
skb->network_header = saveip;
@@ -596,59 +639,41 @@ int sctp_v4_err(struct sk_buff *skb, __u32 info)
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return -ENOENT;
}
- /* Warning: The sock lock is held. Remember to call
- * sctp_err_finish!
- */
- switch (type) {
- case ICMP_PARAMETERPROB:
- err = EPROTO;
- break;
- case ICMP_DEST_UNREACH:
- if (code > NR_ICMP_UNREACH)
- goto out_unlock;
+ sctp_v4_err_handle(transport, skb, type, code, info);
+ sctp_err_finish(sk, transport);
- /* PMTU discovery (RFC1191) */
- if (ICMP_FRAG_NEEDED == code) {
- sctp_icmp_frag_needed(sk, asoc, transport,
- SCTP_TRUNC4(info));
- goto out_unlock;
- } else {
- if (ICMP_PROT_UNREACH == code) {
- sctp_icmp_proto_unreachable(sk, asoc,
- transport);
- goto out_unlock;
- }
- }
- err = icmp_err_convert[code].errno;
- break;
- case ICMP_TIME_EXCEEDED:
- /* Ignore any time exceeded errors due to fragment reassembly
- * timeouts.
- */
- if (ICMP_EXC_FRAGTIME == code)
- goto out_unlock;
+ return 0;
+}
- err = EHOSTUNREACH;
- break;
- case ICMP_REDIRECT:
- sctp_icmp_redirect(sk, transport, skb);
- /* Fall through to out_unlock. */
- default:
- goto out_unlock;
+int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ struct icmphdr *hdr;
+ __u32 info = 0;
+
+ skb->transport_header += sizeof(struct udphdr);
+ sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &t);
+ if (!sk) {
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+ return -ENOENT;
}
- inet = inet_sk(sk);
- if (!sock_owned_by_user(sk) && inet->recverr) {
- sk->sk_err = err;
- sk->sk_error_report(sk);
- } else { /* Only an error on timeout */
- sk->sk_err_soft = err;
+ skb->transport_header -= sizeof(struct udphdr);
+ hdr = (struct icmphdr *)(skb_network_header(skb) - sizeof(struct icmphdr));
+ if (hdr->type == ICMP_REDIRECT) {
+ /* can't be handled without outer iphdr known, leave it to udp_err */
+ sctp_err_finish(sk, t);
+ return 0;
}
+ if (hdr->type == ICMP_DEST_UNREACH && hdr->code == ICMP_FRAG_NEEDED)
+ info = ntohs(hdr->un.frag.mtu);
+ sctp_v4_err_handle(t, skb, hdr->type, hdr->code, info);
-out_unlock:
- sctp_err_finish(sk, transport);
- return 0;
+ sctp_err_finish(sk, t);
+ return 1;
}
/*
@@ -1131,7 +1156,8 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
if (!af)
continue;
- af->from_addr_param(paddr, params.addr, sh->source, 0);
+ if (!af->from_addr_param(paddr, params.addr, sh->source, 0))
+ continue;
asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
if (asoc)
@@ -1167,6 +1193,9 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
union sctp_addr_param *param;
union sctp_addr paddr;
+ if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr))
+ return NULL;
+
/* Skip over the ADDIP header and find the Address parameter */
param = (union sctp_addr_param *)(asconf + 1);
@@ -1174,7 +1203,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
if (unlikely(!af))
return NULL;
- af->from_addr_param(&paddr, param, peer_port, 0);
+ if (!af->from_addr_param(&paddr, param, peer_port, 0))
+ return NULL;
return __sctp_lookup_association(net, laddr, &paddr, transportp);
}
@@ -1236,6 +1266,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
net, ch, laddr,
sctp_hdr(skb)->source,
transportp);
+ break;
default:
break;
}
@@ -1245,7 +1276,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
ch = (struct sctp_chunkhdr *)ch_end;
chunk_num++;
- } while (ch_end < skb_tail_pointer(skb));
+ } while (ch_end + sizeof(*ch) < skb_tail_pointer(skb));
return asoc;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index bd08807c9e44..470dbdc27d58 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -100,8 +100,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
list_for_each_entry_safe(addr, temp,
&net->sctp.local_addr_list, list) {
if (addr->a.sa.sa_family == AF_INET6 &&
- ipv6_addr_equal(&addr->a.v6.sin6_addr,
- &ifa->addr)) {
+ ipv6_addr_equal(&addr->a.v6.sin6_addr,
+ &ifa->addr) &&
+ addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) {
sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
@@ -122,54 +123,28 @@ static struct notifier_block sctp_inet6addr_notifier = {
.notifier_call = sctp_inet6addr_event,
};
-/* ICMP error handler. */
-static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
+static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb,
+ __u8 type, __u8 code, __u32 info)
{
- struct inet6_dev *idev;
- struct sock *sk;
- struct sctp_association *asoc;
- struct sctp_transport *transport;
+ struct sctp_association *asoc = t->asoc;
+ struct sock *sk = asoc->base.sk;
struct ipv6_pinfo *np;
- __u16 saveip, savesctp;
- int err, ret = 0;
- struct net *net = dev_net(skb->dev);
-
- idev = in6_dev_get(skb->dev);
-
- /* Fix up skb to look at the embedded net header. */
- saveip = skb->network_header;
- savesctp = skb->transport_header;
- skb_reset_network_header(skb);
- skb_set_transport_header(skb, offset);
- sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
- /* Put back, the original pointers. */
- skb->network_header = saveip;
- skb->transport_header = savesctp;
- if (!sk) {
- __ICMP6_INC_STATS(net, idev, ICMP6_MIB_INERRORS);
- ret = -ENOENT;
- goto out;
- }
-
- /* Warning: The sock lock is held. Remember to call
- * sctp_err_finish!
- */
+ int err = 0;
switch (type) {
case ICMPV6_PKT_TOOBIG:
if (ip6_sk_accept_pmtu(sk))
- sctp_icmp_frag_needed(sk, asoc, transport, ntohl(info));
- goto out_unlock;
+ sctp_icmp_frag_needed(sk, asoc, t, info);
+ return;
case ICMPV6_PARAMPROB:
if (ICMPV6_UNK_NEXTHDR == code) {
- sctp_icmp_proto_unreachable(sk, asoc, transport);
- goto out_unlock;
+ sctp_icmp_proto_unreachable(sk, asoc, t);
+ return;
}
break;
case NDISC_REDIRECT:
- sctp_icmp_redirect(sk, transport, skb);
- goto out_unlock;
+ sctp_icmp_redirect(sk, t, skb);
+ return;
default:
break;
}
@@ -178,18 +153,70 @@ static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
icmpv6_err_convert(type, code, &err);
if (!sock_owned_by_user(sk) && np->recverr) {
sk->sk_err = err;
- sk->sk_error_report(sk);
- } else { /* Only an error on timeout */
+ sk_error_report(sk);
+ } else {
sk->sk_err_soft = err;
}
+}
+
+/* ICMP error handler. */
+static int sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_transport *transport;
+ struct sctp_association *asoc;
+ __u16 saveip, savesctp;
+ struct sock *sk;
+
+ /* Fix up skb to look at the embedded net header. */
+ saveip = skb->network_header;
+ savesctp = skb->transport_header;
+ skb_reset_network_header(skb);
+ skb_set_transport_header(skb, offset);
+ sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
+ /* Put back, the original pointers. */
+ skb->network_header = saveip;
+ skb->transport_header = savesctp;
+ if (!sk) {
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ return -ENOENT;
+ }
-out_unlock:
+ sctp_v6_err_handle(transport, skb, type, code, ntohl(info));
sctp_err_finish(sk, transport);
-out:
- if (likely(idev != NULL))
- in6_dev_put(idev);
- return ret;
+ return 0;
+}
+
+int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb)
+{
+ struct net *net = dev_net(skb->dev);
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ struct icmp6hdr *hdr;
+ __u32 info = 0;
+
+ skb->transport_header += sizeof(struct udphdr);
+ sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &t);
+ if (!sk) {
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ return -ENOENT;
+ }
+
+ skb->transport_header -= sizeof(struct udphdr);
+ hdr = (struct icmp6hdr *)(skb_network_header(skb) - sizeof(struct icmp6hdr));
+ if (hdr->icmp6_type == NDISC_REDIRECT) {
+ /* can't be handled without outer ip6hdr known, leave it to udpv6_err */
+ sctp_err_finish(sk, t);
+ return 0;
+ }
+ if (hdr->icmp6_type == ICMPV6_PKT_TOOBIG)
+ info = ntohl(hdr->icmp6_mtu);
+ sctp_v6_err_handle(t, skb, hdr->icmp6_type, hdr->icmp6_code, info);
+
+ sctp_err_finish(sk, t);
+ return 1;
}
static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t)
@@ -551,15 +578,20 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
}
/* Initialize a sctp_addr from an address parameter. */
-static void sctp_v6_from_addr_param(union sctp_addr *addr,
+static bool sctp_v6_from_addr_param(union sctp_addr *addr,
union sctp_addr_param *param,
__be16 port, int iif)
{
+ if (ntohs(param->v6.param_hdr.length) < sizeof(struct sctp_ipv6addr_param))
+ return false;
+
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_port = port;
addr->v6.sin6_flowinfo = 0; /* BUG */
addr->v6.sin6_addr = param->v6.addr;
addr->v6.sin6_scope_id = iif;
+
+ return true;
}
/* Initialize an address parameter from a sctp_addr and return the length
diff --git a/net/sctp/output.c b/net/sctp/output.c
index a6aa17df09ef..4dfb5ea82b05 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -103,8 +103,9 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
sctp_transport_route(tp, NULL, sp);
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
- } else if (!sctp_transport_pmtu_check(tp)) {
- if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ } else if (!sctp_transport_pl_enabled(tp) &&
+ asoc->param_flags & SPP_PMTUD_ENABLE) {
+ if (!sctp_transport_pmtu_check(tp))
sctp_assoc_sync_pmtu(asoc);
}
@@ -211,6 +212,30 @@ enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
return retval;
}
+/* Try to bundle a pad chunk into a packet with a heartbeat chunk for PLPMTUD probe */
+static enum sctp_xmit sctp_packet_bundle_pad(struct sctp_packet *pkt, struct sctp_chunk *chunk)
+{
+ struct sctp_transport *t = pkt->transport;
+ struct sctp_chunk *pad;
+ int overhead = 0;
+
+ if (!chunk->pmtu_probe)
+ return SCTP_XMIT_OK;
+
+ /* calculate the Padding Data size for the pad chunk */
+ overhead += sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
+ overhead += sizeof(struct sctp_sender_hb_info) + sizeof(struct sctp_pad_chunk);
+ pad = sctp_make_pad(t->asoc, t->pl.probe_size - overhead);
+ if (!pad)
+ return SCTP_XMIT_DELAY;
+
+ list_add_tail(&pad->list, &pkt->chunk_list);
+ pkt->size += SCTP_PAD4(ntohs(pad->chunk_hdr->length));
+ chunk->transport = t;
+
+ return SCTP_XMIT_OK;
+}
+
/* Try to bundle an auth chunk into the packet. */
static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt,
struct sctp_chunk *chunk)
@@ -382,6 +407,10 @@ enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
goto finish;
retval = __sctp_packet_append_chunk(packet, chunk);
+ if (retval != SCTP_XMIT_OK)
+ goto finish;
+
+ retval = sctp_packet_bundle_pad(packet, chunk);
finish:
return retval;
@@ -553,7 +582,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
sk = chunk->skb->sk;
/* check gso */
- if (packet->size > tp->pathmtu && !packet->ipfragok) {
+ if (packet->size > tp->pathmtu && !packet->ipfragok && !chunk->pmtu_probe) {
if (!sk_can_gso(sk)) {
pr_err_once("Trying to GSO but underlying device doesn't support it.");
goto out;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 5cb1aa5f067b..ff47091c385e 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -769,7 +769,11 @@ static int sctp_packet_singleton(struct sctp_transport *transport,
sctp_packet_init(&singleton, transport, sport, dport);
sctp_packet_config(&singleton, vtag, 0);
- sctp_packet_append_chunk(&singleton, chunk);
+ if (sctp_packet_append_chunk(&singleton, chunk) != SCTP_XMIT_OK) {
+ list_del_init(&chunk->list);
+ sctp_chunk_free(chunk);
+ return -ENOMEM;
+ }
return sctp_packet_transmit(&singleton, gfp);
}
@@ -929,8 +933,13 @@ static void sctp_outq_flush_ctrl(struct sctp_flush_ctx *ctx)
one_packet = 1;
fallthrough;
- case SCTP_CID_SACK:
case SCTP_CID_HEARTBEAT:
+ if (chunk->pmtu_probe) {
+ sctp_packet_singleton(ctx->transport, chunk, ctx->gfp);
+ break;
+ }
+ fallthrough;
+ case SCTP_CID_SACK:
case SCTP_CID_SHUTDOWN:
case SCTP_CID_ECN_ECNE:
case SCTP_CID_ASCONF:
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6f2bbfeec3a4..ec0f52567c16 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -254,14 +254,19 @@ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
}
/* Initialize a sctp_addr from an address parameter. */
-static void sctp_v4_from_addr_param(union sctp_addr *addr,
+static bool sctp_v4_from_addr_param(union sctp_addr *addr,
union sctp_addr_param *param,
__be16 port, int iif)
{
+ if (ntohs(param->v4.param_hdr.length) < sizeof(struct sctp_ipv4addr_param))
+ return false;
+
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = port;
addr->v4.sin_addr.s_addr = param->v4.addr.s_addr;
memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
+
+ return true;
}
/* Initialize an address parameter from a sctp_addr and return the length
@@ -393,7 +398,8 @@ static enum sctp_scope sctp_v4_scope(union sctp_addr *addr)
retval = SCTP_SCOPE_LINK;
} else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) ||
ipv4_is_private_172(addr->v4.sin_addr.s_addr) ||
- ipv4_is_private_192(addr->v4.sin_addr.s_addr)) {
+ ipv4_is_private_192(addr->v4.sin_addr.s_addr) ||
+ ipv4_is_test_198(addr->v4.sin_addr.s_addr)) {
retval = SCTP_SCOPE_PRIVATE;
} else {
retval = SCTP_SCOPE_GLOBAL;
@@ -850,23 +856,6 @@ static int sctp_udp_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static int sctp_udp_err_lookup(struct sock *sk, struct sk_buff *skb)
-{
- struct sctp_association *asoc;
- struct sctp_transport *t;
- int family;
-
- skb->transport_header += sizeof(struct udphdr);
- family = (ip_hdr(skb)->version == 4) ? AF_INET : AF_INET6;
- sk = sctp_err_lookup(dev_net(skb->dev), family, skb, sctp_hdr(skb),
- &asoc, &t);
- if (!sk)
- return -ENOENT;
-
- sctp_err_finish(sk, t);
- return 0;
-}
-
int sctp_udp_sock_start(struct net *net)
{
struct udp_tunnel_sock_cfg tuncfg = {NULL};
@@ -885,7 +874,7 @@ int sctp_udp_sock_start(struct net *net)
tuncfg.encap_type = 1;
tuncfg.encap_rcv = sctp_udp_rcv;
- tuncfg.encap_err_lookup = sctp_udp_err_lookup;
+ tuncfg.encap_err_lookup = sctp_udp_v4_err;
setup_udp_tunnel_sock(net, sock, &tuncfg);
net->sctp.udp4_sock = sock->sk;
@@ -907,7 +896,7 @@ int sctp_udp_sock_start(struct net *net)
tuncfg.encap_type = 1;
tuncfg.encap_rcv = sctp_udp_rcv;
- tuncfg.encap_err_lookup = sctp_udp_err_lookup;
+ tuncfg.encap_err_lookup = sctp_udp_v6_err;
setup_udp_tunnel_sock(net, sock, &tuncfg);
net->sctp.udp6_sock = sock->sk;
#endif
@@ -1171,7 +1160,6 @@ static const struct net_protocol sctp_protocol = {
.handler = sctp4_rcv,
.err_handler = sctp_v4_err,
.no_policy = 1,
- .netns_ok = 1,
.icmp_strict_tag_validation = 1,
};
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5b44d228b6ca..b8fa8f1a7277 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1160,9 +1160,10 @@ nodata:
/* Make a HEARTBEAT chunk. */
struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
- const struct sctp_transport *transport)
+ const struct sctp_transport *transport,
+ __u32 probe_size)
{
- struct sctp_sender_hb_info hbinfo;
+ struct sctp_sender_hb_info hbinfo = {};
struct sctp_chunk *retval;
retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
@@ -1176,6 +1177,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
hbinfo.daddr = transport->ipaddr;
hbinfo.sent_at = jiffies;
hbinfo.hb_nonce = transport->hb_nonce;
+ hbinfo.probe_size = probe_size;
/* Cast away the 'const', as this is just telling the chunk
* what transport it belongs to.
@@ -1183,6 +1185,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
retval->transport = (struct sctp_transport *) transport;
retval->subh.hbs_hdr = sctp_addto_chunk(retval, sizeof(hbinfo),
&hbinfo);
+ retval->pmtu_probe = !!probe_size;
nodata:
return retval;
@@ -1218,6 +1221,32 @@ nodata:
return retval;
}
+/* RFC4820 3. Padding Chunk (PAD)
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Type = 0x84 | Flags=0 | Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * \ Padding Data /
+ * / \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct sctp_chunk *sctp_make_pad(const struct sctp_association *asoc, int len)
+{
+ struct sctp_chunk *retval;
+
+ retval = sctp_make_control(asoc, SCTP_CID_PAD, 0, len, GFP_ATOMIC);
+ if (!retval)
+ return NULL;
+
+ skb_put_zero(retval->skb, len);
+ retval->chunk_hdr->length = htons(ntohs(retval->chunk_hdr->length) + len);
+ retval->chunk_end = skb_tail_pointer(retval->skb);
+
+ return retval;
+}
+
/* Create an Operation Error chunk with the specified space reserved.
* This routine can be used for containing multiple causes in the chunk.
*/
@@ -2166,9 +2195,16 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_SET_PRIMARY:
- if (ep->asconf_enable)
- break;
- goto unhandled;
+ if (!ep->asconf_enable)
+ goto unhandled;
+
+ if (ntohs(param.p->length) < sizeof(struct sctp_addip_param) +
+ sizeof(struct sctp_paramhdr)) {
+ sctp_process_inv_paramlength(asoc, param.p,
+ chunk, err_chunk);
+ retval = SCTP_IERROR_ABORT;
+ }
+ break;
case SCTP_PARAM_HOST_NAME_ADDRESS:
/* Tell the peer, we won't support this param. */
@@ -2346,11 +2382,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
/* Process the initialization parameters. */
sctp_walk_params(param, peer_init, init_hdr.params) {
- if (!src_match && (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
- param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
+ if (!src_match &&
+ (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
+ param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
af = sctp_get_af_specific(param_type2af(param.p->type));
- af->from_addr_param(&addr, param.addr,
- chunk->sctp_hdr->source, 0);
+ if (!af->from_addr_param(&addr, param.addr,
+ chunk->sctp_hdr->source, 0))
+ continue;
if (sctp_cmp_addr_exact(sctp_source(chunk), &addr))
src_match = 1;
}
@@ -2531,7 +2569,8 @@ static int sctp_process_param(struct sctp_association *asoc,
break;
do_addr_param:
af = sctp_get_af_specific(param_type2af(param.p->type));
- af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0))
+ break;
scope = sctp_scope(peer_addr);
if (sctp_in_scope(net, &addr, scope))
if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
@@ -2632,15 +2671,13 @@ do_addr_param:
addr_param = param.v + sizeof(struct sctp_addip_param);
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
- if (af == NULL)
+ if (!af)
break;
- af->from_addr_param(&addr, addr_param,
- htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, addr_param,
+ htons(asoc->peer.port), 0))
+ break;
- /* if the address is invalid, we can't process it.
- * XXX: see spec for what to do.
- */
if (!af->addr_valid(&addr, NULL, NULL))
break;
@@ -3054,7 +3091,8 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
if (unlikely(!af))
return SCTP_ERROR_DNS_FAILED;
- af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0))
+ return SCTP_ERROR_DNS_FAILED;
/* ADDIP 4.2.1 This parameter MUST NOT contain a broadcast
* or multicast address.
@@ -3331,7 +3369,8 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
/* We have checked the packet before, so we do not check again. */
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
- af->from_addr_param(&addr, addr_param, htons(bp->port), 0);
+ if (!af->from_addr_param(&addr, addr_param, htons(bp->port), 0))
+ return;
switch (asconf_param->param_hdr.type) {
case SCTP_PARAM_ADD_IP:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index ce15d590a615..b3815b568e8e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -471,6 +471,38 @@ out_unlock:
sctp_transport_put(transport);
}
+/* Handle the timeout of the probe timer. */
+void sctp_generate_probe_event(struct timer_list *t)
+{
+ struct sctp_transport *transport = from_timer(transport, t, probe_timer);
+ struct sctp_association *asoc = transport->asoc;
+ struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
+ int error = 0;
+
+ bh_lock_sock(sk);
+ if (sock_owned_by_user(sk)) {
+ pr_debug("%s: sock is busy\n", __func__);
+
+ /* Try again later. */
+ if (!mod_timer(&transport->probe_timer, jiffies + (HZ / 20)))
+ sctp_transport_hold(transport);
+ goto out_unlock;
+ }
+
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
+ SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_PROBE),
+ asoc->state, asoc->ep, asoc,
+ transport, GFP_ATOMIC);
+
+ if (error)
+ sk->sk_err = -error;
+
+out_unlock:
+ bh_unlock_sock(sk);
+ sctp_transport_put(transport);
+}
+
/* Inject a SACK Timeout event into the state machine. */
static void sctp_generate_sack_event(struct timer_list *t)
{
@@ -1641,6 +1673,11 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
sctp_cmd_hb_timers_stop(commands, asoc);
break;
+ case SCTP_CMD_PROBE_TIMER_UPDATE:
+ t = cmd->obj.transport;
+ sctp_transport_reset_probe_timer(t);
+ break;
+
case SCTP_CMD_REPORT_ERROR:
error = cmd->obj.error;
break;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index fd1e319eda00..32df65f68c12 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -361,7 +361,7 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
/* If the INIT is coming toward a closing socket, we'll send back
* and ABORT. Essentially, this catches the race of INIT being
- * backloged to the socket at the same time as the user isses close().
+ * backlogged to the socket at the same time as the user issues close().
* Since the socket and all its associations are going away, we
* can treat this OOTB
*/
@@ -608,8 +608,8 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net,
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_COOKIE_ECHOED));
- /* SCTP-AUTH: genereate the assocition shared keys so that
- * we can potentially signe the COOKIE-ECHO.
+ /* SCTP-AUTH: generate the association shared keys so that
+ * we can potentially sign the COOKIE-ECHO.
*/
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_SHKEY, SCTP_NULL());
@@ -787,7 +787,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
goto nomem_init;
/* SCTP-AUTH: Now that we've populate required fields in
- * sctp_process_init, set up the assocaition shared keys as
+ * sctp_process_init, set up the association shared keys as
* necessary so that we can potentially authenticate the ACK
*/
error = sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC);
@@ -838,7 +838,7 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
/* Add all the state machine commands now since we've created
* everything. This way we don't introduce memory corruptions
- * during side-effect processing and correclty count established
+ * during side-effect processing and correctly count established
* associations.
*/
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
@@ -923,7 +923,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
commands);
/* Reset init error count upon receipt of COOKIE-ACK,
- * to avoid problems with the managemement of this
+ * to avoid problems with the management of this
* counter in stale cookie situations when a transition back
* from the COOKIE-ECHOED state to the COOKIE-WAIT
* state is performed.
@@ -1004,7 +1004,7 @@ static enum sctp_disposition sctp_sf_heartbeat(
struct sctp_chunk *reply;
/* Send a heartbeat to our peer. */
- reply = sctp_make_heartbeat(asoc, transport);
+ reply = sctp_make_heartbeat(asoc, transport, 0);
if (!reply)
return SCTP_DISPOSITION_NOMEM;
@@ -1095,6 +1095,32 @@ enum sctp_disposition sctp_sf_send_reconf(struct net *net,
return SCTP_DISPOSITION_CONSUME;
}
+/* send hb chunk with padding for PLPMTUD. */
+enum sctp_disposition sctp_sf_send_probe(struct net *net,
+ const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type,
+ void *arg,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_transport *transport = (struct sctp_transport *)arg;
+ struct sctp_chunk *reply;
+
+ if (!sctp_transport_pl_enabled(transport))
+ return SCTP_DISPOSITION_CONSUME;
+
+ if (sctp_transport_pl_send(transport)) {
+ reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+ if (!reply)
+ return SCTP_DISPOSITION_NOMEM;
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+ }
+ sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
+ SCTP_TRANSPORT(transport));
+
+ return SCTP_DISPOSITION_CONSUME;
+}
+
/*
* Process an heartbeat request.
*
@@ -1243,6 +1269,17 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
if (hbinfo->hb_nonce != link->hb_nonce)
return SCTP_DISPOSITION_DISCARD;
+ if (hbinfo->probe_size) {
+ if (hbinfo->probe_size != link->pl.probe_size ||
+ !sctp_transport_pl_enabled(link))
+ return SCTP_DISPOSITION_DISCARD;
+
+ if (sctp_transport_pl_recv(link))
+ return SCTP_DISPOSITION_CONSUME;
+
+ return sctp_sf_send_probe(net, ep, asoc, type, link, commands);
+ }
+
max_interval = link->hbinterval + link->rto;
/* Check if the timestamp looks valid. */
@@ -2950,7 +2987,7 @@ enum sctp_disposition sctp_sf_do_9_2_reshutack(
commands);
/* Since we are not going to really process this INIT, there
- * is no point in verifying chunk boundries. Just generate
+ * is no point in verifying chunk boundaries. Just generate
* the SHUTDOWN ACK.
*/
reply = sctp_make_shutdown_ack(asoc, chunk);
@@ -3560,7 +3597,7 @@ enum sctp_disposition sctp_sf_do_9_2_final(struct net *net,
goto nomem_chunk;
/* Do all the commands now (after allocation), so that we
- * have consistent state if memory allocation failes
+ * have consistent state if memory allocation fails
*/
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
@@ -3747,7 +3784,7 @@ static enum sctp_disposition sctp_sf_shut_8_4_5(
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* We need to discard the rest of the packet to prevent
- * potential bomming attacks from additional bundled chunks.
+ * potential bombing attacks from additional bundled chunks.
* This is documented in SCTP Threats ID.
*/
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -4257,7 +4294,7 @@ gen_shutdown:
}
/*
- * SCTP-AUTH Section 6.3 Receiving authenticated chukns
+ * SCTP-AUTH Section 6.3 Receiving authenticated chunks
*
* The receiver MUST use the HMAC algorithm indicated in the HMAC
* Identifier field. If this algorithm was not specified by the
@@ -4812,7 +4849,7 @@ static enum sctp_disposition sctp_sf_violation_ctsn(
/* Handle protocol violation of an invalid chunk bundling. For example,
* when we have an association and we receive bundled INIT-ACK, or
- * SHUDOWN-COMPLETE, our peer is clearly violationg the "MUST NOT bundle"
+ * SHUTDOWN-COMPLETE, our peer is clearly violating the "MUST NOT bundle"
* statement from the specs. Additionally, there might be an attacker
* on the path and we may not want to continue this communication.
*/
@@ -5208,7 +5245,7 @@ enum sctp_disposition sctp_sf_cookie_wait_prm_shutdown(
* Inputs
* (endpoint, asoc)
*
- * The RFC does not explcitly address this issue, but is the route through the
+ * The RFC does not explicitly address this issue, but is the route through the
* state table when someone issues a shutdown while in COOKIE_ECHOED state.
*
* Outputs
@@ -5932,7 +5969,7 @@ enum sctp_disposition sctp_sf_t1_cookie_timer_expire(
/* RFC2960 9.2 If the timer expires, the endpoint must re-send the SHUTDOWN
* with the updated last sequential TSN received from its peer.
*
- * An endpoint should limit the number of retransmissions of the
+ * An endpoint should limit the number of retransmission of the
* SHUTDOWN chunk to the protocol parameter 'Association.Max.Retrans'.
* If this threshold is exceeded the endpoint should destroy the TCB and
* MUST report the peer endpoint unreachable to the upper layer (and
@@ -6010,7 +6047,7 @@ nomem:
}
/*
- * ADDIP Section 4.1 ASCONF CHunk Procedures
+ * ADDIP Section 4.1 ASCONF Chunk Procedures
* If the T4 RTO timer expires the endpoint should do B1 to B5
*/
enum sctp_disposition sctp_sf_t4_timer_expire(
@@ -6441,7 +6478,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
chunk->ecn_ce_done = 1;
if (af->is_ce(sctp_gso_headskb(chunk->skb))) {
- /* Do real work as sideffect. */
+ /* Do real work as side effect. */
sctp_add_cmd_sf(commands, SCTP_CMD_ECN_CE,
SCTP_U32(tsn));
}
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 88ea87f4f0e7..1816a4410b2b 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -527,6 +527,26 @@ auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = {
}; /*state_fn_t auth_chunk_event_table[][] */
static const struct sctp_sm_table_entry
+pad_chunk_event_table[SCTP_STATE_NUM_STATES] = {
+ /* SCTP_STATE_CLOSED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_COOKIE_WAIT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_COOKIE_ECHOED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_ESTABLISHED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_PENDING */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_SENT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_RECEIVED */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+ /* SCTP_STATE_SHUTDOWN_ACK_SENT */
+ TYPE_SCTP_FUNC(sctp_sf_discard_chunk),
+}; /* chunk pad */
+
+static const struct sctp_sm_table_entry
chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = {
/* SCTP_STATE_CLOSED */
TYPE_SCTP_FUNC(sctp_sf_ootb),
@@ -947,6 +967,25 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
}
+#define TYPE_SCTP_EVENT_TIMEOUT_PROBE { \
+ /* SCTP_STATE_CLOSED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_COOKIE_WAIT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_COOKIE_ECHOED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_ESTABLISHED */ \
+ TYPE_SCTP_FUNC(sctp_sf_send_probe), \
+ /* SCTP_STATE_SHUTDOWN_PENDING */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_SENT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
+ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \
+}
+
static const struct sctp_sm_table_entry
timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_NONE,
@@ -958,6 +997,7 @@ timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = {
TYPE_SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD,
TYPE_SCTP_EVENT_TIMEOUT_HEARTBEAT,
TYPE_SCTP_EVENT_TIMEOUT_RECONF,
+ TYPE_SCTP_EVENT_TIMEOUT_PROBE,
TYPE_SCTP_EVENT_TIMEOUT_SACK,
TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
@@ -992,6 +1032,9 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup(
case SCTP_CID_AUTH:
return &auth_chunk_event_table[0][state];
+
+ case SCTP_CID_PAD:
+ return &pad_chunk_event_table[state];
}
return &chunk_event_table_unknown[state];
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a79d193ff872..6b937bfd4751 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2496,6 +2496,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
sctp_transport_pmtu(trans, sctp_opt2sk(sp));
sctp_assoc_sync_pmtu(asoc);
}
+ sctp_transport_pl_reset(trans);
} else if (asoc) {
asoc->param_flags =
(asoc->param_flags & ~SPP_PMTUD) | pmtud_change;
@@ -4481,6 +4482,61 @@ static int sctp_setsockopt_encap_port(struct sock *sk,
return 0;
}
+static int sctp_setsockopt_probe_interval(struct sock *sk,
+ struct sctp_probeinterval *params,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ __u32 probe_interval;
+
+ if (optlen != sizeof(*params))
+ return -EINVAL;
+
+ probe_interval = params->spi_interval;
+ if (probe_interval && probe_interval < SCTP_PROBE_TIMER_MIN)
+ return -EINVAL;
+
+ /* If an address other than INADDR_ANY is specified, and
+ * no transport is found, then the request is invalid.
+ */
+ if (!sctp_is_any(sk, (union sctp_addr *)&params->spi_address)) {
+ t = sctp_addr_id2transport(sk, &params->spi_address,
+ params->spi_assoc_id);
+ if (!t)
+ return -EINVAL;
+
+ t->probe_interval = msecs_to_jiffies(probe_interval);
+ sctp_transport_pl_reset(t);
+ return 0;
+ }
+
+ /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+ * socket is a one to many style socket, and an association
+ * was not found, then the id was invalid.
+ */
+ asoc = sctp_id2assoc(sk, params->spi_assoc_id);
+ if (!asoc && params->spi_assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP))
+ return -EINVAL;
+
+ /* If changes are for association, also apply probe_interval to
+ * each transport.
+ */
+ if (asoc) {
+ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) {
+ t->probe_interval = msecs_to_jiffies(probe_interval);
+ sctp_transport_pl_reset(t);
+ }
+
+ asoc->probe_interval = msecs_to_jiffies(probe_interval);
+ return 0;
+ }
+
+ sctp_sk(sk)->probe_interval = probe_interval;
+ return 0;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4521,6 +4577,10 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
}
if (optlen > 0) {
+ /* Trim it to the biggest size sctp sockopt may need if necessary */
+ optlen = min_t(unsigned int, optlen,
+ PAGE_ALIGN(USHRT_MAX +
+ sizeof(__u16) * sizeof(struct sctp_reset_streams)));
kopt = memdup_sockptr(optval, optlen);
if (IS_ERR(kopt))
return PTR_ERR(kopt);
@@ -4703,6 +4763,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_REMOTE_UDP_ENCAPS_PORT:
retval = sctp_setsockopt_encap_port(sk, kopt, optlen);
break;
+ case SCTP_PLPMTUD_PROBE_INTERVAL:
+ retval = sctp_setsockopt_probe_interval(sk, kopt, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -4989,6 +5052,7 @@ static int sctp_init_sock(struct sock *sk)
atomic_set(&sp->pd_mode, 0);
skb_queue_head_init(&sp->pd_lobby);
sp->frag_interleave = 0;
+ sp->probe_interval = net->sctp.probe_interval;
/* Create a per socket endpoint structure. Even if we
* change the data structure relationships, this may still
@@ -7905,6 +7969,66 @@ out:
return 0;
}
+static int sctp_getsockopt_probe_interval(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_probeinterval params;
+ struct sctp_association *asoc;
+ struct sctp_transport *t;
+ __u32 probe_interval;
+
+ if (len < sizeof(params))
+ return -EINVAL;
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ return -EFAULT;
+
+ /* If an address other than INADDR_ANY is specified, and
+ * no transport is found, then the request is invalid.
+ */
+ if (!sctp_is_any(sk, (union sctp_addr *)&params.spi_address)) {
+ t = sctp_addr_id2transport(sk, &params.spi_address,
+ params.spi_assoc_id);
+ if (!t) {
+ pr_debug("%s: failed no transport\n", __func__);
+ return -EINVAL;
+ }
+
+ probe_interval = jiffies_to_msecs(t->probe_interval);
+ goto out;
+ }
+
+ /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the
+ * socket is a one to many style socket, and an association
+ * was not found, then the id was invalid.
+ */
+ asoc = sctp_id2assoc(sk, params.spi_assoc_id);
+ if (!asoc && params.spi_assoc_id != SCTP_FUTURE_ASSOC &&
+ sctp_style(sk, UDP)) {
+ pr_debug("%s: failed no association\n", __func__);
+ return -EINVAL;
+ }
+
+ if (asoc) {
+ probe_interval = jiffies_to_msecs(asoc->probe_interval);
+ goto out;
+ }
+
+ probe_interval = sctp_sk(sk)->probe_interval;
+
+out:
+ params.spi_interval = probe_interval;
+ if (copy_to_user(optval, &params, len))
+ return -EFAULT;
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ return 0;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -8128,6 +8252,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_REMOTE_UDP_ENCAPS_PORT:
retval = sctp_getsockopt_encap_port(sk, len, optval, optlen);
break;
+ case SCTP_PLPMTUD_PROBE_INTERVAL:
+ retval = sctp_getsockopt_probe_interval(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
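
For context on how the option added above is exercised: userspace passes a struct sctp_probeinterval (spi_assoc_id, spi_address and spi_interval, the fields read by sctp_setsockopt_probe_interval()), with spi_interval in milliseconds; 0 disables probing and non-zero values below SCTP_PROBE_TIMER_MIN are rejected. A minimal sketch, assuming the UAPI definitions that accompany this series are reachable via netinet/sctp.h and using an illustrative 10-second interval:

/* Hypothetical userspace example, not part of the patch. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/sctp.h>	/* assumed to provide the new PLPMTUD UAPI */

int main(void)
{
	struct sctp_probeinterval pi;
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&pi, 0, sizeof(pi));
	pi.spi_assoc_id = SCTP_FUTURE_ASSOC;	/* socket default / future assocs */
	/* spi_address left as the wildcard: apply to all transports */
	pi.spi_interval = 10000;		/* probe every 10s (milliseconds) */

	if (setsockopt(fd, IPPROTO_SCTP, SCTP_PLPMTUD_PROBE_INTERVAL,
		       &pi, sizeof(pi)) < 0)
		perror("setsockopt(SCTP_PLPMTUD_PROBE_INTERVAL)");

	return 0;
}

With spi_address left as the wildcard and spi_assoc_id set to SCTP_FUTURE_ASSOC, the value becomes the socket default and is inherited by new associations and their transports, as seen in the sctp_association_init() and sctp_assoc_add_peer() hunks above.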
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 55871b277f47..b46a416787ec 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -55,6 +55,8 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
+static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table sctp_table[] = {
{
@@ -294,6 +296,13 @@ static struct ctl_table sctp_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "plpmtud_probe_interval",
+ .data = &init_net.sctp.probe_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_sctp_do_probe_interval,
+ },
+ {
.procname = "udp_port",
.data = &init_net.sctp.udp_port,
.maxlen = sizeof(int),
@@ -539,6 +548,32 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
return ret;
}
+static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct ctl_table tbl;
+ int ret, new_value;
+
+ memset(&tbl, 0, sizeof(struct ctl_table));
+ tbl.maxlen = sizeof(unsigned int);
+
+ if (write)
+ tbl.data = &new_value;
+ else
+ tbl.data = &net->sctp.probe_interval;
+
+ ret = proc_dointvec(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0) {
+ if (new_value && new_value < SCTP_PROBE_TIMER_MIN)
+ return -EINVAL;
+
+ net->sctp.probe_interval = new_value;
+ }
+
+ return ret;
+}
+
int sctp_sysctl_net_register(struct net *net)
{
struct ctl_table *table;
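
The per-namespace default sits behind the plpmtud_probe_interval sysctl registered above and is handled by proc_sctp_do_probe_interval(), with the same millisecond unit and SCTP_PROBE_TIMER_MIN lower bound. A small sketch of setting it from C (the /proc path follows from the procname added to sctp_net_table; the value is illustrative only):

/* Hypothetical example, roughly equivalent to
 * "sysctl -w net.sctp.plpmtud_probe_interval=10000". */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/sctp/plpmtud_probe_interval", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "%u\n", 10000);	/* milliseconds; 0 disables PLPMTUD probing */
	return fclose(f) ? 1 : 0;
}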
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index bf0ac467e757..a3d3ca6dd63d 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -75,6 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0);
timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0);
timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0);
+ timer_setup(&peer->probe_timer, sctp_generate_probe_event, 0);
timer_setup(&peer->proto_unreach_timer,
sctp_generate_proto_unreach_event, 0);
@@ -131,6 +132,9 @@ void sctp_transport_free(struct sctp_transport *transport)
if (del_timer(&transport->reconf_timer))
sctp_transport_put(transport);
+ if (del_timer(&transport->probe_timer))
+ sctp_transport_put(transport);
+
/* Delete the ICMP proto unreachable timer if it's active. */
if (del_timer(&transport->proto_unreach_timer))
sctp_transport_put(transport);
@@ -207,6 +211,15 @@ void sctp_transport_reset_reconf_timer(struct sctp_transport *transport)
sctp_transport_hold(transport);
}
+void sctp_transport_reset_probe_timer(struct sctp_transport *transport)
+{
+ if (timer_pending(&transport->probe_timer))
+ return;
+ if (!mod_timer(&transport->probe_timer,
+ jiffies + transport->probe_interval))
+ sctp_transport_hold(transport);
+}
+
/* This transport has been assigned to an association.
* Initialize fields from the association or from the sock itself.
* Register the reference count in the association.
@@ -241,12 +254,155 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
transport->pathmtu = sctp_dst_mtu(transport->dst);
else
transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
+
+ sctp_transport_pl_update(transport);
+}
+
+bool sctp_transport_pl_send(struct sctp_transport *t)
+{
+ if (t->pl.probe_count < SCTP_MAX_PROBES)
+ goto out;
+
+ t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
+ t->pl.probe_count = 0;
+ if (t->pl.state == SCTP_PL_BASE) {
+ if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
+ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+ t->pl.pmtu = SCTP_MIN_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
+ t->pl.state = SCTP_PL_BASE; /* Search -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_high = 0;
+
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ } else { /* Normal probe failure. */
+ t->pl.probe_high = t->pl.probe_size;
+ t->pl.probe_size = t->pl.pmtu;
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ if (t->pl.pmtu == t->pl.probe_size) { /* Black Hole Detected */
+ t->pl.state = SCTP_PL_BASE; /* Search Complete -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ }
+
+out:
+ if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 &&
+ !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) {
+ t->pl.raise_count++;
+ return false;
+ }
+
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+ t->pl.probe_count++;
+ return true;
+}
+
+bool sctp_transport_pl_recv(struct sctp_transport *t)
+{
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+ t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
+ t->pl.pmtu = t->pl.probe_size;
+ t->pl.probe_count = 0;
+ if (t->pl.state == SCTP_PL_BASE) {
+ t->pl.state = SCTP_PL_SEARCH; /* Base -> Search */
+ t->pl.probe_size += SCTP_PL_BIG_STEP;
+ } else if (t->pl.state == SCTP_PL_ERROR) {
+ t->pl.state = SCTP_PL_SEARCH; /* Error -> Search */
+
+ t->pl.pmtu = t->pl.probe_size;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ t->pl.probe_size += SCTP_PL_BIG_STEP;
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (!t->pl.probe_high) {
+ t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
+ SCTP_MAX_PLPMTU);
+ return false;
+ }
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
+ if (t->pl.probe_size >= t->pl.probe_high) {
+ t->pl.probe_high = 0;
+ t->pl.raise_count = 0;
+ t->pl.state = SCTP_PL_COMPLETE; /* Search -> Search Complete */
+
+ t->pl.probe_size = t->pl.pmtu;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ sctp_assoc_sync_pmtu(t->asoc);
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) {
+ /* Raise probe_size again after 30 * interval in Search Complete */
+ t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
+ }
+
+ return t->pl.state == SCTP_PL_COMPLETE;
+}
+
+static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
+{
+ pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, ptb: %d\n",
+ __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, pmtu);
+
+ if (pmtu < SCTP_MIN_PLPMTU || pmtu >= t->pl.probe_size)
+ return false;
+
+ if (t->pl.state == SCTP_PL_BASE) {
+ if (pmtu >= SCTP_MIN_PLPMTU && pmtu < SCTP_BASE_PLPMTU) {
+ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
+
+ t->pl.pmtu = SCTP_MIN_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ }
+ } else if (t->pl.state == SCTP_PL_SEARCH) {
+ if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+ t->pl.state = SCTP_PL_BASE; /* Search -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_count = 0;
+
+ t->pl.probe_high = 0;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ } else if (pmtu > t->pl.pmtu && pmtu < t->pl.probe_size) {
+ t->pl.probe_size = pmtu;
+ t->pl.probe_count = 0;
+
+ return false;
+ }
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ if (pmtu >= SCTP_BASE_PLPMTU && pmtu < t->pl.pmtu) {
+ t->pl.state = SCTP_PL_BASE; /* Complete -> Base */
+ t->pl.probe_size = SCTP_BASE_PLPMTU;
+ t->pl.probe_count = 0;
+
+ t->pl.probe_high = 0;
+ t->pl.pmtu = SCTP_BASE_PLPMTU;
+ t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
+ }
+ }
+
+ return true;
}
bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
{
- struct dst_entry *dst = sctp_transport_dst_check(t);
struct sock *sk = t->asoc->base.sk;
+ struct dst_entry *dst;
bool change = true;
if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
@@ -257,6 +413,10 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
}
pmtu = SCTP_TRUNC4(pmtu);
+ if (sctp_transport_pl_enabled(t))
+ return sctp_transport_pl_toobig(t, pmtu - sctp_transport_pl_hlen(t));
+
+ dst = sctp_transport_dst_check(t);
if (dst) {
struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family);
union sctp_addr addr;
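
As a rough illustration of the search logic in sctp_transport_pl_send()/sctp_transport_pl_recv() above: acknowledged probes raise pl.probe_size in big steps until one goes unanswered, the failed size is remembered in pl.probe_high, and the search then closes in on it in small steps before entering Search Complete. The sketch below simulates that walk in plain C; the base and step values are assumptions standing in for SCTP_BASE_PLPMTU and the SCTP_PL_* constants, and a "lost" probe here stands for SCTP_MAX_PROBES unanswered HEARTBEAT probes:

/* Illustrative simulation only, not kernel code. */
#include <stdio.h>

#define BASE_PLPMTU	1200	/* assumed value of SCTP_BASE_PLPMTU */
#define MAX_PLPMTU	9000	/* assumed value of SCTP_MAX_PLPMTU  */
#define BIG_STEP	32	/* assumed value of SCTP_PL_BIG_STEP */
#define MIN_STEP	4	/* assumed value of SCTP_PL_MIN_STEP */

int main(void)
{
	unsigned int link_limit = 1400;	/* largest probe this path would deliver */
	unsigned int pmtu = BASE_PLPMTU;
	unsigned int probe = BASE_PLPMTU;
	unsigned int high = 0;

	while (1) {
		if (probe <= link_limit) {
			/* probe acknowledged: mirrors sctp_transport_pl_recv() */
			pmtu = probe;
			if (!high) {
				probe += BIG_STEP;	/* min(size + BIG_STEP, MAX_PLPMTU) */
				if (probe > MAX_PLPMTU)
					probe = MAX_PLPMTU;
				continue;
			}
			probe += MIN_STEP;
			if (probe >= high)
				break;			/* Search -> Search Complete */
		} else {
			/* probe lost: mirrors the sctp_transport_pl_send() failure path */
			high = probe;
			probe = pmtu;
		}
	}
	printf("PLPMTU settles at %u\n", pmtu);
	return 0;
}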
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 77e54fe42b1c..99a0186cba5b 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -2,4 +2,4 @@
obj-$(CONFIG_SMC) += smc.o
obj-$(CONFIG_SMC_DIAG) += smc_diag.o
smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
-smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o
+smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5eff7cccceff..c038efc23ce3 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -49,6 +49,7 @@
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"
+#include "smc_stats.h"
static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group
* creation on server
@@ -508,9 +509,44 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->r0.qp_mtu;
}
-static void smc_switch_to_fallback(struct smc_sock *smc)
+static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
+ struct smc_stats_fback *fback_arr)
+{
+ int cnt;
+
+ for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
+ if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
+ fback_arr[cnt].count++;
+ break;
+ }
+ if (!fback_arr[cnt].fback_code) {
+ fback_arr[cnt].fback_code = smc->fallback_rsn;
+ fback_arr[cnt].count++;
+ break;
+ }
+ }
+}
+
+static void smc_stat_fallback(struct smc_sock *smc)
+{
+ struct net *net = sock_net(&smc->sk);
+
+ mutex_lock(&net->smc.mutex_fback_rsn);
+ if (smc->listen_smc) {
+ smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
+ net->smc.fback_rsn->srv_fback_cnt++;
+ } else {
+ smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
+ net->smc.fback_rsn->clnt_fback_cnt++;
+ }
+ mutex_unlock(&net->smc.mutex_fback_rsn);
+}
+
+static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
smc->use_fallback = true;
+ smc->fallback_rsn = reason_code;
+ smc_stat_fallback(smc);
if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
smc->clcsock->file = smc->sk.sk_socket->file;
smc->clcsock->file->private_data = smc->clcsock;
@@ -522,8 +558,7 @@ static void smc_switch_to_fallback(struct smc_sock *smc)
/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
- smc_switch_to_fallback(smc);
- smc->fallback_rsn = reason_code;
+ smc_switch_to_fallback(smc, reason_code);
smc_copy_sock_settings_to_clc(smc);
smc->connect_nonblock = 0;
if (smc->sk.sk_state == SMC_INIT)
@@ -535,9 +570,11 @@ static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
u8 version)
{
+ struct net *net = sock_net(&smc->sk);
int rc;
if (reason_code < 0) { /* error, fallback is not possible */
+ this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
if (smc->sk.sk_state == SMC_INIT)
sock_put(&smc->sk); /* passive closing */
return reason_code;
@@ -545,6 +582,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
if (reason_code != SMC_CLC_DECL_PEERDECL) {
rc = smc_clc_send_decline(smc, reason_code, version);
if (rc < 0) {
+ this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
if (smc->sk.sk_state == SMC_INIT)
sock_put(&smc->sk); /* passive closing */
return rc;
@@ -757,7 +795,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
reason_code = SMC_CLC_DECL_NOSRVLINK;
goto connect_abort;
}
- smc->conn.lnk = link;
+ smc_switch_link_and_count(&smc->conn, link);
}
/* create send buffer and rmb */
@@ -992,6 +1030,7 @@ static int __smc_connect(struct smc_sock *smc)
if (rc)
goto vlan_cleanup;
+ SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
smc_connect_ism_vlan_cleanup(smc, ini);
kfree(buf);
kfree(ini);
@@ -1307,7 +1346,9 @@ static void smc_listen_out_connected(struct smc_sock *new_smc)
static void smc_listen_out_err(struct smc_sock *new_smc)
{
struct sock *newsmcsk = &new_smc->sk;
+ struct net *net = sock_net(newsmcsk);
+ this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
if (newsmcsk->sk_state == SMC_INIT)
sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
@@ -1325,8 +1366,7 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
smc_listen_out_err(new_smc);
return;
}
- smc_switch_to_fallback(new_smc);
- new_smc->fallback_rsn = reason_code;
+ smc_switch_to_fallback(new_smc, reason_code);
if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
smc_listen_out_err(new_smc);
@@ -1699,8 +1739,7 @@ static void smc_listen_work(struct work_struct *work)
/* check if peer is smc capable */
if (!tcp_sk(newclcsock->sk)->syn_smc) {
- smc_switch_to_fallback(new_smc);
- new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
+ smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
smc_listen_out_connected(new_smc);
return;
}
@@ -1778,6 +1817,7 @@ static void smc_listen_work(struct work_struct *work)
}
smc_conn_save_peer_info(new_smc, cclc);
smc_listen_out_connected(new_smc);
+ SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
goto out_free;
out_unlock:
@@ -1984,18 +2024,19 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_FASTOPEN) {
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc);
- smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
+ smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
} else {
rc = -EINVAL;
goto out;
}
}
- if (smc->use_fallback)
+ if (smc->use_fallback) {
rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
- else
+ } else {
rc = smc_tx_sendmsg(smc, msg, len);
+ SMC_STAT_TX_PAYLOAD(smc, len, rc);
+ }
out:
release_sock(sk);
return rc;
@@ -2030,6 +2071,7 @@ static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
} else {
msg->msg_namelen = 0;
rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
+ SMC_STAT_RX_PAYLOAD(smc, rc, rc);
}
out:
@@ -2176,7 +2218,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
optval, optlen);
if (smc->clcsock->sk->sk_err) {
sk->sk_err = smc->clcsock->sk->sk_err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
if (optlen < sizeof(int))
@@ -2194,8 +2236,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
- smc_switch_to_fallback(smc);
- smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
+ smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
} else {
rc = -EINVAL;
}
@@ -2204,18 +2245,22 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (sk->sk_state != SMC_INIT &&
sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_CLOSED) {
- if (val)
+ if (val) {
+ SMC_STAT_INC(smc, ndly_cnt);
mod_delayed_work(smc->conn.lgr->tx_wq,
&smc->conn.tx_work, 0);
+ }
}
break;
case TCP_CORK:
if (sk->sk_state != SMC_INIT &&
sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_CLOSED) {
- if (!val)
+ if (!val) {
+ SMC_STAT_INC(smc, cork_cnt);
mod_delayed_work(smc->conn.lgr->tx_wq,
&smc->conn.tx_work, 0);
+ }
}
break;
case TCP_DEFER_ACCEPT:
@@ -2338,11 +2383,13 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page,
goto out;
}
release_sock(sk);
- if (smc->use_fallback)
+ if (smc->use_fallback) {
rc = kernel_sendpage(smc->clcsock, page, offset,
size, flags);
- else
+ } else {
+ SMC_STAT_INC(smc, sendpage_cnt);
rc = sock_no_sendpage(sock, page, offset, size, flags);
+ }
out:
return rc;
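
The new ndly_cnt, cork_cnt and sendpage_cnt counters are driven entirely by ordinary socket calls on an AF_SMC socket; nothing new is exposed to applications. A rough user-space sketch of the calls that would be expected to tick them, assuming the connection genuinely runs over SMC rather than having fallen back to TCP (buf, len and file_len are placeholders, error handling omitted):

    /* User-space sketch; fd is an established AF_SMC stream socket and
     * file_fd an open regular file.  Needs <sys/socket.h>, <netinet/tcp.h>
     * and <sys/sendfile.h>.
     */
    int one = 1, zero = 0;

    setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));  /* ndly_cnt */
    setsockopt(fd, IPPROTO_TCP, TCP_CORK, &one, sizeof(one));
    write(fd, buf, len);
    setsockopt(fd, IPPROTO_TCP, TCP_CORK, &zero, sizeof(zero));   /* cork_cnt */
    sendfile(fd, file_fd, NULL, file_len);                        /* sendpage_cnt */
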
@@ -2391,6 +2438,7 @@ static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
flags = MSG_DONTWAIT;
else
flags = 0;
+ SMC_STAT_INC(smc, splice_cnt);
rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
}
out:
@@ -2479,6 +2527,16 @@ static void __net_exit smc_net_exit(struct net *net)
smc_pnet_net_exit(net);
}
+static __net_init int smc_net_stat_init(struct net *net)
+{
+ return smc_stats_init(net);
+}
+
+static void __net_exit smc_net_stat_exit(struct net *net)
+{
+ smc_stats_exit(net);
+}
+
static struct pernet_operations smc_net_ops = {
.init = smc_net_init,
.exit = smc_net_exit,
@@ -2486,6 +2544,11 @@ static struct pernet_operations smc_net_ops = {
.size = sizeof(struct smc_net),
};
+static struct pernet_operations smc_net_stat_ops = {
+ .init = smc_net_stat_init,
+ .exit = smc_net_stat_exit,
+};
+
static int __init smc_init(void)
{
int rc;
@@ -2494,6 +2557,10 @@ static int __init smc_init(void)
if (rc)
return rc;
+ rc = register_pernet_subsys(&smc_net_stat_ops);
+ if (rc)
+ return rc;
+
smc_ism_init();
smc_clc_init();
@@ -2595,6 +2662,7 @@ static void __exit smc_exit(void)
proto_unregister(&smc_proto);
smc_pnet_exit();
smc_nl_exit();
+ unregister_pernet_subsys(&smc_net_stat_ops);
unregister_pernet_subsys(&smc_net_ops);
rcu_barrier();
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 0df85a12651e..af227b65669e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -33,6 +33,7 @@
#include "smc_close.h"
#include "smc_ism.h"
#include "smc_netlink.h"
+#include "smc_stats.h"
#define SMC_LGR_NUM_INCR 256
#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
@@ -916,8 +917,8 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
return rc;
}
-static void smc_switch_link_and_count(struct smc_connection *conn,
- struct smc_link *to_lnk)
+void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk)
{
atomic_dec(&conn->lnk->conn_cnt);
conn->lnk = to_lnk;
@@ -1235,20 +1236,6 @@ static void smc_lgr_free(struct smc_link_group *lgr)
kfree(lgr);
}
-static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
-{
- int i;
-
- for (i = 0; i < SMC_RMBE_SIZES; i++) {
- struct smc_buf_desc *buf_desc;
-
- list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
- buf_desc->len += sizeof(struct smcd_cdc_msg);
- smc_ism_unregister_dmb(lgr->smcd, buf_desc);
- }
- }
-}
-
static void smc_sk_wake_ups(struct smc_sock *smc)
{
smc->sk.sk_write_space(&smc->sk);
@@ -1285,7 +1272,6 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
if (lgr->is_smcd) {
smc_ism_signal_shutdown(lgr);
- smcd_unregister_all_dmbs(lgr);
} else {
u32 rsn = lgr->llc_termination_rsn;
@@ -1766,21 +1752,30 @@ out:
return rc;
}
-/* convert the RMB size into the compressed notation - minimum 16K.
+#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
+
+/* convert the RMB size into the compressed notation (minimum 16K, see
+ * SMCD/R_DMBE_SIZES).
* In contrast to plain ilog2, this rounds towards the next power of 2,
* so the socket application gets at least its desired sndbuf / rcvbuf size.
*/
-static u8 smc_compress_bufsize(int size)
+static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
+ const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
u8 compressed;
if (size <= SMC_BUF_MIN_SIZE)
return 0;
- size = (size - 1) >> 14;
- compressed = ilog2(size) + 1;
- if (compressed >= SMC_RMBE_SIZES)
- compressed = SMC_RMBE_SIZES - 1;
+ size = (size - 1) >> 14; /* convert to 16K multiple */
+ compressed = min_t(u8, ilog2(size) + 1,
+ is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
+
+ if (!is_smcd && is_rmb)
+ /* RMBs are backed by & limited to max size of scatterlists */
+ compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
+
return compressed;
}
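
The compressed notation is simply the number of 16K doublings, rounded up so the caller never gets a smaller buffer than requested, then clamped per technology (SMCD_DMBE_SIZES vs. SMCR_RMBE_SIZES) and, for SMC-R RMBs, to what a single scatterlist allocation can back. A stand-alone sketch of the rounding alone, runnable in user space (the clamping is deliberately omitted):

    /* Demonstrates the rounding used by smc_compress_bufsize(): sizes are
     * expressed as ilog2 of their 16K multiple, rounded towards the next
     * power of two.  Technology clamps are omitted.
     */
    #include <stdio.h>

    static unsigned int compress16k(unsigned int size)
    {
        unsigned int bits = 0;
        unsigned int v = (size - 1) >> 14;     /* 16K multiples */

        while (v) {                            /* open-coded ilog2(v) + 1 */
            v >>= 1;
            bits++;
        }
        return bits;
    }

    int main(void)
    {
        printf("16K  -> %u\n", compress16k(16384));   /* 0 */
        printf("20K  -> %u\n", compress16k(20480));   /* 1, rounded up to 32K */
        printf("64K  -> %u\n", compress16k(65536));   /* 2 */
        printf("512K -> %u\n", compress16k(524288));  /* 5 */
        return 0;
    }
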
@@ -1996,17 +1991,12 @@ out:
return rc;
}
-#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
-
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
int rc;
- if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
- return ERR_PTR(-EAGAIN);
-
/* try to alloc a new DMB */
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
if (!buf_desc)
@@ -2044,6 +2034,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
+ bool is_dgraded = false;
struct mutex *lock; /* lock buffer list */
int sk_buf_size;
@@ -2054,9 +2045,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
/* use socket send buffer size (w/o overhead) as start value */
sk_buf_size = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(sk_buf_size);
+ for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
bufsize_short >= 0; bufsize_short--) {
-
if (is_rmb) {
lock = &lgr->rmbs_lock;
buf_list = &lgr->rmbs[bufsize_short];
@@ -2065,12 +2055,12 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
buf_list = &lgr->sndbufs[bufsize_short];
}
bufsize = smc_uncompress_bufsize(bufsize_short);
- if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
- continue;
/* check for reusable slot in the link group */
buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
if (buf_desc) {
+ SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
+ SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
memset(buf_desc->cpu_addr, 0, bufsize);
break; /* found reusable slot */
}
@@ -2082,9 +2072,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (PTR_ERR(buf_desc) == -ENOMEM)
break;
- if (IS_ERR(buf_desc))
+ if (IS_ERR(buf_desc)) {
+ if (!is_dgraded) {
+ is_dgraded = true;
+ SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
+ }
continue;
+ }
+ SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
+ SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
buf_desc->used = 1;
mutex_lock(lock);
list_add(&buf_desc->list, buf_list);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 6d6fd1397c87..c043ecdca5c4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -97,6 +97,7 @@ struct smc_link {
unsigned long *wr_tx_mask; /* bit mask of used indexes */
u32 wr_tx_cnt; /* number of WR send buffers */
wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */
+ atomic_t wr_tx_refcnt; /* tx refs to link */
struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */
struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */
@@ -109,6 +110,7 @@ struct smc_link {
struct ib_reg_wr wr_reg; /* WR register memory region */
wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
+ atomic_t wr_reg_refcnt; /* reg refs to link */
enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/
@@ -444,6 +446,8 @@ void smc_core_exit(void);
int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
u8 link_idx, struct smc_init_info *ini);
void smcr_link_clear(struct smc_link *lnk, bool log);
+void smc_switch_link_and_count(struct smc_connection *conn,
+ struct smc_link *to_lnk);
int smcr_buf_map_lgr(struct smc_link *lnk);
int smcr_buf_reg_lgr(struct smc_link *lnk);
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 7d7ba0320d5a..a8845343d183 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -753,8 +753,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event)
if (!libdev->ops.get_netdev)
continue;
lndev = libdev->ops.get_netdev(libdev, i + 1);
- if (lndev)
- dev_put(lndev);
+ dev_put(lndev);
if (lndev != ndev)
continue;
if (event == NETDEV_REGISTER)
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 967712ba52a0..9cb2df289963 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -470,7 +470,6 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
mutex_unlock(&smcd_dev_list.mutex);
smcd->going_away = 1;
smc_smcd_terminate_all(smcd);
- flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
device_del(&smcd->dev);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 273eaf1bfe49..2e7560eba981 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -888,6 +888,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
if (!rc)
goto out;
out_clear_lnk:
+ lnk_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(lnk_new, false);
out_reject:
smc_llc_cli_add_link_reject(qentry);
@@ -1184,6 +1185,7 @@ int smc_llc_srv_add_link(struct smc_link *link)
goto out_err;
return 0;
out_err:
+ link_new->state = SMC_LNK_INACTIVE;
smcr_link_clear(link_new, false);
return rc;
}
@@ -1286,10 +1288,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
del_llc->reason = 0;
smc_llc_send_message(lnk, &qentry->msg); /* response */
- if (smc_link_downing(&lnk_del->state)) {
- if (smc_switch_conns(lgr, lnk_del, false))
- smc_wr_tx_wait_no_pending_sends(lnk_del);
- }
+ if (smc_link_downing(&lnk_del->state))
+ smc_switch_conns(lgr, lnk_del, false);
smcr_link_clear(lnk_del, true);
active_links = smc_llc_active_link_count(lgr);
@@ -1805,8 +1805,6 @@ void smc_llc_link_clear(struct smc_link *link, bool log)
link->smcibdev->ibdev->name, link->ibport);
complete(&link->llc_testlink_resp);
cancel_delayed_work_sync(&link->llc_testlink_wrk);
- smc_wr_wakeup_reg_wait(link);
- smc_wr_wakeup_tx_wait(link);
}
/* register a new rtoken at the remote peer (for all links) */
diff --git a/net/smc/smc_netlink.c b/net/smc/smc_netlink.c
index 140419a19dbf..6fb6f96c1d17 100644
--- a/net/smc/smc_netlink.c
+++ b/net/smc/smc_netlink.c
@@ -19,6 +19,7 @@
#include "smc_core.h"
#include "smc_ism.h"
#include "smc_ib.h"
+#include "smc_stats.h"
#include "smc_netlink.h"
#define SMC_CMD_MAX_ATTR 1
@@ -55,6 +56,16 @@ static const struct genl_ops smc_gen_nl_ops[] = {
/* can be retrieved by unprivileged users */
.dumpit = smcr_nl_get_device,
},
+ {
+ .cmd = SMC_NETLINK_GET_STATS,
+ /* can be retrieved by unprivileged users */
+ .dumpit = smc_nl_get_stats,
+ },
+ {
+ .cmd = SMC_NETLINK_GET_FBACK_STATS,
+ /* can be retrieved by unprivileged users */
+ .dumpit = smc_nl_get_fback_stats,
+ },
};
static const struct nla_policy smc_gen_nl_policy[2] = {
diff --git a/net/smc/smc_netlink.h b/net/smc/smc_netlink.h
index 3477265cba6c..5ce2c0a89ccd 100644
--- a/net/smc/smc_netlink.h
+++ b/net/smc/smc_netlink.h
@@ -18,7 +18,7 @@
extern struct genl_family smc_gen_nl_family;
struct smc_nl_dmp_ctx {
- int pos[2];
+ int pos[3];
};
static inline struct smc_nl_dmp_ctx *smc_nl_dmp_ctx(struct netlink_callback *c)
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 6f6d33edb135..4a964e9190b0 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -394,8 +394,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
return 0;
out_put:
- if (ndev)
- dev_put(ndev);
+ dev_put(ndev);
return rc;
}
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index fcfac59f8b72..170b733bc736 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -21,6 +21,7 @@
#include "smc_cdc.h"
#include "smc_tx.h" /* smc_tx_consumer_update() */
#include "smc_rx.h"
+#include "smc_stats.h"
/* callback implementation to wakeup consumers blocked with smc_rx_wait().
* indirectly called by smc_cdc_msg_recv_action().
@@ -227,6 +228,7 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
conn->urg_state == SMC_URG_READ)
return -EINVAL;
+ SMC_STAT_INC(smc, urg_data_cnt);
if (conn->urg_state == SMC_URG_VALID) {
if (!(flags & MSG_PEEK))
smc->conn.urg_state = SMC_URG_READ;
@@ -303,6 +305,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+ readable = atomic_read(&conn->bytes_to_rcv);
+ if (readable >= conn->rmb_desc->len)
+ SMC_STAT_RMB_RX_FULL(smc, !conn->lnk);
+
+ if (len < readable)
+ SMC_STAT_RMB_RX_SIZE_SMALL(smc, !conn->lnk);
/* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;
diff --git a/net/smc/smc_stats.c b/net/smc/smc_stats.c
new file mode 100644
index 000000000000..e80e34f7ac15
--- /dev/null
+++ b/net/smc/smc_stats.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * SMC statistics netlink routines
+ *
+ * Copyright IBM Corp. 2021
+ *
+ * Author(s): Guvenc Gulce
+ */
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/ctype.h>
+#include <linux/smc.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include "smc_netlink.h"
+#include "smc_stats.h"
+
+int smc_stats_init(struct net *net)
+{
+ net->smc.fback_rsn = kzalloc(sizeof(*net->smc.fback_rsn), GFP_KERNEL);
+ if (!net->smc.fback_rsn)
+ goto err_fback;
+ net->smc.smc_stats = alloc_percpu(struct smc_stats);
+ if (!net->smc.smc_stats)
+ goto err_stats;
+ mutex_init(&net->smc.mutex_fback_rsn);
+ return 0;
+
+err_stats:
+ kfree(net->smc.fback_rsn);
+err_fback:
+ return -ENOMEM;
+}
+
+void smc_stats_exit(struct net *net)
+{
+ kfree(net->smc.fback_rsn);
+ if (net->smc.smc_stats)
+ free_percpu(net->smc.smc_stats);
+}
+
+static int smc_nl_fill_stats_rmb_data(struct sk_buff *skb,
+ struct smc_stats *stats, int tech,
+ int type)
+{
+ struct smc_stats_rmbcnt *stats_rmb_cnt;
+ struct nlattr *attrs;
+
+ if (type == SMC_NLA_STATS_T_TX_RMB_STATS)
+ stats_rmb_cnt = &stats->smc[tech].rmb_tx;
+ else
+ stats_rmb_cnt = &stats->smc[tech].rmb_rx;
+
+ attrs = nla_nest_start(skb, type);
+ if (!attrs)
+ goto errout;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_REUSE_CNT,
+ stats_rmb_cnt->reuse_cnt,
+ SMC_NLA_STATS_RMB_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_SIZE_SM_PEER_CNT,
+ stats_rmb_cnt->buf_size_small_peer_cnt,
+ SMC_NLA_STATS_RMB_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_SIZE_SM_CNT,
+ stats_rmb_cnt->buf_size_small_cnt,
+ SMC_NLA_STATS_RMB_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_FULL_PEER_CNT,
+ stats_rmb_cnt->buf_full_peer_cnt,
+ SMC_NLA_STATS_RMB_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_FULL_CNT,
+ stats_rmb_cnt->buf_full_cnt,
+ SMC_NLA_STATS_RMB_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_ALLOC_CNT,
+ stats_rmb_cnt->alloc_cnt,
+ SMC_NLA_STATS_RMB_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_DGRADE_CNT,
+ stats_rmb_cnt->dgrade_cnt,
+ SMC_NLA_STATS_RMB_PAD))
+ goto errattr;
+
+ nla_nest_end(skb, attrs);
+ return 0;
+
+errattr:
+ nla_nest_cancel(skb, attrs);
+errout:
+ return -EMSGSIZE;
+}
+
+static int smc_nl_fill_stats_bufsize_data(struct sk_buff *skb,
+ struct smc_stats *stats, int tech,
+ int type)
+{
+ struct smc_stats_memsize *stats_pload;
+ struct nlattr *attrs;
+
+ if (type == SMC_NLA_STATS_T_TXPLOAD_SIZE)
+ stats_pload = &stats->smc[tech].tx_pd;
+ else if (type == SMC_NLA_STATS_T_RXPLOAD_SIZE)
+ stats_pload = &stats->smc[tech].rx_pd;
+ else if (type == SMC_NLA_STATS_T_TX_RMB_SIZE)
+ stats_pload = &stats->smc[tech].tx_rmbsize;
+ else if (type == SMC_NLA_STATS_T_RX_RMB_SIZE)
+ stats_pload = &stats->smc[tech].rx_rmbsize;
+ else
+ goto errout;
+
+ attrs = nla_nest_start(skb, type);
+ if (!attrs)
+ goto errout;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_8K,
+ stats_pload->buf[SMC_BUF_8K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_16K,
+ stats_pload->buf[SMC_BUF_16K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_32K,
+ stats_pload->buf[SMC_BUF_32K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_64K,
+ stats_pload->buf[SMC_BUF_64K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_128K,
+ stats_pload->buf[SMC_BUF_128K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_256K,
+ stats_pload->buf[SMC_BUF_256K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_512K,
+ stats_pload->buf[SMC_BUF_512K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_1024K,
+ stats_pload->buf[SMC_BUF_1024K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_G_1024K,
+ stats_pload->buf[SMC_BUF_G_1024K],
+ SMC_NLA_STATS_PLOAD_PAD))
+ goto errattr;
+
+ nla_nest_end(skb, attrs);
+ return 0;
+
+errattr:
+ nla_nest_cancel(skb, attrs);
+errout:
+ return -EMSGSIZE;
+}
+
+static int smc_nl_fill_stats_tech_data(struct sk_buff *skb,
+ struct smc_stats *stats, int tech)
+{
+ struct smc_stats_tech *smc_tech;
+ struct nlattr *attrs;
+
+ smc_tech = &stats->smc[tech];
+ if (tech == SMC_TYPE_D)
+ attrs = nla_nest_start(skb, SMC_NLA_STATS_SMCD_TECH);
+ else
+ attrs = nla_nest_start(skb, SMC_NLA_STATS_SMCR_TECH);
+
+ if (!attrs)
+ goto errout;
+ if (smc_nl_fill_stats_rmb_data(skb, stats, tech,
+ SMC_NLA_STATS_T_TX_RMB_STATS))
+ goto errattr;
+ if (smc_nl_fill_stats_rmb_data(skb, stats, tech,
+ SMC_NLA_STATS_T_RX_RMB_STATS))
+ goto errattr;
+ if (smc_nl_fill_stats_bufsize_data(skb, stats, tech,
+ SMC_NLA_STATS_T_TXPLOAD_SIZE))
+ goto errattr;
+ if (smc_nl_fill_stats_bufsize_data(skb, stats, tech,
+ SMC_NLA_STATS_T_RXPLOAD_SIZE))
+ goto errattr;
+ if (smc_nl_fill_stats_bufsize_data(skb, stats, tech,
+ SMC_NLA_STATS_T_TX_RMB_SIZE))
+ goto errattr;
+ if (smc_nl_fill_stats_bufsize_data(skb, stats, tech,
+ SMC_NLA_STATS_T_RX_RMB_SIZE))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CLNT_V1_SUCC,
+ smc_tech->clnt_v1_succ_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CLNT_V2_SUCC,
+ smc_tech->clnt_v2_succ_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SRV_V1_SUCC,
+ smc_tech->srv_v1_succ_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SRV_V2_SUCC,
+ smc_tech->srv_v2_succ_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_BYTES,
+ smc_tech->rx_bytes,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_TX_BYTES,
+ smc_tech->tx_bytes,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_CNT,
+ smc_tech->rx_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_TX_CNT,
+ smc_tech->tx_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SENDPAGE_CNT,
+ smc_tech->sendpage_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CORK_CNT,
+ smc_tech->cork_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_NDLY_CNT,
+ smc_tech->ndly_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SPLICE_CNT,
+ smc_tech->splice_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_URG_DATA_CNT,
+ smc_tech->urg_data_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+
+ nla_nest_end(skb, attrs);
+ return 0;
+
+errattr:
+ nla_nest_cancel(skb, attrs);
+errout:
+ return -EMSGSIZE;
+}
+
+int smc_nl_get_stats(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+ struct net *net = sock_net(skb->sk);
+ struct smc_stats *stats;
+ struct nlattr *attrs;
+ int cpu, i, size;
+ void *nlh;
+ u64 *src;
+ u64 *sum;
+
+ if (cb_ctx->pos[0])
+ goto errmsg;
+ nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &smc_gen_nl_family, NLM_F_MULTI,
+ SMC_NETLINK_GET_STATS);
+ if (!nlh)
+ goto errmsg;
+
+ attrs = nla_nest_start(skb, SMC_GEN_STATS);
+ if (!attrs)
+ goto errnest;
+ stats = kzalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats)
+ goto erralloc;
+ size = sizeof(*stats) / sizeof(u64);
+ for_each_possible_cpu(cpu) {
+ src = (u64 *)per_cpu_ptr(net->smc.smc_stats, cpu);
+ sum = (u64 *)stats;
+ for (i = 0; i < size; i++)
+ *(sum++) += *(src++);
+ }
+ if (smc_nl_fill_stats_tech_data(skb, stats, SMC_TYPE_D))
+ goto errattr;
+ if (smc_nl_fill_stats_tech_data(skb, stats, SMC_TYPE_R))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_CLNT_HS_ERR_CNT,
+ stats->clnt_hshake_err_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_STATS_SRV_HS_ERR_CNT,
+ stats->srv_hshake_err_cnt,
+ SMC_NLA_STATS_PAD))
+ goto errattr;
+
+ nla_nest_end(skb, attrs);
+ genlmsg_end(skb, nlh);
+ cb_ctx->pos[0] = 1;
+ kfree(stats);
+ return skb->len;
+
+errattr:
+ kfree(stats);
+erralloc:
+ nla_nest_cancel(skb, attrs);
+errnest:
+ genlmsg_cancel(skb, nlh);
+errmsg:
+ return skb->len;
+}
+
+static int smc_nl_get_fback_details(struct sk_buff *skb,
+ struct netlink_callback *cb, int pos,
+ bool is_srv)
+{
+ struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+ struct net *net = sock_net(skb->sk);
+ int cnt_reported = cb_ctx->pos[2];
+ struct smc_stats_fback *trgt_arr;
+ struct nlattr *attrs;
+ int rc = 0;
+ void *nlh;
+
+ if (is_srv)
+ trgt_arr = &net->smc.fback_rsn->srv[0];
+ else
+ trgt_arr = &net->smc.fback_rsn->clnt[0];
+ if (!trgt_arr[pos].fback_code)
+ return -ENODATA;
+ nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &smc_gen_nl_family, NLM_F_MULTI,
+ SMC_NETLINK_GET_FBACK_STATS);
+ if (!nlh)
+ goto errmsg;
+ attrs = nla_nest_start(skb, SMC_GEN_FBACK_STATS);
+ if (!attrs)
+ goto errout;
+ if (nla_put_u8(skb, SMC_NLA_FBACK_STATS_TYPE, is_srv))
+ goto errattr;
+ if (!cnt_reported) {
+ if (nla_put_u64_64bit(skb, SMC_NLA_FBACK_STATS_SRV_CNT,
+ net->smc.fback_rsn->srv_fback_cnt,
+ SMC_NLA_FBACK_STATS_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_FBACK_STATS_CLNT_CNT,
+ net->smc.fback_rsn->clnt_fback_cnt,
+ SMC_NLA_FBACK_STATS_PAD))
+ goto errattr;
+ cnt_reported = 1;
+ }
+
+ if (nla_put_u32(skb, SMC_NLA_FBACK_STATS_RSN_CODE,
+ trgt_arr[pos].fback_code))
+ goto errattr;
+ if (nla_put_u16(skb, SMC_NLA_FBACK_STATS_RSN_CNT,
+ trgt_arr[pos].count))
+ goto errattr;
+
+ cb_ctx->pos[2] = cnt_reported;
+ nla_nest_end(skb, attrs);
+ genlmsg_end(skb, nlh);
+ return rc;
+
+errattr:
+ nla_nest_cancel(skb, attrs);
+errout:
+ genlmsg_cancel(skb, nlh);
+errmsg:
+ return -EMSGSIZE;
+}
+
+int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
+ struct net *net = sock_net(skb->sk);
+ int rc_srv = 0, rc_clnt = 0, k;
+ int skip_serv = cb_ctx->pos[1];
+ int snum = cb_ctx->pos[0];
+ bool is_srv = true;
+
+ mutex_lock(&net->smc.mutex_fback_rsn);
+ for (k = 0; k < SMC_MAX_FBACK_RSN_CNT; k++) {
+ if (k < snum)
+ continue;
+ if (!skip_serv) {
+ rc_srv = smc_nl_get_fback_details(skb, cb, k, is_srv);
+ if (rc_srv && rc_srv != -ENODATA)
+ break;
+ } else {
+ skip_serv = 0;
+ }
+ rc_clnt = smc_nl_get_fback_details(skb, cb, k, !is_srv);
+ if (rc_clnt && rc_clnt != -ENODATA) {
+ skip_serv = 1;
+ break;
+ }
+ if (rc_clnt == -ENODATA && rc_srv == -ENODATA)
+ break;
+ }
+ mutex_unlock(&net->smc.mutex_fback_rsn);
+ cb_ctx->pos[1] = skip_serv;
+ cb_ctx->pos[0] = k;
+ return skb->len;
+}
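
smc_nl_get_stats() folds the per-CPU copies together by walking struct smc_stats as a flat array of u64, which quietly relies on the struct consisting of nothing but 64-bit counters. A hedged sketch of how that summation and its assumption could be expressed in one helper; this is illustrative and not part of the patch:

    /* Illustrative only: sum the per-CPU stats into one struct, making the
     * "struct smc_stats is purely u64 counters" assumption explicit.
     */
    static void smc_stats_sum(struct net *net, struct smc_stats *sum)
    {
        int cpu, i, words = sizeof(*sum) / sizeof(u64);

        BUILD_BUG_ON(sizeof(struct smc_stats) % sizeof(u64));
        memset(sum, 0, sizeof(*sum));
        for_each_possible_cpu(cpu) {
            u64 *src = (u64 *)per_cpu_ptr(net->smc.smc_stats, cpu);
            u64 *dst = (u64 *)sum;

            for (i = 0; i < words; i++)
                dst[i] += src[i];
        }
    }

In the dump itself the summation buffer is kzalloc'd per request, so concurrent netlink dumps never share intermediate state.
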
diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h
new file mode 100644
index 000000000000..84b7ecd8c05c
--- /dev/null
+++ b/net/smc/smc_stats.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ * Macros for SMC statistics
+ *
+ * Copyright IBM Corp. 2021
+ *
+ * Author(s): Guvenc Gulce
+ */
+
+#ifndef NET_SMC_SMC_STATS_H_
+#define NET_SMC_SMC_STATS_H_
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/ctype.h>
+#include <linux/smc.h>
+
+#include "smc_clc.h"
+
+#define SMC_MAX_FBACK_RSN_CNT 30
+
+enum {
+ SMC_BUF_8K,
+ SMC_BUF_16K,
+ SMC_BUF_32K,
+ SMC_BUF_64K,
+ SMC_BUF_128K,
+ SMC_BUF_256K,
+ SMC_BUF_512K,
+ SMC_BUF_1024K,
+ SMC_BUF_G_1024K,
+ SMC_BUF_MAX,
+};
+
+struct smc_stats_fback {
+ int fback_code;
+ u16 count;
+};
+
+struct smc_stats_rsn {
+ struct smc_stats_fback srv[SMC_MAX_FBACK_RSN_CNT];
+ struct smc_stats_fback clnt[SMC_MAX_FBACK_RSN_CNT];
+ u64 srv_fback_cnt;
+ u64 clnt_fback_cnt;
+};
+
+struct smc_stats_rmbcnt {
+ u64 buf_size_small_peer_cnt;
+ u64 buf_size_small_cnt;
+ u64 buf_full_peer_cnt;
+ u64 buf_full_cnt;
+ u64 reuse_cnt;
+ u64 alloc_cnt;
+ u64 dgrade_cnt;
+};
+
+struct smc_stats_memsize {
+ u64 buf[SMC_BUF_MAX];
+};
+
+struct smc_stats_tech {
+ struct smc_stats_memsize tx_rmbsize;
+ struct smc_stats_memsize rx_rmbsize;
+ struct smc_stats_memsize tx_pd;
+ struct smc_stats_memsize rx_pd;
+ struct smc_stats_rmbcnt rmb_tx;
+ struct smc_stats_rmbcnt rmb_rx;
+ u64 clnt_v1_succ_cnt;
+ u64 clnt_v2_succ_cnt;
+ u64 srv_v1_succ_cnt;
+ u64 srv_v2_succ_cnt;
+ u64 sendpage_cnt;
+ u64 urg_data_cnt;
+ u64 splice_cnt;
+ u64 cork_cnt;
+ u64 ndly_cnt;
+ u64 rx_bytes;
+ u64 tx_bytes;
+ u64 rx_cnt;
+ u64 tx_cnt;
+};
+
+struct smc_stats {
+ struct smc_stats_tech smc[2];
+ u64 clnt_hshake_err_cnt;
+ u64 srv_hshake_err_cnt;
+};
+
+#define SMC_STAT_PAYLOAD_SUB(_smc_stats, _tech, key, _len, _rc) \
+do { \
+ typeof(_smc_stats) stats = (_smc_stats); \
+ typeof(_tech) t = (_tech); \
+ typeof(_len) l = (_len); \
+ int _pos = fls64((l) >> 13); \
+ typeof(_rc) r = (_rc); \
+ int m = SMC_BUF_MAX - 1; \
+ this_cpu_inc((*stats).smc[t].key ## _cnt); \
+ if (r <= 0) \
+ break; \
+ _pos = (_pos < m) ? ((l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \
+ this_cpu_inc((*stats).smc[t].key ## _pd.buf[_pos]); \
+ this_cpu_add((*stats).smc[t].key ## _bytes, r); \
+} \
+while (0)
+
+#define SMC_STAT_TX_PAYLOAD(_smc, length, rcode) \
+do { \
+ typeof(_smc) __smc = _smc; \
+ struct net *_net = sock_net(&__smc->sk); \
+ struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
+ typeof(length) _len = (length); \
+ typeof(rcode) _rc = (rcode); \
+ bool is_smcd = !__smc->conn.lnk; \
+ if (is_smcd) \
+ SMC_STAT_PAYLOAD_SUB(_smc_stats, SMC_TYPE_D, tx, _len, _rc); \
+ else \
+ SMC_STAT_PAYLOAD_SUB(_smc_stats, SMC_TYPE_R, tx, _len, _rc); \
+} \
+while (0)
+
+#define SMC_STAT_RX_PAYLOAD(_smc, length, rcode) \
+do { \
+ typeof(_smc) __smc = _smc; \
+ struct net *_net = sock_net(&__smc->sk); \
+ struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
+ typeof(length) _len = (length); \
+ typeof(rcode) _rc = (rcode); \
+ bool is_smcd = !__smc->conn.lnk; \
+ if (is_smcd) \
+ SMC_STAT_PAYLOAD_SUB(_smc_stats, SMC_TYPE_D, rx, _len, _rc); \
+ else \
+ SMC_STAT_PAYLOAD_SUB(_smc_stats, SMC_TYPE_R, rx, _len, _rc); \
+} \
+while (0)
+
+#define SMC_STAT_RMB_SIZE_SUB(_smc_stats, _tech, k, _len) \
+do { \
+ typeof(_len) _l = (_len); \
+ typeof(_tech) t = (_tech); \
+ int _pos = fls((_l) >> 13); \
+ int m = SMC_BUF_MAX - 1; \
+ _pos = (_pos < m) ? ((_l == 1 << (_pos + 12)) ? _pos - 1 : _pos) : m; \
+ this_cpu_inc((*(_smc_stats)).smc[t].k ## _rmbsize.buf[_pos]); \
+} \
+while (0)
+
+#define SMC_STAT_RMB_SUB(_smc_stats, type, t, key) \
+ this_cpu_inc((*(_smc_stats)).smc[t].rmb ## _ ## key.type ## _cnt)
+
+#define SMC_STAT_RMB_SIZE(_smc, _is_smcd, _is_rx, _len) \
+do { \
+ struct net *_net = sock_net(&(_smc)->sk); \
+ struct smc_stats __percpu *_smc_stats = _net->smc.smc_stats; \
+ typeof(_is_smcd) is_d = (_is_smcd); \
+ typeof(_is_rx) is_r = (_is_rx); \
+ typeof(_len) l = (_len); \
+ if ((is_d) && (is_r)) \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, rx, l); \
+ if ((is_d) && !(is_r)) \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_D, tx, l); \
+ if (!(is_d) && (is_r)) \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, rx, l); \
+ if (!(is_d) && !(is_r)) \
+ SMC_STAT_RMB_SIZE_SUB(_smc_stats, SMC_TYPE_R, tx, l); \
+} \
+while (0)
+
+#define SMC_STAT_RMB(_smc, type, _is_smcd, _is_rx) \
+do { \
+ struct net *net = sock_net(&(_smc)->sk); \
+ struct smc_stats __percpu *_smc_stats = net->smc.smc_stats; \
+ typeof(_is_smcd) is_d = (_is_smcd); \
+ typeof(_is_rx) is_r = (_is_rx); \
+ if ((is_d) && (is_r)) \
+ SMC_STAT_RMB_SUB(_smc_stats, type, SMC_TYPE_D, rx); \
+ if ((is_d) && !(is_r)) \
+ SMC_STAT_RMB_SUB(_smc_stats, type, SMC_TYPE_D, tx); \
+ if (!(is_d) && (is_r)) \
+ SMC_STAT_RMB_SUB(_smc_stats, type, SMC_TYPE_R, rx); \
+ if (!(is_d) && !(is_r)) \
+ SMC_STAT_RMB_SUB(_smc_stats, type, SMC_TYPE_R, tx); \
+} \
+while (0)
+
+#define SMC_STAT_BUF_REUSE(smc, is_smcd, is_rx) \
+ SMC_STAT_RMB(smc, reuse, is_smcd, is_rx)
+
+#define SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rx) \
+ SMC_STAT_RMB(smc, alloc, is_smcd, is_rx)
+
+#define SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rx) \
+ SMC_STAT_RMB(smc, dgrade, is_smcd, is_rx)
+
+#define SMC_STAT_RMB_TX_PEER_FULL(smc, is_smcd) \
+ SMC_STAT_RMB(smc, buf_full_peer, is_smcd, false)
+
+#define SMC_STAT_RMB_TX_FULL(smc, is_smcd) \
+ SMC_STAT_RMB(smc, buf_full, is_smcd, false)
+
+#define SMC_STAT_RMB_TX_PEER_SIZE_SMALL(smc, is_smcd) \
+ SMC_STAT_RMB(smc, buf_size_small_peer, is_smcd, false)
+
+#define SMC_STAT_RMB_TX_SIZE_SMALL(smc, is_smcd) \
+ SMC_STAT_RMB(smc, buf_size_small, is_smcd, false)
+
+#define SMC_STAT_RMB_RX_SIZE_SMALL(smc, is_smcd) \
+ SMC_STAT_RMB(smc, buf_size_small, is_smcd, true)
+
+#define SMC_STAT_RMB_RX_FULL(smc, is_smcd) \
+ SMC_STAT_RMB(smc, buf_full, is_smcd, true)
+
+#define SMC_STAT_INC(_smc, type) \
+do { \
+ typeof(_smc) __smc = _smc; \
+ bool is_smcd = !(__smc)->conn.lnk; \
+ struct net *net = sock_net(&(__smc)->sk); \
+ struct smc_stats __percpu *smc_stats = net->smc.smc_stats; \
+ if ((is_smcd)) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_D].type); \
+ else \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_R].type); \
+} \
+while (0)
+
+#define SMC_STAT_CLNT_SUCC_INC(net, _aclc) \
+do { \
+ typeof(_aclc) acl = (_aclc); \
+ bool is_v2 = (acl->hdr.version == SMC_V2); \
+ bool is_smcd = (acl->hdr.typev1 == SMC_TYPE_D); \
+ struct smc_stats __percpu *smc_stats = (net)->smc.smc_stats; \
+ if (is_v2 && is_smcd) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_D].clnt_v2_succ_cnt); \
+ else if (is_v2 && !is_smcd) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_R].clnt_v2_succ_cnt); \
+ else if (!is_v2 && is_smcd) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_D].clnt_v1_succ_cnt); \
+ else if (!is_v2 && !is_smcd) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_R].clnt_v1_succ_cnt); \
+} \
+while (0)
+
+#define SMC_STAT_SERV_SUCC_INC(net, _ini) \
+do { \
+ typeof(_ini) i = (_ini); \
+ bool is_v2 = (i->smcd_version & SMC_V2); \
+ bool is_smcd = (i->is_smcd); \
+ typeof(net->smc.smc_stats) smc_stats = (net)->smc.smc_stats; \
+ if (is_v2 && is_smcd) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v2_succ_cnt); \
+ else if (is_v2 && !is_smcd) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_R].srv_v2_succ_cnt); \
+ else if (!is_v2 && is_smcd) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_D].srv_v1_succ_cnt); \
+ else if (!is_v2 && !is_smcd) \
+ this_cpu_inc(smc_stats->smc[SMC_TYPE_R].srv_v1_succ_cnt); \
+} \
+while (0)
+
+int smc_nl_get_stats(struct sk_buff *skb, struct netlink_callback *cb);
+int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb);
+int smc_stats_init(struct net *net);
+void smc_stats_exit(struct net *net);
+
+#endif /* NET_SMC_SMC_STATS_H_ */
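
Both SMC_STAT_PAYLOAD_SUB and SMC_STAT_RMB_SIZE_SUB map a byte count onto the SMC_BUF_* histogram with fls(len >> 13), stepping the index down by one when len is exactly a power of two so that, for example, exactly 8K still lands in the 8K bucket. A user-space replica of that index calculation, assuming the bucket numbering of the enum above:

    /* Replicates the bucket arithmetic of the payload / RMB-size histograms.
     * Indices follow the SMC_BUF_* enum: 0 = 8K ... 7 = 1024K, 8 = >1024K.
     */
    #include <stdio.h>

    static int fls64_demo(unsigned long long x)
    {
        int r = 0;

        while (x) {
            x >>= 1;
            r++;
        }
        return r;
    }

    static int smc_bucket(unsigned long long len)
    {
        int pos = fls64_demo(len >> 13);
        int max = 8;                       /* SMC_BUF_MAX - 1 */

        if (pos >= max)
            return max;
        return len == (1ULL << (pos + 12)) ? pos - 1 : pos;
    }

    int main(void)
    {
        printf("8K   -> bucket %d\n", smc_bucket(8192));     /* 0 */
        printf("9000 -> bucket %d\n", smc_bucket(9000));     /* 1 */
        printf("64K  -> bucket %d\n", smc_bucket(65536));    /* 3 */
        printf("600K -> bucket %d\n", smc_bucket(614400));   /* 7 */
        printf("2M   -> bucket %d\n", smc_bucket(2097152));  /* 8 */
        return 0;
    }
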
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 4532c16bf85e..c79361dfcdfb 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -27,6 +27,7 @@
#include "smc_close.h"
#include "smc_ism.h"
#include "smc_tx.h"
+#include "smc_stats.h"
#define SMC_TX_WORK_DELAY 0
#define SMC_TX_CORK_DELAY (HZ >> 2) /* 250 ms */
@@ -45,6 +46,8 @@ static void smc_tx_write_space(struct sock *sk)
/* similar to sk_stream_write_space */
if (atomic_read(&smc->conn.sndbuf_space) && sock) {
+ if (test_bit(SOCK_NOSPACE, &sock->flags))
+ SMC_STAT_RMB_TX_FULL(smc, !smc->conn.lnk);
clear_bit(SOCK_NOSPACE, &sock->flags);
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
@@ -151,9 +154,19 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
goto out_err;
}
+ if (sk->sk_state == SMC_INIT)
+ return -ENOTCONN;
+
+ if (len > conn->sndbuf_desc->len)
+ SMC_STAT_RMB_TX_SIZE_SMALL(smc, !conn->lnk);
+
+ if (len > conn->peer_rmbe_size)
+ SMC_STAT_RMB_TX_PEER_SIZE_SMALL(smc, !conn->lnk);
+
+ if (msg->msg_flags & MSG_OOB)
+ SMC_STAT_INC(smc, urg_data_cnt);
+
while (msg_data_left(msg)) {
- if (sk->sk_state == SMC_INIT)
- return -ENOTCONN;
if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
(smc->sk.sk_err == ECONNABORTED) ||
conn->killed)
@@ -419,8 +432,12 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* destination: RMBE */
/* cf. snd_wnd */
rmbespace = atomic_read(&conn->peer_rmbe_space);
- if (rmbespace <= 0)
+ if (rmbespace <= 0) {
+ struct smc_sock *smc = container_of(conn, struct smc_sock,
+ conn);
+ SMC_STAT_RMB_TX_PEER_FULL(smc, !conn->lnk);
return 0;
+ }
smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
@@ -479,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
@@ -533,6 +550,22 @@ out_unlock:
return rc;
}
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+{
+ struct smc_link *link = conn->lnk;
+ int rc = -ENOLINK;
+
+ if (!link)
+ return rc;
+
+ atomic_inc(&link->wr_tx_refcnt);
+ if (smc_link_usable(link))
+ rc = _smcr_tx_sndbuf_nonempty(conn);
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+ return rc;
+}
+
static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index cbc73a7e4d59..a419e9af36b9 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -322,9 +322,12 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
if (rc)
return rc;
+ atomic_inc(&link->wr_reg_refcnt);
rc = wait_event_interruptible_timeout(link->wr_reg_wait,
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
+ if (atomic_dec_and_test(&link->wr_reg_refcnt))
+ wake_up_all(&link->wr_reg_wait);
if (!rc) {
/* timeout - terminate link */
smcr_link_down_cond_sched(link);
@@ -566,10 +569,15 @@ void smc_wr_free_link(struct smc_link *lnk)
return;
ibdev = lnk->smcibdev->ibdev;
+ smc_wr_wakeup_reg_wait(lnk);
+ smc_wr_wakeup_tx_wait(lnk);
+
if (smc_wr_tx_wait_no_pending_sends(lnk))
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) *
sizeof(*lnk->wr_tx_mask));
+ wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt)));
+ wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt)));
if (lnk->wr_rx_dma_addr) {
ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
@@ -728,7 +736,9 @@ int smc_wr_create_link(struct smc_link *lnk)
memset(lnk->wr_tx_mask, 0,
BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
init_waitqueue_head(&lnk->wr_tx_wait);
+ atomic_set(&lnk->wr_tx_refcnt, 0);
init_waitqueue_head(&lnk->wr_reg_wait);
+ atomic_set(&lnk->wr_reg_refcnt, 0);
return rc;
dma_unmap:
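
The wr_tx_refcnt/wr_reg_refcnt additions in smc_tx.c and smc_wr.c together form a small refcount-plus-waitqueue guard: a sender or MR-registering user pins the link around its critical section and wakes the queue on the last put, while smc_wr_free_link() first wakes any sleepers and then waits for both counters to drain before unmapping. A stripped-down sketch of the pattern with made-up names (not kernel API):

    /* Minimal sketch of the guard pattern; struct and function names are
     * illustrative.  Assumes init_waitqueue_head()/atomic_set(.., 0) were
     * called at setup, as smc_wr_create_link() does for the real fields.
     */
    struct guarded_link {
        atomic_t          users;
        wait_queue_head_t drain;
    };

    static int use_link(struct guarded_link *g, bool usable)
    {
        int rc = -ENOLINK;

        atomic_inc(&g->users);               /* pin the link */
        if (usable)
            rc = 0;                          /* ... do the guarded work ... */
        if (atomic_dec_and_test(&g->users))
            wake_up_all(&g->drain);          /* last user wakes teardown */
        return rc;
    }

    static void teardown_link(struct guarded_link *g)
    {
        wait_event(g->drain, !atomic_read(&g->users));
        /* now safe to free the link's resources */
    }
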
diff --git a/net/socket.c b/net/socket.c
index 4f2c6d2795d0..7f64a6eccf63 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,6 +104,7 @@
#include <linux/sockios.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>
+#include <linux/ptp_clock_kernel.h>
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
@@ -165,6 +166,55 @@ static const struct file_operations socket_file_ops = {
.show_fdinfo = sock_show_fdinfo,
};
+static const char * const pf_family_names[] = {
+ [PF_UNSPEC] = "PF_UNSPEC",
+ [PF_UNIX] = "PF_UNIX/PF_LOCAL",
+ [PF_INET] = "PF_INET",
+ [PF_AX25] = "PF_AX25",
+ [PF_IPX] = "PF_IPX",
+ [PF_APPLETALK] = "PF_APPLETALK",
+ [PF_NETROM] = "PF_NETROM",
+ [PF_BRIDGE] = "PF_BRIDGE",
+ [PF_ATMPVC] = "PF_ATMPVC",
+ [PF_X25] = "PF_X25",
+ [PF_INET6] = "PF_INET6",
+ [PF_ROSE] = "PF_ROSE",
+ [PF_DECnet] = "PF_DECnet",
+ [PF_NETBEUI] = "PF_NETBEUI",
+ [PF_SECURITY] = "PF_SECURITY",
+ [PF_KEY] = "PF_KEY",
+ [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
+ [PF_PACKET] = "PF_PACKET",
+ [PF_ASH] = "PF_ASH",
+ [PF_ECONET] = "PF_ECONET",
+ [PF_ATMSVC] = "PF_ATMSVC",
+ [PF_RDS] = "PF_RDS",
+ [PF_SNA] = "PF_SNA",
+ [PF_IRDA] = "PF_IRDA",
+ [PF_PPPOX] = "PF_PPPOX",
+ [PF_WANPIPE] = "PF_WANPIPE",
+ [PF_LLC] = "PF_LLC",
+ [PF_IB] = "PF_IB",
+ [PF_MPLS] = "PF_MPLS",
+ [PF_CAN] = "PF_CAN",
+ [PF_TIPC] = "PF_TIPC",
+ [PF_BLUETOOTH] = "PF_BLUETOOTH",
+ [PF_IUCV] = "PF_IUCV",
+ [PF_RXRPC] = "PF_RXRPC",
+ [PF_ISDN] = "PF_ISDN",
+ [PF_PHONET] = "PF_PHONET",
+ [PF_IEEE802154] = "PF_IEEE802154",
+ [PF_CAIF] = "PF_CAIF",
+ [PF_ALG] = "PF_ALG",
+ [PF_NFC] = "PF_NFC",
+ [PF_VSOCK] = "PF_VSOCK",
+ [PF_KCM] = "PF_KCM",
+ [PF_QIPCRTR] = "PF_QIPCRTR",
+ [PF_SMC] = "PF_SMC",
+ [PF_XDP] = "PF_XDP",
+ [PF_MCTP] = "PF_MCTP",
+};
+
/*
* The protocol list. Each protocol is registered in here.
*/
@@ -825,12 +875,18 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
empty = 0;
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
- !skb_is_swtx_tstamp(skb, false_tstamp) &&
- ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
- empty = 0;
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
- !skb_is_err_queue(skb))
- put_ts_pktinfo(msg, skb);
+ !skb_is_swtx_tstamp(skb, false_tstamp)) {
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
+
+ if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
+ tss.ts + 2)) {
+ empty = 0;
+
+ if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+ !skb_is_err_queue(skb))
+ put_ts_pktinfo(msg, skb);
+ }
}
if (!empty) {
if (sock_flag(sk, SOCK_TSTAMP_NEW))
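
The reworked hardware-timestamp branch is what allows a socket bound to a specific PTP hardware clock to have its raw hardware timestamps converted into that clock's domain via ptp_convert_timestamp() before delivery. A hedged user-space sketch of requesting such timestamps; the struct so_timestamping layout is the one added alongside this change (see <linux/net_tstamp.h>), and phc_index stands for whatever "ethtool -T <dev>" reports for the NIC:

    /* User-space sketch: request RX hardware timestamps bound to a PHC. */
    #include <linux/net_tstamp.h>
    #include <sys/socket.h>

    static int bind_rx_hwtstamps_to_phc(int fd, int phc_index)
    {
        struct so_timestamping ts = {
            .flags = SOF_TIMESTAMPING_RX_HARDWARE |
                     SOF_TIMESTAMPING_RAW_HARDWARE |
                     SOF_TIMESTAMPING_BIND_PHC,
            .bind_phc = phc_index,
        };

        return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &ts, sizeof(ts));
    }
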
@@ -1009,9 +1065,13 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
*/
static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
+static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
+ unsigned int cmd, struct ifreq *ifr,
+ void __user *uarg);
-void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
+void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
+ unsigned int cmd, struct ifreq *ifr,
+ void __user *uarg))
{
mutex_lock(&br_ioctl_mutex);
br_ioctl_hook = hook;
@@ -1019,6 +1079,22 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
}
EXPORT_SYMBOL(brioctl_set);
+int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
+ struct ifreq *ifr, void __user *uarg)
+{
+ int err = -ENOPKG;
+
+ if (!br_ioctl_hook)
+ request_module("bridge");
+
+ mutex_lock(&br_ioctl_mutex);
+ if (br_ioctl_hook)
+ err = br_ioctl_hook(net, br, cmd, ifr, uarg);
+ mutex_unlock(&br_ioctl_mutex);
+
+ return err;
+}
+
static DEFINE_MUTEX(vlan_ioctl_mutex);
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
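
br_ioctl_call() centralizes what every caller previously open-coded: request_module("bridge") if no hook is registered yet, then invoke the hook under br_ioctl_mutex. The bridge module keeps announcing its handler through brioctl_set(); a hedged sketch of both sides, with the bridge-side handler name invented for illustration:

    /* Illustrative only: my_bridge_ioctl is a placeholder for the handler
     * the bridge module registers; the registration calls are the real API.
     */
    static int my_bridge_ioctl(struct net *net, struct net_bridge *br,
                               unsigned int cmd, struct ifreq *ifr,
                               void __user *uarg)
    {
        /* dispatch SIOCGIFBR/SIOCSIFBR/SIOCBRADDBR/SIOCBRDELBR here */
        return -EOPNOTSUPP;
    }

    static int __init my_bridge_init(void)
    {
        brioctl_set(my_bridge_ioctl);      /* publish the handler */
        return 0;
    }

    static void __exit my_bridge_exit(void)
    {
        brioctl_set(NULL);                 /* withdraw it on unload */
    }

Callers then need only the single line used in sock_ioctl() further down: err = br_ioctl_call(net, NULL, cmd, NULL, argp);
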
@@ -1033,8 +1109,11 @@ EXPORT_SYMBOL(vlan_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock,
unsigned int cmd, unsigned long arg)
{
+ struct ifreq ifr;
+ bool need_copyout;
int err;
void __user *argp = (void __user *)arg;
+ void __user *data;
err = sock->ops->ioctl(sock, cmd, arg);
@@ -1045,25 +1124,16 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
if (err != -ENOIOCTLCMD)
return err;
- if (cmd == SIOCGIFCONF) {
- struct ifconf ifc;
- if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
- return -EFAULT;
- rtnl_lock();
- err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
- rtnl_unlock();
- if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
- err = -EFAULT;
- } else {
- struct ifreq ifr;
- bool need_copyout;
- if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ if (!is_socket_ioctl_cmd(cmd))
+ return -ENOTTY;
+
+ if (get_user_ifreq(&ifr, &data, argp))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
+ if (!err && need_copyout)
+ if (put_user_ifreq(&ifr, argp))
return -EFAULT;
- err = dev_ioctl(net, cmd, &ifr, &need_copyout);
- if (!err && need_copyout)
- if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- }
+
return err;
}
@@ -1085,12 +1155,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
net = sock_net(sk);
if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
struct ifreq ifr;
+ void __user *data;
bool need_copyout;
- if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ if (get_user_ifreq(&ifr, &data, argp))
return -EFAULT;
- err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
if (!err && need_copyout)
- if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ if (put_user_ifreq(&ifr, argp))
return -EFAULT;
} else
#ifdef CONFIG_WEXT_CORE
@@ -1115,14 +1186,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case SIOCSIFBR:
case SIOCBRADDBR:
case SIOCBRDELBR:
- err = -ENOPKG;
- if (!br_ioctl_hook)
- request_module("bridge");
-
- mutex_lock(&br_ioctl_mutex);
- if (br_ioctl_hook)
- err = br_ioctl_hook(net, cmd, argp);
- mutex_unlock(&br_ioctl_mutex);
+ err = br_ioctl_call(net, NULL, cmd, NULL, argp);
break;
case SIOCGIFVLAN:
case SIOCSIFVLAN:
@@ -1162,6 +1226,11 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
cmd == SIOCGSTAMP_NEW,
false);
break;
+
+ case SIOCGIFCONF:
+ err = dev_ifconf(net, argp);
+ break;
+
default:
err = sock_do_ioctl(net, sock, cmd, arg);
break;
@@ -1667,32 +1736,22 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
return __sys_listen(fd, backlog);
}
-int __sys_accept4_file(struct file *file, unsigned file_flags,
+struct file *do_accept(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags,
- unsigned long nofile)
+ int __user *upeer_addrlen, int flags)
{
struct socket *sock, *newsock;
struct file *newfile;
- int err, len, newfd;
+ int err, len;
struct sockaddr_storage address;
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
-
- if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
- flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
-
sock = sock_from_file(file);
- if (!sock) {
- err = -ENOTSOCK;
- goto out;
- }
+ if (!sock)
+ return ERR_PTR(-ENOTSOCK);
- err = -ENFILE;
newsock = sock_alloc();
if (!newsock)
- goto out;
+ return ERR_PTR(-ENFILE);
newsock->type = sock->type;
newsock->ops = sock->ops;
@@ -1703,18 +1762,9 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
*/
__module_get(newsock->ops->owner);
- newfd = __get_unused_fd_flags(flags, nofile);
- if (unlikely(newfd < 0)) {
- err = newfd;
- sock_release(newsock);
- goto out;
- }
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
- if (IS_ERR(newfile)) {
- err = PTR_ERR(newfile);
- put_unused_fd(newfd);
- goto out;
- }
+ if (IS_ERR(newfile))
+ return newfile;
err = security_socket_accept(sock, newsock);
if (err)
@@ -1739,16 +1789,38 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
}
/* File flags are not inherited via accept() unlike another OSes. */
-
- fd_install(newfd, newfile);
- err = newfd;
-out:
- return err;
+ return newfile;
out_fd:
fput(newfile);
- put_unused_fd(newfd);
- goto out;
+ return ERR_PTR(err);
+}
+int __sys_accept4_file(struct file *file, unsigned file_flags,
+ struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile)
+{
+ struct file *newfile;
+ int newfd;
+
+ if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
+ return -EINVAL;
+
+ if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
+ flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
+
+ newfd = __get_unused_fd_flags(flags, nofile);
+ if (unlikely(newfd < 0))
+ return newfd;
+
+ newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
+ flags);
+ if (IS_ERR(newfile)) {
+ put_unused_fd(newfd);
+ return PTR_ERR(newfile);
+ }
+ fd_install(newfd, newfile);
+ return newfd;
}
/*
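
Factoring do_accept() out of __sys_accept4_file() separates "produce an accepted struct file" from "reserve and install a file descriptor", so callers that manage descriptors themselves can take the file directly. The rewritten __sys_accept4_file() above is the canonical user; a condensed, hedged sketch of a caller that wants the file without touching the fd table:

    /* Hedged sketch of a do_accept() consumer that installs the resulting
     * struct file somewhere other than the calling task's fd table.
     */
    struct file *newfile;

    newfile = do_accept(listen_file, 0, NULL, NULL, O_NONBLOCK);
    if (IS_ERR(newfile))
        return PTR_ERR(newfile);
    /* hand newfile to its new owner; that owner is responsible for fput() */
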
@@ -2975,7 +3047,7 @@ int sock_register(const struct net_proto_family *ops)
}
spin_unlock(&net_family_lock);
- pr_info("NET: Registered protocol family %d\n", ops->family);
+ pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
return err;
}
EXPORT_SYMBOL(sock_register);
@@ -3003,7 +3075,7 @@ void sock_unregister(int family)
synchronize_rcu();
- pr_info("NET: Unregistered protocol family %d\n", family);
+ pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
}
EXPORT_SYMBOL(sock_unregister);
@@ -3071,154 +3143,55 @@ void socket_seq_show(struct seq_file *seq)
}
#endif /* CONFIG_PROC_FS */
-#ifdef CONFIG_COMPAT
-static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
+/* Handle the fact that while struct ifreq has the same *layout* on
+ * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
+ * which are handled elsewhere, it still has different *size* due to
+ * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
+ * resulting in struct ifreq being 32 and 40 bytes respectively).
+ * As a result, if the struct happens to be at the end of a page and
+ * the next page isn't readable/writable, we get a fault. To prevent
+ * that, copy back and forth to the full size.
+ */
+int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
{
- struct compat_ifconf ifc32;
- struct ifconf ifc;
- int err;
+ if (in_compat_syscall()) {
+ struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
- if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
- return -EFAULT;
+ memset(ifr, 0, sizeof(*ifr));
+ if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
+ return -EFAULT;
- ifc.ifc_len = ifc32.ifc_len;
- ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
+ if (ifrdata)
+ *ifrdata = compat_ptr(ifr32->ifr_data);
- rtnl_lock();
- err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
- rtnl_unlock();
- if (err)
- return err;
+ return 0;
+ }
- ifc32.ifc_len = ifc.ifc_len;
- if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
+ if (copy_from_user(ifr, arg, sizeof(*ifr)))
return -EFAULT;
+ if (ifrdata)
+ *ifrdata = ifr->ifr_data;
+
return 0;
}
+EXPORT_SYMBOL(get_user_ifreq);
-static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
+int put_user_ifreq(struct ifreq *ifr, void __user *arg)
{
- struct compat_ethtool_rxnfc __user *compat_rxnfc;
- bool convert_in = false, convert_out = false;
- size_t buf_size = 0;
- struct ethtool_rxnfc __user *rxnfc = NULL;
- struct ifreq ifr;
- u32 rule_cnt = 0, actual_rule_cnt;
- u32 ethcmd;
- u32 data;
- int ret;
+ size_t size = sizeof(*ifr);
- if (get_user(data, &ifr32->ifr_ifru.ifru_data))
- return -EFAULT;
-
- compat_rxnfc = compat_ptr(data);
-
- if (get_user(ethcmd, &compat_rxnfc->cmd))
- return -EFAULT;
-
- /* Most ethtool structures are defined without padding.
- * Unfortunately struct ethtool_rxnfc is an exception.
- */
- switch (ethcmd) {
- default:
- break;
- case ETHTOOL_GRXCLSRLALL:
- /* Buffer size is variable */
- if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
- return -EFAULT;
- if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
- return -ENOMEM;
- buf_size += rule_cnt * sizeof(u32);
- fallthrough;
- case ETHTOOL_GRXRINGS:
- case ETHTOOL_GRXCLSRLCNT:
- case ETHTOOL_GRXCLSRULE:
- case ETHTOOL_SRXCLSRLINS:
- convert_out = true;
- fallthrough;
- case ETHTOOL_SRXCLSRLDEL:
- buf_size += sizeof(struct ethtool_rxnfc);
- convert_in = true;
- rxnfc = compat_alloc_user_space(buf_size);
- break;
- }
+ if (in_compat_syscall())
+ size = sizeof(struct compat_ifreq);
- if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+ if (copy_to_user(arg, ifr, size))
return -EFAULT;
- ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
-
- if (convert_in) {
- /* We expect there to be holes between fs.m_ext and
- * fs.ring_cookie and at the end of fs, but nowhere else.
- */
- BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
- sizeof(compat_rxnfc->fs.m_ext) !=
- offsetof(struct ethtool_rxnfc, fs.m_ext) +
- sizeof(rxnfc->fs.m_ext));
- BUILD_BUG_ON(
- offsetof(struct compat_ethtool_rxnfc, fs.location) -
- offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
- offsetof(struct ethtool_rxnfc, fs.location) -
- offsetof(struct ethtool_rxnfc, fs.ring_cookie));
-
- if (copy_in_user(rxnfc, compat_rxnfc,
- (void __user *)(&rxnfc->fs.m_ext + 1) -
- (void __user *)rxnfc) ||
- copy_in_user(&rxnfc->fs.ring_cookie,
- &compat_rxnfc->fs.ring_cookie,
- (void __user *)(&rxnfc->fs.location + 1) -
- (void __user *)&rxnfc->fs.ring_cookie))
- return -EFAULT;
- if (ethcmd == ETHTOOL_GRXCLSRLALL) {
- if (put_user(rule_cnt, &rxnfc->rule_cnt))
- return -EFAULT;
- } else if (copy_in_user(&rxnfc->rule_cnt,
- &compat_rxnfc->rule_cnt,
- sizeof(rxnfc->rule_cnt)))
- return -EFAULT;
- }
-
- ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
- if (ret)
- return ret;
-
- if (convert_out) {
- if (copy_in_user(compat_rxnfc, rxnfc,
- (const void __user *)(&rxnfc->fs.m_ext + 1) -
- (const void __user *)rxnfc) ||
- copy_in_user(&compat_rxnfc->fs.ring_cookie,
- &rxnfc->fs.ring_cookie,
- (const void __user *)(&rxnfc->fs.location + 1) -
- (const void __user *)&rxnfc->fs.ring_cookie) ||
- copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
- sizeof(rxnfc->rule_cnt)))
- return -EFAULT;
-
- if (ethcmd == ETHTOOL_GRXCLSRLALL) {
- /* As an optimisation, we only copy the actual
- * number of rules that the underlying
- * function returned. Since Mallory might
- * change the rule count in user memory, we
- * check that it is less than the rule count
- * originally given (as the user buffer size),
- * which has been range-checked.
- */
- if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
- return -EFAULT;
- if (actual_rule_cnt < rule_cnt)
- rule_cnt = actual_rule_cnt;
- if (copy_in_user(&compat_rxnfc->rule_locs[0],
- &rxnfc->rule_locs[0],
- rule_cnt * sizeof(u32)))
- return -EFAULT;
- }
- }
-
return 0;
}
+EXPORT_SYMBOL(put_user_ifreq);
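
get_user_ifreq()/put_user_ifreq() give ioctl paths one pair of helpers that handles both native and compat callers, including extracting the ifr_data user pointer via compat_ptr(), in place of the old copy_{from,to}_user(..., sizeof(struct ifreq)) pattern. A hedged sketch of a handler built on them; the function name and command dispatch are placeholders:

    /* Illustrative handler; only the get_user_ifreq()/put_user_ifreq()
     * calls reflect the new API, the rest is placeholder.
     */
    static int my_ifreq_ioctl(struct net *net, unsigned int cmd,
                              void __user *arg)
    {
        struct ifreq ifr;
        void __user *data;

        if (get_user_ifreq(&ifr, &data, arg))  /* native or compat layout */
            return -EFAULT;

        /* look up the device by ifr.ifr_name; "data" is whatever user
         * pointer the caller placed in ifr_data
         */

        return put_user_ifreq(&ifr, arg);      /* copies back the right size */
    }
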
+#ifdef CONFIG_COMPAT
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
{
compat_uptr_t uptr32;
@@ -3226,7 +3199,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
void __user *saved;
int err;
- if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
+ if (get_user_ifreq(&ifr, NULL, uifr32))
return -EFAULT;
if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
@@ -3235,10 +3208,10 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
- err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
+ err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
if (!err) {
ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
- if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
+ if (put_user_ifreq(&ifr, uifr32))
err = -EFAULT;
}
return err;
@@ -3249,97 +3222,15 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
struct compat_ifreq __user *u_ifreq32)
{
struct ifreq ifreq;
- u32 data32;
-
- if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
- return -EFAULT;
- if (get_user(data32, &u_ifreq32->ifr_data))
- return -EFAULT;
- ifreq.ifr_data = compat_ptr(data32);
-
- return dev_ioctl(net, cmd, &ifreq, NULL);
-}
-
-static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
- unsigned int cmd,
- struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- /* Handle the fact that while struct ifreq has the same *layout* on
- * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
- * which are handled elsewhere, it still has different *size* due to
- * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
- * resulting in struct ifreq being 32 and 40 bytes respectively).
- * As a result, if the struct happens to be at the end of a page and
- * the next page isn't readable/writable, we get a fault. To prevent
- * that, copy back and forth to the full size.
- */
-
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
-
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
-
- if (!err) {
- switch (cmd) {
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFMEM:
- case SIOCGIFHWADDR:
- case SIOCGIFINDEX:
- case SIOCGIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCGIFDSTADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFPFLAGS:
- case SIOCGIFTXQLEN:
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- case SIOCGIFNAME:
- if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
- err = -EFAULT;
- break;
- }
- }
- return err;
-}
+ void __user *data;
-static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
- struct compat_ifreq __user *uifr32)
-{
- struct ifreq ifr;
- struct compat_ifmap __user *uifmap32;
- int err;
-
- uifmap32 = &uifr32->ifr_ifru.ifru_map;
- err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
- err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= get_user(ifr.ifr_map.port, &uifmap32->port);
- if (err)
+ if (!is_socket_ioctl_cmd(cmd))
+ return -ENOTTY;
+ if (get_user_ifreq(&ifreq, &data, u_ifreq32))
return -EFAULT;
+ ifreq.ifr_data = data;
- err = dev_ioctl(net, cmd, &ifr, NULL);
-
- if (cmd == SIOCGIFMAP && !err) {
- err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
- err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= put_user(ifr.ifr_map.port, &uifmap32->port);
- if (err)
- err = -EFAULT;
- }
- return err;
+ return dev_ioctl(net, cmd, &ifreq, data, NULL);
}
/* Since old style bridge ioctls end up using SIOCDEVPRIVATE
@@ -3365,21 +3256,14 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
struct net *net = sock_net(sk);
if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
- return compat_ifr_data_ioctl(net, cmd, argp);
+ return sock_ioctl(file, cmd, (unsigned long)argp);
switch (cmd) {
case SIOCSIFBR:
case SIOCGIFBR:
return old_bridge_ioctl(argp);
- case SIOCGIFCONF:
- return compat_dev_ifconf(net, argp);
- case SIOCETHTOOL:
- return ethtool_ioctl(net, argp);
case SIOCWANDEV:
return compat_siocwandev(net, argp);
- case SIOCGIFMAP:
- case SIOCSIFMAP:
- return compat_sioc_ifmap(net, cmd, argp);
case SIOCGSTAMP_OLD:
case SIOCGSTAMPNS_OLD:
if (!sock->ops->gettstamp)
@@ -3387,6 +3271,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
!COMPAT_USE_64BIT_TIME);
+ case SIOCETHTOOL:
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
case SIOCSHWTSTAMP:
@@ -3404,10 +3289,13 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGSKNS:
case SIOCGSTAMP_NEW:
case SIOCGSTAMPNS_NEW:
+ case SIOCGIFCONF:
return sock_ioctl(file, cmd, arg);
case SIOCGIFFLAGS:
case SIOCSIFFLAGS:
+ case SIOCGIFMAP:
+ case SIOCSIFMAP:
case SIOCGIFMETRIC:
case SIOCSIFMETRIC:
case SIOCGIFMTU:
@@ -3444,8 +3332,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCBONDRELEASE:
case SIOCBONDSETHWADDR:
case SIOCBONDCHANGEACTIVE:
- return compat_ifreq_ioctl(net, sock, cmd, argp);
-
case SIOCSARP:
case SIOCGARP:
case SIOCDARP:
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index b3815c1e8f2e..9c0343568d2a 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -58,7 +58,7 @@ static void strp_abort_strp(struct strparser *strp, int err)
/* Report an error on the lower socket */
sk->sk_err = -err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
}
}
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 9488600451e8..1c8de397d6ad 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -12,7 +12,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
addr.o rpcb_clnt.o timer.o xdr.o \
- sunrpc_syms.o cache.o rpc_pipe.o \
+ sunrpc_syms.o cache.o rpc_pipe.o sysfs.o \
svc_xprt.o \
xprtmultipath.o
sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index d1c003a25b0f..61c276bddaf2 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -160,7 +160,7 @@ static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
mutex_lock(&sn->gssp_lock);
clnt = sn->gssp_clnt;
if (clnt)
- atomic_inc(&clnt->cl_count);
+ refcount_inc(&clnt->cl_count);
mutex_unlock(&sn->gssp_lock);
return clnt;
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 6dff64374bfe..3e776e3dff91 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -194,6 +194,8 @@ static void rsi_request(struct cache_detail *cd,
qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len);
qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len);
(*bpp)[-1] = '\n';
+ WARN_ONCE(*blen < 0,
+ "RPCSEC/GSS credential too large - please use gssproxy\n");
}
static int rsi_parse(struct cache_detail *cd,
@@ -707,11 +709,11 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
/*
* Verify the checksum on the header and return SVC_OK on success.
* Otherwise, return SVC_DROP (in the case of a bad sequence number)
- * or return SVC_DENIED and indicate error in authp.
+ * or return SVC_DENIED and indicate error in rqstp->rq_auth_stat.
*/
static int
gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
- __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp)
+ __be32 *rpcstart, struct rpc_gss_wire_cred *gc)
{
struct gss_ctx *ctx_id = rsci->mechctx;
struct xdr_buf rpchdr;
@@ -725,7 +727,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart;
xdr_buf_from_iov(&iov, &rpchdr);
- *authp = rpc_autherr_badverf;
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
if (argv->iov_len < 4)
return SVC_DENIED;
flavor = svc_getnl(argv);
@@ -737,13 +739,13 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
if (rqstp->rq_deferred) /* skip verification of revisited request */
return SVC_OK;
if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) {
- *authp = rpcsec_gsserr_credproblem;
+ rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
return SVC_DENIED;
}
if (gc->gc_seq > MAXSEQ) {
trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq);
- *authp = rpcsec_gsserr_ctxproblem;
+ rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
return SVC_DENIED;
}
if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq))
@@ -1038,6 +1040,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
struct rpc_gss_wire_cred *gc = &svcdata->clcred;
int stat;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+
/*
* A gss export can be specified either by:
* export *(sec=krb5,rw)
@@ -1053,6 +1057,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
stat = svcauth_unix_set_client(rqstp);
if (stat == SVC_DROP || stat == SVC_CLOSE)
return stat;
+
+ rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
@@ -1142,7 +1148,7 @@ static void gss_free_in_token_pages(struct gssp_in_token *in_token)
}
static int gss_read_proxy_verf(struct svc_rqst *rqstp,
- struct rpc_gss_wire_cred *gc, __be32 *authp,
+ struct rpc_gss_wire_cred *gc,
struct xdr_netobj *in_handle,
struct gssp_in_token *in_token)
{
@@ -1151,7 +1157,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
int pages, i, res, pgto, pgfrom;
size_t inlen, to_offs, from_offs;
- res = gss_read_common_verf(gc, argv, authp, in_handle);
+ res = gss_read_common_verf(gc, argv, &rqstp->rq_auth_stat, in_handle);
if (res)
return res;
@@ -1227,7 +1233,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
* Otherwise, drop the request pending an answer to the upcall.
*/
static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
- struct rpc_gss_wire_cred *gc, __be32 *authp)
+ struct rpc_gss_wire_cred *gc)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -1236,7 +1242,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
memset(&rsikey, 0, sizeof(rsikey));
- ret = gss_read_verf(gc, argv, authp,
+ ret = gss_read_verf(gc, argv, &rqstp->rq_auth_stat,
&rsikey.in_handle, &rsikey.in_token);
if (ret)
return ret;
@@ -1275,7 +1281,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd,
long long ctxh;
struct gss_api_mech *gm = NULL;
time64_t expiry;
- int status = -EINVAL;
+ int status;
memset(&rsci, 0, sizeof(rsci));
/* context handle */
@@ -1339,7 +1345,7 @@ out:
}
static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
- struct rpc_gss_wire_cred *gc, __be32 *authp)
+ struct rpc_gss_wire_cred *gc)
{
struct kvec *resv = &rqstp->rq_res.head[0];
struct xdr_netobj cli_handle;
@@ -1351,8 +1357,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
memset(&ud, 0, sizeof(ud));
- ret = gss_read_proxy_verf(rqstp, gc, authp,
- &ud.in_handle, &ud.in_token);
+ ret = gss_read_proxy_verf(rqstp, gc, &ud.in_handle, &ud.in_token);
if (ret)
return ret;
@@ -1525,7 +1530,7 @@ static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
* response here and return SVC_COMPLETE.
*/
static int
-svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_gss_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -1538,7 +1543,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
if (!svcdata)
svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
if (!svcdata)
@@ -1575,22 +1580,22 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
goto auth_err;
- *authp = rpc_autherr_badverf;
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
switch (gc->gc_proc) {
case RPC_GSS_PROC_INIT:
case RPC_GSS_PROC_CONTINUE_INIT:
if (use_gss_proxy(SVC_NET(rqstp)))
- return svcauth_gss_proxy_init(rqstp, gc, authp);
+ return svcauth_gss_proxy_init(rqstp, gc);
else
- return svcauth_gss_legacy_init(rqstp, gc, authp);
+ return svcauth_gss_legacy_init(rqstp, gc);
case RPC_GSS_PROC_DATA:
case RPC_GSS_PROC_DESTROY:
/* Look up the context, and check the verifier: */
- *authp = rpcsec_gsserr_credproblem;
+ rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx);
if (!rsci)
goto auth_err;
- switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) {
+ switch (gss_verify_header(rqstp, rsci, rpcstart, gc)) {
case SVC_OK:
break;
case SVC_DENIED:
@@ -1600,7 +1605,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
}
break;
default:
- *authp = rpc_autherr_rejectedcred;
+ rqstp->rq_auth_stat = rpc_autherr_rejectedcred;
goto auth_err;
}
@@ -1616,13 +1621,13 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
svc_putnl(resv, RPC_SUCCESS);
goto complete;
case RPC_GSS_PROC_DATA:
- *authp = rpcsec_gsserr_ctxproblem;
+ rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
svcdata->verf_start = resv->iov_base + resv->iov_len;
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
rqstp->rq_cred = rsci->cred;
get_group_info(rsci->cred.cr_group_info);
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
switch (gc->gc_svc) {
case RPC_GSS_SVC_NONE:
break;
@@ -1980,7 +1985,7 @@ gss_svc_init_net(struct net *net)
goto out2;
return 0;
out2:
- destroy_use_gss_proxy_proc_entry(net);
+ rsi_cache_destroy_net(net);
out1:
rsc_cache_destroy_net(net);
return rv;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 1a2c1c44bb00..59641803472c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -803,7 +803,7 @@ static int cache_request(struct cache_detail *detail,
detail->cache_request(detail, crq->item, &bp, &len);
if (len < 0)
- return -EAGAIN;
+ return -E2BIG;
return PAGE_SIZE - len;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 42623d6b8f0e..f056ff931444 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -41,6 +41,7 @@
#include <trace/events/sunrpc.h>
#include "sunrpc.h"
+#include "sysfs.h"
#include "netns.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -166,7 +167,7 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
case RPC_PIPEFS_MOUNT:
if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
return 1;
- if (atomic_read(&clnt->cl_count) == 0)
+ if (refcount_read(&clnt->cl_count) == 0)
return 1;
break;
case RPC_PIPEFS_UMOUNT:
@@ -327,6 +328,7 @@ err_auth:
out:
if (pipefs_sb)
rpc_put_sb_net(net);
+ rpc_sysfs_client_destroy(clnt);
rpc_clnt_debugfs_unregister(clnt);
return err;
}
@@ -410,24 +412,26 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
}
rpc_clnt_set_transport(clnt, xprt, timeout);
+ xprt->main = true;
xprt_iter_init(&clnt->cl_xpi, xps);
xprt_switch_put(xps);
clnt->cl_rtt = &clnt->cl_rtt_default;
rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
- atomic_set(&clnt->cl_count, 1);
+ refcount_set(&clnt->cl_count, 1);
if (nodename == NULL)
nodename = utsname()->nodename;
/* save the nodename */
rpc_clnt_set_nodename(clnt, nodename);
+ rpc_sysfs_client_setup(clnt, xps, rpc_net_ns(clnt));
err = rpc_client_register(clnt, args->authflavor, args->client_name);
if (err)
goto out_no_path;
if (parent)
- atomic_inc(&parent->cl_count);
+ refcount_inc(&parent->cl_count);
trace_rpc_clnt_new(clnt, xprt, program->name, args->servername);
return clnt;
@@ -733,6 +737,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
rpc_unregister_client(clnt);
__rpc_clnt_remove_pipedir(clnt);
+ rpc_sysfs_client_destroy(clnt);
rpc_clnt_debugfs_unregister(clnt);
/*
@@ -879,6 +884,7 @@ static void rpc_free_client_work(struct work_struct *work)
* so they cannot be called in rpciod, so they are handled separately
* here.
*/
+ rpc_sysfs_client_destroy(clnt);
rpc_clnt_debugfs_unregister(clnt);
rpc_free_clid(clnt);
rpc_clnt_remove_pipedir(clnt);
@@ -912,18 +918,16 @@ rpc_free_client(struct rpc_clnt *clnt)
static struct rpc_clnt *
rpc_free_auth(struct rpc_clnt *clnt)
{
- if (clnt->cl_auth == NULL)
- return rpc_free_client(clnt);
-
/*
* Note: RPCSEC_GSS may need to send NULL RPC calls in order to
* release remaining GSS contexts. This mechanism ensures
* that it can do so safely.
*/
- atomic_inc(&clnt->cl_count);
- rpcauth_release(clnt->cl_auth);
- clnt->cl_auth = NULL;
- if (atomic_dec_and_test(&clnt->cl_count))
+ if (clnt->cl_auth != NULL) {
+ rpcauth_release(clnt->cl_auth);
+ clnt->cl_auth = NULL;
+ }
+ if (refcount_dec_and_test(&clnt->cl_count))
return rpc_free_client(clnt);
return NULL;
}
@@ -937,7 +941,7 @@ rpc_release_client(struct rpc_clnt *clnt)
do {
if (list_empty(&clnt->cl_tasks))
wake_up(&destroy_wait);
- if (!atomic_dec_and_test(&clnt->cl_count))
+ if (refcount_dec_not_one(&clnt->cl_count))
break;
clnt = rpc_free_auth(clnt);
} while (clnt != NULL);
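
Aside: a minimal sketch (not from this patch) of the put-side pattern rpc_release_client() now uses. refcount_dec_not_one() refuses to drop the final reference, so the last holder falls through to rpc_free_auth(), which performs the closing refcount_dec_and_test() before freeing; example_put() below is a hypothetical name.

#include <linux/refcount.h>

/* Returns false while other references remain; returns true for the
 * caller that holds the last reference, without dropping it, so the
 * free path can do the final decrement itself. */
static bool example_put(refcount_t *count)
{
	if (refcount_dec_not_one(count))
		return false;
	return true;
}
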
@@ -1076,7 +1080,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
if (clnt != NULL) {
rpc_task_set_transport(task, clnt);
task->tk_client = clnt;
- atomic_inc(&clnt->cl_count);
+ refcount_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
if (clnt->cl_softerr)
@@ -2100,6 +2104,30 @@ call_connect_status(struct rpc_task *task)
case -ENOTCONN:
case -EAGAIN:
case -ETIMEDOUT:
+ if (!(task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) &&
+ (task->tk_flags & RPC_TASK_MOVEABLE) &&
+ test_bit(XPRT_REMOVE, &xprt->state)) {
+ struct rpc_xprt *saved = task->tk_xprt;
+ struct rpc_xprt_switch *xps;
+
+ rcu_read_lock();
+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+ rcu_read_unlock();
+ if (xps->xps_nxprts > 1) {
+ long value;
+
+ xprt_release(task);
+ value = atomic_long_dec_return(&xprt->queuelen);
+ if (value == 0)
+ rpc_xprt_switch_remove_xprt(xps, saved);
+ xprt_put(saved);
+ task->tk_xprt = NULL;
+ task->tk_action = call_start;
+ }
+ xprt_switch_put(xps);
+ if (!task->tk_xprt)
+ return;
+ }
goto out_retry;
case -ENOBUFS:
rpc_delay(task, HZ >> 2);
@@ -2664,17 +2692,18 @@ static const struct rpc_procinfo rpcproc_null = {
.p_decode = rpcproc_decode_null,
};
-static int rpc_ping(struct rpc_clnt *clnt)
+static void
+rpc_null_call_prepare(struct rpc_task *task, void *data)
{
- struct rpc_message msg = {
- .rpc_proc = &rpcproc_null,
- };
- int err;
- err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
- RPC_TASK_NULLCREDS);
- return err;
+ task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
+ rpc_call_start(task);
}
+static const struct rpc_call_ops rpc_null_ops = {
+ .rpc_call_prepare = rpc_null_call_prepare,
+ .rpc_call_done = rpc_default_callback,
+};
+
static
struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
struct rpc_xprt *xprt, struct rpc_cred *cred, int flags,
@@ -2688,7 +2717,7 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
.rpc_xprt = xprt,
.rpc_message = &msg,
.rpc_op_cred = cred,
- .callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
+ .callback_ops = ops ?: &rpc_null_ops,
.callback_data = data,
.flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
RPC_TASK_NULLCREDS,
@@ -2703,6 +2732,19 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
}
EXPORT_SYMBOL_GPL(rpc_call_null);
+static int rpc_ping(struct rpc_clnt *clnt)
+{
+ struct rpc_task *task;
+ int status;
+
+ task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+ return status;
+}
+
struct rpc_cb_add_xprt_calldata {
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
@@ -2726,6 +2768,7 @@ static void rpc_cb_add_xprt_release(void *calldata)
}
static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
+ .rpc_call_prepare = rpc_null_call_prepare,
.rpc_call_done = rpc_cb_add_xprt_done,
.rpc_release = rpc_cb_add_xprt_release,
};
@@ -2744,6 +2787,15 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_cb_add_xprt_calldata *data;
struct rpc_task *task;
+ if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
+ rcu_read_lock();
+ pr_warn("SUNRPC: reached max allowed number (%d), did not add "
+ "transport to server: %s\n", clnt->cl_max_connect,
+ rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data)
return -ENOMEM;
@@ -2756,7 +2808,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
&rpc_cb_add_xprt_call_ops, data);
-
+ data->xps->xps_nunique_destaddr_xprts++;
rpc_put_task(task);
success:
return 1;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 56029e3af6ff..7dc9cc929bfd 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -8,14 +8,14 @@
#include <linux/debugfs.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
+
#include "netns.h"
+#include "fail.h"
static struct dentry *topdir;
static struct dentry *rpc_clnt_dir;
static struct dentry *rpc_xprt_dir;
-unsigned int rpc_inject_disconnect;
-
static int
tasks_show(struct seq_file *f, void *v)
{
@@ -90,7 +90,7 @@ static int tasks_open(struct inode *inode, struct file *filp)
struct seq_file *seq = filp->private_data;
struct rpc_clnt *clnt = seq->private = inode->i_private;
- if (!atomic_inc_not_zero(&clnt->cl_count)) {
+ if (!refcount_inc_not_zero(&clnt->cl_count)) {
seq_release(inode, filp);
ret = -EINVAL;
}
@@ -235,8 +235,6 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
/* make tasks file */
debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs, xprt,
&xprt_info_fops);
-
- atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
}
void
@@ -246,56 +244,30 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
xprt->debugfs = NULL;
}
-static int
-fault_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = kmalloc(128, GFP_KERNEL);
- if (!filp->private_data)
- return -ENOMEM;
- return 0;
-}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+struct fail_sunrpc_attr fail_sunrpc = {
+ .attr = FAULT_ATTR_INITIALIZER,
+};
+EXPORT_SYMBOL_GPL(fail_sunrpc);
-static int
-fault_release(struct inode *inode, struct file *filp)
+static void fail_sunrpc_init(void)
{
- kfree(filp->private_data);
- return 0;
-}
+ struct dentry *dir;
-static ssize_t
-fault_disconnect_read(struct file *filp, char __user *user_buf,
- size_t len, loff_t *offset)
-{
- char *buffer = (char *)filp->private_data;
- size_t size;
+ dir = fault_create_debugfs_attr("fail_sunrpc", NULL,
+ &fail_sunrpc.attr);
- size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
- return simple_read_from_buffer(user_buf, len, offset, buffer, size);
-}
+ debugfs_create_bool("ignore-client-disconnect", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_client_disconnect);
-static ssize_t
-fault_disconnect_write(struct file *filp, const char __user *user_buf,
- size_t len, loff_t *offset)
+ debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
+ &fail_sunrpc.ignore_server_disconnect);
+}
+#else
+static void fail_sunrpc_init(void)
{
- char buffer[16];
-
- if (len >= sizeof(buffer))
- len = sizeof(buffer) - 1;
- if (copy_from_user(buffer, user_buf, len))
- return -EFAULT;
- buffer[len] = '\0';
- if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
- return -EINVAL;
- return len;
}
-
-static const struct file_operations fault_disconnect_fops = {
- .owner = THIS_MODULE,
- .open = fault_open,
- .read = fault_disconnect_read,
- .write = fault_disconnect_write,
- .release = fault_release,
-};
+#endif
void __exit
sunrpc_debugfs_exit(void)
@@ -309,16 +281,11 @@ sunrpc_debugfs_exit(void)
void __init
sunrpc_debugfs_init(void)
{
- struct dentry *rpc_fault_dir;
-
topdir = debugfs_create_dir("sunrpc", NULL);
rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir);
- rpc_fault_dir = debugfs_create_dir("inject_fault", topdir);
-
- debugfs_create_file("disconnect", S_IFREG | 0400, rpc_fault_dir, NULL,
- &fault_disconnect_fops);
+ fail_sunrpc_init();
}
diff --git a/net/sunrpc/fail.h b/net/sunrpc/fail.h
new file mode 100644
index 000000000000..69dc30cc44b8
--- /dev/null
+++ b/net/sunrpc/fail.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2021, Oracle. All rights reserved.
+ */
+
+#ifndef _NET_SUNRPC_FAIL_H_
+#define _NET_SUNRPC_FAIL_H_
+
+#include <linux/fault-inject.h>
+
+#if IS_ENABLED(CONFIG_FAULT_INJECTION)
+
+struct fail_sunrpc_attr {
+ struct fault_attr attr;
+
+ bool ignore_client_disconnect;
+
+ bool ignore_server_disconnect;
+};
+
+extern struct fail_sunrpc_attr fail_sunrpc;
+
+#endif /* CONFIG_FAULT_INJECTION */
+
+#endif /* _NET_SUNRPC_FAIL_H_ */
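
Aside: a sketch of the consumer pattern for this attribute, matching how svc_process() and xprt_inject_disconnect() use it later in this series; example_should_disconnect_client() is only an illustrative name.

#include <linux/fault-inject.h>
#include "fail.h"

static bool example_should_disconnect_client(void)
{
	/* Fire only when the shared fault_attr says so and the client
	 * side has not been told to ignore injected disconnects. */
	return !fail_sunrpc.ignore_client_disconnect &&
	       should_fail(&fail_sunrpc.attr, 1);
}
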
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 09c000d490a1..ee5336d73fdd 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -423,7 +423,7 @@ rpc_info_open(struct inode *inode, struct file *file)
spin_lock(&file->f_path.dentry->d_lock);
if (!d_unhashed(file->f_path.dentry))
clnt = RPC_I(inode)->private;
- if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) {
+ if (clnt != NULL && refcount_inc_not_zero(&clnt->cl_count)) {
spin_unlock(&file->f_path.dentry->d_lock);
m->private = clnt;
} else {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 39ed0e0afe6d..c045f63d11fa 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -592,10 +592,20 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
struct rpc_task *task;
/*
+ * Service the privileged queue.
+ */
+ q = &queue->tasks[RPC_NR_PRIORITY - 1];
+ if (queue->maxpriority > RPC_PRIORITY_PRIVILEGED && !list_empty(q)) {
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+ goto out;
+ }
+
+ /*
* Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
- if (!list_empty(q) && --queue->nr) {
+ if (!list_empty(q) && queue->nr) {
+ queue->nr--;
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto out;
}
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 236fadc4a439..691c0000e9ea 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -24,6 +24,7 @@
#include <linux/sunrpc/xprtsock.h>
#include "sunrpc.h"
+#include "sysfs.h"
#include "netns.h"
unsigned int sunrpc_net_id;
@@ -103,6 +104,10 @@ init_sunrpc(void)
if (err)
goto out4;
+ err = rpc_sysfs_init();
+ if (err)
+ goto out5;
+
sunrpc_debugfs_init();
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
rpc_register_sysctl();
@@ -111,6 +116,8 @@ init_sunrpc(void)
init_socket_xprt(); /* clnt sock transport */
return 0;
+out5:
+ unregister_rpc_pipefs();
out4:
unregister_pernet_subsys(&sunrpc_net_ops);
out3:
@@ -124,7 +131,10 @@ out:
static void __exit
cleanup_sunrpc(void)
{
+ rpc_sysfs_exit();
rpc_cleanup_clids();
+ xprt_cleanup_ids();
+ xprt_multipath_cleanup_ids();
rpcauth_remove_module();
cleanup_socket_xprt();
svc_cleanup_xprt_sock();
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 0de918cb3d90..a3bbe5ce4570 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -31,6 +31,8 @@
#include <trace/events/sunrpc.h>
+#include "fail.h"
+
#define RPCDBG_FACILITY RPCDBG_SVCDSP
static void svc_unregister(const struct svc_serv *serv, struct net *net);
@@ -838,6 +840,27 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser
}
EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+/**
+ * svc_rqst_replace_page - Replace one page in rq_pages[]
+ * @rqstp: svc_rqst with pages to replace
+ * @page: replacement page
+ *
+ * When replacing a page in rq_pages, batch the release of the
+ * replaced pages to avoid hammering the page allocator.
+ */
+void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
+{
+ if (*rqstp->rq_next_page) {
+ if (!pagevec_space(&rqstp->rq_pvec))
+ __pagevec_release(&rqstp->rq_pvec);
+ pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page);
+ }
+
+ get_page(page);
+ *(rqstp->rq_next_page++) = page;
+}
+EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
+
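
Aside: an illustrative caller, not part of this patch, showing how a server backend might hand one of its own pages to the request; svc_rqst_replace_page() takes a reference on the new page and queues the displaced rq_pages entry on rq_pvec rather than freeing it immediately. example_fill_payload_page() is a hypothetical name.

#include <linux/sunrpc/svc.h>

static void example_fill_payload_page(struct svc_rqst *rqstp,
				      struct page *payload)
{
	/* The page previously at *rq_next_page is batched for release;
	 * rq_next_page now points past the newly installed page. */
	svc_rqst_replace_page(rqstp, payload);
}
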
/*
* Called from a server thread as it's exiting. Caller must hold the "service
* mutex" for the service.
@@ -1163,22 +1186,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
#endif
-__be32
-svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err)
-{
- set_bit(RQ_AUTHERR, &rqstp->rq_flags);
- return auth_err;
-}
-EXPORT_SYMBOL_GPL(svc_return_autherr);
-
-static __be32
-svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp)
-{
- if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags))
- return *statp;
- return rpc_auth_ok;
-}
-
static int
svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
{
@@ -1202,7 +1209,7 @@ svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
test_bit(RQ_DROPME, &rqstp->rq_flags))
return 0;
- if (test_bit(RQ_AUTHERR, &rqstp->rq_flags))
+ if (rqstp->rq_auth_stat != rpc_auth_ok)
return 1;
if (*statp != rpc_success)
@@ -1283,7 +1290,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
struct svc_process_info process;
__be32 *statp;
u32 prog, vers;
- __be32 auth_stat, rpc_stat;
+ __be32 rpc_stat;
int auth_res;
__be32 *reply_statp;
@@ -1326,14 +1333,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
* We do this before anything else in order to get a decent
* auth verifier.
*/
- auth_res = svc_authenticate(rqstp, &auth_stat);
+ auth_res = svc_authenticate(rqstp);
/* Also give the program a chance to reject this call: */
- if (auth_res == SVC_OK && progp) {
- auth_stat = rpc_autherr_badcred;
+ if (auth_res == SVC_OK && progp)
auth_res = progp->pg_authenticate(rqstp);
- }
if (auth_res != SVC_OK)
- trace_svc_authenticate(rqstp, auth_res, auth_stat);
+ trace_svc_authenticate(rqstp, auth_res);
switch (auth_res) {
case SVC_OK:
break;
@@ -1392,15 +1397,15 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto release_dropit;
if (*statp == rpc_garbage_args)
goto err_garbage;
- auth_stat = svc_get_autherr(rqstp, statp);
- if (auth_stat != rpc_auth_ok)
- goto err_release_bad_auth;
} else {
dprintk("svc: calling dispatcher\n");
if (!process.dispatch(rqstp, statp))
goto release_dropit; /* Release reply info */
}
+ if (rqstp->rq_auth_stat != rpc_auth_ok)
+ goto err_release_bad_auth;
+
/* Check RPC status result */
if (*statp != rpc_success)
resv->iov_len = ((void*)statp) - resv->iov_base + 4;
@@ -1450,13 +1455,14 @@ err_release_bad_auth:
if (procp->pc_release)
procp->pc_release(rqstp);
err_bad_auth:
- dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
+ dprintk("svc: authentication failed (%d)\n",
+ be32_to_cpu(rqstp->rq_auth_stat));
serv->sv_stats->rpcbadauth++;
/* Restore write pointer to location of accept status: */
xdr_ressize_check(rqstp, reply_statp);
svc_putnl(resv, 1); /* REJECT */
svc_putnl(resv, 1); /* AUTH_ERROR */
- svc_putnl(resv, ntohl(auth_stat)); /* status */
+ svc_putu32(resv, rqstp->rq_auth_stat); /* status */
goto sendit;
err_bad_prog:
@@ -1503,6 +1509,12 @@ svc_process(struct svc_rqst *rqstp)
struct svc_serv *serv = rqstp->rq_server;
u32 dir;
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+ if (!fail_sunrpc.ignore_server_disconnect &&
+ should_fail(&fail_sunrpc.attr, 1))
+ svc_xprt_deferred_close(rqstp->rq_xprt);
+#endif
+
/*
* Setup response xdr_buf.
* Initially it has just one page
@@ -1630,6 +1642,21 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
EXPORT_SYMBOL_GPL(svc_max_payload);
/**
+ * svc_proc_name - Return RPC procedure name in string form
+ * @rqstp: svc_rqst to operate on
+ *
+ * Return value:
+ * Pointer to a NUL-terminated string
+ */
+const char *svc_proc_name(const struct svc_rqst *rqstp)
+{
+ if (rqstp && rqstp->rq_procinfo)
+ return rqstp->rq_procinfo->pc_name;
+ return "unknown";
+}
+
+
+/**
* svc_encode_result_payload - mark a range of bytes as a result payload
* @rqstp: svc_rqst to operate on
* @offset: payload's byte offset in rqstp->rq_res
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d66a8e44a1ae..6316bd2b8f37 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -539,6 +539,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
+ pagevec_release(&rqstp->rq_pvec);
svc_free_res_pages(rqstp);
rqstp->rq_res.page_len = 0;
rqstp->rq_res.page_base = 0;
@@ -662,7 +663,9 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg = &rqstp->rq_arg;
- unsigned long pages, filled;
+ unsigned long pages, filled, ret;
+
+ pagevec_init(&rqstp->rq_pvec);
pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
if (pages > RPCSVC_MAXPAGES) {
@@ -672,11 +675,12 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
pages = RPCSVC_MAXPAGES;
}
- for (;;) {
- filled = alloc_pages_bulk_array(GFP_KERNEL, pages,
- rqstp->rq_pages);
- if (filled == pages)
- break;
+ for (filled = 0; filled < pages; filled = ret) {
+ ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
+ rqstp->rq_pages);
+ if (ret > filled)
+ /* Made progress, don't sleep yet */
+ continue;
set_current_state(TASK_INTERRUPTIBLE);
if (signalled() || kthread_should_stop()) {
@@ -835,7 +839,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
- }
+ } else
+ svc_xprt_received(xprt);
out:
trace_svc_handle_xprt(xprt, len);
return len;
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 998b196b6176..5a8b8e03fdd4 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -59,12 +59,12 @@ svc_put_auth_ops(struct auth_ops *aops)
}
int
-svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
+svc_authenticate(struct svc_rqst *rqstp)
{
rpc_authflavor_t flavor;
struct auth_ops *aops;
- *authp = rpc_auth_ok;
+ rqstp->rq_auth_stat = rpc_auth_ok;
flavor = svc_getnl(&rqstp->rq_arg.head[0]);
@@ -72,7 +72,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
aops = svc_get_auth_ops(flavor);
if (aops == NULL) {
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
@@ -80,7 +80,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
init_svc_cred(&rqstp->rq_cred);
rqstp->rq_authop = aops;
- return aops->accept(rqstp, authp);
+ return aops->accept(rqstp);
}
EXPORT_SYMBOL_GPL(svc_authenticate);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 35b7966ac3b3..d7ed7d49115a 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -681,8 +681,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
rqstp->rq_client = NULL;
if (rqstp->rq_proc == 0)
- return SVC_OK;
+ goto out;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
ipm = ip_map_cached_get(xprt);
if (ipm == NULL)
ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
@@ -719,13 +720,16 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
put_group_info(cred->cr_group_info);
cred->cr_group_info = gi;
}
+
+out:
+ rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
static int
-svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_null_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -736,12 +740,12 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
if (svc_getu32(argv) != 0) {
dprintk("svc: bad null cred\n");
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n");
- *authp = rpc_autherr_badverf;
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
@@ -785,7 +789,7 @@ struct auth_ops svcauth_null = {
static int
-svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -827,7 +831,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
}
groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
- *authp = rpc_autherr_badverf;
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
@@ -839,7 +843,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
return SVC_OK;
badcred:
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
new file mode 100644
index 000000000000..9a6f17e18f73
--- /dev/null
+++ b/net/sunrpc/sysfs.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Anna Schumaker <Anna.Schumaker@Netapp.com>
+ */
+#include <linux/sunrpc/clnt.h>
+#include <linux/kobject.h>
+#include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/xprtsock.h>
+
+#include "sysfs.h"
+
+struct xprt_addr {
+ const char *addr;
+ struct rcu_head rcu;
+};
+
+static void free_xprt_addr(struct rcu_head *head)
+{
+ struct xprt_addr *addr = container_of(head, struct xprt_addr, rcu);
+
+ kfree(addr->addr);
+ kfree(addr);
+}
+
+static struct kset *rpc_sunrpc_kset;
+static struct kobject *rpc_sunrpc_client_kobj, *rpc_sunrpc_xprt_switch_kobj;
+
+static void rpc_sysfs_object_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
+static const struct kobj_ns_type_operations *
+rpc_sysfs_object_child_ns_type(struct kobject *kobj)
+{
+ return &net_ns_type_operations;
+}
+
+static struct kobj_type rpc_sysfs_object_type = {
+ .release = rpc_sysfs_object_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .child_ns_type = rpc_sysfs_object_child_ns_type,
+};
+
+static struct kobject *rpc_sysfs_object_alloc(const char *name,
+ struct kset *kset,
+ struct kobject *parent)
+{
+ struct kobject *kobj;
+
+ kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+ if (kobj) {
+ kobj->kset = kset;
+ if (kobject_init_and_add(kobj, &rpc_sysfs_object_type,
+ parent, "%s", name) == 0)
+ return kobj;
+ kobject_put(kobj);
+ }
+ return NULL;
+}
+
+static inline struct rpc_xprt *
+rpc_sysfs_xprt_kobj_get_xprt(struct kobject *kobj)
+{
+ struct rpc_sysfs_xprt *x = container_of(kobj,
+ struct rpc_sysfs_xprt, kobject);
+
+ return xprt_get(x->xprt);
+}
+
+static inline struct rpc_xprt_switch *
+rpc_sysfs_xprt_kobj_get_xprt_switch(struct kobject *kobj)
+{
+ struct rpc_sysfs_xprt *x = container_of(kobj,
+ struct rpc_sysfs_xprt, kobject);
+
+ return xprt_switch_get(x->xprt_switch);
+}
+
+static inline struct rpc_xprt_switch *
+rpc_sysfs_xprt_switch_kobj_get_xprt(struct kobject *kobj)
+{
+ struct rpc_sysfs_xprt_switch *x = container_of(kobj,
+ struct rpc_sysfs_xprt_switch, kobject);
+
+ return xprt_switch_get(x->xprt_switch);
+}
+
+static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ ssize_t ret;
+
+ if (!xprt)
+ return 0;
+ ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]);
+ xprt_put(xprt);
+ return ret + 1;
+}
+
+static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ struct sockaddr_storage saddr;
+ struct sock_xprt *sock;
+ ssize_t ret = -1;
+
+ if (!xprt)
+ return 0;
+
+ sock = container_of(xprt, struct sock_xprt, xprt);
+ if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
+ goto out;
+
+ ret = sprintf(buf, "%pISc\n", &saddr);
+out:
+ xprt_put(xprt);
+ return ret + 1;
+}
+
+static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ ssize_t ret;
+
+ if (!xprt)
+ return 0;
+
+ ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n"
+ "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n"
+ "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n"
+ "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n"
+ "tasks_queuelen=%ld\ndst_port=%s\n",
+ xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs,
+ xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen,
+ xprt->sending.qlen, xprt->pending.qlen,
+ xprt->backlog.qlen, xprt->main,
+ (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
+ get_srcport(xprt) : 0,
+ atomic_long_read(&xprt->queuelen),
+ (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
+ xprt->address_strings[RPC_DISPLAY_PORT] : "0");
+ xprt_put(xprt);
+ return ret + 1;
+}
+
+static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ ssize_t ret;
+ int locked, connected, connecting, close_wait, bound, binding,
+ closing, congested, cwnd_wait, write_space, offline, remove;
+
+ if (!xprt)
+ return 0;
+
+ if (!xprt->state) {
+ ret = sprintf(buf, "state=CLOSED\n");
+ } else {
+ locked = test_bit(XPRT_LOCKED, &xprt->state);
+ connected = test_bit(XPRT_CONNECTED, &xprt->state);
+ connecting = test_bit(XPRT_CONNECTING, &xprt->state);
+ close_wait = test_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ bound = test_bit(XPRT_BOUND, &xprt->state);
+ binding = test_bit(XPRT_BINDING, &xprt->state);
+ closing = test_bit(XPRT_CLOSING, &xprt->state);
+ congested = test_bit(XPRT_CONGESTED, &xprt->state);
+ cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
+ write_space = test_bit(XPRT_WRITE_SPACE, &xprt->state);
+ offline = test_bit(XPRT_OFFLINE, &xprt->state);
+ remove = test_bit(XPRT_REMOVE, &xprt->state);
+
+ ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s %s\n",
+ locked ? "LOCKED" : "",
+ connected ? "CONNECTED" : "",
+ connecting ? "CONNECTING" : "",
+ close_wait ? "CLOSE_WAIT" : "",
+ bound ? "BOUND" : "",
+ binding ? "BINDING" : "",
+ closing ? "CLOSING" : "",
+ congested ? "CONGESTED" : "",
+ cwnd_wait ? "CWND_WAIT" : "",
+ write_space ? "WRITE_SPACE" : "",
+ offline ? "OFFLINE" : "",
+ remove ? "REMOVE" : "");
+ }
+
+ xprt_put(xprt);
+ return ret + 1;
+}
+
+static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_xprt_switch *xprt_switch =
+ rpc_sysfs_xprt_switch_kobj_get_xprt(kobj);
+ ssize_t ret;
+
+ if (!xprt_switch)
+ return 0;
+ ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\n"
+ "num_unique_destaddr=%u\nqueue_len=%ld\n",
+ xprt_switch->xps_nxprts, xprt_switch->xps_nactive,
+ xprt_switch->xps_nunique_destaddr_xprts,
+ atomic_long_read(&xprt_switch->xps_queuelen));
+ xprt_switch_put(xprt_switch);
+ return ret + 1;
+}
+
+static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ struct sockaddr *saddr;
+ char *dst_addr;
+ int port;
+ struct xprt_addr *saved_addr;
+ size_t buf_len;
+
+ if (!xprt)
+ return 0;
+ if (!(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP ||
+ xprt->xprt_class->ident == XPRT_TRANSPORT_RDMA)) {
+ xprt_put(xprt);
+ return -EOPNOTSUPP;
+ }
+
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+ count = -EINTR;
+ goto out_put;
+ }
+ saddr = (struct sockaddr *)&xprt->addr;
+ port = rpc_get_port(saddr);
+
+ /* buf_len is the length until the first occurrence of either
+ * '\n' or '\0'
+ */
+ buf_len = strcspn(buf, "\n");
+
+ dst_addr = kstrndup(buf, buf_len, GFP_KERNEL);
+ if (!dst_addr)
+ goto out_err;
+ saved_addr = kzalloc(sizeof(*saved_addr), GFP_KERNEL);
+ if (!saved_addr)
+ goto out_err_free;
+ saved_addr->addr =
+ rcu_dereference_raw(xprt->address_strings[RPC_DISPLAY_ADDR]);
+ rcu_assign_pointer(xprt->address_strings[RPC_DISPLAY_ADDR], dst_addr);
+ call_rcu(&saved_addr->rcu, free_xprt_addr);
+ xprt->addrlen = rpc_pton(xprt->xprt_net, buf, buf_len, saddr,
+ sizeof(*saddr));
+ rpc_set_port(saddr, port);
+
+ xprt_force_disconnect(xprt);
+out:
+ xprt_release_write(xprt, NULL);
+out_put:
+ xprt_put(xprt);
+ return count;
+out_err_free:
+ kfree(dst_addr);
+out_err:
+ count = -ENOMEM;
+ goto out;
+}
+
+static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ int offline = 0, online = 0, remove = 0;
+ struct rpc_xprt_switch *xps = rpc_sysfs_xprt_kobj_get_xprt_switch(kobj);
+
+ if (!xprt)
+ return 0;
+
+ if (!strncmp(buf, "offline", 7))
+ offline = 1;
+ else if (!strncmp(buf, "online", 6))
+ online = 1;
+ else if (!strncmp(buf, "remove", 6))
+ remove = 1;
+ else
+ return -EINVAL;
+
+ if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) {
+ count = -EINTR;
+ goto out_put;
+ }
+ if (xprt->main) {
+ count = -EINVAL;
+ goto release_tasks;
+ }
+ if (offline) {
+ set_bit(XPRT_OFFLINE, &xprt->state);
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive--;
+ spin_unlock(&xps->xps_lock);
+ } else if (online) {
+ clear_bit(XPRT_OFFLINE, &xprt->state);
+ spin_lock(&xps->xps_lock);
+ xps->xps_nactive++;
+ spin_unlock(&xps->xps_lock);
+ } else if (remove) {
+ if (test_bit(XPRT_OFFLINE, &xprt->state)) {
+ set_bit(XPRT_REMOVE, &xprt->state);
+ xprt_force_disconnect(xprt);
+ if (test_bit(XPRT_CONNECTED, &xprt->state)) {
+ if (!xprt->sending.qlen &&
+ !xprt->pending.qlen &&
+ !xprt->backlog.qlen &&
+ !atomic_long_read(&xprt->queuelen))
+ rpc_xprt_switch_remove_xprt(xps, xprt);
+ }
+ } else {
+ count = -EINVAL;
+ }
+ }
+
+release_tasks:
+ xprt_release_write(xprt, NULL);
+out_put:
+ xprt_put(xprt);
+ xprt_switch_put(xps);
+ return count;
+}
+
+int rpc_sysfs_init(void)
+{
+ rpc_sunrpc_kset = kset_create_and_add("sunrpc", NULL, kernel_kobj);
+ if (!rpc_sunrpc_kset)
+ return -ENOMEM;
+ rpc_sunrpc_client_kobj =
+ rpc_sysfs_object_alloc("rpc-clients", rpc_sunrpc_kset, NULL);
+ if (!rpc_sunrpc_client_kobj)
+ goto err_client;
+ rpc_sunrpc_xprt_switch_kobj =
+ rpc_sysfs_object_alloc("xprt-switches", rpc_sunrpc_kset, NULL);
+ if (!rpc_sunrpc_xprt_switch_kobj)
+ goto err_switch;
+ return 0;
+err_switch:
+ kobject_put(rpc_sunrpc_client_kobj);
+ rpc_sunrpc_client_kobj = NULL;
+err_client:
+ kset_unregister(rpc_sunrpc_kset);
+ rpc_sunrpc_kset = NULL;
+ return -ENOMEM;
+}
+
+static void rpc_sysfs_client_release(struct kobject *kobj)
+{
+ struct rpc_sysfs_client *c;
+
+ c = container_of(kobj, struct rpc_sysfs_client, kobject);
+ kfree(c);
+}
+
+static void rpc_sysfs_xprt_switch_release(struct kobject *kobj)
+{
+ struct rpc_sysfs_xprt_switch *xprt_switch;
+
+ xprt_switch = container_of(kobj, struct rpc_sysfs_xprt_switch, kobject);
+ kfree(xprt_switch);
+}
+
+static void rpc_sysfs_xprt_release(struct kobject *kobj)
+{
+ struct rpc_sysfs_xprt *xprt;
+
+ xprt = container_of(kobj, struct rpc_sysfs_xprt, kobject);
+ kfree(xprt);
+}
+
+static const void *rpc_sysfs_client_namespace(struct kobject *kobj)
+{
+ return container_of(kobj, struct rpc_sysfs_client, kobject)->net;
+}
+
+static const void *rpc_sysfs_xprt_switch_namespace(struct kobject *kobj)
+{
+ return container_of(kobj, struct rpc_sysfs_xprt_switch, kobject)->net;
+}
+
+static const void *rpc_sysfs_xprt_namespace(struct kobject *kobj)
+{
+ return container_of(kobj, struct rpc_sysfs_xprt,
+ kobject)->xprt->xprt_net;
+}
+
+static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr,
+ 0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store);
+
+static struct kobj_attribute rpc_sysfs_xprt_srcaddr = __ATTR(srcaddr,
+ 0644, rpc_sysfs_xprt_srcaddr_show, NULL);
+
+static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info,
+ 0444, rpc_sysfs_xprt_info_show, NULL);
+
+static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state,
+ 0644, rpc_sysfs_xprt_state_show, rpc_sysfs_xprt_state_change);
+
+static struct attribute *rpc_sysfs_xprt_attrs[] = {
+ &rpc_sysfs_xprt_dstaddr.attr,
+ &rpc_sysfs_xprt_srcaddr.attr,
+ &rpc_sysfs_xprt_info.attr,
+ &rpc_sysfs_xprt_change_state.attr,
+ NULL,
+};
+
+static struct kobj_attribute rpc_sysfs_xprt_switch_info =
+ __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL);
+
+static struct attribute *rpc_sysfs_xprt_switch_attrs[] = {
+ &rpc_sysfs_xprt_switch_info.attr,
+ NULL,
+};
+
+static struct kobj_type rpc_sysfs_client_type = {
+ .release = rpc_sysfs_client_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .namespace = rpc_sysfs_client_namespace,
+};
+
+static struct kobj_type rpc_sysfs_xprt_switch_type = {
+ .release = rpc_sysfs_xprt_switch_release,
+ .default_attrs = rpc_sysfs_xprt_switch_attrs,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .namespace = rpc_sysfs_xprt_switch_namespace,
+};
+
+static struct kobj_type rpc_sysfs_xprt_type = {
+ .release = rpc_sysfs_xprt_release,
+ .default_attrs = rpc_sysfs_xprt_attrs,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .namespace = rpc_sysfs_xprt_namespace,
+};
+
+void rpc_sysfs_exit(void)
+{
+ kobject_put(rpc_sunrpc_client_kobj);
+ kobject_put(rpc_sunrpc_xprt_switch_kobj);
+ kset_unregister(rpc_sunrpc_kset);
+}
+
+static struct rpc_sysfs_client *rpc_sysfs_client_alloc(struct kobject *parent,
+ struct net *net,
+ int clid)
+{
+ struct rpc_sysfs_client *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (p) {
+ p->net = net;
+ p->kobject.kset = rpc_sunrpc_kset;
+ if (kobject_init_and_add(&p->kobject, &rpc_sysfs_client_type,
+ parent, "clnt-%d", clid) == 0)
+ return p;
+ kobject_put(&p->kobject);
+ }
+ return NULL;
+}
+
+static struct rpc_sysfs_xprt_switch *
+rpc_sysfs_xprt_switch_alloc(struct kobject *parent,
+ struct rpc_xprt_switch *xprt_switch,
+ struct net *net,
+ gfp_t gfp_flags)
+{
+ struct rpc_sysfs_xprt_switch *p;
+
+ p = kzalloc(sizeof(*p), gfp_flags);
+ if (p) {
+ p->net = net;
+ p->kobject.kset = rpc_sunrpc_kset;
+ if (kobject_init_and_add(&p->kobject,
+ &rpc_sysfs_xprt_switch_type,
+ parent, "switch-%d",
+ xprt_switch->xps_id) == 0)
+ return p;
+ kobject_put(&p->kobject);
+ }
+ return NULL;
+}
+
+static struct rpc_sysfs_xprt *rpc_sysfs_xprt_alloc(struct kobject *parent,
+ struct rpc_xprt *xprt,
+ gfp_t gfp_flags)
+{
+ struct rpc_sysfs_xprt *p;
+
+ p = kzalloc(sizeof(*p), gfp_flags);
+ if (!p)
+ goto out;
+ p->kobject.kset = rpc_sunrpc_kset;
+ if (kobject_init_and_add(&p->kobject, &rpc_sysfs_xprt_type,
+ parent, "xprt-%d-%s", xprt->id,
+ xprt->address_strings[RPC_DISPLAY_PROTO]) == 0)
+ return p;
+ kobject_put(&p->kobject);
+out:
+ return NULL;
+}
+
+void rpc_sysfs_client_setup(struct rpc_clnt *clnt,
+ struct rpc_xprt_switch *xprt_switch,
+ struct net *net)
+{
+ struct rpc_sysfs_client *rpc_client;
+
+ rpc_client = rpc_sysfs_client_alloc(rpc_sunrpc_client_kobj,
+ net, clnt->cl_clid);
+ if (rpc_client) {
+ char name[] = "switch";
+ struct rpc_sysfs_xprt_switch *xswitch =
+ (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+ int ret;
+
+ clnt->cl_sysfs = rpc_client;
+ rpc_client->clnt = clnt;
+ rpc_client->xprt_switch = xprt_switch;
+ kobject_uevent(&rpc_client->kobject, KOBJ_ADD);
+ ret = sysfs_create_link_nowarn(&rpc_client->kobject,
+ &xswitch->kobject, name);
+ if (ret)
+ pr_warn("can't create link to %s in sysfs (%d)\n",
+ name, ret);
+ }
+}
+
+void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch,
+ struct rpc_xprt *xprt,
+ gfp_t gfp_flags)
+{
+ struct rpc_sysfs_xprt_switch *rpc_xprt_switch;
+ struct net *net;
+
+ if (xprt_switch->xps_net)
+ net = xprt_switch->xps_net;
+ else
+ net = xprt->xprt_net;
+ rpc_xprt_switch =
+ rpc_sysfs_xprt_switch_alloc(rpc_sunrpc_xprt_switch_kobj,
+ xprt_switch, net, gfp_flags);
+ if (rpc_xprt_switch) {
+ xprt_switch->xps_sysfs = rpc_xprt_switch;
+ rpc_xprt_switch->xprt_switch = xprt_switch;
+ rpc_xprt_switch->xprt = xprt;
+ kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_ADD);
+ }
+}
+
+void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch,
+ struct rpc_xprt *xprt,
+ gfp_t gfp_flags)
+{
+ struct rpc_sysfs_xprt *rpc_xprt;
+ struct rpc_sysfs_xprt_switch *switch_obj =
+ (struct rpc_sysfs_xprt_switch *)xprt_switch->xps_sysfs;
+
+ rpc_xprt = rpc_sysfs_xprt_alloc(&switch_obj->kobject, xprt, gfp_flags);
+ if (rpc_xprt) {
+ xprt->xprt_sysfs = rpc_xprt;
+ rpc_xprt->xprt = xprt;
+ rpc_xprt->xprt_switch = xprt_switch;
+ kobject_uevent(&rpc_xprt->kobject, KOBJ_ADD);
+ }
+}
+
+void rpc_sysfs_client_destroy(struct rpc_clnt *clnt)
+{
+ struct rpc_sysfs_client *rpc_client = clnt->cl_sysfs;
+
+ if (rpc_client) {
+ char name[] = "switch";
+
+ sysfs_remove_link(&rpc_client->kobject, name);
+ kobject_uevent(&rpc_client->kobject, KOBJ_REMOVE);
+ kobject_del(&rpc_client->kobject);
+ kobject_put(&rpc_client->kobject);
+ clnt->cl_sysfs = NULL;
+ }
+}
+
+void rpc_sysfs_xprt_switch_destroy(struct rpc_xprt_switch *xprt_switch)
+{
+ struct rpc_sysfs_xprt_switch *rpc_xprt_switch = xprt_switch->xps_sysfs;
+
+ if (rpc_xprt_switch) {
+ kobject_uevent(&rpc_xprt_switch->kobject, KOBJ_REMOVE);
+ kobject_del(&rpc_xprt_switch->kobject);
+ kobject_put(&rpc_xprt_switch->kobject);
+ xprt_switch->xps_sysfs = NULL;
+ }
+}
+
+void rpc_sysfs_xprt_destroy(struct rpc_xprt *xprt)
+{
+ struct rpc_sysfs_xprt *rpc_xprt = xprt->xprt_sysfs;
+
+ if (rpc_xprt) {
+ kobject_uevent(&rpc_xprt->kobject, KOBJ_REMOVE);
+ kobject_del(&rpc_xprt->kobject);
+ kobject_put(&rpc_xprt->kobject);
+ xprt->xprt_sysfs = NULL;
+ }
+}
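
Aside: a userspace sketch (not part of the patch) that reads the per-transport state attribute created above. The path follows the "sunrpc" kset plus the "xprt-switches", "switch-%d" and "xprt-%d-%s" names used in this file; the ids in the example path are assumptions, not fixed values.

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/sunrpc/xprt-switches/switch-0/"
			"xprt-0-tcp/xprt_state", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
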
diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h
new file mode 100644
index 000000000000..6620cebd1037
--- /dev/null
+++ b/net/sunrpc/sysfs.h
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Anna Schumaker <Anna.Schumaker@Netapp.com>
+ */
+#ifndef __SUNRPC_SYSFS_H
+#define __SUNRPC_SYSFS_H
+
+struct rpc_sysfs_client {
+ struct kobject kobject;
+ struct net *net;
+ struct rpc_clnt *clnt;
+ struct rpc_xprt_switch *xprt_switch;
+};
+
+struct rpc_sysfs_xprt_switch {
+ struct kobject kobject;
+ struct net *net;
+ struct rpc_xprt_switch *xprt_switch;
+ struct rpc_xprt *xprt;
+};
+
+struct rpc_sysfs_xprt {
+ struct kobject kobject;
+ struct rpc_xprt *xprt;
+ struct rpc_xprt_switch *xprt_switch;
+};
+
+int rpc_sysfs_init(void);
+void rpc_sysfs_exit(void);
+
+void rpc_sysfs_client_setup(struct rpc_clnt *clnt,
+ struct rpc_xprt_switch *xprt_switch,
+ struct net *net);
+void rpc_sysfs_client_destroy(struct rpc_clnt *clnt);
+void rpc_sysfs_xprt_switch_setup(struct rpc_xprt_switch *xprt_switch,
+ struct rpc_xprt *xprt, gfp_t gfp_flags);
+void rpc_sysfs_xprt_switch_destroy(struct rpc_xprt_switch *xprt);
+void rpc_sysfs_xprt_setup(struct rpc_xprt_switch *xprt_switch,
+ struct rpc_xprt *xprt, gfp_t gfp_flags);
+void rpc_sysfs_xprt_destroy(struct rpc_xprt *xprt);
+
+#endif
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 3964ff74ee51..ca10ba2626f2 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1230,10 +1230,9 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr,
void *kaddr;
maxlen = xdr->buf->page_len;
- if (base >= maxlen) {
- base = maxlen;
- maxlen = 0;
- } else
+ if (base >= maxlen)
+ return 0;
+ else
maxlen -= base;
if (len > maxlen)
len = maxlen;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3509a7f139b9..cfd681700d1a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -55,6 +55,8 @@
#include <trace/events/sunrpc.h>
#include "sunrpc.h"
+#include "sysfs.h"
+#include "fail.h"
/*
* Local variables
@@ -443,7 +445,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
-static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
+void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task != task)
return;
@@ -760,6 +762,20 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
/**
+ * xprt_schedule_autoclose_locked - Try to schedule an autoclose RPC call
+ * @xprt: transport to disconnect
+ */
+static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt)
+{
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+ queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+ else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+ rpc_wake_up_queued_task_set_status(&xprt->pending,
+ xprt->snd_task, -ENOTCONN);
+}
+
+/**
* xprt_force_disconnect - force a transport to disconnect
* @xprt: transport to disconnect
*
@@ -770,13 +786,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
/* Don't race with the test_bit() in xprt_clear_locked() */
spin_lock(&xprt->transport_lock);
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
- /* Try to schedule an autoclose RPC call */
- if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
- queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- else if (xprt->snd_task)
- rpc_wake_up_queued_task_set_status(&xprt->pending,
- xprt->snd_task, -ENOTCONN);
+ xprt_schedule_autoclose_locked(xprt);
spin_unlock(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
@@ -816,11 +826,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
goto out;
if (test_bit(XPRT_CLOSING, &xprt->state))
goto out;
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
- /* Try to schedule an autoclose RPC call */
- if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
- queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -EAGAIN);
+ xprt_schedule_autoclose_locked(xprt);
out:
spin_unlock(&xprt->transport_lock);
}
@@ -854,6 +860,19 @@ xprt_init_autodisconnect(struct timer_list *t)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
}
+#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
+static void xprt_inject_disconnect(struct rpc_xprt *xprt)
+{
+ if (!fail_sunrpc.ignore_client_disconnect &&
+ should_fail(&fail_sunrpc.attr, 1))
+ xprt->ops->inject_disconnect(xprt);
+}
+#else
+static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
+{
+}
+#endif
+
bool xprt_lock_connect(struct rpc_xprt *xprt,
struct rpc_task *task,
void *cookie)
@@ -865,12 +884,14 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
goto out;
if (xprt->snd_task != task)
goto out;
+ set_bit(XPRT_SND_IS_COOKIE, &xprt->state);
xprt->snd_task = cookie;
ret = true;
out:
spin_unlock(&xprt->transport_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(xprt_lock_connect);
void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
{
@@ -880,12 +901,14 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
if (!test_bit(XPRT_LOCKED, &xprt->state))
goto out;
xprt->snd_task = NULL;
+ clear_bit(XPRT_SND_IS_COOKIE, &xprt->state);
xprt->ops->release_xprt(xprt, NULL);
xprt_schedule_autodisconnect(xprt);
out:
spin_unlock(&xprt->transport_lock);
wake_up_bit(&xprt->state, XPRT_LOCKED);
}
+EXPORT_SYMBOL_GPL(xprt_unlock_connect);
/**
* xprt_connect - schedule a transport connect operation
@@ -1746,6 +1769,30 @@ static void xprt_free_all_slots(struct rpc_xprt *xprt)
}
}
+static DEFINE_IDA(rpc_xprt_ids);
+
+void xprt_cleanup_ids(void)
+{
+ ida_destroy(&rpc_xprt_ids);
+}
+
+static int xprt_alloc_id(struct rpc_xprt *xprt)
+{
+ int id;
+
+ id = ida_simple_get(&rpc_xprt_ids, 0, 0, GFP_KERNEL);
+ if (id < 0)
+ return id;
+
+ xprt->id = id;
+ return 0;
+}
+
+static void xprt_free_id(struct rpc_xprt *xprt)
+{
+ ida_simple_remove(&rpc_xprt_ids, xprt->id);
+}
+
struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
unsigned int num_prealloc,
unsigned int max_alloc)
@@ -1758,6 +1805,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
if (xprt == NULL)
goto out;
+ xprt_alloc_id(xprt);
xprt_init(xprt, net);
for (i = 0; i < num_prealloc; i++) {
@@ -1786,6 +1834,8 @@ void xprt_free(struct rpc_xprt *xprt)
{
put_net(xprt->xprt_net);
xprt_free_all_slots(xprt);
+ xprt_free_id(xprt);
+ rpc_sysfs_xprt_destroy(xprt);
kfree_rcu(xprt, rcu);
}
EXPORT_SYMBOL_GPL(xprt_free);
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 78c075a68c04..1693f81aae37 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -7,18 +7,20 @@
* Trond Myklebust <trond.myklebust@primarydata.com>
*
*/
+#include <linux/atomic.h>
#include <linux/types.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/slab.h>
-#include <asm/cmpxchg.h>
#include <linux/spinlock.h>
#include <linux/sunrpc/xprt.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/xprtmultipath.h>
+#include "sysfs.h"
+
typedef struct rpc_xprt *(*xprt_switch_find_xprt_t)(struct rpc_xprt_switch *xps,
const struct rpc_xprt *cur);
@@ -55,6 +57,7 @@ void rpc_xprt_switch_add_xprt(struct rpc_xprt_switch *xps,
if (xps->xps_net == xprt->xprt_net || xps->xps_net == NULL)
xprt_switch_add_xprt_locked(xps, xprt);
spin_unlock(&xps->xps_lock);
+ rpc_sysfs_xprt_setup(xps, xprt, GFP_KERNEL);
}
static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
@@ -62,7 +65,8 @@ static void xprt_switch_remove_xprt_locked(struct rpc_xprt_switch *xps,
{
if (unlikely(xprt == NULL))
return;
- xps->xps_nactive--;
+ if (!test_bit(XPRT_OFFLINE, &xprt->state))
+ xps->xps_nactive--;
xps->xps_nxprts--;
if (xps->xps_nxprts == 0)
xps->xps_net = NULL;
@@ -86,6 +90,30 @@ void rpc_xprt_switch_remove_xprt(struct rpc_xprt_switch *xps,
xprt_put(xprt);
}
+static DEFINE_IDA(rpc_xprtswitch_ids);
+
+void xprt_multipath_cleanup_ids(void)
+{
+ ida_destroy(&rpc_xprtswitch_ids);
+}
+
+static int xprt_switch_alloc_id(struct rpc_xprt_switch *xps, gfp_t gfp_flags)
+{
+ int id;
+
+ id = ida_simple_get(&rpc_xprtswitch_ids, 0, 0, gfp_flags);
+ if (id < 0)
+ return id;
+
+ xps->xps_id = id;
+ return 0;
+}
+
+static void xprt_switch_free_id(struct rpc_xprt_switch *xps)
+{
+ ida_simple_remove(&rpc_xprtswitch_ids, xps->xps_id);
+}
+
/**
* xprt_switch_alloc - Allocate a new struct rpc_xprt_switch
* @xprt: pointer to struct rpc_xprt
@@ -103,12 +131,16 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
if (xps != NULL) {
spin_lock_init(&xps->xps_lock);
kref_init(&xps->xps_kref);
+ xprt_switch_alloc_id(xps, gfp_flags);
xps->xps_nxprts = xps->xps_nactive = 0;
atomic_long_set(&xps->xps_queuelen, 0);
xps->xps_net = NULL;
INIT_LIST_HEAD(&xps->xps_xprt_list);
xps->xps_iter_ops = &rpc_xprt_iter_singular;
+ rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags);
xprt_switch_add_xprt_locked(xps, xprt);
+ xps->xps_nunique_destaddr_xprts = 1;
+ rpc_sysfs_xprt_setup(xps, xprt, gfp_flags);
}
return xps;
@@ -136,6 +168,8 @@ static void xprt_switch_free(struct kref *kref)
struct rpc_xprt_switch, xps_kref);
xprt_switch_free_entries(xps);
+ rpc_sysfs_xprt_switch_destroy(xps);
+ xprt_switch_free_id(xps);
kfree_rcu(xps, xps_rcu);
}
@@ -198,7 +232,8 @@ void xprt_iter_default_rewind(struct rpc_xprt_iter *xpi)
static
bool xprt_is_active(const struct rpc_xprt *xprt)
{
- return kref_read(&xprt->kref) != 0;
+ return (kref_read(&xprt->kref) != 0 &&
+ !test_bit(XPRT_OFFLINE, &xprt->state));
}
static
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 1151efd09b27..17f174d6ea3b 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -115,7 +115,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
if (rc < 0)
goto failed_marshal;
- if (rpcrdma_post_sends(r_xprt, req))
+ if (frwr_send(r_xprt, req))
goto drop_connection;
return 0;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 229fcc9a9064..f700b34a5bfd 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -394,6 +394,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct rpcrdma_mr *mr;
unsigned int num_wrs;
+ int ret;
num_wrs = 1;
post_wr = send_wr;
@@ -420,7 +421,10 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
}
trace_xprtrdma_post_send(req);
- return ib_post_send(ep->re_id->qp, post_wr, NULL);
+ ret = ib_post_send(ep->re_id->qp, post_wr, NULL);
+ if (ret)
+ trace_xprtrdma_post_send_err(r_xprt, req, ret);
+ return ret;
}
/**
@@ -557,6 +561,10 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
+
+ /* Force a connection loss to ensure complete recovery.
+ */
+ rpcrdma_force_disconnect(ep);
}
/**
@@ -653,4 +661,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* retransmission.
*/
rpcrdma_unpin_rqst(req->rl_reply);
+
+ /* Force a connection loss to ensure complete recovery.
+ */
+ rpcrdma_force_disconnect(ep);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 5238bc829235..e27433f08ca7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -35,6 +35,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
* controlling svcxprt_rdma is destroyed.
*/
struct svc_rdma_rw_ctxt {
+ struct llist_node rw_node;
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
unsigned int rw_nents;
@@ -53,19 +54,19 @@ static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
struct svc_rdma_rw_ctxt *ctxt;
+ struct llist_node *node;
spin_lock(&rdma->sc_rw_ctxt_lock);
-
- ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
- if (ctxt) {
- list_del(&ctxt->rw_list);
- spin_unlock(&rdma->sc_rw_ctxt_lock);
+ node = llist_del_first(&rdma->sc_rw_ctxts);
+ spin_unlock(&rdma->sc_rw_ctxt_lock);
+ if (node) {
+ ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
- spin_unlock(&rdma->sc_rw_ctxt_lock);
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
goto out_noctx;
+
INIT_LIST_HEAD(&ctxt->rw_list);
}
@@ -83,14 +84,18 @@ out_noctx:
return NULL;
}
-static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
- struct svc_rdma_rw_ctxt *ctxt)
+static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt,
+ struct llist_head *list)
{
sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
+ llist_add(&ctxt->rw_node, list);
+}
- spin_lock(&rdma->sc_rw_ctxt_lock);
- list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
- spin_unlock(&rdma->sc_rw_ctxt_lock);
+static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt)
+{
+ __svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
}
/**
@@ -101,9 +106,10 @@ static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
{
struct svc_rdma_rw_ctxt *ctxt;
+ struct llist_node *node;
- while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
- list_del(&ctxt->rw_list);
+ while ((node = llist_del_first(&rdma->sc_rw_ctxts)) != NULL) {
+ ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
kfree(ctxt);
}
}
@@ -171,20 +177,35 @@ static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
cc->cc_sqecount = 0;
}
+/*
+ * The consumed rw_ctx's are cleaned and placed on a local llist so
+ * that only one atomic llist operation is needed to put them all
+ * back on the free list.
+ */
static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir)
{
struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct llist_node *first, *last;
struct svc_rdma_rw_ctxt *ctxt;
+ LLIST_HEAD(free);
+ first = last = NULL;
while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
list_del(&ctxt->rw_list);
rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
ctxt->rw_nents, dir);
- svc_rdma_put_rw_ctxt(rdma, ctxt);
+ __svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
+
+ ctxt->rw_node.next = first;
+ first = &ctxt->rw_node;
+ if (!last)
+ last = first;
}
+ if (first)
+ llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
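The llist conversion trades a doubly-linked free list under a spinlock for a lock-free singly-linked one: additions (llist_add(), llist_add_batch()) are safe from any context, while llist_del_first() still needs external serialization when several CPUs may remove entries, which is why sc_rw_ctxt_lock is kept around the removal path above. A condensed sketch of both halves of that pattern, with invented names:

#include <linux/llist.h>
#include <linux/spinlock.h>

struct example_ctxt {
	struct llist_node node;
	/* ... per-I/O state ... */
};

/* removal: llist_del_first() assumes a single remover, so serialize it */
static struct example_ctxt *example_get(struct llist_head *pool,
					spinlock_t *lock)
{
	struct llist_node *n;

	spin_lock(lock);
	n = llist_del_first(pool);
	spin_unlock(lock);
	return n ? llist_entry(n, struct example_ctxt, node) : NULL;
}

/* return a locally built chain with a single atomic operation */
static void example_put_batch(struct llist_node *first,
			      struct llist_node *last,
			      struct llist_head *pool)
{
	if (first)
		llist_add_batch(first, last, pool);
}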
/* State for sending a Write or Reply chunk.
@@ -248,8 +269,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write(wc, &cc->cc_cid);
- atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
@@ -304,9 +324,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_read(wc, &cc->cc_cid);
- atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
-
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
cc->cc_status = wc->status;
complete(&cc->cc_done);
return;
@@ -483,7 +501,7 @@ out_overflow:
* @iov: kvec to write
*
* Returns:
- * On succes, returns zero
+ * On success, returns zero
* %-E2BIG if the client-provided Write chunk is too small
* %-ENOMEM if a resource has been exhausted
* %-EIO if an rdma-rw error occurred
@@ -504,7 +522,7 @@ static int svc_rdma_iov_write(struct svc_rdma_write_info *info,
* @length: number of bytes to write
*
* Returns:
- * On succes, returns zero
+ * On success, returns zero
* %-E2BIG if the client-provided Write chunk is too small
* %-ENOMEM if a resource has been exhausted
* %-EIO if an rdma-rw error occurred
@@ -526,7 +544,7 @@ static int svc_rdma_pages_write(struct svc_rdma_write_info *info,
* @data: pointer to write arguments
*
* Returns:
- * On succes, returns zero
+ * On success, returns zero
* %-E2BIG if the client-provided Write chunk is too small
* %-ENOMEM if a resource has been exhausted
* %-EIO if an rdma-rw error occurred
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index d6bbafb773e1..599021b2391d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -113,13 +113,6 @@
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
-static inline struct svc_rdma_send_ctxt *
-svc_rdma_next_send_ctxt(struct list_head *list)
-{
- return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
- sc_list);
-}
-
static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
struct rpc_rdma_cid *cid)
{
@@ -182,9 +175,10 @@ fail0:
void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
{
struct svc_rdma_send_ctxt *ctxt;
+ struct llist_node *node;
- while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) {
- list_del(&ctxt->sc_list);
+ while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) {
+ ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
ib_dma_unmap_single(rdma->sc_pd->device,
ctxt->sc_sges[0].addr,
rdma->sc_max_req_size,
@@ -204,12 +198,13 @@ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
{
struct svc_rdma_send_ctxt *ctxt;
+ struct llist_node *node;
spin_lock(&rdma->sc_send_lock);
- ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts);
- if (!ctxt)
+ node = llist_del_first(&rdma->sc_send_ctxts);
+ if (!node)
goto out_empty;
- list_del(&ctxt->sc_list);
+ ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
spin_unlock(&rdma->sc_send_lock);
out:
@@ -253,9 +248,21 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
ctxt->sc_sges[i].length);
}
- spin_lock(&rdma->sc_send_lock);
- list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
- spin_unlock(&rdma->sc_send_lock);
+ llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
+}
+
+/**
+ * svc_rdma_wake_send_waiters - manage Send Queue accounting
+ * @rdma: controlling transport
+ * @avail: Number of additional SQEs that are now available
+ *
+ */
+void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail)
+{
+ atomic_add(avail, &rdma->sc_sq_avail);
+ smp_mb__after_atomic();
+ if (unlikely(waitqueue_active(&rdma->sc_send_wait)))
+ wake_up(&rdma->sc_send_wait);
}
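The waitqueue_active() test above lets the completion handler skip wake_up() (and its lock) when nobody is sleeping; that is only safe because smp_mb__after_atomic() orders the counter update before the waitqueue check, pairing with the barrier a waiter takes when it enqueues itself. The waiting side of the pattern, in sketch form (illustrative, not part of the patch):

/* Waiter sketch: wait_event() puts the task on sc_send_wait before
 * re-testing the condition, so either it observes the counter bumped by
 * svc_rdma_wake_send_waiters() or the waker observes a non-empty
 * waitqueue -- a lost wake-up cannot occur.
 */
static void example_wait_for_sqes(struct svcxprt_rdma *rdma, int needed)
{
	wait_event(rdma->sc_send_wait,
		   atomic_read(&rdma->sc_sq_avail) >= needed);
}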
/**
@@ -275,11 +282,9 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ svc_rdma_wake_send_waiters(rdma, 1);
complete(&ctxt->sc_done);
- atomic_inc(&rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
-
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index d94b7759ada1..94b20fb47135 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -136,9 +136,9 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
- INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
+ init_llist_head(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
- INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
+ init_llist_head(&cma_xprt->sc_rw_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
@@ -545,7 +545,6 @@ static void __svc_rdma_free(struct work_struct *work)
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
- struct svc_xprt *xprt = &rdma->sc_xprt;
/* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -553,12 +552,6 @@ static void __svc_rdma_free(struct work_struct *work)
svc_rdma_flush_recv_queues(rdma);
- /* Final put of backchannel client transport */
- if (xprt->xpt_bc_xprt) {
- xprt_put(xprt->xpt_bc_xprt);
- xprt->xpt_bc_xprt = NULL;
- }
-
svc_rdma_destroy_rw_ctxts(rdma);
svc_rdma_send_ctxts_destroy(rdma);
svc_rdma_recv_ctxts_destroy(rdma);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 19a49d26b1e4..16e5696314a4 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -73,6 +73,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
int xprt_rdma_pad_optimize;
+static struct xprt_class xprt_rdma;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -249,12 +250,9 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt->stat.connect_start;
xprt_set_connected(xprt);
rc = -EAGAIN;
- } else {
- /* Force a call to xprt_rdma_close to clean up */
- spin_lock(&xprt->transport_lock);
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
- spin_unlock(&xprt->transport_lock);
- }
+ } else
+ rpcrdma_xprt_disconnect(r_xprt);
+ xprt_unlock_connect(xprt, r_xprt);
xprt_wake_pending_tasks(xprt, rc);
}
@@ -349,6 +347,7 @@ xprt_setup_rdma(struct xprt_create *args)
/* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */
xprt->prot = IPPROTO_TCP;
+ xprt->xprt_class = &xprt_rdma;
xprt->addrlen = args->addrlen;
memcpy(&xprt->addr, sap, xprt->addrlen);
@@ -487,6 +486,8 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct rpcrdma_ep *ep = r_xprt->rx_ep;
unsigned long delay;
+ WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt));
+
delay = 0;
if (ep && ep->re_connect_status != 0) {
delay = xprt_reconnect_delay(xprt);
@@ -659,7 +660,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
goto drop_connection;
rqst->rq_xtime = ktime_get();
- if (rpcrdma_post_sends(r_xprt, req))
+ if (frwr_send(r_xprt, req))
goto drop_connection;
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 649c23518ec0..aaec3c9be8db 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -124,7 +124,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
* connection is closed or lost. (The important thing is it needs
* to be invoked "at least" once).
*/
-static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
+void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
{
if (atomic_add_unless(&ep->re_force_disconnect, 1, 1))
xprt_force_disconnect(ep->re_xprt);
@@ -1350,21 +1350,6 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
}
/**
- * rpcrdma_post_sends - Post WRs to a transport's Send Queue
- * @r_xprt: controlling transport instance
- * @req: rpcrdma_req containing the Send WR to post
- *
- * Returns 0 if the post was successful, otherwise -ENOTCONN
- * is returned.
- */
-int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
-{
- if (frwr_send(r_xprt, req))
- return -ENOTCONN;
- return 0;
-}
-
-/**
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
* @needed: current credit grant
@@ -1416,12 +1401,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
rc = ib_post_recv(ep->re_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
- if (atomic_dec_return(&ep->re_receiving) > 0)
- complete(&ep->re_done);
-
-out:
- trace_xprtrdma_post_recvs(r_xprt, count, rc);
if (rc) {
+ trace_xprtrdma_post_recvs_err(r_xprt, rc);
for (wr = bad_wr; wr;) {
struct rpcrdma_rep *rep;
@@ -1431,6 +1412,11 @@ out:
--count;
}
}
+ if (atomic_dec_return(&ep->re_receiving) > 0)
+ complete(&ep->re_done);
+
+out:
+ trace_xprtrdma_post_recvs(r_xprt, count);
ep->re_receive_count += count;
return;
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5d231d94e944..d91f54eae00b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -454,11 +454,11 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Endpoint calls - xprtrdma/verbs.c
*/
+void rpcrdma_force_disconnect(struct rpcrdma_ep *ep);
void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
-int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
/*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 316d04945587..04f1b78bcbca 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -91,6 +91,11 @@ static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
static struct ctl_table_header *sunrpc_table_header;
+static struct xprt_class xs_local_transport;
+static struct xprt_class xs_udp_transport;
+static struct xprt_class xs_tcp_transport;
+static struct xprt_class xs_bc_tcp_transport;
+
/*
* FIXME: changing the UDP slot table size should also resize the UDP
* socket buffers for existing UDP transports
@@ -1648,6 +1653,13 @@ static int xs_get_srcport(struct sock_xprt *transport)
return port;
}
+unsigned short get_srcport(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
+ return xs_sock_getport(sock->sock);
+}
+EXPORT_SYMBOL(get_srcport);
+
static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port)
{
if (transport->srcport != 0)
@@ -1689,7 +1701,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
err = kernel_bind(sock, (struct sockaddr *)&myaddr,
transport->xprt.addrlen);
if (err == 0) {
- transport->srcport = port;
+ if (transport->xprt.reuseport)
+ transport->srcport = port;
break;
}
last = port;
@@ -2086,13 +2099,20 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
if (sock == NULL)
return;
+ if (!xprt->reuseport) {
+ xs_close(xprt);
+ return;
+ }
switch (skst) {
- default:
+ case TCP_FIN_WAIT1:
+ case TCP_FIN_WAIT2:
+ break;
+ case TCP_ESTABLISHED:
+ case TCP_CLOSE_WAIT:
kernel_sock_shutdown(sock, SHUT_RDWR);
trace_rpc_socket_shutdown(xprt, sock);
break;
- case TCP_CLOSE:
- case TCP_TIME_WAIT:
+ default:
xs_reset_transport(transport);
}
}
@@ -2779,6 +2799,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = 0;
+ xprt->xprt_class = &xs_local_transport;
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->bind_timeout = XS_BIND_TO;
@@ -2848,6 +2869,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_UDP;
+ xprt->xprt_class = &xs_udp_transport;
/* XXX: header size can vary due to auth type, IPv6, etc. */
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
@@ -2928,6 +2950,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_TCP;
+ xprt->xprt_class = &xs_tcp_transport;
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->bind_timeout = XS_BIND_TO;
@@ -3001,6 +3024,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);
xprt->prot = IPPROTO_TCP;
+ xprt->xprt_class = &xs_bc_tcp_transport;
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->timeout = &xs_tcp_default_timeout;
@@ -3132,24 +3156,6 @@ void cleanup_socket_xprt(void)
xprt_unregister_transport(&xs_bc_tcp_transport);
}
-static int param_set_uint_minmax(const char *val,
- const struct kernel_param *kp,
- unsigned int min, unsigned int max)
-{
- unsigned int num;
- int ret;
-
- if (!val)
- return -EINVAL;
- ret = kstrtouint(val, 0, &num);
- if (ret)
- return ret;
- if (num < min || num > max)
- return -EINVAL;
- *((unsigned int *)kp->arg) = num;
- return 0;
-}
-
static int param_set_portnr(const char *val, const struct kernel_param *kp)
{
return param_set_uint_minmax(val, kp,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 89a36db47ab4..0b2c18efc079 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -378,22 +378,283 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
}
EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
+struct switchdev_nested_priv {
+ bool (*check_cb)(const struct net_device *dev);
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev);
+ const struct net_device *dev;
+ struct net_device *lower_dev;
+};
+
+static int switchdev_lower_dev_walk(struct net_device *lower_dev,
+ struct netdev_nested_priv *priv)
+{
+ struct switchdev_nested_priv *switchdev_priv = priv->data;
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev);
+ bool (*check_cb)(const struct net_device *dev);
+ const struct net_device *dev;
+
+ check_cb = switchdev_priv->check_cb;
+ foreign_dev_check_cb = switchdev_priv->foreign_dev_check_cb;
+ dev = switchdev_priv->dev;
+
+ if (check_cb(lower_dev) && !foreign_dev_check_cb(lower_dev, dev)) {
+ switchdev_priv->lower_dev = lower_dev;
+ return 1;
+ }
+
+ return 0;
+}
+
+static struct net_device *
+switchdev_lower_dev_find(struct net_device *dev,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev))
+{
+ struct switchdev_nested_priv switchdev_priv = {
+ .check_cb = check_cb,
+ .foreign_dev_check_cb = foreign_dev_check_cb,
+ .dev = dev,
+ .lower_dev = NULL,
+ };
+ struct netdev_nested_priv priv = {
+ .data = &switchdev_priv,
+ };
+
+ netdev_walk_all_lower_dev_rcu(dev, switchdev_lower_dev_walk, &priv);
+
+ return switchdev_priv.lower_dev;
+}
+
+static int __switchdev_handle_fdb_add_to_device(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ const struct switchdev_notifier_info *info = &fdb_info->info;
+ struct net_device *br, *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (check_cb(dev))
+ return add_cb(dev, orig_dev, info->ctx, fdb_info);
+
+ if (netif_is_lag_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ goto maybe_bridged_with_us;
+
+ /* This is a LAG interface that we offload */
+ if (!lag_add_cb)
+ return -EOPNOTSUPP;
+
+ return lag_add_cb(dev, orig_dev, info->ctx, fdb_info);
+ }
+
+ /* Recurse through lower interfaces in case the FDB entry is pointing
+ * towards a bridge device.
+ */
+ if (netif_is_bridge_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ /* This is a bridge interface that we offload */
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ /* Do not propagate FDB entries across bridges */
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ /* Bridge ports might be either us, or LAG interfaces
+ * that we offload.
+ */
+ if (!check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb,
+ foreign_dev_check_cb))
+ continue;
+
+ err = __switchdev_handle_fdb_add_to_device(lower_dev, orig_dev,
+ fdb_info, check_cb,
+ foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ return 0;
+ }
+
+maybe_bridged_with_us:
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ br = netdev_master_upper_dev_get_rcu(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return 0;
+
+ if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ return __switchdev_handle_fdb_add_to_device(br, orig_dev, fdb_info,
+ check_cb, foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+}
+
+int switchdev_handle_fdb_add_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_add_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ int err;
+
+ err = __switchdev_handle_fdb_add_to_device(dev, dev, fdb_info,
+ check_cb,
+ foreign_dev_check_cb,
+ add_cb, lag_add_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_add_to_device);
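A switchdev driver is expected to call this helper from its atomic SWITCHDEV_FDB_ADD_TO_DEVICE notifier; a hedged sketch of such a caller follows, where every foo_* identifier is hypothetical.

static const struct net_device_ops foo_netdev_ops;	/* hypothetical */

static bool foo_port_dev_check(const struct net_device *dev)
{
	return dev->netdev_ops == &foo_netdev_ops;
}

static bool foo_foreign_dev_check(const struct net_device *dev,
				  const struct net_device *foreign_dev)
{
	/* true when @foreign_dev is not handled by this driver */
	return !foo_port_dev_check(foreign_dev);
}

static int foo_port_fdb_add(struct net_device *dev,
			    const struct net_device *orig_dev, const void *ctx,
			    const struct switchdev_notifier_fdb_info *fdb_info)
{
	/* program fdb_info->addr / fdb_info->vid into the hardware FDB */
	return 0;
}

static int foo_switchdev_event(struct notifier_block *nb,
			       unsigned long event, void *ptr)
{
	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
	const struct switchdev_notifier_fdb_info *fdb_info = ptr;
	int err;

	switch (event) {
	case SWITCHDEV_FDB_ADD_TO_DEVICE:
		err = switchdev_handle_fdb_add_to_device(dev, fdb_info,
							 foo_port_dev_check,
							 foo_foreign_dev_check,
							 foo_port_fdb_add,
							 NULL /* no LAG offload */);
		return notifier_from_errno(err);
	}
	return NOTIFY_DONE;
}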
+
+static int __switchdev_handle_fdb_del_to_device(struct net_device *dev,
+ const struct net_device *orig_dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ const struct switchdev_notifier_info *info = &fdb_info->info;
+ struct net_device *br, *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (check_cb(dev))
+ return del_cb(dev, orig_dev, info->ctx, fdb_info);
+
+ if (netif_is_lag_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ goto maybe_bridged_with_us;
+
+ /* This is a LAG interface that we offload */
+ if (!lag_del_cb)
+ return -EOPNOTSUPP;
+
+ return lag_del_cb(dev, orig_dev, info->ctx, fdb_info);
+ }
+
+ /* Recurse through lower interfaces in case the FDB entry is pointing
+ * towards a bridge device.
+ */
+ if (netif_is_bridge_master(dev)) {
+ if (!switchdev_lower_dev_find(dev, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ /* This is a bridge interface that we offload */
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ /* Do not propagate FDB entries across bridges */
+ if (netif_is_bridge_master(lower_dev))
+ continue;
+
+ /* Bridge ports might be either us, or LAG interfaces
+ * that we offload.
+ */
+ if (!check_cb(lower_dev) &&
+ !switchdev_lower_dev_find(lower_dev, check_cb,
+ foreign_dev_check_cb))
+ continue;
+
+ err = __switchdev_handle_fdb_del_to_device(lower_dev, orig_dev,
+ fdb_info, check_cb,
+ foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ return 0;
+ }
+
+maybe_bridged_with_us:
+ /* Event is neither on a bridge nor a LAG. Check whether it is on an
+ * interface that is in a bridge with us.
+ */
+ br = netdev_master_upper_dev_get_rcu(dev);
+ if (!br || !netif_is_bridge_master(br))
+ return 0;
+
+ if (!switchdev_lower_dev_find(br, check_cb, foreign_dev_check_cb))
+ return 0;
+
+ return __switchdev_handle_fdb_del_to_device(br, orig_dev, fdb_info,
+ check_cb, foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+}
+
+int switchdev_handle_fdb_del_to_device(struct net_device *dev,
+ const struct switchdev_notifier_fdb_info *fdb_info,
+ bool (*check_cb)(const struct net_device *dev),
+ bool (*foreign_dev_check_cb)(const struct net_device *dev,
+ const struct net_device *foreign_dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info),
+ int (*lag_del_cb)(struct net_device *dev,
+ const struct net_device *orig_dev, const void *ctx,
+ const struct switchdev_notifier_fdb_info *fdb_info))
+{
+ int err;
+
+ err = __switchdev_handle_fdb_del_to_device(dev, dev, fdb_info,
+ check_cb,
+ foreign_dev_check_cb,
+ del_cb, lag_del_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_fdb_del_to_device);
+
static int __switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*add_cb)(struct net_device *dev,
+ int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
{
+ struct switchdev_notifier_info *info = &port_obj_info->info;
struct netlink_ext_ack *extack;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
- extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+ extack = switchdev_notifier_info_to_extack(info);
if (check_cb(dev)) {
- err = add_cb(dev, port_obj_info->obj, extack);
+ err = add_cb(dev, info->ctx, port_obj_info->obj, extack);
if (err != -EOPNOTSUPP)
port_obj_info->handled = true;
return err;
@@ -422,7 +683,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev,
int switchdev_handle_port_obj_add(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*add_cb)(struct net_device *dev,
+ int (*add_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj,
struct netlink_ext_ack *extack))
{
@@ -439,15 +700,16 @@ EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
static int __switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*del_cb)(struct net_device *dev,
+ int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj))
{
+ struct switchdev_notifier_info *info = &port_obj_info->info;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
if (check_cb(dev)) {
- err = del_cb(dev, port_obj_info->obj);
+ err = del_cb(dev, info->ctx, port_obj_info->obj);
if (err != -EOPNOTSUPP)
port_obj_info->handled = true;
return err;
@@ -476,7 +738,7 @@ static int __switchdev_handle_port_obj_del(struct net_device *dev,
int switchdev_handle_port_obj_del(struct net_device *dev,
struct switchdev_notifier_port_obj_info *port_obj_info,
bool (*check_cb)(const struct net_device *dev),
- int (*del_cb)(struct net_device *dev,
+ int (*del_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_obj *obj))
{
int err;
@@ -492,19 +754,20 @@ EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
static int __switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
- int (*set_cb)(struct net_device *dev,
+ int (*set_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack))
{
+ struct switchdev_notifier_info *info = &port_attr_info->info;
struct netlink_ext_ack *extack;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
- extack = switchdev_notifier_info_to_extack(&port_attr_info->info);
+ extack = switchdev_notifier_info_to_extack(info);
if (check_cb(dev)) {
- err = set_cb(dev, port_attr_info->attr, extack);
+ err = set_cb(dev, info->ctx, port_attr_info->attr, extack);
if (err != -EOPNOTSUPP)
port_attr_info->handled = true;
return err;
@@ -533,7 +796,7 @@ static int __switchdev_handle_port_attr_set(struct net_device *dev,
int switchdev_handle_port_attr_set(struct net_device *dev,
struct switchdev_notifier_port_attr_info *port_attr_info,
bool (*check_cb)(const struct net_device *dev),
- int (*set_cb)(struct net_device *dev,
+ int (*set_cb)(struct net_device *dev, const void *ctx,
const struct switchdev_attr *attr,
struct netlink_ext_ack *extack))
{
@@ -546,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
return err;
}
EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
+
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .dev = dev,
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ .tx_fwd_offload = tx_fwd_offload,
+ },
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED,
+ brport_dev, &brport_info.info,
+ extack);
+ return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
+
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ struct switchdev_notifier_brport_info brport_info = {
+ .brport = {
+ .ctx = ctx,
+ .atomic_nb = atomic_nb,
+ .blocking_nb = blocking_nb,
+ },
+ };
+
+ ASSERT_RTNL();
+
+ call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED,
+ brport_dev, &brport_info.info,
+ NULL);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
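Drivers pair these two calls with bridge join/leave on their ports; a hedged sketch of that wiring follows, with foo_* identifiers and the notifier blocks invented for illustration.

static struct notifier_block foo_switchdev_nb;		/* hypothetical */
static struct notifier_block foo_switchdev_blocking_nb;	/* hypothetical */

struct foo_port;	/* driver-private port state, passed as @ctx */

static int foo_port_bridge_join(struct net_device *brport_dev,
				struct net_device *dev,
				struct foo_port *port,
				struct netlink_ext_ack *extack)
{
	/* @brport_dev: what the bridge sees as its port (may be a LAG upper);
	 * @dev: this driver's own netdev underneath it.
	 */
	return switchdev_bridge_port_offload(brport_dev, dev, port,
					     &foo_switchdev_nb,
					     &foo_switchdev_blocking_nb,
					     false /* no tx_fwd_offload */,
					     extack);
}

static void foo_port_bridge_leave(struct net_device *brport_dev,
				  struct foo_port *port)
{
	switchdev_bridge_port_unoffload(brport_dev, port,
					&foo_switchdev_nb,
					&foo_switchdev_blocking_nb);
}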
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index d4beca895992..593846d25214 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -699,7 +699,7 @@ int tipc_bcast_init(struct net *net)
spin_lock_init(&tipc_net(net)->bclock);
if (!tipc_link_bc_create(net, 0, 0, NULL,
- FB_MTU,
+ one_page_mtu,
BCLINK_WIN_DEFAULT,
BCLINK_WIN_DEFAULT,
0,
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index e5c43d4d5a75..c9391d38de85 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -898,16 +898,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
if (unlikely(!aead))
return -ENOKEY;
- /* Cow skb data if needed */
- if (likely(!skb_cloned(skb) &&
- (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) {
- nsg = 1 + skb_shinfo(skb)->nr_frags;
- } else {
- nsg = skb_cow_data(skb, 0, &unused);
- if (unlikely(nsg < 0)) {
- pr_err("RX: skb_cow_data() returned %d\n", nsg);
- return nsg;
- }
+ nsg = skb_cow_data(skb, 0, &unused);
+ if (unlikely(nsg < 0)) {
+ pr_err("RX: skb_cow_data() returned %d\n", nsg);
+ return nsg;
}
/* Allocate memory for the AEAD operation */
diff --git a/net/tipc/link.c b/net/tipc/link.c
index c44b4bfaaee6..1b7a487c8841 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -654,6 +654,7 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt)
break;
case LINK_FAILOVER_BEGIN_EVT:
l->state = LINK_FAILINGOVER;
+ break;
case LINK_FAILURE_EVT:
case LINK_RESET_EVT:
case LINK_ESTABLISH_EVT:
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ce6ab54822d8..5c9fd4791c4b 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -41,19 +41,18 @@
#include "name_table.h"
#include "crypto.h"
+#define BUF_ALIGN(x) ALIGN(x, 4)
#define MAX_FORWARD_SIZE 1024
#ifdef CONFIG_TIPC_CRYPTO
#define BUF_HEADROOM ALIGN(((LL_MAX_HEADER + 48) + EHDR_MAX_SIZE), 16)
-#define BUF_TAILROOM (TIPC_AES_GCM_TAG_SIZE)
+#define BUF_OVERHEAD (BUF_HEADROOM + TIPC_AES_GCM_TAG_SIZE)
#else
#define BUF_HEADROOM (LL_MAX_HEADER + 48)
-#define BUF_TAILROOM 16
+#define BUF_OVERHEAD BUF_HEADROOM
#endif
-static unsigned int align(unsigned int i)
-{
- return (i + 3) & ~3u;
-}
+const int one_page_mtu = PAGE_SIZE - SKB_DATA_ALIGN(BUF_OVERHEAD) -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
/**
* tipc_buf_acquire - creates a TIPC message buffer
@@ -69,13 +68,8 @@ static unsigned int align(unsigned int i)
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
struct sk_buff *skb;
-#ifdef CONFIG_TIPC_CRYPTO
- unsigned int buf_size = (BUF_HEADROOM + size + BUF_TAILROOM + 3) & ~3u;
-#else
- unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
-#endif
- skb = alloc_skb_fclone(buf_size, gfp);
+ skb = alloc_skb_fclone(BUF_OVERHEAD + size, gfp);
if (skb) {
skb_reserve(skb, BUF_HEADROOM);
skb_put(skb, size);
@@ -395,7 +389,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset,
if (unlikely(!skb)) {
if (pktmax != MAX_MSG_SIZE)
return -ENOMEM;
- rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list);
+ rc = tipc_msg_build(mhdr, m, offset, dsz,
+ one_page_mtu, list);
if (rc != dsz)
return rc;
if (tipc_msg_assemble(list))
@@ -490,7 +485,7 @@ static bool tipc_msg_bundle(struct sk_buff *bskb, struct tipc_msg *msg,
msz = msg_size(msg);
bsz = msg_size(bmsg);
- offset = align(bsz);
+ offset = BUF_ALIGN(bsz);
pad = offset - bsz;
if (unlikely(skb_tailroom(bskb) < (pad + msz)))
@@ -547,7 +542,7 @@ bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss,
/* Make a new bundle of the two messages if possible */
tsz = msg_size(buf_msg(tskb));
- if (unlikely(mss < align(INT_H_SIZE + tsz) + msg_size(msg)))
+ if (unlikely(mss < BUF_ALIGN(INT_H_SIZE + tsz) + msg_size(msg)))
return true;
if (unlikely(pskb_expand_head(tskb, INT_H_SIZE, mss - tsz - INT_H_SIZE,
GFP_ATOMIC)))
@@ -606,7 +601,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos)
if (unlikely(!tipc_msg_validate(iskb)))
goto none;
- *pos += align(imsz);
+ *pos += BUF_ALIGN(imsz);
return true;
none:
kfree_skb(skb);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 5d64596ba987..64ae4c4c44f8 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -99,9 +99,10 @@ struct plist;
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
#define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE)
-#define FB_MTU 3744
#define TIPC_MEDIA_INFO_OFFSET 5
+extern const int one_page_mtu;
+
struct tipc_skb_cb {
union {
struct {
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index fecab516bf41..01396dd1c899 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -673,12 +673,12 @@ exit:
* Returns a list of local sockets
*/
void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
- bool exact, struct list_head *dports)
+ struct list_head *dports)
{
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
- u32 scope = ua->scope;
+ u8 scope = ua->scope;
rcu_read_lock();
sc = tipc_service_find(net, ua);
@@ -688,7 +688,7 @@ void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
spin_lock_bh(&sc->lock);
service_range_foreach_match(sr, sc, ua->sr.lower, ua->sr.upper) {
list_for_each_entry(p, &sr->local_publ, local_publ) {
- if (p->scope == scope || (!exact && p->scope < scope))
+ if (scope == p->scope || scope == TIPC_ANY_SCOPE)
tipc_dest_push(dports, 0, p->sk.ref);
}
}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index c7c9a3ddd420..259f95e3d99c 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -51,6 +51,8 @@ struct tipc_uaddr;
#define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1)
#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
+#define TIPC_ANY_SCOPE 10 /* Both node and cluster scope will match */
+
/**
* struct publication - info about a published service address or range
* @sr: service range represented by this publication
@@ -113,7 +115,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
bool tipc_nametbl_lookup_anycast(struct net *net, struct tipc_uaddr *ua,
struct tipc_socket_addr *sk);
void tipc_nametbl_lookup_mcast_sockets(struct net *net, struct tipc_uaddr *ua,
- bool exact, struct list_head *dports);
+ struct list_head *dports);
void tipc_nametbl_lookup_mcast_nodes(struct net *net, struct tipc_uaddr *ua,
struct tipc_nlist *nodes);
bool tipc_nametbl_lookup_group(struct net *net, struct tipc_uaddr *ua,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 81af92954c6c..9947b7dfe1d2 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1214,7 +1214,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
/* Peer has changed i/f address without rebooting.
* If so, the link will reset soon, and the next
* discovery will be accepted. So we can ignore it.
- * It may also be an cloned or malicious peer having
+ * It may also be a cloned or malicious peer having
* chosen the same node address and signature as an
* existing one.
* Ignore requests until the link goes down, if ever.
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 53af72824c9c..a0a27d87f631 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -73,9 +73,6 @@ struct sockaddr_pair {
/**
* struct tipc_sock - TIPC socket structure
* @sk: socket - interacts with 'port' and with user via the socket API
- * @conn_type: TIPC type used when connection was established
- * @conn_instance: TIPC instance used when connection was established
- * @published: non-zero if port has one or more associated names
* @max_pkt: maximum packet size "hint" used when building messages sent by port
* @maxnagle: maximum size of msg which can be subject to nagle
* @portid: unique port identity in TIPC socket hash table
@@ -106,11 +103,11 @@ struct sockaddr_pair {
* @expect_ack: whether this TIPC socket is expecting an ack
* @nodelay: setsockopt() TIPC_NODELAY setting
* @group_is_open: TIPC socket group is fully open (FIXME)
+ * @published: true if port has one or more associated names
+ * @conn_addrtype: address type used when establishing connection
*/
struct tipc_sock {
struct sock sk;
- u32 conn_type;
- u32 conn_instance;
u32 max_pkt;
u32 maxnagle;
u32 portid;
@@ -141,6 +138,7 @@ struct tipc_sock {
bool nodelay;
bool group_is_open;
bool published;
+ u8 conn_addrtype;
};
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
@@ -160,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk);
static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -664,7 +663,7 @@ static int tipc_release(struct socket *sock)
* @skaddr: socket address describing name(s) and desired operation
* @alen: size of socket address data structure
*
- * Name and name sequence binding is indicated using a positive scope value;
+ * Name and name sequence binding are indicated using a positive scope value;
* a negative scope value unbinds the specified name. Specifying no name
* (i.e. a socket address length of 0) unbinds all names from the socket.
*
@@ -1202,12 +1201,12 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct tipc_msg *hdr;
struct tipc_uaddr ua;
int user, mtyp, hlen;
- bool exact;
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
ua.addrtype = TIPC_SERVICE_RANGE;
+ /* tipc_skb_peek() increments the head skb's reference counter */
skb = tipc_skb_peek(arrvq, &inputq->lock);
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
hdr = buf_msg(skb);
@@ -1216,6 +1215,12 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
onode = msg_orignode(hdr);
ua.sr.type = msg_nametype(hdr);
+ ua.sr.lower = msg_namelower(hdr);
+ ua.sr.upper = msg_nameupper(hdr);
+ if (onode == self)
+ ua.scope = TIPC_ANY_SCOPE;
+ else
+ ua.scope = TIPC_CLUSTER_SCOPE;
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
spin_lock_bh(&inputq->lock);
@@ -1233,20 +1238,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
ua.sr.lower = 0;
ua.sr.upper = ~0;
ua.scope = msg_lookup_scope(hdr);
- exact = true;
- } else {
- /* TIPC_NODE_SCOPE means "any scope" in this context */
- if (onode == self)
- ua.scope = TIPC_NODE_SCOPE;
- else
- ua.scope = TIPC_CLUSTER_SCOPE;
- exact = false;
- ua.sr.lower = msg_namelower(hdr);
- ua.sr.upper = msg_nameupper(hdr);
}
/* Create destination port list: */
- tipc_nametbl_lookup_mcast_sockets(net, &ua, exact, &dports);
+ tipc_nametbl_lookup_mcast_sockets(net, &ua, &dports);
/* Clone message per destination */
while (tipc_dest_pop(&dports, NULL, &portid)) {
@@ -1258,13 +1253,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
}
pr_warn("Failed to clone mcast rcv buffer\n");
}
- /* Append to inputq if not already done by other thread */
+ /* Append clones to inputq only if skb is still head of arrvq */
spin_lock_bh(&inputq->lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(&tmpq, inputq);
- /* Decrease the skb's refcnt as increasing in the
- * function tipc_skb_peek
- */
+ /* Decrement the skb's refcnt */
kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(&inputq->lock);
@@ -1433,7 +1426,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (ua) {
if (!tipc_uaddr_valid(ua, m->msg_namelen))
return -EINVAL;
- atype = ua->addrtype;
+ atype = ua->addrtype;
}
/* If socket belongs to a communication group follow other paths */
@@ -1463,10 +1456,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
return -EISCONN;
if (tsk->published)
return -EOPNOTSUPP;
- if (atype == TIPC_SERVICE_ADDR) {
- tsk->conn_type = ua->sa.type;
- tsk->conn_instance = ua->sa.instance;
- }
+ if (atype == TIPC_SERVICE_ADDR)
+ tsk->conn_addrtype = atype;
msg_set_syn(hdr, 1);
}
@@ -1525,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
rc = 0;
}
- if (unlikely(syn && !rc))
+ if (unlikely(syn && !rc)) {
tipc_set_sk_state(sk, TIPC_CONNECTING);
+ if (dlen && timeout) {
+ timeout = msecs_to_jiffies(timeout);
+ tipc_wait_for_connect(sock, &timeout);
+ }
+ }
return rc ? rc : dlen;
}
@@ -1574,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
return -EMSGSIZE;
/* Handle implicit connection setup */
- if (unlikely(dest)) {
+ if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
rc = __tipc_sendmsg(sock, m, dlen);
if (dlen && dlen == rc) {
tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
@@ -1737,67 +1733,58 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
struct tipc_sock *tsk)
{
- struct tipc_msg *msg;
- u32 anc_data[3];
- u32 err;
- u32 dest_type;
- int has_name;
- int res;
+ struct tipc_msg *hdr;
+ u32 data[3] = {0,};
+ bool has_addr;
+ int dlen, rc;
if (likely(m->msg_controllen == 0))
return 0;
- msg = buf_msg(skb);
- /* Optionally capture errored message object(s) */
- err = msg ? msg_errcode(msg) : 0;
- if (unlikely(err)) {
- anc_data[0] = err;
- anc_data[1] = msg_data_sz(msg);
- res = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, anc_data);
- if (res)
- return res;
- if (anc_data[1]) {
- if (skb_linearize(skb))
- return -ENOMEM;
- msg = buf_msg(skb);
- res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
- msg_data(msg));
- if (res)
- return res;
- }
+ hdr = buf_msg(skb);
+ dlen = msg_data_sz(hdr);
+
+ /* Capture errored message object, if any */
+ if (msg_errcode(hdr)) {
+ if (skb_linearize(skb))
+ return -ENOMEM;
+ hdr = buf_msg(skb);
+ data[0] = msg_errcode(hdr);
+ data[1] = dlen;
+ rc = put_cmsg(m, SOL_TIPC, TIPC_ERRINFO, 8, data);
+ if (rc || !dlen)
+ return rc;
+ rc = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, dlen, msg_data(hdr));
+ if (rc)
+ return rc;
}
- /* Optionally capture message destination object */
- dest_type = msg ? msg_type(msg) : TIPC_DIRECT_MSG;
- switch (dest_type) {
+ /* Capture TIPC_SERVICE_ADDR/RANGE destination address, if any */
+ switch (msg_type(hdr)) {
case TIPC_NAMED_MSG:
- has_name = 1;
- anc_data[0] = msg_nametype(msg);
- anc_data[1] = msg_namelower(msg);
- anc_data[2] = msg_namelower(msg);
+ has_addr = true;
+ data[0] = msg_nametype(hdr);
+ data[1] = msg_namelower(hdr);
+ data[2] = data[1];
break;
case TIPC_MCAST_MSG:
- has_name = 1;
- anc_data[0] = msg_nametype(msg);
- anc_data[1] = msg_namelower(msg);
- anc_data[2] = msg_nameupper(msg);
+ has_addr = true;
+ data[0] = msg_nametype(hdr);
+ data[1] = msg_namelower(hdr);
+ data[2] = msg_nameupper(hdr);
break;
case TIPC_CONN_MSG:
- has_name = (tsk->conn_type != 0);
- anc_data[0] = tsk->conn_type;
- anc_data[1] = tsk->conn_instance;
- anc_data[2] = tsk->conn_instance;
+ has_addr = !!tsk->conn_addrtype;
+ data[0] = msg_nametype(&tsk->phdr);
+ data[1] = msg_nameinst(&tsk->phdr);
+ data[2] = data[1];
break;
default:
- has_name = 0;
- }
- if (has_name) {
- res = put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, anc_data);
- if (res)
- return res;
+ has_addr = false;
}
-
- return 0;
+ if (!has_addr)
+ return 0;
+ return put_cmsg(m, SOL_TIPC, TIPC_DESTNAME, 12, data);
}
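For completeness, the objects built above reach the application as SOL_TIPC control messages on recvmsg(); a userspace-side sketch of reading the destination name back, with error handling omitted and the layout taken from the 12-byte TIPC_DESTNAME object above:

#include <string.h>
#include <sys/socket.h>
#include <linux/tipc.h>

#ifndef SOL_TIPC
#define SOL_TIPC 271
#endif

static void example_read_dest_name(struct msghdr *m)
{
	struct cmsghdr *cm;
	__u32 dest[3];

	for (cm = CMSG_FIRSTHDR(m); cm; cm = CMSG_NXTHDR(m, cm)) {
		if (cm->cmsg_level != SOL_TIPC ||
		    cm->cmsg_type != TIPC_DESTNAME)
			continue;
		memcpy(dest, CMSG_DATA(cm), sizeof(dest));
		/* dest[0] = service type, dest[1]..dest[2] = instance range */
	}
}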
static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk)
@@ -1899,6 +1886,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
bool connected = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+ struct tipc_skb_cb *skb_cb;
struct sk_buff_head xmitq;
struct tipc_msg *hdr;
struct sk_buff *skb;
@@ -1922,6 +1910,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
if (unlikely(rc))
goto exit;
skb = skb_peek(&sk->sk_receive_queue);
+ skb_cb = TIPC_SKB_CB(skb);
hdr = buf_msg(skb);
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
@@ -1941,18 +1930,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
- copy = min_t(int, dlen, buflen);
- if (unlikely(copy != dlen))
- m->msg_flags |= MSG_TRUNC;
- rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+ int offset = skb_cb->bytes_read;
+
+ copy = min_t(int, dlen - offset, buflen);
+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+ if (unlikely(rc))
+ goto exit;
+ if (unlikely(offset + copy < dlen)) {
+ if (flags & MSG_EOR) {
+ if (!(flags & MSG_PEEK))
+ skb_cb->bytes_read = offset + copy;
+ } else {
+ m->msg_flags |= MSG_TRUNC;
+ skb_cb->bytes_read = 0;
+ }
+ } else {
+ if (flags & MSG_EOR)
+ m->msg_flags |= MSG_EOR;
+ skb_cb->bytes_read = 0;
+ }
} else {
copy = 0;
rc = 0;
- if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+ if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
rc = -ECONNRESET;
+ goto exit;
+ }
}
- if (unlikely(rc))
- goto exit;
/* Mark message as group event if applicable */
if (unlikely(grp_evt)) {
@@ -1975,6 +1979,9 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
tipc_node_distr_xmit(sock_net(sk), &xmitq);
}
+ if (skb_cb->bytes_read)
+ goto exit;
+
tsk_advance_rx_queue(sk);
if (likely(!connected))
@@ -2665,7 +2672,7 @@ static int tipc_listen(struct socket *sock, int len)
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
struct sock *sk = sock->sk;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
/* True wake-one mechanism for incoming connections: only
@@ -2674,12 +2681,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
* anymore, the common case will execute the loop only once.
*/
for (;;) {
- prepare_to_wait_exclusive(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+ add_wait_queue(sk_sleep(sk), &wait);
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -2691,7 +2698,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
if (signal_pending(current))
break;
}
- finish_wait(sk_sleep(sk), &wait);
return err;
}
@@ -2708,9 +2714,10 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
bool kern)
{
struct sock *new_sk, *sk = sock->sk;
- struct sk_buff *buf;
struct tipc_sock *new_tsock;
+ struct msghdr m = {NULL,};
struct tipc_msg *msg;
+ struct sk_buff *buf;
long timeo;
int res;
@@ -2750,24 +2757,23 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
tsk_set_importance(new_sk, msg_importance(msg));
if (msg_named(msg)) {
- new_tsock->conn_type = msg_nametype(msg);
- new_tsock->conn_instance = msg_nameinst(msg);
+ new_tsock->conn_addrtype = TIPC_SERVICE_ADDR;
+ msg_set_nametype(&new_tsock->phdr, msg_nametype(msg));
+ msg_set_nameinst(&new_tsock->phdr, msg_nameinst(msg));
}
/*
- * Respond to 'SYN-' by discarding it & returning 'ACK'-.
- * Respond to 'SYN+' by queuing it on new socket.
+ * Respond to 'SYN-' by discarding it & returning 'ACK'.
+ * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
*/
if (!msg_data_sz(msg)) {
- struct msghdr m = {NULL,};
-
tsk_advance_rx_queue(sk);
- __tipc_sendstream(new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+ __tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
exit:
release_sock(sk);
@@ -3455,13 +3461,14 @@ void tipc_socket_stop(void)
/* Caller should hold socket lock for the passed tipc socket. */
static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
{
- u32 peer_node;
- u32 peer_port;
+ u32 peer_node, peer_port;
+ u32 conn_type, conn_instance;
struct nlattr *nest;
peer_node = tsk_peer_node(tsk);
peer_port = tsk_peer_port(tsk);
-
+ conn_type = msg_nametype(&tsk->phdr);
+ conn_instance = msg_nameinst(&tsk->phdr);
nest = nla_nest_start_noflag(skb, TIPC_NLA_SOCK_CON);
if (!nest)
return -EMSGSIZE;
@@ -3471,12 +3478,12 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk)
if (nla_put_u32(skb, TIPC_NLA_CON_SOCK, peer_port))
goto msg_full;
- if (tsk->conn_type != 0) {
+ if (tsk->conn_addrtype != 0) {
if (nla_put_flag(skb, TIPC_NLA_CON_FLAG))
goto msg_full;
- if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, tsk->conn_type))
+ if (nla_put_u32(skb, TIPC_NLA_CON_TYPE, conn_type))
goto msg_full;
- if (nla_put_u32(skb, TIPC_NLA_CON_INST, tsk->conn_instance))
+ if (nla_put_u32(skb, TIPC_NLA_CON_INST, conn_instance))
goto msg_full;
}
nla_nest_end(skb, nest);
@@ -3866,9 +3873,9 @@ bool tipc_sk_filtering(struct sock *sk)
}
if (!tipc_sk_type_connectionless(sk)) {
- type = tsk->conn_type;
- lower = tsk->conn_instance;
- upper = tsk->conn_instance;
+ type = msg_nametype(&tsk->phdr);
+ lower = msg_nameinst(&tsk->phdr);
+ upper = lower;
}
if ((_type && _type != type) || (_lower && _lower != lower) ||
@@ -3933,6 +3940,7 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
{
int i = 0;
size_t sz = (dqueues) ? SK_LMAX : SK_LMIN;
+ u32 conn_type, conn_instance;
struct tipc_sock *tsk;
struct publication *p;
bool tsk_connected;
@@ -3953,8 +3961,10 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
if (tsk_connected) {
i += scnprintf(buf + i, sz - i, " %x", tsk_peer_node(tsk));
i += scnprintf(buf + i, sz - i, " %u", tsk_peer_port(tsk));
- i += scnprintf(buf + i, sz - i, " %u", tsk->conn_type);
- i += scnprintf(buf + i, sz - i, " %u", tsk->conn_instance);
+ conn_type = msg_nametype(&tsk->phdr);
+ conn_instance = msg_nameinst(&tsk->phdr);
+ i += scnprintf(buf + i, sz - i, " %u", conn_type);
+ i += scnprintf(buf + i, sz - i, " %u", conn_instance);
}
i += scnprintf(buf + i, sz - i, " | %u", tsk->published);
if (tsk->published) {
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 8e00d739f03a..05d49ad81290 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -66,7 +66,7 @@ static void tipc_sub_send_event(struct tipc_subscription *sub,
/**
* tipc_sub_check_overlap - test for subscription overlap with the given values
* @subscribed: the service range subscribed for
- * @found: the service range we are checning for match
+ * @found: the service range we are checking for match
*
* Returns true if there is overlap, otherwise false.
*/
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index bd9f1567aa39..b932469ee69c 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -128,7 +128,7 @@ static void destroy_record(struct tls_record_info *record)
int i;
for (i = 0; i < record->num_frags; i++)
- __skb_frag_unref(&record->frags[i]);
+ __skb_frag_unref(&record->frags[i], false);
kfree(record);
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 694de024d0ee..4feb95e34b64 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1153,7 +1153,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
int ret = 0;
bool eor;
- eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
+ eor = !(flags & MSG_SENDPAGE_NOTLAST);
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
/* Call the sk_stream functions to manage the sndbuf mem. */
@@ -2019,8 +2019,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
if (copied < 0)
goto splice_read_end;
- if (likely(!(flags & MSG_PEEK)))
- tls_sw_advance_skb(sk, skb, copied);
+ tls_sw_advance_skb(sk, skb, copied);
splice_read_end:
release_sock(sk);
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index b6c4282899ec..b7f811216820 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -25,6 +25,11 @@ config UNIX_SCM
depends on UNIX
default y
+config AF_UNIX_OOB
+ bool
+ depends on UNIX
+ default y
+
config UNIX_DIAG
tristate "UNIX: socket monitoring interface"
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 54e58cc4f945..20491825b4d0 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_UNIX) += unix.o
unix-y := af_unix.o garbage.o
unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
+unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5d1192ceb139..eb47b9de2380 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -113,6 +113,7 @@
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
+#include <linux/btf_ids.h>
#include "scm.h"
@@ -262,6 +263,14 @@ static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
sk_add_node(sk, list);
}
+static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
+ unsigned hash)
+{
+ __unix_remove_socket(sk);
+ smp_store_release(&unix_sk(sk)->addr, addr);
+ __unix_insert_socket(&unix_socket_table[hash], sk);
+}
+
static inline void unix_remove_socket(struct sock *sk)
{
spin_lock(&unix_table_lock);
@@ -278,11 +287,11 @@ static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
static struct sock *__unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
- int len, int type, unsigned int hash)
+ int len, unsigned int hash)
{
struct sock *s;
- sk_for_each(s, &unix_socket_table[hash ^ type]) {
+ sk_for_each(s, &unix_socket_table[hash]) {
struct unix_sock *u = unix_sk(s);
if (!net_eq(sock_net(s), net))
@@ -297,13 +306,12 @@ static struct sock *__unix_find_socket_byname(struct net *net,
static inline struct sock *unix_find_socket_byname(struct net *net,
struct sockaddr_un *sunname,
- int len, int type,
- unsigned int hash)
+ int len, unsigned int hash)
{
struct sock *s;
spin_lock(&unix_table_lock);
- s = __unix_find_socket_byname(net, sunname, len, type, hash);
+ s = __unix_find_socket_byname(net, sunname, len, hash);
if (s)
sock_hold(s);
spin_unlock(&unix_table_lock);
@@ -484,9 +492,10 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
*/
if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
other->sk_err = ECONNRESET;
- other->sk_error_report(other);
+ sk_error_report(other);
}
}
+ other->sk_state = TCP_CLOSE;
}
static void unix_sock_destructor(struct sock *sk)
@@ -495,6 +504,12 @@ static void unix_sock_destructor(struct sock *sk)
skb_queue_purge(&sk->sk_receive_queue);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(!sk_unhashed(sk));
WARN_ON(sk->sk_socket);
@@ -662,6 +677,10 @@ static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
+static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -715,6 +734,7 @@ static const struct proto_ops unix_stream_ops = {
.shutdown = unix_shutdown,
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
+ .read_sock = unix_stream_read_sock,
.mmap = sock_no_mmap,
.sendpage = unix_stream_sendpage,
.splice_read = unix_stream_splice_read,
@@ -739,6 +759,7 @@ static const struct proto_ops unix_dgram_ops = {
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_dgram_sendmsg,
+ .read_sock = unix_read_sock,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
@@ -770,13 +791,42 @@ static const struct proto_ops unix_seqpacket_ops = {
.show_fdinfo = unix_show_fdinfo,
};
-static struct proto unix_proto = {
- .name = "UNIX",
+static void unix_close(struct sock *sk, long timeout)
+{
+ /* Nothing to do here, unix socket does not need a ->close().
+ * This is merely for sockmap.
+ */
+}
+
+static void unix_unhash(struct sock *sk)
+{
+ /* Nothing to do here, unix socket does not need a ->unhash().
+ * This is merely for sockmap.
+ */
+}
+
+struct proto unix_dgram_proto = {
+ .name = "UNIX-DGRAM",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct unix_sock),
+ .close = unix_close,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = unix_dgram_bpf_update_proto,
+#endif
+};
+
+struct proto unix_stream_proto = {
+ .name = "UNIX-STREAM",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
+ .close = unix_close,
+ .unhash = unix_unhash,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = unix_stream_bpf_update_proto,
+#endif
};
-static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
+static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
struct sock *sk = NULL;
struct unix_sock *u;
@@ -785,7 +835,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
- sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
+ if (type == SOCK_STREAM)
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
+ else /*dgram and seqpacket */
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
+
if (!sk)
goto out;
@@ -847,7 +901,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
+ return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
}
static int unix_release(struct socket *sock)
@@ -857,6 +911,7 @@ static int unix_release(struct socket *sock)
if (!sk)
return 0;
+ sk->sk_prot->close(sk, 0);
unix_release_sock(sk, 0);
sock->sk = NULL;
@@ -891,12 +946,12 @@ static int unix_autobind(struct socket *sock)
retry:
addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
+ addr->hash ^= sk->sk_type;
spin_lock(&unix_table_lock);
ordernum = (ordernum+1)&0xFFFFF;
- if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
- addr->hash)) {
+ if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
spin_unlock(&unix_table_lock);
/*
* __unix_find_socket_byname() may take long time if many names
@@ -911,11 +966,8 @@ retry:
}
goto retry;
}
- addr->hash ^= sk->sk_type;
- __unix_remove_socket(sk);
- smp_store_release(&u->addr, addr);
- __unix_insert_socket(&unix_socket_table[addr->hash], sk);
+ __unix_set_addr(sk, addr, addr->hash);
spin_unlock(&unix_table_lock);
err = 0;
@@ -960,7 +1012,7 @@ static struct sock *unix_find_other(struct net *net,
}
} else {
err = -ECONNREFUSED;
- u = unix_find_socket_byname(net, sunname, len, type, hash);
+ u = unix_find_socket_byname(net, sunname, len, type ^ hash);
if (u) {
struct dentry *dentry;
dentry = unix_sk(u)->path.dentry;
@@ -978,125 +1030,125 @@ fail:
return NULL;
}
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
{
+ struct unix_sock *u = unix_sk(sk);
+ umode_t mode = S_IFSOCK |
+ (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
+ struct user_namespace *ns; // barf...
+ struct path parent;
struct dentry *dentry;
- struct path path;
- int err = 0;
+ unsigned int hash;
+ int err;
+
/*
* Get the parent directory, calculate the hash for last
* component.
*/
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
+ dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
if (IS_ERR(dentry))
- return err;
+ return PTR_ERR(dentry);
+ ns = mnt_user_ns(parent.mnt);
/*
* All right, let's create it.
*/
- err = security_path_mknod(&path, dentry, mode, 0);
- if (!err) {
- err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
- dentry, mode, 0);
- if (!err) {
- res->mnt = mntget(path.mnt);
- res->dentry = dget(dentry);
- }
- }
- done_path_create(&path, dentry);
+ err = security_path_mknod(&parent, dentry, mode, 0);
+ if (!err)
+ err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
+ if (err)
+ goto out;
+ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ goto out_unlink;
+ if (u->addr)
+ goto out_unlock;
+
+ addr->hash = UNIX_HASH_SIZE;
+ hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+ spin_lock(&unix_table_lock);
+ u->path.mnt = mntget(parent.mnt);
+ u->path.dentry = dget(dentry);
+ __unix_set_addr(sk, addr, hash);
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ done_path_create(&parent, dentry);
+ return 0;
+
+out_unlock:
+ mutex_unlock(&u->bindlock);
+ err = -EINVAL;
+out_unlink:
+ /* failed after successful mknod? unlink what we'd created... */
+ vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
+out:
+ done_path_create(&parent, dentry);
return err;
}
+static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
+{
+ struct unix_sock *u = unix_sk(sk);
+ int err;
+
+ err = mutex_lock_interruptible(&u->bindlock);
+ if (err)
+ return err;
+
+ if (u->addr) {
+ mutex_unlock(&u->bindlock);
+ return -EINVAL;
+ }
+
+ spin_lock(&unix_table_lock);
+ if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
+ addr->hash)) {
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ return -EADDRINUSE;
+ }
+ __unix_set_addr(sk, addr, addr->hash);
+ spin_unlock(&unix_table_lock);
+ mutex_unlock(&u->bindlock);
+ return 0;
+}
+
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
- struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
int err;
unsigned int hash;
struct unix_address *addr;
- struct hlist_head *list;
- struct path path = { };
- err = -EINVAL;
if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
sunaddr->sun_family != AF_UNIX)
- goto out;
+ return -EINVAL;
- if (addr_len == sizeof(short)) {
- err = unix_autobind(sock);
- goto out;
- }
+ if (addr_len == sizeof(short))
+ return unix_autobind(sock);
err = unix_mkname(sunaddr, addr_len, &hash);
if (err < 0)
- goto out;
+ return err;
addr_len = err;
-
- if (sun_path[0]) {
- umode_t mode = S_IFSOCK |
- (SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
- if (err) {
- if (err == -EEXIST)
- err = -EADDRINUSE;
- goto out;
- }
- }
-
- err = mutex_lock_interruptible(&u->bindlock);
- if (err)
- goto out_put;
-
- err = -EINVAL;
- if (u->addr)
- goto out_up;
-
- err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
- goto out_up;
+ return -ENOMEM;
memcpy(addr->name, sunaddr, addr_len);
addr->len = addr_len;
addr->hash = hash ^ sk->sk_type;
refcount_set(&addr->refcnt, 1);
- if (sun_path[0]) {
- addr->hash = UNIX_HASH_SIZE;
- hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
- spin_lock(&unix_table_lock);
- u->path = path;
- list = &unix_socket_table[hash];
- } else {
- spin_lock(&unix_table_lock);
- err = -EADDRINUSE;
- if (__unix_find_socket_byname(net, sunaddr, addr_len,
- sk->sk_type, hash)) {
- unix_release_addr(addr);
- goto out_unlock;
- }
-
- list = &unix_socket_table[addr->hash];
- }
-
- err = 0;
- __unix_remove_socket(sk);
- smp_store_release(&u->addr, addr);
- __unix_insert_socket(list, sk);
-
-out_unlock:
- spin_unlock(&unix_table_lock);
-out_up:
- mutex_unlock(&u->bindlock);
-out_put:
+ if (sun_path[0])
+ err = unix_bind_bsd(sk, addr);
+ else
+ err = unix_bind_abstract(sk, addr);
if (err)
- path_put(&path);
-out:
- return err;
+ unix_release_addr(addr);
+ return err == -EEXIST ? -EADDRINUSE : err;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
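The refactored unix_bind() above now dispatches to unix_bind_bsd() for filesystem names and to unix_bind_abstract() for abstract ones. As a rough userspace illustration of which path a given bind() takes (the helper name, socket path and abstract name below are made up for the sketch, not taken from the patch):

/* Hypothetical userspace sketch: the two bind() flavours that now map to
 * unix_bind_bsd() and unix_bind_abstract().
 */
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

static int bind_unix(int fd, int abstract)
{
	struct sockaddr_un sun;

	memset(&sun, 0, sizeof(sun));
	sun.sun_family = AF_UNIX;
	if (abstract) {
		/* sun_path[0] == '\0': abstract namespace -> unix_bind_abstract() */
		strcpy(sun.sun_path + 1, "demo-abstract");
		return bind(fd, (struct sockaddr *)&sun,
			    offsetof(struct sockaddr_un, sun_path) +
			    1 + strlen("demo-abstract"));
	}
	/* sun_path[0] != '\0': a filesystem socket node -> unix_bind_bsd() */
	strcpy(sun.sun_path, "/tmp/demo.sock");
	return bind(fd, (struct sockaddr *)&sun, sizeof(sun));
}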
@@ -1170,6 +1222,7 @@ restart:
if (err)
goto out_unlock;
+ sk->sk_state = other->sk_state = TCP_ESTABLISHED;
} else {
/*
* 1003.1g breaking connected state with AF_UNSPEC
@@ -1183,7 +1236,10 @@ restart:
*/
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
+
unix_peer(sk) = other;
+ if (!other)
+ sk->sk_state = TCP_CLOSE;
unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
unix_state_double_unlock(sk, other);
@@ -1195,6 +1251,7 @@ restart:
unix_peer(sk) = other;
unix_state_double_unlock(sk, other);
}
+
return 0;
out_unlock:
@@ -1260,7 +1317,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -ENOMEM;
/* create new sock for complete connection */
- newsk = unix_create1(sock_net(sk), NULL, 0);
+ newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
if (newsk == NULL)
goto out;
@@ -1393,7 +1450,7 @@ restart:
unix_state_unlock(sk);
- /* take ten and and send info to listening sock */
+ /* take ten and send info to listening sock */
spin_lock(&other->sk_receive_queue.lock);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
@@ -1427,12 +1484,10 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
init_peercred(ska);
init_peercred(skb);
- if (ska->sk_type != SOCK_DGRAM) {
- ska->sk_state = TCP_ESTABLISHED;
- skb->sk_state = TCP_ESTABLISHED;
- socka->state = SS_CONNECTED;
- sockb->state = SS_CONNECTED;
- }
+ ska->sk_state = TCP_ESTABLISHED;
+ skb->sk_state = TCP_ESTABLISHED;
+ socka->state = SS_CONNECTED;
+ sockb->state = SS_CONNECTED;
return 0;
}
@@ -1522,6 +1577,53 @@ out:
return err;
}
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+
+ /*
+ * Garbage collection of unix sockets starts by selecting a set of
+ * candidate sockets which have reference only from being in flight
+ * (total_refs == inflight_refs). This condition is checked once during
+ * the candidate collection phase, and candidates are marked as such, so
+ * that non-candidates can later be ignored. While inflight_refs is
+ * protected by unix_gc_lock, total_refs (file count) is not, hence this
+ * is an instantaneous decision.
+ *
+ * Once a candidate, however, the socket must not be reinstalled into a
+ * file descriptor while the garbage collection is in progress.
+ *
+ * If the above conditions are met, then the directed graph of
+ * candidates (*) does not change while unix_gc_lock is held.
+ *
+ * Any operations that changes the file count through file descriptors
+ * (dup, close, sendmsg) does not change the graph since candidates are
+ * not installed in fds.
+ *
+ * Dequeing a candidate via recvmsg would install it into an fd, but
+ * that takes unix_gc_lock to decrement the inflight count, so it's
+ * serialized with garbage collection.
+ *
+ * MSG_PEEK is special in that it does not change the inflight count,
+ * yet does install the socket into an fd. The following lock/unlock
+ * pair is to ensure serialization with garbage collection. It must be
+ * done between incrementing the file count and installing the file into
+ * an fd.
+ *
+ * If garbage collection starts after the barrier provided by the
+ * lock/unlock, then it will see the elevated refcount and not mark this
+ * as a candidate. If a garbage collection is already in progress
+ * before the file count was incremented, then the lock/unlock pair will
+ * ensure that garbage collection is finished before progressing to
+ * installing the fd.
+ *
+ * (*) A -> B where B is on the queue of A or B is on the queue of C
+ * which is on the queue of listening socket A.
+ */
+ spin_lock(&unix_gc_lock);
+ spin_unlock(&unix_gc_lock);
+}
+
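The comment in unix_peek_fds() describes the one case where a descriptor gets installed into an fd table without the inflight count changing: MSG_PEEK on a message that carries SCM_RIGHTS. A minimal userspace sketch of that scenario, assuming an already-connected AF_UNIX socket; the helper name and buffer layout are illustrative only:

/* Hypothetical userspace sketch (not kernel code): peeking a message that
 * carries an SCM_RIGHTS descriptor installs the fd in the receiver even
 * though the skb stays queued -- the case unix_peek_fds() serializes
 * against the garbage collector.
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static int peek_passed_fd(int sock)
{
	char data;
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} cbuf;
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf.buf, .msg_controllen = sizeof(cbuf.buf),
	};
	struct cmsghdr *cmsg;
	int fd = -1;

	if (recvmsg(sock, &msg, MSG_PEEK) < 0)	/* message stays queued */
		return -1;
	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
		if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
			memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
	/* a later recvmsg() without MSG_PEEK hands out another reference */
	return fd;
}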
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
@@ -1731,6 +1833,7 @@ restart_locked:
unix_state_unlock(sk);
+ sk->sk_state = TCP_CLOSE;
unix_dgram_disconnected(sk, other);
sock_put(other);
err = -ECONNREFUSED;
@@ -1821,6 +1924,53 @@ out:
*/
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
+{
+ struct unix_sock *ousk = unix_sk(other);
+ struct sk_buff *skb;
+ int err = 0;
+
+ skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
+
+ if (!skb)
+ return err;
+
+ skb_put(skb, 1);
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
+
+ if (err) {
+ kfree_skb(skb);
+ return err;
+ }
+
+ unix_state_lock(other);
+
+ if (sock_flag(other, SOCK_DEAD) ||
+ (other->sk_shutdown & RCV_SHUTDOWN)) {
+ unix_state_unlock(other);
+ kfree_skb(skb);
+ return -EPIPE;
+ }
+
+ maybe_add_creds(skb, sock, other);
+ skb_get(skb);
+
+ if (ousk->oob_skb)
+ consume_skb(ousk->oob_skb);
+
+ ousk->oob_skb = skb;
+
+ scm_stat_add(other, skb);
+ skb_queue_tail(&other->sk_receive_queue, skb);
+ sk_send_sigurg(other);
+ unix_state_unlock(other);
+ other->sk_data_ready(other);
+
+ return err;
+}
+#endif
+
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
@@ -1839,8 +1989,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
return err;
err = -EOPNOTSUPP;
- if (msg->msg_flags&MSG_OOB)
- goto out_err;
+ if (msg->msg_flags & MSG_OOB) {
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+ if (len)
+ len--;
+ else
+#endif
+ goto out_err;
+ }
if (msg->msg_namelen) {
err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
@@ -1905,6 +2061,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
sent += size;
}
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+ if (msg->msg_flags & MSG_OOB) {
+ err = queue_oob(sock, msg, other);
+ if (err)
+ goto out_err;
+ sent++;
+ }
+#endif
+
scm_destroy(&scm);
return sent;
@@ -2077,11 +2242,11 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
}
}
-static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
- size_t size, int flags)
+int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int flags)
{
struct scm_cookie scm;
- struct sock *sk = sock->sk;
+ struct socket *sock = sk->sk_socket;
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb, *last;
long timeo;
@@ -2171,7 +2336,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
sk_peek_offset_fwd(sk, size);
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
}
err = (flags & MSG_TRUNC) ? skb->len - skip : size;
@@ -2184,6 +2349,55 @@ out:
return err;
}
+static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags)
+{
+ struct sock *sk = sock->sk;
+
+#ifdef CONFIG_BPF_SYSCALL
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot != &unix_dgram_proto)
+ return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, NULL);
+#endif
+ return __unix_dgram_recvmsg(sk, msg, size, flags);
+}
+
+static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ int copied = 0;
+
+ while (1) {
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_buff *skb;
+ int used, err;
+
+ mutex_lock(&u->iolock);
+ skb = skb_recv_datagram(sk, 0, 1, &err);
+ mutex_unlock(&u->iolock);
+ if (!skb)
+ return err;
+
+ used = recv_actor(desc, skb, 0, skb->len);
+ if (used <= 0) {
+ if (!copied)
+ copied = used;
+ kfree_skb(skb);
+ break;
+ } else if (used <= skb->len) {
+ copied += used;
+ }
+
+ kfree_skb(skb);
+ if (!desc->count)
+ break;
+ }
+
+ return copied;
+}
+
/*
* Sleep until more data has arrived. But check for races..
*/
@@ -2243,6 +2457,86 @@ struct unix_stream_read_state {
unsigned int splice_flags;
};
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+static int unix_stream_recv_urg(struct unix_stream_read_state *state)
+{
+ struct socket *sock = state->socket;
+ struct sock *sk = sock->sk;
+ struct unix_sock *u = unix_sk(sk);
+ int chunk = 1;
+ struct sk_buff *oob_skb;
+
+ mutex_lock(&u->iolock);
+ unix_state_lock(sk);
+
+ if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
+ unix_state_unlock(sk);
+ mutex_unlock(&u->iolock);
+ return -EINVAL;
+ }
+
+ oob_skb = u->oob_skb;
+
+ if (!(state->flags & MSG_PEEK)) {
+ u->oob_skb = NULL;
+ }
+
+ unix_state_unlock(sk);
+
+ chunk = state->recv_actor(oob_skb, 0, chunk, state);
+
+ if (!(state->flags & MSG_PEEK)) {
+ UNIXCB(oob_skb).consumed += 1;
+ kfree_skb(oob_skb);
+ }
+
+ mutex_unlock(&u->iolock);
+
+ if (chunk < 0)
+ return -EFAULT;
+
+ state->msg->msg_flags |= MSG_OOB;
+ return 1;
+}
+
+static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+ int flags, int copied)
+{
+ struct unix_sock *u = unix_sk(sk);
+
+ if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
+ skb_unlink(skb, &sk->sk_receive_queue);
+ consume_skb(skb);
+ skb = NULL;
+ } else {
+ if (skb == u->oob_skb) {
+ if (copied) {
+ skb = NULL;
+ } else if (sock_flag(sk, SOCK_URGINLINE)) {
+ if (!(flags & MSG_PEEK)) {
+ u->oob_skb = NULL;
+ consume_skb(skb);
+ }
+ } else if (!(flags & MSG_PEEK)) {
+ skb_unlink(skb, &sk->sk_receive_queue);
+ consume_skb(skb);
+ skb = skb_peek(&sk->sk_receive_queue);
+ }
+ }
+ }
+ return skb;
+}
+#endif
+
+static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ if (unlikely(sk->sk_state != TCP_ESTABLISHED))
+ return -ENOTCONN;
+
+ return unix_read_sock(sk, desc, recv_actor);
+}
+
static int unix_stream_read_generic(struct unix_stream_read_state *state,
bool freezable)
{
@@ -2268,6 +2562,9 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
if (unlikely(flags & MSG_OOB)) {
err = -EOPNOTSUPP;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ err = unix_stream_recv_urg(state);
+#endif
goto out;
}
@@ -2296,6 +2593,18 @@ redo:
}
last = skb = skb_peek(&sk->sk_receive_queue);
last_len = last ? last->len : 0;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (skb) {
+ skb = manage_oob(skb, sk, flags, copied);
+ if (!skb) {
+ unix_state_unlock(sk);
+ if (copied)
+ break;
+ goto redo;
+ }
+ }
+#endif
again:
if (skb == NULL) {
if (copied >= target)
@@ -2414,7 +2723,7 @@ unlock:
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
sk_peek_offset_fwd(sk, chunk);
@@ -2453,6 +2762,20 @@ static int unix_stream_read_actor(struct sk_buff *skb,
return ret ?: chunk;
}
+int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t size, int flags)
+{
+ struct unix_stream_read_state state = {
+ .recv_actor = unix_stream_read_actor,
+ .socket = sk->sk_socket,
+ .msg = msg,
+ .size = size,
+ .flags = flags
+ };
+
+ return unix_stream_read_generic(&state, true);
+}
+
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
@@ -2464,6 +2787,14 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
.flags = flags
};
+#ifdef CONFIG_BPF_SYSCALL
+ struct sock *sk = sock->sk;
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot != &unix_stream_proto)
+ return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, NULL);
+#endif
return unix_stream_read_generic(&state, true);
}
@@ -2524,7 +2855,10 @@ static int unix_shutdown(struct socket *sock, int mode)
(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
int peer_mode = 0;
+ const struct proto *prot = READ_ONCE(other->sk_prot);
+ if (prot->unhash)
+ prot->unhash(other);
if (mode&RCV_SHUTDOWN)
peer_mode |= SEND_SHUTDOWN;
if (mode&SEND_SHUTDOWN)
@@ -2533,10 +2867,12 @@ static int unix_shutdown(struct socket *sock, int mode)
other->sk_shutdown |= peer_mode;
unix_state_unlock(other);
other->sk_state_change(other);
- if (peer_mode == SHUTDOWN_MASK)
+ if (peer_mode == SHUTDOWN_MASK) {
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
- else if (peer_mode & RCV_SHUTDOWN)
+ other->sk_state = TCP_CLOSE;
+ } else if (peer_mode & RCV_SHUTDOWN) {
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
+ }
}
if (other)
sock_put(other);
@@ -2631,6 +2967,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case SIOCUNIXFILE:
err = unix_open_file(sk);
break;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ case SIOCATMARK:
+ {
+ struct sk_buff *skb;
+ struct unix_sock *u = unix_sk(sk);
+ int answ = 0;
+
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb && skb == u->oob_skb)
+ answ = 1;
+ err = put_user(answ, (int __user *)arg);
+ }
+ break;
+#endif
default:
err = -ENOIOCTLCMD;
break;
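Taken together, queue_oob(), unix_stream_recv_urg() and the new SIOCATMARK case give AF_UNIX stream sockets a TCP-like urgent-data interface when CONFIG_AF_UNIX_OOB is enabled. A hedged userspace sketch of the round trip, assuming a connected socket pair; the helper name is made up for illustration:

/* Hypothetical userspace sketch (not part of the patch): send one
 * out-of-band byte, check the mark, then pull the byte via MSG_OOB.
 */
#include <sys/ioctl.h>
#include <sys/socket.h>

static int oob_roundtrip(int snd, int rcv)
{
	char c;
	int at_mark = 0;

	if (send(snd, "x", 1, MSG_OOB) != 1)		/* queued via queue_oob() */
		return -1;
	if (ioctl(rcv, SIOCATMARK, &at_mark) < 0)	/* 1 when oob_skb is at the head */
		return -1;
	if (recv(rcv, &c, 1, MSG_OOB) != 1)		/* unix_stream_recv_urg() path */
		return -1;
	return at_mark;
}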
@@ -2867,6 +3217,64 @@ static const struct seq_operations unix_seq_ops = {
.stop = unix_seq_stop,
.show = unix_seq_show,
};
+
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__unix {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct unix_sock *, unix_sk);
+ uid_t uid __aligned(8);
+};
+
+static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+ struct unix_sock *unix_sk, uid_t uid)
+{
+ struct bpf_iter__unix ctx;
+
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
+ ctx.meta = meta;
+ ctx.unix_sk = unix_sk;
+ ctx.uid = uid;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+ uid_t uid;
+
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
+ uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ return unix_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)unix_prog_seq_show(prog, &meta, v, 0);
+ }
+
+ unix_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_unix_seq_ops = {
+ .start = unix_seq_start,
+ .next = unix_seq_next,
+ .stop = bpf_iter_unix_seq_stop,
+ .show = bpf_iter_unix_seq_show,
+};
+#endif
#endif
static const struct net_proto_family unix_family_ops = {
@@ -2907,13 +3315,48 @@ static struct pernet_operations unix_net_ops = {
.exit = unix_net_exit,
};
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
+ struct unix_sock *unix_sk, uid_t uid)
+
+static const struct bpf_iter_seq_info unix_seq_info = {
+ .seq_ops = &bpf_iter_unix_seq_ops,
+ .init_seq_private = bpf_iter_init_seq_net,
+ .fini_seq_private = bpf_iter_fini_seq_net,
+ .seq_priv_size = sizeof(struct seq_net_private),
+};
+
+static struct bpf_iter_reg unix_reg_info = {
+ .target = "unix",
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__unix, unix_sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &unix_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+ unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
+ if (bpf_iter_reg_target(&unix_reg_info))
+ pr_warn("Warning: could not register bpf iterator unix\n");
+}
+#endif
+
static int __init af_unix_init(void)
{
int rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
- rc = proto_register(&unix_proto, 1);
+ rc = proto_register(&unix_dgram_proto, 1);
+ if (rc != 0) {
+ pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
+ goto out;
+ }
+
+ rc = proto_register(&unix_stream_proto, 1);
if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
goto out;
@@ -2921,6 +3364,12 @@ static int __init af_unix_init(void)
sock_register(&unix_family_ops);
register_pernet_subsys(&unix_net_ops);
+ unix_bpf_build_proto();
+
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ bpf_iter_register();
+#endif
+
out:
return rc;
}
@@ -2928,7 +3377,8 @@ out:
static void __exit af_unix_exit(void)
{
sock_unregister(PF_UNIX);
- proto_unregister(&unix_proto);
+ proto_unregister(&unix_dgram_proto);
+ proto_unregister(&unix_stream_proto);
unregister_pernet_subsys(&unix_net_ops);
}
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 9ff64f9df1f3..7e7d7f45685a 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -295,10 +295,8 @@ again:
goto again;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
out:
if (sk)
sock_put(sk);
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
new file mode 100644
index 000000000000..b927e2baae50
--- /dev/null
+++ b/net/unix/unix_bpf.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
+
+#include <linux/skmsg.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <net/af_unix.h>
+
+#define unix_sk_has_data(__sk, __psock) \
+ ({ !skb_queue_empty(&__sk->sk_receive_queue) || \
+ !skb_queue_empty(&__psock->ingress_skb) || \
+ !list_empty(&__psock->ingress_msg); \
+ })
+
+static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
+ long timeo)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct unix_sock *u = unix_sk(sk);
+ int ret = 0;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
+ if (!timeo)
+ return ret;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ if (!unix_sk_has_data(sk, psock)) {
+ mutex_unlock(&u->iolock);
+ wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+ mutex_lock(&u->iolock);
+ ret = unix_sk_has_data(sk, psock);
+ }
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+}
+
+static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ if (sk->sk_type == SOCK_DGRAM)
+ return __unix_dgram_recvmsg(sk, msg, len, flags);
+ else
+ return __unix_stream_recvmsg(sk, msg, len, flags);
+}
+
+static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int nonblock, int flags,
+ int *addr_len)
+{
+ struct unix_sock *u = unix_sk(sk);
+ struct sk_psock *psock;
+ int copied;
+
+ psock = sk_psock_get(sk);
+ if (unlikely(!psock))
+ return __unix_recvmsg(sk, msg, len, flags);
+
+ mutex_lock(&u->iolock);
+ if (!skb_queue_empty(&sk->sk_receive_queue) &&
+ sk_psock_queue_empty(psock)) {
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return __unix_recvmsg(sk, msg, len, flags);
+ }
+
+msg_bytes_ready:
+ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ if (!copied) {
+ long timeo;
+ int data;
+
+ timeo = sock_rcvtimeo(sk, nonblock);
+ data = unix_msg_wait_data(sk, psock, timeo);
+ if (data) {
+ if (!sk_psock_queue_empty(psock))
+ goto msg_bytes_ready;
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return __unix_recvmsg(sk, msg, len, flags);
+ }
+ copied = -EAGAIN;
+ }
+ mutex_unlock(&u->iolock);
+ sk_psock_put(sk, psock);
+ return copied;
+}
+
+static struct proto *unix_dgram_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_dgram_prot_lock);
+static struct proto unix_dgram_bpf_prot;
+
+static struct proto *unix_stream_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_stream_prot_lock);
+static struct proto unix_stream_bpf_prot;
+
+static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+{
+ *prot = *base;
+ prot->close = sock_map_close;
+ prot->recvmsg = unix_bpf_recvmsg;
+}
+
+static void unix_stream_bpf_rebuild_protos(struct proto *prot,
+ const struct proto *base)
+{
+ *prot = *base;
+ prot->close = sock_map_close;
+ prot->recvmsg = unix_bpf_recvmsg;
+ prot->unhash = sock_map_unhash;
+}
+
+static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
+{
+ if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
+ spin_lock_bh(&unix_dgram_prot_lock);
+ if (likely(ops != unix_dgram_prot_saved)) {
+ unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
+ smp_store_release(&unix_dgram_prot_saved, ops);
+ }
+ spin_unlock_bh(&unix_dgram_prot_lock);
+ }
+}
+
+static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
+{
+ if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
+ spin_lock_bh(&unix_stream_prot_lock);
+ if (likely(ops != unix_stream_prot_saved)) {
+ unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
+ smp_store_release(&unix_stream_prot_saved, ops);
+ }
+ spin_unlock_bh(&unix_stream_prot_lock);
+ }
+}
+
+int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+ if (sk->sk_type != SOCK_DGRAM)
+ return -EOPNOTSUPP;
+
+ if (restore) {
+ sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ return 0;
+ }
+
+ unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
+ WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot);
+ return 0;
+}
+
+int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+ if (restore) {
+ sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ return 0;
+ }
+
+ unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
+ WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot);
+ return 0;
+}
+
+void __init unix_bpf_build_proto(void)
+{
+ unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
+ unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);
+
+}
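unix_bpf.c only takes effect once a socket is actually placed in a sockmap: sock_map then calls psock_update_sk_prot(), which swaps sk->sk_prot to the unix_dgram_bpf_prot or unix_stream_bpf_prot variant built above. A minimal userspace sketch of that trigger, assuming libbpf and an already-created BPF_MAP_TYPE_SOCKMAP map; the helper name and key value are illustrative:

/* Hypothetical userspace sketch: inserting a connected AF_UNIX socket into a
 * sockmap is what ends up invoking unix_dgram_bpf_update_proto() /
 * unix_stream_bpf_update_proto() in the kernel.
 */
#include <bpf/bpf.h>

static int add_unix_sock_to_sockmap(int map_fd, int unix_fd)
{
	int key = 0;

	/* on success, sk_prot is rebuilt to the unix_*_bpf_prot variant */
	return bpf_map_update_elem(map_fd, &key, &unix_fd, BPF_ANY);
}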
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 92a72f0e0d94..e2c0cfb334d2 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -415,8 +415,8 @@ static void vsock_deassign_transport(struct vsock_sock *vsk)
/* Assign a transport to a socket and call the .init transport callback.
*
- * Note: for stream socket this must be called when vsk->remote_addr is set
- * (e.g. during the connect() or when a connection request on a listener
+ * Note: for connection oriented socket this must be called when vsk->remote_addr
+ * is set (e.g. during the connect() or when a connection request on a listener
* socket is received).
* The vsk->remote_addr is used to decide which transport to use:
* - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if
@@ -452,6 +452,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
new_transport = transport_dgram;
break;
case SOCK_STREAM:
+ case SOCK_SEQPACKET:
if (vsock_use_local_transport(remote_cid))
new_transport = transport_local;
else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g ||
@@ -469,10 +470,10 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
return 0;
/* transport->release() must be called with sock lock acquired.
- * This path can only be taken during vsock_stream_connect(),
- * where we have already held the sock lock.
- * In the other cases, this function is called on a new socket
- * which is not assigned to any transport.
+ * This path can only be taken during vsock_connect(), where we
+ * have already held the sock lock. In the other cases, this
+ * function is called on a new socket which is not assigned to
+ * any transport.
*/
vsk->transport->release(vsk);
vsock_deassign_transport(vsk);
@@ -484,6 +485,14 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
if (!new_transport || !try_module_get(new_transport->module))
return -ENODEV;
+ if (sk->sk_type == SOCK_SEQPACKET) {
+ if (!new_transport->seqpacket_allow ||
+ !new_transport->seqpacket_allow(remote_cid)) {
+ module_put(new_transport->module);
+ return -ESOCKTNOSUPPORT;
+ }
+ }
+
ret = new_transport->init(vsk, psk);
if (ret) {
module_put(new_transport->module);
@@ -604,8 +613,8 @@ out:
/**** SOCKET OPERATIONS ****/
-static int __vsock_bind_stream(struct vsock_sock *vsk,
- struct sockaddr_vm *addr)
+static int __vsock_bind_connectible(struct vsock_sock *vsk,
+ struct sockaddr_vm *addr)
{
static u32 port;
struct sockaddr_vm new_addr;
@@ -649,9 +658,10 @@ static int __vsock_bind_stream(struct vsock_sock *vsk,
vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);
- /* Remove stream sockets from the unbound list and add them to the hash
- * table for easy lookup by its address. The unbound list is simply an
- * extra entry at the end of the hash table, a trick used by AF_UNIX.
+ /* Remove connection oriented sockets from the unbound list and add them
+ * to the hash table for easy lookup by its address. The unbound list
+ * is simply an extra entry at the end of the hash table, a trick used
+ * by AF_UNIX.
*/
__vsock_remove_bound(vsk);
__vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);
@@ -684,8 +694,9 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
switch (sk->sk_socket->type) {
case SOCK_STREAM:
+ case SOCK_SEQPACKET:
spin_lock_bh(&vsock_table_lock);
- retval = __vsock_bind_stream(vsk, addr);
+ retval = __vsock_bind_connectible(vsk, addr);
spin_unlock_bh(&vsock_table_lock);
break;
@@ -768,6 +779,11 @@ static struct sock *__vsock_create(struct net *net,
return sk;
}
+static bool sock_type_connectible(u16 type)
+{
+ return (type == SOCK_STREAM) || (type == SOCK_SEQPACKET);
+}
+
static void __vsock_release(struct sock *sk, int level)
{
if (sk) {
@@ -786,7 +802,7 @@ static void __vsock_release(struct sock *sk, int level)
if (vsk->transport)
vsk->transport->release(vsk);
- else if (sk->sk_type == SOCK_STREAM)
+ else if (sock_type_connectible(sk->sk_type))
vsock_remove_sock(vsk);
sock_orphan(sk);
@@ -844,6 +860,16 @@ s64 vsock_stream_has_data(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);
+static s64 vsock_connectible_has_data(struct vsock_sock *vsk)
+{
+ struct sock *sk = sk_vsock(vsk);
+
+ if (sk->sk_type == SOCK_SEQPACKET)
+ return vsk->transport->seqpacket_has_data(vsk);
+ else
+ return vsock_stream_has_data(vsk);
+}
+
s64 vsock_stream_has_space(struct vsock_sock *vsk)
{
return vsk->transport->stream_has_space(vsk);
@@ -937,10 +963,10 @@ static int vsock_shutdown(struct socket *sock, int mode)
if ((mode & ~SHUTDOWN_MASK) || !mode)
return -EINVAL;
- /* If this is a STREAM socket and it is not connected then bail out
- * immediately. If it is a DGRAM socket then we must first kick the
- * socket so that it wakes up from any sleeping calls, for example
- * recv(), and then afterwards return the error.
+ /* If this is a connection oriented socket and it is not connected then
+ * bail out immediately. If it is a DGRAM socket then we must first
+ * kick the socket so that it wakes up from any sleeping calls, for
+ * example recv(), and then afterwards return the error.
*/
sk = sock->sk;
@@ -948,7 +974,7 @@ static int vsock_shutdown(struct socket *sock, int mode)
lock_sock(sk);
if (sock->state == SS_UNCONNECTED) {
err = -ENOTCONN;
- if (sk->sk_type == SOCK_STREAM)
+ if (sock_type_connectible(sk->sk_type))
goto out;
} else {
sock->state = SS_DISCONNECTING;
@@ -961,7 +987,7 @@ static int vsock_shutdown(struct socket *sock, int mode)
sk->sk_shutdown |= mode;
sk->sk_state_change(sk);
- if (sk->sk_type == SOCK_STREAM) {
+ if (sock_type_connectible(sk->sk_type)) {
sock_reset_flag(sk, SOCK_DONE);
vsock_send_shutdown(sk, mode);
}
@@ -1016,7 +1042,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
- } else if (sock->type == SOCK_STREAM) {
+ } else if (sock_type_connectible(sk->sk_type)) {
const struct vsock_transport *transport;
lock_sock(sk);
@@ -1255,7 +1281,7 @@ static void vsock_connect_timeout(struct work_struct *work)
(sk->sk_shutdown != SHUTDOWN_MASK)) {
sk->sk_state = TCP_CLOSE;
sk->sk_err = ETIMEDOUT;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
vsock_transport_cancel_pkt(vsk);
}
release_sock(sk);
@@ -1263,8 +1289,8 @@ static void vsock_connect_timeout(struct work_struct *work)
sock_put(sk);
}
-static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
- int addr_len, int flags)
+static int vsock_connect(struct socket *sock, struct sockaddr *addr,
+ int addr_len, int flags)
{
int err;
struct sock *sk;
@@ -1369,7 +1395,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- sk->sk_state = TCP_CLOSE;
+ sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
@@ -1414,7 +1440,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
lock_sock(listener);
- if (sock->type != SOCK_STREAM) {
+ if (!sock_type_connectible(sock->type)) {
err = -EOPNOTSUPP;
goto out;
}
@@ -1491,7 +1517,7 @@ static int vsock_listen(struct socket *sock, int backlog)
lock_sock(sk);
- if (sock->type != SOCK_STREAM) {
+ if (!sock_type_connectible(sk->sk_type)) {
err = -EOPNOTSUPP;
goto out;
}
@@ -1535,11 +1561,11 @@ static void vsock_update_buffer_size(struct vsock_sock *vsk,
vsk->buffer_size = val;
}
-static int vsock_stream_setsockopt(struct socket *sock,
- int level,
- int optname,
- sockptr_t optval,
- unsigned int optlen)
+static int vsock_connectible_setsockopt(struct socket *sock,
+ int level,
+ int optname,
+ sockptr_t optval,
+ unsigned int optlen)
{
int err;
struct sock *sk;
@@ -1617,10 +1643,10 @@ exit:
return err;
}
-static int vsock_stream_getsockopt(struct socket *sock,
- int level, int optname,
- char __user *optval,
- int __user *optlen)
+static int vsock_connectible_getsockopt(struct socket *sock,
+ int level, int optname,
+ char __user *optval,
+ int __user *optlen)
{
int err;
int len;
@@ -1688,8 +1714,8 @@ static int vsock_stream_getsockopt(struct socket *sock,
return 0;
}
-static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
- size_t len)
+static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk;
struct vsock_sock *vsk;
@@ -1712,7 +1738,9 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
transport = vsk->transport;
- /* Callers should not provide a destination with stream sockets. */
+ /* Callers should not provide a destination with connection oriented
+ * sockets.
+ */
if (msg->msg_namelen) {
err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out;
@@ -1803,9 +1831,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
* responsibility to check how many bytes we were able to send.
*/
- written = transport->stream_enqueue(
- vsk, msg,
- len - total_written);
+ if (sk->sk_type == SOCK_SEQPACKET) {
+ written = transport->seqpacket_enqueue(vsk,
+ msg, len - total_written);
+ } else {
+ written = transport->stream_enqueue(vsk,
+ msg, len - total_written);
+ }
if (written < 0) {
err = -ENOMEM;
goto out_err;
@@ -1821,72 +1853,98 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
}
out_err:
- if (total_written > 0)
- err = total_written;
+ if (total_written > 0) {
+ /* Return number of written bytes only if:
+ * 1) SOCK_STREAM socket.
+ * 2) SOCK_SEQPACKET socket when whole buffer is sent.
+ */
+ if (sk->sk_type == SOCK_STREAM || total_written == len)
+ err = total_written;
+ }
out:
release_sock(sk);
return err;
}
-
-static int
-vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
- int flags)
+static int vsock_connectible_wait_data(struct sock *sk,
+ struct wait_queue_entry *wait,
+ long timeout,
+ struct vsock_transport_recv_notify_data *recv_data,
+ size_t target)
{
- struct sock *sk;
- struct vsock_sock *vsk;
const struct vsock_transport *transport;
+ struct vsock_sock *vsk;
+ s64 data;
int err;
- size_t target;
- ssize_t copied;
- long timeout;
- struct vsock_transport_recv_notify_data recv_data;
- DEFINE_WAIT(wait);
-
- sk = sock->sk;
vsk = vsock_sk(sk);
err = 0;
+ transport = vsk->transport;
- lock_sock(sk);
+ while ((data = vsock_connectible_has_data(vsk)) == 0) {
+ prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
- transport = vsk->transport;
+ if (sk->sk_err != 0 ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+ break;
+ }
- if (!transport || sk->sk_state != TCP_ESTABLISHED) {
- /* Recvmsg is supposed to return 0 if a peer performs an
- * orderly shutdown. Differentiate between that case and when a
- * peer has not connected or a local shutdown occurred with the
- * SOCK_DONE flag.
- */
- if (sock_flag(sk, SOCK_DONE))
- err = 0;
- else
- err = -ENOTCONN;
+ /* Don't wait for non-blocking sockets. */
+ if (timeout == 0) {
+ err = -EAGAIN;
+ break;
+ }
- goto out;
- }
+ if (recv_data) {
+ err = transport->notify_recv_pre_block(vsk, target, recv_data);
+ if (err < 0)
+ break;
+ }
- if (flags & MSG_OOB) {
- err = -EOPNOTSUPP;
- goto out;
- }
+ release_sock(sk);
+ timeout = schedule_timeout(timeout);
+ lock_sock(sk);
- /* We don't check peer_shutdown flag here since peer may actually shut
- * down, but there can be data in the queue that a local socket can
- * receive.
- */
- if (sk->sk_shutdown & RCV_SHUTDOWN) {
- err = 0;
- goto out;
+ if (signal_pending(current)) {
+ err = sock_intr_errno(timeout);
+ break;
+ } else if (timeout == 0) {
+ err = -EAGAIN;
+ break;
+ }
}
- /* It is valid on Linux to pass in a zero-length receive buffer. This
- * is not an error. We may as well bail out now.
+ finish_wait(sk_sleep(sk), wait);
+
+ if (err)
+ return err;
+
+ /* Internal transport error when checking for available
+ * data. XXX This should be changed to a connection
+ * reset in a later change.
*/
- if (!len) {
- err = 0;
- goto out;
- }
+ if (data < 0)
+ return -ENOMEM;
+
+ return data;
+}
+
+static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ struct vsock_transport_recv_notify_data recv_data;
+ const struct vsock_transport *transport;
+ struct vsock_sock *vsk;
+ ssize_t copied;
+ size_t target;
+ long timeout;
+ int err;
+
+ DEFINE_WAIT(wait);
+
+ vsk = vsock_sk(sk);
+ transport = vsk->transport;
/* We must not copy less than target bytes into the user's buffer
* before returning successfully, so we wait for the consume queue to
@@ -1908,94 +1966,158 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
while (1) {
- s64 ready;
+ ssize_t read;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- ready = vsock_stream_has_data(vsk);
+ err = vsock_connectible_wait_data(sk, &wait, timeout,
+ &recv_data, target);
+ if (err <= 0)
+ break;
- if (ready == 0) {
- if (sk->sk_err != 0 ||
- (sk->sk_shutdown & RCV_SHUTDOWN) ||
- (vsk->peer_shutdown & SEND_SHUTDOWN)) {
- finish_wait(sk_sleep(sk), &wait);
- break;
- }
- /* Don't wait for non-blocking sockets. */
- if (timeout == 0) {
- err = -EAGAIN;
- finish_wait(sk_sleep(sk), &wait);
- break;
- }
+ err = transport->notify_recv_pre_dequeue(vsk, target,
+ &recv_data);
+ if (err < 0)
+ break;
- err = transport->notify_recv_pre_block(
- vsk, target, &recv_data);
- if (err < 0) {
- finish_wait(sk_sleep(sk), &wait);
- break;
- }
- release_sock(sk);
- timeout = schedule_timeout(timeout);
- lock_sock(sk);
+ read = transport->stream_dequeue(vsk, msg, len - copied, flags);
+ if (read < 0) {
+ err = -ENOMEM;
+ break;
+ }
- if (signal_pending(current)) {
- err = sock_intr_errno(timeout);
- finish_wait(sk_sleep(sk), &wait);
- break;
- } else if (timeout == 0) {
- err = -EAGAIN;
- finish_wait(sk_sleep(sk), &wait);
- break;
- }
- } else {
- ssize_t read;
+ copied += read;
- finish_wait(sk_sleep(sk), &wait);
+ err = transport->notify_recv_post_dequeue(vsk, target, read,
+ !(flags & MSG_PEEK), &recv_data);
+ if (err < 0)
+ goto out;
- if (ready < 0) {
- /* Invalid queue pair content. XXX This should
- * be changed to a connection reset in a later
- * change.
- */
+ if (read >= target || flags & MSG_PEEK)
+ break;
- err = -ENOMEM;
- goto out;
- }
+ target -= read;
+ }
- err = transport->notify_recv_pre_dequeue(
- vsk, target, &recv_data);
- if (err < 0)
- break;
+ if (sk->sk_err)
+ err = -sk->sk_err;
+ else if (sk->sk_shutdown & RCV_SHUTDOWN)
+ err = 0;
- read = transport->stream_dequeue(
- vsk, msg,
- len - copied, flags);
- if (read < 0) {
- err = -ENOMEM;
- break;
- }
+ if (copied > 0)
+ err = copied;
+
+out:
+ return err;
+}
- copied += read;
+static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ const struct vsock_transport *transport;
+ struct vsock_sock *vsk;
+ ssize_t msg_len;
+ long timeout;
+ int err = 0;
+ DEFINE_WAIT(wait);
- err = transport->notify_recv_post_dequeue(
- vsk, target, read,
- !(flags & MSG_PEEK), &recv_data);
- if (err < 0)
- goto out;
+ vsk = vsock_sk(sk);
+ transport = vsk->transport;
- if (read >= target || flags & MSG_PEEK)
- break;
+ timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
- target -= read;
- }
+ err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0);
+ if (err <= 0)
+ goto out;
+
+ msg_len = transport->seqpacket_dequeue(vsk, msg, flags);
+
+ if (msg_len < 0) {
+ err = -ENOMEM;
+ goto out;
}
- if (sk->sk_err)
+ if (sk->sk_err) {
err = -sk->sk_err;
- else if (sk->sk_shutdown & RCV_SHUTDOWN)
+ } else if (sk->sk_shutdown & RCV_SHUTDOWN) {
err = 0;
+ } else {
+ /* User sets MSG_TRUNC, so return real length of
+ * packet.
+ */
+ if (flags & MSG_TRUNC)
+ err = msg_len;
+ else
+ err = len - msg_data_left(msg);
- if (copied > 0)
- err = copied;
+ /* Always set MSG_TRUNC if real length of packet is
+ * bigger than user's buffer.
+ */
+ if (msg_len > len)
+ msg->msg_flags |= MSG_TRUNC;
+ }
+
+out:
+ return err;
+}
+
+static int
+vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+ struct sock *sk;
+ struct vsock_sock *vsk;
+ const struct vsock_transport *transport;
+ int err;
+
+ DEFINE_WAIT(wait);
+
+ sk = sock->sk;
+ vsk = vsock_sk(sk);
+ err = 0;
+
+ lock_sock(sk);
+
+ transport = vsk->transport;
+
+ if (!transport || sk->sk_state != TCP_ESTABLISHED) {
+ /* Recvmsg is supposed to return 0 if a peer performs an
+ * orderly shutdown. Differentiate between that case and when a
+ * peer has not connected or a local shutdown occurred with the
+ * SOCK_DONE flag.
+ */
+ if (sock_flag(sk, SOCK_DONE))
+ err = 0;
+ else
+ err = -ENOTCONN;
+
+ goto out;
+ }
+
+ if (flags & MSG_OOB) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /* We don't check peer_shutdown flag here since peer may actually shut
+ * down, but there can be data in the queue that a local socket can
+ * receive.
+ */
+ if (sk->sk_shutdown & RCV_SHUTDOWN) {
+ err = 0;
+ goto out;
+ }
+
+ /* It is valid on Linux to pass in a zero-length receive buffer. This
+ * is not an error. We may as well bail out now.
+ */
+ if (!len) {
+ err = 0;
+ goto out;
+ }
+
+ if (sk->sk_type == SOCK_STREAM)
+ err = __vsock_stream_recvmsg(sk, msg, len, flags);
+ else
+ err = __vsock_seqpacket_recvmsg(sk, msg, len, flags);
out:
release_sock(sk);
@@ -2007,7 +2129,7 @@ static const struct proto_ops vsock_stream_ops = {
.owner = THIS_MODULE,
.release = vsock_release,
.bind = vsock_bind,
- .connect = vsock_stream_connect,
+ .connect = vsock_connect,
.socketpair = sock_no_socketpair,
.accept = vsock_accept,
.getname = vsock_getname,
@@ -2015,10 +2137,31 @@ static const struct proto_ops vsock_stream_ops = {
.ioctl = sock_no_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
- .setsockopt = vsock_stream_setsockopt,
- .getsockopt = vsock_stream_getsockopt,
- .sendmsg = vsock_stream_sendmsg,
- .recvmsg = vsock_stream_recvmsg,
+ .setsockopt = vsock_connectible_setsockopt,
+ .getsockopt = vsock_connectible_getsockopt,
+ .sendmsg = vsock_connectible_sendmsg,
+ .recvmsg = vsock_connectible_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+
+static const struct proto_ops vsock_seqpacket_ops = {
+ .family = PF_VSOCK,
+ .owner = THIS_MODULE,
+ .release = vsock_release,
+ .bind = vsock_bind,
+ .connect = vsock_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = vsock_accept,
+ .getname = vsock_getname,
+ .poll = vsock_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = vsock_listen,
+ .shutdown = vsock_shutdown,
+ .setsockopt = vsock_connectible_setsockopt,
+ .getsockopt = vsock_connectible_getsockopt,
+ .sendmsg = vsock_connectible_sendmsg,
+ .recvmsg = vsock_connectible_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
@@ -2043,6 +2186,9 @@ static int vsock_create(struct net *net, struct socket *sock,
case SOCK_STREAM:
sock->ops = &vsock_stream_ops;
break;
+ case SOCK_SEQPACKET:
+ sock->ops = &vsock_seqpacket_ops;
+ break;
default:
return -ESOCKTNOSUPPORT;
}
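With vsock_seqpacket_ops registered, userspace can open AF_VSOCK sockets of type SOCK_SEQPACKET; vsock_assign_transport() rejects the connection with -ESOCKTNOSUPPORT when the selected transport has no seqpacket support. A minimal sketch of the client side, assuming <linux/vm_sockets.h>; the helper name, CID and port are placeholders:

/* Hypothetical userspace sketch: connect an AF_VSOCK SOCK_SEQPACKET socket. */
#include <string.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

static int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
{
	struct sockaddr_vm addr;
	int fd;

	fd = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	if (fd < 0)
		return -1;
	memset(&addr, 0, sizeof(addr));
	addr.svm_family = AF_VSOCK;
	addr.svm_cid = cid;	/* e.g. VMADDR_CID_HOST */
	addr.svm_port = port;
	/* fails with ESOCKTNOSUPPORT if the transport lacks seqpacket support */
	return connect(fd, (struct sockaddr *)&addr, sizeof(addr)) ? -1 : fd;
}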
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index cc3bae2659e7..19189cf30a72 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -596,7 +596,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
return -EOPNOTSUPP;
if (need_refill) {
- hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+ hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan);
ret = hvs_update_recv_data(hvs);
if (ret)
return ret;
@@ -610,7 +610,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
hvs->recv_data_len -= to_read;
if (hvs->recv_data_len == 0) {
- hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
+ hvs->recv_desc = hv_pkt_iter_next_raw(hvs->chan, hvs->recv_desc);
if (hvs->recv_desc) {
ret = hvs_update_recv_data(hvs);
if (ret)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 2700a63ab095..4f7c99dfd16c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -62,6 +62,7 @@ struct virtio_vsock {
struct virtio_vsock_event event_list[8];
u32 guest_cid;
+ bool seqpacket_allow;
};
static u32 virtio_transport_get_local_cid(void)
@@ -356,11 +357,14 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
- lock_sock(sk);
+ /* vmci_transport.c doesn't take sk_lock here either. At least we're
+ * under vsock_table_lock so the sock cannot disappear while we're
+ * executing.
+ */
+
sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
- sk->sk_error_report(sk);
- release_sock(sk);
+ sk_error_report(sk);
}
static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
@@ -443,6 +447,8 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}
+static bool virtio_transport_seqpacket_allow(u32 remote_cid);
+
static struct virtio_transport virtio_transport = {
.transport = {
.module = THIS_MODULE,
@@ -469,6 +475,11 @@ static struct virtio_transport virtio_transport = {
.stream_is_active = virtio_transport_stream_is_active,
.stream_allow = virtio_transport_stream_allow,
+ .seqpacket_dequeue = virtio_transport_seqpacket_dequeue,
+ .seqpacket_enqueue = virtio_transport_seqpacket_enqueue,
+ .seqpacket_allow = virtio_transport_seqpacket_allow,
+ .seqpacket_has_data = virtio_transport_seqpacket_has_data,
+
.notify_poll_in = virtio_transport_notify_poll_in,
.notify_poll_out = virtio_transport_notify_poll_out,
.notify_recv_init = virtio_transport_notify_recv_init,
@@ -485,6 +496,21 @@ static struct virtio_transport virtio_transport = {
.send_pkt = virtio_transport_send_pkt,
};
+static bool virtio_transport_seqpacket_allow(u32 remote_cid)
+{
+ struct virtio_vsock *vsock;
+ bool seqpacket_allow;
+
+ seqpacket_allow = false;
+ rcu_read_lock();
+ vsock = rcu_dereference(the_virtio_vsock);
+ if (vsock)
+ seqpacket_allow = vsock->seqpacket_allow;
+ rcu_read_unlock();
+
+ return seqpacket_allow;
+}
+
static void virtio_transport_rx_work(struct work_struct *work)
{
struct virtio_vsock *vsock =
@@ -608,10 +634,14 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->event_run = true;
mutex_unlock(&vsock->event_lock);
+ if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
+ vsock->seqpacket_allow = true;
+
vdev->priv = vsock;
rcu_assign_pointer(the_virtio_vsock, vsock);
mutex_unlock(&the_virtio_vsock_mutex);
+
return 0;
out:
@@ -695,6 +725,7 @@ static struct virtio_device_id id_table[] = {
};
static unsigned int features[] = {
+ VIRTIO_VSOCK_F_SEQPACKET
};
static struct virtio_driver virtio_vsock_driver = {
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 902cb6dd710b..59ee1be5a6dd 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -74,6 +74,14 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
err = memcpy_from_msg(pkt->buf, info->msg, len);
if (err)
goto out;
+
+ if (msg_data_left(info->msg) == 0 &&
+ info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
+ pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+
+ if (info->msg->msg_flags & MSG_EOR)
+ pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ }
}
trace_virtio_transport_alloc_pkt(src_cid, src_port,
@@ -165,6 +173,14 @@ void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
+static u16 virtio_transport_get_type(struct sock *sk)
+{
+ if (sk->sk_type == SOCK_STREAM)
+ return VIRTIO_VSOCK_TYPE_STREAM;
+ else
+ return VIRTIO_VSOCK_TYPE_SEQPACKET;
+}
+
/* This function can only be used on connecting/connected sockets,
* since a socket assigned to a transport is required.
*
@@ -179,6 +195,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt *pkt;
u32 pkt_len = info->pkt_len;
+ info->type = virtio_transport_get_type(sk_vsock(vsk));
+
t_ops = virtio_transport_get_ops(vsk);
if (unlikely(!t_ops))
return -EFAULT;
@@ -269,13 +287,10 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
-static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
- int type,
- struct virtio_vsock_hdr *hdr)
+static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
- .type = type,
.vsk = vsk,
};
@@ -383,11 +398,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
* messages, we set the limit to a high value. TODO: experiment
* with different values.
*/
- if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
- virtio_transport_send_credit_update(vsk,
- VIRTIO_VSOCK_TYPE_STREAM,
- NULL);
- }
+ if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
+ virtio_transport_send_credit_update(vsk);
return total;
@@ -397,6 +409,78 @@ out:
return err;
}
+static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
+ struct msghdr *msg,
+ int flags)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ struct virtio_vsock_pkt *pkt;
+ int dequeued_len = 0;
+ size_t user_buf_len = msg_data_left(msg);
+ bool msg_ready = false;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ if (vvs->msg_count == 0) {
+ spin_unlock_bh(&vvs->rx_lock);
+ return 0;
+ }
+
+ while (!msg_ready) {
+ pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);
+
+ if (dequeued_len >= 0) {
+ size_t pkt_len;
+ size_t bytes_to_copy;
+
+ pkt_len = (size_t)le32_to_cpu(pkt->hdr.len);
+ bytes_to_copy = min(user_buf_len, pkt_len);
+
+ if (bytes_to_copy) {
+ int err;
+
+ /* sk_lock is held by caller so no one else can dequeue.
+ * Unlock rx_lock since memcpy_to_msg() may sleep.
+ */
+ spin_unlock_bh(&vvs->rx_lock);
+
+ err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
+ if (err) {
+ /* Copy of message failed. Rest of the
+ * fragments will be freed without copying.
+ */
+ dequeued_len = err;
+ } else {
+ user_buf_len -= bytes_to_copy;
+ }
+
+ spin_lock_bh(&vvs->rx_lock);
+ }
+
+ if (dequeued_len >= 0)
+ dequeued_len += pkt_len;
+ }
+
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ msg_ready = true;
+ vvs->msg_count--;
+
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+ msg->msg_flags |= MSG_EOR;
+ }
+
+ virtio_transport_dec_rx_pkt(vvs, pkt);
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ virtio_transport_send_credit_update(vsk);
+
+ return dequeued_len;
+}
+
ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
@@ -409,6 +493,38 @@ virtio_transport_stream_dequeue(struct vsock_sock *vsk,
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
+ssize_t
+virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
+ struct msghdr *msg,
+ int flags)
+{
+ if (flags & MSG_PEEK)
+ return -EOPNOTSUPP;
+
+ return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
+
+int
+virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
+ struct msghdr *msg,
+ size_t len)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+
+ spin_lock_bh(&vvs->tx_lock);
+
+ if (len > vvs->peer_buf_alloc) {
+ spin_unlock_bh(&vvs->tx_lock);
+ return -EMSGSIZE;
+ }
+
+ spin_unlock_bh(&vvs->tx_lock);
+
+ return virtio_transport_stream_enqueue(vsk, msg, len);
+}
+EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);
+
int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
@@ -431,6 +547,19 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);
+u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ u32 msg_count;
+
+ spin_lock_bh(&vvs->rx_lock);
+ msg_count = vvs->msg_count;
+ spin_unlock_bh(&vvs->rx_lock);
+
+ return msg_count;
+}
+EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);
+
static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
struct virtio_vsock_sock *vvs = vsk->trans;
@@ -496,8 +625,7 @@ void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
vvs->buf_alloc = *val;
- virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
- NULL);
+ virtio_transport_send_credit_update(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
@@ -624,7 +752,6 @@ int virtio_transport_connect(struct vsock_sock *vsk)
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_REQUEST,
- .type = VIRTIO_VSOCK_TYPE_STREAM,
.vsk = vsk,
};
@@ -636,7 +763,6 @@ int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_SHUTDOWN,
- .type = VIRTIO_VSOCK_TYPE_STREAM,
.flags = (mode & RCV_SHUTDOWN ?
VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
(mode & SEND_SHUTDOWN ?
@@ -665,7 +791,6 @@ virtio_transport_stream_enqueue(struct vsock_sock *vsk,
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RW,
- .type = VIRTIO_VSOCK_TYPE_STREAM,
.msg = msg,
.pkt_len = len,
.vsk = vsk,
@@ -688,7 +813,6 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
- .type = VIRTIO_VSOCK_TYPE_STREAM,
.reply = !!pkt,
.vsk = vsk,
};
@@ -848,7 +972,7 @@ void virtio_transport_release(struct vsock_sock *vsk)
struct sock *sk = &vsk->sk;
bool remove_sock = true;
- if (sk->sk_type == SOCK_STREAM)
+ if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
remove_sock = virtio_transport_close(vsk);
if (remove_sock) {
@@ -890,7 +1014,7 @@ destroy:
virtio_transport_reset(vsk, pkt);
sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return err;
}
@@ -912,6 +1036,9 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
goto out;
}
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
+ vvs->msg_count++;
+
/* Try to copy small packets into the buffer of last packet queued,
* to avoid wasting memory queueing the entire buffer with a small
* payload.
@@ -923,13 +1050,18 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
struct virtio_vsock_pkt, list);
/* If there is space in the last packet queued, we copy the
- * new packet in its buffer.
+ * new packet in its buffer. We avoid this if the last packet
+ * queued has VIRTIO_VSOCK_SEQ_EOM set, because it is the
+ * delimiter of a SEQPACKET message, so 'pkt' is the first packet
+ * of a new message.
*/
- if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
+ if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
+ !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
pkt->len);
last_pkt->len += pkt->len;
free_pkt = true;
+ last_pkt->hdr.flags |= pkt->hdr.flags;
goto out;
}
}
@@ -954,6 +1086,9 @@ virtio_transport_recv_connected(struct sock *sk,
virtio_transport_recv_enqueue(vsk, pkt);
sk->sk_data_ready(sk);
return err;
+ case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
+ virtio_transport_send_credit_update(vsk);
+ break;
case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
sk->sk_write_space(sk);
break;
@@ -1000,7 +1135,6 @@ virtio_transport_send_response(struct vsock_sock *vsk,
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RESPONSE,
- .type = VIRTIO_VSOCK_TYPE_STREAM,
.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
.remote_port = le32_to_cpu(pkt->hdr.src_port),
.reply = true,
@@ -1096,6 +1230,12 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
return 0;
}
+static bool virtio_transport_valid_type(u16 type)
+{
+ return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
+ (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
+}
+
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
* lock.
*/
@@ -1121,7 +1261,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
le32_to_cpu(pkt->hdr.buf_alloc),
le32_to_cpu(pkt->hdr.fwd_cnt));
- if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) {
+ if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) {
(void)virtio_transport_reset_no_sock(t, pkt);
goto free_pkt;
}
@@ -1138,6 +1278,12 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
}
}
+ if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) {
+ (void)virtio_transport_reset_no_sock(t, pkt);
+ sock_put(sk);
+ goto free_pkt;
+ }
+
vsk = vsock_sk(sk);
lock_sock(sk);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index c99bc4ce78e2..7aef34e32bdf 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -831,7 +831,7 @@ static void vmci_transport_handle_detach(struct sock *sk)
sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return;
}
sk->sk_state = TCP_CLOSE;
@@ -1248,7 +1248,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
vsock_remove_pending(listener, pending);
vsock_enqueue_accept(listener, pending);
- /* Callers of accept() will be be waiting on the listening socket, not
+ /* Callers of accept() will be waiting on the listening socket, not
* the pending socket.
*/
listener->sk_data_ready(listener);
@@ -1365,7 +1365,7 @@ destroy:
sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
return err;
}
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index a45f7ffca8c5..169a8cf65b39 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -63,6 +63,8 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
return 0;
}
+static bool vsock_loopback_seqpacket_allow(u32 remote_cid);
+
static struct virtio_transport loopback_transport = {
.transport = {
.module = THIS_MODULE,
@@ -89,6 +91,11 @@ static struct virtio_transport loopback_transport = {
.stream_is_active = virtio_transport_stream_is_active,
.stream_allow = virtio_transport_stream_allow,
+ .seqpacket_dequeue = virtio_transport_seqpacket_dequeue,
+ .seqpacket_enqueue = virtio_transport_seqpacket_enqueue,
+ .seqpacket_allow = vsock_loopback_seqpacket_allow,
+ .seqpacket_has_data = virtio_transport_seqpacket_has_data,
+
.notify_poll_in = virtio_transport_notify_poll_in,
.notify_poll_out = virtio_transport_notify_poll_out,
.notify_recv_init = virtio_transport_notify_recv_init,
@@ -105,6 +112,11 @@ static struct virtio_transport loopback_transport = {
.send_pkt = vsock_loopback_send_pkt,
};
+static bool vsock_loopback_seqpacket_allow(u32 remote_cid)
+{
+ return true;
+}
+
static void vsock_loopback_work(struct work_struct *work)
{
struct vsock_loopback *vsock =
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 285b8076054b..869c43d4414c 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2018-2020 Intel Corporation
+ * Copyright 2018-2021 Intel Corporation
*/
#include <linux/export.h>
@@ -942,7 +942,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
struct ieee80211_sta_vht_cap *vht_cap;
struct ieee80211_edmg *edmg_cap;
u32 width, control_freq, cap;
- bool support_80_80 = false;
+ bool ext_nss_cap, support_80_80 = false;
if (WARN_ON(!cfg80211_chandef_valid(chandef)))
return false;
@@ -950,6 +950,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap;
vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap;
edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap;
+ ext_nss_cap = __le16_to_cpu(vht_cap->vht_mcs.tx_highest) &
+ IEEE80211_VHT_EXT_NSS_BW_CAPABLE;
if (edmg_cap->channels &&
!cfg80211_edmg_usable(wiphy,
@@ -1015,7 +1017,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
(cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) ||
(cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) ||
- u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1;
+ (ext_nss_cap &&
+ u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1);
if (chandef->chan->band != NL80211_BAND_6GHZ && !support_80_80)
return false;
fallthrough;
@@ -1037,7 +1040,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ &&
- !(vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK))
+ !(ext_nss_cap &&
+ (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)))
return false;
break;
default:
@@ -1335,3 +1339,34 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
WARN_ON(1);
}
}
+
+bool cfg80211_any_usable_channels(struct wiphy *wiphy,
+ unsigned long sband_mask,
+ u32 prohibited_flags)
+{
+ int idx;
+
+ prohibited_flags |= IEEE80211_CHAN_DISABLED;
+
+ for_each_set_bit(idx, &sband_mask, NUM_NL80211_BANDS) {
+ struct ieee80211_supported_band *sband = wiphy->bands[idx];
+ int chanidx;
+
+ if (!sband)
+ continue;
+
+ for (chanidx = 0; chanidx < sband->n_channels; chanidx++) {
+ struct ieee80211_channel *chan;
+
+ chan = &sband->channels[chanidx];
+
+ if (chan->flags & prohibited_flags)
+ continue;
+
+ return true;
+ }
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(cfg80211_any_usable_channels);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 8d0883e81093..03323121ca50 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -532,11 +532,11 @@ use_default_name:
wiphy_net_set(&rdev->wiphy, &init_net);
rdev->rfkill_ops.set_block = cfg80211_rfkill_set_block;
- rdev->rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev),
- &rdev->wiphy.dev, RFKILL_TYPE_WLAN,
- &rdev->rfkill_ops, rdev);
+ rdev->wiphy.rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev),
+ &rdev->wiphy.dev, RFKILL_TYPE_WLAN,
+ &rdev->rfkill_ops, rdev);
- if (!rdev->rfkill) {
+ if (!rdev->wiphy.rfkill) {
wiphy_free(&rdev->wiphy);
return NULL;
}
@@ -589,14 +589,6 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
if (WARN_ON(!c->num_different_channels))
return -EINVAL;
- /*
- * Put a sane limit on maximum number of different
- * channels to simplify channel accounting code.
- */
- if (WARN_ON(c->num_different_channels >
- CFG80211_MAX_NUM_DIFFERENT_CHANNELS))
- return -EINVAL;
-
/* DFS only works on one channel. */
if (WARN_ON(c->radar_detect_widths &&
(c->num_different_channels > 1)))
@@ -936,9 +928,6 @@ int wiphy_register(struct wiphy *wiphy)
return res;
}
- /* set up regulatory info */
- wiphy_regulatory_register(wiphy);
-
list_add_rcu(&rdev->list, &cfg80211_rdev_list);
cfg80211_rdev_list_generation++;
@@ -949,6 +938,9 @@ int wiphy_register(struct wiphy *wiphy)
cfg80211_debugfs_rdev_add(rdev);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+ /* set up regulatory info */
+ wiphy_regulatory_register(wiphy);
+
if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) {
struct regulatory_request request;
@@ -993,10 +985,10 @@ int wiphy_register(struct wiphy *wiphy)
rdev->wiphy.registered = true;
rtnl_unlock();
- res = rfkill_register(rdev->rfkill);
+ res = rfkill_register(rdev->wiphy.rfkill);
if (res) {
- rfkill_destroy(rdev->rfkill);
- rdev->rfkill = NULL;
+ rfkill_destroy(rdev->wiphy.rfkill);
+ rdev->wiphy.rfkill = NULL;
wiphy_unregister(&rdev->wiphy);
return res;
}
@@ -1012,18 +1004,10 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy)
if (!rdev->ops->rfkill_poll)
return;
rdev->rfkill_ops.poll = cfg80211_rfkill_poll;
- rfkill_resume_polling(rdev->rfkill);
+ rfkill_resume_polling(wiphy->rfkill);
}
EXPORT_SYMBOL(wiphy_rfkill_start_polling);
-void wiphy_rfkill_stop_polling(struct wiphy *wiphy)
-{
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
-
- rfkill_pause_polling(rdev->rfkill);
-}
-EXPORT_SYMBOL(wiphy_rfkill_stop_polling);
-
void wiphy_unregister(struct wiphy *wiphy)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -1035,8 +1019,8 @@ void wiphy_unregister(struct wiphy *wiphy)
wiphy_unlock(&rdev->wiphy);
__count == 0; }));
- if (rdev->rfkill)
- rfkill_unregister(rdev->rfkill);
+ if (rdev->wiphy.rfkill)
+ rfkill_unregister(rdev->wiphy.rfkill);
rtnl_lock();
wiphy_lock(&rdev->wiphy);
@@ -1088,7 +1072,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev)
{
struct cfg80211_internal_bss *scan, *tmp;
struct cfg80211_beacon_registration *reg, *treg;
- rfkill_destroy(rdev->rfkill);
+ rfkill_destroy(rdev->wiphy.rfkill);
list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) {
list_del(&reg->list);
kfree(reg);
@@ -1110,7 +1094,7 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
- if (rfkill_set_hw_state_reason(rdev->rfkill, blocked, reason))
+ if (rfkill_set_hw_state_reason(wiphy->rfkill, blocked, reason))
schedule_work(&rdev->rfkill_block);
}
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason);
@@ -1503,7 +1487,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
wdev->use_4addr, 0))
return notifier_from_errno(-EOPNOTSUPP);
- if (rfkill_blocked(rdev->rfkill))
+ if (rfkill_blocked(rdev->wiphy.rfkill))
return notifier_from_errno(-ERFKILL);
break;
default:
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a7d19b4b40ac..b35d0db12f1d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
* Wireless configuration interface internals.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
@@ -27,7 +27,6 @@ struct cfg80211_registered_device {
/* rfkill support */
struct rfkill_ops rfkill_ops;
- struct rfkill *rfkill;
struct work_struct rfkill_block;
/* ISO / IEC 3166 alpha2 for which this device is receiving
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fc9286afe3c9..bf7cd4752547 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -330,7 +330,7 @@ nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = {
};
static const struct nla_policy
-nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
+nl80211_pmsr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
[NL80211_PMSR_PEER_ATTR_ADDR] = NLA_POLICY_ETH_ADDR,
[NL80211_PMSR_PEER_ATTR_CHAN] = NLA_POLICY_NESTED(nl80211_policy),
[NL80211_PMSR_PEER_ATTR_REQ] =
@@ -345,7 +345,7 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = {
[NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT },
[NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT },
[NL80211_PMSR_ATTR_PEERS] =
- NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy),
+ NLA_POLICY_NESTED_ARRAY(nl80211_pmsr_peer_attr_policy),
};
static const struct nla_policy
@@ -759,6 +759,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT },
[NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy),
[NL80211_ATTR_DISABLE_HE] = { .type = NLA_FLAG },
+ [NL80211_ATTR_OBSS_COLOR_BITMAP] = { .type = NLA_U64 },
+ [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 },
+ [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 },
+ [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy),
};
/* policy for the key attributes */
@@ -1731,6 +1735,11 @@ nl80211_send_iftype_data(struct sk_buff *msg,
&iftdata->he_6ghz_capa))
return -ENOBUFS;
+ if (iftdata->vendor_elems.data && iftdata->vendor_elems.len &&
+ nla_put(msg, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS,
+ iftdata->vendor_elems.len, iftdata->vendor_elems.data))
+ return -ENOBUFS;
+
return 0;
}
@@ -2346,7 +2355,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
goto nla_put_failure;
for (band = state->band_start;
- band < NUM_NL80211_BANDS; band++) {
+ band < (state->split ?
+ NUM_NL80211_BANDS :
+ NL80211_BAND_60GHZ + 1);
+ band++) {
struct ieee80211_supported_band *sband;
/* omit higher bands for ancient software */
@@ -4781,11 +4793,10 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
sband->ht_cap.mcs.rx_mask,
sizeof(mask->control[i].ht_mcs));
- if (!sband->vht_cap.vht_supported)
- continue;
-
- vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
- vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+ if (sband->vht_cap.vht_supported) {
+ vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
+ vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+ }
he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype);
if (!he_cap)
@@ -6520,8 +6531,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
err = rdev_change_station(rdev, dev, mac_addr, &params);
out_put_vlan:
- if (params.vlan)
- dev_put(params.vlan);
+ dev_put(params.vlan);
return err;
}
@@ -6756,8 +6766,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
err = rdev_add_station(rdev, dev, mac_addr, &params);
- if (params.vlan)
- dev_put(params.vlan);
+ dev_put(params.vlan);
return err;
}
@@ -8482,8 +8491,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
goto out_free;
nl80211_send_scan_start(rdev, wdev);
- if (wdev->netdev)
- dev_hold(wdev->netdev);
+ dev_hold(wdev->netdev);
return 0;
@@ -13042,7 +13050,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
if (wdev_running(wdev))
return 0;
- if (rfkill_blocked(rdev->rfkill))
+ if (rfkill_blocked(rdev->wiphy.rfkill))
return -ERFKILL;
err = rdev_start_p2p_device(rdev, wdev);
@@ -13084,7 +13092,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
if (wdev_running(wdev))
return -EEXIST;
- if (rfkill_blocked(rdev->rfkill))
+ if (rfkill_blocked(rdev->wiphy.rfkill))
return -ERFKILL;
if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF])
@@ -14796,6 +14804,106 @@ bad_tid_conf:
return ret;
}
+static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct cfg80211_color_change_settings params = {};
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct nlattr **tb;
+ u16 offset;
+ int err;
+
+ if (!rdev->ops->color_change)
+ return -EOPNOTSUPP;
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_BSS_COLOR))
+ return -EOPNOTSUPP;
+
+ if (wdev->iftype != NL80211_IFTYPE_AP)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT] ||
+ !info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR] ||
+ !info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS])
+ return -EINVAL;
+
+ params.count = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT]);
+ params.color = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR]);
+
+ err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_next);
+ if (err)
+ return err;
+
+ tb = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*tb), GFP_KERNEL);
+ if (!tb)
+ return -ENOMEM;
+
+ err = nla_parse_nested(tb, NL80211_ATTR_MAX,
+ info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS],
+ nl80211_policy, info->extack);
+ if (err)
+ goto out;
+
+ err = nl80211_parse_beacon(rdev, tb, &params.beacon_color_change);
+ if (err)
+ goto out;
+
+ if (!tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) != sizeof(u16)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]);
+ if (offset >= params.beacon_color_change.tail_len) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (params.beacon_color_change.tail[offset] != params.count) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ params.counter_offset_beacon = offset;
+
+ if (tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) {
+ if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) !=
+ sizeof(u16)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]);
+ if (offset >= params.beacon_color_change.probe_resp_len) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (params.beacon_color_change.probe_resp[offset] !=
+ params.count) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ params.counter_offset_presp = offset;
+ }
+
+ wdev_lock(wdev);
+ err = rdev_color_change(rdev, dev, &params);
+ wdev_unlock(wdev);
+
+out:
+ kfree(tb);
+ return err;
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -14853,9 +14961,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
return -ENETDOWN;
}
- if (dev)
- dev_hold(dev);
-
+ dev_hold(dev);
info->user_ptr[0] = rdev;
}
@@ -14877,8 +14983,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
struct wireless_dev *wdev = info->user_ptr[1];
- if (wdev->netdev)
- dev_put(wdev->netdev);
+ dev_put(wdev->netdev);
} else {
dev_put(info->user_ptr[1]);
}
@@ -15794,6 +15899,14 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.internal_flags = NL80211_FLAG_NEED_WIPHY |
NL80211_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL80211_CMD_COLOR_CHANGE_REQUEST,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = nl80211_color_change,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
static struct genl_family nl80211_fam __ro_after_init = {
@@ -17423,6 +17536,51 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
}
EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);
+int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
+ enum nl80211_commands cmd, u8 count,
+ u64 color_bitmap)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+
+ ASSERT_WDEV_LOCK(wdev);
+
+ trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
+ goto nla_put_failure;
+
+ if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED &&
+ nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count))
+ goto nla_put_failure;
+
+ if (cmd == NL80211_CMD_OBSS_COLOR_COLLISION &&
+ nla_put_u64_64bit(msg, NL80211_ATTR_OBSS_COLOR_BITMAP,
+ color_bitmap, NL80211_ATTR_PAD))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+ msg, 0, NL80211_MCGRP_MLME, gfp);
+
+nla_put_failure:
+ nlmsg_free(msg);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(cfg80211_bss_color_notify);
+
void
nl80211_radar_notify(struct cfg80211_registered_device *rdev,
const struct cfg80211_chan_def *chandef,
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index d245968b74cb..328cf54bda82 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -168,6 +168,18 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
return -EINVAL;
}
+ if (tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR]) {
+ if (!out->ftm.non_trigger_based && !out->ftm.trigger_based) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR],
+ "FTM: BSS color set for EDCA based ranging");
+ return -EINVAL;
+ }
+
+ out->ftm.bss_color =
+ nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR]);
+ }
+
return 0;
}
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 36f1b59a78bf..ae2e1a896461 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -115,23 +115,22 @@ int ieee80211_radiotap_iterator_init(
iterator->_max_length = get_unaligned_le16(&radiotap_header->it_len);
iterator->_arg_index = 0;
iterator->_bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present);
- iterator->_arg = (uint8_t *)radiotap_header + sizeof(*radiotap_header);
+ iterator->_arg = (uint8_t *)radiotap_header->it_optional;
iterator->_reset_on_ext = 0;
- iterator->_next_bitmap = &radiotap_header->it_present;
- iterator->_next_bitmap++;
+ iterator->_next_bitmap = radiotap_header->it_optional;
iterator->_vns = vns;
iterator->current_namespace = &radiotap_ns;
iterator->is_radiotap_ns = 1;
/* find payload start allowing for extended bitmap(s) */
- if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) {
+ if (iterator->_bitmap_shifter & (BIT(IEEE80211_RADIOTAP_EXT))) {
if ((unsigned long)iterator->_arg -
(unsigned long)iterator->_rtheader + sizeof(uint32_t) >
(unsigned long)iterator->_max_length)
return -EINVAL;
while (get_unaligned_le32(iterator->_arg) &
- (1 << IEEE80211_RADIOTAP_EXT)) {
+ (BIT(IEEE80211_RADIOTAP_EXT))) {
iterator->_arg += sizeof(uint32_t);
/*
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 8b1358d04ca2..ce6bf218a1a3 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -464,8 +464,18 @@ static inline int rdev_assoc(struct cfg80211_registered_device *rdev,
struct net_device *dev,
struct cfg80211_assoc_request *req)
{
+ const struct cfg80211_bss_ies *bss_ies;
int ret;
- trace_rdev_assoc(&rdev->wiphy, dev, req);
+
+ /*
+ * Note: we might trace not exactly the data that's processed,
+ * due to races and the driver/mac80211 getting a newer copy.
+ */
+ rcu_read_lock();
+ bss_ies = rcu_dereference(req->bss->ies);
+ trace_rdev_assoc(&rdev->wiphy, dev, req, bss_ies);
+ rcu_read_unlock();
+
ret = rdev->ops->assoc(&rdev->wiphy, dev, req);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
@@ -1358,4 +1368,17 @@ static inline int rdev_set_sar_specs(struct cfg80211_registered_device *rdev,
return ret;
}
+static inline int rdev_color_change(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_color_change_settings *params)
+{
+ int ret;
+
+ trace_rdev_color_change(&rdev->wiphy, dev, params);
+ ret = rdev->ops->color_change(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0406ce7334fa..df87c7f3a049 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -171,9 +171,11 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
{
const struct ieee80211_regdomain *regd = NULL;
const struct ieee80211_regdomain *wiphy_regd = NULL;
+ enum nl80211_dfs_regions dfs_region;
rcu_read_lock();
regd = get_cfg80211_regdom();
+ dfs_region = regd->dfs_region;
if (!wiphy)
goto out;
@@ -182,6 +184,11 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
if (!wiphy_regd)
goto out;
+ if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
+ dfs_region = wiphy_regd->dfs_region;
+ goto out;
+ }
+
if (wiphy_regd->dfs_region == regd->dfs_region)
goto out;
@@ -193,7 +200,7 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy)
out:
rcu_read_unlock();
- return regd->dfs_region;
+ return dfs_region;
}
static void rcu_free_regdom(const struct ieee80211_regdomain *r)
@@ -3975,7 +3982,9 @@ static int __regulatory_set_wiphy_regd(struct wiphy *wiphy,
"wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n"))
return -EPERM;
- if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) {
+ if (WARN(!is_valid_rd(rd),
+ "Invalid regulatory domain detected: %c%c\n",
+ rd->alpha2[0], rd->alpha2[1])) {
print_regdomain_info(rd);
return -EINVAL;
}
@@ -4049,6 +4058,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy)
wiphy_update_regulatory(wiphy, lr->initiator);
wiphy_all_share_dfs_chan_state(wiphy);
+ reg_process_self_managed_hints();
}
void wiphy_regulatory_deregister(struct wiphy *wiphy)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 4f06c1825029..11c68b159324 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -5,7 +5,7 @@
* Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -618,7 +618,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
freq = ieee80211_channel_to_frequency(ap_info->channel, band);
- if (end - pos < count * ap_info->tbtt_info_len)
+ if (end - pos < count * length)
break;
/*
@@ -630,7 +630,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
if (band != NL80211_BAND_6GHZ ||
(length != IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM &&
length < IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM)) {
- pos += count * ap_info->tbtt_info_len;
+ pos += count * length;
continue;
}
@@ -653,7 +653,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
kfree(entry);
}
- pos += ap_info->tbtt_info_len;
+ pos += length;
}
}
@@ -757,7 +757,8 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
}
request = kzalloc(struct_size(request, channels, n_channels) +
- sizeof(*request->scan_6ghz_params) * count,
+ sizeof(*request->scan_6ghz_params) * count +
+ sizeof(*request->ssids) * rdev_req->n_ssids,
GFP_KERNEL);
if (!request) {
cfg80211_free_coloc_ap_list(&coloc_ap_list);
@@ -848,10 +849,19 @@ skip:
if (request->n_channels) {
struct cfg80211_scan_request *old = rdev->int_scan_req;
-
rdev->int_scan_req = request;
/*
+ * Add the ssids from the parent scan request to the new scan
+ * request, so the driver would be able to use them in its
+ * probe requests to discover hidden APs on PSC channels.
+ */
+ request->ssids = (void *)&request->channels[request->n_channels];
+ request->n_ssids = rdev_req->n_ssids;
+ memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) *
+ request->n_ssids);
+
+ /*
* If this scan follows a previous scan, save the scan start
* info from the first part of the scan
*/
@@ -965,8 +975,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
}
#endif
- if (wdev->netdev)
- dev_put(wdev->netdev);
+ dev_put(wdev->netdev);
kfree(rdev->int_scan_req);
rdev->int_scan_req = NULL;
@@ -1744,16 +1753,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
* be grouped with this beacon for updates ...
*/
if (!cfg80211_combine_bsses(rdev, new)) {
- kfree(new);
+ bss_ref_put(rdev, new);
goto drop;
}
}
if (rdev->bss_entries >= bss_entries_limit &&
!cfg80211_bss_expire_oldest(rdev)) {
- if (!list_empty(&new->hidden_list))
- list_del(&new->hidden_list);
- kfree(new);
+ bss_ref_put(rdev, new);
goto drop;
}
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 76b777d5903f..19b78d472283 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1195,8 +1195,9 @@ TRACE_EVENT(rdev_auth,
TRACE_EVENT(rdev_assoc,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_assoc_request *req),
- TP_ARGS(wiphy, netdev, req),
+ struct cfg80211_assoc_request *req,
+ const struct cfg80211_bss_ies *bss_ies),
+ TP_ARGS(wiphy, netdev, req, bss_ies),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
@@ -1204,6 +1205,17 @@ TRACE_EVENT(rdev_assoc,
MAC_ENTRY(prev_bssid)
__field(bool, use_mfp)
__field(u32, flags)
+ __dynamic_array(u8, bss_elements, bss_ies->len)
+ __field(bool, bss_elements_bcon)
+ __field(u64, bss_elements_tsf)
+ __dynamic_array(u8, elements, req->ie_len)
+ __array(u8, ht_capa, sizeof(struct ieee80211_ht_cap))
+ __array(u8, ht_capa_mask, sizeof(struct ieee80211_ht_cap))
+ __array(u8, vht_capa, sizeof(struct ieee80211_vht_cap))
+ __array(u8, vht_capa_mask, sizeof(struct ieee80211_vht_cap))
+ __dynamic_array(u8, fils_kek, req->fils_kek_len)
+ __dynamic_array(u8, fils_nonces,
+ req->fils_nonces ? 2 * FILS_NONCE_LEN : 0)
),
TP_fast_assign(
WIPHY_ASSIGN;
@@ -1215,6 +1227,26 @@ TRACE_EVENT(rdev_assoc,
MAC_ASSIGN(prev_bssid, req->prev_bssid);
__entry->use_mfp = req->use_mfp;
__entry->flags = req->flags;
+ if (bss_ies->len)
+ memcpy(__get_dynamic_array(bss_elements),
+ bss_ies->data, bss_ies->len);
+ __entry->bss_elements_bcon = bss_ies->from_beacon;
+ __entry->bss_elements_tsf = bss_ies->tsf;
+ if (req->ie)
+ memcpy(__get_dynamic_array(elements),
+ req->ie, req->ie_len);
+ memcpy(__entry->ht_capa, &req->ht_capa, sizeof(req->ht_capa));
+ memcpy(__entry->ht_capa_mask, &req->ht_capa_mask,
+ sizeof(req->ht_capa_mask));
+ memcpy(__entry->vht_capa, &req->vht_capa, sizeof(req->vht_capa));
+ memcpy(__entry->vht_capa_mask, &req->vht_capa_mask,
+ sizeof(req->vht_capa_mask));
+ if (req->fils_kek)
+ memcpy(__get_dynamic_array(fils_kek),
+ req->fils_kek, req->fils_kek_len);
+ if (req->fils_nonces)
+ memcpy(__get_dynamic_array(fils_nonces),
+ req->fils_nonces, 2 * FILS_NONCE_LEN);
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
", previous bssid: " MAC_PR_FMT ", use mfp: %s, flags: %u",
@@ -3565,6 +3597,52 @@ TRACE_EVENT(rdev_set_sar_specs,
WIPHY_PR_ARG, __entry->type, __entry->num)
);
+TRACE_EVENT(rdev_color_change,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_color_change_settings *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __field(u8, count)
+ __field(u16, bcn_ofs)
+ __field(u16, pres_ofs)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ __entry->count = params->count;
+ __entry->bcn_ofs = params->counter_offset_beacon;
+ __entry->pres_ofs = params->counter_offset_presp;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT
+ ", count: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->count)
+);
+
+TRACE_EVENT(cfg80211_bss_color_notify,
+ TP_PROTO(struct net_device *netdev,
+ enum nl80211_commands cmd,
+ u8 count, u64 color_bitmap),
+ TP_ARGS(netdev, cmd, count, color_bitmap),
+ TP_STRUCT__entry(
+ NETDEV_ENTRY
+ __field(u32, cmd)
+ __field(u8, count)
+ __field(u64, color_bitmap)
+ ),
+ TP_fast_assign(
+ NETDEV_ASSIGN;
+ __entry->cmd = cmd;
+ __entry->count = count;
+ __entry->color_bitmap = color_bitmap;
+ ),
+ TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx",
+ NETDEV_PR_ARG, __entry->cmd, __entry->count,
+ __entry->color_bitmap)
+);
+
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a8320dc59af7..a32065d600a1 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -902,7 +902,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
/* only change when not disabling */
if (!data->txpower.disabled) {
- rfkill_set_sw_state(rdev->rfkill, false);
+ rfkill_set_sw_state(rdev->wiphy.rfkill, false);
if (data->txpower.fixed) {
/*
@@ -927,7 +927,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev,
}
}
} else {
- if (rfkill_set_sw_state(rdev->rfkill, true))
+ if (rfkill_set_sw_state(rdev->wiphy.rfkill, true))
schedule_work(&rdev->rfkill_block);
return 0;
}
@@ -963,7 +963,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev,
/* well... oh well */
data->txpower.fixed = 1;
- data->txpower.disabled = rfkill_blocked(rdev->rfkill);
+ data->txpower.disabled = rfkill_blocked(rdev->wiphy.rfkill);
data->txpower.value = val;
data->txpower.flags = IW_TXPOW_DBM;
@@ -1167,7 +1167,7 @@ static int cfg80211_wext_siwpower(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
- bool ps = wdev->ps;
+ bool ps;
int timeout = wdev->ps_timeout;
int err;
diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c
index 33bef22e44e9..b379a0371653 100644
--- a/net/wireless/wext-spy.c
+++ b/net/wireless/wext-spy.c
@@ -120,8 +120,8 @@ int iw_handler_set_thrspy(struct net_device * dev,
return -EOPNOTSUPP;
/* Just do it */
- memcpy(&(spydata->spy_thr_low), &(threshold->low),
- 2 * sizeof(struct iw_quality));
+ spydata->spy_thr_low = threshold->low;
+ spydata->spy_thr_high = threshold->high;
/* Clear flag */
memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
@@ -147,8 +147,8 @@ int iw_handler_get_thrspy(struct net_device * dev,
return -EOPNOTSUPP;
/* Just do it */
- memcpy(&(threshold->low), &(spydata->spy_thr_low),
- 2 * sizeof(struct iw_quality));
+ threshold->low = spydata->spy_thr_low;
+ threshold->high = spydata->spy_thr_high;
return 0;
}
@@ -173,10 +173,10 @@ static void iw_send_thrspy_event(struct net_device * dev,
memcpy(threshold.addr.sa_data, address, ETH_ALEN);
threshold.addr.sa_family = ARPHRD_ETHER;
/* Copy stats */
- memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
+ threshold.qual = *wstats;
/* Copy also thresholds */
- memcpy(&(threshold.low), &(spydata->spy_thr_low),
- 2 * sizeof(struct iw_quality));
+ threshold.low = spydata->spy_thr_low;
+ threshold.high = spydata->spy_thr_high;
/* Send event to user space */
wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 1816899499ce..3583354a7d7f 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -366,7 +366,7 @@ static void x25_destroy_timer(struct timer_list *t)
/*
* This is called from user mode and the timers. Thus it protects itself
- * against interrupt users but doesn't worry about being called during
+ * against interrupting users but doesn't worry about being called during
* work. Once it is removed from the queue no interrupt or bottom half
* will touch it and we are (fairly 8-) ) safe.
* Not static as it's used by the timer
diff --git a/net/x25/x25_forward.c b/net/x25/x25_forward.c
index d48ad6d29197..21b30b56e889 100644
--- a/net/x25/x25_forward.c
+++ b/net/x25/x25_forward.c
@@ -19,7 +19,6 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
{
struct x25_route *rt;
struct x25_neigh *neigh_new = NULL;
- struct list_head *entry;
struct x25_forward *x25_frwd, *new_frwd;
struct sk_buff *skbn;
short same_lci = 0;
@@ -46,8 +45,7 @@ int x25_forward_call(struct x25_address *dest_addr, struct x25_neigh *from,
* established LCI? It shouldn't happen, just in case..
*/
read_lock_bh(&x25_forward_list_lock);
- list_for_each(entry, &x25_forward_list) {
- x25_frwd = list_entry(entry, struct x25_forward, node);
+ list_for_each_entry(x25_frwd, &x25_forward_list, node) {
if (x25_frwd->lci == lci) {
pr_warn("call request for lci which is already registered!, transmitting but not registering new pair\n");
same_lci = 1;
@@ -92,15 +90,13 @@ out_no_route:
int x25_forward_data(int lci, struct x25_neigh *from, struct sk_buff *skb) {
struct x25_forward *frwd;
- struct list_head *entry;
struct net_device *peer = NULL;
struct x25_neigh *nb;
struct sk_buff *skbn;
int rc = 0;
read_lock_bh(&x25_forward_list_lock);
- list_for_each(entry, &x25_forward_list) {
- frwd = list_entry(entry, struct x25_forward, node);
+ list_for_each_entry(frwd, &x25_forward_list, node) {
if (frwd->lci == lci) {
/* The call is established, either side can send */
if (from->dev == frwd->dev1) {
diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c
index 57a81100c5da..5460b9146dd8 100644
--- a/net/x25/x25_link.c
+++ b/net/x25/x25_link.c
@@ -332,12 +332,9 @@ void x25_link_device_down(struct net_device *dev)
struct x25_neigh *x25_get_neigh(struct net_device *dev)
{
struct x25_neigh *nb, *use = NULL;
- struct list_head *entry;
read_lock_bh(&x25_neigh_list_lock);
- list_for_each(entry, &x25_neigh_list) {
- nb = list_entry(entry, struct x25_neigh, node);
-
+ list_for_each_entry(nb, &x25_neigh_list, node) {
if (nb->dev == dev) {
use = nb;
break;
diff --git a/net/x25/x25_route.c b/net/x25/x25_route.c
index 9fbe4bb38d94..647f325ed867 100644
--- a/net/x25/x25_route.c
+++ b/net/x25/x25_route.c
@@ -27,14 +27,11 @@ static int x25_add_route(struct x25_address *address, unsigned int sigdigits,
struct net_device *dev)
{
struct x25_route *rt;
- struct list_head *entry;
int rc = -EINVAL;
write_lock_bh(&x25_route_list_lock);
- list_for_each(entry, &x25_route_list) {
- rt = list_entry(entry, struct x25_route, node);
-
+ list_for_each_entry(rt, &x25_route_list, node) {
if (!memcmp(&rt->address, address, sigdigits) &&
rt->sigdigits == sigdigits)
goto out;
@@ -78,14 +75,11 @@ static int x25_del_route(struct x25_address *address, unsigned int sigdigits,
struct net_device *dev)
{
struct x25_route *rt;
- struct list_head *entry;
int rc = -EINVAL;
write_lock_bh(&x25_route_list_lock);
- list_for_each(entry, &x25_route_list) {
- rt = list_entry(entry, struct x25_route, node);
-
+ list_for_each_entry(rt, &x25_route_list, node) {
if (!memcmp(&rt->address, address, sigdigits) &&
rt->sigdigits == sigdigits && rt->dev == dev) {
__x25_remove_route(rt);
@@ -141,13 +135,10 @@ struct net_device *x25_dev_get(char *devname)
struct x25_route *x25_get_route(struct x25_address *addr)
{
struct x25_route *rt, *use = NULL;
- struct list_head *entry;
read_lock_bh(&x25_route_list_lock);
- list_for_each(entry, &x25_route_list) {
- rt = list_entry(entry, struct x25_route, node);
-
+ list_for_each_entry(rt, &x25_route_list, node) {
if (!memcmp(&rt->address, addr, rt->sigdigits)) {
if (!use)
use = rt;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 56a28a686988..f01ef6bda390 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -27,7 +27,7 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
- kfree(umem->pgs);
+ kvfree(umem->pgs);
umem->pgs = NULL;
}
@@ -99,8 +99,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
long npgs;
int err;
- umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
- GFP_KERNEL | __GFP_NOWARN);
+ umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
if (!umem->pgs)
return -ENOMEM;
@@ -123,7 +122,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
out_pin:
xdp_umem_unpin_pages(umem);
out_pgs:
- kfree(umem->pgs);
+ kvfree(umem->pgs);
umem->pgs = NULL;
return err;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cd62d4ba87a9..d6b500dc4208 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -749,7 +749,7 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
}
static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
- struct xdp_sock ***map_entry)
+ struct xdp_sock __rcu ***map_entry)
{
struct xsk_map *map = NULL;
struct xsk_map_node *node;
@@ -785,7 +785,7 @@ static void xsk_delete_from_maps(struct xdp_sock *xs)
* might be updates to the map between
* xsk_get_map_list_entry() and xsk_map_try_sock_delete().
*/
- struct xdp_sock **map_entry = NULL;
+ struct xdp_sock __rcu **map_entry = NULL;
struct xsk_map *map;
while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
@@ -1313,7 +1313,7 @@ static int xsk_notifier(struct notifier_block *this,
if (xs->dev == dev) {
sk->sk_err = ENETDOWN;
if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_error_report(sk);
+ sk_error_report(sk);
xsk_unbind_dev(xs);
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index edcf249ad1f1..a4bc4749faac 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -31,7 +31,7 @@ struct xdp_mmap_offsets_v1 {
struct xsk_map_node {
struct list_head node;
struct xsk_map *map;
- struct xdp_sock **map_entry;
+ struct xdp_sock __rcu **map_entry;
};
static inline struct xdp_sock *xdp_sk(struct sock *sk)
@@ -40,7 +40,7 @@ static inline struct xdp_sock *xdp_sk(struct sock *sk)
}
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
- struct xdp_sock **map_entry);
+ struct xdp_sock __rcu **map_entry);
void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id);
int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
u16 queue_id);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 9d2a89d793c0..9ae13cccfb28 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -128,12 +128,15 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 chunk;
-
- if (desc->len > pool->chunk_size)
- return false;
+ u64 chunk, chunk_end;
chunk = xp_aligned_extract_addr(pool, desc->addr);
+ if (likely(desc->len)) {
+ chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len - 1);
+ if (chunk != chunk_end)
+ return false;
+ }
+
if (chunk >= pool->addrs_cnt)
return false;
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
index 67b4ce504852..2e48d0e094d9 100644
--- a/net/xdp/xskmap.c
+++ b/net/xdp/xskmap.c
@@ -12,7 +12,7 @@
#include "xsk.h"
static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
- struct xdp_sock **map_entry)
+ struct xdp_sock __rcu **map_entry)
{
struct xsk_map_node *node;
@@ -42,7 +42,7 @@ static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
}
static void xsk_map_sock_delete(struct xdp_sock *xs,
- struct xdp_sock **map_entry)
+ struct xdp_sock __rcu **map_entry)
{
struct xsk_map_node *n, *tmp;
@@ -124,6 +124,10 @@ static int xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
return insn - insn_buf;
}
+/* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
+ * by local_bh_disable() (from XDP calls inside NAPI). The
+ * rcu_read_lock_bh_held() below makes lockdep accept both.
+ */
static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
@@ -131,12 +135,11 @@ static void *__xsk_map_lookup_elem(struct bpf_map *map, u32 key)
if (key >= map->max_entries)
return NULL;
- return READ_ONCE(m->xsk_map[key]);
+ return rcu_dereference_check(m->xsk_map[key], rcu_read_lock_bh_held());
}
static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
{
- WARN_ON_ONCE(!rcu_read_lock_held());
return __xsk_map_lookup_elem(map, *(u32 *)key);
}
@@ -149,7 +152,8 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
u64 map_flags)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *xs, *old_xs, **map_entry;
+ struct xdp_sock __rcu **map_entry;
+ struct xdp_sock *xs, *old_xs;
u32 i = *(u32 *)key, fd = *(u32 *)value;
struct xsk_map_node *node;
struct socket *sock;
@@ -179,7 +183,7 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
}
spin_lock_bh(&m->lock);
- old_xs = READ_ONCE(*map_entry);
+ old_xs = rcu_dereference_protected(*map_entry, lockdep_is_held(&m->lock));
if (old_xs == xs) {
err = 0;
goto out;
@@ -191,7 +195,7 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
goto out;
}
xsk_map_sock_add(xs, node);
- WRITE_ONCE(*map_entry, xs);
+ rcu_assign_pointer(*map_entry, xs);
if (old_xs)
xsk_map_sock_delete(old_xs, map_entry);
spin_unlock_bh(&m->lock);
@@ -208,7 +212,8 @@ out:
static int xsk_map_delete_elem(struct bpf_map *map, void *key)
{
struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct xdp_sock *old_xs, **map_entry;
+ struct xdp_sock __rcu **map_entry;
+ struct xdp_sock *old_xs;
int k = *(u32 *)key;
if (k >= map->max_entries)
@@ -216,7 +221,7 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
spin_lock_bh(&m->lock);
map_entry = &m->xsk_map[k];
- old_xs = xchg(map_entry, NULL);
+ old_xs = unrcu_pointer(xchg(map_entry, NULL));
if (old_xs)
xsk_map_sock_delete(old_xs, map_entry);
spin_unlock_bh(&m->lock);
@@ -226,15 +231,16 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
- return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
+ return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+ __xsk_map_lookup_elem);
}
void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
- struct xdp_sock **map_entry)
+ struct xdp_sock __rcu **map_entry)
{
spin_lock_bh(&map->lock);
- if (READ_ONCE(*map_entry) == xs) {
- WRITE_ONCE(*map_entry, NULL);
+ if (rcu_access_pointer(*map_entry) == xs) {
+ rcu_assign_pointer(*map_entry, NULL);
xsk_map_sock_delete(xs, map_entry);
}
spin_unlock_bh(&map->lock);
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index a20aec9d7393..2bf269390163 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]);
nla_for_each_attr(nla, attrs, len, remaining) {
- int err = xfrm_xlate64_attr(dst, nla);
+ int err;
+ switch (type) {
+ case XFRM_MSG_NEWSPDINFO:
+ err = xfrm_nla_cpy(dst, nla, nla_len(nla));
+ break;
+ default:
+ err = xfrm_xlate64_attr(dst, nla);
+ break;
+ }
if (err)
return err;
}
@@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src
/* Calculates len of translated 64-bit message. */
static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
- struct nlattr *attrs[XFRMA_MAX+1])
+ struct nlattr *attrs[XFRMA_MAX + 1],
+ int maxtype)
{
size_t len = nlmsg_len(src);
@@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
case XFRM_MSG_POLEXPIRE:
len += 8;
break;
+ case XFRM_MSG_NEWSPDINFO:
+ /* attributes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+ return len;
default:
break;
}
+ /* Unexpected for anything but XFRM_MSG_NEWSPDINFO; please
+ * correct both 64=>32-bit and 32=>64-bit translators to copy
+ * new attributes.
+ */
+ if (WARN_ON_ONCE(maxtype))
+ return len;
+
if (attrs[XFRMA_SA])
len += 4;
if (attrs[XFRMA_POLICY])
@@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
struct nlattr *attrs[XFRMA_MAX+1],
- size_t size, u8 type, struct netlink_ext_ack *extack)
+ size_t size, u8 type, int maxtype,
+ struct netlink_ext_ack *extack)
{
size_t pos;
int i;
@@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
}
pos = dst->nlmsg_len;
+ if (maxtype) {
+ /* attributes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+ WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO);
+
+ for (i = 1; i <= maxtype; i++) {
+ int err;
+
+ if (!attrs[i])
+ continue;
+
+ /* just copy - no need for translation */
+ err = xfrm_attr_cpy32(dst, &pos, attrs[i], size,
+ nla_len(attrs[i]), nla_len(attrs[i]));
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
for (i = 1; i < XFRMA_MAX + 1; i++) {
int err;
@@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
if (err < 0)
return ERR_PTR(err);
- len = xfrm_user_rcv_calculate_len64(h32, attrs);
+ len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype);
/* The message doesn't need translation */
if (len == nlmsg_len(h32))
return NULL;
@@ -574,7 +613,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
if (!h64)
return ERR_PTR(-ENOMEM);
- err = xfrm_xlate32(h64, h32, attrs, len, type, extack);
+ err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack);
if (err < 0) {
kvfree(h64);
return ERR_PTR(err);
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 6d6917b68856..e843b0d9e2a6 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -268,6 +268,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
xso->num_exthdrs = 0;
xso->flags = 0;
xso->dev = NULL;
+ xso->real_dev = NULL;
dev_put(dev);
if (err != -EOPNOTSUPP)
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index ce66323102f9..d12bb906c9c9 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -131,6 +131,13 @@ __xfrm_spi_hash(const xfrm_address_t *daddr, __be32 spi, u8 proto,
return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
}
+static inline unsigned int
+__xfrm_seq_hash(u32 seq, unsigned int hmask)
+{
+ unsigned int h = seq;
+ return (h ^ (h >> 10) ^ (h >> 20)) & hmask;
+}
+
static inline unsigned int __idx_hash(u32 index, unsigned int hmask)
{
return (index ^ (index >> 8)) & hmask;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1158cd0311d7..3df0861d4390 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -612,7 +612,7 @@ lock:
goto drop_unlock;
}
- if (x->repl->check(x, skb, seq)) {
+ if (xfrm_replay_check(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
@@ -660,12 +660,12 @@ resume:
/* only the first xfrm gets the encap type */
encap_type = 0;
- if (x->repl->recheck(x, skb, seq)) {
+ if (xfrm_replay_recheck(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
- x->repl->advance(x, seq);
+ xfrm_replay_advance(x, seq);
x->curlft.bytes += skb->len;
x->curlft.packets++;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 8831f5a9e992..41de46b5ffa9 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -432,6 +432,7 @@ static int xfrmi4_err(struct sk_buff *skb, u32 info)
case ICMP_DEST_UNREACH:
if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
return 0;
+ break;
case ICMP_REDIRECT:
break;
default:
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 2e8afe078d61..cb40ff0ff28d 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms)
break;
}
- WARN_ON(!pos);
+ WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list));
if (--pos->users)
return;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e4cb0ff4dcf4..229544bc70c2 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -77,6 +77,83 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type)
+{
+ const unsigned char *nh = skb_network_header(skb);
+ unsigned int offset = sizeof(struct ipv6hdr);
+ unsigned int packet_len;
+ int found_rhdr = 0;
+
+ packet_len = skb_tail_pointer(skb) - nh;
+ *nexthdr = &ipv6_hdr(skb)->nexthdr;
+
+ while (offset <= packet_len) {
+ struct ipv6_opt_hdr *exthdr;
+
+ switch (**nexthdr) {
+ case NEXTHDR_HOP:
+ break;
+ case NEXTHDR_ROUTING:
+ if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) {
+ struct ipv6_rt_hdr *rt;
+
+ rt = (struct ipv6_rt_hdr *)(nh + offset);
+ if (rt->type != 0)
+ return offset;
+ }
+ found_rhdr = 1;
+ break;
+ case NEXTHDR_DEST:
+ /* HAO MUST NOT appear more than once.
+ * XXX: It would be better to scan to the end of the
+ * XXX: packet to check whether a HAO already exists.
+ */
+ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) {
+ net_dbg_ratelimited("mip6: hao exists already, override\n");
+ return offset;
+ }
+
+ if (found_rhdr)
+ return offset;
+
+ break;
+ default:
+ return offset;
+ }
+
+ if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
+ return -EINVAL;
+
+ exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+ offset);
+ offset += ipv6_optlen(exthdr);
+ if (offset > IPV6_MAXPLEN)
+ return -EINVAL;
+ *nexthdr = &exthdr->nexthdr;
+ }
+
+ return -EINVAL;
+}
+#endif
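The walk above advances by ipv6_optlen(exthdr); as a reminder, for hop-by-hop, destination-options and routing headers that length is (hdrlen + 1) * 8 octets. A standalone sketch of the arithmetic, with a hypothetical hdrlen value:

#include <stdio.h>

/* (hdrlen + 1) * 8 is the on-wire size of an IPv6 extension header whose
 * length field is "hdrlen" (RFC 8200); this mirrors what ipv6_optlen()
 * expands to.  The value 2 below is only an example.
 */
static unsigned int ext_hdr_len(unsigned int hdrlen_field)
{
	return (hdrlen_field + 1) << 3;
}

int main(void)
{
	printf("%u\n", ext_hdr_len(2));	/* -> 24 bytes */
	return 0;
}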
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr)
+{
+ switch (x->type->proto) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_ROUTING:
+ return mip6_rthdr_offset(skb, prevhdr, x->type->proto);
+#endif
+ default:
+ break;
+ }
+
+ return ip6_find_1stfragopt(skb, prevhdr);
+}
+#endif
+
/* Add encapsulation header.
*
* The IP header and mutable extension headers will be moved forward to make
@@ -92,7 +169,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
iph = ipv6_hdr(skb);
skb_set_inner_transport_header(skb, skb_transport_offset(skb));
- hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
if (hdr_len < 0)
return hdr_len;
skb_set_mac_header(skb,
@@ -122,7 +199,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
iph = ipv6_hdr(skb);
- hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr);
if (hdr_len < 0)
return hdr_len;
skb_set_mac_header(skb,
@@ -448,7 +525,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
goto error;
}
- err = x->repl->overflow(x, skb);
+ err = xfrm_replay_overflow(x, skb);
if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR);
goto error;
@@ -565,6 +642,42 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
return 0;
}
+/* For partial checksum offload, the outer header checksum is calculated
+ * by software and the inner header checksum is calculated by hardware.
+ * This requires hardware to know the inner packet type to calculate
+ * the inner header checksum. Save inner ip protocol here to avoid
+ * traversing the packet in the vendor's xmit code.
+ * If the encap type is IPIP, just save skb->inner_ipproto. Otherwise,
+ * get the ip protocol from the IP header.
+ */
+static void xfrm_get_inner_ipproto(struct sk_buff *skb)
+{
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ const struct ethhdr *eth;
+
+ if (!xo)
+ return;
+
+ if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) {
+ xo->inner_ipproto = skb->inner_ipproto;
+ return;
+ }
+
+ if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
+ return;
+
+ eth = (struct ethhdr *)skb_inner_mac_header(skb);
+
+ switch (ntohs(eth->h_proto)) {
+ case ETH_P_IPV6:
+ xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr;
+ break;
+ case ETH_P_IP:
+ xo->inner_ipproto = inner_ip_hdr(skb)->protocol;
+ break;
+ }
+}
+
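The comment above explains why the stack records the inner protocol; purely as a hypothetical illustration (not any real driver's API), a vendor xmit path could branch on the saved value instead of re-parsing the inner headers:

#include <stdbool.h>

/* Hypothetical, user-space flavoured sketch: with the inner protocol
 * saved by xfrm_get_inner_ipproto(), an offloading driver no longer has
 * to walk the inner headers to locate the L4 checksum.  The protocol
 * numbers are the standard IANA values for TCP and UDP; everything else
 * falls back to software checksumming in this sketch.
 */
#define SKETCH_IPPROTO_TCP	6
#define SKETCH_IPPROTO_UDP	17

static bool hw_can_csum_inner(unsigned char inner_ipproto)
{
	switch (inner_ipproto) {
	case SKETCH_IPPROTO_TCP:
	case SKETCH_IPPROTO_UDP:
		return true;	/* hardware knows where the checksum lives */
	default:
		return false;	/* leave the inner checksum to software */
	}
}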
int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -594,12 +707,15 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return -ENOMEM;
}
- skb->encapsulation = 1;
sp->olen++;
sp->xvec[sp->len++] = x;
xfrm_state_hold(x);
+ if (skb->encapsulation)
+ xfrm_get_inner_ipproto(skb);
+ skb->encapsulation = 1;
+
if (skb_is_gso(skb)) {
if (skb->inner_protocol)
return xfrm_output_gso(net, sk, skb);
@@ -711,15 +827,8 @@ out:
static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
- unsigned int ptr = 0;
int err;
- if (x->outer_mode.encap == XFRM_MODE_BEET &&
- ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL) >= 0) {
- net_warn_ratelimited("BEET mode doesn't support inner IPv6 fragments\n");
- return -EAFNOSUPPORT;
- }
-
err = xfrm6_tunnel_check_size(skb);
if (err)
return err;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ce500f847b99..37d17a79617c 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
__read_mostly;
static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
return;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- write_seqcount_begin(&xfrm_policy_hash_generation);
+ write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock));
@@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
net->xfrm.policy_bydst[dir].hmask = nhashmask;
- write_seqcount_end(&xfrm_policy_hash_generation);
+ write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
synchronize_rcu();
@@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
- write_seqcount_begin(&xfrm_policy_hash_generation);
+ write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
/* make sure that we can insert the indirect policies again before
* we start with destructive action.
@@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
out_unlock:
__xfrm_policy_inexact_flush(net);
- write_seqcount_end(&xfrm_policy_hash_generation);
+ write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex);
@@ -1902,8 +1901,7 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
match = xfrm_selector_match(sel, fl, family);
if (match)
- ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
- dir);
+ ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid);
return ret;
}
@@ -2095,9 +2093,9 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
rcu_read_lock();
retry:
do {
- sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+ sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
chain = policy_hash_direct(net, daddr, saddr, family, dir);
- } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+ } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));
ret = NULL;
hlist_for_each_entry_rcu(pol, chain, bydst) {
@@ -2128,7 +2126,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
}
skip_inexact:
- if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+ if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
goto retry;
if (ret && !xfrm_pol_hold_rcu(ret))
@@ -2181,8 +2179,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
goto out;
}
err = security_xfrm_policy_lookup(pol->security,
- fl->flowi_secid,
- dir);
+ fl->flowi_secid);
if (!err) {
if (!xfrm_pol_hold_rcu(pol))
goto again;
@@ -3160,6 +3157,11 @@ ok:
return dst;
nopol:
+ if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
+ !xfrm_default_allow(net, dir)) {
+ err = -EPERM;
+ goto error;
+ }
if (!(flags & XFRM_LOOKUP_ICMP)) {
dst = dst_orig;
goto ok;
@@ -3247,7 +3249,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
/*
* 0 or more than 0 is returned when validation is succeeded (either bypass
- * because of optional transport mode, or next index of the mathced secpath
+ * because of optional transport mode, or next index of the matched secpath
* state with the template.
* -1 is returned when no matching template is found.
* Otherwise "-2 - errored_index" is returned.
@@ -3548,6 +3550,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
}
if (!pol) {
+ if (!xfrm_default_allow(net, dir)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
+ return 0;
+ }
+
if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) {
xfrm_secpath_reject(xerr_idx, skb, &fl);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
@@ -3602,6 +3609,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
tpp[ti++] = &pols[pi]->xfrm_vec[i];
}
xfrm_nr = ti;
+
+ if (!xfrm_default_allow(net, dir) && !xfrm_nr) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+ goto reject;
+ }
+
if (npols > 1) {
xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
tpp = stp;
@@ -4084,6 +4097,7 @@ static int __net_init xfrm_net_init(struct net *net)
/* Initialize the per-net locks here */
spin_lock_init(&net->xfrm.xfrm_state_lock);
spin_lock_init(&net->xfrm.xfrm_policy_lock);
+ seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
rv = xfrm_statistics_init(net);
@@ -4128,7 +4142,6 @@ void __init xfrm_init(void)
{
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
- seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
xfrm_input_init();
#ifdef CONFIG_XFRM_ESPINTCP
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index c6a4338a0d08..9277d81b344c 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -34,8 +34,11 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq)
return seq_hi;
}
EXPORT_SYMBOL(xfrm_replay_seqhi);
-;
-static void xfrm_replay_notify(struct xfrm_state *x, int event)
+
+static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event);
+static void xfrm_replay_notify_esn(struct xfrm_state *x, int event);
+
+void xfrm_replay_notify(struct xfrm_state *x, int event)
{
struct km_event c;
/* we send notify messages in case
@@ -48,6 +51,17 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event)
* The state structure must be locked!
*/
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ xfrm_replay_notify_bmp(x, event);
+ return;
+ case XFRM_REPLAY_MODE_ESN:
+ xfrm_replay_notify_esn(x, event);
+ return;
+ }
+
switch (event) {
case XFRM_REPLAY_UPDATE:
if (!x->replay_maxdiff ||
@@ -81,7 +95,7 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event)
x->xflags &= ~XFRM_TIME_DEFER;
}
-static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+static int __xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
{
int err = 0;
struct net *net = xs_net(x);
@@ -98,14 +112,14 @@ static int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
return err;
}
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
}
-static int xfrm_replay_check(struct xfrm_state *x,
- struct sk_buff *skb, __be32 net_seq)
+static int xfrm_replay_check_legacy(struct xfrm_state *x,
+ struct sk_buff *skb, __be32 net_seq)
{
u32 diff;
u32 seq = ntohl(net_seq);
@@ -136,14 +150,26 @@ err:
return -EINVAL;
}
-static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
+static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq);
+static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq);
+
+void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
- u32 diff;
- u32 seq = ntohl(net_seq);
+ u32 diff, seq;
+
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ return xfrm_replay_advance_bmp(x, net_seq);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_advance_esn(x, net_seq);
+ }
if (!x->props.replay_window)
return;
+ seq = ntohl(net_seq);
if (seq > x->replay.seq) {
diff = seq - x->replay.seq;
if (diff < x->props.replay_window)
@@ -157,7 +183,7 @@ static void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
}
if (xfrm_aevent_is_on(xs_net(x)))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
@@ -178,7 +204,7 @@ static int xfrm_replay_overflow_bmp(struct xfrm_state *x, struct sk_buff *skb)
return err;
}
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
@@ -273,7 +299,7 @@ static void xfrm_replay_advance_bmp(struct xfrm_state *x, __be32 net_seq)
replay_esn->bmp[nr] |= (1U << bitnr);
if (xfrm_aevent_is_on(xs_net(x)))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
@@ -416,7 +442,7 @@ static int xfrm_replay_overflow_esn(struct xfrm_state *x, struct sk_buff *skb)
}
}
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
@@ -481,6 +507,21 @@ err:
return -EINVAL;
}
+int xfrm_replay_check(struct xfrm_state *x,
+ struct sk_buff *skb, __be32 net_seq)
+{
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ return xfrm_replay_check_bmp(x, skb, net_seq);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_check_esn(x, skb, net_seq);
+ }
+
+ return xfrm_replay_check_legacy(x, skb, net_seq);
+}
+
static int xfrm_replay_recheck_esn(struct xfrm_state *x,
struct sk_buff *skb, __be32 net_seq)
{
@@ -493,6 +534,22 @@ static int xfrm_replay_recheck_esn(struct xfrm_state *x,
return xfrm_replay_check_esn(x, skb, net_seq);
}
+int xfrm_replay_recheck(struct xfrm_state *x,
+ struct sk_buff *skb, __be32 net_seq)
+{
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ /* no special recheck treatment */
+ return xfrm_replay_check_bmp(x, skb, net_seq);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_recheck_esn(x, skb, net_seq);
+ }
+
+ return xfrm_replay_check_legacy(x, skb, net_seq);
+}
+
static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
{
unsigned int bitnr, nr, i;
@@ -548,7 +605,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
replay_esn->bmp[nr] |= (1U << bitnr);
if (xfrm_aevent_is_on(xs_net(x)))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
#ifdef CONFIG_XFRM_OFFLOAD
@@ -560,7 +617,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk
__u32 oseq = x->replay.oseq;
if (!xo)
- return xfrm_replay_overflow(x, skb);
+ return __xfrm_replay_overflow(x, skb);
if (x->type->flags & XFRM_TYPE_REPLAY_PROT) {
if (!skb_is_gso(skb)) {
@@ -585,7 +642,7 @@ static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *sk
x->replay.oseq = oseq;
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
@@ -625,7 +682,7 @@ static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff
}
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
@@ -674,59 +731,39 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
replay_esn->oseq = oseq;
if (xfrm_aevent_is_on(net))
- x->repl->notify(x, XFRM_REPLAY_UPDATE);
+ xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
return err;
}
-static const struct xfrm_replay xfrm_replay_legacy = {
- .advance = xfrm_replay_advance,
- .check = xfrm_replay_check,
- .recheck = xfrm_replay_check,
- .notify = xfrm_replay_notify,
- .overflow = xfrm_replay_overflow_offload,
-};
-
-static const struct xfrm_replay xfrm_replay_bmp = {
- .advance = xfrm_replay_advance_bmp,
- .check = xfrm_replay_check_bmp,
- .recheck = xfrm_replay_check_bmp,
- .notify = xfrm_replay_notify_bmp,
- .overflow = xfrm_replay_overflow_offload_bmp,
-};
-
-static const struct xfrm_replay xfrm_replay_esn = {
- .advance = xfrm_replay_advance_esn,
- .check = xfrm_replay_check_esn,
- .recheck = xfrm_replay_recheck_esn,
- .notify = xfrm_replay_notify_esn,
- .overflow = xfrm_replay_overflow_offload_esn,
-};
+int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+{
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ return xfrm_replay_overflow_offload_bmp(x, skb);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_overflow_offload_esn(x, skb);
+ }
+
+ return xfrm_replay_overflow_offload(x, skb);
+}
#else
-static const struct xfrm_replay xfrm_replay_legacy = {
- .advance = xfrm_replay_advance,
- .check = xfrm_replay_check,
- .recheck = xfrm_replay_check,
- .notify = xfrm_replay_notify,
- .overflow = xfrm_replay_overflow,
-};
-
-static const struct xfrm_replay xfrm_replay_bmp = {
- .advance = xfrm_replay_advance_bmp,
- .check = xfrm_replay_check_bmp,
- .recheck = xfrm_replay_check_bmp,
- .notify = xfrm_replay_notify_bmp,
- .overflow = xfrm_replay_overflow_bmp,
-};
-
-static const struct xfrm_replay xfrm_replay_esn = {
- .advance = xfrm_replay_advance_esn,
- .check = xfrm_replay_check_esn,
- .recheck = xfrm_replay_recheck_esn,
- .notify = xfrm_replay_notify_esn,
- .overflow = xfrm_replay_overflow_esn,
-};
+int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb)
+{
+ switch (x->repl_mode) {
+ case XFRM_REPLAY_MODE_LEGACY:
+ break;
+ case XFRM_REPLAY_MODE_BMP:
+ return xfrm_replay_overflow_bmp(x, skb);
+ case XFRM_REPLAY_MODE_ESN:
+ return xfrm_replay_overflow_esn(x, skb);
+ }
+
+ return __xfrm_replay_overflow(x, skb);
+}
#endif
int xfrm_init_replay(struct xfrm_state *x)
@@ -741,12 +778,12 @@ int xfrm_init_replay(struct xfrm_state *x)
if (x->props.flags & XFRM_STATE_ESN) {
if (replay_esn->replay_window == 0)
return -EINVAL;
- x->repl = &xfrm_replay_esn;
+ x->repl_mode = XFRM_REPLAY_MODE_ESN;
} else {
- x->repl = &xfrm_replay_bmp;
+ x->repl_mode = XFRM_REPLAY_MODE_BMP;
}
} else {
- x->repl = &xfrm_replay_legacy;
+ x->repl_mode = XFRM_REPLAY_MODE_LEGACY;
}
return 0;
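For readers unfamiliar with what the legacy check/advance pair (now reached through the dispatchers above) actually does, here is a user-space sketch of the classic 32-bit sliding-window anti-replay algorithm. It illustrates the technique only; it is not the kernel implementation:

#include <stdbool.h>
#include <stdint.h>

/* Window of at most 32 packets below the highest sequence number seen.
 * advance() is assumed to be called only after check() accepted seq.
 */
struct replay_window {
	uint32_t top;		/* highest sequence number seen so far */
	uint32_t bitmap;	/* bit i set => (top - i) already seen */
	uint32_t width;		/* window size, 1..32 */
};

static bool replay_check(const struct replay_window *w, uint32_t seq)
{
	uint32_t diff;

	if (!seq)
		return false;			/* sequence number 0 is never valid */
	if (seq > w->top)
		return true;			/* new highest, always accepted */
	diff = w->top - seq;
	if (diff >= w->width)
		return false;			/* too old, outside the window */
	return !(w->bitmap & (1u << diff));	/* reject if already seen */
}

static void replay_advance(struct replay_window *w, uint32_t seq)
{
	if (seq > w->top) {
		uint32_t diff = seq - w->top;

		/* slide the window forward and mark the new top */
		w->bitmap = diff < w->width ? (w->bitmap << diff) | 1u : 1u;
		w->top = seq;
	} else {
		w->bitmap |= 1u << (w->top - seq);	/* mark inside the window */
	}
}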
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 4496f7efa220..a2f4001221d1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -78,10 +78,16 @@ xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
}
+static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
+{
+ return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
+}
+
static void xfrm_hash_transfer(struct hlist_head *list,
struct hlist_head *ndsttable,
struct hlist_head *nsrctable,
struct hlist_head *nspitable,
+ struct hlist_head *nseqtable,
unsigned int nhashmask)
{
struct hlist_node *tmp;
@@ -106,6 +112,11 @@ static void xfrm_hash_transfer(struct hlist_head *list,
nhashmask);
hlist_add_head_rcu(&x->byspi, nspitable + h);
}
+
+ if (x->km.seq) {
+ h = __xfrm_seq_hash(x->km.seq, nhashmask);
+ hlist_add_head_rcu(&x->byseq, nseqtable + h);
+ }
}
}
@@ -117,7 +128,7 @@ static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
static void xfrm_hash_resize(struct work_struct *work)
{
struct net *net = container_of(work, struct net, xfrm.state_hash_work);
- struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
+ struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
unsigned long nsize, osize;
unsigned int nhashmask, ohashmask;
int i;
@@ -137,6 +148,13 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(nsrc, nsize);
return;
}
+ nseq = xfrm_hash_alloc(nsize);
+ if (!nseq) {
+ xfrm_hash_free(ndst, nsize);
+ xfrm_hash_free(nsrc, nsize);
+ xfrm_hash_free(nspi, nsize);
+ return;
+ }
spin_lock_bh(&net->xfrm.xfrm_state_lock);
write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
@@ -144,15 +162,17 @@ static void xfrm_hash_resize(struct work_struct *work)
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
for (i = net->xfrm.state_hmask; i >= 0; i--)
- xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
+ xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);
osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
+ oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
ohashmask = net->xfrm.state_hmask;
rcu_assign_pointer(net->xfrm.state_bydst, ndst);
rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
rcu_assign_pointer(net->xfrm.state_byspi, nspi);
+ rcu_assign_pointer(net->xfrm.state_byseq, nseq);
net->xfrm.state_hmask = nhashmask;
write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
@@ -165,6 +185,7 @@ static void xfrm_hash_resize(struct work_struct *work)
xfrm_hash_free(odst, osize);
xfrm_hash_free(osrc, osize);
xfrm_hash_free(ospi, osize);
+ xfrm_hash_free(oseq, osize);
}
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
@@ -621,6 +642,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
+ INIT_HLIST_NODE(&x->byseq);
hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
x->mtimer.function = xfrm_timer_handler;
timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
@@ -664,6 +686,8 @@ int __xfrm_state_delete(struct xfrm_state *x)
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
+ if (x->km.seq)
+ hlist_del_rcu(&x->byseq);
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@@ -1148,6 +1172,10 @@ found:
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
+ if (x->km.seq) {
+ h = xfrm_seq_hash(net, x->km.seq);
+ hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+ }
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
hrtimer_start(&x->mtimer,
ktime_set(net->xfrm.sysctl_acq_expires, 0),
@@ -1263,6 +1291,12 @@ static void __xfrm_state_insert(struct xfrm_state *x)
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
}
+ if (x->km.seq) {
+ h = xfrm_seq_hash(net, x->km.seq);
+
+ hlist_add_head_rcu(&x->byseq, net->xfrm.state_byseq + h);
+ }
+
hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
if (x->replay_maxage)
mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1932,20 +1966,18 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
{
- int i;
-
- for (i = 0; i <= net->xfrm.state_hmask; i++) {
- struct xfrm_state *x;
+ unsigned int h = xfrm_seq_hash(net, seq);
+ struct xfrm_state *x;
- hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
- if (x->km.seq == seq &&
- (mark & x->mark.m) == x->mark.v &&
- x->km.state == XFRM_STATE_ACQ) {
- xfrm_state_hold(x);
- return x;
- }
+ hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
+ if (x->km.seq == seq &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->km.state == XFRM_STATE_ACQ) {
+ xfrm_state_hold(x);
+ return x;
}
}
+
return NULL;
}
@@ -2145,7 +2177,7 @@ static void xfrm_replay_timer_handler(struct timer_list *t)
if (x->km.state == XFRM_STATE_VALID) {
if (xfrm_aevent_is_on(xs_net(x)))
- x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
+ xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
else
x->xflags |= XFRM_TIME_DEFER;
}
@@ -2518,7 +2550,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
}
EXPORT_SYMBOL(xfrm_state_delete_tunnel);
-u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
+u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
const struct xfrm_type *type = READ_ONCE(x->type);
struct crypto_aead *aead;
@@ -2549,7 +2581,17 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
net_adj) & ~(blksize - 1)) + net_adj - 2;
}
-EXPORT_SYMBOL_GPL(xfrm_state_mtu);
+EXPORT_SYMBOL_GPL(__xfrm_state_mtu);
+
+u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
+{
+ mtu = __xfrm_state_mtu(x, mtu);
+
+ if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU)
+ return IPV6_MIN_MTU;
+
+ return mtu;
+}
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
@@ -2660,6 +2702,9 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byspi = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byspi)
goto out_byspi;
+ net->xfrm.state_byseq = xfrm_hash_alloc(sz);
+ if (!net->xfrm.state_byseq)
+ goto out_byseq;
net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
net->xfrm.state_num = 0;
@@ -2669,6 +2714,8 @@ int __net_init xfrm_state_init(struct net *net)
&net->xfrm.xfrm_state_lock);
return 0;
+out_byseq:
+ xfrm_hash_free(net->xfrm.state_byspi, sz);
out_byspi:
xfrm_hash_free(net->xfrm.state_bysrc, sz);
out_bysrc:
@@ -2688,6 +2735,8 @@ void xfrm_state_fini(struct net *net)
WARN_ON(!list_empty(&net->xfrm.state_all));
sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
+ WARN_ON(!hlist_empty(net->xfrm.state_byseq));
+ xfrm_hash_free(net->xfrm.state_byseq, sz);
WARN_ON(!hlist_empty(net->xfrm.state_byspi));
xfrm_hash_free(net->xfrm.state_byspi, sz);
WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f0aecee4d539..03b66d154b2b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -580,6 +580,20 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
copy_from_user_state(x, p);
+ if (attrs[XFRMA_ENCAP]) {
+ x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
+ sizeof(*x->encap), GFP_KERNEL);
+ if (x->encap == NULL)
+ goto error;
+ }
+
+ if (attrs[XFRMA_COADDR]) {
+ x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
+ sizeof(*x->coaddr), GFP_KERNEL);
+ if (x->coaddr == NULL)
+ goto error;
+ }
+
if (attrs[XFRMA_SA_EXTRA_FLAGS])
x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
@@ -600,23 +614,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
attrs[XFRMA_ALG_COMP])))
goto error;
- if (attrs[XFRMA_ENCAP]) {
- x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
- sizeof(*x->encap), GFP_KERNEL);
- if (x->encap == NULL)
- goto error;
- }
-
if (attrs[XFRMA_TFCPAD])
x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]);
- if (attrs[XFRMA_COADDR]) {
- x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
- sizeof(*x->coaddr), GFP_KERNEL);
- if (x->coaddr == NULL)
- goto error;
- }
-
xfrm_mark_get(attrs, &x->mark);
xfrm_smark_init(attrs, &x->props.smark);
@@ -1961,6 +1961,59 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
return skb;
}
+static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attrs)
+{
+ struct net *net = sock_net(skb->sk);
+ struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
+ u8 dirmask;
+ u8 old_default = net->xfrm.policy_default;
+
+ if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX)
+ return -EINVAL;
+
+ dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK;
+
+ net->xfrm.policy_default = (old_default & (0xff ^ dirmask))
+ | (up->action << up->dirmask);
+
+ rt_genid_bump_all(net);
+
+ return 0;
+}
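The update above packs one action bit per direction into policy_default. A user-space sketch of the same arithmetic; MASK and the sample values are illustrative stand-ins for XFRM_POL_DEFAULT_MASK and a real request, and "dir" plays the role of up->dirmask (which, despite the name, is used as a direction index here):

#include <stdio.h>
#include <stdint.h>

#define MASK 0x7	/* assume three directions: in, out, fwd */

static uint8_t set_default(uint8_t old, unsigned int dir, unsigned int action)
{
	uint8_t bit = (1u << dir) & MASK;	/* the bit selected by the direction */

	/* clear that one bit, then write the requested action into it */
	return (old & (0xff ^ bit)) | (action << dir);
}

int main(void)
{
	/* e.g. old = 0b001, set direction 2 to action 1 -> 0b101 */
	printf("0x%x\n", set_default(0x1, 2, 1));
	return 0;
}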
+
+static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attrs)
+{
+ struct sk_buff *r_skb;
+ struct nlmsghdr *r_nlh;
+ struct net *net = sock_net(skb->sk);
+ struct xfrm_userpolicy_default *r_up, *up;
+ int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default));
+ u32 portid = NETLINK_CB(skb).portid;
+ u32 seq = nlh->nlmsg_seq;
+
+ up = nlmsg_data(nlh);
+
+ r_skb = nlmsg_new(len, GFP_ATOMIC);
+ if (!r_skb)
+ return -ENOMEM;
+
+ r_nlh = nlmsg_put(r_skb, portid, seq, XFRM_MSG_GETDEFAULT, sizeof(*r_up), 0);
+ if (!r_nlh) {
+ kfree_skb(r_skb);
+ return -EMSGSIZE;
+ }
+
+ r_up = nlmsg_data(r_nlh);
+
+ r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask);
+ r_up->dirmask = up->dirmask;
+ nlmsg_end(r_skb, r_nlh);
+
+ return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);
+}
+
static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr **attrs)
{
@@ -2664,6 +2717,8 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
[XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32),
+ [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
+ [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default),
};
EXPORT_SYMBOL_GPL(xfrm_msg_min);
@@ -2743,6 +2798,8 @@ static const struct xfrm_link {
.nla_pol = xfrma_spd_policy,
.nla_max = XFRMA_SPD_MAX },
[XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo },
+ [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_set_default },
+ [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default },
};
static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2811,6 +2868,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
err = link->doit(skb, nlh, attrs);
+ /* We need to free the skb allocated in xfrm_alloc_compat() before
+ * returning from this function, because consume_skb() won't take
+ * care of frag_list since the netlink destructor sets
+ * skb->head to NULL. (see netlink_skb_destructor())
+ */
+ if (skb_has_frag_list(skb)) {
+ kfree_skb(skb_shinfo(skb)->frag_list);
+ skb_shinfo(skb)->frag_list = NULL;
+ }
+
err:
kvfree(nlh64);
return err;