summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c9
-rw-r--r--net/8021q/vlan_dev.c52
-rw-r--r--net/8021q/vlanproc.c2
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/Kconfig2
-rw-r--r--net/appletalk/atalk_proc.c3
-rw-r--r--net/atm/common.c1
-rw-r--r--net/atm/pvc.c1
-rw-r--r--net/atm/resources.c2
-rw-r--r--net/ax25/ax25_uid.c21
-rw-r--r--net/batman-adv/bat_iv_ogm.c99
-rw-r--r--net/batman-adv/bitarray.h6
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c214
-rw-r--r--net/batman-adv/bridge_loop_avoidance.h11
-rw-r--r--net/batman-adv/debugfs.c12
-rw-r--r--net/batman-adv/gateway_client.c57
-rw-r--r--net/batman-adv/hard-interface.c13
-rw-r--r--net/batman-adv/main.c27
-rw-r--r--net/batman-adv/main.h29
-rw-r--r--net/batman-adv/packet.h35
-rw-r--r--net/batman-adv/routing.c85
-rw-r--r--net/batman-adv/send.c8
-rw-r--r--net/batman-adv/soft-interface.c86
-rw-r--r--net/batman-adv/soft-interface.h5
-rw-r--r--net/batman-adv/translation-table.c417
-rw-r--r--net/batman-adv/translation-table.h4
-rw-r--r--net/batman-adv/types.h120
-rw-r--r--net/batman-adv/unicast.c16
-rw-r--r--net/batman-adv/vis.c144
-rw-r--r--net/batman-adv/vis.h2
-rw-r--r--net/bluetooth/a2mp.c16
-rw-r--r--net/bluetooth/af_bluetooth.c139
-rw-r--r--net/bluetooth/bnep/sock.c26
-rw-r--r--net/bluetooth/cmtp/sock.c27
-rw-r--r--net/bluetooth/hci_conn.c104
-rw-r--r--net/bluetooth/hci_core.c14
-rw-r--r--net/bluetooth/hci_event.c218
-rw-r--r--net/bluetooth/hci_sock.c31
-rw-r--r--net/bluetooth/hidp/sock.c26
-rw-r--r--net/bluetooth/l2cap_core.c51
-rw-r--r--net/bluetooth/l2cap_sock.c27
-rw-r--r--net/bluetooth/mgmt.c78
-rw-r--r--net/bluetooth/rfcomm/sock.c16
-rw-r--r--net/bluetooth/rfcomm/tty.c12
-rw-r--r--net/bluetooth/sco.c35
-rw-r--r--net/bluetooth/smp.c15
-rw-r--r--net/bridge/br_device.c30
-rw-r--r--net/bridge/br_fdb.c17
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_netlink.c2
-rw-r--r--net/bridge/br_private.h10
-rw-r--r--net/bridge/br_stp_timer.c2
-rw-r--r--net/bridge/br_sysfs_if.c6
-rw-r--r--net/bridge/netfilter/ebt_log.c2
-rw-r--r--net/bridge/netfilter/ebt_ulog.c3
-rw-r--r--net/bridge/netfilter/ebtable_filter.c4
-rw-r--r--net/bridge/netfilter/ebtable_nat.c4
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/caif/cfsrvl.c5
-rw-r--r--net/caif/chnl_net.c4
-rw-r--r--net/can/gw.c2
-rw-r--r--net/ceph/ceph_common.c26
-rw-r--r--net/ceph/crush/mapper.c13
-rw-r--r--net/ceph/crypto.c1
-rw-r--r--net/ceph/crypto.h3
-rw-r--r--net/ceph/debugfs.c4
-rw-r--r--net/ceph/messenger.c937
-rw-r--r--net/ceph/mon_client.c127
-rw-r--r--net/ceph/msgpool.c7
-rw-r--r--net/ceph/osd_client.c77
-rw-r--r--net/ceph/osdmap.c59
-rw-r--r--net/core/dev.c238
-rw-r--r--net/core/dev_addr_lists.c40
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/ethtool.c12
-rw-r--r--net/core/fib_rules.c6
-rw-r--r--net/core/filter.c35
-rw-r--r--net/core/link_watch.c29
-rw-r--r--net/core/neighbour.c10
-rw-r--r--net/core/net-sysfs.c18
-rw-r--r--net/core/netpoll.c104
-rw-r--r--net/core/netprio_cgroup.c96
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/request_sock.c95
-rw-r--r--net/core/rtnetlink.c55
-rw-r--r--net/core/scm.c48
-rw-r--r--net/core/secure_seq.c1
-rw-r--r--net/core/skbuff.c212
-rw-r--r--net/core/sock.c159
-rw-r--r--net/core/sock_diag.c3
-rw-r--r--net/core/utils.c20
-rw-r--r--net/dcb/dcbnl.c18
-rw-r--r--net/dccp/ccid.h4
-rw-r--r--net/dccp/ccids/ccid3.c1
-rw-r--r--net/decnet/af_decnet.c4
-rw-r--r--net/decnet/dn_dev.c6
-rw-r--r--net/decnet/dn_route.c16
-rw-r--r--net/decnet/dn_table.c12
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c3
-rw-r--r--net/dns_resolver/dns_key.c3
-rw-r--r--net/dsa/dsa.c2
-rw-r--r--net/ieee802154/6lowpan.c53
-rw-r--r--net/ieee802154/nl-mac.c6
-rw-r--r--net/ieee802154/nl-phy.c6
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c27
-rw-r--r--net/ipv4/arp.c4
-rw-r--r--net/ipv4/devinet.c77
-rw-r--r--net/ipv4/fib_frontend.c42
-rw-r--r--net/ipv4/fib_rules.c2
-rw-r--r--net/ipv4/fib_semantics.c52
-rw-r--r--net/ipv4/fib_trie.c76
-rw-r--r--net/ipv4/igmp.c38
-rw-r--r--net/ipv4/inet_connection_sock.c64
-rw-r--r--net/ipv4/inet_diag.c45
-rw-r--r--net/ipv4/inet_fragment.c9
-rw-r--r--net/ipv4/inetpeer.c7
-rw-r--r--net/ipv4/ip_fragment.c17
-rw-r--r--net/ipv4/ip_gre.c128
-rw-r--r--net/ipv4/ip_input.c7
-rw-r--r--net/ipv4/ip_output.c84
-rw-r--r--net/ipv4/ip_vti.c5
-rw-r--r--net/ipv4/ipconfig.c43
-rw-r--r--net/ipv4/ipip.c51
-rw-r--r--net/ipv4/ipmr.c26
-rw-r--r--net/ipv4/netfilter.c41
-rw-r--r--net/ipv4/netfilter/Kconfig90
-rw-r--r--net/ipv4/netfilter/Makefile18
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c18
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c98
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c110
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c3
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c2
-rw-r--r--net/ipv4/netfilter/iptable_filter.c10
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c10
-rw-r--r--net/ipv4/netfilter/iptable_nat.c (renamed from net/ipv4/netfilter/nf_nat_standalone.c)264
-rw-r--r--net/ipv4/netfilter/iptable_raw.c10
-rw-r--r--net/ipv4/netfilter/iptable_security.c5
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c71
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c281
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c21
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c30
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c24
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c214
-rw-r--r--net/ipv4/ping.c22
-rw-r--r--net/ipv4/proc.c4
-rw-r--r--net/ipv4/raw.c18
-rw-r--r--net/ipv4/route.c307
-rw-r--r--net/ipv4/syncookies.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c102
-rw-r--r--net/ipv4/tcp.c167
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_fastopen.c83
-rw-r--r--net/ipv4/tcp_input.c330
-rw-r--r--net/ipv4/tcp_ipv4.c373
-rw-r--r--net/ipv4/tcp_metrics.c366
-rw-r--r--net/ipv4/tcp_minisocks.c77
-rw-r--r--net/ipv4/tcp_output.c76
-rw-r--r--net/ipv4/tcp_timer.c45
-rw-r--r--net/ipv4/udp.c11
-rw-r--r--net/ipv4/udp_diag.c9
-rw-r--r--net/ipv4/xfrm4_input.c4
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/Kconfig16
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c102
-rw-r--r--net/ipv6/addrlabel.c24
-rw-r--r--net/ipv6/esp6.c6
-rw-r--r--net/ipv6/inet6_connection_sock.c23
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/ip6_flowlabel.c47
-rw-r--r--net/ipv6/ip6_gre.c1770
-rw-r--r--net/ipv6/ip6_input.c11
-rw-r--r--net/ipv6/ip6_output.c85
-rw-r--r--net/ipv6/ip6_tunnel.c91
-rw-r--r--net/ipv6/ip6mr.c10
-rw-r--r--net/ipv6/mip6.c20
-rw-r--r--net/ipv6/netfilter.c8
-rw-r--r--net/ipv6/netfilter/Kconfig37
-rw-r--r--net/ipv6/netfilter/Makefile6
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c135
-rw-r--r--net/ipv6/netfilter/ip6t_NPT.c165
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c4
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c4
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c321
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c4
-rw-r--r--net/ipv6/netfilter/ip6table_security.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c137
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c218
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c288
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c90
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/raw.c24
-rw-r--r--net/ipv6/reassembly.c89
-rw-r--r--net/ipv6/route.c134
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/syncookies.c1
-rw-r--r--net/ipv6/tcp_ipv6.c124
-rw-r--r--net/ipv6/udp.c14
-rw-r--r--net/ipv6/xfrm6_policy.c8
-rw-r--r--net/ipx/ipx_proc.c3
-rw-r--r--net/irda/af_irda.c2
-rw-r--r--net/irda/ircomm/ircomm_param.c5
-rw-r--r--net/irda/ircomm/ircomm_tty.c320
-rw-r--r--net/irda/ircomm/ircomm_tty_attach.c40
-rw-r--r--net/irda/ircomm/ircomm_tty_ioctl.c33
-rw-r--r--net/irda/irnetlink.c2
-rw-r--r--net/irda/irttp.c2
-rw-r--r--net/key/af_key.c41
-rw-r--r--net/l2tp/Kconfig1
-rw-r--r--net/l2tp/l2tp_core.c7
-rw-r--r--net/l2tp/l2tp_core.h1
-rw-r--r--net/l2tp/l2tp_eth.c5
-rw-r--r--net/l2tp/l2tp_ip6.c1
-rw-r--r--net/l2tp/l2tp_netlink.c36
-rw-r--r--net/llc/af_llc.c8
-rw-r--r--net/llc/llc_input.c21
-rw-r--r--net/llc/llc_proc.c2
-rw-r--r--net/llc/llc_station.c623
-rw-r--r--net/llc/sysctl_net_llc.c7
-rw-r--r--net/mac80211/aes_cmac.c6
-rw-r--r--net/mac80211/agg-tx.c2
-rw-r--r--net/mac80211/cfg.c103
-rw-r--r--net/mac80211/chan.c67
-rw-r--r--net/mac80211/debugfs.c36
-rw-r--r--net/mac80211/driver-ops.h11
-rw-r--r--net/mac80211/ibss.c50
-rw-r--r--net/mac80211/ieee80211_i.h53
-rw-r--r--net/mac80211/iface.c316
-rw-r--r--net/mac80211/key.c2
-rw-r--r--net/mac80211/led.c2
-rw-r--r--net/mac80211/main.c31
-rw-r--r--net/mac80211/mesh.c49
-rw-r--r--net/mac80211/mesh.h5
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c44
-rw-r--r--net/mac80211/mesh_plink.c85
-rw-r--r--net/mac80211/mesh_sync.c3
-rw-r--r--net/mac80211/mlme.c443
-rw-r--r--net/mac80211/offchannel.c9
-rw-r--r--net/mac80211/rate.h2
-rw-r--r--net/mac80211/rx.c60
-rw-r--r--net/mac80211/scan.c53
-rw-r--r--net/mac80211/sta_info.c123
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c46
-rw-r--r--net/mac80211/trace.h11
-rw-r--r--net/mac80211/tx.c95
-rw-r--r--net/mac80211/util.c108
-rw-r--r--net/netfilter/Kconfig83
-rw-r--r--net/netfilter/Makefile21
-rw-r--r--net/netfilter/core.c21
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c19
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c18
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c9
-rw-r--r--net/netfilter/ipset/ip_set_core.c39
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c15
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c24
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c47
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c25
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c66
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c36
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c9
-rw-r--r--net/netfilter/ipvs/Kconfig3
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c58
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c76
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c30
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c22
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c111
-rw-r--r--net/netfilter/nf_conntrack_amanda.c5
-rw-r--r--net/netfilter/nf_conntrack_core.c31
-rw-r--r--net/netfilter/nf_conntrack_ecache.c2
-rw-r--r--net/netfilter/nf_conntrack_expect.c29
-rw-r--r--net/netfilter/nf_conntrack_ftp.c24
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c232
-rw-r--r--net/netfilter/nf_conntrack_irc.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c124
-rw-r--r--net/netfilter/nf_conntrack_pptp.c18
-rw-r--r--net/netfilter/nf_conntrack_proto.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c37
-rw-r--r--net/netfilter/nf_conntrack_sip.c227
-rw-r--r--net/netfilter/nf_internals.h4
-rw-r--r--net/netfilter/nf_nat_amanda.c (renamed from net/ipv4/netfilter/nf_nat_amanda.c)4
-rw-r--r--net/netfilter/nf_nat_core.c (renamed from net/ipv4/netfilter/nf_nat_core.c)679
-rw-r--r--net/netfilter/nf_nat_ftp.c (renamed from net/ipv4/netfilter/nf_nat_ftp.c)34
-rw-r--r--net/netfilter/nf_nat_helper.c (renamed from net/ipv4/netfilter/nf_nat_helper.c)109
-rw-r--r--net/netfilter/nf_nat_irc.c (renamed from net/ipv4/netfilter/nf_nat_irc.c)10
-rw-r--r--net/netfilter/nf_nat_proto_common.c (renamed from net/ipv4/netfilter/nf_nat_proto_common.c)54
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c (renamed from net/ipv4/netfilter/nf_nat_proto_dccp.c)56
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c (renamed from net/ipv4/netfilter/nf_nat_proto_sctp.c)53
-rw-r--r--net/netfilter/nf_nat_proto_tcp.c (renamed from net/ipv4/netfilter/nf_nat_proto_tcp.c)40
-rw-r--r--net/netfilter/nf_nat_proto_udp.c (renamed from net/ipv4/netfilter/nf_nat_proto_udp.c)42
-rw-r--r--net/netfilter/nf_nat_proto_udplite.c (renamed from net/ipv4/netfilter/nf_nat_proto_udplite.c)58
-rw-r--r--net/netfilter/nf_nat_proto_unknown.c (renamed from net/ipv4/netfilter/nf_nat_proto_unknown.c)16
-rw-r--r--net/netfilter/nf_nat_sip.c (renamed from net/ipv4/netfilter/nf_nat_sip.c)284
-rw-r--r--net/netfilter/nf_nat_tftp.c (renamed from net/ipv4/netfilter/nf_nat_tftp.c)1
-rw-r--r--net/netfilter/nf_queue.c10
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nfnetlink_acct.c16
-rw-r--r--net/netfilter/nfnetlink_cthelper.c17
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c12
-rw-r--r--net/netfilter/nfnetlink_log.c47
-rw-r--r--net/netfilter/nfnetlink_queue_core.c50
-rw-r--r--net/netfilter/xt_CT.c262
-rw-r--r--net/netfilter/xt_LOG.c39
-rw-r--r--net/netfilter/xt_NETMAP.c165
-rw-r--r--net/netfilter/xt_NFQUEUE.c8
-rw-r--r--net/netfilter/xt_NOTRACK.c53
-rw-r--r--net/netfilter/xt_REDIRECT.c190
-rw-r--r--net/netfilter/xt_limit.c8
-rw-r--r--net/netfilter/xt_nat.c170
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netfilter/xt_owner.c30
-rw-r--r--net/netfilter/xt_recent.c13
-rw-r--r--net/netfilter/xt_set.c22
-rw-r--r--net/netfilter/xt_socket.c12
-rw-r--r--net/netfilter/xt_time.c24
-rw-r--r--net/netlabel/netlabel_cipso_v4.c2
-rw-r--r--net/netlabel/netlabel_mgmt.c4
-rw-r--r--net/netlabel/netlabel_unlabeled.c4
-rw-r--r--net/netlabel/netlabel_user.c2
-rw-r--r--net/netlink/af_netlink.c216
-rw-r--r--net/netlink/genetlink.c48
-rw-r--r--net/netrom/af_netrom.c9
-rw-r--r--net/nfc/core.c13
-rw-r--r--net/nfc/hci/Makefile4
-rw-r--r--net/nfc/hci/command.c45
-rw-r--r--net/nfc/hci/core.c336
-rw-r--r--net/nfc/hci/hci.h15
-rw-r--r--net/nfc/hci/hcp.c6
-rw-r--r--net/nfc/hci/llc.c170
-rw-r--r--net/nfc/hci/llc.h69
-rw-r--r--net/nfc/hci/llc_nop.c99
-rw-r--r--net/nfc/hci/llc_shdlc.c (renamed from net/nfc/hci/shdlc.c)544
-rw-r--r--net/nfc/llcp/commands.c2
-rw-r--r--net/nfc/llcp/llcp.c131
-rw-r--r--net/nfc/llcp/llcp.h6
-rw-r--r--net/nfc/llcp/sock.c101
-rw-r--r--net/nfc/nci/core.c91
-rw-r--r--net/nfc/nci/ntf.c52
-rw-r--r--net/nfc/nci/rsp.c14
-rw-r--r--net/nfc/netlink.c68
-rw-r--r--net/openvswitch/actions.c11
-rw-r--r--net/openvswitch/datapath.c459
-rw-r--r--net/openvswitch/datapath.h52
-rw-r--r--net/openvswitch/dp_notify.c8
-rw-r--r--net/openvswitch/flow.c21
-rw-r--r--net/openvswitch/flow.h11
-rw-r--r--net/openvswitch/vport-internal_dev.c7
-rw-r--r--net/openvswitch/vport-netdev.c2
-rw-r--r--net/openvswitch/vport.c27
-rw-r--r--net/openvswitch/vport.h13
-rw-r--r--net/packet/Kconfig8
-rw-r--r--net/packet/Makefile2
-rw-r--r--net/packet/af_packet.c157
-rw-r--r--net/packet/diag.c242
-rw-r--r--net/packet/internal.h121
-rw-r--r--net/phonet/pn_netlink.c14
-rw-r--r--net/phonet/socket.c6
-rw-r--r--net/rds/tcp_connect.c4
-rw-r--r--net/rds/tcp_listen.c4
-rw-r--r--net/rds/tcp_recv.c4
-rw-r--r--net/rds/tcp_send.c4
-rw-r--r--net/rfkill/core.c22
-rw-r--r--net/rfkill/input.c3
-rw-r--r--net/rxrpc/ar-key.c6
-rw-r--r--net/sched/act_api.c52
-rw-r--r--net/sched/act_gact.c14
-rw-r--r--net/sched/act_ipt.c7
-rw-r--r--net/sched/act_mirred.c11
-rw-r--r--net/sched/act_pedit.c5
-rw-r--r--net/sched/act_simple.c5
-rw-r--r--net/sched/cls_api.c16
-rw-r--r--net/sched/cls_basic.c3
-rw-r--r--net/sched/cls_cgroup.c25
-rw-r--r--net/sched/cls_flow.c19
-rw-r--r--net/sched/cls_fw.c3
-rw-r--r--net/sched/cls_route.c3
-rw-r--r--net/sched/cls_rsvp.h3
-rw-r--r--net/sched/cls_tcindex.c3
-rw-r--r--net/sched/cls_u32.c3
-rw-r--r--net/sched/em_meta.c2
-rw-r--r--net/sched/sch_api.c44
-rw-r--r--net/sched/sch_cbq.c5
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_generic.c27
-rw-r--r--net/sched/sch_gred.c38
-rw-r--r--net/sched/sch_qfq.c102
-rw-r--r--net/sctp/associola.c25
-rw-r--r--net/sctp/auth.c20
-rw-r--r--net/sctp/bind_addr.c20
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/endpointola.c25
-rw-r--r--net/sctp/input.c117
-rw-r--r--net/sctp/ipv6.c36
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/output.c23
-rw-r--r--net/sctp/outqueue.c43
-rw-r--r--net/sctp/primitive.c4
-rw-r--r--net/sctp/proc.c61
-rw-r--r--net/sctp/protocol.c454
-rw-r--r--net/sctp/sm_make_chunk.c61
-rw-r--r--net/sctp/sm_sideeffect.c30
-rw-r--r--net/sctp/sm_statefuns.c727
-rw-r--r--net/sctp/sm_statetable.c17
-rw-r--r--net/sctp/socket.c119
-rw-r--r--net/sctp/sysctl.c198
-rw-r--r--net/sctp/transport.c23
-rw-r--r--net/sctp/ulpevent.c3
-rw-r--r--net/sctp/ulpqueue.c18
-rw-r--r--net/socket.c94
-rw-r--r--net/sunrpc/Kconfig5
-rw-r--r--net/sunrpc/auth.c54
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c1
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c20
-rw-r--r--net/sunrpc/cache.c7
-rw-r--r--net/sunrpc/clnt.c12
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/sched.c14
-rw-r--r--net/sunrpc/svc_xprt.c10
-rw-r--r--net/sunrpc/svcsock.c2
-rw-r--r--net/sunrpc/xdr.c127
-rw-r--r--net/sunrpc/xprt.c34
-rw-r--r--net/sunrpc/xprtrdma/transport.c4
-rw-r--r--net/sunrpc/xprtsock.c56
-rw-r--r--net/tipc/bearer.c21
-rw-r--r--net/tipc/config.c48
-rw-r--r--net/tipc/core.c22
-rw-r--r--net/tipc/core.h18
-rw-r--r--net/tipc/eth_media.c29
-rw-r--r--net/tipc/handler.c2
-rw-r--r--net/tipc/link.c4
-rw-r--r--net/tipc/name_table.c16
-rw-r--r--net/tipc/net.c3
-rw-r--r--net/tipc/net.h2
-rw-r--r--net/tipc/netlink.c2
-rw-r--r--net/tipc/socket.c1
-rw-r--r--net/tipc/subscr.c4
-rw-r--r--net/unix/af_unix.c114
-rw-r--r--net/unix/diag.c14
-rw-r--r--net/wireless/chan.c7
-rw-r--r--net/wireless/core.c58
-rw-r--r--net/wireless/core.h3
-rw-r--r--net/wireless/mlme.c37
-rw-r--r--net/wireless/nl80211.c266
-rw-r--r--net/wireless/nl80211.h5
-rw-r--r--net/wireless/radiotap.c2
-rw-r--r--net/wireless/reg.c57
-rw-r--r--net/wireless/scan.c2
-rw-r--r--net/wireless/util.c38
-rw-r--r--net/wireless/wext-core.c8
-rw-r--r--net/xfrm/xfrm_input.c2
-rw-r--r--net/xfrm/xfrm_policy.c96
-rw-r--r--net/xfrm/xfrm_replay.c15
-rw-r--r--net/xfrm/xfrm_state.c43
-rw-r--r--net/xfrm/xfrm_user.c146
460 files changed, 17843 insertions, 9632 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 8ca533c95de0..add69d0fd99d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -105,7 +105,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
return NULL;
memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
- skb_reset_mac_len(skb);
return skb;
}
@@ -139,6 +138,8 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
+ skb_reset_mac_len(skb);
+
return skb;
err_free:
@@ -368,3 +369,9 @@ void vlan_vids_del_by_dev(struct net_device *dev,
vlan_vid_del(dev, vid_info->vid);
}
EXPORT_SYMBOL(vlan_vids_del_by_dev);
+
+bool vlan_uses_dev(const struct net_device *dev)
+{
+ return rtnl_dereference(dev->vlan_info) ? true : false;
+}
+EXPORT_SYMBOL(vlan_uses_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 73a2a83ee2da..402442402af7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -137,9 +137,21 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
return rc;
}
+static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (vlan->netpoll)
+ netpoll_send_skb(vlan->netpoll, skb);
+#else
+ BUG();
+#endif
+ return NETDEV_TX_OK;
+}
+
static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
unsigned int len;
int ret;
@@ -150,29 +162,30 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
- vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+ vlan->flags & VLAN_FLAG_REORDER_HDR) {
u16 vlan_tci;
- vlan_tci = vlan_dev_priv(dev)->vlan_id;
+ vlan_tci = vlan->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
}
- skb->dev = vlan_dev_priv(dev)->real_dev;
+ skb->dev = vlan->real_dev;
len = skb->len;
- if (netpoll_tx_running(dev))
- return skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev);
+ if (unlikely(netpoll_tx_running(dev)))
+ return vlan_netpoll_send_skb(vlan, skb);
+
ret = dev_queue_xmit(skb);
if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
struct vlan_pcpu_stats *stats;
- stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats);
+ stats = this_cpu_ptr(vlan->vlan_pcpu_stats);
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
stats->tx_bytes += len;
u64_stats_update_end(&stats->syncp);
} else {
- this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped);
+ this_cpu_inc(vlan->vlan_pcpu_stats->tx_dropped);
}
return ret;
@@ -669,25 +682,26 @@ static void vlan_dev_poll_controller(struct net_device *dev)
return;
}
-static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+static int vlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo,
+ gfp_t gfp)
{
- struct vlan_dev_priv *info = vlan_dev_priv(dev);
- struct net_device *real_dev = info->real_dev;
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+ struct net_device *real_dev = vlan->real_dev;
struct netpoll *netpoll;
int err = 0;
- netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+ netpoll = kzalloc(sizeof(*netpoll), gfp);
err = -ENOMEM;
if (!netpoll)
goto out;
- err = __netpoll_setup(netpoll, real_dev);
+ err = __netpoll_setup(netpoll, real_dev, gfp);
if (err) {
kfree(netpoll);
goto out;
}
- info->netpoll = netpoll;
+ vlan->netpoll = netpoll;
out:
return err;
@@ -695,19 +709,15 @@ out:
static void vlan_dev_netpoll_cleanup(struct net_device *dev)
{
- struct vlan_dev_priv *info = vlan_dev_priv(dev);
- struct netpoll *netpoll = info->netpoll;
+ struct vlan_dev_priv *vlan= vlan_dev_priv(dev);
+ struct netpoll *netpoll = vlan->netpoll;
if (!netpoll)
return;
- info->netpoll = NULL;
-
- /* Wait for transmitting packets to finish before freeing. */
- synchronize_rcu_bh();
+ vlan->netpoll = NULL;
- __netpoll_cleanup(netpoll);
- kfree(netpoll);
+ __netpoll_free_rcu(netpoll);
}
#endif /* CONFIG_NET_POLL_CONTROLLER */
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index c718fd3664b6..4de77ea5fa37 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -105,7 +105,7 @@ static const struct file_operations vlandev_fops = {
};
/*
- * Proc filesystem derectory entries.
+ * Proc filesystem directory entries.
*/
/* Strings */
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 6449bae15702..505f0ce3f10b 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1083,7 +1083,7 @@ int p9_trans_fd_init(void)
void p9_trans_fd_exit(void)
{
- flush_work_sync(&p9_poll_work);
+ flush_work(&p9_poll_work);
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
diff --git a/net/Kconfig b/net/Kconfig
index 245831bec09a..30b48f523135 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -52,6 +52,8 @@ source "net/iucv/Kconfig"
config INET
bool "TCP/IP networking"
+ select CRYPTO
+ select CRYPTO_AES
---help---
These are the protocols used on the Internet and on most local
Ethernets. It is highly recommended to say Y here (this will enlarge
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index b5b1a221c242..c30f3a0717fb 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -183,7 +183,8 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v)
ntohs(at->dest_net), at->dest_node, at->dest_port,
sk_wmem_alloc_get(s),
sk_rmem_alloc_get(s),
- s->sk_state, SOCK_INODE(s->sk_socket)->i_uid);
+ s->sk_state,
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)));
out:
return 0;
}
diff --git a/net/atm/common.c b/net/atm/common.c
index b4b44dbed645..0c0ad930a632 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -812,6 +812,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
if (!vcc->dev || !test_bit(ATM_VF_ADDR, &vcc->flags))
return -ENOTCONN;
+ memset(&pvc, 0, sizeof(pvc));
pvc.sap_family = AF_ATMPVC;
pvc.sap_addr.itf = vcc->dev->number;
pvc.sap_addr.vpi = vcc->vpi;
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 3a734919c36c..ae0324021407 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -95,6 +95,7 @@ static int pvc_getname(struct socket *sock, struct sockaddr *sockaddr,
return -ENOTCONN;
*sockaddr_len = sizeof(struct sockaddr_atmpvc);
addr = (struct sockaddr_atmpvc *)sockaddr;
+ memset(addr, 0, sizeof(*addr));
addr->sap_family = AF_ATMPVC;
addr->sap_addr.itf = vcc->dev->number;
addr->sap_addr.vpi = vcc->vpi;
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 23f45ce6f351..0447d5d0b639 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -432,7 +432,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
size = dev->ops->ioctl(dev, cmd, buf);
}
if (size < 0) {
- error = (size == -ENOIOCTLCMD ? -EINVAL : size);
+ error = (size == -ENOIOCTLCMD ? -ENOTTY : size);
goto done;
}
}
diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c
index e3c579ba6325..957999e43ff7 100644
--- a/net/ax25/ax25_uid.c
+++ b/net/ax25/ax25_uid.c
@@ -51,14 +51,14 @@ int ax25_uid_policy;
EXPORT_SYMBOL(ax25_uid_policy);
-ax25_uid_assoc *ax25_findbyuid(uid_t uid)
+ax25_uid_assoc *ax25_findbyuid(kuid_t uid)
{
ax25_uid_assoc *ax25_uid, *res = NULL;
struct hlist_node *node;
read_lock(&ax25_uid_lock);
ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
- if (ax25_uid->uid == uid) {
+ if (uid_eq(ax25_uid->uid, uid)) {
ax25_uid_hold(ax25_uid);
res = ax25_uid;
break;
@@ -84,7 +84,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
read_lock(&ax25_uid_lock);
ax25_uid_for_each(ax25_uid, node, &ax25_uid_list) {
if (ax25cmp(&sax->sax25_call, &ax25_uid->call) == 0) {
- res = ax25_uid->uid;
+ res = from_kuid_munged(current_user_ns(), ax25_uid->uid);
break;
}
}
@@ -93,9 +93,14 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
return res;
case SIOCAX25ADDUID:
+ {
+ kuid_t sax25_kuid;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- user = ax25_findbyuid(sax->sax25_uid);
+ sax25_kuid = make_kuid(current_user_ns(), sax->sax25_uid);
+ if (!uid_valid(sax25_kuid))
+ return -EINVAL;
+ user = ax25_findbyuid(sax25_kuid);
if (user) {
ax25_uid_put(user);
return -EEXIST;
@@ -106,7 +111,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
return -ENOMEM;
atomic_set(&ax25_uid->refcount, 1);
- ax25_uid->uid = sax->sax25_uid;
+ ax25_uid->uid = sax25_kuid;
ax25_uid->call = sax->sax25_call;
write_lock(&ax25_uid_lock);
@@ -114,7 +119,7 @@ int ax25_uid_ioctl(int cmd, struct sockaddr_ax25 *sax)
write_unlock(&ax25_uid_lock);
return 0;
-
+ }
case SIOCAX25DELUID:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -172,7 +177,9 @@ static int ax25_uid_seq_show(struct seq_file *seq, void *v)
struct ax25_uid_assoc *pt;
pt = hlist_entry(v, struct ax25_uid_assoc, uid_node);
- seq_printf(seq, "%6d %s\n", pt->uid, ax2asc(buf, &pt->call));
+ seq_printf(seq, "%6d %s\n",
+ from_kuid_munged(seq_user_ns(seq), pt->uid),
+ ax2asc(buf, &pt->call));
}
return 0;
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index e877af8bdd1e..b02b75dae3a8 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -166,13 +166,15 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
int16_t buff_pos;
struct batadv_ogm_packet *batadv_ogm_packet;
struct sk_buff *skb;
+ uint8_t *packet_pos;
if (hard_iface->if_status != BATADV_IF_ACTIVE)
return;
packet_num = 0;
buff_pos = 0;
- batadv_ogm_packet = (struct batadv_ogm_packet *)forw_packet->skb->data;
+ packet_pos = forw_packet->skb->data;
+ batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
/* adjust all flags and log packets */
while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len,
@@ -181,15 +183,17 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
/* we might have aggregated direct link packets with an
* ordinary base packet
*/
- if ((forw_packet->direct_link_flags & (1 << packet_num)) &&
- (forw_packet->if_incoming == hard_iface))
+ if (forw_packet->direct_link_flags & BIT(packet_num) &&
+ forw_packet->if_incoming == hard_iface)
batadv_ogm_packet->flags |= BATADV_DIRECTLINK;
else
batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK;
- fwd_str = (packet_num > 0 ? "Forwarding" : (forw_packet->own ?
- "Sending own" :
- "Forwarding"));
+ if (packet_num > 0 || !forw_packet->own)
+ fwd_str = "Forwarding";
+ else
+ fwd_str = "Sending own";
+
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s, ttvn %d) on interface %s [%pM]\n",
fwd_str, (packet_num > 0 ? "aggregated " : ""),
@@ -204,8 +208,8 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet,
buff_pos += BATADV_OGM_HLEN;
buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes);
packet_num++;
- batadv_ogm_packet = (struct batadv_ogm_packet *)
- (forw_packet->skb->data + buff_pos);
+ packet_pos = forw_packet->skb->data + buff_pos;
+ batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
}
/* create clone because function is called more than once */
@@ -227,9 +231,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
struct batadv_hard_iface *primary_if = NULL;
struct batadv_ogm_packet *batadv_ogm_packet;
unsigned char directlink;
+ uint8_t *packet_pos;
- batadv_ogm_packet = (struct batadv_ogm_packet *)
- (forw_packet->skb->data);
+ packet_pos = forw_packet->skb->data;
+ batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
directlink = (batadv_ogm_packet->flags & BATADV_DIRECTLINK ? 1 : 0);
if (!forw_packet->if_incoming) {
@@ -454,6 +459,7 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
int packet_len, bool direct_link)
{
unsigned char *skb_buff;
+ unsigned long new_direct_link_flag;
skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
memcpy(skb_buff, packet_buff, packet_len);
@@ -461,9 +467,10 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr,
forw_packet_aggr->num_packets++;
/* save packet direct link flag status */
- if (direct_link)
- forw_packet_aggr->direct_link_flags |=
- (1 << forw_packet_aggr->num_packets);
+ if (direct_link) {
+ new_direct_link_flag = BIT(forw_packet_aggr->num_packets);
+ forw_packet_aggr->direct_link_flags |= new_direct_link_flag;
+ }
}
static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv,
@@ -586,6 +593,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
struct batadv_ogm_packet *batadv_ogm_packet;
struct batadv_hard_iface *primary_if;
int vis_server, tt_num_changes = 0;
+ uint32_t seqno;
+ uint8_t bandwidth;
vis_server = atomic_read(&bat_priv->vis_mode);
primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -599,12 +608,12 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
batadv_ogm_packet = (struct batadv_ogm_packet *)hard_iface->packet_buff;
/* change sequence number to network order */
- batadv_ogm_packet->seqno =
- htonl((uint32_t)atomic_read(&hard_iface->seqno));
+ seqno = (uint32_t)atomic_read(&hard_iface->seqno);
+ batadv_ogm_packet->seqno = htonl(seqno);
atomic_inc(&hard_iface->seqno);
- batadv_ogm_packet->ttvn = atomic_read(&bat_priv->ttvn);
- batadv_ogm_packet->tt_crc = htons(bat_priv->tt_crc);
+ batadv_ogm_packet->ttvn = atomic_read(&bat_priv->tt.vn);
+ batadv_ogm_packet->tt_crc = htons(bat_priv->tt.local_crc);
if (tt_num_changes >= 0)
batadv_ogm_packet->tt_num_changes = tt_num_changes;
@@ -613,12 +622,13 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
else
batadv_ogm_packet->flags &= ~BATADV_VIS_SERVER;
- if ((hard_iface == primary_if) &&
- (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_SERVER))
- batadv_ogm_packet->gw_flags =
- (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
- else
+ if (hard_iface == primary_if &&
+ atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_SERVER) {
+ bandwidth = (uint8_t)atomic_read(&bat_priv->gw_bandwidth);
+ batadv_ogm_packet->gw_flags = bandwidth;
+ } else {
batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS;
+ }
batadv_slide_own_bcast_window(hard_iface);
batadv_iv_ogm_queue_add(bat_priv, hard_iface->packet_buff,
@@ -642,8 +652,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
struct batadv_neigh_node *router = NULL;
struct batadv_orig_node *orig_node_tmp;
struct hlist_node *node;
- uint8_t bcast_own_sum_orig, bcast_own_sum_neigh;
+ int if_num;
+ uint8_t sum_orig, sum_neigh;
uint8_t *neigh_addr;
+ uint8_t tq_avg;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"update_originator(): Searching and updating originator entry of received packet\n");
@@ -667,8 +679,8 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
spin_lock_bh(&tmp_neigh_node->lq_update_lock);
batadv_ring_buffer_set(tmp_neigh_node->tq_recv,
&tmp_neigh_node->tq_index, 0);
- tmp_neigh_node->tq_avg =
- batadv_ring_buffer_avg(tmp_neigh_node->tq_recv);
+ tq_avg = batadv_ring_buffer_avg(tmp_neigh_node->tq_recv);
+ tmp_neigh_node->tq_avg = tq_avg;
spin_unlock_bh(&tmp_neigh_node->lq_update_lock);
}
@@ -727,17 +739,17 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv,
if (router && (neigh_node->tq_avg == router->tq_avg)) {
orig_node_tmp = router->orig_node;
spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
- bcast_own_sum_orig =
- orig_node_tmp->bcast_own_sum[if_incoming->if_num];
+ if_num = router->if_incoming->if_num;
+ sum_orig = orig_node_tmp->bcast_own_sum[if_num];
spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
orig_node_tmp = neigh_node->orig_node;
spin_lock_bh(&orig_node_tmp->ogm_cnt_lock);
- bcast_own_sum_neigh =
- orig_node_tmp->bcast_own_sum[if_incoming->if_num];
+ if_num = neigh_node->if_incoming->if_num;
+ sum_neigh = orig_node_tmp->bcast_own_sum[if_num];
spin_unlock_bh(&orig_node_tmp->ogm_cnt_lock);
- if (bcast_own_sum_orig >= bcast_own_sum_neigh)
+ if (sum_orig >= sum_neigh)
goto update_tt;
}
@@ -835,8 +847,10 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
spin_unlock_bh(&orig_node->ogm_cnt_lock);
/* pay attention to not get a value bigger than 100 % */
- total_count = (orig_eq_count > neigh_rq_count ?
- neigh_rq_count : orig_eq_count);
+ if (orig_eq_count > neigh_rq_count)
+ total_count = neigh_rq_count;
+ else
+ total_count = orig_eq_count;
/* if we have too few packets (too less data) we set tq_own to zero
* if we receive too few packets it is not considered bidirectional
@@ -910,6 +924,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
int set_mark, ret = -1;
uint32_t seqno = ntohl(batadv_ogm_packet->seqno);
uint8_t *neigh_addr;
+ uint8_t packet_count;
orig_node = batadv_get_orig_node(bat_priv, batadv_ogm_packet->orig);
if (!orig_node)
@@ -944,9 +959,9 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
tmp_neigh_node->real_bits,
seq_diff, set_mark);
- tmp_neigh_node->real_packet_count =
- bitmap_weight(tmp_neigh_node->real_bits,
- BATADV_TQ_LOCAL_WINDOW_SIZE);
+ packet_count = bitmap_weight(tmp_neigh_node->real_bits,
+ BATADV_TQ_LOCAL_WINDOW_SIZE);
+ tmp_neigh_node->real_packet_count = packet_count;
}
rcu_read_unlock();
@@ -1163,9 +1178,12 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
/* if sender is a direct neighbor the sender mac equals
* originator mac
*/
- orig_neigh_node = (is_single_hop_neigh ?
- orig_node :
- batadv_get_orig_node(bat_priv, ethhdr->h_source));
+ if (is_single_hop_neigh)
+ orig_neigh_node = orig_node;
+ else
+ orig_neigh_node = batadv_get_orig_node(bat_priv,
+ ethhdr->h_source);
+
if (!orig_neigh_node)
goto out;
@@ -1251,6 +1269,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
int buff_pos = 0, packet_len;
unsigned char *tt_buff, *packet_buff;
bool ret;
+ uint8_t *packet_pos;
ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN);
if (!ret)
@@ -1281,8 +1300,8 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
buff_pos += BATADV_OGM_HLEN;
buff_pos += batadv_tt_len(batadv_ogm_packet->tt_num_changes);
- batadv_ogm_packet = (struct batadv_ogm_packet *)
- (packet_buff + buff_pos);
+ packet_pos = packet_buff + buff_pos;
+ batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos;
} while (batadv_iv_ogm_aggr_packet(buff_pos, packet_len,
batadv_ogm_packet->tt_num_changes));
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index a081ce1c0514..cebaae7e148b 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -20,8 +20,8 @@
#ifndef _NET_BATMAN_ADV_BITARRAY_H_
#define _NET_BATMAN_ADV_BITARRAY_H_
-/* returns true if the corresponding bit in the given seq_bits indicates true
- * and curr_seqno is within range of last_seqno
+/* Returns 1 if the corresponding bit in the given seq_bits indicates true
+ * and curr_seqno is within range of last_seqno. Otherwise returns 0.
*/
static inline int batadv_test_bit(const unsigned long *seq_bits,
uint32_t last_seqno, uint32_t curr_seqno)
@@ -32,7 +32,7 @@ static inline int batadv_test_bit(const unsigned long *seq_bits,
if (diff < 0 || diff >= BATADV_TQ_LOCAL_WINDOW_SIZE)
return 0;
else
- return test_bit(diff, seq_bits);
+ return test_bit(diff, seq_bits) != 0;
}
/* turn corresponding bit on, so we can remember that we got the packet */
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 6705d35b17ce..0a9084ad19a6 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -133,7 +133,7 @@ static void batadv_claim_free_ref(struct batadv_claim *claim)
static struct batadv_claim *batadv_claim_hash_find(struct batadv_priv *bat_priv,
struct batadv_claim *data)
{
- struct batadv_hashtable *hash = bat_priv->claim_hash;
+ struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
struct hlist_head *head;
struct hlist_node *node;
struct batadv_claim *claim;
@@ -174,7 +174,7 @@ static struct batadv_backbone_gw *
batadv_backbone_hash_find(struct batadv_priv *bat_priv,
uint8_t *addr, short vid)
{
- struct batadv_hashtable *hash = bat_priv->backbone_hash;
+ struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
struct hlist_head *head;
struct hlist_node *node;
struct batadv_backbone_gw search_entry, *backbone_gw;
@@ -218,7 +218,7 @@ batadv_bla_del_backbone_claims(struct batadv_backbone_gw *backbone_gw)
int i;
spinlock_t *list_lock; /* protects write access to the hash lists */
- hash = backbone_gw->bat_priv->claim_hash;
+ hash = backbone_gw->bat_priv->bla.claim_hash;
if (!hash)
return;
@@ -265,7 +265,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
if (!primary_if)
return;
- memcpy(&local_claim_dest, &bat_priv->claim_dest,
+ memcpy(&local_claim_dest, &bat_priv->bla.claim_dest,
sizeof(local_claim_dest));
local_claim_dest.type = claimtype;
@@ -281,7 +281,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
NULL,
/* Ethernet SRC/HW SRC: originator mac */
primary_if->net_dev->dev_addr,
- /* HW DST: FF:43:05:XX:00:00
+ /* HW DST: FF:43:05:XX:YY:YY
* with XX = claim type
* and YY:YY = group id
*/
@@ -295,7 +295,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
/* now we pretend that the client would have sent this ... */
switch (claimtype) {
- case BATADV_CLAIM_TYPE_ADD:
+ case BATADV_CLAIM_TYPE_CLAIM:
/* normal claim frame
* set Ethernet SRC to the clients mac
*/
@@ -303,7 +303,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid);
break;
- case BATADV_CLAIM_TYPE_DEL:
+ case BATADV_CLAIM_TYPE_UNCLAIM:
/* unclaim frame
* set HW SRC to the clients mac
*/
@@ -323,7 +323,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
break;
case BATADV_CLAIM_TYPE_REQUEST:
/* request frame
- * set HW SRC to the special mac containg the crc
+ * set HW SRC and header destination to the receiving backbone
+ * gws mac
*/
memcpy(hw_src, mac, ETH_ALEN);
memcpy(ethhdr->h_dest, mac, ETH_ALEN);
@@ -339,8 +340,9 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
skb_reset_mac_header(skb);
skb->protocol = eth_type_trans(skb, soft_iface);
- bat_priv->stats.rx_packets++;
- bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
+ batadv_inc_counter(bat_priv, BATADV_CNT_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
+ skb->len + ETH_HLEN);
soft_iface->last_rx = jiffies;
netif_rx(skb);
@@ -389,7 +391,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig,
/* one for the hash, one for returning */
atomic_set(&entry->refcount, 2);
- hash_added = batadv_hash_add(bat_priv->backbone_hash,
+ hash_added = batadv_hash_add(bat_priv->bla.backbone_hash,
batadv_compare_backbone_gw,
batadv_choose_backbone_gw, entry,
&entry->hash_entry);
@@ -456,7 +458,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
if (!backbone_gw)
return;
- hash = bat_priv->claim_hash;
+ hash = bat_priv->bla.claim_hash;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -467,7 +469,7 @@ static void batadv_bla_answer_request(struct batadv_priv *bat_priv,
continue;
batadv_bla_send_claim(bat_priv, claim->addr, claim->vid,
- BATADV_CLAIM_TYPE_ADD);
+ BATADV_CLAIM_TYPE_CLAIM);
}
rcu_read_unlock();
}
@@ -497,7 +499,7 @@ static void batadv_bla_send_request(struct batadv_backbone_gw *backbone_gw)
/* no local broadcasts should be sent or received, for now. */
if (!atomic_read(&backbone_gw->request_sent)) {
- atomic_inc(&backbone_gw->bat_priv->bla_num_requests);
+ atomic_inc(&backbone_gw->bat_priv->bla.num_requests);
atomic_set(&backbone_gw->request_sent, 1);
}
}
@@ -557,7 +559,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv,
"bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
mac, vid);
- hash_added = batadv_hash_add(bat_priv->claim_hash,
+ hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
batadv_compare_claim,
batadv_choose_claim, claim,
&claim->hash_entry);
@@ -577,8 +579,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv,
"bla_add_claim(): changing ownership for %pM, vid %d\n",
mac, vid);
- claim->backbone_gw->crc ^=
- crc16(0, claim->addr, ETH_ALEN);
+ claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN);
batadv_backbone_gw_free_ref(claim->backbone_gw);
}
@@ -610,7 +611,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv,
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n",
mac, vid);
- batadv_hash_remove(bat_priv->claim_hash, batadv_compare_claim,
+ batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim,
batadv_choose_claim, claim);
batadv_claim_free_ref(claim); /* reference from the hash is gone */
@@ -657,7 +658,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv,
* we can allow traffic again.
*/
if (atomic_read(&backbone_gw->request_sent)) {
- atomic_dec(&backbone_gw->bat_priv->bla_num_requests);
+ atomic_dec(&backbone_gw->bat_priv->bla.num_requests);
atomic_set(&backbone_gw->request_sent, 0);
}
}
@@ -702,7 +703,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv,
if (primary_if && batadv_compare_eth(backbone_addr,
primary_if->net_dev->dev_addr))
batadv_bla_send_claim(bat_priv, claim_addr, vid,
- BATADV_CLAIM_TYPE_DEL);
+ BATADV_CLAIM_TYPE_UNCLAIM);
backbone_gw = batadv_backbone_hash_find(bat_priv, backbone_addr, vid);
@@ -738,7 +739,7 @@ static int batadv_handle_claim(struct batadv_priv *bat_priv,
batadv_bla_add_claim(bat_priv, claim_addr, vid, backbone_gw);
if (batadv_compare_eth(backbone_addr, primary_if->net_dev->dev_addr))
batadv_bla_send_claim(bat_priv, claim_addr, vid,
- BATADV_CLAIM_TYPE_ADD);
+ BATADV_CLAIM_TYPE_CLAIM);
/* TODO: we could call something like tt_local_del() here. */
@@ -772,7 +773,7 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
struct batadv_bla_claim_dst *bla_dst, *bla_dst_own;
bla_dst = (struct batadv_bla_claim_dst *)hw_dst;
- bla_dst_own = &bat_priv->claim_dest;
+ bla_dst_own = &bat_priv->bla.claim_dest;
/* check if it is a claim packet in general */
if (memcmp(bla_dst->magic, bla_dst_own->magic,
@@ -783,12 +784,12 @@ static int batadv_check_claim_group(struct batadv_priv *bat_priv,
* otherwise assume it is in the hw_src
*/
switch (bla_dst->type) {
- case BATADV_CLAIM_TYPE_ADD:
+ case BATADV_CLAIM_TYPE_CLAIM:
backbone_addr = hw_src;
break;
case BATADV_CLAIM_TYPE_REQUEST:
case BATADV_CLAIM_TYPE_ANNOUNCE:
- case BATADV_CLAIM_TYPE_DEL:
+ case BATADV_CLAIM_TYPE_UNCLAIM:
backbone_addr = ethhdr->h_source;
break;
default:
@@ -904,12 +905,12 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
/* check for the different types of claim frames ... */
switch (bla_dst->type) {
- case BATADV_CLAIM_TYPE_ADD:
+ case BATADV_CLAIM_TYPE_CLAIM:
if (batadv_handle_claim(bat_priv, primary_if, hw_src,
ethhdr->h_source, vid))
return 1;
break;
- case BATADV_CLAIM_TYPE_DEL:
+ case BATADV_CLAIM_TYPE_UNCLAIM:
if (batadv_handle_unclaim(bat_priv, primary_if,
ethhdr->h_source, hw_src, vid))
return 1;
@@ -945,7 +946,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
spinlock_t *list_lock; /* protects write access to the hash lists */
int i;
- hash = bat_priv->backbone_hash;
+ hash = bat_priv->bla.backbone_hash;
if (!hash)
return;
@@ -969,7 +970,7 @@ static void batadv_bla_purge_backbone_gw(struct batadv_priv *bat_priv, int now)
purge_now:
/* don't wait for the pending request anymore */
if (atomic_read(&backbone_gw->request_sent))
- atomic_dec(&bat_priv->bla_num_requests);
+ atomic_dec(&bat_priv->bla.num_requests);
batadv_bla_del_backbone_claims(backbone_gw);
@@ -999,7 +1000,7 @@ static void batadv_bla_purge_claims(struct batadv_priv *bat_priv,
struct batadv_hashtable *hash;
int i;
- hash = bat_priv->claim_hash;
+ hash = bat_priv->bla.claim_hash;
if (!hash)
return;
@@ -1046,11 +1047,12 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
struct hlist_node *node;
struct hlist_head *head;
struct batadv_hashtable *hash;
+ __be16 group;
int i;
/* reset bridge loop avoidance group id */
- bat_priv->claim_dest.group =
- htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN));
+ group = htons(crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN));
+ bat_priv->bla.claim_dest.group = group;
if (!oldif) {
batadv_bla_purge_claims(bat_priv, NULL, 1);
@@ -1058,7 +1060,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
return;
}
- hash = bat_priv->backbone_hash;
+ hash = bat_priv->bla.backbone_hash;
if (!hash)
return;
@@ -1088,8 +1090,8 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv,
/* (re)start the timer */
static void batadv_bla_start_timer(struct batadv_priv *bat_priv)
{
- INIT_DELAYED_WORK(&bat_priv->bla_work, batadv_bla_periodic_work);
- queue_delayed_work(batadv_event_workqueue, &bat_priv->bla_work,
+ INIT_DELAYED_WORK(&bat_priv->bla.work, batadv_bla_periodic_work);
+ queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work,
msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH));
}
@@ -1099,9 +1101,9 @@ static void batadv_bla_start_timer(struct batadv_priv *bat_priv)
*/
static void batadv_bla_periodic_work(struct work_struct *work)
{
- struct delayed_work *delayed_work =
- container_of(work, struct delayed_work, work);
+ struct delayed_work *delayed_work;
struct batadv_priv *bat_priv;
+ struct batadv_priv_bla *priv_bla;
struct hlist_node *node;
struct hlist_head *head;
struct batadv_backbone_gw *backbone_gw;
@@ -1109,7 +1111,9 @@ static void batadv_bla_periodic_work(struct work_struct *work)
struct batadv_hard_iface *primary_if;
int i;
- bat_priv = container_of(delayed_work, struct batadv_priv, bla_work);
+ delayed_work = container_of(work, struct delayed_work, work);
+ priv_bla = container_of(delayed_work, struct batadv_priv_bla, work);
+ bat_priv = container_of(priv_bla, struct batadv_priv, bla);
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
goto out;
@@ -1120,7 +1124,7 @@ static void batadv_bla_periodic_work(struct work_struct *work)
if (!atomic_read(&bat_priv->bridge_loop_avoidance))
goto out;
- hash = bat_priv->backbone_hash;
+ hash = bat_priv->bla.backbone_hash;
if (!hash)
goto out;
@@ -1160,40 +1164,41 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
int i;
uint8_t claim_dest[ETH_ALEN] = {0xff, 0x43, 0x05, 0x00, 0x00, 0x00};
struct batadv_hard_iface *primary_if;
+ uint16_t crc;
+ unsigned long entrytime;
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hash registering\n");
/* setting claim destination address */
- memcpy(&bat_priv->claim_dest.magic, claim_dest, 3);
- bat_priv->claim_dest.type = 0;
+ memcpy(&bat_priv->bla.claim_dest.magic, claim_dest, 3);
+ bat_priv->bla.claim_dest.type = 0;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (primary_if) {
- bat_priv->claim_dest.group =
- htons(crc16(0, primary_if->net_dev->dev_addr,
- ETH_ALEN));
+ crc = crc16(0, primary_if->net_dev->dev_addr, ETH_ALEN);
+ bat_priv->bla.claim_dest.group = htons(crc);
batadv_hardif_free_ref(primary_if);
} else {
- bat_priv->claim_dest.group = 0; /* will be set later */
+ bat_priv->bla.claim_dest.group = 0; /* will be set later */
}
/* initialize the duplicate list */
+ entrytime = jiffies - msecs_to_jiffies(BATADV_DUPLIST_TIMEOUT);
for (i = 0; i < BATADV_DUPLIST_SIZE; i++)
- bat_priv->bcast_duplist[i].entrytime =
- jiffies - msecs_to_jiffies(BATADV_DUPLIST_TIMEOUT);
- bat_priv->bcast_duplist_curr = 0;
+ bat_priv->bla.bcast_duplist[i].entrytime = entrytime;
+ bat_priv->bla.bcast_duplist_curr = 0;
- if (bat_priv->claim_hash)
+ if (bat_priv->bla.claim_hash)
return 0;
- bat_priv->claim_hash = batadv_hash_new(128);
- bat_priv->backbone_hash = batadv_hash_new(32);
+ bat_priv->bla.claim_hash = batadv_hash_new(128);
+ bat_priv->bla.backbone_hash = batadv_hash_new(32);
- if (!bat_priv->claim_hash || !bat_priv->backbone_hash)
+ if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash)
return -ENOMEM;
- batadv_hash_set_lock_class(bat_priv->claim_hash,
+ batadv_hash_set_lock_class(bat_priv->bla.claim_hash,
&batadv_claim_hash_lock_class_key);
- batadv_hash_set_lock_class(bat_priv->backbone_hash,
+ batadv_hash_set_lock_class(bat_priv->bla.backbone_hash,
&batadv_backbone_hash_lock_class_key);
batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla hashes initialized\n");
@@ -1234,8 +1239,9 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
crc = crc16(0, content, length);
for (i = 0; i < BATADV_DUPLIST_SIZE; i++) {
- curr = (bat_priv->bcast_duplist_curr + i) % BATADV_DUPLIST_SIZE;
- entry = &bat_priv->bcast_duplist[curr];
+ curr = (bat_priv->bla.bcast_duplist_curr + i);
+ curr %= BATADV_DUPLIST_SIZE;
+ entry = &bat_priv->bla.bcast_duplist[curr];
/* we can stop searching if the entry is too old ;
* later entries will be even older
@@ -1256,13 +1262,13 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
return 1;
}
/* not found, add a new entry (overwrite the oldest entry) */
- curr = (bat_priv->bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1);
+ curr = (bat_priv->bla.bcast_duplist_curr + BATADV_DUPLIST_SIZE - 1);
curr %= BATADV_DUPLIST_SIZE;
- entry = &bat_priv->bcast_duplist[curr];
+ entry = &bat_priv->bla.bcast_duplist[curr];
entry->crc = crc;
entry->entrytime = jiffies;
memcpy(entry->orig, bcast_packet->orig, ETH_ALEN);
- bat_priv->bcast_duplist_curr = curr;
+ bat_priv->bla.bcast_duplist_curr = curr;
/* allow it, its the first occurence. */
return 0;
@@ -1279,7 +1285,7 @@ int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
*/
int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig)
{
- struct batadv_hashtable *hash = bat_priv->backbone_hash;
+ struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
struct hlist_head *head;
struct hlist_node *node;
struct batadv_backbone_gw *backbone_gw;
@@ -1339,8 +1345,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
if (!pskb_may_pull(skb, hdr_size + sizeof(struct vlan_ethhdr)))
return 0;
- vhdr = (struct vlan_ethhdr *)(((uint8_t *)skb->data) +
- hdr_size);
+ vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size);
vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
}
@@ -1359,18 +1364,18 @@ void batadv_bla_free(struct batadv_priv *bat_priv)
{
struct batadv_hard_iface *primary_if;
- cancel_delayed_work_sync(&bat_priv->bla_work);
+ cancel_delayed_work_sync(&bat_priv->bla.work);
primary_if = batadv_primary_if_get_selected(bat_priv);
- if (bat_priv->claim_hash) {
+ if (bat_priv->bla.claim_hash) {
batadv_bla_purge_claims(bat_priv, primary_if, 1);
- batadv_hash_destroy(bat_priv->claim_hash);
- bat_priv->claim_hash = NULL;
+ batadv_hash_destroy(bat_priv->bla.claim_hash);
+ bat_priv->bla.claim_hash = NULL;
}
- if (bat_priv->backbone_hash) {
+ if (bat_priv->bla.backbone_hash) {
batadv_bla_purge_backbone_gw(bat_priv, 1);
- batadv_hash_destroy(bat_priv->backbone_hash);
- bat_priv->backbone_hash = NULL;
+ batadv_hash_destroy(bat_priv->bla.backbone_hash);
+ bat_priv->bla.backbone_hash = NULL;
}
if (primary_if)
batadv_hardif_free_ref(primary_if);
@@ -1409,7 +1414,7 @@ int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid,
goto allow;
- if (unlikely(atomic_read(&bat_priv->bla_num_requests)))
+ if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
/* don't allow broadcasts while requests are in flight */
if (is_multicast_ether_addr(ethhdr->h_dest) && is_bcast)
goto handled;
@@ -1508,7 +1513,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid)
ethhdr = (struct ethhdr *)skb_mac_header(skb);
- if (unlikely(atomic_read(&bat_priv->bla_num_requests)))
+ if (unlikely(atomic_read(&bat_priv->bla.num_requests)))
/* don't allow broadcasts while requests are in flight */
if (is_multicast_ether_addr(ethhdr->h_dest))
goto handled;
@@ -1564,7 +1569,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
struct batadv_priv *bat_priv = netdev_priv(net_dev);
- struct batadv_hashtable *hash = bat_priv->claim_hash;
+ struct batadv_hashtable *hash = bat_priv->bla.claim_hash;
struct batadv_claim *claim;
struct batadv_hard_iface *primary_if;
struct hlist_node *node;
@@ -1593,7 +1598,7 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
"Claims announced for the mesh %s (orig %pM, group id %04x)\n",
net_dev->name, primary_addr,
- ntohs(bat_priv->claim_dest.group));
+ ntohs(bat_priv->bla.claim_dest.group));
seq_printf(seq, " %-17s %-5s %-17s [o] (%-4s)\n",
"Client", "VID", "Originator", "CRC");
for (i = 0; i < hash->size; i++) {
@@ -1616,3 +1621,68 @@ out:
batadv_hardif_free_ref(primary_if);
return ret;
}
+
+int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset)
+{
+ struct net_device *net_dev = (struct net_device *)seq->private;
+ struct batadv_priv *bat_priv = netdev_priv(net_dev);
+ struct batadv_hashtable *hash = bat_priv->bla.backbone_hash;
+ struct batadv_backbone_gw *backbone_gw;
+ struct batadv_hard_iface *primary_if;
+ struct hlist_node *node;
+ struct hlist_head *head;
+ int secs, msecs;
+ uint32_t i;
+ bool is_own;
+ int ret = 0;
+ uint8_t *primary_addr;
+
+ primary_if = batadv_primary_if_get_selected(bat_priv);
+ if (!primary_if) {
+ ret = seq_printf(seq,
+ "BATMAN mesh %s disabled - please specify interfaces to enable it\n",
+ net_dev->name);
+ goto out;
+ }
+
+ if (primary_if->if_status != BATADV_IF_ACTIVE) {
+ ret = seq_printf(seq,
+ "BATMAN mesh %s disabled - primary interface not active\n",
+ net_dev->name);
+ goto out;
+ }
+
+ primary_addr = primary_if->net_dev->dev_addr;
+ seq_printf(seq,
+ "Backbones announced for the mesh %s (orig %pM, group id %04x)\n",
+ net_dev->name, primary_addr,
+ ntohs(bat_priv->bla.claim_dest.group));
+ seq_printf(seq, " %-17s %-5s %-9s (%-4s)\n",
+ "Originator", "VID", "last seen", "CRC");
+ for (i = 0; i < hash->size; i++) {
+ head = &hash->table[i];
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(backbone_gw, node, head, hash_entry) {
+ msecs = jiffies_to_msecs(jiffies -
+ backbone_gw->lasttime);
+ secs = msecs / 1000;
+ msecs = msecs % 1000;
+
+ is_own = batadv_compare_eth(backbone_gw->orig,
+ primary_addr);
+ if (is_own)
+ continue;
+
+ seq_printf(seq,
+ " * %pM on % 5d % 4i.%03is (%04x)\n",
+ backbone_gw->orig, backbone_gw->vid,
+ secs, msecs, backbone_gw->crc);
+ }
+ rcu_read_unlock();
+ }
+out:
+ if (primary_if)
+ batadv_hardif_free_ref(primary_if);
+ return ret;
+}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 563cfbf94a7f..789cb73bde67 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -27,6 +27,8 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid);
int batadv_bla_is_backbone_gw(struct sk_buff *skb,
struct batadv_orig_node *orig_node, int hdr_size);
int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
+ void *offset);
int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, uint8_t *orig);
int batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
struct batadv_bcast_packet *bcast_packet,
@@ -41,8 +43,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv);
#else /* ifdef CONFIG_BATMAN_ADV_BLA */
static inline int batadv_bla_rx(struct batadv_priv *bat_priv,
- struct sk_buff *skb, short vid,
- bool is_bcast)
+ struct sk_buff *skb, short vid, bool is_bcast)
{
return 0;
}
@@ -66,6 +67,12 @@ static inline int batadv_bla_claim_table_seq_print_text(struct seq_file *seq,
return 0;
}
+static inline int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
+ void *offset)
+{
+ return 0;
+}
+
static inline int batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv,
uint8_t *orig)
{
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 34fbb1667bcd..391d4fb2026f 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -267,6 +267,15 @@ static int batadv_bla_claim_table_open(struct inode *inode, struct file *file)
return single_open(file, batadv_bla_claim_table_seq_print_text,
net_dev);
}
+
+static int batadv_bla_backbone_table_open(struct inode *inode,
+ struct file *file)
+{
+ struct net_device *net_dev = (struct net_device *)inode->i_private;
+ return single_open(file, batadv_bla_backbone_table_seq_print_text,
+ net_dev);
+}
+
#endif
static int batadv_transtable_local_open(struct inode *inode, struct file *file)
@@ -305,6 +314,8 @@ static BATADV_DEBUGINFO(transtable_global, S_IRUGO,
batadv_transtable_global_open);
#ifdef CONFIG_BATMAN_ADV_BLA
static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open);
+static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO,
+ batadv_bla_backbone_table_open);
#endif
static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
batadv_transtable_local_open);
@@ -316,6 +327,7 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
&batadv_debuginfo_transtable_global,
#ifdef CONFIG_BATMAN_ADV_BLA
&batadv_debuginfo_bla_claim_table,
+ &batadv_debuginfo_bla_backbone_table,
#endif
&batadv_debuginfo_transtable_local,
&batadv_debuginfo_vis_data,
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index b421cc49d2cd..15d67abc10a4 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -48,7 +48,7 @@ batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
struct batadv_gw_node *gw_node;
rcu_read_lock();
- gw_node = rcu_dereference(bat_priv->curr_gw);
+ gw_node = rcu_dereference(bat_priv->gw.curr_gw);
if (!gw_node)
goto out;
@@ -91,23 +91,23 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
{
struct batadv_gw_node *curr_gw_node;
- spin_lock_bh(&bat_priv->gw_list_lock);
+ spin_lock_bh(&bat_priv->gw.list_lock);
if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount))
new_gw_node = NULL;
- curr_gw_node = rcu_dereference_protected(bat_priv->curr_gw, 1);
- rcu_assign_pointer(bat_priv->curr_gw, new_gw_node);
+ curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
+ rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node);
if (curr_gw_node)
batadv_gw_node_free_ref(curr_gw_node);
- spin_unlock_bh(&bat_priv->gw_list_lock);
+ spin_unlock_bh(&bat_priv->gw.list_lock);
}
void batadv_gw_deselect(struct batadv_priv *bat_priv)
{
- atomic_set(&bat_priv->gw_reselect, 1);
+ atomic_set(&bat_priv->gw.reselect, 1);
}
static struct batadv_gw_node *
@@ -117,12 +117,17 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
struct hlist_node *node;
struct batadv_gw_node *gw_node, *curr_gw = NULL;
uint32_t max_gw_factor = 0, tmp_gw_factor = 0;
+ uint32_t gw_divisor;
uint8_t max_tq = 0;
int down, up;
+ uint8_t tq_avg;
struct batadv_orig_node *orig_node;
+ gw_divisor = BATADV_TQ_LOCAL_WINDOW_SIZE * BATADV_TQ_LOCAL_WINDOW_SIZE;
+ gw_divisor *= 64;
+
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
+ hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) {
if (gw_node->deleted)
continue;
@@ -134,19 +139,19 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (!atomic_inc_not_zero(&gw_node->refcount))
goto next;
+ tq_avg = router->tq_avg;
+
switch (atomic_read(&bat_priv->gw_sel_class)) {
case 1: /* fast connection */
batadv_gw_bandwidth_to_kbit(orig_node->gw_flags,
&down, &up);
- tmp_gw_factor = (router->tq_avg * router->tq_avg *
- down * 100 * 100) /
- (BATADV_TQ_LOCAL_WINDOW_SIZE *
- BATADV_TQ_LOCAL_WINDOW_SIZE * 64);
+ tmp_gw_factor = tq_avg * tq_avg * down * 100 * 100;
+ tmp_gw_factor /= gw_divisor;
if ((tmp_gw_factor > max_gw_factor) ||
((tmp_gw_factor == max_gw_factor) &&
- (router->tq_avg > max_tq))) {
+ (tq_avg > max_tq))) {
if (curr_gw)
batadv_gw_node_free_ref(curr_gw);
curr_gw = gw_node;
@@ -161,7 +166,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
* soon as a better gateway appears which has
* $routing_class more tq points)
*/
- if (router->tq_avg > max_tq) {
+ if (tq_avg > max_tq) {
if (curr_gw)
batadv_gw_node_free_ref(curr_gw);
curr_gw = gw_node;
@@ -170,8 +175,8 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
break;
}
- if (router->tq_avg > max_tq)
- max_tq = router->tq_avg;
+ if (tq_avg > max_tq)
+ max_tq = tq_avg;
if (tmp_gw_factor > max_gw_factor)
max_gw_factor = tmp_gw_factor;
@@ -200,11 +205,11 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
goto out;
- if (!batadv_atomic_dec_not_zero(&bat_priv->gw_reselect))
- goto out;
-
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+ if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw)
+ goto out;
+
next_gw = batadv_gw_get_best_gw_node(bat_priv);
if (curr_gw == next_gw)
@@ -321,9 +326,9 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
gw_node->orig_node = orig_node;
atomic_set(&gw_node->refcount, 1);
- spin_lock_bh(&bat_priv->gw_list_lock);
- hlist_add_head_rcu(&gw_node->list, &bat_priv->gw_list);
- spin_unlock_bh(&bat_priv->gw_list_lock);
+ spin_lock_bh(&bat_priv->gw.list_lock);
+ hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
+ spin_unlock_bh(&bat_priv->gw.list_lock);
batadv_gw_bandwidth_to_kbit(new_gwflags, &down, &up);
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -350,7 +355,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
+ hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) {
if (gw_node->orig_node != orig_node)
continue;
@@ -404,10 +409,10 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv)
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
- spin_lock_bh(&bat_priv->gw_list_lock);
+ spin_lock_bh(&bat_priv->gw.list_lock);
hlist_for_each_entry_safe(gw_node, node, node_tmp,
- &bat_priv->gw_list, list) {
+ &bat_priv->gw.list, list) {
if (((!gw_node->deleted) ||
(time_before(jiffies, gw_node->deleted + timeout))) &&
atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE)
@@ -420,7 +425,7 @@ void batadv_gw_node_purge(struct batadv_priv *bat_priv)
batadv_gw_node_free_ref(gw_node);
}
- spin_unlock_bh(&bat_priv->gw_list_lock);
+ spin_unlock_bh(&bat_priv->gw.list_lock);
/* gw_deselect() needs to acquire the gw_list_lock */
if (do_deselect)
@@ -496,7 +501,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
primary_if->net_dev->dev_addr, net_dev->name);
rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw_list, list) {
+ hlist_for_each_entry_rcu(gw_node, node, &bat_priv->gw.list, list) {
if (gw_node->deleted)
continue;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 282bf6e9353e..d112fd6750b0 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -103,13 +103,14 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv,
{
struct batadv_vis_packet *vis_packet;
struct batadv_hard_iface *primary_if;
+ struct sk_buff *skb;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if)
goto out;
- vis_packet = (struct batadv_vis_packet *)
- bat_priv->my_vis_info->skb_packet->data;
+ skb = bat_priv->vis.my_info->skb_packet;
+ vis_packet = (struct batadv_vis_packet *)skb->data;
memcpy(vis_packet->vis_orig, primary_if->net_dev->dev_addr, ETH_ALEN);
memcpy(vis_packet->sender_orig,
primary_if->net_dev->dev_addr, ETH_ALEN);
@@ -313,7 +314,13 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
hard_iface->if_num = bat_priv->num_ifaces;
bat_priv->num_ifaces++;
hard_iface->if_status = BATADV_IF_INACTIVE;
- batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
+ ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces);
+ if (ret < 0) {
+ bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
+ bat_priv->num_ifaces--;
+ hard_iface->if_status = BATADV_IF_NOT_IN_USE;
+ goto err_dev;
+ }
hard_iface->batman_adv_ptype.type = ethertype;
hard_iface->batman_adv_ptype.func = batadv_batman_skb_recv;
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 13c88b25ab31..b4aa470bc4a6 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -58,9 +58,6 @@ static int __init batadv_init(void)
batadv_iv_init();
- /* the name should not be longer than 10 chars - see
- * http://lwn.net/Articles/23634/
- */
batadv_event_workqueue = create_singlethread_workqueue("bat_events");
if (!batadv_event_workqueue)
@@ -97,20 +94,20 @@ int batadv_mesh_init(struct net_device *soft_iface)
spin_lock_init(&bat_priv->forw_bat_list_lock);
spin_lock_init(&bat_priv->forw_bcast_list_lock);
- spin_lock_init(&bat_priv->tt_changes_list_lock);
- spin_lock_init(&bat_priv->tt_req_list_lock);
- spin_lock_init(&bat_priv->tt_roam_list_lock);
- spin_lock_init(&bat_priv->tt_buff_lock);
- spin_lock_init(&bat_priv->gw_list_lock);
- spin_lock_init(&bat_priv->vis_hash_lock);
- spin_lock_init(&bat_priv->vis_list_lock);
+ spin_lock_init(&bat_priv->tt.changes_list_lock);
+ spin_lock_init(&bat_priv->tt.req_list_lock);
+ spin_lock_init(&bat_priv->tt.roam_list_lock);
+ spin_lock_init(&bat_priv->tt.last_changeset_lock);
+ spin_lock_init(&bat_priv->gw.list_lock);
+ spin_lock_init(&bat_priv->vis.hash_lock);
+ spin_lock_init(&bat_priv->vis.list_lock);
INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
- INIT_HLIST_HEAD(&bat_priv->gw_list);
- INIT_LIST_HEAD(&bat_priv->tt_changes_list);
- INIT_LIST_HEAD(&bat_priv->tt_req_list);
- INIT_LIST_HEAD(&bat_priv->tt_roam_list);
+ INIT_HLIST_HEAD(&bat_priv->gw.list);
+ INIT_LIST_HEAD(&bat_priv->tt.changes_list);
+ INIT_LIST_HEAD(&bat_priv->tt.req_list);
+ INIT_LIST_HEAD(&bat_priv->tt.roam_list);
ret = batadv_originator_init(bat_priv);
if (ret < 0)
@@ -131,7 +128,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
if (ret < 0)
goto err;
- atomic_set(&bat_priv->gw_reselect, 0);
+ atomic_set(&bat_priv->gw.reselect, 0);
atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
return 0;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 5d8fa0757947..d57b746219de 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2012.3.0"
+#define BATADV_SOURCE_VERSION "2012.4.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -41,13 +41,14 @@
* -> TODO: check influence on BATADV_TQ_LOCAL_WINDOW_SIZE
*/
#define BATADV_PURGE_TIMEOUT 200000 /* 200 seconds */
-#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in miliseconds */
-#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in miliseconds */
+#define BATADV_TT_LOCAL_TIMEOUT 3600000 /* in milliseconds */
+#define BATADV_TT_CLIENT_ROAM_TIMEOUT 600000 /* in milliseconds */
+#define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
/* sliding packet range of received originator messages in sequence numbers
* (should be a multiple of our word size)
*/
#define BATADV_TQ_LOCAL_WINDOW_SIZE 64
-/* miliseconds we have to keep pending tt_req */
+/* milliseconds we have to keep pending tt_req */
#define BATADV_TT_REQUEST_TIMEOUT 3000
#define BATADV_TQ_GLOBAL_WINDOW_SIZE 5
@@ -59,7 +60,7 @@
#define BATADV_TT_OGM_APPEND_MAX 3
/* Time in which a client can roam at most ROAMING_MAX_COUNT times in
- * miliseconds
+ * milliseconds
*/
#define BATADV_ROAMING_MAX_TIME 20000
#define BATADV_ROAMING_MAX_COUNT 5
@@ -123,15 +124,6 @@ enum batadv_uev_type {
/* Append 'batman-adv: ' before kernel messages */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-/* all messages related to routing / flooding / broadcasting / etc */
-enum batadv_dbg_level {
- BATADV_DBG_BATMAN = 1 << 0,
- BATADV_DBG_ROUTES = 1 << 1, /* route added / changed / deleted */
- BATADV_DBG_TT = 1 << 2, /* translation table operations */
- BATADV_DBG_BLA = 1 << 3, /* bridge loop avoidance */
- BATADV_DBG_ALL = 15,
-};
-
/* Kernel headers */
#include <linux/mutex.h> /* mutex */
@@ -173,6 +165,15 @@ int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
+/* all messages related to routing / flooding / broadcasting / etc */
+enum batadv_dbg_level {
+ BATADV_DBG_BATMAN = BIT(0),
+ BATADV_DBG_ROUTES = BIT(1), /* route added / changed / deleted */
+ BATADV_DBG_TT = BIT(2), /* translation table operations */
+ BATADV_DBG_BLA = BIT(3), /* bridge loop avoidance */
+ BATADV_DBG_ALL = 15,
+};
+
#ifdef CONFIG_BATMAN_ADV_DEBUG
int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
__printf(2, 3);
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 8d3e55a96adc..2d23a14c220e 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -37,10 +37,10 @@ enum batadv_packettype {
#define BATADV_COMPAT_VERSION 14
enum batadv_iv_flags {
- BATADV_NOT_BEST_NEXT_HOP = 1 << 3,
- BATADV_PRIMARIES_FIRST_HOP = 1 << 4,
- BATADV_VIS_SERVER = 1 << 5,
- BATADV_DIRECTLINK = 1 << 6,
+ BATADV_NOT_BEST_NEXT_HOP = BIT(3),
+ BATADV_PRIMARIES_FIRST_HOP = BIT(4),
+ BATADV_VIS_SERVER = BIT(5),
+ BATADV_DIRECTLINK = BIT(6),
};
/* ICMP message types */
@@ -60,8 +60,8 @@ enum batadv_vis_packettype {
/* fragmentation defines */
enum batadv_unicast_frag_flags {
- BATADV_UNI_FRAG_HEAD = 1 << 0,
- BATADV_UNI_FRAG_LARGETAIL = 1 << 1,
+ BATADV_UNI_FRAG_HEAD = BIT(0),
+ BATADV_UNI_FRAG_LARGETAIL = BIT(1),
};
/* TT_QUERY subtypes */
@@ -74,26 +74,27 @@ enum batadv_tt_query_packettype {
/* TT_QUERY flags */
enum batadv_tt_query_flags {
- BATADV_TT_FULL_TABLE = 1 << 2,
+ BATADV_TT_FULL_TABLE = BIT(2),
};
/* BATADV_TT_CLIENT flags.
- * Flags from 1 to 1 << 7 are sent on the wire, while flags from 1 << 8 to
- * 1 << 15 are used for local computation only
+ * Flags from BIT(0) to BIT(7) are sent on the wire, while flags from BIT(8) to
+ * BIT(15) are used for local computation only
*/
enum batadv_tt_client_flags {
- BATADV_TT_CLIENT_DEL = 1 << 0,
- BATADV_TT_CLIENT_ROAM = 1 << 1,
- BATADV_TT_CLIENT_WIFI = 1 << 2,
- BATADV_TT_CLIENT_NOPURGE = 1 << 8,
- BATADV_TT_CLIENT_NEW = 1 << 9,
- BATADV_TT_CLIENT_PENDING = 1 << 10,
+ BATADV_TT_CLIENT_DEL = BIT(0),
+ BATADV_TT_CLIENT_ROAM = BIT(1),
+ BATADV_TT_CLIENT_WIFI = BIT(2),
+ BATADV_TT_CLIENT_TEMP = BIT(3),
+ BATADV_TT_CLIENT_NOPURGE = BIT(8),
+ BATADV_TT_CLIENT_NEW = BIT(9),
+ BATADV_TT_CLIENT_PENDING = BIT(10),
};
/* claim frame types for the bridge loop avoidance */
enum batadv_bla_claimframe {
- BATADV_CLAIM_TYPE_ADD = 0x00,
- BATADV_CLAIM_TYPE_DEL = 0x01,
+ BATADV_CLAIM_TYPE_CLAIM = 0x00,
+ BATADV_CLAIM_TYPE_UNCLAIM = 0x01,
BATADV_CLAIM_TYPE_ANNOUNCE = 0x02,
BATADV_CLAIM_TYPE_REQUEST = 0x03,
};
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index bc2b88bbea1f..939fc01371df 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -579,32 +579,45 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
return router;
}
-int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
+static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size)
{
- struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_tt_query_packet *tt_query;
- uint16_t tt_size;
struct ethhdr *ethhdr;
- char tt_flag;
- size_t packet_size;
/* drop packet if it has not necessary minimum size */
- if (unlikely(!pskb_may_pull(skb,
- sizeof(struct batadv_tt_query_packet))))
- goto out;
-
- /* I could need to modify it */
- if (skb_cow(skb, sizeof(struct batadv_tt_query_packet)) < 0)
- goto out;
+ if (unlikely(!pskb_may_pull(skb, hdr_size)))
+ return -1;
ethhdr = (struct ethhdr *)skb_mac_header(skb);
/* packet with unicast indication but broadcast recipient */
if (is_broadcast_ether_addr(ethhdr->h_dest))
- goto out;
+ return -1;
/* packet with broadcast sender address */
if (is_broadcast_ether_addr(ethhdr->h_source))
+ return -1;
+
+ /* not for me */
+ if (!batadv_is_my_mac(ethhdr->h_dest))
+ return -1;
+
+ return 0;
+}
+
+int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_tt_query_packet *tt_query;
+ uint16_t tt_size;
+ int hdr_size = sizeof(*tt_query);
+ char tt_flag;
+ size_t packet_size;
+
+ if (batadv_check_unicast_packet(skb, hdr_size) < 0)
+ return NET_RX_DROP;
+
+ /* I could need to modify it */
+ if (skb_cow(skb, sizeof(struct batadv_tt_query_packet)) < 0)
goto out;
tt_query = (struct batadv_tt_query_packet *)skb->data;
@@ -721,7 +734,7 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
* been incremented yet. This flag will make me check all the incoming
* packets for the correct destination.
*/
- bat_priv->tt_poss_change = true;
+ bat_priv->tt.poss_change = true;
batadv_orig_node_free_ref(orig_node);
out:
@@ -819,31 +832,6 @@ err:
return NULL;
}
-static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size)
-{
- struct ethhdr *ethhdr;
-
- /* drop packet if it has not necessary minimum size */
- if (unlikely(!pskb_may_pull(skb, hdr_size)))
- return -1;
-
- ethhdr = (struct ethhdr *)skb_mac_header(skb);
-
- /* packet with unicast indication but broadcast recipient */
- if (is_broadcast_ether_addr(ethhdr->h_dest))
- return -1;
-
- /* packet with broadcast sender address */
- if (is_broadcast_ether_addr(ethhdr->h_source))
- return -1;
-
- /* not for me */
- if (!batadv_is_my_mac(ethhdr->h_dest))
- return -1;
-
- return 0;
-}
-
static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
@@ -947,8 +935,8 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
unicast_packet = (struct batadv_unicast_packet *)skb->data;
if (batadv_is_my_mac(unicast_packet->dest)) {
- tt_poss_change = bat_priv->tt_poss_change;
- curr_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn);
+ tt_poss_change = bat_priv->tt.poss_change;
+ curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
} else {
orig_node = batadv_orig_hash_find(bat_priv,
unicast_packet->dest);
@@ -993,8 +981,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
} else {
memcpy(unicast_packet->dest, orig_node->orig,
ETH_ALEN);
- curr_ttvn = (uint8_t)
- atomic_read(&orig_node->last_ttvn);
+ curr_ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
batadv_orig_node_free_ref(orig_node);
}
@@ -1025,8 +1012,9 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
/* packet for me */
if (batadv_is_my_mac(unicast_packet->dest)) {
- batadv_interface_rx(recv_if->soft_iface, skb, recv_if,
- hdr_size);
+ batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size,
+ NULL);
+
return NET_RX_SUCCESS;
}
@@ -1063,7 +1051,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
return NET_RX_SUCCESS;
batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if,
- sizeof(struct batadv_unicast_packet));
+ sizeof(struct batadv_unicast_packet), NULL);
return NET_RX_SUCCESS;
}
@@ -1150,7 +1138,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
goto out;
/* broadcast for me */
- batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size);
+ batadv_interface_rx(recv_if->soft_iface, skb, recv_if, hdr_size,
+ orig_node);
ret = NET_RX_SUCCESS;
goto out;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3b4b2daa3b3e..570a8bce0364 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -190,13 +190,13 @@ out:
static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
{
struct batadv_hard_iface *hard_iface;
- struct delayed_work *delayed_work =
- container_of(work, struct delayed_work, work);
+ struct delayed_work *delayed_work;
struct batadv_forw_packet *forw_packet;
struct sk_buff *skb1;
struct net_device *soft_iface;
struct batadv_priv *bat_priv;
+ delayed_work = container_of(work, struct delayed_work, work);
forw_packet = container_of(delayed_work, struct batadv_forw_packet,
delayed_work);
soft_iface = forw_packet->if_incoming->soft_iface;
@@ -239,11 +239,11 @@ out:
void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work)
{
- struct delayed_work *delayed_work =
- container_of(work, struct delayed_work, work);
+ struct delayed_work *delayed_work;
struct batadv_forw_packet *forw_packet;
struct batadv_priv *bat_priv;
+ delayed_work = container_of(work, struct delayed_work, work);
forw_packet = container_of(delayed_work, struct batadv_forw_packet,
delayed_work);
bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 109ea2aae96c..b9a28d2dd3e8 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -93,25 +93,35 @@ static int batadv_interface_release(struct net_device *dev)
static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
- return &bat_priv->stats;
+ struct net_device_stats *stats = &bat_priv->stats;
+
+ stats->tx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_TX);
+ stats->tx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_TX_BYTES);
+ stats->tx_dropped = batadv_sum_counter(bat_priv, BATADV_CNT_TX_DROPPED);
+ stats->rx_packets = batadv_sum_counter(bat_priv, BATADV_CNT_RX);
+ stats->rx_bytes = batadv_sum_counter(bat_priv, BATADV_CNT_RX_BYTES);
+ return stats;
}
static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
{
struct batadv_priv *bat_priv = netdev_priv(dev);
struct sockaddr *addr = p;
+ uint8_t old_addr[ETH_ALEN];
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
+ memcpy(old_addr, dev->dev_addr, ETH_ALEN);
+ memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+
/* only modify transtable if it has been initialized before */
if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE) {
- batadv_tt_local_remove(bat_priv, dev->dev_addr,
+ batadv_tt_local_remove(bat_priv, old_addr,
"mac address changed", false);
batadv_tt_local_add(dev, addr->sa_data, BATADV_NULL_IFINDEX);
}
- memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
dev->addr_assign_type &= ~NET_ADDR_RANDOM;
return 0;
}
@@ -142,6 +152,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
int data_len = skb->len, ret;
short vid __maybe_unused = -1;
bool do_bcast = false;
+ uint32_t seqno;
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
@@ -223,8 +234,8 @@ static int batadv_interface_tx(struct sk_buff *skb,
primary_if->net_dev->dev_addr, ETH_ALEN);
/* set broadcast sequence number */
- bcast_packet->seqno =
- htonl(atomic_inc_return(&bat_priv->bcast_seqno));
+ seqno = atomic_inc_return(&bat_priv->bcast_seqno);
+ bcast_packet->seqno = htonl(seqno);
batadv_add_bcast_packet_to_list(bat_priv, skb, 1);
@@ -246,14 +257,14 @@ static int batadv_interface_tx(struct sk_buff *skb,
goto dropped_freed;
}
- bat_priv->stats.tx_packets++;
- bat_priv->stats.tx_bytes += data_len;
+ batadv_inc_counter(bat_priv, BATADV_CNT_TX);
+ batadv_add_counter(bat_priv, BATADV_CNT_TX_BYTES, data_len);
goto end;
dropped:
kfree_skb(skb);
dropped_freed:
- bat_priv->stats.tx_dropped++;
+ batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
end:
if (primary_if)
batadv_hardif_free_ref(primary_if);
@@ -262,7 +273,7 @@ end:
void batadv_interface_rx(struct net_device *soft_iface,
struct sk_buff *skb, struct batadv_hard_iface *recv_if,
- int hdr_size)
+ int hdr_size, struct batadv_orig_node *orig_node)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
struct ethhdr *ethhdr;
@@ -308,11 +319,16 @@ void batadv_interface_rx(struct net_device *soft_iface,
/* skb->ip_summed = CHECKSUM_UNNECESSARY; */
- bat_priv->stats.rx_packets++;
- bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
+ batadv_inc_counter(bat_priv, BATADV_CNT_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_RX_BYTES,
+ skb->len + ETH_HLEN);
soft_iface->last_rx = jiffies;
+ if (orig_node)
+ batadv_tt_add_temporary_global_entry(bat_priv, orig_node,
+ ethhdr->h_source);
+
if (batadv_is_ap_isolated(bat_priv, ethhdr->h_source, ethhdr->h_dest))
goto dropped;
@@ -379,15 +395,22 @@ struct net_device *batadv_softif_create(const char *name)
if (!soft_iface)
goto out;
+ bat_priv = netdev_priv(soft_iface);
+
+ /* batadv_interface_stats() needs to be available as soon as
+ * register_netdevice() has been called
+ */
+ bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t));
+ if (!bat_priv->bat_counters)
+ goto free_soft_iface;
+
ret = register_netdevice(soft_iface);
if (ret < 0) {
pr_err("Unable to register the batman interface '%s': %i\n",
name, ret);
- goto free_soft_iface;
+ goto free_bat_counters;
}
- bat_priv = netdev_priv(soft_iface);
-
atomic_set(&bat_priv->aggregated_ogms, 1);
atomic_set(&bat_priv->bonding, 0);
atomic_set(&bat_priv->bridge_loop_avoidance, 0);
@@ -405,29 +428,26 @@ struct net_device *batadv_softif_create(const char *name)
atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
atomic_set(&bat_priv->bcast_seqno, 1);
- atomic_set(&bat_priv->ttvn, 0);
- atomic_set(&bat_priv->tt_local_changes, 0);
- atomic_set(&bat_priv->tt_ogm_append_cnt, 0);
- atomic_set(&bat_priv->bla_num_requests, 0);
-
- bat_priv->tt_buff = NULL;
- bat_priv->tt_buff_len = 0;
- bat_priv->tt_poss_change = false;
+ atomic_set(&bat_priv->tt.vn, 0);
+ atomic_set(&bat_priv->tt.local_changes, 0);
+ atomic_set(&bat_priv->tt.ogm_append_cnt, 0);
+#ifdef CONFIG_BATMAN_ADV_BLA
+ atomic_set(&bat_priv->bla.num_requests, 0);
+#endif
+ bat_priv->tt.last_changeset = NULL;
+ bat_priv->tt.last_changeset_len = 0;
+ bat_priv->tt.poss_change = false;
bat_priv->primary_if = NULL;
bat_priv->num_ifaces = 0;
- bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t));
- if (!bat_priv->bat_counters)
- goto unreg_soft_iface;
-
ret = batadv_algo_select(bat_priv, batadv_routing_algo);
if (ret < 0)
- goto free_bat_counters;
+ goto unreg_soft_iface;
ret = batadv_sysfs_add_meshif(soft_iface);
if (ret < 0)
- goto free_bat_counters;
+ goto unreg_soft_iface;
ret = batadv_debugfs_add_meshif(soft_iface);
if (ret < 0)
@@ -443,12 +463,13 @@ unreg_debugfs:
batadv_debugfs_del_meshif(soft_iface);
unreg_sysfs:
batadv_sysfs_del_meshif(soft_iface);
-free_bat_counters:
- free_percpu(bat_priv->bat_counters);
unreg_soft_iface:
+ free_percpu(bat_priv->bat_counters);
unregister_netdevice(soft_iface);
return NULL;
+free_bat_counters:
+ free_percpu(bat_priv->bat_counters);
free_soft_iface:
free_netdev(soft_iface);
out:
@@ -518,6 +539,11 @@ static u32 batadv_get_link(struct net_device *dev)
static const struct {
const char name[ETH_GSTRING_LEN];
} batadv_counters_strings[] = {
+ { "tx" },
+ { "tx_bytes" },
+ { "tx_dropped" },
+ { "rx" },
+ { "rx_bytes" },
{ "forward" },
{ "forward_bytes" },
{ "mgmt_tx" },
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 852c683b06a1..07a08fed28b9 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -21,8 +21,9 @@
#define _NET_BATMAN_ADV_SOFT_INTERFACE_H_
int batadv_skb_head_push(struct sk_buff *skb, unsigned int len);
-void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb,
- struct batadv_hard_iface *recv_if, int hdr_size);
+void batadv_interface_rx(struct net_device *soft_iface,
+ struct sk_buff *skb, struct batadv_hard_iface *recv_if,
+ int hdr_size, struct batadv_orig_node *orig_node);
struct net_device *batadv_softif_create(const char *name);
void batadv_softif_destroy(struct net_device *soft_iface);
int batadv_softif_is_valid(const struct net_device *net_dev);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index a438f4b582fc..112edd371b2f 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -34,6 +34,10 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
static void batadv_tt_purge(struct work_struct *work);
static void
batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry);
+static void batadv_tt_global_del(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ const unsigned char *addr,
+ const char *message, bool roaming);
/* returns 1 if they are the same mac addr */
static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
@@ -46,8 +50,8 @@ static int batadv_compare_tt(const struct hlist_node *node, const void *data2)
static void batadv_tt_start_timer(struct batadv_priv *bat_priv)
{
- INIT_DELAYED_WORK(&bat_priv->tt_work, batadv_tt_purge);
- queue_delayed_work(batadv_event_workqueue, &bat_priv->tt_work,
+ INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge);
+ queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work,
msecs_to_jiffies(5000));
}
@@ -88,7 +92,7 @@ batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const void *data)
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_local_entry *tt_local_entry = NULL;
- tt_common_entry = batadv_tt_hash_find(bat_priv->tt_local_hash, data);
+ tt_common_entry = batadv_tt_hash_find(bat_priv->tt.local_hash, data);
if (tt_common_entry)
tt_local_entry = container_of(tt_common_entry,
struct batadv_tt_local_entry,
@@ -102,7 +106,7 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const void *data)
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_global_entry *tt_global_entry = NULL;
- tt_common_entry = batadv_tt_hash_find(bat_priv->tt_global_hash, data);
+ tt_common_entry = batadv_tt_hash_find(bat_priv->tt.global_hash, data);
if (tt_common_entry)
tt_global_entry = container_of(tt_common_entry,
struct batadv_tt_global_entry,
@@ -152,6 +156,8 @@ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
static void
batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry)
{
+ if (!atomic_dec_and_test(&orig_entry->refcount))
+ return;
/* to avoid race conditions, immediately decrease the tt counter */
atomic_dec(&orig_entry->orig_node->tt_size);
call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
@@ -175,8 +181,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
del_op_requested = flags & BATADV_TT_CLIENT_DEL;
/* check for ADD+DEL or DEL+ADD events */
- spin_lock_bh(&bat_priv->tt_changes_list_lock);
- list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list,
+ spin_lock_bh(&bat_priv->tt.changes_list_lock);
+ list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
list) {
if (!batadv_compare_eth(entry->change.addr, addr))
continue;
@@ -197,20 +203,21 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv,
del:
list_del(&entry->list);
kfree(entry);
+ kfree(tt_change_node);
event_removed = true;
goto unlock;
}
/* track the change in the OGMinterval list */
- list_add_tail(&tt_change_node->list, &bat_priv->tt_changes_list);
+ list_add_tail(&tt_change_node->list, &bat_priv->tt.changes_list);
unlock:
- spin_unlock_bh(&bat_priv->tt_changes_list_lock);
+ spin_unlock_bh(&bat_priv->tt.changes_list_lock);
if (event_removed)
- atomic_dec(&bat_priv->tt_local_changes);
+ atomic_dec(&bat_priv->tt.local_changes);
else
- atomic_inc(&bat_priv->tt_local_changes);
+ atomic_inc(&bat_priv->tt.local_changes);
}
int batadv_tt_len(int changes_num)
@@ -220,12 +227,12 @@ int batadv_tt_len(int changes_num)
static int batadv_tt_local_init(struct batadv_priv *bat_priv)
{
- if (bat_priv->tt_local_hash)
+ if (bat_priv->tt.local_hash)
return 0;
- bat_priv->tt_local_hash = batadv_hash_new(1024);
+ bat_priv->tt.local_hash = batadv_hash_new(1024);
- if (!bat_priv->tt_local_hash)
+ if (!bat_priv->tt.local_hash)
return -ENOMEM;
return 0;
@@ -257,7 +264,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new local tt entry: %pM (ttvn: %d)\n", addr,
- (uint8_t)atomic_read(&bat_priv->ttvn));
+ (uint8_t)atomic_read(&bat_priv->tt.vn));
memcpy(tt_local_entry->common.addr, addr, ETH_ALEN);
tt_local_entry->common.flags = BATADV_NO_FLAGS;
@@ -265,6 +272,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
tt_local_entry->common.flags |= BATADV_TT_CLIENT_WIFI;
atomic_set(&tt_local_entry->common.refcount, 2);
tt_local_entry->last_seen = jiffies;
+ tt_local_entry->common.added_at = tt_local_entry->last_seen;
/* the batman interface mac address should never be purged */
if (batadv_compare_eth(addr, soft_iface->dev_addr))
@@ -276,7 +284,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr,
*/
tt_local_entry->common.flags |= BATADV_TT_CLIENT_NEW;
- hash_added = batadv_hash_add(bat_priv->tt_local_hash, batadv_compare_tt,
+ hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt,
batadv_choose_orig,
&tt_local_entry->common,
&tt_local_entry->common.hash_entry);
@@ -347,7 +355,7 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv,
primary_if = batadv_primary_if_get_selected(bat_priv);
req_len = min_packet_len;
- req_len += batadv_tt_len(atomic_read(&bat_priv->tt_local_changes));
+ req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes));
/* if we have too many changes for one packet don't send any
* and wait for the tt table request which will be fragmented
@@ -380,10 +388,10 @@ static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv,
if (new_len > 0)
tot_changes = new_len / batadv_tt_len(1);
- spin_lock_bh(&bat_priv->tt_changes_list_lock);
- atomic_set(&bat_priv->tt_local_changes, 0);
+ spin_lock_bh(&bat_priv->tt.changes_list_lock);
+ atomic_set(&bat_priv->tt.local_changes, 0);
- list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list,
+ list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
list) {
if (count < tot_changes) {
memcpy(tt_buff + batadv_tt_len(count),
@@ -393,25 +401,25 @@ static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv,
list_del(&entry->list);
kfree(entry);
}
- spin_unlock_bh(&bat_priv->tt_changes_list_lock);
+ spin_unlock_bh(&bat_priv->tt.changes_list_lock);
/* Keep the buffer for possible tt_request */
- spin_lock_bh(&bat_priv->tt_buff_lock);
- kfree(bat_priv->tt_buff);
- bat_priv->tt_buff_len = 0;
- bat_priv->tt_buff = NULL;
+ spin_lock_bh(&bat_priv->tt.last_changeset_lock);
+ kfree(bat_priv->tt.last_changeset);
+ bat_priv->tt.last_changeset_len = 0;
+ bat_priv->tt.last_changeset = NULL;
/* check whether this new OGM has no changes due to size problems */
if (new_len > 0) {
/* if kmalloc() fails we will reply with the full table
* instead of providing the diff
*/
- bat_priv->tt_buff = kmalloc(new_len, GFP_ATOMIC);
- if (bat_priv->tt_buff) {
- memcpy(bat_priv->tt_buff, tt_buff, new_len);
- bat_priv->tt_buff_len = new_len;
+ bat_priv->tt.last_changeset = kmalloc(new_len, GFP_ATOMIC);
+ if (bat_priv->tt.last_changeset) {
+ memcpy(bat_priv->tt.last_changeset, tt_buff, new_len);
+ bat_priv->tt.last_changeset_len = new_len;
}
}
- spin_unlock_bh(&bat_priv->tt_buff_lock);
+ spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
return count;
}
@@ -420,7 +428,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
struct batadv_priv *bat_priv = netdev_priv(net_dev);
- struct batadv_hashtable *hash = bat_priv->tt_local_hash;
+ struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_hard_iface *primary_if;
struct hlist_node *node;
@@ -445,7 +453,7 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
seq_printf(seq,
"Locally retrieved addresses (from %s) announced via TT (TTVN: %u):\n",
- net_dev->name, (uint8_t)atomic_read(&bat_priv->ttvn));
+ net_dev->name, (uint8_t)atomic_read(&bat_priv->tt.vn));
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -543,7 +551,7 @@ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv,
static void batadv_tt_local_purge(struct batadv_priv *bat_priv)
{
- struct batadv_hashtable *hash = bat_priv->tt_local_hash;
+ struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
uint32_t i;
@@ -569,10 +577,10 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
struct hlist_head *head;
uint32_t i;
- if (!bat_priv->tt_local_hash)
+ if (!bat_priv->tt.local_hash)
return;
- hash = bat_priv->tt_local_hash;
+ hash = bat_priv->tt.local_hash;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -592,17 +600,17 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv)
batadv_hash_destroy(hash);
- bat_priv->tt_local_hash = NULL;
+ bat_priv->tt.local_hash = NULL;
}
static int batadv_tt_global_init(struct batadv_priv *bat_priv)
{
- if (bat_priv->tt_global_hash)
+ if (bat_priv->tt.global_hash)
return 0;
- bat_priv->tt_global_hash = batadv_hash_new(1024);
+ bat_priv->tt.global_hash = batadv_hash_new(1024);
- if (!bat_priv->tt_global_hash)
+ if (!bat_priv->tt.global_hash)
return -ENOMEM;
return 0;
@@ -612,62 +620,99 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv)
{
struct batadv_tt_change_node *entry, *safe;
- spin_lock_bh(&bat_priv->tt_changes_list_lock);
+ spin_lock_bh(&bat_priv->tt.changes_list_lock);
- list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list,
+ list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
list) {
list_del(&entry->list);
kfree(entry);
}
- atomic_set(&bat_priv->tt_local_changes, 0);
- spin_unlock_bh(&bat_priv->tt_changes_list_lock);
+ atomic_set(&bat_priv->tt.local_changes, 0);
+ spin_unlock_bh(&bat_priv->tt.changes_list_lock);
}
-/* find out if an orig_node is already in the list of a tt_global_entry.
- * returns 1 if found, 0 otherwise
+/* retrieves the orig_tt_list_entry belonging to orig_node from the
+ * batadv_tt_global_entry list
+ *
+ * returns it with an increased refcounter, NULL if not found
*/
-static bool
-batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
- const struct batadv_orig_node *orig_node)
+static struct batadv_tt_orig_list_entry *
+batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry,
+ const struct batadv_orig_node *orig_node)
{
- struct batadv_tt_orig_list_entry *tmp_orig_entry;
+ struct batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL;
const struct hlist_head *head;
struct hlist_node *node;
- bool found = false;
rcu_read_lock();
head = &entry->orig_list;
hlist_for_each_entry_rcu(tmp_orig_entry, node, head, list) {
- if (tmp_orig_entry->orig_node == orig_node) {
- found = true;
- break;
- }
+ if (tmp_orig_entry->orig_node != orig_node)
+ continue;
+ if (!atomic_inc_not_zero(&tmp_orig_entry->refcount))
+ continue;
+
+ orig_entry = tmp_orig_entry;
+ break;
}
rcu_read_unlock();
+
+ return orig_entry;
+}
+
+/* find out if an orig_node is already in the list of a tt_global_entry.
+ * returns true if found, false otherwise
+ */
+static bool
+batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
+ const struct batadv_orig_node *orig_node)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+ bool found = false;
+
+ orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node);
+ if (orig_entry) {
+ found = true;
+ batadv_tt_orig_list_entry_free_ref(orig_entry);
+ }
+
return found;
}
static void
-batadv_tt_global_add_orig_entry(struct batadv_tt_global_entry *tt_global_entry,
+batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
struct batadv_orig_node *orig_node, int ttvn)
{
struct batadv_tt_orig_list_entry *orig_entry;
+ orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node);
+ if (orig_entry) {
+ /* refresh the ttvn: the current value could be a bogus one that
+ * was added during a "temporary client detection"
+ */
+ orig_entry->ttvn = ttvn;
+ goto out;
+ }
+
orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC);
if (!orig_entry)
- return;
+ goto out;
INIT_HLIST_NODE(&orig_entry->list);
atomic_inc(&orig_node->refcount);
atomic_inc(&orig_node->tt_size);
orig_entry->orig_node = orig_node;
orig_entry->ttvn = ttvn;
+ atomic_set(&orig_entry->refcount, 2);
- spin_lock_bh(&tt_global_entry->list_lock);
+ spin_lock_bh(&tt_global->list_lock);
hlist_add_head_rcu(&orig_entry->list,
- &tt_global_entry->orig_list);
- spin_unlock_bh(&tt_global_entry->list_lock);
+ &tt_global->orig_list);
+ spin_unlock_bh(&tt_global->list_lock);
+out:
+ if (orig_entry)
+ batadv_tt_orig_list_entry_free_ref(orig_entry);
}
/* caller must hold orig_node refcount */
@@ -694,11 +739,12 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
common->flags = flags;
tt_global_entry->roam_at = 0;
atomic_set(&common->refcount, 2);
+ common->added_at = jiffies;
INIT_HLIST_HEAD(&tt_global_entry->orig_list);
spin_lock_init(&tt_global_entry->list_lock);
- hash_added = batadv_hash_add(bat_priv->tt_global_hash,
+ hash_added = batadv_hash_add(bat_priv->tt.global_hash,
batadv_compare_tt,
batadv_choose_orig, common,
&common->hash_entry);
@@ -708,11 +754,20 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
batadv_tt_global_entry_free_ref(tt_global_entry);
goto out_remove;
}
-
- batadv_tt_global_add_orig_entry(tt_global_entry, orig_node,
- ttvn);
} else {
- /* there is already a global entry, use this one. */
+ /* If there is already a global entry, we can use this one for
+ * our processing.
+ * But if we are trying to add a temporary client we can exit
+ * directly because the temporary information should never
+ * override any already known client state (whatever it is)
+ */
+ if (flags & BATADV_TT_CLIENT_TEMP)
+ goto out;
+
+ /* if the client was temporary added before receiving the first
+ * OGM announcing it, we have to clear the TEMP flag
+ */
+ tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_TEMP;
/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
* one originator left in the list and we previously received a
@@ -726,12 +781,9 @@ int batadv_tt_global_add(struct batadv_priv *bat_priv,
tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM;
tt_global_entry->roam_at = 0;
}
-
- if (!batadv_tt_global_entry_has_orig(tt_global_entry,
- orig_node))
- batadv_tt_global_add_orig_entry(tt_global_entry,
- orig_node, ttvn);
}
+ /* add the new orig_entry (if needed) or update it */
+ batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn);
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new global tt entry: %pM (via %pM)\n",
@@ -770,11 +822,12 @@ batadv_tt_global_print_entry(struct batadv_tt_global_entry *tt_global_entry,
hlist_for_each_entry_rcu(orig_entry, node, head, list) {
flags = tt_common_entry->flags;
last_ttvn = atomic_read(&orig_entry->orig_node->last_ttvn);
- seq_printf(seq, " * %pM (%3u) via %pM (%3u) [%c%c]\n",
+ seq_printf(seq, " * %pM (%3u) via %pM (%3u) [%c%c%c]\n",
tt_global_entry->common.addr, orig_entry->ttvn,
orig_entry->orig_node->orig, last_ttvn,
(flags & BATADV_TT_CLIENT_ROAM ? 'R' : '.'),
- (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'));
+ (flags & BATADV_TT_CLIENT_WIFI ? 'W' : '.'),
+ (flags & BATADV_TT_CLIENT_TEMP ? 'T' : '.'));
}
}
@@ -782,7 +835,7 @@ int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset)
{
struct net_device *net_dev = (struct net_device *)seq->private;
struct batadv_priv *bat_priv = netdev_priv(net_dev);
- struct batadv_hashtable *hash = bat_priv->tt_global_hash;
+ struct batadv_hashtable *hash = bat_priv->tt.global_hash;
struct batadv_tt_common_entry *tt_common_entry;
struct batadv_tt_global_entry *tt_global;
struct batadv_hard_iface *primary_if;
@@ -883,7 +936,7 @@ batadv_tt_global_del_struct(struct batadv_priv *bat_priv,
"Deleting global tt entry %pM: %s\n",
tt_global_entry->common.addr, message);
- batadv_hash_remove(bat_priv->tt_global_hash, batadv_compare_tt,
+ batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt,
batadv_choose_orig, tt_global_entry->common.addr);
batadv_tt_global_entry_free_ref(tt_global_entry);
@@ -994,7 +1047,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
struct batadv_tt_global_entry *tt_global;
struct batadv_tt_common_entry *tt_common_entry;
uint32_t i;
- struct batadv_hashtable *hash = bat_priv->tt_global_hash;
+ struct batadv_hashtable *hash = bat_priv->tt.global_hash;
struct hlist_node *node, *safe;
struct hlist_head *head;
spinlock_t *list_lock; /* protects write access to the hash lists */
@@ -1029,49 +1082,63 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
orig_node->tt_initialised = false;
}
-static void batadv_tt_global_roam_purge_list(struct batadv_priv *bat_priv,
- struct hlist_head *head)
+static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global,
+ char **msg)
{
- struct batadv_tt_common_entry *tt_common_entry;
- struct batadv_tt_global_entry *tt_global_entry;
- struct hlist_node *node, *node_tmp;
-
- hlist_for_each_entry_safe(tt_common_entry, node, node_tmp, head,
- hash_entry) {
- tt_global_entry = container_of(tt_common_entry,
- struct batadv_tt_global_entry,
- common);
- if (!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM))
- continue;
- if (!batadv_has_timed_out(tt_global_entry->roam_at,
- BATADV_TT_CLIENT_ROAM_TIMEOUT))
- continue;
+ bool purge = false;
+ unsigned long roam_timeout = BATADV_TT_CLIENT_ROAM_TIMEOUT;
+ unsigned long temp_timeout = BATADV_TT_CLIENT_TEMP_TIMEOUT;
- batadv_dbg(BATADV_DBG_TT, bat_priv,
- "Deleting global tt entry (%pM): Roaming timeout\n",
- tt_global_entry->common.addr);
+ if ((tt_global->common.flags & BATADV_TT_CLIENT_ROAM) &&
+ batadv_has_timed_out(tt_global->roam_at, roam_timeout)) {
+ purge = true;
+ *msg = "Roaming timeout\n";
+ }
- hlist_del_rcu(node);
- batadv_tt_global_entry_free_ref(tt_global_entry);
+ if ((tt_global->common.flags & BATADV_TT_CLIENT_TEMP) &&
+ batadv_has_timed_out(tt_global->common.added_at, temp_timeout)) {
+ purge = true;
+ *msg = "Temporary client timeout\n";
}
+
+ return purge;
}
-static void batadv_tt_global_roam_purge(struct batadv_priv *bat_priv)
+static void batadv_tt_global_purge(struct batadv_priv *bat_priv)
{
- struct batadv_hashtable *hash = bat_priv->tt_global_hash;
+ struct batadv_hashtable *hash = bat_priv->tt.global_hash;
struct hlist_head *head;
+ struct hlist_node *node, *node_tmp;
spinlock_t *list_lock; /* protects write access to the hash lists */
uint32_t i;
+ char *msg = NULL;
+ struct batadv_tt_common_entry *tt_common;
+ struct batadv_tt_global_entry *tt_global;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
list_lock = &hash->list_locks[i];
spin_lock_bh(list_lock);
- batadv_tt_global_roam_purge_list(bat_priv, head);
+ hlist_for_each_entry_safe(tt_common, node, node_tmp, head,
+ hash_entry) {
+ tt_global = container_of(tt_common,
+ struct batadv_tt_global_entry,
+ common);
+
+ if (!batadv_tt_global_to_purge(tt_global, &msg))
+ continue;
+
+ batadv_dbg(BATADV_DBG_TT, bat_priv,
+ "Deleting global tt entry (%pM): %s\n",
+ tt_global->common.addr, msg);
+
+ hlist_del_rcu(node);
+
+ batadv_tt_global_entry_free_ref(tt_global);
+ }
spin_unlock_bh(list_lock);
}
-
}
static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
@@ -1084,10 +1151,10 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
struct hlist_head *head;
uint32_t i;
- if (!bat_priv->tt_global_hash)
+ if (!bat_priv->tt.global_hash)
return;
- hash = bat_priv->tt_global_hash;
+ hash = bat_priv->tt.global_hash;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -1107,7 +1174,7 @@ static void batadv_tt_global_table_free(struct batadv_priv *bat_priv)
batadv_hash_destroy(hash);
- bat_priv->tt_global_hash = NULL;
+ bat_priv->tt.global_hash = NULL;
}
static bool
@@ -1186,7 +1253,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node)
{
uint16_t total = 0, total_one;
- struct batadv_hashtable *hash = bat_priv->tt_global_hash;
+ struct batadv_hashtable *hash = bat_priv->tt.global_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_global_entry *tt_global;
struct hlist_node *node;
@@ -1209,6 +1276,12 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
*/
if (tt_common->flags & BATADV_TT_CLIENT_ROAM)
continue;
+ /* Temporary clients have not been announced yet, so
+ * they have to be skipped while computing the global
+ * crc
+ */
+ if (tt_common->flags & BATADV_TT_CLIENT_TEMP)
+ continue;
/* find out if this global entry is announced by this
* originator
@@ -1233,7 +1306,7 @@ static uint16_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
static uint16_t batadv_tt_local_crc(struct batadv_priv *bat_priv)
{
uint16_t total = 0, total_one;
- struct batadv_hashtable *hash = bat_priv->tt_local_hash;
+ struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct hlist_node *node;
struct hlist_head *head;
@@ -1266,14 +1339,14 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
{
struct batadv_tt_req_node *node, *safe;
- spin_lock_bh(&bat_priv->tt_req_list_lock);
+ spin_lock_bh(&bat_priv->tt.req_list_lock);
- list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) {
+ list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
list_del(&node->list);
kfree(node);
}
- spin_unlock_bh(&bat_priv->tt_req_list_lock);
+ spin_unlock_bh(&bat_priv->tt.req_list_lock);
}
static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv,
@@ -1303,15 +1376,15 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
{
struct batadv_tt_req_node *node, *safe;
- spin_lock_bh(&bat_priv->tt_req_list_lock);
- list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) {
+ spin_lock_bh(&bat_priv->tt.req_list_lock);
+ list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
if (batadv_has_timed_out(node->issued_at,
BATADV_TT_REQUEST_TIMEOUT)) {
list_del(&node->list);
kfree(node);
}
}
- spin_unlock_bh(&bat_priv->tt_req_list_lock);
+ spin_unlock_bh(&bat_priv->tt.req_list_lock);
}
/* returns the pointer to the new tt_req_node struct if no request
@@ -1323,8 +1396,8 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv,
{
struct batadv_tt_req_node *tt_req_node_tmp, *tt_req_node = NULL;
- spin_lock_bh(&bat_priv->tt_req_list_lock);
- list_for_each_entry(tt_req_node_tmp, &bat_priv->tt_req_list, list) {
+ spin_lock_bh(&bat_priv->tt.req_list_lock);
+ list_for_each_entry(tt_req_node_tmp, &bat_priv->tt.req_list, list) {
if (batadv_compare_eth(tt_req_node_tmp, orig_node) &&
!batadv_has_timed_out(tt_req_node_tmp->issued_at,
BATADV_TT_REQUEST_TIMEOUT))
@@ -1338,9 +1411,9 @@ batadv_new_tt_req_node(struct batadv_priv *bat_priv,
memcpy(tt_req_node->addr, orig_node->orig, ETH_ALEN);
tt_req_node->issued_at = jiffies;
- list_add(&tt_req_node->list, &bat_priv->tt_req_list);
+ list_add(&tt_req_node->list, &bat_priv->tt.req_list);
unlock:
- spin_unlock_bh(&bat_priv->tt_req_list_lock);
+ spin_unlock_bh(&bat_priv->tt.req_list_lock);
return tt_req_node;
}
@@ -1362,7 +1435,8 @@ static int batadv_tt_global_valid(const void *entry_ptr,
const struct batadv_tt_global_entry *tt_global_entry;
const struct batadv_orig_node *orig_node = data_ptr;
- if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM)
+ if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM ||
+ tt_common_entry->flags & BATADV_TT_CLIENT_TEMP)
return 0;
tt_global_entry = container_of(tt_common_entry,
@@ -1506,9 +1580,9 @@ out:
if (ret)
kfree_skb(skb);
if (ret && tt_req_node) {
- spin_lock_bh(&bat_priv->tt_req_list_lock);
+ spin_lock_bh(&bat_priv->tt.req_list_lock);
list_del(&tt_req_node->list);
- spin_unlock_bh(&bat_priv->tt_req_list_lock);
+ spin_unlock_bh(&bat_priv->tt.req_list_lock);
kfree(tt_req_node);
}
return ret;
@@ -1529,6 +1603,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
uint16_t tt_len, tt_tot;
struct sk_buff *skb = NULL;
struct batadv_tt_query_packet *tt_response;
+ uint8_t *packet_pos;
size_t len;
batadv_dbg(BATADV_DBG_TT, bat_priv,
@@ -1582,8 +1657,8 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
goto unlock;
skb_reserve(skb, ETH_HLEN);
- tt_response = (struct batadv_tt_query_packet *)skb_put(skb,
- len);
+ packet_pos = skb_put(skb, len);
+ tt_response = (struct batadv_tt_query_packet *)packet_pos;
tt_response->ttvn = req_ttvn;
tt_response->tt_data = htons(tt_tot);
@@ -1599,7 +1674,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
skb = batadv_tt_response_fill_table(tt_len, ttvn,
- bat_priv->tt_global_hash,
+ bat_priv->tt.global_hash,
primary_if,
batadv_tt_global_valid,
req_dst_orig_node);
@@ -1662,6 +1737,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
uint16_t tt_len, tt_tot;
struct sk_buff *skb = NULL;
struct batadv_tt_query_packet *tt_response;
+ uint8_t *packet_pos;
size_t len;
batadv_dbg(BATADV_DBG_TT, bat_priv,
@@ -1670,7 +1746,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
(tt_request->flags & BATADV_TT_FULL_TABLE ? 'F' : '.'));
- my_ttvn = (uint8_t)atomic_read(&bat_priv->ttvn);
+ my_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
req_ttvn = tt_request->ttvn;
orig_node = batadv_orig_hash_find(bat_priv, tt_request->src);
@@ -1689,7 +1765,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
* is too big send the whole local translation table
*/
if (tt_request->flags & BATADV_TT_FULL_TABLE || my_ttvn != req_ttvn ||
- !bat_priv->tt_buff)
+ !bat_priv->tt.last_changeset)
full_table = true;
else
full_table = false;
@@ -1698,8 +1774,8 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
* I'll send only one packet with as much TT entries as I can
*/
if (!full_table) {
- spin_lock_bh(&bat_priv->tt_buff_lock);
- tt_len = bat_priv->tt_buff_len;
+ spin_lock_bh(&bat_priv->tt.last_changeset_lock);
+ tt_len = bat_priv->tt.last_changeset_len;
tt_tot = tt_len / sizeof(struct batadv_tt_change);
len = sizeof(*tt_response) + tt_len;
@@ -1708,22 +1784,22 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
goto unlock;
skb_reserve(skb, ETH_HLEN);
- tt_response = (struct batadv_tt_query_packet *)skb_put(skb,
- len);
+ packet_pos = skb_put(skb, len);
+ tt_response = (struct batadv_tt_query_packet *)packet_pos;
tt_response->ttvn = req_ttvn;
tt_response->tt_data = htons(tt_tot);
tt_buff = skb->data + sizeof(*tt_response);
- memcpy(tt_buff, bat_priv->tt_buff,
- bat_priv->tt_buff_len);
- spin_unlock_bh(&bat_priv->tt_buff_lock);
+ memcpy(tt_buff, bat_priv->tt.last_changeset,
+ bat_priv->tt.last_changeset_len);
+ spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
} else {
- tt_len = (uint16_t)atomic_read(&bat_priv->num_local_tt);
+ tt_len = (uint16_t)atomic_read(&bat_priv->tt.local_entry_num);
tt_len *= sizeof(struct batadv_tt_change);
- ttvn = (uint8_t)atomic_read(&bat_priv->ttvn);
+ ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
skb = batadv_tt_response_fill_table(tt_len, ttvn,
- bat_priv->tt_local_hash,
+ bat_priv->tt.local_hash,
primary_if,
batadv_tt_local_valid_entry,
NULL);
@@ -1755,7 +1831,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
goto out;
unlock:
- spin_unlock_bh(&bat_priv->tt_buff_lock);
+ spin_unlock_bh(&bat_priv->tt.last_changeset_lock);
out:
if (orig_node)
batadv_orig_node_free_ref(orig_node);
@@ -1908,14 +1984,14 @@ void batadv_handle_tt_response(struct batadv_priv *bat_priv,
}
/* Delete the tt_req_node from pending tt_requests list */
- spin_lock_bh(&bat_priv->tt_req_list_lock);
- list_for_each_entry_safe(node, safe, &bat_priv->tt_req_list, list) {
+ spin_lock_bh(&bat_priv->tt.req_list_lock);
+ list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
if (!batadv_compare_eth(node->addr, tt_response->src))
continue;
list_del(&node->list);
kfree(node);
}
- spin_unlock_bh(&bat_priv->tt_req_list_lock);
+ spin_unlock_bh(&bat_priv->tt.req_list_lock);
/* Recalculate the CRC for this orig_node and store it */
orig_node->tt_crc = batadv_tt_global_crc(bat_priv, orig_node);
@@ -1949,22 +2025,22 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv)
{
struct batadv_tt_roam_node *node, *safe;
- spin_lock_bh(&bat_priv->tt_roam_list_lock);
+ spin_lock_bh(&bat_priv->tt.roam_list_lock);
- list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) {
+ list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) {
list_del(&node->list);
kfree(node);
}
- spin_unlock_bh(&bat_priv->tt_roam_list_lock);
+ spin_unlock_bh(&bat_priv->tt.roam_list_lock);
}
static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
{
struct batadv_tt_roam_node *node, *safe;
- spin_lock_bh(&bat_priv->tt_roam_list_lock);
- list_for_each_entry_safe(node, safe, &bat_priv->tt_roam_list, list) {
+ spin_lock_bh(&bat_priv->tt.roam_list_lock);
+ list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) {
if (!batadv_has_timed_out(node->first_time,
BATADV_ROAMING_MAX_TIME))
continue;
@@ -1972,7 +2048,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv)
list_del(&node->list);
kfree(node);
}
- spin_unlock_bh(&bat_priv->tt_roam_list_lock);
+ spin_unlock_bh(&bat_priv->tt.roam_list_lock);
}
/* This function checks whether the client already reached the
@@ -1987,11 +2063,11 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv,
struct batadv_tt_roam_node *tt_roam_node;
bool ret = false;
- spin_lock_bh(&bat_priv->tt_roam_list_lock);
+ spin_lock_bh(&bat_priv->tt.roam_list_lock);
/* The new tt_req will be issued only if I'm not waiting for a
* reply from the same orig_node yet
*/
- list_for_each_entry(tt_roam_node, &bat_priv->tt_roam_list, list) {
+ list_for_each_entry(tt_roam_node, &bat_priv->tt.roam_list, list) {
if (!batadv_compare_eth(tt_roam_node->addr, client))
continue;
@@ -2016,12 +2092,12 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv,
BATADV_ROAMING_MAX_COUNT - 1);
memcpy(tt_roam_node->addr, client, ETH_ALEN);
- list_add(&tt_roam_node->list, &bat_priv->tt_roam_list);
+ list_add(&tt_roam_node->list, &bat_priv->tt.roam_list);
ret = true;
}
unlock:
- spin_unlock_bh(&bat_priv->tt_roam_list_lock);
+ spin_unlock_bh(&bat_priv->tt.roam_list_lock);
return ret;
}
@@ -2085,13 +2161,15 @@ out:
static void batadv_tt_purge(struct work_struct *work)
{
struct delayed_work *delayed_work;
+ struct batadv_priv_tt *priv_tt;
struct batadv_priv *bat_priv;
delayed_work = container_of(work, struct delayed_work, work);
- bat_priv = container_of(delayed_work, struct batadv_priv, tt_work);
+ priv_tt = container_of(delayed_work, struct batadv_priv_tt, work);
+ bat_priv = container_of(priv_tt, struct batadv_priv, tt);
batadv_tt_local_purge(bat_priv);
- batadv_tt_global_roam_purge(bat_priv);
+ batadv_tt_global_purge(bat_priv);
batadv_tt_req_purge(bat_priv);
batadv_tt_roam_purge(bat_priv);
@@ -2100,7 +2178,7 @@ static void batadv_tt_purge(struct work_struct *work)
void batadv_tt_free(struct batadv_priv *bat_priv)
{
- cancel_delayed_work_sync(&bat_priv->tt_work);
+ cancel_delayed_work_sync(&bat_priv->tt.work);
batadv_tt_local_table_free(bat_priv);
batadv_tt_global_table_free(bat_priv);
@@ -2108,7 +2186,7 @@ void batadv_tt_free(struct batadv_priv *bat_priv)
batadv_tt_changes_list_free(bat_priv);
batadv_tt_roam_list_free(bat_priv);
- kfree(bat_priv->tt_buff);
+ kfree(bat_priv->tt.last_changeset);
}
/* This function will enable or disable the specified flags for all the entries
@@ -2152,7 +2230,7 @@ out:
/* Purge out all the tt local entries marked with BATADV_TT_CLIENT_PENDING */
static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
{
- struct batadv_hashtable *hash = bat_priv->tt_local_hash;
+ struct batadv_hashtable *hash = bat_priv->tt.local_hash;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_local_entry *tt_local;
struct hlist_node *node, *node_tmp;
@@ -2177,7 +2255,7 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv)
"Deleting local tt entry (%pM): pending\n",
tt_common->addr);
- atomic_dec(&bat_priv->num_local_tt);
+ atomic_dec(&bat_priv->tt.local_entry_num);
hlist_del_rcu(node);
tt_local = container_of(tt_common,
struct batadv_tt_local_entry,
@@ -2195,26 +2273,26 @@ static int batadv_tt_commit_changes(struct batadv_priv *bat_priv,
{
uint16_t changed_num = 0;
- if (atomic_read(&bat_priv->tt_local_changes) < 1)
+ if (atomic_read(&bat_priv->tt.local_changes) < 1)
return -ENOENT;
- changed_num = batadv_tt_set_flags(bat_priv->tt_local_hash,
+ changed_num = batadv_tt_set_flags(bat_priv->tt.local_hash,
BATADV_TT_CLIENT_NEW, false);
/* all reset entries have to be counted as local entries */
- atomic_add(changed_num, &bat_priv->num_local_tt);
+ atomic_add(changed_num, &bat_priv->tt.local_entry_num);
batadv_tt_local_purge_pending_clients(bat_priv);
- bat_priv->tt_crc = batadv_tt_local_crc(bat_priv);
+ bat_priv->tt.local_crc = batadv_tt_local_crc(bat_priv);
/* Increment the TTVN only once per OGM interval */
- atomic_inc(&bat_priv->ttvn);
+ atomic_inc(&bat_priv->tt.vn);
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Local changes committed, updating to ttvn %u\n",
- (uint8_t)atomic_read(&bat_priv->ttvn));
- bat_priv->tt_poss_change = false;
+ (uint8_t)atomic_read(&bat_priv->tt.vn));
+ bat_priv->tt.poss_change = false;
/* reset the sending counter */
- atomic_set(&bat_priv->tt_ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX);
+ atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX);
return batadv_tt_changes_fill_buff(bat_priv, packet_buff,
packet_buff_len, packet_min_len);
@@ -2234,7 +2312,7 @@ int batadv_tt_append_diff(struct batadv_priv *bat_priv,
/* if the changes have been sent often enough */
if ((tt_num_changes < 0) &&
- (!batadv_atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt))) {
+ (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))) {
batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,
packet_min_len, packet_min_len);
tt_num_changes = 0;
@@ -2365,3 +2443,22 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
out:
return ret;
}
+
+bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ const unsigned char *addr)
+{
+ bool ret = false;
+
+ if (!batadv_tt_global_add(bat_priv, orig_node, addr,
+ BATADV_TT_CLIENT_TEMP,
+ atomic_read(&orig_node->last_ttvn)))
+ goto out;
+
+ batadv_dbg(BATADV_DBG_TT, bat_priv,
+ "Added temporary global client (addr: %pM orig: %pM)\n",
+ addr, orig_node->orig);
+ ret = true;
+out:
+ return ret;
+}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index ffa87355096b..811fffd4760c 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -59,6 +59,8 @@ int batadv_tt_append_diff(struct batadv_priv *bat_priv,
int packet_min_len);
bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
uint8_t *addr);
-
+bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig_node,
+ const unsigned char *addr);
#endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 12635fd2c3d3..2ed82caacdca 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -145,6 +145,11 @@ struct batadv_bcast_duplist_entry {
#endif
enum batadv_counters {
+ BATADV_CNT_TX,
+ BATADV_CNT_TX_BYTES,
+ BATADV_CNT_TX_DROPPED,
+ BATADV_CNT_RX,
+ BATADV_CNT_RX_BYTES,
BATADV_CNT_FORWARD,
BATADV_CNT_FORWARD_BYTES,
BATADV_CNT_MGMT_TX,
@@ -160,6 +165,67 @@ enum batadv_counters {
BATADV_CNT_NUM,
};
+/**
+ * struct batadv_priv_tt - per mesh interface translation table data
+ * @vn: translation table version number
+ * @local_changes: changes registered in an originator interval
+ * @poss_change: Detect an ongoing roaming phase. If true, then this node
+ * received a roaming_adv and has to inspect every packet directed to it to
+ * check whether it still is the true destination or not. This flag will be
+ * reset to false as soon as the this node's ttvn is increased
+ * @changes_list: tracks tt local changes within an originator interval
+ * @req_list: list of pending tt_requests
+ * @local_crc: Checksum of the local table, recomputed before sending a new OGM
+ */
+struct batadv_priv_tt {
+ atomic_t vn;
+ atomic_t ogm_append_cnt;
+ atomic_t local_changes;
+ bool poss_change;
+ struct list_head changes_list;
+ struct batadv_hashtable *local_hash;
+ struct batadv_hashtable *global_hash;
+ struct list_head req_list;
+ struct list_head roam_list;
+ spinlock_t changes_list_lock; /* protects changes */
+ spinlock_t req_list_lock; /* protects req_list */
+ spinlock_t roam_list_lock; /* protects roam_list */
+ atomic_t local_entry_num;
+ uint16_t local_crc;
+ unsigned char *last_changeset;
+ int16_t last_changeset_len;
+ spinlock_t last_changeset_lock; /* protects last_changeset */
+ struct delayed_work work;
+};
+
+#ifdef CONFIG_BATMAN_ADV_BLA
+struct batadv_priv_bla {
+ atomic_t num_requests; /* number of bla requests in flight */
+ struct batadv_hashtable *claim_hash;
+ struct batadv_hashtable *backbone_hash;
+ struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE];
+ int bcast_duplist_curr;
+ struct batadv_bla_claim_dst claim_dest;
+ struct delayed_work work;
+};
+#endif
+
+struct batadv_priv_gw {
+ struct hlist_head list;
+ spinlock_t list_lock; /* protects gw_list and curr_gw */
+ struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */
+ atomic_t reselect;
+};
+
+struct batadv_priv_vis {
+ struct list_head send_list;
+ struct batadv_hashtable *hash;
+ spinlock_t hash_lock; /* protects hash */
+ spinlock_t list_lock; /* protects info::recv_list */
+ struct delayed_work work;
+ struct batadv_vis_info *my_info;
+};
+
struct batadv_priv {
atomic_t mesh_state;
struct net_device_stats stats;
@@ -179,64 +245,24 @@ struct batadv_priv {
atomic_t bcast_seqno;
atomic_t bcast_queue_left;
atomic_t batman_queue_left;
- atomic_t ttvn; /* translation table version number */
- atomic_t tt_ogm_append_cnt;
- atomic_t tt_local_changes; /* changes registered in a OGM interval */
- atomic_t bla_num_requests; /* number of bla requests in flight */
- /* The tt_poss_change flag is used to detect an ongoing roaming phase.
- * If true, then I received a Roaming_adv and I have to inspect every
- * packet directed to me to check whether I am still the true
- * destination or not. This flag will be reset to false as soon as I
- * increase my TTVN
- */
- bool tt_poss_change;
char num_ifaces;
struct batadv_debug_log *debug_log;
struct kobject *mesh_obj;
struct dentry *debug_dir;
struct hlist_head forw_bat_list;
struct hlist_head forw_bcast_list;
- struct hlist_head gw_list;
- struct list_head tt_changes_list; /* tracks changes in a OGM int */
- struct list_head vis_send_list;
struct batadv_hashtable *orig_hash;
- struct batadv_hashtable *tt_local_hash;
- struct batadv_hashtable *tt_global_hash;
-#ifdef CONFIG_BATMAN_ADV_BLA
- struct batadv_hashtable *claim_hash;
- struct batadv_hashtable *backbone_hash;
-#endif
- struct list_head tt_req_list; /* list of pending tt_requests */
- struct list_head tt_roam_list;
- struct batadv_hashtable *vis_hash;
-#ifdef CONFIG_BATMAN_ADV_BLA
- struct batadv_bcast_duplist_entry bcast_duplist[BATADV_DUPLIST_SIZE];
- int bcast_duplist_curr;
- struct batadv_bla_claim_dst claim_dest;
-#endif
spinlock_t forw_bat_list_lock; /* protects forw_bat_list */
spinlock_t forw_bcast_list_lock; /* protects */
- spinlock_t tt_changes_list_lock; /* protects tt_changes */
- spinlock_t tt_req_list_lock; /* protects tt_req_list */
- spinlock_t tt_roam_list_lock; /* protects tt_roam_list */
- spinlock_t gw_list_lock; /* protects gw_list and curr_gw */
- spinlock_t vis_hash_lock; /* protects vis_hash */
- spinlock_t vis_list_lock; /* protects vis_info::recv_list */
- atomic_t num_local_tt;
- /* Checksum of the local table, recomputed before sending a new OGM */
- uint16_t tt_crc;
- unsigned char *tt_buff;
- int16_t tt_buff_len;
- spinlock_t tt_buff_lock; /* protects tt_buff */
- struct delayed_work tt_work;
struct delayed_work orig_work;
- struct delayed_work vis_work;
- struct delayed_work bla_work;
- struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */
- atomic_t gw_reselect;
struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */
- struct batadv_vis_info *my_vis_info;
struct batadv_algo_ops *bat_algo_ops;
+#ifdef CONFIG_BATMAN_ADV_BLA
+ struct batadv_priv_bla bla;
+#endif
+ struct batadv_priv_gw gw;
+ struct batadv_priv_tt tt;
+ struct batadv_priv_vis vis;
};
struct batadv_socket_client {
@@ -258,6 +284,7 @@ struct batadv_tt_common_entry {
uint8_t addr[ETH_ALEN];
struct hlist_node hash_entry;
uint16_t flags;
+ unsigned long added_at;
atomic_t refcount;
struct rcu_head rcu;
};
@@ -277,6 +304,7 @@ struct batadv_tt_global_entry {
struct batadv_tt_orig_list_entry {
struct batadv_orig_node *orig_node;
uint8_t ttvn;
+ atomic_t refcount;
struct rcu_head rcu;
struct hlist_node list;
};
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 00164645b3f7..f39723281ca1 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -39,6 +39,7 @@ batadv_frag_merge_packet(struct list_head *head,
struct batadv_unicast_packet *unicast_packet;
int hdr_len = sizeof(*unicast_packet);
int uni_diff = sizeof(*up) - hdr_len;
+ uint8_t *packet_pos;
up = (struct batadv_unicast_frag_packet *)skb->data;
/* set skb to the first part and tmp_skb to the second part */
@@ -65,8 +66,8 @@ batadv_frag_merge_packet(struct list_head *head,
kfree_skb(tmp_skb);
memmove(skb->data + uni_diff, skb->data, hdr_len);
- unicast_packet = (struct batadv_unicast_packet *)skb_pull(skb,
- uni_diff);
+ packet_pos = skb_pull(skb, uni_diff);
+ unicast_packet = (struct batadv_unicast_packet *)packet_pos;
unicast_packet->header.packet_type = BATADV_UNICAST;
return skb;
@@ -121,6 +122,7 @@ batadv_frag_search_packet(struct list_head *head,
{
struct batadv_frag_packet_list_entry *tfp;
struct batadv_unicast_frag_packet *tmp_up = NULL;
+ int is_head_tmp, is_head;
uint16_t search_seqno;
if (up->flags & BATADV_UNI_FRAG_HEAD)
@@ -128,6 +130,8 @@ batadv_frag_search_packet(struct list_head *head,
else
search_seqno = ntohs(up->seqno)-1;
+ is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD);
+
list_for_each_entry(tfp, head, list) {
if (!tfp->skb)
@@ -139,9 +143,8 @@ batadv_frag_search_packet(struct list_head *head,
tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data;
if (tfp->seqno == search_seqno) {
-
- if ((tmp_up->flags & BATADV_UNI_FRAG_HEAD) !=
- (up->flags & BATADV_UNI_FRAG_HEAD))
+ is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD);
+ if (is_head_tmp != is_head)
return tfp;
else
goto mov_tail;
@@ -334,8 +337,7 @@ find_router:
/* copy the destination for faster routing */
memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
/* set the destination tt version number */
- unicast_packet->ttvn =
- (uint8_t)atomic_read(&orig_node->last_ttvn);
+ unicast_packet->ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
/* inform the destination node that we are still missing a correct route
* for this client. The destination will receive this packet and will
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 2a2ea0681469..5abd1454fb07 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -41,13 +41,13 @@ static void batadv_free_info(struct kref *ref)
bat_priv = info->bat_priv;
list_del_init(&info->send_list);
- spin_lock_bh(&bat_priv->vis_list_lock);
+ spin_lock_bh(&bat_priv->vis.list_lock);
list_for_each_entry_safe(entry, tmp, &info->recv_list, list) {
list_del(&entry->list);
kfree(entry);
}
- spin_unlock_bh(&bat_priv->vis_list_lock);
+ spin_unlock_bh(&bat_priv->vis.list_lock);
kfree_skb(info->skb_packet);
kfree(info);
}
@@ -94,7 +94,7 @@ static uint32_t batadv_vis_info_choose(const void *data, uint32_t size)
static struct batadv_vis_info *
batadv_vis_hash_find(struct batadv_priv *bat_priv, const void *data)
{
- struct batadv_hashtable *hash = bat_priv->vis_hash;
+ struct batadv_hashtable *hash = bat_priv->vis.hash;
struct hlist_head *head;
struct hlist_node *node;
struct batadv_vis_info *vis_info, *vis_info_tmp = NULL;
@@ -252,7 +252,7 @@ int batadv_vis_seq_print_text(struct seq_file *seq, void *offset)
struct hlist_head *head;
struct net_device *net_dev = (struct net_device *)seq->private;
struct batadv_priv *bat_priv = netdev_priv(net_dev);
- struct batadv_hashtable *hash = bat_priv->vis_hash;
+ struct batadv_hashtable *hash = bat_priv->vis.hash;
uint32_t i;
int ret = 0;
int vis_server = atomic_read(&bat_priv->vis_mode);
@@ -264,12 +264,12 @@ int batadv_vis_seq_print_text(struct seq_file *seq, void *offset)
if (vis_server == BATADV_VIS_TYPE_CLIENT_UPDATE)
goto out;
- spin_lock_bh(&bat_priv->vis_hash_lock);
+ spin_lock_bh(&bat_priv->vis.hash_lock);
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
batadv_vis_seq_print_text_bucket(seq, head);
}
- spin_unlock_bh(&bat_priv->vis_hash_lock);
+ spin_unlock_bh(&bat_priv->vis.hash_lock);
out:
if (primary_if)
@@ -285,7 +285,7 @@ static void batadv_send_list_add(struct batadv_priv *bat_priv,
{
if (list_empty(&info->send_list)) {
kref_get(&info->refcount);
- list_add_tail(&info->send_list, &bat_priv->vis_send_list);
+ list_add_tail(&info->send_list, &bat_priv->vis.send_list);
}
}
@@ -311,9 +311,9 @@ static void batadv_recv_list_add(struct batadv_priv *bat_priv,
return;
memcpy(entry->mac, mac, ETH_ALEN);
- spin_lock_bh(&bat_priv->vis_list_lock);
+ spin_lock_bh(&bat_priv->vis.list_lock);
list_add_tail(&entry->list, recv_list);
- spin_unlock_bh(&bat_priv->vis_list_lock);
+ spin_unlock_bh(&bat_priv->vis.list_lock);
}
/* returns 1 if this mac is in the recv_list */
@@ -323,14 +323,14 @@ static int batadv_recv_list_is_in(struct batadv_priv *bat_priv,
{
const struct batadv_recvlist_node *entry;
- spin_lock_bh(&bat_priv->vis_list_lock);
+ spin_lock_bh(&bat_priv->vis.list_lock);
list_for_each_entry(entry, recv_list, list) {
if (batadv_compare_eth(entry->mac, mac)) {
- spin_unlock_bh(&bat_priv->vis_list_lock);
+ spin_unlock_bh(&bat_priv->vis.list_lock);
return 1;
}
}
- spin_unlock_bh(&bat_priv->vis_list_lock);
+ spin_unlock_bh(&bat_priv->vis.list_lock);
return 0;
}
@@ -354,7 +354,7 @@ batadv_add_packet(struct batadv_priv *bat_priv,
*is_new = 0;
/* sanity check */
- if (!bat_priv->vis_hash)
+ if (!bat_priv->vis.hash)
return NULL;
/* see if the packet is already in vis_hash */
@@ -385,7 +385,7 @@ batadv_add_packet(struct batadv_priv *bat_priv,
}
}
/* remove old entry */
- batadv_hash_remove(bat_priv->vis_hash, batadv_vis_info_cmp,
+ batadv_hash_remove(bat_priv->vis.hash, batadv_vis_info_cmp,
batadv_vis_info_choose, old_info);
batadv_send_list_del(old_info);
kref_put(&old_info->refcount, batadv_free_info);
@@ -426,7 +426,7 @@ batadv_add_packet(struct batadv_priv *bat_priv,
batadv_recv_list_add(bat_priv, &info->recv_list, packet->sender_orig);
/* try to add it */
- hash_added = batadv_hash_add(bat_priv->vis_hash, batadv_vis_info_cmp,
+ hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp,
batadv_vis_info_choose, info,
&info->hash_entry);
if (hash_added != 0) {
@@ -449,7 +449,7 @@ void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv,
make_broadcast = (vis_server == BATADV_VIS_TYPE_SERVER_SYNC);
- spin_lock_bh(&bat_priv->vis_hash_lock);
+ spin_lock_bh(&bat_priv->vis.hash_lock);
info = batadv_add_packet(bat_priv, vis_packet, vis_info_len,
&is_new, make_broadcast);
if (!info)
@@ -461,7 +461,7 @@ void batadv_receive_server_sync_packet(struct batadv_priv *bat_priv,
if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC && is_new)
batadv_send_list_add(bat_priv, info);
end:
- spin_unlock_bh(&bat_priv->vis_hash_lock);
+ spin_unlock_bh(&bat_priv->vis.hash_lock);
}
/* handle an incoming client update packet and schedule forward if needed. */
@@ -484,7 +484,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv,
batadv_is_my_mac(vis_packet->target_orig))
are_target = 1;
- spin_lock_bh(&bat_priv->vis_hash_lock);
+ spin_lock_bh(&bat_priv->vis.hash_lock);
info = batadv_add_packet(bat_priv, vis_packet, vis_info_len,
&is_new, are_target);
@@ -505,7 +505,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv,
}
end:
- spin_unlock_bh(&bat_priv->vis_hash_lock);
+ spin_unlock_bh(&bat_priv->vis.hash_lock);
}
/* Walk the originators and find the VIS server with the best tq. Set the packet
@@ -574,10 +574,11 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv)
struct hlist_head *head;
struct batadv_orig_node *orig_node;
struct batadv_neigh_node *router;
- struct batadv_vis_info *info = bat_priv->my_vis_info;
+ struct batadv_vis_info *info = bat_priv->vis.my_info;
struct batadv_vis_packet *packet;
struct batadv_vis_info_entry *entry;
struct batadv_tt_common_entry *tt_common_entry;
+ uint8_t *packet_pos;
int best_tq = -1;
uint32_t i;
@@ -618,8 +619,8 @@ static int batadv_generate_vis_packet(struct batadv_priv *bat_priv)
goto next;
/* fill one entry into buffer. */
- entry = (struct batadv_vis_info_entry *)
- skb_put(info->skb_packet, sizeof(*entry));
+ packet_pos = skb_put(info->skb_packet, sizeof(*entry));
+ entry = (struct batadv_vis_info_entry *)packet_pos;
memcpy(entry->src,
router->if_incoming->net_dev->dev_addr,
ETH_ALEN);
@@ -636,7 +637,7 @@ next:
rcu_read_unlock();
}
- hash = bat_priv->tt_local_hash;
+ hash = bat_priv->tt.local_hash;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -644,9 +645,8 @@ next:
rcu_read_lock();
hlist_for_each_entry_rcu(tt_common_entry, node, head,
hash_entry) {
- entry = (struct batadv_vis_info_entry *)
- skb_put(info->skb_packet,
- sizeof(*entry));
+ packet_pos = skb_put(info->skb_packet, sizeof(*entry));
+ entry = (struct batadv_vis_info_entry *)packet_pos;
memset(entry->src, 0, ETH_ALEN);
memcpy(entry->dest, tt_common_entry->addr, ETH_ALEN);
entry->quality = 0; /* 0 means TT */
@@ -671,7 +671,7 @@ unlock:
static void batadv_purge_vis_packets(struct batadv_priv *bat_priv)
{
uint32_t i;
- struct batadv_hashtable *hash = bat_priv->vis_hash;
+ struct batadv_hashtable *hash = bat_priv->vis.hash;
struct hlist_node *node, *node_tmp;
struct hlist_head *head;
struct batadv_vis_info *info;
@@ -682,7 +682,7 @@ static void batadv_purge_vis_packets(struct batadv_priv *bat_priv)
hlist_for_each_entry_safe(info, node, node_tmp,
head, hash_entry) {
/* never purge own data. */
- if (info == bat_priv->my_vis_info)
+ if (info == bat_priv->vis.my_info)
continue;
if (batadv_has_timed_out(info->first_seen,
@@ -814,34 +814,36 @@ out:
/* called from timer; send (and maybe generate) vis packet. */
static void batadv_send_vis_packets(struct work_struct *work)
{
- struct delayed_work *delayed_work =
- container_of(work, struct delayed_work, work);
+ struct delayed_work *delayed_work;
struct batadv_priv *bat_priv;
+ struct batadv_priv_vis *priv_vis;
struct batadv_vis_info *info;
- bat_priv = container_of(delayed_work, struct batadv_priv, vis_work);
- spin_lock_bh(&bat_priv->vis_hash_lock);
+ delayed_work = container_of(work, struct delayed_work, work);
+ priv_vis = container_of(delayed_work, struct batadv_priv_vis, work);
+ bat_priv = container_of(priv_vis, struct batadv_priv, vis);
+ spin_lock_bh(&bat_priv->vis.hash_lock);
batadv_purge_vis_packets(bat_priv);
if (batadv_generate_vis_packet(bat_priv) == 0) {
/* schedule if generation was successful */
- batadv_send_list_add(bat_priv, bat_priv->my_vis_info);
+ batadv_send_list_add(bat_priv, bat_priv->vis.my_info);
}
- while (!list_empty(&bat_priv->vis_send_list)) {
- info = list_first_entry(&bat_priv->vis_send_list,
+ while (!list_empty(&bat_priv->vis.send_list)) {
+ info = list_first_entry(&bat_priv->vis.send_list,
typeof(*info), send_list);
kref_get(&info->refcount);
- spin_unlock_bh(&bat_priv->vis_hash_lock);
+ spin_unlock_bh(&bat_priv->vis.hash_lock);
batadv_send_vis_packet(bat_priv, info);
- spin_lock_bh(&bat_priv->vis_hash_lock);
+ spin_lock_bh(&bat_priv->vis.hash_lock);
batadv_send_list_del(info);
kref_put(&info->refcount, batadv_free_info);
}
- spin_unlock_bh(&bat_priv->vis_hash_lock);
+ spin_unlock_bh(&bat_priv->vis.hash_lock);
batadv_start_vis_timer(bat_priv);
}
@@ -856,37 +858,37 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
unsigned long first_seen;
struct sk_buff *tmp_skb;
- if (bat_priv->vis_hash)
+ if (bat_priv->vis.hash)
return 0;
- spin_lock_bh(&bat_priv->vis_hash_lock);
+ spin_lock_bh(&bat_priv->vis.hash_lock);
- bat_priv->vis_hash = batadv_hash_new(256);
- if (!bat_priv->vis_hash) {
+ bat_priv->vis.hash = batadv_hash_new(256);
+ if (!bat_priv->vis.hash) {
pr_err("Can't initialize vis_hash\n");
goto err;
}
- bat_priv->my_vis_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC);
- if (!bat_priv->my_vis_info)
+ bat_priv->vis.my_info = kmalloc(BATADV_MAX_VIS_PACKET_SIZE, GFP_ATOMIC);
+ if (!bat_priv->vis.my_info)
goto err;
len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN;
- bat_priv->my_vis_info->skb_packet = dev_alloc_skb(len);
- if (!bat_priv->my_vis_info->skb_packet)
+ bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len);
+ if (!bat_priv->vis.my_info->skb_packet)
goto free_info;
- skb_reserve(bat_priv->my_vis_info->skb_packet, ETH_HLEN);
- tmp_skb = bat_priv->my_vis_info->skb_packet;
+ skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN);
+ tmp_skb = bat_priv->vis.my_info->skb_packet;
packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
/* prefill the vis info */
first_seen = jiffies - msecs_to_jiffies(BATADV_VIS_INTERVAL);
- bat_priv->my_vis_info->first_seen = first_seen;
- INIT_LIST_HEAD(&bat_priv->my_vis_info->recv_list);
- INIT_LIST_HEAD(&bat_priv->my_vis_info->send_list);
- kref_init(&bat_priv->my_vis_info->refcount);
- bat_priv->my_vis_info->bat_priv = bat_priv;
+ bat_priv->vis.my_info->first_seen = first_seen;
+ INIT_LIST_HEAD(&bat_priv->vis.my_info->recv_list);
+ INIT_LIST_HEAD(&bat_priv->vis.my_info->send_list);
+ kref_init(&bat_priv->vis.my_info->refcount);
+ bat_priv->vis.my_info->bat_priv = bat_priv;
packet->header.version = BATADV_COMPAT_VERSION;
packet->header.packet_type = BATADV_VIS;
packet->header.ttl = BATADV_TTL;
@@ -894,28 +896,28 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
packet->reserved = 0;
packet->entries = 0;
- INIT_LIST_HEAD(&bat_priv->vis_send_list);
+ INIT_LIST_HEAD(&bat_priv->vis.send_list);
- hash_added = batadv_hash_add(bat_priv->vis_hash, batadv_vis_info_cmp,
+ hash_added = batadv_hash_add(bat_priv->vis.hash, batadv_vis_info_cmp,
batadv_vis_info_choose,
- bat_priv->my_vis_info,
- &bat_priv->my_vis_info->hash_entry);
+ bat_priv->vis.my_info,
+ &bat_priv->vis.my_info->hash_entry);
if (hash_added != 0) {
pr_err("Can't add own vis packet into hash\n");
/* not in hash, need to remove it manually. */
- kref_put(&bat_priv->my_vis_info->refcount, batadv_free_info);
+ kref_put(&bat_priv->vis.my_info->refcount, batadv_free_info);
goto err;
}
- spin_unlock_bh(&bat_priv->vis_hash_lock);
+ spin_unlock_bh(&bat_priv->vis.hash_lock);
batadv_start_vis_timer(bat_priv);
return 0;
free_info:
- kfree(bat_priv->my_vis_info);
- bat_priv->my_vis_info = NULL;
+ kfree(bat_priv->vis.my_info);
+ bat_priv->vis.my_info = NULL;
err:
- spin_unlock_bh(&bat_priv->vis_hash_lock);
+ spin_unlock_bh(&bat_priv->vis.hash_lock);
batadv_vis_quit(bat_priv);
return -ENOMEM;
}
@@ -933,23 +935,23 @@ static void batadv_free_info_ref(struct hlist_node *node, void *arg)
/* shutdown vis-server */
void batadv_vis_quit(struct batadv_priv *bat_priv)
{
- if (!bat_priv->vis_hash)
+ if (!bat_priv->vis.hash)
return;
- cancel_delayed_work_sync(&bat_priv->vis_work);
+ cancel_delayed_work_sync(&bat_priv->vis.work);
- spin_lock_bh(&bat_priv->vis_hash_lock);
+ spin_lock_bh(&bat_priv->vis.hash_lock);
/* properly remove, kill timers ... */
- batadv_hash_delete(bat_priv->vis_hash, batadv_free_info_ref, NULL);
- bat_priv->vis_hash = NULL;
- bat_priv->my_vis_info = NULL;
- spin_unlock_bh(&bat_priv->vis_hash_lock);
+ batadv_hash_delete(bat_priv->vis.hash, batadv_free_info_ref, NULL);
+ bat_priv->vis.hash = NULL;
+ bat_priv->vis.my_info = NULL;
+ spin_unlock_bh(&bat_priv->vis.hash_lock);
}
/* schedule packets for (re)transmission */
static void batadv_start_vis_timer(struct batadv_priv *bat_priv)
{
- INIT_DELAYED_WORK(&bat_priv->vis_work, batadv_send_vis_packets);
- queue_delayed_work(batadv_event_workqueue, &bat_priv->vis_work,
+ INIT_DELAYED_WORK(&bat_priv->vis.work, batadv_send_vis_packets);
+ queue_delayed_work(batadv_event_workqueue, &bat_priv->vis.work,
msecs_to_jiffies(BATADV_VIS_INTERVAL));
}
diff --git a/net/batman-adv/vis.h b/net/batman-adv/vis.h
index 84e716ed8963..873282fa86da 100644
--- a/net/batman-adv/vis.h
+++ b/net/batman-adv/vis.h
@@ -20,7 +20,7 @@
#ifndef _NET_BATMAN_ADV_VIS_H_
#define _NET_BATMAN_ADV_VIS_H_
-/* timeout of vis packets in miliseconds */
+/* timeout of vis packets in milliseconds */
#define BATADV_VIS_TIMEOUT 200000
int batadv_vis_seq_print_text(struct seq_file *seq, void *offset);
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 4ff0bf3ba9a5..0760d1fed6f0 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -316,7 +316,7 @@ send_rsp:
static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_cmd *hdr)
{
- BT_DBG("ident %d code %d", hdr->ident, hdr->code);
+ BT_DBG("ident %d code 0x%2.2x", hdr->ident, hdr->code);
skb_pull(skb, le16_to_cpu(hdr->len));
return 0;
@@ -325,17 +325,19 @@ static inline int a2mp_cmd_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
/* Handle A2MP signalling */
static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
{
- struct a2mp_cmd *hdr = (void *) skb->data;
+ struct a2mp_cmd *hdr;
struct amp_mgr *mgr = chan->data;
int err = 0;
amp_mgr_get(mgr);
while (skb->len >= sizeof(*hdr)) {
- struct a2mp_cmd *hdr = (void *) skb->data;
- u16 len = le16_to_cpu(hdr->len);
+ u16 len;
- BT_DBG("code 0x%02x id %d len %d", hdr->code, hdr->ident, len);
+ hdr = (void *) skb->data;
+ len = le16_to_cpu(hdr->len);
+
+ BT_DBG("code 0x%2.2x id %d len %u", hdr->code, hdr->ident, len);
skb_pull(skb, sizeof(*hdr));
@@ -393,7 +395,9 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
if (err) {
struct a2mp_cmd_rej rej;
+
rej.reason = __constant_cpu_to_le16(0);
+ hdr = (void *) skb->data;
BT_DBG("Send A2MP Rej: cmd 0x%2.2x err %d", hdr->code, err);
@@ -412,7 +416,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
static void a2mp_chan_close_cb(struct l2cap_chan *chan)
{
- l2cap_chan_destroy(chan);
+ l2cap_chan_put(chan);
}
static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state)
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f7db5792ec64..9d49ee6d7219 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -28,6 +28,7 @@
#include <asm/ioctls.h>
#include <net/bluetooth/bluetooth.h>
+#include <linux/proc_fs.h>
#define VERSION "2.16"
@@ -532,6 +533,144 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
}
EXPORT_SYMBOL(bt_sock_wait_state);
+#ifdef CONFIG_PROC_FS
+struct bt_seq_state {
+ struct bt_sock_list *l;
+};
+
+static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(seq->private->l->lock)
+{
+ struct bt_seq_state *s = seq->private;
+ struct bt_sock_list *l = s->l;
+
+ read_lock(&l->lock);
+ return seq_hlist_start_head(&l->head, *pos);
+}
+
+static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bt_seq_state *s = seq->private;
+ struct bt_sock_list *l = s->l;
+
+ return seq_hlist_next(v, &l->head, pos);
+}
+
+static void bt_seq_stop(struct seq_file *seq, void *v)
+ __releases(seq->private->l->lock)
+{
+ struct bt_seq_state *s = seq->private;
+ struct bt_sock_list *l = s->l;
+
+ read_unlock(&l->lock);
+}
+
+static int bt_seq_show(struct seq_file *seq, void *v)
+{
+ struct bt_seq_state *s = seq->private;
+ struct bt_sock_list *l = s->l;
+ bdaddr_t src_baswapped, dst_baswapped;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq ,"sk RefCnt Rmem Wmem User Inode Src Dst Parent");
+
+ if (l->custom_seq_show) {
+ seq_putc(seq, ' ');
+ l->custom_seq_show(seq, v);
+ }
+
+ seq_putc(seq, '\n');
+ } else {
+ struct sock *sk = sk_entry(v);
+ struct bt_sock *bt = bt_sk(sk);
+ baswap(&src_baswapped, &bt->src);
+ baswap(&dst_baswapped, &bt->dst);
+
+ seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %pM %pM %-6lu",
+ sk,
+ atomic_read(&sk->sk_refcnt),
+ sk_rmem_alloc_get(sk),
+ sk_wmem_alloc_get(sk),
+ sock_i_uid(sk),
+ sock_i_ino(sk),
+ &src_baswapped,
+ &dst_baswapped,
+ bt->parent? sock_i_ino(bt->parent): 0LU);
+
+ if (l->custom_seq_show) {
+ seq_putc(seq, ' ');
+ l->custom_seq_show(seq, v);
+ }
+
+ seq_putc(seq, '\n');
+ }
+ return 0;
+}
+
+static struct seq_operations bt_seq_ops = {
+ .start = bt_seq_start,
+ .next = bt_seq_next,
+ .stop = bt_seq_stop,
+ .show = bt_seq_show,
+};
+
+static int bt_seq_open(struct inode *inode, struct file *file)
+{
+ struct bt_sock_list *sk_list;
+ struct bt_seq_state *s;
+
+ sk_list = PDE(inode)->data;
+ s = __seq_open_private(file, &bt_seq_ops,
+ sizeof(struct bt_seq_state));
+ if (!s)
+ return -ENOMEM;
+
+ s->l = sk_list;
+ return 0;
+}
+
+int bt_procfs_init(struct module* module, struct net *net, const char *name,
+ struct bt_sock_list* sk_list,
+ int (* seq_show)(struct seq_file *, void *))
+{
+ struct proc_dir_entry * pde;
+
+ sk_list->custom_seq_show = seq_show;
+
+ sk_list->fops.owner = module;
+ sk_list->fops.open = bt_seq_open;
+ sk_list->fops.read = seq_read;
+ sk_list->fops.llseek = seq_lseek;
+ sk_list->fops.release = seq_release_private;
+
+ pde = proc_net_fops_create(net, name, 0, &sk_list->fops);
+ if (!pde)
+ return -ENOMEM;
+
+ pde->data = sk_list;
+
+ return 0;
+}
+
+void bt_procfs_cleanup(struct net *net, const char *name)
+{
+ proc_net_remove(net, name);
+}
+#else
+int bt_procfs_init(struct module* module, struct net *net, const char *name,
+ struct bt_sock_list* sk_list,
+ int (* seq_show)(struct seq_file *, void *))
+{
+ return 0;
+}
+
+void bt_procfs_cleanup(struct net *net, const char *name)
+{
+}
+#endif
+EXPORT_SYMBOL(bt_procfs_init);
+EXPORT_SYMBOL(bt_procfs_cleanup);
+
static struct net_proto_family bt_sock_family_ops = {
.owner = THIS_MODULE,
.family = PF_BLUETOOTH,
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 5e5f5b410e0b..e7154a58465f 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -29,6 +29,10 @@
#include "bnep.h"
+static struct bt_sock_list bnep_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(bnep_sk_list.lock)
+};
+
static int bnep_sock_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -38,6 +42,8 @@ static int bnep_sock_release(struct socket *sock)
if (!sk)
return 0;
+ bt_sock_unlink(&bnep_sk_list, sk);
+
sock_orphan(sk);
sock_put(sk);
return 0;
@@ -58,7 +64,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
switch (cmd) {
case BNEPCONNADD:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
if (copy_from_user(&ca, argp, sizeof(ca)))
return -EFAULT;
@@ -84,7 +90,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
case BNEPCONNDEL:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
if (copy_from_user(&cd, argp, sizeof(cd)))
return -EFAULT;
@@ -204,6 +210,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
sk->sk_protocol = protocol;
sk->sk_state = BT_OPEN;
+ bt_sock_link(&bnep_sk_list, sk);
return 0;
}
@@ -222,19 +229,30 @@ int __init bnep_sock_init(void)
return err;
err = bt_sock_register(BTPROTO_BNEP, &bnep_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("Can't register BNEP socket");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create BNEP proc file");
+ bt_sock_unregister(BTPROTO_BNEP);
+ goto error;
+ }
+
+ BT_INFO("BNEP socket layer initialized");
return 0;
error:
- BT_ERR("Can't register BNEP socket");
proto_unregister(&bnep_proto);
return err;
}
void __exit bnep_sock_cleanup(void)
{
+ bt_procfs_cleanup(&init_net, "bnep");
if (bt_sock_unregister(BTPROTO_BNEP) < 0)
BT_ERR("Can't unregister BNEP socket");
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 311668d14571..aacb802d1ee4 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -42,6 +42,10 @@
#include "cmtp.h"
+static struct bt_sock_list cmtp_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(cmtp_sk_list.lock)
+};
+
static int cmtp_sock_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -51,6 +55,8 @@ static int cmtp_sock_release(struct socket *sock)
if (!sk)
return 0;
+ bt_sock_unlink(&cmtp_sk_list, sk);
+
sock_orphan(sk);
sock_put(sk);
@@ -72,7 +78,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
switch (cmd) {
case CMTPCONNADD:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
if (copy_from_user(&ca, argp, sizeof(ca)))
return -EFAULT;
@@ -97,7 +103,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
case CMTPCONNDEL:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
if (copy_from_user(&cd, argp, sizeof(cd)))
return -EFAULT;
@@ -214,6 +220,8 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol,
sk->sk_protocol = protocol;
sk->sk_state = BT_OPEN;
+ bt_sock_link(&cmtp_sk_list, sk);
+
return 0;
}
@@ -232,19 +240,30 @@ int cmtp_init_sockets(void)
return err;
err = bt_sock_register(BTPROTO_CMTP, &cmtp_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("Can't register CMTP socket");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create CMTP proc file");
+ bt_sock_unregister(BTPROTO_HIDP);
+ goto error;
+ }
+
+ BT_INFO("CMTP socket layer initialized");
return 0;
error:
- BT_ERR("Can't register CMTP socket");
proto_unregister(&cmtp_proto);
return err;
}
void cmtp_cleanup_sockets(void)
{
+ bt_procfs_cleanup(&init_net, "cmtp");
if (bt_sock_unregister(BTPROTO_CMTP) < 0)
BT_ERR("Can't unregister CMTP socket");
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 5ad7da217474..b9196a44f759 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -29,8 +29,9 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/a2mp.h>
+#include <net/bluetooth/smp.h>
-static void hci_le_connect(struct hci_conn *conn)
+static void hci_le_create_connection(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_le_create_conn cp;
@@ -54,12 +55,12 @@ static void hci_le_connect(struct hci_conn *conn)
hci_send_cmd(hdev, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
}
-static void hci_le_connect_cancel(struct hci_conn *conn)
+static void hci_le_create_connection_cancel(struct hci_conn *conn)
{
hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
}
-void hci_acl_connect(struct hci_conn *conn)
+static void hci_acl_create_connection(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
struct inquiry_entry *ie;
@@ -103,7 +104,7 @@ void hci_acl_connect(struct hci_conn *conn)
hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp);
}
-static void hci_acl_connect_cancel(struct hci_conn *conn)
+static void hci_acl_create_connection_cancel(struct hci_conn *conn)
{
struct hci_cp_create_conn_cancel cp;
@@ -129,7 +130,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp);
}
-void hci_add_sco(struct hci_conn *conn, __u16 handle)
+static void hci_add_sco(struct hci_conn *conn, __u16 handle)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_add_sco cp;
@@ -245,9 +246,9 @@ static void hci_conn_timeout(struct work_struct *work)
case BT_CONNECT2:
if (conn->out) {
if (conn->type == ACL_LINK)
- hci_acl_connect_cancel(conn);
+ hci_acl_create_connection_cancel(conn);
else if (conn->type == LE_LINK)
- hci_le_connect_cancel(conn);
+ hci_le_create_connection_cancel(conn);
}
break;
case BT_CONFIG:
@@ -470,40 +471,37 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
}
EXPORT_SYMBOL(hci_get_route);
-/* Create SCO, ACL or LE connection.
- * Device _must_ be locked */
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
- __u8 dst_type, __u8 sec_level, __u8 auth_type)
+static struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
+ u8 dst_type, u8 sec_level, u8 auth_type)
{
- struct hci_conn *acl;
- struct hci_conn *sco;
struct hci_conn *le;
- BT_DBG("%s dst %s", hdev->name, batostr(dst));
+ le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
+ if (!le) {
+ le = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (le)
+ return ERR_PTR(-EBUSY);
- if (type == LE_LINK) {
- le = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
- if (!le) {
- le = hci_conn_hash_lookup_state(hdev, LE_LINK,
- BT_CONNECT);
- if (le)
- return ERR_PTR(-EBUSY);
+ le = hci_conn_add(hdev, LE_LINK, dst);
+ if (!le)
+ return ERR_PTR(-ENOMEM);
- le = hci_conn_add(hdev, LE_LINK, dst);
- if (!le)
- return ERR_PTR(-ENOMEM);
+ le->dst_type = bdaddr_to_le(dst_type);
+ hci_le_create_connection(le);
+ }
- le->dst_type = bdaddr_to_le(dst_type);
- hci_le_connect(le);
- }
+ le->pending_sec_level = sec_level;
+ le->auth_type = auth_type;
- le->pending_sec_level = sec_level;
- le->auth_type = auth_type;
+ hci_conn_hold(le);
- hci_conn_hold(le);
+ return le;
+}
- return le;
- }
+static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
+ u8 sec_level, u8 auth_type)
+{
+ struct hci_conn *acl;
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
@@ -518,10 +516,20 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
acl->sec_level = BT_SECURITY_LOW;
acl->pending_sec_level = sec_level;
acl->auth_type = auth_type;
- hci_acl_connect(acl);
+ hci_acl_create_connection(acl);
}
- if (type == ACL_LINK)
+ return acl;
+}
+
+static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
+ bdaddr_t *dst, u8 sec_level, u8 auth_type)
+{
+ struct hci_conn *acl;
+ struct hci_conn *sco;
+
+ acl = hci_connect_acl(hdev, dst, sec_level, auth_type);
+ if (IS_ERR(acl))
return acl;
sco = hci_conn_hash_lookup_ba(hdev, type, dst);
@@ -555,6 +563,25 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
return sco;
}
+/* Create SCO, ACL or LE connection. */
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ __u8 dst_type, __u8 sec_level, __u8 auth_type)
+{
+ BT_DBG("%s dst %s type 0x%x", hdev->name, batostr(dst), type);
+
+ switch (type) {
+ case LE_LINK:
+ return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type);
+ case ACL_LINK:
+ return hci_connect_acl(hdev, dst, sec_level, auth_type);
+ case SCO_LINK:
+ case ESCO_LINK:
+ return hci_connect_sco(hdev, type, dst, sec_level, auth_type);
+ }
+
+ return ERR_PTR(-EINVAL);
+}
+
/* Check link security requirement */
int hci_conn_check_link_mode(struct hci_conn *conn)
{
@@ -619,6 +646,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
{
BT_DBG("hcon %p", conn);
+ if (conn->type == LE_LINK)
+ return smp_conn_security(conn, sec_level);
+
/* For sdp we don't need the link key. */
if (sec_level == BT_SECURITY_SDP)
return 1;
@@ -771,7 +801,7 @@ void hci_conn_check_pending(struct hci_dev *hdev)
conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2);
if (conn)
- hci_acl_connect(conn);
+ hci_acl_create_connection(conn);
hci_dev_unlock(hdev);
}
@@ -909,7 +939,7 @@ struct hci_chan *hci_chan_create(struct hci_conn *conn)
return chan;
}
-int hci_chan_del(struct hci_chan *chan)
+void hci_chan_del(struct hci_chan *chan)
{
struct hci_conn *conn = chan->conn;
struct hci_dev *hdev = conn->hdev;
@@ -922,8 +952,6 @@ int hci_chan_del(struct hci_chan *chan)
skb_queue_purge(&chan->data_q);
kfree(chan);
-
- return 0;
}
void hci_chan_list_flush(struct hci_conn *conn)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index d4de5db18d5a..8a0ce706aebd 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -231,6 +231,9 @@ static void amp_init(struct hci_dev *hdev)
/* Read Local AMP Info */
hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
+
+ /* Read Data Blk size */
+ hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL);
}
static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
@@ -268,7 +271,6 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
BT_ERR("Unknown device type %d", hdev->dev_type);
break;
}
-
}
static void hci_le_init_req(struct hci_dev *hdev, unsigned long opt)
@@ -696,7 +698,8 @@ int hci_dev_open(__u16 dev)
hci_dev_hold(hdev);
set_bit(HCI_UP, &hdev->flags);
hci_notify(hdev, HCI_DEV_UP);
- if (!test_bit(HCI_SETUP, &hdev->dev_flags)) {
+ if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
+ mgmt_valid_hdev(hdev)) {
hci_dev_lock(hdev);
mgmt_powered(hdev, 1);
hci_dev_unlock(hdev);
@@ -734,6 +737,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
cancel_work_sync(&hdev->le_scan);
+ cancel_delayed_work(&hdev->power_off);
+
hci_req_cancel(hdev, ENODEV);
hci_req_lock(hdev);
@@ -797,7 +802,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
* and no tasks are scheduled. */
hdev->close(hdev);
- if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+ if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
+ mgmt_valid_hdev(hdev)) {
hci_dev_lock(hdev);
mgmt_powered(hdev, 0);
hci_dev_unlock(hdev);
@@ -1650,6 +1656,7 @@ struct hci_dev *hci_alloc_dev(void)
INIT_LIST_HEAD(&hdev->link_keys);
INIT_LIST_HEAD(&hdev->long_term_keys);
INIT_LIST_HEAD(&hdev->remote_oob_data);
+ INIT_LIST_HEAD(&hdev->conn_hash.list);
INIT_WORK(&hdev->rx_work, hci_rx_work);
INIT_WORK(&hdev->cmd_work, hci_cmd_work);
@@ -1672,7 +1679,6 @@ struct hci_dev *hci_alloc_dev(void)
hci_init_sysfs(hdev);
discovery_init(hdev);
- hci_conn_hash_init(hdev);
return hdev;
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 41ff978a33f9..2022b43c7353 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -29,6 +29,7 @@
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/mgmt.h>
/* Handle HCI Event packets */
@@ -303,7 +304,7 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
- if (status != 0) {
+ if (status) {
mgmt_write_scan_failed(hdev, param, status);
hdev->discov_timeout = 0;
goto done;
@@ -513,7 +514,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev)
if (hdev->features[3] & LMP_RSSI_INQ)
events[4] |= 0x02; /* Inquiry Result with RSSI */
- if (hdev->features[5] & LMP_SNIFF_SUBR)
+ if (lmp_sniffsubr_capable(hdev))
events[5] |= 0x20; /* Sniff Subrating */
if (hdev->features[5] & LMP_PAUSE_ENC)
@@ -522,13 +523,13 @@ static void hci_setup_event_mask(struct hci_dev *hdev)
if (hdev->features[6] & LMP_EXT_INQ)
events[5] |= 0x40; /* Extended Inquiry Result */
- if (hdev->features[6] & LMP_NO_FLUSH)
+ if (lmp_no_flush_capable(hdev))
events[7] |= 0x01; /* Enhanced Flush Complete */
if (hdev->features[7] & LMP_LSTO)
events[6] |= 0x80; /* Link Supervision Timeout Changed */
- if (hdev->features[6] & LMP_SIMPLE_PAIR) {
+ if (lmp_ssp_capable(hdev)) {
events[6] |= 0x01; /* IO Capability Request */
events[6] |= 0x02; /* IO Capability Response */
events[6] |= 0x04; /* User Confirmation Request */
@@ -541,7 +542,7 @@ static void hci_setup_event_mask(struct hci_dev *hdev)
* Features Notification */
}
- if (hdev->features[4] & LMP_LE)
+ if (lmp_le_capable(hdev))
events[7] |= 0x20; /* LE Meta-Event */
hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
@@ -623,11 +624,11 @@ static void hci_setup_link_policy(struct hci_dev *hdev)
struct hci_cp_write_def_link_policy cp;
u16 link_policy = 0;
- if (hdev->features[0] & LMP_RSWITCH)
+ if (lmp_rswitch_capable(hdev))
link_policy |= HCI_LP_RSWITCH;
if (hdev->features[0] & LMP_HOLD)
link_policy |= HCI_LP_HOLD;
- if (hdev->features[0] & LMP_SNIFF)
+ if (lmp_sniff_capable(hdev))
link_policy |= HCI_LP_SNIFF;
if (hdev->features[1] & LMP_PARK)
link_policy |= HCI_LP_PARK;
@@ -686,7 +687,7 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,
hdev->esco_type |= (ESCO_HV3);
}
- if (hdev->features[3] & LMP_ESCO)
+ if (lmp_esco_capable(hdev))
hdev->esco_type |= (ESCO_EV3);
if (hdev->features[4] & LMP_EV4)
@@ -746,7 +747,7 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
break;
}
- if (test_bit(HCI_INIT, &hdev->flags) && hdev->features[4] & LMP_LE)
+ if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev))
hci_set_le_support(hdev);
done:
@@ -925,7 +926,7 @@ static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
if (test_bit(HCI_MGMT, &hdev->dev_flags))
mgmt_pin_code_reply_complete(hdev, &rp->bdaddr, rp->status);
- if (rp->status != 0)
+ if (rp->status)
goto unlock;
cp = hci_sent_cmd_data(hdev, HCI_OP_PIN_CODE_REPLY);
@@ -1365,6 +1366,9 @@ static bool hci_resolve_next_name(struct hci_dev *hdev)
return false;
e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, NAME_NEEDED);
+ if (!e)
+ return false;
+
if (hci_resolve_name(hdev, e) == 0) {
e->name_state = NAME_PENDING;
return true;
@@ -1393,12 +1397,20 @@ static void hci_check_pending_name(struct hci_dev *hdev, struct hci_conn *conn,
return;
e = hci_inquiry_cache_lookup_resolve(hdev, bdaddr, NAME_PENDING);
- if (e) {
+ /* If the device was not found in a list of found devices names of which
+ * are pending. there is no need to continue resolving a next name as it
+ * will be done upon receiving another Remote Name Request Complete
+ * Event */
+ if (!e)
+ return;
+
+ list_del(&e->list);
+ if (name) {
e->name_state = NAME_KNOWN;
- list_del(&e->list);
- if (name)
- mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
- e->data.rssi, name, name_len);
+ mgmt_remote_name(hdev, bdaddr, ACL_LINK, 0x00,
+ e->data.rssi, name, name_len);
+ } else {
+ e->name_state = NAME_NOT_KNOWN;
}
if (hci_resolve_next_name(hdev))
@@ -1614,43 +1626,30 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
{
- struct hci_cp_le_create_conn *cp;
struct hci_conn *conn;
BT_DBG("%s status 0x%2.2x", hdev->name, status);
- cp = hci_sent_cmd_data(hdev, HCI_OP_LE_CREATE_CONN);
- if (!cp)
- return;
+ if (status) {
+ hci_dev_lock(hdev);
- hci_dev_lock(hdev);
+ conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (!conn) {
+ hci_dev_unlock(hdev);
+ return;
+ }
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr);
+ BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&conn->dst),
+ conn);
- BT_DBG("%s bdaddr %s conn %p", hdev->name, batostr(&cp->peer_addr),
- conn);
+ conn->state = BT_CLOSED;
+ mgmt_connect_failed(hdev, &conn->dst, conn->type,
+ conn->dst_type, status);
+ hci_proto_connect_cfm(conn, status);
+ hci_conn_del(conn);
- if (status) {
- if (conn && conn->state == BT_CONNECT) {
- conn->state = BT_CLOSED;
- mgmt_connect_failed(hdev, &cp->peer_addr, conn->type,
- conn->dst_type, status);
- hci_proto_connect_cfm(conn, status);
- hci_conn_del(conn);
- }
- } else {
- if (!conn) {
- conn = hci_conn_add(hdev, LE_LINK, &cp->peer_addr);
- if (conn) {
- conn->dst_type = cp->peer_addr_type;
- conn->out = true;
- } else {
- BT_ERR("No memory for new connection");
- }
- }
+ hci_dev_unlock(hdev);
}
-
- hci_dev_unlock(hdev);
}
static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status)
@@ -1762,7 +1761,12 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
hci_conn_hold(conn);
- conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+
+ if (!conn->out && !hci_conn_ssp_enabled(conn) &&
+ !hci_find_link_key(hdev, &ev->bdaddr))
+ conn->disc_timeout = HCI_PAIRING_TIMEOUT;
+ else
+ conn->disc_timeout = HCI_DISCONN_TIMEOUT;
} else
conn->state = BT_CONNECTED;
@@ -1888,6 +1892,22 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
}
+static u8 hci_to_mgmt_reason(u8 err)
+{
+ switch (err) {
+ case HCI_ERROR_CONNECTION_TIMEOUT:
+ return MGMT_DEV_DISCONN_TIMEOUT;
+ case HCI_ERROR_REMOTE_USER_TERM:
+ case HCI_ERROR_REMOTE_LOW_RESOURCES:
+ case HCI_ERROR_REMOTE_POWER_OFF:
+ return MGMT_DEV_DISCONN_REMOTE;
+ case HCI_ERROR_LOCAL_HOST_TERM:
+ return MGMT_DEV_DISCONN_LOCAL_HOST;
+ default:
+ return MGMT_DEV_DISCONN_UNKNOWN;
+ }
+}
+
static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_ev_disconn_complete *ev = (void *) skb->data;
@@ -1906,12 +1926,15 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags) &&
(conn->type == ACL_LINK || conn->type == LE_LINK)) {
- if (ev->status != 0)
+ if (ev->status) {
mgmt_disconnect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, ev->status);
- else
+ } else {
+ u8 reason = hci_to_mgmt_reason(ev->reason);
+
mgmt_device_disconnected(hdev, &conn->dst, conn->type,
- conn->dst_type);
+ conn->dst_type, reason);
+ }
}
if (ev->status == 0) {
@@ -3252,12 +3275,67 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev,
BT_DBG("%s", hdev->name);
- hci_dev_lock(hdev);
-
if (test_bit(HCI_MGMT, &hdev->dev_flags))
mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0);
+}
- hci_dev_unlock(hdev);
+static void hci_user_passkey_notify_evt(struct hci_dev *hdev,
+ struct sk_buff *skb)
+{
+ struct hci_ev_user_passkey_notify *ev = (void *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s", hdev->name);
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+ if (!conn)
+ return;
+
+ conn->passkey_notify = __le32_to_cpu(ev->passkey);
+ conn->passkey_entered = 0;
+
+ if (test_bit(HCI_MGMT, &hdev->dev_flags))
+ mgmt_user_passkey_notify(hdev, &conn->dst, conn->type,
+ conn->dst_type, conn->passkey_notify,
+ conn->passkey_entered);
+}
+
+static void hci_keypress_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_ev_keypress_notify *ev = (void *) skb->data;
+ struct hci_conn *conn;
+
+ BT_DBG("%s", hdev->name);
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+ if (!conn)
+ return;
+
+ switch (ev->type) {
+ case HCI_KEYPRESS_STARTED:
+ conn->passkey_entered = 0;
+ return;
+
+ case HCI_KEYPRESS_ENTERED:
+ conn->passkey_entered++;
+ break;
+
+ case HCI_KEYPRESS_ERASED:
+ conn->passkey_entered--;
+ break;
+
+ case HCI_KEYPRESS_CLEARED:
+ conn->passkey_entered = 0;
+ break;
+
+ case HCI_KEYPRESS_COMPLETED:
+ return;
+ }
+
+ if (test_bit(HCI_MGMT, &hdev->dev_flags))
+ mgmt_user_passkey_notify(hdev, &conn->dst, conn->type,
+ conn->dst_type, conn->passkey_notify,
+ conn->passkey_entered);
}
static void hci_simple_pair_complete_evt(struct hci_dev *hdev,
@@ -3279,7 +3357,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev,
* initiated the authentication. A traditional auth_complete
* event gets always produced as initiator and is also mapped to
* the mgmt_auth_failed event */
- if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && ev->status != 0)
+ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && ev->status)
mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type,
ev->status);
@@ -3350,11 +3428,23 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
- if (ev->status) {
- conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
- if (!conn)
+ conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (!conn) {
+ conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
+ if (!conn) {
+ BT_ERR("No memory for new connection");
goto unlock;
+ }
+ conn->dst_type = ev->bdaddr_type;
+
+ if (ev->role == LE_CONN_ROLE_MASTER) {
+ conn->out = true;
+ conn->link_mode |= HCI_LM_MASTER;
+ }
+ }
+
+ if (ev->status) {
mgmt_connect_failed(hdev, &conn->dst, conn->type,
conn->dst_type, ev->status);
hci_proto_connect_cfm(conn, ev->status);
@@ -3363,18 +3453,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
goto unlock;
}
- conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &ev->bdaddr);
- if (!conn) {
- conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr);
- if (!conn) {
- BT_ERR("No memory for new connection");
- hci_dev_unlock(hdev);
- return;
- }
-
- conn->dst_type = ev->bdaddr_type;
- }
-
if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
mgmt_device_connected(hdev, &ev->bdaddr, conn->type,
conn->dst_type, 0, NULL, 0, NULL);
@@ -3624,6 +3702,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_user_passkey_request_evt(hdev, skb);
break;
+ case HCI_EV_USER_PASSKEY_NOTIFY:
+ hci_user_passkey_notify_evt(hdev, skb);
+ break;
+
+ case HCI_EV_KEYPRESS_NOTIFY:
+ hci_keypress_notify_evt(hdev, skb);
+ break;
+
case HCI_EV_SIMPLE_PAIR_COMPLETE:
hci_simple_pair_complete_evt(hdev, skb);
break;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index a7f04de03d79..07f073935811 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -490,7 +490,7 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
switch (cmd) {
case HCISETRAW:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
return -EPERM;
@@ -510,12 +510,12 @@ static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
case HCIBLOCKADDR:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
return hci_sock_blacklist_add(hdev, (void __user *) arg);
case HCIUNBLOCKADDR:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
return hci_sock_blacklist_del(hdev, (void __user *) arg);
default:
@@ -546,22 +546,22 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
case HCIDEVUP:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
return hci_dev_open(arg);
case HCIDEVDOWN:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
return hci_dev_close(arg);
case HCIDEVRESET:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
return hci_dev_reset(arg);
case HCIDEVRESTAT:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
return hci_dev_reset_stat(arg);
case HCISETSCAN:
@@ -573,7 +573,7 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
case HCISETACLMTU:
case HCISETSCOMTU:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
return hci_dev_cmd(cmd, argp);
case HCIINQUIRY:
@@ -694,6 +694,7 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
*addr_len = sizeof(*haddr);
haddr->hci_family = AF_BLUETOOTH;
haddr->hci_dev = hdev->id;
+ haddr->hci_channel= 0;
release_sock(sk);
return 0;
@@ -1009,6 +1010,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname,
{
struct hci_filter *f = &hci_pi(sk)->filter;
+ memset(&uf, 0, sizeof(uf));
uf.type_mask = f->type_mask;
uf.opcode = f->opcode;
uf.event_mask[0] = *((u32 *) f->event_mask + 0);
@@ -1100,21 +1102,30 @@ int __init hci_sock_init(void)
return err;
err = bt_sock_register(BTPROTO_HCI, &hci_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("HCI socket registration failed");
+ goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create HCI proc file");
+ bt_sock_unregister(BTPROTO_HCI);
goto error;
+ }
BT_INFO("HCI socket layer initialized");
return 0;
error:
- BT_ERR("HCI socket registration failed");
proto_unregister(&hci_sk_proto);
return err;
}
void hci_sock_cleanup(void)
{
+ bt_procfs_cleanup(&init_net, "hci");
if (bt_sock_unregister(BTPROTO_HCI) < 0)
BT_ERR("HCI socket unregistration failed");
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 18b3f6892a36..82a829d90b0f 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -25,6 +25,10 @@
#include "hidp.h"
+static struct bt_sock_list hidp_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(hidp_sk_list.lock)
+};
+
static int hidp_sock_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -34,6 +38,8 @@ static int hidp_sock_release(struct socket *sock)
if (!sk)
return 0;
+ bt_sock_unlink(&hidp_sk_list, sk);
+
sock_orphan(sk);
sock_put(sk);
@@ -56,7 +62,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
switch (cmd) {
case HIDPCONNADD:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
if (copy_from_user(&ca, argp, sizeof(ca)))
return -EFAULT;
@@ -91,7 +97,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
case HIDPCONNDEL:
if (!capable(CAP_NET_ADMIN))
- return -EACCES;
+ return -EPERM;
if (copy_from_user(&cd, argp, sizeof(cd)))
return -EFAULT;
@@ -253,6 +259,8 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
sk->sk_protocol = protocol;
sk->sk_state = BT_OPEN;
+ bt_sock_link(&hidp_sk_list, sk);
+
return 0;
}
@@ -271,8 +279,19 @@ int __init hidp_init_sockets(void)
return err;
err = bt_sock_register(BTPROTO_HIDP, &hidp_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("Can't register HIDP socket");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create HIDP proc file");
+ bt_sock_unregister(BTPROTO_HIDP);
+ goto error;
+ }
+
+ BT_INFO("HIDP socket layer initialized");
return 0;
@@ -284,6 +303,7 @@ error:
void __exit hidp_cleanup_sockets(void)
{
+ bt_procfs_cleanup(&init_net, "hidp");
if (bt_sock_unregister(BTPROTO_HIDP) < 0)
BT_ERR("Can't unregister HIDP socket");
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a8964db04bfb..a91239dcda41 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -406,7 +406,7 @@ struct l2cap_chan *l2cap_chan_create(void)
chan->state = BT_OPEN;
- atomic_set(&chan->refcnt, 1);
+ kref_init(&chan->kref);
/* This flag is cleared in l2cap_chan_ready() */
set_bit(CONF_NOT_COMPLETE, &chan->conf_state);
@@ -416,13 +416,31 @@ struct l2cap_chan *l2cap_chan_create(void)
return chan;
}
-void l2cap_chan_destroy(struct l2cap_chan *chan)
+static void l2cap_chan_destroy(struct kref *kref)
{
+ struct l2cap_chan *chan = container_of(kref, struct l2cap_chan, kref);
+
+ BT_DBG("chan %p", chan);
+
write_lock(&chan_list_lock);
list_del(&chan->global_l);
write_unlock(&chan_list_lock);
- l2cap_chan_put(chan);
+ kfree(chan);
+}
+
+void l2cap_chan_hold(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount));
+
+ kref_get(&c->kref);
+}
+
+void l2cap_chan_put(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount));
+
+ kref_put(&c->kref, l2cap_chan_destroy);
}
void l2cap_chan_set_defaults(struct l2cap_chan *chan)
@@ -1008,7 +1026,7 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct l2cap_chan *c
if (!conn)
return;
- if (chan->mode == L2CAP_MODE_ERTM) {
+ if (chan->mode == L2CAP_MODE_ERTM && chan->state == BT_CONNECTED) {
__clear_retrans_timer(chan);
__clear_monitor_timer(chan);
__clear_ack_timer(chan);
@@ -1181,6 +1199,7 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn)
sk = chan->sk;
hci_conn_hold(conn->hcon);
+ conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
bacpy(&bt_sk(sk)->src, conn->src);
bacpy(&bt_sk(sk)->dst, conn->dst);
@@ -1198,14 +1217,15 @@ clean:
static void l2cap_conn_ready(struct l2cap_conn *conn)
{
struct l2cap_chan *chan;
+ struct hci_conn *hcon = conn->hcon;
BT_DBG("conn %p", conn);
- if (!conn->hcon->out && conn->hcon->type == LE_LINK)
+ if (!hcon->out && hcon->type == LE_LINK)
l2cap_le_conn_ready(conn);
- if (conn->hcon->out && conn->hcon->type == LE_LINK)
- smp_conn_security(conn, conn->hcon->pending_sec_level);
+ if (hcon->out && hcon->type == LE_LINK)
+ smp_conn_security(hcon, hcon->pending_sec_level);
mutex_lock(&conn->chan_lock);
@@ -1218,8 +1238,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
continue;
}
- if (conn->hcon->type == LE_LINK) {
- if (smp_conn_security(conn, chan->sec_level))
+ if (hcon->type == LE_LINK) {
+ if (smp_conn_security(hcon, chan->sec_level))
l2cap_chan_ready(chan);
} else if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
@@ -1429,7 +1449,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
int err;
BT_DBG("%s -> %s (type %u) psm 0x%2.2x", batostr(src), batostr(dst),
- dst_type, __le16_to_cpu(chan->psm));
+ dst_type, __le16_to_cpu(psm));
hdev = hci_get_route(dst, src);
if (!hdev)
@@ -5329,7 +5349,7 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr)
return exact ? lm1 : lm2;
}
-int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
+void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
{
struct l2cap_conn *conn;
@@ -5342,7 +5362,6 @@ int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
} else
l2cap_conn_del(hcon, bt_to_errno(status));
- return 0;
}
int l2cap_disconn_ind(struct hci_conn *hcon)
@@ -5356,12 +5375,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
return conn->disc_reason;
}
-int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
+void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
{
BT_DBG("hcon %p reason %d", hcon, reason);
l2cap_conn_del(hcon, bt_to_errno(reason));
- return 0;
}
static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)
@@ -5404,6 +5422,11 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
BT_DBG("chan %p scid 0x%4.4x state %s", chan, chan->scid,
state_to_string(chan->state));
+ if (chan->chan_type == L2CAP_CHAN_CONN_FIX_A2MP) {
+ l2cap_chan_unlock(chan);
+ continue;
+ }
+
if (chan->scid == L2CAP_CID_LE_DATA) {
if (!status && encrypt) {
chan->sec_level = hcon->sec_level;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a4bb27e8427e..083f2bf065d4 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -34,6 +34,10 @@
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/smp.h>
+static struct bt_sock_list l2cap_sk_list = {
+ .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock)
+};
+
static const struct proto_ops l2cap_sock_ops;
static void l2cap_sock_init(struct sock *sk, struct sock *parent);
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio);
@@ -245,6 +249,7 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
BT_DBG("sock %p, sk %p", sock, sk);
+ memset(la, 0, sizeof(struct sockaddr_l2));
addr->sa_family = AF_BLUETOOTH;
*len = sizeof(struct sockaddr_l2);
@@ -615,7 +620,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
break;
}
- if (smp_conn_security(conn, sec.level))
+ if (smp_conn_security(conn->hcon, sec.level))
break;
sk->sk_state = BT_CONFIG;
chan->state = BT_CONFIG;
@@ -823,7 +828,7 @@ static void l2cap_sock_kill(struct sock *sk)
/* Kill poor orphan */
- l2cap_chan_destroy(l2cap_pi(sk)->chan);
+ l2cap_chan_put(l2cap_pi(sk)->chan);
sock_set_flag(sk, SOCK_DEAD);
sock_put(sk);
}
@@ -886,6 +891,8 @@ static int l2cap_sock_release(struct socket *sock)
if (!sk)
return 0;
+ bt_sock_unlink(&l2cap_sk_list, sk);
+
err = l2cap_sock_shutdown(sock, 2);
sock_orphan(sk);
@@ -1174,7 +1181,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p
chan = l2cap_chan_create();
if (!chan) {
- l2cap_sock_kill(sk);
+ sk_free(sk);
return NULL;
}
@@ -1210,6 +1217,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
return -ENOMEM;
l2cap_sock_init(sk, NULL);
+ bt_sock_link(&l2cap_sk_list, sk);
return 0;
}
@@ -1248,21 +1256,30 @@ int __init l2cap_init_sockets(void)
return err;
err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("L2CAP socket registration failed");
goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create L2CAP proc file");
+ bt_sock_unregister(BTPROTO_L2CAP);
+ goto error;
+ }
BT_INFO("L2CAP socket layer initialized");
return 0;
error:
- BT_ERR("L2CAP socket registration failed");
proto_unregister(&l2cap_proto);
return err;
}
void l2cap_cleanup_sockets(void)
{
+ bt_procfs_cleanup(&init_net, "l2cap");
if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
BT_ERR("L2CAP socket unregistration failed");
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ad6613d17ca6..aa2ea0a8142c 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -35,7 +35,7 @@
bool enable_hs;
#define MGMT_VERSION 1
-#define MGMT_REVISION 1
+#define MGMT_REVISION 2
static const u16 mgmt_commands[] = {
MGMT_OP_READ_INDEX_LIST,
@@ -99,6 +99,7 @@ static const u16 mgmt_events[] = {
MGMT_EV_DEVICE_BLOCKED,
MGMT_EV_DEVICE_UNBLOCKED,
MGMT_EV_DEVICE_UNPAIRED,
+ MGMT_EV_PASSKEY_NOTIFY,
};
/*
@@ -193,6 +194,11 @@ static u8 mgmt_status_table[] = {
MGMT_STATUS_CONNECT_FAILED, /* MAC Connection Failed */
};
+bool mgmt_valid_hdev(struct hci_dev *hdev)
+{
+ return hdev->dev_type == HCI_BREDR;
+}
+
static u8 mgmt_status(u8 hci_status)
{
if (hci_status < ARRAY_SIZE(mgmt_status_table))
@@ -317,7 +323,6 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
u16 data_len)
{
struct mgmt_rp_read_index_list *rp;
- struct list_head *p;
struct hci_dev *d;
size_t rp_len;
u16 count;
@@ -328,7 +333,10 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
read_lock(&hci_dev_list_lock);
count = 0;
- list_for_each(p, &hci_dev_list) {
+ list_for_each_entry(d, &hci_dev_list, list) {
+ if (!mgmt_valid_hdev(d))
+ continue;
+
count++;
}
@@ -346,6 +354,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
if (test_bit(HCI_SETUP, &d->dev_flags))
continue;
+ if (!mgmt_valid_hdev(d))
+ continue;
+
rp->index[i++] = cpu_to_le16(d->id);
BT_DBG("Added hci%u", d->id);
}
@@ -370,10 +381,10 @@ static u32 get_supported_settings(struct hci_dev *hdev)
settings |= MGMT_SETTING_DISCOVERABLE;
settings |= MGMT_SETTING_PAIRABLE;
- if (hdev->features[6] & LMP_SIMPLE_PAIR)
+ if (lmp_ssp_capable(hdev))
settings |= MGMT_SETTING_SSP;
- if (!(hdev->features[4] & LMP_NO_BREDR)) {
+ if (lmp_bredr_capable(hdev)) {
settings |= MGMT_SETTING_BREDR;
settings |= MGMT_SETTING_LINK_SECURITY;
}
@@ -381,7 +392,7 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (enable_hs)
settings |= MGMT_SETTING_HS;
- if (hdev->features[4] & LMP_LE)
+ if (lmp_le_capable(hdev))
settings |= MGMT_SETTING_LE;
return settings;
@@ -403,7 +414,7 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (test_bit(HCI_PAIRABLE, &hdev->dev_flags))
settings |= MGMT_SETTING_PAIRABLE;
- if (!(hdev->features[4] & LMP_NO_BREDR))
+ if (lmp_bredr_capable(hdev))
settings |= MGMT_SETTING_BREDR;
if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags))
@@ -1111,7 +1122,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_dev_lock(hdev);
- if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) {
+ if (!lmp_ssp_capable(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_SET_SSP,
MGMT_STATUS_NOT_SUPPORTED);
goto failed;
@@ -1195,7 +1206,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
hci_dev_lock(hdev);
- if (!(hdev->features[4] & LMP_LE)) {
+ if (!lmp_le_capable(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_SET_LE,
MGMT_STATUS_NOT_SUPPORTED);
goto unlock;
@@ -2191,7 +2202,7 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- if (!(hdev->features[6] & LMP_SIMPLE_PAIR)) {
+ if (!lmp_ssp_capable(hdev)) {
err = cmd_status(sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA,
MGMT_STATUS_NOT_SUPPORTED);
goto unlock;
@@ -2820,6 +2831,9 @@ static void cmd_status_rsp(struct pending_cmd *cmd, void *data)
int mgmt_index_added(struct hci_dev *hdev)
{
+ if (!mgmt_valid_hdev(hdev))
+ return -ENOTSUPP;
+
return mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL);
}
@@ -2827,6 +2841,9 @@ int mgmt_index_removed(struct hci_dev *hdev)
{
u8 status = MGMT_STATUS_INVALID_INDEX;
+ if (!mgmt_valid_hdev(hdev))
+ return -ENOTSUPP;
+
mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status);
return mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL);
@@ -2875,6 +2892,22 @@ int mgmt_powered(struct hci_dev *hdev, u8 powered)
if (scan)
hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+ if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
+ u8 ssp = 1;
+
+ hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
+ }
+
+ if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
+ struct hci_cp_write_le_host_supported cp;
+
+ cp.le = 1;
+ cp.simul = !!(hdev->features[6] & LMP_SIMUL_LE_BR);
+
+ hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED,
+ sizeof(cp), &cp);
+ }
+
update_class(hdev);
update_name(hdev, hdev->dev_name);
update_eir(hdev);
@@ -3061,16 +3094,17 @@ static void unpair_device_rsp(struct pending_cmd *cmd, void *data)
}
int mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
- u8 link_type, u8 addr_type)
+ u8 link_type, u8 addr_type, u8 reason)
{
- struct mgmt_addr_info ev;
+ struct mgmt_ev_device_disconnected ev;
struct sock *sk = NULL;
int err;
mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk);
- bacpy(&ev.bdaddr, bdaddr);
- ev.type = link_to_bdaddr(link_type, addr_type);
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = link_to_bdaddr(link_type, addr_type);
+ ev.reason = reason;
err = mgmt_event(MGMT_EV_DEVICE_DISCONNECTED, hdev, &ev, sizeof(ev),
sk);
@@ -3259,6 +3293,22 @@ int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr,
MGMT_OP_USER_PASSKEY_NEG_REPLY);
}
+int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 link_type, u8 addr_type, u32 passkey,
+ u8 entered)
+{
+ struct mgmt_ev_passkey_notify ev;
+
+ BT_DBG("%s", hdev->name);
+
+ bacpy(&ev.addr.bdaddr, bdaddr);
+ ev.addr.type = link_to_bdaddr(link_type, addr_type);
+ ev.passkey = __cpu_to_le32(passkey);
+ ev.entered = entered;
+
+ return mgmt_event(MGMT_EV_PASSKEY_NOTIFY, hdev, &ev, sizeof(ev), NULL);
+}
+
int mgmt_auth_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
u8 addr_type, u8 status)
{
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 7e1e59645c05..b3226f3658cf 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -528,6 +528,7 @@ static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int *
BT_DBG("sock %p, sk %p", sock, sk);
+ memset(sa, 0, sizeof(*sa));
sa->rc_family = AF_BLUETOOTH;
sa->rc_channel = rfcomm_pi(sk)->channel;
if (peer)
@@ -822,6 +823,7 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
}
sec.level = rfcomm_pi(sk)->sec_level;
+ sec.key_size = 0;
len = min_t(unsigned int, len, sizeof(sec));
if (copy_to_user(optval, (char *) &sec, len))
@@ -1033,8 +1035,17 @@ int __init rfcomm_init_sockets(void)
return err;
err = bt_sock_register(BTPROTO_RFCOMM, &rfcomm_sock_family_ops);
- if (err < 0)
+ if (err < 0) {
+ BT_ERR("RFCOMM socket layer registration failed");
+ goto error;
+ }
+
+ err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create RFCOMM proc file");
+ bt_sock_unregister(BTPROTO_RFCOMM);
goto error;
+ }
if (bt_debugfs) {
rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444,
@@ -1048,13 +1059,14 @@ int __init rfcomm_init_sockets(void)
return 0;
error:
- BT_ERR("RFCOMM socket layer registration failed");
proto_unregister(&rfcomm_proto);
return err;
}
void __exit rfcomm_cleanup_sockets(void)
{
+ bt_procfs_cleanup(&init_net, "rfcomm");
+
debugfs_remove(rfcomm_sock_debugfs);
if (bt_sock_unregister(BTPROTO_RFCOMM) < 0)
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index cb960773c002..ccc248791d50 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -278,8 +278,8 @@ out:
if (err < 0)
goto free;
- dev->tty_dev = tty_register_device(rfcomm_tty_driver, dev->id, NULL);
-
+ dev->tty_dev = tty_port_register_device(&dev->port, rfcomm_tty_driver,
+ dev->id, NULL);
if (IS_ERR(dev->tty_dev)) {
err = PTR_ERR(dev->tty_dev);
list_del(&dev->list);
@@ -456,7 +456,7 @@ static int rfcomm_get_dev_list(void __user *arg)
size = sizeof(*dl) + dev_num * sizeof(*di);
- dl = kmalloc(size, GFP_KERNEL);
+ dl = kzalloc(size, GFP_KERNEL);
if (!dl)
return -ENOMEM;
@@ -705,9 +705,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
break;
}
- tty_unlock();
+ tty_unlock(tty);
schedule();
- tty_lock();
+ tty_lock(tty);
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&dev->wait, &wait);
@@ -861,7 +861,7 @@ static int rfcomm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned l
static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
{
- struct ktermios *new = tty->termios;
+ struct ktermios *new = &tty->termios;
int old_baud_rate = tty_termios_baud_rate(old);
int new_baud_rate = tty_termios_baud_rate(new);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 40bbe25dcff7..dc42b917aaaf 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -131,6 +131,15 @@ static int sco_conn_del(struct hci_conn *hcon, int err)
sco_sock_clear_timer(sk);
sco_chan_del(sk, err);
bh_unlock_sock(sk);
+
+ sco_conn_lock(conn);
+ conn->sk = NULL;
+ sco_pi(sk)->conn = NULL;
+ sco_conn_unlock(conn);
+
+ if (conn->hcon)
+ hci_conn_put(conn->hcon);
+
sco_sock_kill(sk);
}
@@ -821,16 +830,6 @@ static void sco_chan_del(struct sock *sk, int err)
BT_DBG("sk %p, conn %p, err %d", sk, conn, err);
- if (conn) {
- sco_conn_lock(conn);
- conn->sk = NULL;
- sco_pi(sk)->conn = NULL;
- sco_conn_unlock(conn);
-
- if (conn->hcon)
- hci_conn_put(conn->hcon);
- }
-
sk->sk_state = BT_CLOSED;
sk->sk_err = err;
sk->sk_state_change(sk);
@@ -913,7 +912,7 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr)
return lm;
}
-int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
{
BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
if (!status) {
@@ -924,16 +923,13 @@ int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
sco_conn_ready(conn);
} else
sco_conn_del(hcon, bt_to_errno(status));
-
- return 0;
}
-int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
+void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
{
BT_DBG("hcon %p reason %d", hcon, reason);
sco_conn_del(hcon, bt_to_errno(reason));
- return 0;
}
int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
@@ -1026,6 +1022,13 @@ int __init sco_init(void)
goto error;
}
+ err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL);
+ if (err < 0) {
+ BT_ERR("Failed to create SCO proc file");
+ bt_sock_unregister(BTPROTO_SCO);
+ goto error;
+ }
+
if (bt_debugfs) {
sco_debugfs = debugfs_create_file("sco", 0444, bt_debugfs,
NULL, &sco_debugfs_fops);
@@ -1044,6 +1047,8 @@ error:
void __exit sco_exit(void)
{
+ bt_procfs_cleanup(&init_net, "sco");
+
debugfs_remove(sco_debugfs);
if (bt_sock_unregister(BTPROTO_SCO) < 0)
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 16ef0dc85a0a..8c225ef349cd 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -267,10 +267,10 @@ static void smp_failure(struct l2cap_conn *conn, u8 reason, u8 send)
mgmt_auth_failed(conn->hcon->hdev, conn->dst, hcon->type,
hcon->dst_type, reason);
- if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags)) {
- cancel_delayed_work_sync(&conn->security_timer);
+ cancel_delayed_work_sync(&conn->security_timer);
+
+ if (test_and_clear_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags))
smp_chan_destroy(conn);
- }
}
#define JUST_WORKS 0x00
@@ -579,8 +579,11 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb)
if (!test_and_set_bit(HCI_CONN_LE_SMP_PEND, &conn->hcon->flags))
smp = smp_chan_create(conn);
+ else
+ smp = conn->smp_chan;
- smp = conn->smp_chan;
+ if (!smp)
+ return SMP_UNSPECIFIED;
smp->preq[0] = SMP_CMD_PAIRING_REQ;
memcpy(&smp->preq[1], req, sizeof(*req));
@@ -757,9 +760,9 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb)
return 0;
}
-int smp_conn_security(struct l2cap_conn *conn, __u8 sec_level)
+int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
{
- struct hci_conn *hcon = conn->hcon;
+ struct l2cap_conn *conn = hcon->l2cap_data;
struct smp_chan *smp = conn->smp_chan;
__u8 authreq;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 333484537600..070e8a68cfc6 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -31,9 +31,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct net_bridge_mdb_entry *mdst;
struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
+ rcu_read_lock();
#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
br_nf_pre_routing_finish_bridge_slow(skb);
+ rcu_read_unlock();
return NETDEV_TX_OK;
}
#endif
@@ -48,7 +50,6 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
- rcu_read_lock();
if (is_broadcast_ether_addr(dest))
br_flood_deliver(br, skb);
else if (is_multicast_ether_addr(dest)) {
@@ -206,24 +207,23 @@ static void br_poll_controller(struct net_device *br_dev)
static void br_netpoll_cleanup(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
- struct net_bridge_port *p, *n;
+ struct net_bridge_port *p;
- list_for_each_entry_safe(p, n, &br->port_list, list) {
+ list_for_each_entry(p, &br->port_list, list)
br_netpoll_disable(p);
- }
}
-static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
+static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
+ gfp_t gfp)
{
struct net_bridge *br = netdev_priv(dev);
- struct net_bridge_port *p, *n;
+ struct net_bridge_port *p;
int err = 0;
- list_for_each_entry_safe(p, n, &br->port_list, list) {
+ list_for_each_entry(p, &br->port_list, list) {
if (!p->dev)
continue;
-
- err = br_netpoll_enable(p);
+ err = br_netpoll_enable(p, gfp);
if (err)
goto fail;
}
@@ -236,17 +236,17 @@ fail:
goto out;
}
-int br_netpoll_enable(struct net_bridge_port *p)
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
{
struct netpoll *np;
int err = 0;
- np = kzalloc(sizeof(*p->np), GFP_KERNEL);
+ np = kzalloc(sizeof(*p->np), gfp);
err = -ENOMEM;
if (!np)
goto out;
- err = __netpoll_setup(np, p->dev);
+ err = __netpoll_setup(np, p->dev, gfp);
if (err) {
kfree(np);
goto out;
@@ -267,11 +267,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
p->np = NULL;
- /* Wait for transmitting packets to finish before freeing. */
- synchronize_rcu_bh();
-
- __netpoll_cleanup(np);
- kfree(np);
+ __netpoll_free_rcu(np);
}
#endif
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index d21f32383517..d9576e6de2b8 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -312,7 +312,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
fe->is_local = f->is_local;
if (!f->is_static)
- fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated);
+ fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
++fe;
++num;
}
@@ -467,14 +467,14 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb)
static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb,
- u32 pid, u32 seq, int type, unsigned int flags)
+ u32 portid, u32 seq, int type, unsigned int flags)
{
unsigned long now = jiffies;
struct nda_cacheinfo ci;
struct nlmsghdr *nlh;
struct ndmsg *ndm;
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -555,7 +555,7 @@ int br_fdb_dump(struct sk_buff *skb,
goto skip;
if (fdb_fill_info(skb, br, f,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI) < 0)
@@ -608,8 +608,9 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
}
/* Add new permanent fdb entry with RTM_NEWNEIGH */
-int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
- unsigned char *addr, u16 nlh_flags)
+int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr, u16 nlh_flags)
{
struct net_bridge_port *p;
int err = 0;
@@ -639,7 +640,7 @@ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
return err;
}
-static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
+static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
{
struct net_bridge *br = p->br;
struct hlist_head *head = &br->hash[br_mac_hash(addr)];
@@ -655,7 +656,7 @@ static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
/* Remove neighbor entry with RTM_DELNEIGH */
int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
- unsigned char *addr)
+ const unsigned char *addr)
{
struct net_bridge_port *p;
int err;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index e9466d412707..02015a505d2a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -65,7 +65,7 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
skb->dev = to->dev;
- if (unlikely(netpoll_tx_running(to->dev))) {
+ if (unlikely(netpoll_tx_running(to->br->dev))) {
if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
kfree_skb(skb);
else {
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index e1144e1617be..1c8fdc3558cd 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -361,7 +361,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (err)
goto err2;
- if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
+ if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL))))
goto err3;
err = netdev_set_master(dev, br->dev);
@@ -427,6 +427,10 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
if (!p || p->br != br)
return -EINVAL;
+ /* Since more than one interface can be attached to a bridge,
+ * there still maybe an alternate path for netconsole to use;
+ * therefore there is no reason for a NETDEV_RELEASE event.
+ */
del_nbp(p);
spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index fe41260fbf38..093f527276a3 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -127,7 +127,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
goto skip;
if (br_fill_ifinfo(skb, port,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWLINK,
NLM_F_MULTI) < 0)
break;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a768b2408edf..9b278c4ebee1 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -316,7 +316,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
netpoll_send_skb(np, skb);
}
-extern int br_netpoll_enable(struct net_bridge_port *p);
+extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
extern void br_netpoll_disable(struct net_bridge_port *p);
#else
static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
@@ -329,7 +329,7 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
{
}
-static inline int br_netpoll_enable(struct net_bridge_port *p)
+static inline int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
{
return 0;
}
@@ -363,10 +363,10 @@ extern void br_fdb_update(struct net_bridge *br,
extern int br_fdb_delete(struct ndmsg *ndm,
struct net_device *dev,
- unsigned char *addr);
-extern int br_fdb_add(struct ndmsg *nlh,
+ const unsigned char *addr);
+extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[],
struct net_device *dev,
- unsigned char *addr,
+ const unsigned char *addr,
u16 nlh_flags);
extern int br_fdb_dump(struct sk_buff *skb,
struct netlink_callback *cb,
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index a6747e673426..c3530a81a33b 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -170,5 +170,5 @@ void br_stp_port_timer_init(struct net_bridge_port *p)
unsigned long br_timer_value(const struct timer_list *timer)
{
return timer_pending(timer)
- ? jiffies_to_clock_t(timer->expires - jiffies) : 0;
+ ? jiffies_delta_to_clock_t(timer->expires - jiffies) : 0;
}
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 6229b62749e8..13b36bdc76a7 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -27,7 +27,7 @@ struct brport_attribute {
};
#define BRPORT_ATTR(_name,_mode,_show,_store) \
-struct brport_attribute brport_attr_##_name = { \
+const struct brport_attribute brport_attr_##_name = { \
.attr = {.name = __stringify(_name), \
.mode = _mode }, \
.show = _show, \
@@ -164,7 +164,7 @@ static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router,
store_multicast_router);
#endif
-static struct brport_attribute *brport_attrs[] = {
+static const struct brport_attribute *brport_attrs[] = {
&brport_attr_path_cost,
&brport_attr_priority,
&brport_attr_port_id,
@@ -241,7 +241,7 @@ const struct sysfs_ops brport_sysfs_ops = {
int br_sysfs_addif(struct net_bridge_port *p)
{
struct net_bridge *br = p->br;
- struct brport_attribute **a;
+ const struct brport_attribute **a;
int err;
err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj,
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index f88ee537fb2b..92de5e5f9db2 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -80,7 +80,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
unsigned int bitmask;
spin_lock_bh(&ebt_log_lock);
- printk("<%c>%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
+ printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
'0' + loginfo->u.log.level, prefix,
in ? in->name : "", out ? out->name : "",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 19063473c71f..3476ec469740 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -298,8 +298,7 @@ static int __init ebt_ulog_init(void)
spin_lock_init(&ulog_buffers[i].lock);
}
- ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
- THIS_MODULE, &cfg);
+ ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
if (!ebtulognl)
ret = -ENOMEM;
else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 42e6bd094574..3c2e9dced9e0 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
static int __net_init frame_filter_net_init(struct net *net)
{
net->xt.frame_filter = ebt_register_table(net, &frame_filter);
- if (IS_ERR(net->xt.frame_filter))
- return PTR_ERR(net->xt.frame_filter);
- return 0;
+ return PTR_RET(net->xt.frame_filter);
}
static void __net_exit frame_filter_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 6dc2f878ae05..10871bc77908 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
static int __net_init frame_nat_net_init(struct net *net)
{
net->xt.frame_nat = ebt_register_table(net, &frame_nat);
- if (IS_ERR(net->xt.frame_nat))
- return PTR_ERR(net->xt.frame_nat);
- return 0;
+ return PTR_RET(net->xt.frame_nat);
}
static void __net_exit frame_nat_net_exit(struct net *net)
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 78f1cdad5b33..095259f83902 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -141,7 +141,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
err = sk_filter(sk, skb);
if (err)
return err;
- if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
+ if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) {
set_rx_flow_off(cf_sk);
net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n");
caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index dd485f6128e8..ba217e90765e 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -211,9 +211,10 @@ void caif_client_register_refcnt(struct cflayer *adapt_layer,
void (*put)(struct cflayer *lyr))
{
struct cfsrvl *service;
- service = container_of(adapt_layer->dn, struct cfsrvl, layer);
- WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL);
+ if (WARN_ON(adapt_layer == NULL || adapt_layer->dn == NULL))
+ return;
+ service = container_of(adapt_layer->dn, struct cfsrvl, layer);
service->hold = hold;
service->put = put;
}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 69771c04ba8f..e597733affb8 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -94,6 +94,10 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
/* check the version of IP */
ip_version = skb_header_pointer(skb, 0, 1, &buf);
+ if (!ip_version) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
switch (*ip_version >> 4) {
case 4:
diff --git a/net/can/gw.c b/net/can/gw.c
index b54d5e695b03..127879c55fb6 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -549,7 +549,7 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
if (idx < s_idx)
goto cont;
- if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).pid,
+ if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0)
break;
cont:
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index ba4323bce0e9..a8020293f342 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -17,6 +17,7 @@
#include <linux/string.h>
+#include <linux/ceph/ceph_features.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/debugfs.h>
#include <linux/ceph/decode.h>
@@ -83,7 +84,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
return -1;
}
} else {
- pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
memcpy(&client->fsid, fsid, sizeof(*fsid));
}
return 0;
@@ -460,27 +460,23 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
client->auth_err = 0;
client->extra_mon_dispatch = NULL;
- client->supported_features = CEPH_FEATURE_SUPPORTED_DEFAULT |
+ client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT |
supported_features;
- client->required_features = CEPH_FEATURE_REQUIRED_DEFAULT |
+ client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT |
required_features;
/* msgr */
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
- client->msgr = ceph_messenger_create(myaddr,
- client->supported_features,
- client->required_features);
- if (IS_ERR(client->msgr)) {
- err = PTR_ERR(client->msgr);
- goto fail;
- }
- client->msgr->nocrc = ceph_test_opt(client, NOCRC);
+ ceph_messenger_init(&client->msgr, myaddr,
+ client->supported_features,
+ client->required_features,
+ ceph_test_opt(client, NOCRC));
/* subsystems */
err = ceph_monc_init(&client->monc, client);
if (err < 0)
- goto fail_msgr;
+ goto fail;
err = ceph_osdc_init(&client->osdc, client);
if (err < 0)
goto fail_monc;
@@ -489,8 +485,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
fail_monc:
ceph_monc_stop(&client->monc);
-fail_msgr:
- ceph_messenger_destroy(client->msgr);
fail:
kfree(client);
return ERR_PTR(err);
@@ -501,6 +495,8 @@ void ceph_destroy_client(struct ceph_client *client)
{
dout("destroy_client %p\n", client);
+ atomic_set(&client->msgr.stopping, 1);
+
/* unmount */
ceph_osdc_stop(&client->osdc);
@@ -508,8 +504,6 @@ void ceph_destroy_client(struct ceph_client *client)
ceph_debugfs_client_cleanup(client);
- ceph_messenger_destroy(client->msgr);
-
ceph_destroy_options(client->options);
kfree(client);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index d7edc24333b8..35fce755ce10 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -306,7 +306,6 @@ static int crush_choose(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
- const unsigned int orig_tries = 5; /* attempts before we fall back to search */
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep);
@@ -351,8 +350,9 @@ static int crush_choose(const struct crush_map *map,
reject = 1;
goto reject;
}
- if (flocal >= (in->size>>1) &&
- flocal > orig_tries)
+ if (map->choose_local_fallback_tries > 0 &&
+ flocal >= (in->size>>1) &&
+ flocal > map->choose_local_fallback_tries)
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose(in, x, r);
@@ -422,13 +422,14 @@ reject:
ftotal++;
flocal++;
- if (collide && flocal < 3)
+ if (collide && flocal <= map->choose_local_tries)
/* retry locally a few times */
retry_bucket = 1;
- else if (flocal <= in->size + orig_tries)
+ else if (map->choose_local_fallback_tries > 0 &&
+ flocal <= in->size + map->choose_local_fallback_tries)
/* exhaustive bucket search */
retry_bucket = 1;
- else if (ftotal < 20)
+ else if (ftotal <= map->choose_total_tries)
/* then retry descent */
retry_descent = 1;
else
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index b780cb7947dd..9da7fdd3cd8a 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) {
struct ceph_crypto_key *ckey = key->payload.data;
ceph_crypto_key_destroy(ckey);
+ kfree(ckey);
}
struct key_type key_type_ceph = {
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 1919d1550d75..3572dc518bc9 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -16,7 +16,8 @@ struct ceph_crypto_key {
static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
{
- kfree(key->key);
+ if (key)
+ kfree(key->key);
}
extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 54b531a01121..38b5dc1823d4 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client)
snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
client->monc.auth->global_id);
+ dout("ceph_debugfs_client_init %p %s\n", client, name);
+
+ BUG_ON(client->debugfs_dir);
client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
if (!client->debugfs_dir)
goto out;
@@ -234,6 +237,7 @@ out:
void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
+ dout("ceph_debugfs_client_cleanup %p\n", client);
debugfs_remove(client->debugfs_osdmap);
debugfs_remove(client->debugfs_monmap);
debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 10255e81be79..159aa8bef9e7 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -29,6 +29,74 @@
* the sender.
*/
+/*
+ * We track the state of the socket on a given connection using
+ * values defined below. The transition to a new socket state is
+ * handled by a function which verifies we aren't coming from an
+ * unexpected state.
+ *
+ * --------
+ * | NEW* | transient initial state
+ * --------
+ * | con_sock_state_init()
+ * v
+ * ----------
+ * | CLOSED | initialized, but no socket (and no
+ * ---------- TCP connection)
+ * ^ \
+ * | \ con_sock_state_connecting()
+ * | ----------------------
+ * | \
+ * + con_sock_state_closed() \
+ * |+--------------------------- \
+ * | \ \ \
+ * | ----------- \ \
+ * | | CLOSING | socket event; \ \
+ * | ----------- await close \ \
+ * | ^ \ |
+ * | | \ |
+ * | + con_sock_state_closing() \ |
+ * | / \ | |
+ * | / --------------- | |
+ * | / \ v v
+ * | / --------------
+ * | / -----------------| CONNECTING | socket created, TCP
+ * | | / -------------- connect initiated
+ * | | | con_sock_state_connected()
+ * | | v
+ * -------------
+ * | CONNECTED | TCP connection established
+ * -------------
+ *
+ * State values for ceph_connection->sock_state; NEW is assumed to be 0.
+ */
+
+#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
+#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
+#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
+#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
+#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
+
+/*
+ * connection states
+ */
+#define CON_STATE_CLOSED 1 /* -> PREOPEN */
+#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
+#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
+#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
+#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
+#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
+
+/*
+ * ceph_connection flag bits
+ */
+#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
+ * messages on errors */
+#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
+#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
+#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
+#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
+
/* static tag bytes (protocol control messages) */
static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
@@ -147,72 +215,130 @@ void ceph_msgr_flush(void)
}
EXPORT_SYMBOL(ceph_msgr_flush);
+/* Connection socket state transition functions */
+
+static void con_sock_state_init(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
+
+static void con_sock_state_connecting(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTING);
+}
+
+static void con_sock_state_connected(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTED);
+}
+
+static void con_sock_state_closing(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSING);
+}
+
+static void con_sock_state_closed(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING &&
+ old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
/*
* socket callback functions
*/
/* data available on socket, or listen socket received a connect */
-static void ceph_data_ready(struct sock *sk, int count_unused)
+static void ceph_sock_data_ready(struct sock *sk, int count_unused)
{
struct ceph_connection *con = sk->sk_user_data;
+ if (atomic_read(&con->msgr->stopping)) {
+ return;
+ }
if (sk->sk_state != TCP_CLOSE_WAIT) {
- dout("ceph_data_ready on %p state = %lu, queueing work\n",
+ dout("%s on %p state = %lu, queueing work\n", __func__,
con, con->state);
queue_con(con);
}
}
/* socket has buffer space for writing */
-static void ceph_write_space(struct sock *sk)
+static void ceph_sock_write_space(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;
/* only queue to workqueue if there is data we want to write,
* and there is sufficient space in the socket buffer to accept
- * more data. clear SOCK_NOSPACE so that ceph_write_space()
+ * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
* doesn't get called again until try_write() fills the socket
* buffer. See net/ipv4/tcp_input.c:tcp_check_space()
* and net/core/stream.c:sk_stream_write_space().
*/
- if (test_bit(WRITE_PENDING, &con->state)) {
+ if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) {
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
- dout("ceph_write_space %p queueing write work\n", con);
+ dout("%s %p queueing write work\n", __func__, con);
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
queue_con(con);
}
} else {
- dout("ceph_write_space %p nothing to write\n", con);
+ dout("%s %p nothing to write\n", __func__, con);
}
}
/* socket's state has changed */
-static void ceph_state_change(struct sock *sk)
+static void ceph_sock_state_change(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;
- dout("ceph_state_change %p state = %lu sk_state = %u\n",
+ dout("%s %p state = %lu sk_state = %u\n", __func__,
con, con->state, sk->sk_state);
- if (test_bit(CLOSED, &con->state))
- return;
-
switch (sk->sk_state) {
case TCP_CLOSE:
- dout("ceph_state_change TCP_CLOSE\n");
+ dout("%s TCP_CLOSE\n", __func__);
case TCP_CLOSE_WAIT:
- dout("ceph_state_change TCP_CLOSE_WAIT\n");
- if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
- if (test_bit(CONNECTING, &con->state))
- con->error_msg = "connection failed";
- else
- con->error_msg = "socket closed";
- queue_con(con);
- }
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ con_sock_state_closing(con);
+ set_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+ queue_con(con);
break;
case TCP_ESTABLISHED:
- dout("ceph_state_change TCP_ESTABLISHED\n");
+ dout("%s TCP_ESTABLISHED\n", __func__);
+ con_sock_state_connected(con);
queue_con(con);
break;
default: /* Everything else is uninteresting */
@@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock,
{
struct sock *sk = sock->sk;
sk->sk_user_data = con;
- sk->sk_data_ready = ceph_data_ready;
- sk->sk_write_space = ceph_write_space;
- sk->sk_state_change = ceph_state_change;
+ sk->sk_data_ready = ceph_sock_data_ready;
+ sk->sk_write_space = ceph_sock_write_space;
+ sk->sk_state_change = ceph_sock_state_change;
}
@@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
+ con_sock_state_connecting(con);
ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
O_NONBLOCK);
if (ret == -EINPROGRESS) {
@@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
con->sock = sock;
-
return 0;
}
@@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
*/
static int con_close_socket(struct ceph_connection *con)
{
- int rc;
+ int rc = 0;
dout("con_close_socket on %p sock %p\n", con, con->sock);
- if (!con->sock)
- return 0;
- set_bit(SOCK_CLOSED, &con->state);
- rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
- sock_release(con->sock);
- con->sock = NULL;
- clear_bit(SOCK_CLOSED, &con->state);
+ if (con->sock) {
+ rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+ sock_release(con->sock);
+ con->sock = NULL;
+ }
+
+ /*
+ * Forcibly clear the SOCK_CLOSED flag. It gets set
+ * independent of the connection mutex, and we could have
+ * received a socket close event before we had the chance to
+ * shut the socket down.
+ */
+ clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+
+ con_sock_state_closed(con);
return rc;
}
@@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con)
static void ceph_msg_remove(struct ceph_msg *msg)
{
list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
+ msg->con->ops->put(msg->con);
+ msg->con = NULL;
+
ceph_msg_put(msg);
}
static void ceph_msg_remove_list(struct list_head *head)
@@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con)
ceph_msg_remove_list(&con->out_sent);
if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
+ con->ops->put(con);
}
con->connect_seq = 0;
@@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con)
*/
void ceph_con_close(struct ceph_connection *con)
{
+ mutex_lock(&con->mutex);
dout("con_close %p peer %s\n", con,
ceph_pr_addr(&con->peer_addr.in_addr));
- set_bit(CLOSED, &con->state); /* in case there's queued work */
- clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
- clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
- clear_bit(KEEPALIVE_PENDING, &con->state);
- clear_bit(WRITE_PENDING, &con->state);
- mutex_lock(&con->mutex);
+ con->state = CON_STATE_CLOSED;
+
+ clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
+ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_BACKOFF, &con->flags);
+
reset_connection(con);
con->peer_global_seq = 0;
cancel_delayed_work(&con->work);
+ con_close_socket(con);
mutex_unlock(&con->mutex);
- queue_con(con);
}
EXPORT_SYMBOL(ceph_con_close);
/*
* Reopen a closed connection, with a new peer address.
*/
-void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
+void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr)
{
+ mutex_lock(&con->mutex);
dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
- set_bit(OPENING, &con->state);
- clear_bit(CLOSED, &con->state);
+
+ BUG_ON(con->state != CON_STATE_CLOSED);
+ con->state = CON_STATE_PREOPEN;
+
+ con->peer_name.type = (__u8) entity_type;
+ con->peer_name.num = cpu_to_le64(entity_num);
+
memcpy(&con->peer_addr, addr, sizeof(*addr));
con->delay = 0; /* reset backoff memory */
+ mutex_unlock(&con->mutex);
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_open);
@@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con)
}
/*
- * generic get/put
- */
-struct ceph_connection *ceph_con_get(struct ceph_connection *con)
-{
- int nref = __atomic_add_unless(&con->nref, 1, 0);
-
- dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1);
-
- return nref ? con : NULL;
-}
-
-void ceph_con_put(struct ceph_connection *con)
-{
- int nref = atomic_dec_return(&con->nref);
-
- BUG_ON(nref < 0);
- if (nref == 0) {
- BUG_ON(con->sock);
- kfree(con);
- }
- dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref);
-}
-
-/*
* initialize a new connection.
*/
-void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
+void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr)
{
dout("con_init %p\n", con);
memset(con, 0, sizeof(*con));
- atomic_set(&con->nref, 1);
+ con->private = private;
+ con->ops = ops;
con->msgr = msgr;
+
+ con_sock_state_init(con);
+
mutex_init(&con->mutex);
INIT_LIST_HEAD(&con->out_queue);
INIT_LIST_HEAD(&con->out_sent);
INIT_DELAYED_WORK(&con->work, con_work);
+
+ con->state = CON_STATE_CLOSED;
}
EXPORT_SYMBOL(ceph_con_init);
@@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
return ret;
}
-static void ceph_con_out_kvec_reset(struct ceph_connection *con)
+static void con_out_kvec_reset(struct ceph_connection *con)
{
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
}
-static void ceph_con_out_kvec_add(struct ceph_connection *con,
+static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
int index;
@@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}
+#ifdef CONFIG_BLOCK
+static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+{
+ if (!bio) {
+ *iter = NULL;
+ *seg = 0;
+ return;
+ }
+ *iter = bio;
+ *seg = bio->bi_idx;
+}
+
+static void iter_bio_next(struct bio **bio_iter, int *seg)
+{
+ if (*bio_iter == NULL)
+ return;
+
+ BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+
+ (*seg)++;
+ if (*seg == (*bio_iter)->bi_vcnt)
+ init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
+}
+#endif
+
+static void prepare_write_message_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->out_msg;
+
+ BUG_ON(!msg);
+ BUG_ON(!msg->hdr.data_len);
+
+ /* initialize page iterator */
+ con->out_msg_pos.page = 0;
+ if (msg->pages)
+ con->out_msg_pos.page_pos = msg->page_alignment;
+ else
+ con->out_msg_pos.page_pos = 0;
+#ifdef CONFIG_BLOCK
+ if (msg->bio)
+ init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
+#endif
+ con->out_msg_pos.data_pos = 0;
+ con->out_msg_pos.did_page_crc = false;
+ con->out_more = 1; /* data + footer will follow */
+}
+
/*
* Prepare footer for currently outgoing message, and finish things
* off. Assumes out_kvec* are already valid.. we just add on to the end.
@@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con)
struct ceph_msg *m = con->out_msg;
int v = con->out_kvec_left;
+ m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
+
dout("prepare_write_message_footer %p\n", con);
con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
@@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con)
struct ceph_msg *m;
u32 crc;
- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
con->out_kvec_is_msg = true;
con->out_msg_done = false;
@@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con)
* TCP packet that's a good thing. */
if (con->in_seq > con->in_seq_acked) {
con->in_seq_acked = con->in_seq;
- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
&con->out_temp_ack);
}
+ BUG_ON(list_empty(&con->out_queue));
m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
con->out_msg = m;
+ BUG_ON(m->con != con);
/* put message on sent list */
ceph_msg_get(m);
@@ -576,18 +764,18 @@ static void prepare_write_message(struct ceph_connection *con)
BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
/* tag + hdr + front + middle */
- ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
- ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
+ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
+ con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
- ceph_con_out_kvec_add(con, m->middle->vec.iov_len,
+ con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
/* fill in crc (except data pages), footer */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
+ con->out_msg->footer.flags = 0;
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
@@ -597,28 +785,19 @@ static void prepare_write_message(struct ceph_connection *con)
con->out_msg->footer.middle_crc = cpu_to_le32(crc);
} else
con->out_msg->footer.middle_crc = 0;
- con->out_msg->footer.data_crc = 0;
- dout("prepare_write_message front_crc %u data_crc %u\n",
+ dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
/* is there a data payload? */
- if (le32_to_cpu(m->hdr.data_len) > 0) {
- /* initialize page iterator */
- con->out_msg_pos.page = 0;
- if (m->pages)
- con->out_msg_pos.page_pos = m->page_alignment;
- else
- con->out_msg_pos.page_pos = 0;
- con->out_msg_pos.data_pos = 0;
- con->out_msg_pos.did_page_crc = false;
- con->out_more = 1; /* data + footer will follow */
- } else {
+ con->out_msg->footer.data_crc = 0;
+ if (m->hdr.data_len)
+ prepare_write_message_data(con);
+ else
/* no, queue up footer too and be done */
prepare_write_message_footer(con);
- }
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}
/*
@@ -630,16 +809,16 @@ static void prepare_write_ack(struct ceph_connection *con)
con->in_seq_acked, con->in_seq);
con->in_seq_acked = con->in_seq;
- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
&con->out_temp_ack);
con->out_more = 1; /* more will follow.. eventually.. */
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}
/*
@@ -648,9 +827,9 @@ static void prepare_write_ack(struct ceph_connection *con)
static void prepare_write_keepalive(struct ceph_connection *con)
{
dout("prepare_write_keepalive %p\n", con);
- ceph_con_out_kvec_reset(con);
- ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
- set_bit(WRITE_PENDING, &con->state);
+ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}
/*
@@ -665,27 +844,21 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection
if (!con->ops->get_authorizer) {
con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
con->out_connect.authorizer_len = 0;
-
return NULL;
}
/* Can't hold the mutex while getting authorizer */
-
mutex_unlock(&con->mutex);
-
auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
-
mutex_lock(&con->mutex);
if (IS_ERR(auth))
return auth;
- if (test_bit(CLOSED, &con->state) || test_bit(OPENING, &con->state))
+ if (con->state != CON_STATE_NEGOTIATING)
return ERR_PTR(-EAGAIN);
con->auth_reply_buf = auth->authorizer_reply_buf;
con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
-
-
return auth;
}
@@ -694,12 +867,12 @@ static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection
*/
static void prepare_write_banner(struct ceph_connection *con)
{
- ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
- ceph_con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
+ con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
+ con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
&con->msgr->my_enc_addr);
con->out_more = 0;
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}
static int prepare_write_connect(struct ceph_connection *con)
@@ -742,14 +915,14 @@ static int prepare_write_connect(struct ceph_connection *con)
con->out_connect.authorizer_len = auth ?
cpu_to_le32(auth->authorizer_buf_len) : 0;
- ceph_con_out_kvec_add(con, sizeof (con->out_connect),
+ con_out_kvec_add(con, sizeof (con->out_connect),
&con->out_connect);
if (auth && auth->authorizer_buf_len)
- ceph_con_out_kvec_add(con, auth->authorizer_buf_len,
+ con_out_kvec_add(con, auth->authorizer_buf_len,
auth->authorizer_buf);
con->out_more = 0;
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
return 0;
}
@@ -797,30 +970,34 @@ out:
return ret; /* done! */
}
-#ifdef CONFIG_BLOCK
-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
+ size_t len, size_t sent, bool in_trail)
{
- if (!bio) {
- *iter = NULL;
- *seg = 0;
- return;
- }
- *iter = bio;
- *seg = bio->bi_idx;
-}
+ struct ceph_msg *msg = con->out_msg;
-static void iter_bio_next(struct bio **bio_iter, int *seg)
-{
- if (*bio_iter == NULL)
- return;
+ BUG_ON(!msg);
+ BUG_ON(!sent);
- BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+ con->out_msg_pos.data_pos += sent;
+ con->out_msg_pos.page_pos += sent;
+ if (sent < len)
+ return;
- (*seg)++;
- if (*seg == (*bio_iter)->bi_vcnt)
- init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
-}
+ BUG_ON(sent != len);
+ con->out_msg_pos.page_pos = 0;
+ con->out_msg_pos.page++;
+ con->out_msg_pos.did_page_crc = false;
+ if (in_trail)
+ list_move_tail(&page->lru,
+ &msg->trail->head);
+ else if (msg->pagelist)
+ list_move_tail(&page->lru,
+ &msg->pagelist->head);
+#ifdef CONFIG_BLOCK
+ else if (msg->bio)
+ iter_bio_next(&msg->bio_iter, &msg->bio_seg);
#endif
+}
/*
* Write as much message data payload as we can. If we finish, queue
@@ -837,41 +1014,36 @@ static int write_partial_msg_pages(struct ceph_connection *con)
bool do_datacrc = !con->msgr->nocrc;
int ret;
int total_max_write;
- int in_trail = 0;
- size_t trail_len = (msg->trail ? msg->trail->length : 0);
+ bool in_trail = false;
+ const size_t trail_len = (msg->trail ? msg->trail->length : 0);
+ const size_t trail_off = data_len - trail_len;
dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
- con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
+ con, msg, con->out_msg_pos.page, msg->nr_pages,
con->out_msg_pos.page_pos);
-#ifdef CONFIG_BLOCK
- if (msg->bio && !msg->bio_iter)
- init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
-#endif
-
+ /*
+ * Iterate through each page that contains data to be
+ * written, and send as much as possible for each.
+ *
+ * If we are calculating the data crc (the default), we will
+ * need to map the page. If we have no pages, they have
+ * been revoked, so use the zero page.
+ */
while (data_len > con->out_msg_pos.data_pos) {
struct page *page = NULL;
int max_write = PAGE_SIZE;
int bio_offset = 0;
- total_max_write = data_len - trail_len -
- con->out_msg_pos.data_pos;
-
- /*
- * if we are calculating the data crc (the default), we need
- * to map the page. if our pages[] has been revoked, use the
- * zero page.
- */
-
- /* have we reached the trail part of the data? */
- if (con->out_msg_pos.data_pos >= data_len - trail_len) {
- in_trail = 1;
+ in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
+ if (!in_trail)
+ total_max_write = trail_off - con->out_msg_pos.data_pos;
+ if (in_trail) {
total_max_write = data_len - con->out_msg_pos.data_pos;
page = list_first_entry(&msg->trail->head,
struct page, lru);
- max_write = PAGE_SIZE;
} else if (msg->pages) {
page = msg->pages[con->out_msg_pos.page];
} else if (msg->pagelist) {
@@ -894,52 +1066,32 @@ static int write_partial_msg_pages(struct ceph_connection *con)
if (do_datacrc && !con->out_msg_pos.did_page_crc) {
void *base;
- u32 crc;
- u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
+ u32 crc = le32_to_cpu(msg->footer.data_crc);
char *kaddr;
kaddr = kmap(page);
BUG_ON(kaddr == NULL);
base = kaddr + con->out_msg_pos.page_pos + bio_offset;
- crc = crc32c(tmpcrc, base, len);
- con->out_msg->footer.data_crc = cpu_to_le32(crc);
+ crc = crc32c(crc, base, len);
+ kunmap(page);
+ msg->footer.data_crc = cpu_to_le32(crc);
con->out_msg_pos.did_page_crc = true;
}
ret = ceph_tcp_sendpage(con->sock, page,
con->out_msg_pos.page_pos + bio_offset,
len, 1);
-
- if (do_datacrc)
- kunmap(page);
-
if (ret <= 0)
goto out;
- con->out_msg_pos.data_pos += ret;
- con->out_msg_pos.page_pos += ret;
- if (ret == len) {
- con->out_msg_pos.page_pos = 0;
- con->out_msg_pos.page++;
- con->out_msg_pos.did_page_crc = false;
- if (in_trail)
- list_move_tail(&page->lru,
- &msg->trail->head);
- else if (msg->pagelist)
- list_move_tail(&page->lru,
- &msg->pagelist->head);
-#ifdef CONFIG_BLOCK
- else if (msg->bio)
- iter_bio_next(&msg->bio_iter, &msg->bio_seg);
-#endif
- }
+ out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
}
dout("write_partial_msg_pages %p msg %p done\n", con, msg);
/* prepare and queue up footer, too */
if (!do_datacrc)
- con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
- ceph_con_out_kvec_reset(con);
+ msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+ con_out_kvec_reset(con);
prepare_write_message_footer(con);
ret = 1;
out:
@@ -1351,20 +1503,14 @@ static int process_banner(struct ceph_connection *con)
ceph_pr_addr(&con->msgr->inst.addr.in_addr));
}
- set_bit(NEGOTIATING, &con->state);
- prepare_read_connect(con);
return 0;
}
static void fail_protocol(struct ceph_connection *con)
{
reset_connection(con);
- set_bit(CLOSED, &con->state); /* in case there's queued work */
-
- mutex_unlock(&con->mutex);
- if (con->ops->bad_proto)
- con->ops->bad_proto(con);
- mutex_lock(&con->mutex);
+ BUG_ON(con->state != CON_STATE_NEGOTIATING);
+ con->state = CON_STATE_CLOSED;
}
static int process_connect(struct ceph_connection *con)
@@ -1407,7 +1553,7 @@ static int process_connect(struct ceph_connection *con)
return -1;
}
con->auth_retry = 1;
- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
@@ -1428,7 +1574,7 @@ static int process_connect(struct ceph_connection *con)
ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr));
reset_connection(con);
- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
@@ -1440,8 +1586,7 @@ static int process_connect(struct ceph_connection *con)
if (con->ops->peer_reset)
con->ops->peer_reset(con);
mutex_lock(&con->mutex);
- if (test_bit(CLOSED, &con->state) ||
- test_bit(OPENING, &con->state))
+ if (con->state != CON_STATE_NEGOTIATING)
return -EAGAIN;
break;
@@ -1454,7 +1599,7 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->out_connect.connect_seq),
le32_to_cpu(con->in_reply.connect_seq));
con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
@@ -1471,7 +1616,7 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.global_seq));
get_global_seq(con->msgr,
le32_to_cpu(con->in_reply.global_seq));
- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
ret = prepare_write_connect(con);
if (ret < 0)
return ret;
@@ -1489,7 +1634,10 @@ static int process_connect(struct ceph_connection *con)
fail_protocol(con);
return -1;
}
- clear_bit(CONNECTING, &con->state);
+
+ BUG_ON(con->state != CON_STATE_NEGOTIATING);
+ con->state = CON_STATE_OPEN;
+
con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
con->connect_seq++;
con->peer_features = server_feat;
@@ -1501,7 +1649,9 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.connect_seq));
if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
- set_bit(LOSSYTX, &con->state);
+ set_bit(CON_FLAG_LOSSYTX, &con->flags);
+
+ con->delay = 0; /* reset backoff memory */
prepare_read_tag(con);
break;
@@ -1587,10 +1737,7 @@ static int read_partial_message_section(struct ceph_connection *con,
return 1;
}
-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip);
-
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
static int read_partial_message_pages(struct ceph_connection *con,
struct page **pages,
@@ -1633,9 +1780,6 @@ static int read_partial_message_bio(struct ceph_connection *con,
void *p;
int ret, left;
- if (IS_ERR(bv))
- return PTR_ERR(bv);
-
left = min((int)(data_len - con->in_msg_pos.data_pos),
(int)(bv->bv_len - con->in_msg_pos.page_pos));
@@ -1672,7 +1816,6 @@ static int read_partial_message(struct ceph_connection *con)
int ret;
unsigned int front_len, middle_len, data_len;
bool do_datacrc = !con->msgr->nocrc;
- int skip;
u64 seq;
u32 crc;
@@ -1723,10 +1866,13 @@ static int read_partial_message(struct ceph_connection *con)
/* allocate message? */
if (!con->in_msg) {
+ int skip = 0;
+
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
- skip = 0;
- con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
+ ret = ceph_con_in_msg_alloc(con, &skip);
+ if (ret < 0)
+ return ret;
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
@@ -1737,11 +1883,9 @@ static int read_partial_message(struct ceph_connection *con)
con->in_seq++;
return 0;
}
- if (!con->in_msg) {
- con->error_msg =
- "error allocating memory for incoming message";
- return -ENOMEM;
- }
+
+ BUG_ON(!con->in_msg);
+ BUG_ON(con->in_msg->con != con);
m = con->in_msg;
m->front.iov_len = 0; /* haven't read it yet */
if (m->middle)
@@ -1753,6 +1897,11 @@ static int read_partial_message(struct ceph_connection *con)
else
con->in_msg_pos.page_pos = 0;
con->in_msg_pos.data_pos = 0;
+
+#ifdef CONFIG_BLOCK
+ if (m->bio)
+ init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
+#endif
}
/* front */
@@ -1769,10 +1918,6 @@ static int read_partial_message(struct ceph_connection *con)
if (ret <= 0)
return ret;
}
-#ifdef CONFIG_BLOCK
- if (m->bio && !m->bio_iter)
- init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
-#endif
/* (page) data */
while (con->in_msg_pos.data_pos < data_len) {
@@ -1783,7 +1928,7 @@ static int read_partial_message(struct ceph_connection *con)
return ret;
#ifdef CONFIG_BLOCK
} else if (m->bio) {
-
+ BUG_ON(!m->bio_iter);
ret = read_partial_message_bio(con,
&m->bio_iter, &m->bio_seg,
data_len, do_datacrc);
@@ -1837,8 +1982,11 @@ static void process_message(struct ceph_connection *con)
{
struct ceph_msg *msg;
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
msg = con->in_msg;
con->in_msg = NULL;
+ con->ops->put(con);
/* if first message, set peer_name */
if (con->peer_name.type == 0)
@@ -1858,7 +2006,6 @@ static void process_message(struct ceph_connection *con)
con->ops->dispatch(con, msg);
mutex_lock(&con->mutex);
- prepare_read_tag(con);
}
@@ -1870,22 +2017,19 @@ static int try_write(struct ceph_connection *con)
{
int ret = 1;
- dout("try_write start %p state %lu nref %d\n", con, con->state,
- atomic_read(&con->nref));
+ dout("try_write start %p state %lu\n", con, con->state);
more:
dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);
/* open the socket first? */
- if (con->sock == NULL) {
- ceph_con_out_kvec_reset(con);
+ if (con->state == CON_STATE_PREOPEN) {
+ BUG_ON(con->sock);
+ con->state = CON_STATE_CONNECTING;
+
+ con_out_kvec_reset(con);
prepare_write_banner(con);
- ret = prepare_write_connect(con);
- if (ret < 0)
- goto out;
prepare_read_banner(con);
- set_bit(CONNECTING, &con->state);
- clear_bit(NEGOTIATING, &con->state);
BUG_ON(con->in_msg);
con->in_tag = CEPH_MSGR_TAG_READY;
@@ -1932,7 +2076,7 @@ more_kvec:
}
do_next:
- if (!test_bit(CONNECTING, &con->state)) {
+ if (con->state == CON_STATE_OPEN) {
/* is anything else pending? */
if (!list_empty(&con->out_queue)) {
prepare_write_message(con);
@@ -1942,14 +2086,15 @@ do_next:
prepare_write_ack(con);
goto more;
}
- if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
+ if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING,
+ &con->flags)) {
prepare_write_keepalive(con);
goto more;
}
}
/* Nothing to do! */
- clear_bit(WRITE_PENDING, &con->state);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
dout("try_write nothing else to write.\n");
ret = 0;
out:
@@ -1966,38 +2111,46 @@ static int try_read(struct ceph_connection *con)
{
int ret = -1;
- if (!con->sock)
- return 0;
-
- if (test_bit(STANDBY, &con->state))
+more:
+ dout("try_read start on %p state %lu\n", con, con->state);
+ if (con->state != CON_STATE_CONNECTING &&
+ con->state != CON_STATE_NEGOTIATING &&
+ con->state != CON_STATE_OPEN)
return 0;
- dout("try_read start on %p\n", con);
+ BUG_ON(!con->sock);
-more:
dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
con->in_base_pos);
- /*
- * process_connect and process_message drop and re-take
- * con->mutex. make sure we handle a racing close or reopen.
- */
- if (test_bit(CLOSED, &con->state) ||
- test_bit(OPENING, &con->state)) {
- ret = -EAGAIN;
+ if (con->state == CON_STATE_CONNECTING) {
+ dout("try_read connecting\n");
+ ret = read_partial_banner(con);
+ if (ret <= 0)
+ goto out;
+ ret = process_banner(con);
+ if (ret < 0)
+ goto out;
+
+ BUG_ON(con->state != CON_STATE_CONNECTING);
+ con->state = CON_STATE_NEGOTIATING;
+
+ /*
+ * Received banner is good, exchange connection info.
+ * Do not reset out_kvec, as sending our banner raced
+ * with receiving peer banner after connect completed.
+ */
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ goto out;
+ prepare_read_connect(con);
+
+ /* Send connection info before awaiting response */
goto out;
}
- if (test_bit(CONNECTING, &con->state)) {
- if (!test_bit(NEGOTIATING, &con->state)) {
- dout("try_read connecting\n");
- ret = read_partial_banner(con);
- if (ret <= 0)
- goto out;
- ret = process_banner(con);
- if (ret < 0)
- goto out;
- }
+ if (con->state == CON_STATE_NEGOTIATING) {
+ dout("try_read negotiating\n");
ret = read_partial_connect(con);
if (ret <= 0)
goto out;
@@ -2007,6 +2160,8 @@ more:
goto more;
}
+ BUG_ON(con->state != CON_STATE_OPEN);
+
if (con->in_base_pos < 0) {
/*
* skipping + discarding content.
@@ -2040,7 +2195,8 @@ more:
prepare_read_ack(con);
break;
case CEPH_MSGR_TAG_CLOSE:
- set_bit(CLOSED, &con->state); /* fixme */
+ con_close_socket(con);
+ con->state = CON_STATE_CLOSED;
goto out;
default:
goto bad_tag;
@@ -2063,6 +2219,8 @@ more:
if (con->in_tag == CEPH_MSGR_TAG_READY)
goto more;
process_message(con);
+ if (con->state == CON_STATE_OPEN)
+ prepare_read_tag(con);
goto more;
}
if (con->in_tag == CEPH_MSGR_TAG_ACK) {
@@ -2091,12 +2249,6 @@ bad_tag:
*/
static void queue_con(struct ceph_connection *con)
{
- if (test_bit(DEAD, &con->state)) {
- dout("queue_con %p ignoring: DEAD\n",
- con);
- return;
- }
-
if (!con->ops->get(con)) {
dout("queue_con %p ref count 0\n", con);
return;
@@ -2121,7 +2273,26 @@ static void con_work(struct work_struct *work)
mutex_lock(&con->mutex);
restart:
- if (test_and_clear_bit(BACKOFF, &con->state)) {
+ if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
+ switch (con->state) {
+ case CON_STATE_CONNECTING:
+ con->error_msg = "connection failed";
+ break;
+ case CON_STATE_NEGOTIATING:
+ con->error_msg = "negotiation failed";
+ break;
+ case CON_STATE_OPEN:
+ con->error_msg = "socket closed";
+ break;
+ default:
+ dout("unrecognized con state %d\n", (int)con->state);
+ con->error_msg = "unrecognized con state";
+ BUG();
+ }
+ goto fault;
+ }
+
+ if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work,
round_jiffies_relative(con->delay))) {
@@ -2135,35 +2306,35 @@ restart:
}
}
- if (test_bit(STANDBY, &con->state)) {
+ if (con->state == CON_STATE_STANDBY) {
dout("con_work %p STANDBY\n", con);
goto done;
}
- if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
- dout("con_work CLOSED\n");
- con_close_socket(con);
+ if (con->state == CON_STATE_CLOSED) {
+ dout("con_work %p CLOSED\n", con);
+ BUG_ON(con->sock);
goto done;
}
- if (test_and_clear_bit(OPENING, &con->state)) {
- /* reopen w/ new peer */
+ if (con->state == CON_STATE_PREOPEN) {
dout("con_work OPENING\n");
- con_close_socket(con);
+ BUG_ON(con->sock);
}
- if (test_and_clear_bit(SOCK_CLOSED, &con->state))
- goto fault;
-
ret = try_read(con);
if (ret == -EAGAIN)
goto restart;
- if (ret < 0)
+ if (ret < 0) {
+ con->error_msg = "socket error on read";
goto fault;
+ }
ret = try_write(con);
if (ret == -EAGAIN)
goto restart;
- if (ret < 0)
+ if (ret < 0) {
+ con->error_msg = "socket error on write";
goto fault;
+ }
done:
mutex_unlock(&con->mutex);
@@ -2172,7 +2343,6 @@ done_unlocked:
return;
fault:
- mutex_unlock(&con->mutex);
ceph_fault(con); /* error/fault path */
goto done_unlocked;
}
@@ -2183,26 +2353,31 @@ fault:
* exponential backoff
*/
static void ceph_fault(struct ceph_connection *con)
+ __releases(con->mutex)
{
pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
- if (test_bit(LOSSYTX, &con->state)) {
- dout("fault on LOSSYTX channel\n");
- goto out;
- }
-
- mutex_lock(&con->mutex);
- if (test_bit(CLOSED, &con->state))
- goto out_unlock;
+ BUG_ON(con->state != CON_STATE_CONNECTING &&
+ con->state != CON_STATE_NEGOTIATING &&
+ con->state != CON_STATE_OPEN);
con_close_socket(con);
+ if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) {
+ dout("fault on LOSSYTX channel, marking CLOSED\n");
+ con->state = CON_STATE_CLOSED;
+ goto out_unlock;
+ }
+
if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
+ con->ops->put(con);
}
/* Requeue anything that hasn't been acked */
@@ -2211,12 +2386,13 @@ static void ceph_fault(struct ceph_connection *con)
/* If there are no messages queued or keepalive pending, place
* the connection in a STANDBY state */
if (list_empty(&con->out_queue) &&
- !test_bit(KEEPALIVE_PENDING, &con->state)) {
+ !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) {
dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
- clear_bit(WRITE_PENDING, &con->state);
- set_bit(STANDBY, &con->state);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ con->state = CON_STATE_STANDBY;
} else {
/* retry after a delay. */
+ con->state = CON_STATE_PREOPEN;
if (con->delay == 0)
con->delay = BASE_DELAY_INTERVAL;
else if (con->delay < MAX_DELAY_INTERVAL)
@@ -2237,13 +2413,12 @@ static void ceph_fault(struct ceph_connection *con)
* that when con_work restarts we schedule the
* delay then.
*/
- set_bit(BACKOFF, &con->state);
+ set_bit(CON_FLAG_BACKOFF, &con->flags);
}
}
out_unlock:
mutex_unlock(&con->mutex);
-out:
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
@@ -2260,18 +2435,14 @@ out:
/*
- * create a new messenger instance
+ * initialize a new messenger instance
*/
-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
- u32 supported_features,
- u32 required_features)
+void ceph_messenger_init(struct ceph_messenger *msgr,
+ struct ceph_entity_addr *myaddr,
+ u32 supported_features,
+ u32 required_features,
+ bool nocrc)
{
- struct ceph_messenger *msgr;
-
- msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
- if (msgr == NULL)
- return ERR_PTR(-ENOMEM);
-
msgr->supported_features = supported_features;
msgr->required_features = required_features;
@@ -2284,30 +2455,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
msgr->inst.addr.type = 0;
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
+ msgr->nocrc = nocrc;
- dout("messenger_create %p\n", msgr);
- return msgr;
-}
-EXPORT_SYMBOL(ceph_messenger_create);
+ atomic_set(&msgr->stopping, 0);
-void ceph_messenger_destroy(struct ceph_messenger *msgr)
-{
- dout("destroy %p\n", msgr);
- kfree(msgr);
- dout("destroyed messenger %p\n", msgr);
+ dout("%s %p\n", __func__, msgr);
}
-EXPORT_SYMBOL(ceph_messenger_destroy);
+EXPORT_SYMBOL(ceph_messenger_init);
static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
- if (test_and_clear_bit(STANDBY, &con->state)) {
- mutex_lock(&con->mutex);
+ if (con->state == CON_STATE_STANDBY) {
dout("clear_standby %p and ++connect_seq\n", con);
+ con->state = CON_STATE_PREOPEN;
con->connect_seq++;
- WARN_ON(test_bit(WRITE_PENDING, &con->state));
- WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
- mutex_unlock(&con->mutex);
+ WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags));
+ WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags));
}
}
@@ -2316,21 +2480,24 @@ static void clear_standby(struct ceph_connection *con)
*/
void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
{
- if (test_bit(CLOSED, &con->state)) {
- dout("con_send %p closed, dropping %p\n", con, msg);
- ceph_msg_put(msg);
- return;
- }
-
/* set src+dst */
msg->hdr.src = con->msgr->inst.name;
-
BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
-
msg->needs_out_seq = true;
- /* queue */
mutex_lock(&con->mutex);
+
+ if (con->state == CON_STATE_CLOSED) {
+ dout("con_send %p closed, dropping %p\n", con, msg);
+ ceph_msg_put(msg);
+ mutex_unlock(&con->mutex);
+ return;
+ }
+
+ BUG_ON(msg->con != NULL);
+ msg->con = con->ops->get(con);
+ BUG_ON(msg->con == NULL);
+
BUG_ON(!list_empty(&msg->list_head));
list_add_tail(&msg->list_head, &con->out_queue);
dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
@@ -2339,12 +2506,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
le32_to_cpu(msg->hdr.front_len),
le32_to_cpu(msg->hdr.middle_len),
le32_to_cpu(msg->hdr.data_len));
+
+ clear_standby(con);
mutex_unlock(&con->mutex);
/* if there wasn't anything waiting to send before, queue
* new work */
- clear_standby(con);
- if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
+ if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_send);
@@ -2352,24 +2520,34 @@ EXPORT_SYMBOL(ceph_con_send);
/*
* Revoke a message that was previously queued for send
*/
-void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
+void ceph_msg_revoke(struct ceph_msg *msg)
{
+ struct ceph_connection *con = msg->con;
+
+ if (!con)
+ return; /* Message not in our possession */
+
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p - was on queue\n", con, msg);
+ dout("%s %p msg %p - was on queue\n", __func__, con, msg);
list_del_init(&msg->list_head);
- ceph_msg_put(msg);
+ BUG_ON(msg->con == NULL);
+ msg->con->ops->put(msg->con);
+ msg->con = NULL;
msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("con_revoke %p msg %p - was sending\n", con, msg);
+ dout("%s %p msg %p - was sending\n", __func__, con, msg);
con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- ceph_msg_put(msg);
msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
}
mutex_unlock(&con->mutex);
}
@@ -2377,17 +2555,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
/*
* Revoke a message that we may be reading data into
*/
-void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
+void ceph_msg_revoke_incoming(struct ceph_msg *msg)
{
+ struct ceph_connection *con;
+
+ BUG_ON(msg == NULL);
+ if (!msg->con) {
+ dout("%s msg %p null con\n", __func__, msg);
+
+ return; /* Message not in our possession */
+ }
+
+ con = msg->con;
mutex_lock(&con->mutex);
- if (con->in_msg && con->in_msg == msg) {
+ if (con->in_msg == msg) {
unsigned int front_len = le32_to_cpu(con->in_hdr.front_len);
unsigned int middle_len = le32_to_cpu(con->in_hdr.middle_len);
unsigned int data_len = le32_to_cpu(con->in_hdr.data_len);
/* skip rest of message */
- dout("con_revoke_pages %p msg %p revoked\n", con, msg);
- con->in_base_pos = con->in_base_pos -
+ dout("%s %p msg %p revoked\n", __func__, con, msg);
+ con->in_base_pos = con->in_base_pos -
sizeof(struct ceph_msg_header) -
front_len -
middle_len -
@@ -2398,8 +2586,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
} else {
- dout("con_revoke_pages %p msg %p pages %p no-op\n",
- con, con->in_msg, msg);
+ dout("%s %p in_msg %p msg %p no-op\n",
+ __func__, con, con->in_msg, msg);
}
mutex_unlock(&con->mutex);
}
@@ -2410,9 +2598,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
void ceph_con_keepalive(struct ceph_connection *con)
{
dout("con_keepalive %p\n", con);
+ mutex_lock(&con->mutex);
clear_standby(con);
- if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
- test_and_set_bit(WRITE_PENDING, &con->state) == 0)
+ mutex_unlock(&con->mutex);
+ if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 &&
+ test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_keepalive);
@@ -2431,6 +2621,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
if (m == NULL)
goto out;
kref_init(&m->kref);
+
+ m->con = NULL;
INIT_LIST_HEAD(&m->list_head);
m->hdr.tid = 0;
@@ -2526,46 +2718,77 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
}
/*
- * Generic message allocator, for incoming messages.
+ * Allocate a message for receiving an incoming message on a
+ * connection, and save the result in con->in_msg. Uses the
+ * connection's private alloc_msg op if available.
+ *
+ * Returns 0 on success, or a negative error code.
+ *
+ * On success, if we set *skip = 1:
+ * - the next message should be skipped and ignored.
+ * - con->in_msg == NULL
+ * or if we set *skip = 0:
+ * - con->in_msg is non-null.
+ * On error (ENOMEM, EAGAIN, ...),
+ * - con->in_msg == NULL
*/
-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip)
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
{
+ struct ceph_msg_header *hdr = &con->in_hdr;
int type = le16_to_cpu(hdr->type);
int front_len = le32_to_cpu(hdr->front_len);
int middle_len = le32_to_cpu(hdr->middle_len);
- struct ceph_msg *msg = NULL;
- int ret;
+ int ret = 0;
+
+ BUG_ON(con->in_msg != NULL);
if (con->ops->alloc_msg) {
+ struct ceph_msg *msg;
+
mutex_unlock(&con->mutex);
msg = con->ops->alloc_msg(con, hdr, skip);
mutex_lock(&con->mutex);
- if (!msg || *skip)
- return NULL;
+ if (con->state != CON_STATE_OPEN) {
+ ceph_msg_put(msg);
+ return -EAGAIN;
+ }
+ con->in_msg = msg;
+ if (con->in_msg) {
+ con->in_msg->con = con->ops->get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ }
+ if (*skip) {
+ con->in_msg = NULL;
+ return 0;
+ }
+ if (!con->in_msg) {
+ con->error_msg =
+ "error allocating memory for incoming message";
+ return -ENOMEM;
+ }
}
- if (!msg) {
- *skip = 0;
- msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
- if (!msg) {
+ if (!con->in_msg) {
+ con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!con->in_msg) {
pr_err("unable to allocate msg type %d len %d\n",
type, front_len);
- return NULL;
+ return -ENOMEM;
}
- msg->page_alignment = le16_to_cpu(hdr->data_off);
+ con->in_msg->con = con->ops->get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
}
- memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
+ memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
- if (middle_len && !msg->middle) {
- ret = ceph_alloc_middle(con, msg);
+ if (middle_len && !con->in_msg->middle) {
+ ret = ceph_alloc_middle(con, con->in_msg);
if (ret < 0) {
- ceph_msg_put(msg);
- return NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
}
}
- return msg;
+ return ret;
}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index d0649a9655be..900ea0f043fc 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
monc->pending_auth = 1;
monc->m_auth->front.iov_len = len;
monc->m_auth->hdr.front_len = cpu_to_le32(len);
- ceph_con_revoke(monc->con, monc->m_auth);
+ ceph_msg_revoke(monc->m_auth);
ceph_msg_get(monc->m_auth); /* keep our ref */
- ceph_con_send(monc->con, monc->m_auth);
+ ceph_con_send(&monc->con, monc->m_auth);
}
/*
@@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
static void __close_session(struct ceph_mon_client *monc)
{
dout("__close_session closing mon%d\n", monc->cur_mon);
- ceph_con_revoke(monc->con, monc->m_auth);
- ceph_con_close(monc->con);
+ ceph_msg_revoke(monc->m_auth);
+ ceph_msg_revoke_incoming(monc->m_auth_reply);
+ ceph_msg_revoke(monc->m_subscribe);
+ ceph_msg_revoke_incoming(monc->m_subscribe_ack);
+ ceph_con_close(&monc->con);
monc->cur_mon = -1;
monc->pending_auth = 0;
ceph_auth_reset(monc->auth);
@@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc)
monc->want_next_osdmap = !!monc->want_next_osdmap;
dout("open_session mon%d opening\n", monc->cur_mon);
- monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON;
- monc->con->peer_name.num = cpu_to_le64(monc->cur_mon);
- ceph_con_open(monc->con,
+ ceph_con_open(&monc->con,
+ CEPH_ENTITY_TYPE_MON, monc->cur_mon,
&monc->monmap->mon_inst[monc->cur_mon].addr);
/* initiatiate authentication handshake */
@@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc)
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
- ceph_con_revoke(monc->con, msg);
- ceph_con_send(monc->con, ceph_msg_get(msg));
+ ceph_msg_revoke(msg);
+ ceph_con_send(&monc->con, ceph_msg_get(msg));
monc->sub_sent = jiffies | 1; /* never 0 */
}
@@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
if (monc->hunting) {
pr_info("mon%d %s session established\n",
monc->cur_mon,
- ceph_pr_addr(&monc->con->peer_addr.in_addr));
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
monc->hunting = false;
}
dout("handle_subscribe_ack after %d seconds\n", seconds);
@@ -309,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
EXPORT_SYMBOL(ceph_monc_open_session);
/*
+ * We require the fsid and global_id in order to initialize our
+ * debugfs dir.
+ */
+static bool have_debugfs_info(struct ceph_mon_client *monc)
+{
+ dout("have_debugfs_info fsid %d globalid %lld\n",
+ (int)monc->client->have_fsid, monc->auth->global_id);
+ return monc->client->have_fsid && monc->auth->global_id > 0;
+}
+
+/*
* The monitor responds with mount ack indicate mount success. The
* included client ticket allows the client to talk to MDSs and OSDs.
*/
@@ -318,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
struct ceph_client *client = monc->client;
struct ceph_monmap *monmap = NULL, *old = monc->monmap;
void *p, *end;
+ int had_debugfs_info, init_debugfs = 0;
mutex_lock(&monc->mutex);
+ had_debugfs_info = have_debugfs_info(monc);
+
dout("handle_monmap\n");
p = msg->front.iov_base;
end = p + msg->front.iov_len;
@@ -342,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
if (!client->have_fsid) {
client->have_fsid = true;
+ if (!had_debugfs_info && have_debugfs_info(monc)) {
+ pr_info("client%lld fsid %pU\n",
+ ceph_client_id(monc->client),
+ &monc->client->fsid);
+ init_debugfs = 1;
+ }
mutex_unlock(&monc->mutex);
- /*
- * do debugfs initialization without mutex to avoid
- * creating a locking dependency
- */
- ceph_debugfs_client_init(client);
+
+ if (init_debugfs) {
+ /*
+ * do debugfs initialization without mutex to avoid
+ * creating a locking dependency
+ */
+ ceph_debugfs_client_init(monc->client);
+ }
+
goto out_unlocked;
}
out:
@@ -439,6 +465,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
m = NULL;
} else {
dout("get_generic_reply %lld got %p\n", tid, req->reply);
+ *skip = 0;
m = ceph_msg_get(req->reply);
/*
* we don't need to track the connection reading into
@@ -461,7 +488,7 @@ static int do_generic_request(struct ceph_mon_client *monc,
req->request->hdr.tid = cpu_to_le64(req->tid);
__insert_generic_request(monc, req);
monc->num_generic_requests++;
- ceph_con_send(monc->con, ceph_msg_get(req->request));
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
mutex_unlock(&monc->mutex);
err = wait_for_completion_interruptible(&req->completion);
@@ -684,8 +711,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc)
for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
req = rb_entry(p, struct ceph_mon_generic_request, node);
- ceph_con_revoke(monc->con, req->request);
- ceph_con_send(monc->con, ceph_msg_get(req->request));
+ ceph_msg_revoke(req->request);
+ ceph_msg_revoke_incoming(req->reply);
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
}
}
@@ -705,7 +733,7 @@ static void delayed_work(struct work_struct *work)
__close_session(monc);
__open_session(monc); /* continue hunting */
} else {
- ceph_con_keepalive(monc->con);
+ ceph_con_keepalive(&monc->con);
__validate_auth(monc);
@@ -760,19 +788,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
goto out;
/* connection */
- monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
- if (!monc->con)
- goto out_monmap;
- ceph_con_init(monc->client->msgr, monc->con);
- monc->con->private = monc;
- monc->con->ops = &mon_con_ops;
-
/* authentication */
monc->auth = ceph_auth_init(cl->options->name,
cl->options->key);
if (IS_ERR(monc->auth)) {
err = PTR_ERR(monc->auth);
- goto out_con;
+ goto out_monmap;
}
monc->auth->want_keys =
CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
@@ -801,6 +822,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
if (!monc->m_auth)
goto out_auth_reply;
+ ceph_con_init(&monc->con, monc, &mon_con_ops,
+ &monc->client->msgr);
+
monc->cur_mon = -1;
monc->hunting = true;
monc->sub_renew_after = jiffies;
@@ -824,8 +848,6 @@ out_subscribe_ack:
ceph_msg_put(monc->m_subscribe_ack);
out_auth:
ceph_auth_destroy(monc->auth);
-out_con:
- monc->con->ops->put(monc->con);
out_monmap:
kfree(monc->monmap);
out:
@@ -841,10 +863,6 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
mutex_lock(&monc->mutex);
__close_session(monc);
- monc->con->private = NULL;
- monc->con->ops->put(monc->con);
- monc->con = NULL;
-
mutex_unlock(&monc->mutex);
/*
@@ -871,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
{
int ret;
int was_auth = 0;
+ int had_debugfs_info, init_debugfs = 0;
mutex_lock(&monc->mutex);
+ had_debugfs_info = have_debugfs_info(monc);
if (monc->auth->ops)
was_auth = monc->auth->ops->is_authenticated(monc->auth);
monc->pending_auth = 0;
@@ -888,14 +908,29 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
- monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num =
+ monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
+ monc->client->msgr.inst.name.num =
cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
__resend_generic_request(monc);
}
+
+ if (!had_debugfs_info && have_debugfs_info(monc)) {
+ pr_info("client%lld fsid %pU\n",
+ ceph_client_id(monc->client),
+ &monc->client->fsid);
+ init_debugfs = 1;
+ }
mutex_unlock(&monc->mutex);
+
+ if (init_debugfs) {
+ /*
+ * do debugfs initialization without mutex to avoid
+ * creating a locking dependency
+ */
+ ceph_debugfs_client_init(monc->client);
+ }
}
static int __validate_auth(struct ceph_mon_client *monc)
@@ -1000,6 +1035,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_MDS_MAP:
case CEPH_MSG_OSD_MAP:
m = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!m)
+ return NULL; /* ENOMEM--return skip == 0 */
break;
}
@@ -1029,7 +1066,7 @@ static void mon_fault(struct ceph_connection *con)
if (!monc->hunting)
pr_info("mon%d %s session lost, "
"hunting for new mon\n", monc->cur_mon,
- ceph_pr_addr(&monc->con->peer_addr.in_addr));
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
__close_session(monc);
if (!monc->hunting) {
@@ -1044,9 +1081,23 @@ out:
mutex_unlock(&monc->mutex);
}
+/*
+ * We can ignore refcounting on the connection struct, as all references
+ * will come from the messenger workqueue, which is drained prior to
+ * mon_client destruction.
+ */
+static struct ceph_connection *con_get(struct ceph_connection *con)
+{
+ return con;
+}
+
+static void con_put(struct ceph_connection *con)
+{
+}
+
static const struct ceph_connection_operations mon_con_ops = {
- .get = ceph_con_get,
- .put = ceph_con_put,
+ .get = con_get,
+ .put = con_put,
.dispatch = dispatch,
.fault = mon_fault,
.alloc_msg = mon_alloc_msg,
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index 11d5f4196a73..ddec1c10ac80 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
struct ceph_msgpool *pool = arg;
struct ceph_msg *msg;
- msg = ceph_msg_new(0, pool->front_len, gfp_mask, true);
+ msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
if (!msg) {
dout("msgpool_alloc %s failed\n", pool->name);
} else {
@@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg)
ceph_msg_put(msg);
}
-int ceph_msgpool_init(struct ceph_msgpool *pool,
+int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
int front_len, int size, bool blocking, const char *name)
{
dout("msgpool %s init\n", name);
+ pool->type = type;
pool->front_len = front_len;
pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
if (!pool->pool)
@@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
WARN_ON(1);
/* try to alloc a fresh message */
- return ceph_msg_new(0, front_len, GFP_NOFS, false);
+ return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
}
msg = mempool_alloc(pool->pool, GFP_NOFS);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ca59e66c9787..42119c05e82c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -140,10 +140,9 @@ void ceph_osdc_release_request(struct kref *kref)
if (req->r_request)
ceph_msg_put(req->r_request);
if (req->r_con_filling_msg) {
- dout("release_request revoking pages %p from con %p\n",
+ dout("%s revoking pages %p from con %p\n", __func__,
req->r_pages, req->r_con_filling_msg);
- ceph_con_revoke_message(req->r_con_filling_msg,
- req->r_reply);
+ ceph_msg_revoke_incoming(req->r_reply);
req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
}
if (req->r_reply)
@@ -214,10 +213,13 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
kref_init(&req->r_kref);
init_completion(&req->r_completion);
init_completion(&req->r_safe_completion);
+ rb_init_node(&req->r_node);
INIT_LIST_HEAD(&req->r_unsafe_item);
INIT_LIST_HEAD(&req->r_linger_item);
INIT_LIST_HEAD(&req->r_linger_osd);
INIT_LIST_HEAD(&req->r_req_lru_item);
+ INIT_LIST_HEAD(&req->r_osd_item);
+
req->r_flags = flags;
WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0);
@@ -243,6 +245,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
}
ceph_pagelist_init(req->r_trail);
}
+
/* create request message; allow space for oid */
msg_size += MAX_OBJ_NAME_SIZE;
if (snapc)
@@ -256,7 +259,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
return NULL;
}
- msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
memset(msg->front.iov_base, 0, msg->front.iov_len);
req->r_request = msg;
@@ -624,7 +626,7 @@ static void osd_reset(struct ceph_connection *con)
/*
* Track open sessions with osds.
*/
-static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
+static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
{
struct ceph_osd *osd;
@@ -634,15 +636,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
atomic_set(&osd->o_ref, 1);
osd->o_osdc = osdc;
+ osd->o_osd = onum;
INIT_LIST_HEAD(&osd->o_requests);
INIT_LIST_HEAD(&osd->o_linger_requests);
INIT_LIST_HEAD(&osd->o_osd_lru);
osd->o_incarnation = 1;
- ceph_con_init(osdc->client->msgr, &osd->o_con);
- osd->o_con.private = osd;
- osd->o_con.ops = &osd_con_ops;
- osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
+ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
INIT_LIST_HEAD(&osd->o_keepalive_item);
return osd;
@@ -688,7 +688,7 @@ static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
static void remove_all_osds(struct ceph_osd_client *osdc)
{
- dout("__remove_old_osds %p\n", osdc);
+ dout("%s %p\n", __func__, osdc);
mutex_lock(&osdc->request_mutex);
while (!RB_EMPTY_ROOT(&osdc->osds)) {
struct ceph_osd *osd = rb_entry(rb_first(&osdc->osds),
@@ -752,7 +752,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
ret = -EAGAIN;
} else {
ceph_con_close(&osd->o_con);
- ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
+ ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
+ &osdc->osdmap->osd_addr[osd->o_osd]);
osd->o_incarnation++;
}
return ret;
@@ -853,7 +854,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
if (req->r_osd) {
/* make sure the original request isn't in flight. */
- ceph_con_revoke(&req->r_osd->o_con, req->r_request);
+ ceph_msg_revoke(req->r_request);
list_del_init(&req->r_osd_item);
if (list_empty(&req->r_osd->o_requests) &&
@@ -880,7 +881,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
static void __cancel_request(struct ceph_osd_request *req)
{
if (req->r_sent && req->r_osd) {
- ceph_con_revoke(&req->r_osd->o_con, req->r_request);
+ ceph_msg_revoke(req->r_request);
req->r_sent = 0;
}
}
@@ -890,7 +891,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
{
dout("__register_linger_request %p\n", req);
list_add_tail(&req->r_linger_item, &osdc->req_linger);
- list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
+ if (req->r_osd)
+ list_add_tail(&req->r_linger_osd,
+ &req->r_osd->o_linger_requests);
}
static void __unregister_linger_request(struct ceph_osd_client *osdc,
@@ -998,18 +1001,18 @@ static int __map_request(struct ceph_osd_client *osdc,
req->r_osd = __lookup_osd(osdc, o);
if (!req->r_osd && o >= 0) {
err = -ENOMEM;
- req->r_osd = create_osd(osdc);
+ req->r_osd = create_osd(osdc, o);
if (!req->r_osd) {
list_move(&req->r_req_lru_item, &osdc->req_notarget);
goto out;
}
dout("map_request osd %p is osd%d\n", req->r_osd, o);
- req->r_osd->o_osd = o;
- req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
__insert_osd(osdc, req->r_osd);
- ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
+ ceph_con_open(&req->r_osd->o_con,
+ CEPH_ENTITY_TYPE_OSD, o,
+ &osdc->osdmap->osd_addr[o]);
}
if (req->r_osd) {
@@ -1304,8 +1307,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
mutex_lock(&osdc->request_mutex);
- for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+ for (p = rb_first(&osdc->requests); p; ) {
req = rb_entry(p, struct ceph_osd_request, r_node);
+ p = rb_next(p);
err = __map_request(osdc, req, force_resend);
if (err < 0)
continue; /* error */
@@ -1313,10 +1317,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("%p tid %llu maps to no osd\n", req, req->r_tid);
needmap++; /* request a newer map */
} else if (err > 0) {
- dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
- req->r_osd ? req->r_osd->o_osd : -1);
- if (!req->r_linger)
+ if (!req->r_linger) {
+ dout("%p tid %llu requeued on osd%d\n", req,
+ req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1);
req->r_flags |= CEPH_OSD_FLAG_RETRY;
+ }
+ }
+ if (req->r_linger && list_empty(&req->r_linger_item)) {
+ /*
+ * register as a linger so that we will
+ * re-submit below and get a new tid
+ */
+ dout("%p tid %llu restart on osd%d\n",
+ req, req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1);
+ __register_linger_request(osdc, req);
+ __unregister_request(osdc, req);
}
}
@@ -1391,7 +1408,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
epoch, maplen);
newmap = osdmap_apply_incremental(&p, next,
osdc->osdmap,
- osdc->client->msgr);
+ &osdc->client->msgr);
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad;
@@ -1839,11 +1856,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
if (!osdc->req_mempool)
goto out;
- err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
+ err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
+ OSD_OP_FRONT_LEN, 10, true,
"osd_op");
if (err < 0)
goto out_mempool;
- err = ceph_msgpool_init(&osdc->msgpool_op_reply,
+ err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
OSD_OPREPLY_FRONT_LEN, 10, true,
"osd_op_reply");
if (err < 0)
@@ -2019,15 +2037,15 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
if (!req) {
*skip = 1;
m = NULL;
- pr_info("get_reply unknown tid %llu from osd%d\n", tid,
- osd->o_osd);
+ dout("get_reply unknown tid %llu from osd%d\n", tid,
+ osd->o_osd);
goto out;
}
if (req->r_con_filling_msg) {
- dout("get_reply revoking msg %p from old con %p\n",
+ dout("%s revoking msg %p from old con %p\n", __func__,
req->r_reply, req->r_con_filling_msg);
- ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
+ ceph_msg_revoke_incoming(req->r_reply);
req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
req->r_con_filling_msg = NULL;
}
@@ -2080,6 +2098,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
int type = le16_to_cpu(hdr->type);
int front = le32_to_cpu(hdr->front_len);
+ *skip = 0;
switch (type) {
case CEPH_MSG_OSD_MAP:
case CEPH_MSG_WATCH_NOTIFY:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 81e3b84a77ef..3124b71a8883 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -135,6 +135,21 @@ bad:
return -EINVAL;
}
+static int skip_name_map(void **p, void *end)
+{
+ int len;
+ ceph_decode_32_safe(p, end, len ,bad);
+ while (len--) {
+ int strlen;
+ *p += sizeof(u32);
+ ceph_decode_32_safe(p, end, strlen, bad);
+ *p += strlen;
+}
+ return 0;
+bad:
+ return -EINVAL;
+}
+
static struct crush_map *crush_decode(void *pbyval, void *end)
{
struct crush_map *c;
@@ -143,6 +158,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
void **p = &pbyval;
void *start = pbyval;
u32 magic;
+ u32 num_name_maps;
dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
@@ -150,6 +166,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
if (c == NULL)
return ERR_PTR(-ENOMEM);
+ /* set tunables to default values */
+ c->choose_local_tries = 2;
+ c->choose_local_fallback_tries = 5;
+ c->choose_total_tries = 19;
+
ceph_decode_need(p, end, 4*sizeof(u32), bad);
magic = ceph_decode_32(p);
if (magic != CRUSH_MAGIC) {
@@ -297,7 +318,25 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
}
/* ignore trailing name maps. */
+ for (num_name_maps = 0; num_name_maps < 3; num_name_maps++) {
+ err = skip_name_map(p, end);
+ if (err < 0)
+ goto done;
+ }
+
+ /* tunables */
+ ceph_decode_need(p, end, 3*sizeof(u32), done);
+ c->choose_local_tries = ceph_decode_32(p);
+ c->choose_local_fallback_tries = ceph_decode_32(p);
+ c->choose_total_tries = ceph_decode_32(p);
+ dout("crush decode tunable choose_local_tries = %d",
+ c->choose_local_tries);
+ dout("crush decode tunable choose_local_fallback_tries = %d",
+ c->choose_local_fallback_tries);
+ dout("crush decode tunable choose_total_tries = %d",
+ c->choose_total_tries);
+done:
dout("crush_decode success\n");
return c;
@@ -488,15 +527,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
ceph_decode_32_safe(p, end, pool, bad);
ceph_decode_32_safe(p, end, len, bad);
dout(" pool %d len %d\n", pool, len);
+ ceph_decode_need(p, end, len, bad);
pi = __lookup_pg_pool(&map->pg_pools, pool);
if (pi) {
+ char *name = kstrndup(*p, len, GFP_NOFS);
+
+ if (!name)
+ return -ENOMEM;
kfree(pi->name);
- pi->name = kmalloc(len + 1, GFP_NOFS);
- if (pi->name) {
- memcpy(pi->name, *p, len);
- pi->name[len] = '\0';
- dout(" name is %s\n", pi->name);
- }
+ pi->name = name;
+ dout(" name is %s\n", pi->name);
}
*p += len;
}
@@ -666,6 +706,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
ceph_decode_copy(p, &pgid, sizeof(pgid));
n = ceph_decode_32(p);
+ err = -EINVAL;
+ if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
+ goto bad;
ceph_decode_need(p, end, n * sizeof(u32), bad);
err = -ENOMEM;
pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
@@ -889,6 +932,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
(void) __remove_pg_mapping(&map->pg_temp, pgid);
/* insert */
+ if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
+ err = -EINVAL;
+ goto bad;
+ }
pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
if (!pg) {
err = -ENOMEM;
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ebaea16632f..1e0a1847c3bb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -959,18 +959,30 @@ int dev_alloc_name(struct net_device *dev, const char *name)
}
EXPORT_SYMBOL(dev_alloc_name);
-static int dev_get_valid_name(struct net_device *dev, const char *name)
+static int dev_alloc_name_ns(struct net *net,
+ struct net_device *dev,
+ const char *name)
{
- struct net *net;
+ char buf[IFNAMSIZ];
+ int ret;
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
+ ret = __dev_alloc_name(net, name, buf);
+ if (ret >= 0)
+ strlcpy(dev->name, buf, IFNAMSIZ);
+ return ret;
+}
+
+static int dev_get_valid_name(struct net *net,
+ struct net_device *dev,
+ const char *name)
+{
+ BUG_ON(!net);
if (!dev_valid_name(name))
return -EINVAL;
if (strchr(name, '%'))
- return dev_alloc_name(dev, name);
+ return dev_alloc_name_ns(net, dev, name);
else if (__dev_get_by_name(net, name))
return -EEXIST;
else if (dev->name != name)
@@ -1006,7 +1018,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
memcpy(oldname, dev->name, IFNAMSIZ);
- err = dev_get_valid_name(dev, newname);
+ err = dev_get_valid_name(net, dev, newname);
if (err < 0)
return err;
@@ -1055,6 +1067,8 @@ rollback:
*/
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
+ char *new_ifalias;
+
ASSERT_RTNL();
if (len >= IFALIASZ)
@@ -1068,9 +1082,10 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
return 0;
}
- dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
- if (!dev->ifalias)
+ new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
+ if (!new_ifalias)
return -ENOMEM;
+ dev->ifalias = new_ifalias;
strlcpy(dev->ifalias, alias, len+1);
return len;
@@ -1106,11 +1121,23 @@ void netdev_state_change(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_state_change);
-int netdev_bonding_change(struct net_device *dev, unsigned long event)
+/**
+ * netdev_notify_peers - notify network peers about existence of @dev
+ * @dev: network device
+ *
+ * Generate traffic such that interested network peers are aware of
+ * @dev, such as by generating a gratuitous ARP. This may be used when
+ * a device wants to inform the rest of the network about some sort of
+ * reconfiguration such as a failover event or virtual machine
+ * migration.
+ */
+void netdev_notify_peers(struct net_device *dev)
{
- return call_netdevice_notifiers(event, dev);
+ rtnl_lock();
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+ rtnl_unlock();
}
-EXPORT_SYMBOL(netdev_bonding_change);
+EXPORT_SYMBOL(netdev_notify_peers);
/**
* dev_load - load a network module
@@ -1172,6 +1199,7 @@ static int __dev_open(struct net_device *dev)
net_dmaengine_get();
dev_set_rx_mode(dev);
dev_activate(dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
}
return ret;
@@ -1390,7 +1418,6 @@ rollback:
nb->notifier_call(nb, NETDEV_DOWN, dev);
}
nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
- nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
}
}
@@ -1432,7 +1459,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
nb->notifier_call(nb, NETDEV_DOWN, dev);
}
nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
- nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
}
}
unlock:
@@ -1638,6 +1664,19 @@ static inline int deliver_skb(struct sk_buff *skb,
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+ if (ptype->af_packet_priv == NULL)
+ return false;
+
+ if (ptype->id_match)
+ return ptype->id_match(ptype, skb->sk);
+ else if ((struct sock *)ptype->af_packet_priv == skb->sk)
+ return true;
+
+ return false;
+}
+
/*
* Support routine. Sends outgoing frames to any network
* taps currently in use.
@@ -1655,8 +1694,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
* they originated from - MvS (miquels@drinkel.ow.org)
*/
if ((ptype->dev == dev || !ptype->dev) &&
- (ptype->af_packet_priv == NULL ||
- (struct sock *)ptype->af_packet_priv != skb->sk)) {
+ (!skb_loop_sk(ptype, skb))) {
if (pt_prev) {
deliver_skb(skb2, pt_prev, skb->dev);
pt_prev = ptype;
@@ -2118,7 +2156,8 @@ static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
static netdev_features_t harmonize_features(struct sk_buff *skb,
__be16 protocol, netdev_features_t features)
{
- if (!can_checksum_protocol(features, protocol)) {
+ if (skb->ip_summed != CHECKSUM_NONE &&
+ !can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
features &= ~NETIF_F_SG;
} else if (illegal_highdma(skb->dev, skb)) {
@@ -2133,6 +2172,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
__be16 protocol = skb->protocol;
netdev_features_t features = skb->dev->features;
+ if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+ features &= ~NETIF_F_GSO_MASK;
+
if (protocol == htons(ETH_P_8021Q)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
@@ -2155,9 +2197,7 @@ EXPORT_SYMBOL(netif_skb_features);
/*
* Returns true if either:
* 1. skb has frag_list and the device doesn't support FRAGLIST, or
- * 2. skb is fragmented and the device does not support SG, or if
- * at least one of fragments is in highmem and device does not
- * support DMA from it.
+ * 2. skb is fragmented and the device does not support SG.
*/
static inline int skb_needs_linearize(struct sk_buff *skb,
int features)
@@ -2186,9 +2226,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(skb);
- if (!list_empty(&ptype_all))
- dev_queue_xmit_nit(skb, dev);
-
features = netif_skb_features(skb);
if (vlan_tx_tag_present(skb) &&
@@ -2223,6 +2260,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(skb, dev);
+
skb_len = skb->len;
rc = ops->ndo_start_xmit(skb, dev);
trace_net_dev_xmit(skb, rc, dev, skb_len);
@@ -2245,6 +2285,9 @@ gso:
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
+ if (!list_empty(&ptype_all))
+ dev_queue_xmit_nit(nskb, dev);
+
skb_len = nskb->len;
rc = ops->ndo_start_xmit(nskb, dev);
trace_net_dev_xmit(nskb, rc, dev, skb_len);
@@ -2354,8 +2397,8 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
#endif
}
-static struct netdev_queue *dev_pick_tx(struct net_device *dev,
- struct sk_buff *skb)
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb)
{
int queue_index;
const struct net_device_ops *ops = dev->netdev_ops;
@@ -2529,7 +2572,7 @@ int dev_queue_xmit(struct sk_buff *skb)
skb_update_prio(skb);
- txq = dev_pick_tx(dev, skb);
+ txq = netdev_pick_tx(dev, skb);
q = rcu_dereference_bh(txq->qdisc);
#ifdef CONFIG_NET_CLS_ACT
@@ -2602,6 +2645,8 @@ EXPORT_SYMBOL(dev_queue_xmit);
=======================================================================*/
int netdev_max_backlog __read_mostly = 1000;
+EXPORT_SYMBOL(netdev_max_backlog);
+
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64; /* old backlog weight */
@@ -2628,15 +2673,16 @@ void __skb_get_rxhash(struct sk_buff *skb)
if (!skb_flow_dissect(skb, &keys))
return;
- if (keys.ports) {
- if ((__force u16)keys.port16[1] < (__force u16)keys.port16[0])
- swap(keys.port16[0], keys.port16[1]);
+ if (keys.ports)
skb->l4_rxhash = 1;
- }
/* get a consistent hash (same value on both flow directions) */
- if ((__force u32)keys.dst < (__force u32)keys.src)
+ if (((__force u32)keys.dst < (__force u32)keys.src) ||
+ (((__force u32)keys.dst == (__force u32)keys.src) &&
+ ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
swap(keys.dst, keys.src);
+ swap(keys.port16[0], keys.port16[1]);
+ }
hash = jhash_3words((__force u32)keys.dst,
(__force u32)keys.src,
@@ -3155,6 +3201,23 @@ void netdev_rx_handler_unregister(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+/*
+ * Limit the use of PFMEMALLOC reserves to those protocols that implement
+ * the special handling of PFMEMALLOC skbs.
+ */
+static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_ARP):
+ case __constant_htons(ETH_P_IP):
+ case __constant_htons(ETH_P_IPV6):
+ case __constant_htons(ETH_P_8021Q):
+ return true;
+ default:
+ return false;
+ }
+}
+
static int __netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
@@ -3164,14 +3227,27 @@ static int __netif_receive_skb(struct sk_buff *skb)
bool deliver_exact = false;
int ret = NET_RX_DROP;
__be16 type;
+ unsigned long pflags = current->flags;
net_timestamp_check(!netdev_tstamp_prequeue, skb);
trace_netif_receive_skb(skb);
+ /*
+ * PFMEMALLOC skbs are special, they should
+ * - be delivered to SOCK_MEMALLOC sockets only
+ * - stay away from userspace
+ * - have bounded memory usage
+ *
+ * Use PF_MEMALLOC as this saves us from propagating the allocation
+ * context down to all allocation sites.
+ */
+ if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+ current->flags |= PF_MEMALLOC;
+
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
- return NET_RX_DROP;
+ goto out;
orig_dev = skb->dev;
@@ -3191,7 +3267,7 @@ another_round:
if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
skb = vlan_untag(skb);
if (unlikely(!skb))
- goto out;
+ goto unlock;
}
#ifdef CONFIG_NET_CLS_ACT
@@ -3201,6 +3277,9 @@ another_round:
}
#endif
+ if (sk_memalloc_socks() && skb_pfmemalloc(skb))
+ goto skip_taps;
+
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
@@ -3209,13 +3288,18 @@ another_round:
}
}
+skip_taps:
#ifdef CONFIG_NET_CLS_ACT
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
- goto out;
+ goto unlock;
ncls:
#endif
+ if (sk_memalloc_socks() && skb_pfmemalloc(skb)
+ && !skb_pfmemalloc_protocol(skb))
+ goto drop;
+
rx_handler = rcu_dereference(skb->dev->rx_handler);
if (vlan_tx_tag_present(skb)) {
if (pt_prev) {
@@ -3225,7 +3309,7 @@ ncls:
if (vlan_do_receive(&skb, !rx_handler))
goto another_round;
else if (unlikely(!skb))
- goto out;
+ goto unlock;
}
if (rx_handler) {
@@ -3235,7 +3319,7 @@ ncls:
}
switch (rx_handler(&skb)) {
case RX_HANDLER_CONSUMED:
- goto out;
+ goto unlock;
case RX_HANDLER_ANOTHER:
goto another_round;
case RX_HANDLER_EXACT:
@@ -3264,10 +3348,11 @@ ncls:
if (pt_prev) {
if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
- ret = -ENOMEM;
+ goto drop;
else
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
+drop:
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
@@ -3276,8 +3361,10 @@ ncls:
ret = NET_RX_DROP;
}
-out:
+unlock:
rcu_read_unlock();
+out:
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
return ret;
}
@@ -4450,8 +4537,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned int old_flags = dev->flags;
- uid_t uid;
- gid_t gid;
+ kuid_t uid;
+ kgid_t gid;
ASSERT_RTNL();
@@ -4482,8 +4569,9 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
dev->name, (dev->flags & IFF_PROMISC),
(old_flags & IFF_PROMISC),
- audit_get_loginuid(current),
- uid, gid,
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ from_kuid(&init_user_ns, uid),
+ from_kgid(&init_user_ns, gid),
audit_get_sessionid(current));
}
@@ -4801,6 +4889,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
err = ops->ndo_set_mac_address(dev, sa);
if (!err)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
return err;
}
EXPORT_SYMBOL(dev_set_mac_address);
@@ -5175,12 +5264,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
*/
static int dev_new_index(struct net *net)
{
- static int ifindex;
+ int ifindex = net->ifindex;
for (;;) {
if (++ifindex <= 0)
ifindex = 1;
if (!__dev_get_by_index(net, ifindex))
- return ifindex;
+ return net->ifindex = ifindex;
}
}
@@ -5258,10 +5347,6 @@ static void rollback_registered_many(struct list_head *head)
netdev_unregister_kobject(dev);
}
- /* Process any work delayed until the end of the batch */
- dev = list_first_entry(head, struct net_device, unreg_list);
- call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
-
synchronize_net();
list_for_each_entry(dev, head, unreg_list)
@@ -5519,7 +5604,7 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
- ret = dev_get_valid_name(dev, dev->name);
+ ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
goto out;
@@ -5533,7 +5618,12 @@ int register_netdevice(struct net_device *dev)
}
}
- dev->ifindex = dev_new_index(net);
+ ret = -EBUSY;
+ if (!dev->ifindex)
+ dev->ifindex = dev_new_index(net);
+ else if (__dev_get_by_index(net, dev->ifindex))
+ goto err_uninit;
+
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
@@ -5576,9 +5666,12 @@ int register_netdevice(struct net_device *dev)
set_bit(__LINK_STATE_PRESENT, &dev->state);
+ linkwatch_init_dev(dev);
+
dev_init_scheduler(dev);
dev_hold(dev);
list_netdevice(dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
/* Notify protocols, that a new device appeared. */
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
@@ -5682,6 +5775,7 @@ EXPORT_SYMBOL(netdev_refcnt_read);
/**
* netdev_wait_allrefs - wait until all references are gone.
+ * @dev: target net_device
*
* This is called when unregistering network devices.
*
@@ -5707,9 +5801,12 @@ static void netdev_wait_allrefs(struct net_device *dev)
/* Rebroadcast unregister notification */
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
- * should have already handle it the first time */
+ __rtnl_unlock();
+ rcu_barrier();
+ rtnl_lock();
+
+ call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
@@ -5771,9 +5868,8 @@ void netdev_run_todo(void)
__rtnl_unlock();
- /* Wait for rcu callbacks to finish before attempting to drain
- * the device list. This usually avoids a 250ms wait.
- */
+
+ /* Wait for rcu callbacks to finish before next phase */
if (!list_empty(&list))
rcu_barrier();
@@ -5782,6 +5878,10 @@ void netdev_run_todo(void)
= list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list);
+ rtnl_lock();
+ call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
+ __rtnl_unlock();
+
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
pr_err("network todo '%s' but state %d\n",
dev->name, dev->reg_state);
@@ -5877,6 +5977,8 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
return queue;
}
+static const struct ethtool_ops default_ethtool_ops;
+
/**
* alloc_netdev_mqs - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -5942,6 +6044,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_segs = GSO_MAX_SEGS;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
@@ -5963,6 +6066,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
strcpy(dev->name, name);
dev->group = INIT_NETDEV_GROUP;
+ if (!dev->ethtool_ops)
+ dev->ethtool_ops = &default_ethtool_ops;
return dev;
free_all:
@@ -6147,7 +6252,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
/* We get here if we can't use the current device name */
if (!pat)
goto out;
- if (dev_get_valid_name(dev, pat) < 0)
+ if (dev_get_valid_name(net, dev, pat) < 0)
goto out;
}
@@ -6175,7 +6280,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
the device is just moving and can keep their slaves up.
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
+ rcu_barrier();
+ call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
/*
@@ -6358,22 +6464,26 @@ const char *netdev_drivername(const struct net_device *dev)
return empty;
}
-int __netdev_printk(const char *level, const struct net_device *dev,
+static int __netdev_printk(const char *level, const struct net_device *dev,
struct va_format *vaf)
{
int r;
- if (dev && dev->dev.parent)
- r = dev_printk(level, dev->dev.parent, "%s: %pV",
- netdev_name(dev), vaf);
- else if (dev)
+ if (dev && dev->dev.parent) {
+ r = dev_printk_emit(level[1] - '0',
+ dev->dev.parent,
+ "%s %s %s: %pV",
+ dev_driver_string(dev->dev.parent),
+ dev_name(dev->dev.parent),
+ netdev_name(dev), vaf);
+ } else if (dev) {
r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
- else
+ } else {
r = printk("%s(NULL net_device): %pV", level, vaf);
+ }
return r;
}
-EXPORT_SYMBOL(__netdev_printk);
int netdev_printk(const char *level, const struct net_device *dev,
const char *format, ...)
@@ -6388,6 +6498,7 @@ int netdev_printk(const char *level, const struct net_device *dev,
vaf.va = &args;
r = __netdev_printk(level, dev, &vaf);
+
va_end(args);
return r;
@@ -6407,6 +6518,7 @@ int func(const struct net_device *dev, const char *fmt, ...) \
vaf.va = &args; \
\
r = __netdev_printk(level, dev, &vaf); \
+ \
va_end(args); \
\
return r; \
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index c4cc2bc49f06..87cc17db2d56 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -22,7 +22,7 @@
*/
static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
- unsigned char *addr, int addr_len,
+ const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global)
{
struct netdev_hw_addr *ha;
@@ -46,7 +46,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
}
static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
- unsigned char *addr, int addr_len,
+ const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global)
{
struct netdev_hw_addr *ha;
@@ -72,14 +72,15 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
return __hw_addr_create_ex(list, addr, addr_len, addr_type, global);
}
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
+static int __hw_addr_add(struct netdev_hw_addr_list *list,
+ const unsigned char *addr, int addr_len,
+ unsigned char addr_type)
{
return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
}
static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
- unsigned char *addr, int addr_len,
+ const unsigned char *addr, int addr_len,
unsigned char addr_type, bool global)
{
struct netdev_hw_addr *ha;
@@ -104,8 +105,9 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
return -ENOENT;
}
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
- int addr_len, unsigned char addr_type)
+static int __hw_addr_del(struct netdev_hw_addr_list *list,
+ const unsigned char *addr, int addr_len,
+ unsigned char addr_type)
{
return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
}
@@ -278,7 +280,7 @@ EXPORT_SYMBOL(dev_addr_init);
*
* The caller must hold the rtnl_mutex.
*/
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
+int dev_addr_add(struct net_device *dev, const unsigned char *addr,
unsigned char addr_type)
{
int err;
@@ -303,7 +305,7 @@ EXPORT_SYMBOL(dev_addr_add);
*
* The caller must hold the rtnl_mutex.
*/
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
+int dev_addr_del(struct net_device *dev, const unsigned char *addr,
unsigned char addr_type)
{
int err;
@@ -390,7 +392,7 @@ EXPORT_SYMBOL(dev_addr_del_multiple);
* @dev: device
* @addr: address to add
*/
-int dev_uc_add_excl(struct net_device *dev, unsigned char *addr)
+int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
{
struct netdev_hw_addr *ha;
int err;
@@ -421,7 +423,7 @@ EXPORT_SYMBOL(dev_uc_add_excl);
* Add a secondary unicast address to the device or increase
* the reference count if it already exists.
*/
-int dev_uc_add(struct net_device *dev, unsigned char *addr)
+int dev_uc_add(struct net_device *dev, const unsigned char *addr)
{
int err;
@@ -443,7 +445,7 @@ EXPORT_SYMBOL(dev_uc_add);
* Release reference to a secondary unicast address and remove it
* from the device if the reference count drops to zero.
*/
-int dev_uc_del(struct net_device *dev, unsigned char *addr)
+int dev_uc_del(struct net_device *dev, const unsigned char *addr)
{
int err;
@@ -543,7 +545,7 @@ EXPORT_SYMBOL(dev_uc_init);
* @dev: device
* @addr: address to add
*/
-int dev_mc_add_excl(struct net_device *dev, unsigned char *addr)
+int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
{
struct netdev_hw_addr *ha;
int err;
@@ -566,7 +568,7 @@ out:
}
EXPORT_SYMBOL(dev_mc_add_excl);
-static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
bool global)
{
int err;
@@ -587,7 +589,7 @@ static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
* Add a multicast address to the device or increase
* the reference count if it already exists.
*/
-int dev_mc_add(struct net_device *dev, unsigned char *addr)
+int dev_mc_add(struct net_device *dev, const unsigned char *addr)
{
return __dev_mc_add(dev, addr, false);
}
@@ -600,13 +602,13 @@ EXPORT_SYMBOL(dev_mc_add);
*
* Add a global multicast address to the device.
*/
-int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+int dev_mc_add_global(struct net_device *dev, const unsigned char *addr)
{
return __dev_mc_add(dev, addr, true);
}
EXPORT_SYMBOL(dev_mc_add_global);
-static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
bool global)
{
int err;
@@ -628,7 +630,7 @@ static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
* Release reference to a multicast address and remove it
* from the device if the reference count drops to zero.
*/
-int dev_mc_del(struct net_device *dev, unsigned char *addr)
+int dev_mc_del(struct net_device *dev, const unsigned char *addr)
{
return __dev_mc_del(dev, addr, false);
}
@@ -642,7 +644,7 @@ EXPORT_SYMBOL(dev_mc_del);
* Release reference to a multicast address and remove it
* from the device if the reference count drops to zero.
*/
-int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+int dev_mc_del_global(struct net_device *dev, const unsigned char *addr)
{
return __dev_mc_del(dev, addr, true);
}
diff --git a/net/core/dst.c b/net/core/dst.c
index 069d51d29414..ee6153e2cf43 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -149,7 +149,15 @@ int dst_discard(struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard);
-const u32 dst_default_metrics[RTAX_MAX];
+const u32 dst_default_metrics[RTAX_MAX + 1] = {
+ /* This initializer is needed to force linker to place this variable
+ * into const section. Otherwise it might end into bss section.
+ * We really want to avoid false sharing on this variable, and catch
+ * any writes on it.
+ */
+ [RTAX_MAX] = 0xdeadbeef,
+};
+
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
int initial_ref, int initial_obsolete, unsigned short flags)
@@ -214,8 +222,8 @@ void __dst_free(struct dst_entry *dst)
if (dst_garbage.timer_inc > DST_GC_INC) {
dst_garbage.timer_inc = DST_GC_INC;
dst_garbage.timer_expires = DST_GC_MIN;
- cancel_delayed_work(&dst_gc_work);
- schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires);
+ mod_delayed_work(system_wq, &dst_gc_work,
+ dst_garbage.timer_expires);
}
spin_unlock_bh(&dst_garbage.lock);
}
@@ -366,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
struct dst_entry *dst, *last = NULL;
switch (event) {
- case NETDEV_UNREGISTER:
+ case NETDEV_UNREGISTER_FINAL:
case NETDEV_DOWN:
mutex_lock(&dst_gc_mutex);
for (dst = dst_busy_list; dst; dst = dst->next) {
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index cbf033dcaf1f..4d64cc2e3fa9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1426,18 +1426,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
return -EFAULT;
- if (!dev->ethtool_ops) {
- /* A few commands do not require any driver support,
- * are unprivileged, and do not change anything, so we
- * can take a shortcut to them. */
- if (ethcmd == ETHTOOL_GDRVINFO)
- return ethtool_get_drvinfo(dev, useraddr);
- else if (ethcmd == ETHTOOL_GET_TS_INFO)
- return ethtool_get_ts_info(dev, useraddr);
- else
- return -EOPNOTSUPP;
- }
-
/* Allow some commands to be done by anyone */
switch (ethcmd) {
case ETHTOOL_GSET:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ab7db83236c9..58a4ba27dfe3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (unresolved)
ops->unresolved_rules++;
- notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+ notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
return 0;
@@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
}
notify_rule_change(RTM_DELRULE, rule, ops, nlh,
- NETLINK_CB(skb).pid);
+ NETLINK_CB(skb).portid);
if (ops->delete)
ops->delete(rule);
fib_rule_put(rule);
@@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
if (idx < cb->args[1])
goto skip;
- if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+ if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWRULE,
NLM_F_MULTI, ops) < 0)
break;
diff --git a/net/core/filter.c b/net/core/filter.c
index d4ce2dc712e3..3d92ebb7fbcf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
int err;
struct sk_filter *filter;
+ /*
+ * If the skb was allocated from pfmemalloc reserves, only
+ * allow SOCK_MEMALLOC sockets to use it as this socket is
+ * helping free memory
+ */
+ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
+ return -ENOMEM;
+
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
@@ -159,6 +167,14 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
case BPF_S_ALU_DIV_K:
A = reciprocal_divide(A, K);
continue;
+ case BPF_S_ALU_MOD_X:
+ if (X == 0)
+ return 0;
+ A %= X;
+ continue;
+ case BPF_S_ALU_MOD_K:
+ A %= K;
+ continue;
case BPF_S_ALU_AND_X:
A &= X;
continue;
@@ -171,6 +187,13 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
case BPF_S_ALU_OR_K:
A |= K;
continue;
+ case BPF_S_ANC_ALU_XOR_X:
+ case BPF_S_ALU_XOR_X:
+ A ^= X;
+ continue;
+ case BPF_S_ALU_XOR_K:
+ A ^= K;
+ continue;
case BPF_S_ALU_LSH_X:
A <<= X;
continue;
@@ -318,9 +341,6 @@ load_b:
case BPF_S_ANC_CPU:
A = raw_smp_processor_id();
continue;
- case BPF_S_ANC_ALU_XOR_X:
- A ^= X;
- continue;
case BPF_S_ANC_NLATTR: {
struct nlattr *nla;
@@ -461,10 +481,14 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
[BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
[BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
[BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
+ [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K,
+ [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X,
[BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
[BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
[BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
[BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
+ [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K,
+ [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X,
[BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
[BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
[BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
@@ -523,6 +547,11 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
return -EINVAL;
ftest->k = reciprocal_value(ftest->k);
break;
+ case BPF_S_ALU_MOD_K:
+ /* check for division by zero */
+ if (ftest->k == 0)
+ return -EINVAL;
+ break;
case BPF_S_LD_MEM:
case BPF_S_LDX_MEM:
case BPF_S_ST:
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index c3519c6d1b16..8f82a5cc3851 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -76,6 +76,14 @@ static void rfc2863_policy(struct net_device *dev)
}
+void linkwatch_init_dev(struct net_device *dev)
+{
+ /* Handle pre-registration link state changes */
+ if (!netif_carrier_ok(dev) || netif_dormant(dev))
+ rfc2863_policy(dev);
+}
+
+
static bool linkwatch_urgent_event(struct net_device *dev)
{
if (!netif_running(dev))
@@ -120,22 +128,13 @@ static void linkwatch_schedule_work(int urgent)
delay = 0;
/*
- * This is true if we've scheduled it immeditately or if we don't
- * need an immediate execution and it's already pending.
+ * If urgent, schedule immediate execution; otherwise, don't
+ * override the existing timer.
*/
- if (schedule_delayed_work(&linkwatch_work, delay) == !delay)
- return;
-
- /* Don't bother if there is nothing urgent. */
- if (!test_bit(LW_URGENT, &linkwatch_flags))
- return;
-
- /* It's already running which is good enough. */
- if (!__cancel_delayed_work(&linkwatch_work))
- return;
-
- /* Otherwise we reschedule it again for immediate execution. */
- schedule_delayed_work(&linkwatch_work, 0);
+ if (test_bit(LW_URGENT, &linkwatch_flags))
+ mod_delayed_work(system_wq, &linkwatch_work, 0);
+ else
+ schedule_delayed_work(&linkwatch_work, delay);
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 117afaf51268..baca771caae2 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1545,7 +1545,7 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
panic("cannot allocate neighbour cache hashes");
rwlock_init(&tbl->lock);
- INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
+ INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
skb_queue_head_init_class(&tbl->proxy_queue,
@@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
if (tidx < tbl_skip || (family && tbl->family != family))
continue;
- if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+ if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
NLM_F_MULTI) <= 0)
break;
@@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
goto next;
if (neightbl_fill_param_info(skb, tbl, p,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGHTBL,
NLM_F_MULTI) <= 0)
@@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
continue;
if (idx < s_idx)
goto next;
- if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI) <= 0) {
@@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
continue;
if (idx < s_idx)
goto next;
- if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI, tbl) <= 0) {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 72607174ea5a..bcf02f608cbf 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -166,9 +166,21 @@ static ssize_t show_duplex(struct device *dev,
if (netif_running(netdev)) {
struct ethtool_cmd cmd;
- if (!__ethtool_get_settings(netdev, &cmd))
- ret = sprintf(buf, "%s\n",
- cmd.duplex ? "full" : "half");
+ if (!__ethtool_get_settings(netdev, &cmd)) {
+ const char *duplex;
+ switch (cmd.duplex) {
+ case DUPLEX_HALF:
+ duplex = "half";
+ break;
+ case DUPLEX_FULL:
+ duplex = "full";
+ break;
+ default:
+ duplex = "unknown";
+ break;
+ }
+ ret = sprintf(buf, "%s\n", duplex);
+ }
}
rtnl_unlock();
return ret;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b4c90e42b443..77a0388fc3be 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -26,6 +26,7 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
@@ -54,7 +55,7 @@ static atomic_t trapped;
MAX_UDP_CHUNK)
static void zap_completion_queue(void);
-static void arp_reply(struct sk_buff *skb);
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);
@@ -170,7 +171,8 @@ static void poll_napi(struct net_device *dev)
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(dev->npinfo, napi, budget);
+ budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
+ napi, budget);
spin_unlock(&napi->poll_lock);
if (!budget)
@@ -185,13 +187,14 @@ static void service_arp_queue(struct netpoll_info *npi)
struct sk_buff *skb;
while ((skb = skb_dequeue(&npi->arp_tx)))
- arp_reply(skb);
+ netpoll_arp_reply(skb, npi);
}
}
static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
+ struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
if (!dev || !netif_running(dev))
return;
@@ -206,17 +209,18 @@ static void netpoll_poll_dev(struct net_device *dev)
poll_napi(dev);
if (dev->flags & IFF_SLAVE) {
- if (dev->npinfo) {
+ if (ni) {
struct net_device *bond_dev = dev->master;
struct sk_buff *skb;
- while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
+ struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo);
+ while ((skb = skb_dequeue(&ni->arp_tx))) {
skb->dev = bond_dev;
- skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
+ skb_queue_tail(&bond_ni->arp_tx, skb);
}
}
}
- service_arp_queue(dev->npinfo);
+ service_arp_queue(ni);
zap_completion_queue();
}
@@ -302,6 +306,7 @@ static int netpoll_owner_active(struct net_device *dev)
return 0;
}
+/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
struct net_device *dev)
{
@@ -309,8 +314,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
unsigned long tries;
const struct net_device_ops *ops = dev->netdev_ops;
/* It is up to the caller to keep npinfo alive. */
- struct netpoll_info *npinfo = np->dev->npinfo;
+ struct netpoll_info *npinfo;
+
+ WARN_ON_ONCE(!irqs_disabled());
+ npinfo = rcu_dereference_bh(np->dev->npinfo);
if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
__kfree_skb(skb);
return;
@@ -319,16 +327,22 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
/* don't get messages out of order, and no recursion */
if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
struct netdev_queue *txq;
- unsigned long flags;
- txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ txq = netdev_pick_tx(dev, skb);
- local_irq_save(flags);
/* try until next clock tick */
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
if (!netif_xmit_stopped(txq)) {
+ if (vlan_tx_tag_present(skb) &&
+ !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
+ skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+ if (unlikely(!skb))
+ break;
+ skb->vlan_tci = 0;
+ }
+
status = ops->ndo_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
@@ -347,10 +361,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
}
WARN_ONCE(!irqs_disabled(),
- "netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
+ "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
dev->name, ops->ndo_start_xmit);
- local_irq_restore(flags);
}
if (status != NETDEV_TX_OK) {
@@ -367,6 +380,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
struct udphdr *udph;
struct iphdr *iph;
struct ethhdr *eth;
+ static atomic_t ip_ident;
udp_len = len + sizeof(*udph);
ip_len = udp_len + sizeof(*iph);
@@ -402,7 +416,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
put_unaligned(0x45, (unsigned char *)iph);
iph->tos = 0;
put_unaligned(htons(ip_len), &(iph->tot_len));
- iph->id = 0;
+ iph->id = htons(atomic_inc_return(&ip_ident));
iph->frag_off = 0;
iph->ttl = 64;
iph->protocol = IPPROTO_UDP;
@@ -423,9 +437,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
}
EXPORT_SYMBOL(netpoll_send_udp);
-static void arp_reply(struct sk_buff *skb)
+static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
{
- struct netpoll_info *npinfo = skb->dev->npinfo;
struct arphdr *arp;
unsigned char *arp_ptr;
int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
@@ -543,13 +556,12 @@ static void arp_reply(struct sk_buff *skb)
spin_unlock_irqrestore(&npinfo->rx_lock, flags);
}
-int __netpoll_rx(struct sk_buff *skb)
+int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
{
int proto, len, ulen;
int hits = 0;
const struct iphdr *iph;
struct udphdr *uh;
- struct netpoll_info *npinfo = skb->dev->npinfo;
struct netpoll *np, *tmp;
if (list_empty(&npinfo->rx_np))
@@ -565,6 +577,12 @@ int __netpoll_rx(struct sk_buff *skb)
return 1;
}
+ if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+ skb = vlan_untag(skb);
+ if (unlikely(!skb))
+ goto out;
+ }
+
proto = ntohs(eth_hdr(skb)->h_proto);
if (proto != ETH_P_IP)
goto out;
@@ -715,7 +733,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
}
EXPORT_SYMBOL(netpoll_parse_options);
-int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
+int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
struct netpoll_info *npinfo;
const struct net_device_ops *ops;
@@ -734,7 +752,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
}
if (!ndev->npinfo) {
- npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+ npinfo = kmalloc(sizeof(*npinfo), gfp);
if (!npinfo) {
err = -ENOMEM;
goto out;
@@ -752,7 +770,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
ops = np->dev->netdev_ops;
if (ops->ndo_netpoll_setup) {
- err = ops->ndo_netpoll_setup(ndev, npinfo);
+ err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
if (err)
goto free_npinfo;
}
@@ -857,7 +875,7 @@ int netpoll_setup(struct netpoll *np)
refill_skbs();
rtnl_lock();
- err = __netpoll_setup(np, ndev);
+ err = __netpoll_setup(np, ndev, GFP_KERNEL);
rtnl_unlock();
if (err)
@@ -878,6 +896,24 @@ static int __init netpoll_init(void)
}
core_initcall(netpoll_init);
+static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
+{
+ struct netpoll_info *npinfo =
+ container_of(rcu_head, struct netpoll_info, rcu);
+
+ skb_queue_purge(&npinfo->arp_tx);
+ skb_queue_purge(&npinfo->txq);
+
+ /* we can't call cancel_delayed_work_sync here, as we are in softirq */
+ cancel_delayed_work(&npinfo->tx_work);
+
+ /* clean after last, unfinished work */
+ __skb_queue_purge(&npinfo->txq);
+ /* now cancel it again */
+ cancel_delayed_work(&npinfo->tx_work);
+ kfree(npinfo);
+}
+
void __netpoll_cleanup(struct netpoll *np)
{
struct netpoll_info *npinfo;
@@ -903,20 +939,24 @@ void __netpoll_cleanup(struct netpoll *np)
ops->ndo_netpoll_cleanup(np->dev);
RCU_INIT_POINTER(np->dev->npinfo, NULL);
+ call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
+ }
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
- /* avoid racing with NAPI reading npinfo */
- synchronize_rcu_bh();
+static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
+{
+ struct netpoll *np = container_of(rcu_head, struct netpoll, rcu);
- skb_queue_purge(&npinfo->arp_tx);
- skb_queue_purge(&npinfo->txq);
- cancel_delayed_work_sync(&npinfo->tx_work);
+ __netpoll_cleanup(np);
+ kfree(np);
+}
- /* clean after last, unfinished work */
- __skb_queue_purge(&npinfo->txq);
- kfree(npinfo);
- }
+void __netpoll_free_rcu(struct netpoll *np)
+{
+ call_rcu_bh(&np->rcu, rcu_cleanup_netpoll);
}
-EXPORT_SYMBOL_GPL(__netpoll_cleanup);
+EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
void netpoll_cleanup(struct netpoll *np)
{
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 63d15e8f80e9..4a83fb3c8e87 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -73,7 +73,6 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len)
((sizeof(u32) * new_len));
struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
struct netprio_map *old_priomap;
- int i;
old_priomap = rtnl_dereference(dev->priomap);
@@ -82,10 +81,10 @@ static int extend_netdev_table(struct net_device *dev, u32 new_len)
return -ENOMEM;
}
- for (i = 0;
- old_priomap && (i < old_priomap->priomap_len);
- i++)
- new_priomap->priomap[i] = old_priomap->priomap[i];
+ if (old_priomap)
+ memcpy(new_priomap->priomap, old_priomap->priomap,
+ old_priomap->priomap_len *
+ sizeof(old_priomap->priomap[0]));
new_priomap->priomap_len = new_len;
@@ -101,39 +100,11 @@ static int write_update_netdev_table(struct net_device *dev)
u32 max_len;
struct netprio_map *map;
- rtnl_lock();
max_len = atomic_read(&max_prioidx) + 1;
map = rtnl_dereference(dev->priomap);
if (!map || map->priomap_len < max_len)
ret = extend_netdev_table(dev, max_len);
- rtnl_unlock();
-
- return ret;
-}
-
-static int update_netdev_tables(void)
-{
- int ret = 0;
- struct net_device *dev;
- u32 max_len;
- struct netprio_map *map;
- rtnl_lock();
- max_len = atomic_read(&max_prioidx) + 1;
- for_each_netdev(&init_net, dev) {
- map = rtnl_dereference(dev->priomap);
- /*
- * don't allocate priomap if we didn't
- * change net_prio.ifpriomap (map == NULL),
- * this will speed up skb_update_prio.
- */
- if (map && map->priomap_len < max_len) {
- ret = extend_netdev_table(dev, max_len);
- if (ret < 0)
- break;
- }
- }
- rtnl_unlock();
return ret;
}
@@ -155,12 +126,6 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
goto out;
}
- ret = update_netdev_tables();
- if (ret < 0) {
- put_prioidx(cs->prioidx);
- goto out;
- }
-
return &cs->css;
out:
kfree(cs);
@@ -234,7 +199,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
/*
*Separate the devname from the associated priority
- *and advance the priostr poitner to the priority value
+ *and advance the priostr pointer to the priority value
*/
*priostr = '\0';
priostr++;
@@ -256,17 +221,17 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
if (!dev)
goto out_free_devname;
+ rtnl_lock();
ret = write_update_netdev_table(dev);
if (ret < 0)
goto out_put_dev;
- rcu_read_lock();
- map = rcu_dereference(dev->priomap);
+ map = rtnl_dereference(dev->priomap);
if (map)
map->priomap[prioidx] = priority;
- rcu_read_unlock();
out_put_dev:
+ rtnl_unlock();
dev_put(dev);
out_free_devname:
@@ -277,12 +242,6 @@ out_free_devname:
void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *p;
- char *tmp = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
-
- if (!tmp) {
- pr_warn("Unable to attach cgrp due to alloc failure!\n");
- return;
- }
cgroup_taskset_for_each(p, cgrp, tset) {
unsigned int fd;
@@ -296,32 +255,24 @@ void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
continue;
}
- rcu_read_lock();
+ spin_lock(&files->file_lock);
fdt = files_fdtable(files);
for (fd = 0; fd < fdt->max_fds; fd++) {
- char *path;
struct file *file;
struct socket *sock;
- unsigned long s;
- int rv, err = 0;
+ int err;
file = fcheck_files(files, fd);
if (!file)
continue;
- path = d_path(&file->f_path, tmp, PAGE_SIZE);
- rv = sscanf(path, "socket:[%lu]", &s);
- if (rv <= 0)
- continue;
-
sock = sock_from_file(file, &err);
- if (!err)
+ if (sock)
sock_update_netprioidx(sock->sk, p);
}
- rcu_read_unlock();
+ spin_unlock(&files->file_lock);
task_unlock(p);
}
- kfree(tmp);
}
static struct cftype ss_files[] = {
@@ -342,11 +293,19 @@ struct cgroup_subsys net_prio_subsys = {
.create = cgrp_create,
.destroy = cgrp_destroy,
.attach = net_prio_attach,
-#ifdef CONFIG_NETPRIO_CGROUP
.subsys_id = net_prio_subsys_id,
-#endif
.base_cftypes = ss_files,
- .module = THIS_MODULE
+ .module = THIS_MODULE,
+
+ /*
+ * net_prio has artificial limit on the number of cgroups and
+ * disallows nesting making it impossible to co-mount it with other
+ * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ
+ * limit and properly nest configuration such that children follow
+ * their parents' configurations by default and are allowed to
+ * override and remove the following.
+ */
+ .broken_hierarchy = true,
};
static int netprio_device_event(struct notifier_block *unused,
@@ -382,10 +341,6 @@ static int __init init_cgroup_netprio(void)
ret = cgroup_load_subsys(&net_prio_subsys);
if (ret)
goto out;
-#ifndef CONFIG_NETPRIO_CGROUP
- smp_wmb();
- net_prio_subsys_id = net_prio_subsys.subsys_id;
-#endif
register_netdevice_notifier(&netprio_device_notifier);
@@ -402,11 +357,6 @@ static void __exit exit_cgroup_netprio(void)
cgroup_unload_subsys(&net_prio_subsys);
-#ifndef CONFIG_NETPRIO_CGROUP
- net_prio_subsys_id = -1;
- synchronize_rcu();
-#endif
-
rtnl_lock();
for_each_netdev(&init_net, dev) {
old = rtnl_dereference(dev->priomap);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index cce9e53528b1..148e73d2c451 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2721,7 +2721,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 -
pkt_dev->pkt_overhead;
- if (datalen < sizeof(struct pktgen_hdr))
+ if (datalen < 0 || datalen < sizeof(struct pktgen_hdr))
datalen = sizeof(struct pktgen_hdr);
udph->source = htons(pkt_dev->cur_udp_src);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 9b570a6a33c5..c31d9e8668c3 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
kfree(lopt);
}
+/*
+ * This function is called to set a Fast Open socket's "fastopen_rsk" field
+ * to NULL when a TFO socket no longer needs to access the request_sock.
+ * This happens only after 3WHS has been either completed or aborted (e.g.,
+ * RST is received).
+ *
+ * Before TFO, a child socket is created only after 3WHS is completed,
+ * hence it never needs to access the request_sock. things get a lot more
+ * complex with TFO. A child socket, accepted or not, has to access its
+ * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
+ * until 3WHS is either completed or aborted. Afterwards the req will stay
+ * until either the child socket is accepted, or in the rare case when the
+ * listener is closed before the child is accepted.
+ *
+ * In short, a request socket is only freed after BOTH 3WHS has completed
+ * (or aborted) and the child socket has been accepted (or listener closed).
+ * When a child socket is accepted, its corresponding req->sk is set to
+ * NULL since it's no longer needed. More importantly, "req->sk == NULL"
+ * will be used by the code below to determine if a child socket has been
+ * accepted or not, and the check is protected by the fastopenq->lock
+ * described below.
+ *
+ * Note that fastopen_rsk is only accessed from the child socket's context
+ * with its socket lock held. But a request_sock (req) can be accessed by
+ * both its child socket through fastopen_rsk, and a listener socket through
+ * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
+ * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
+ * only in the rare case when both the listener and the child locks are held,
+ * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
+ * The lock also protects other fields such as fastopenq->qlen, which is
+ * decremented by this function when fastopen_rsk is no longer needed.
+ *
+ * Note that another solution was to simply use the existing socket lock
+ * from the listener. But first socket lock is difficult to use. It is not
+ * a simple spin lock - one must consider sock_owned_by_user() and arrange
+ * to use sk_add_backlog() stuff. But what really makes it infeasible is the
+ * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
+ * acquire a child's lock while holding listener's socket lock. A corner
+ * case might also exist in tcp_v4_hnd_req() that will trigger this locking
+ * order.
+ *
+ * When a TFO req is created, it needs to sock_hold its listener to prevent
+ * the latter data structure from going away.
+ *
+ * This function also sets "treq->listener" to NULL and unreference listener
+ * socket. treq->listener is used by the listener so it is protected by the
+ * fastopenq->lock in this function.
+ */
+void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
+ bool reset)
+{
+ struct sock *lsk = tcp_rsk(req)->listener;
+ struct fastopen_queue *fastopenq =
+ inet_csk(lsk)->icsk_accept_queue.fastopenq;
+
+ BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
+
+ tcp_sk(sk)->fastopen_rsk = NULL;
+ spin_lock_bh(&fastopenq->lock);
+ fastopenq->qlen--;
+ tcp_rsk(req)->listener = NULL;
+ if (req->sk) /* the child socket hasn't been accepted yet */
+ goto out;
+
+ if (!reset || lsk->sk_state != TCP_LISTEN) {
+ /* If the listener has been closed don't bother with the
+ * special RST handling below.
+ */
+ spin_unlock_bh(&fastopenq->lock);
+ sock_put(lsk);
+ reqsk_free(req);
+ return;
+ }
+ /* Wait for 60secs before removing a req that has triggered RST.
+ * This is a simple defense against TFO spoofing attack - by
+ * counting the req against fastopen.max_qlen, and disabling
+ * TFO when the qlen exceeds max_qlen.
+ *
+ * For more details see CoNext'11 "TCP Fast Open" paper.
+ */
+ req->expires = jiffies + 60*HZ;
+ if (fastopenq->rskq_rst_head == NULL)
+ fastopenq->rskq_rst_head = req;
+ else
+ fastopenq->rskq_rst_tail->dl_next = req;
+
+ req->dl_next = NULL;
+ fastopenq->rskq_rst_tail = req;
+ fastopenq->qlen++;
+out:
+ spin_unlock_bh(&fastopenq->lock);
+ sock_put(lsk);
+ return;
+}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 334b930e0de3..76d4c2c3c89b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -618,16 +618,20 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
long expires, u32 error)
{
struct rta_cacheinfo ci = {
- .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
+ .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
.rta_used = dst->__use,
.rta_clntref = atomic_read(&(dst->__refcnt)),
.rta_error = error,
.rta_id = id,
};
- if (expires)
- ci.rta_expires = jiffies_to_clock_t(expires);
+ if (expires) {
+ unsigned long clock;
+ clock = jiffies_to_clock_t(abs(expires));
+ clock = min_t(unsigned long, clock, INT_MAX);
+ ci.rta_expires = (expires > 0) ? clock : -clock;
+ }
return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci);
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);
@@ -659,6 +663,12 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
}
}
+static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
+{
+ return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) |
+ (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI));
+}
+
static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
const struct ifinfomsg *ifm)
{
@@ -667,7 +677,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
/* bugwards compatibility: ifi_change == 0 is treated as ~0 */
if (ifm->ifi_change)
flags = (flags & ifm->ifi_change) |
- (dev->flags & ~ifm->ifi_change);
+ (rtnl_dev_get_flags(dev) & ~ifm->ifi_change);
return flags;
}
@@ -1071,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (idx < s_idx)
goto cont;
if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
NLM_F_MULTI,
ext_filter_mask) <= 0)
@@ -1371,6 +1381,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
goto errout;
send_addr_notify = 1;
modified = 1;
+ add_device_randomness(dev->dev_addr, dev->addr_len);
}
if (tb[IFLA_MTU]) {
@@ -1801,8 +1812,6 @@ replay:
return -ENODEV;
}
- if (ifm->ifi_index)
- return -EOPNOTSUPP;
if (tb[IFLA_MAP] || tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
return -EOPNOTSUPP;
@@ -1828,10 +1837,14 @@ replay:
return PTR_ERR(dest_net);
dev = rtnl_create_link(net, dest_net, ifname, ops, tb);
-
- if (IS_ERR(dev))
+ if (IS_ERR(dev)) {
err = PTR_ERR(dev);
- else if (ops->newlink)
+ goto out;
+ }
+
+ dev->ifindex = ifm->ifi_index;
+
+ if (ops->newlink)
err = ops->newlink(net, dev, tb, data);
else
err = register_netdevice(dev);
@@ -1886,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (nskb == NULL)
return -ENOBUFS;
- err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
+ err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, 0, 0, ext_filter_mask);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
kfree_skb(nskb);
} else
- err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
+ err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
return err;
}
@@ -2077,7 +2090,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
(dev->priv_flags & IFF_BRIDGE_PORT)) {
master = dev->master;
- err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr,
+ err = master->netdev_ops->ndo_fdb_add(ndm, tb,
+ dev, addr,
nlh->nlmsg_flags);
if (err)
goto out;
@@ -2087,7 +2101,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
/* Embedded bridge, macvlan, and any other device support */
if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) {
- err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr,
+ err = dev->netdev_ops->ndo_fdb_add(ndm, tb,
+ dev, addr,
nlh->nlmsg_flags);
if (!err) {
@@ -2167,9 +2182,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
{
struct netdev_hw_addr *ha;
int err;
- u32 pid, seq;
+ u32 portid, seq;
- pid = NETLINK_CB(cb->skb).pid;
+ portid = NETLINK_CB(cb->skb).portid;
seq = cb->nlh->nlmsg_seq;
list_for_each_entry(ha, &list->list, list) {
@@ -2177,7 +2192,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
goto skip;
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
- pid, seq, 0, NTF_SELF);
+ portid, seq, 0, NTF_SELF);
if (err < 0)
return err;
skip:
@@ -2345,7 +2360,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_PRE_TYPE_CHANGE:
case NETDEV_GOING_DOWN:
case NETDEV_UNREGISTER:
- case NETDEV_UNREGISTER_BATCH:
+ case NETDEV_UNREGISTER_FINAL:
case NETDEV_RELEASE:
case NETDEV_JOIN:
break;
@@ -2368,9 +2383,10 @@ static int __net_init rtnetlink_net_init(struct net *net)
.groups = RTNLGRP_MAX,
.input = rtnetlink_rcv,
.cb_mutex = &rtnl_mutex,
+ .flags = NL_CFG_F_NONROOT_RECV,
};
- sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg);
+ sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);
if (!sk)
return -ENOMEM;
net->rtnl = sk;
@@ -2403,7 +2419,6 @@ void __init rtnetlink_init(void)
if (register_pernet_subsys(&rtnetlink_net_ops))
panic("rtnetlink_init: cannot initialize rtnetlink\n");
- netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
diff --git a/net/core/scm.c b/net/core/scm.c
index 8f6ccfd68ef4..9c1c63da3ca8 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -45,12 +45,17 @@
static __inline__ int scm_check_creds(struct ucred *creds)
{
const struct cred *cred = current_cred();
+ kuid_t uid = make_kuid(cred->user_ns, creds->uid);
+ kgid_t gid = make_kgid(cred->user_ns, creds->gid);
+
+ if (!uid_valid(uid) || !gid_valid(gid))
+ return -EINVAL;
if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) &&
- ((creds->uid == cred->uid || creds->uid == cred->euid ||
- creds->uid == cred->suid) || capable(CAP_SETUID)) &&
- ((creds->gid == cred->gid || creds->gid == cred->egid ||
- creds->gid == cred->sgid) || capable(CAP_SETGID))) {
+ ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
+ uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) &&
+ ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
+ gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) {
return 0;
}
return -EPERM;
@@ -149,39 +154,54 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
goto error;
break;
case SCM_CREDENTIALS:
+ {
+ struct ucred creds;
+ kuid_t uid;
+ kgid_t gid;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
goto error;
- memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
- err = scm_check_creds(&p->creds);
+ memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+ err = scm_check_creds(&creds);
if (err)
goto error;
- if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
+ p->creds.pid = creds.pid;
+ if (!p->pid || pid_vnr(p->pid) != creds.pid) {
struct pid *pid;
err = -ESRCH;
- pid = find_get_pid(p->creds.pid);
+ pid = find_get_pid(creds.pid);
if (!pid)
goto error;
put_pid(p->pid);
p->pid = pid;
}
+ err = -EINVAL;
+ uid = make_kuid(current_user_ns(), creds.uid);
+ gid = make_kgid(current_user_ns(), creds.gid);
+ if (!uid_valid(uid) || !gid_valid(gid))
+ goto error;
+
+ p->creds.uid = uid;
+ p->creds.gid = gid;
+
if (!p->cred ||
- (p->cred->euid != p->creds.uid) ||
- (p->cred->egid != p->creds.gid)) {
+ !uid_eq(p->cred->euid, uid) ||
+ !gid_eq(p->cred->egid, gid)) {
struct cred *cred;
err = -ENOMEM;
cred = prepare_creds();
if (!cred)
goto error;
- cred->uid = cred->euid = p->creds.uid;
- cred->gid = cred->egid = p->creds.gid;
+ cred->uid = cred->euid = uid;
+ cred->gid = cred->egid = gid;
if (p->cred)
put_cred(p->cred);
p->cred = cred;
}
break;
+ }
default:
goto error;
}
@@ -265,6 +285,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
i++, cmfptr++)
{
+ struct socket *sock;
int new_fd;
err = security_file_receive(fp[i]);
if (err)
@@ -281,6 +302,9 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
}
/* Bump the usage count and install the file. */
get_file(fp[i]);
+ sock = sock_from_file(fp[i], &err);
+ if (sock)
+ sock_update_netprioidx(sock->sk, current);
fd_install(new_fd, fp[i]);
}
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 99b2596531bb..e61a8bb7fce7 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -76,6 +76,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
return hash[0];
}
+EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 368f65c15e4f..cdc28598f4ef 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -145,6 +145,43 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
BUG();
}
+
+/*
+ * kmalloc_reserve is a wrapper around kmalloc_node_track_caller that tells
+ * the caller if emergency pfmemalloc reserves are being used. If it is and
+ * the socket is later found to be SOCK_MEMALLOC then PFMEMALLOC reserves
+ * may be used. Otherwise, the packet data may be discarded until enough
+ * memory is free
+ */
+#define kmalloc_reserve(size, gfp, node, pfmemalloc) \
+ __kmalloc_reserve(size, gfp, node, _RET_IP_, pfmemalloc)
+void *__kmalloc_reserve(size_t size, gfp_t flags, int node, unsigned long ip,
+ bool *pfmemalloc)
+{
+ void *obj;
+ bool ret_pfmemalloc = false;
+
+ /*
+ * Try a regular allocation, when that fails and we're not entitled
+ * to the reserves, fail.
+ */
+ obj = kmalloc_node_track_caller(size,
+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+ node);
+ if (obj || !(gfp_pfmemalloc_allowed(flags)))
+ goto out;
+
+ /* Try again but now we are using pfmemalloc reserves */
+ ret_pfmemalloc = true;
+ obj = kmalloc_node_track_caller(size, flags, node);
+
+out:
+ if (pfmemalloc)
+ *pfmemalloc = ret_pfmemalloc;
+
+ return obj;
+}
+
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
* [BEEP] leaks.
@@ -155,8 +192,10 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
* __alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
- * @fclone: allocate from fclone cache instead of head cache
- * and allocate a cloned (child) skb
+ * @flags: If SKB_ALLOC_FCLONE is set, allocate from fclone cache
+ * instead of head cache and allocate a cloned (child) skb.
+ * If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
+ * allocations in case the data is required for writeback
* @node: numa node to allocate memory on
*
* Allocate a new &sk_buff. The returned buffer has no headroom and a
@@ -167,14 +206,19 @@ static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
* %GFP_ATOMIC.
*/
struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
- int fclone, int node)
+ int flags, int node)
{
struct kmem_cache *cache;
struct skb_shared_info *shinfo;
struct sk_buff *skb;
u8 *data;
+ bool pfmemalloc;
+
+ cache = (flags & SKB_ALLOC_FCLONE)
+ ? skbuff_fclone_cache : skbuff_head_cache;
- cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+ if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
+ gfp_mask |= __GFP_MEMALLOC;
/* Get the HEAD */
skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
@@ -189,7 +233,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
*/
size = SKB_DATA_ALIGN(size);
size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- data = kmalloc_node_track_caller(size, gfp_mask, node);
+ data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
if (!data)
goto nodata;
/* kmalloc(size) might give us more room than requested.
@@ -207,6 +251,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
memset(skb, 0, offsetof(struct sk_buff, tail));
/* Account for allocated memory : skb + skb->head */
skb->truesize = SKB_TRUESIZE(size);
+ skb->pfmemalloc = pfmemalloc;
atomic_set(&skb->users, 1);
skb->head = data;
skb->data = data;
@@ -222,7 +267,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
atomic_set(&shinfo->dataref, 1);
kmemcheck_annotate_variable(shinfo->destructor_arg);
- if (fclone) {
+ if (flags & SKB_ALLOC_FCLONE) {
struct sk_buff *child = skb + 1;
atomic_t *fclone_ref = (atomic_t *) (child + 1);
@@ -232,6 +277,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
atomic_set(fclone_ref, 1);
child->fclone = SKB_FCLONE_UNAVAILABLE;
+ child->pfmemalloc = pfmemalloc;
}
out:
return skb;
@@ -294,55 +340,74 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
EXPORT_SYMBOL(build_skb);
struct netdev_alloc_cache {
- struct page *page;
- unsigned int offset;
- unsigned int pagecnt_bias;
+ struct page_frag frag;
+ /* we maintain a pagecount bias, so that we dont dirty cache line
+ * containing page->_count every time we allocate a fragment.
+ */
+ unsigned int pagecnt_bias;
};
static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-#define NETDEV_PAGECNT_BIAS (PAGE_SIZE / SMP_CACHE_BYTES)
+#define NETDEV_FRAG_PAGE_MAX_ORDER get_order(32768)
+#define NETDEV_FRAG_PAGE_MAX_SIZE (PAGE_SIZE << NETDEV_FRAG_PAGE_MAX_ORDER)
+#define NETDEV_PAGECNT_MAX_BIAS NETDEV_FRAG_PAGE_MAX_SIZE
-/**
- * netdev_alloc_frag - allocate a page fragment
- * @fragsz: fragment size
- *
- * Allocates a frag from a page for receive buffer.
- * Uses GFP_ATOMIC allocations.
- */
-void *netdev_alloc_frag(unsigned int fragsz)
+static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
struct netdev_alloc_cache *nc;
void *data = NULL;
+ int order;
unsigned long flags;
local_irq_save(flags);
nc = &__get_cpu_var(netdev_alloc_cache);
- if (unlikely(!nc->page)) {
+ if (unlikely(!nc->frag.page)) {
refill:
- nc->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
- if (unlikely(!nc->page))
- goto end;
+ for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) {
+ gfp_t gfp = gfp_mask;
+
+ if (order)
+ gfp |= __GFP_COMP | __GFP_NOWARN;
+ nc->frag.page = alloc_pages(gfp, order);
+ if (likely(nc->frag.page))
+ break;
+ if (--order < 0)
+ goto end;
+ }
+ nc->frag.size = PAGE_SIZE << order;
recycle:
- atomic_set(&nc->page->_count, NETDEV_PAGECNT_BIAS);
- nc->pagecnt_bias = NETDEV_PAGECNT_BIAS;
- nc->offset = 0;
+ atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+ nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+ nc->frag.offset = 0;
}
- if (nc->offset + fragsz > PAGE_SIZE) {
+ if (nc->frag.offset + fragsz > nc->frag.size) {
/* avoid unnecessary locked operations if possible */
- if ((atomic_read(&nc->page->_count) == nc->pagecnt_bias) ||
- atomic_sub_and_test(nc->pagecnt_bias, &nc->page->_count))
+ if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
+ atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
goto recycle;
goto refill;
}
- data = page_address(nc->page) + nc->offset;
- nc->offset += fragsz;
+ data = page_address(nc->frag.page) + nc->frag.offset;
+ nc->frag.offset += fragsz;
nc->pagecnt_bias--;
end:
local_irq_restore(flags);
return data;
}
+
+/**
+ * netdev_alloc_frag - allocate a page fragment
+ * @fragsz: fragment size
+ *
+ * Allocates a frag from a page for receive buffer.
+ * Uses GFP_ATOMIC allocations.
+ */
+void *netdev_alloc_frag(unsigned int fragsz)
+{
+ return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
+}
EXPORT_SYMBOL(netdev_alloc_frag);
/**
@@ -366,7 +431,12 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
- void *data = netdev_alloc_frag(fragsz);
+ void *data;
+
+ if (sk_memalloc_socks())
+ gfp_mask |= __GFP_MEMALLOC;
+
+ data = __netdev_alloc_frag(fragsz, gfp_mask);
if (likely(data)) {
skb = build_skb(data, fragsz);
@@ -374,7 +444,8 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
put_page(virt_to_head_page(data));
}
} else {
- skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
+ skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask,
+ SKB_ALLOC_RX, NUMA_NO_NODE);
}
if (likely(skb)) {
skb_reserve(skb, NET_SKB_PAD);
@@ -656,6 +727,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#if IS_ENABLED(CONFIG_IP_VS)
new->ipvs_property = old->ipvs_property;
#endif
+ new->pfmemalloc = old->pfmemalloc;
new->protocol = old->protocol;
new->mark = old->mark;
new->skb_iif = old->skb_iif;
@@ -814,6 +886,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->fclone = SKB_FCLONE_CLONE;
atomic_inc(fclone_ref);
} else {
+ if (skb_pfmemalloc(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+
n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
if (!n)
return NULL;
@@ -850,6 +925,13 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}
+static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
+{
+ if (skb_pfmemalloc(skb))
+ return SKB_ALLOC_RX;
+ return 0;
+}
+
/**
* skb_copy - create private copy of an sk_buff
* @skb: buffer to copy
@@ -871,7 +953,8 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
int headerlen = skb_headroom(skb);
unsigned int size = skb_end_offset(skb) + skb->data_len;
- struct sk_buff *n = alloc_skb(size, gfp_mask);
+ struct sk_buff *n = __alloc_skb(size, gfp_mask,
+ skb_alloc_rx_flag(skb), NUMA_NO_NODE);
if (!n)
return NULL;
@@ -906,7 +989,8 @@ EXPORT_SYMBOL(skb_copy);
struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
{
unsigned int size = skb_headlen(skb) + headroom;
- struct sk_buff *n = alloc_skb(size, gfp_mask);
+ struct sk_buff *n = __alloc_skb(size, gfp_mask,
+ skb_alloc_rx_flag(skb), NUMA_NO_NODE);
if (!n)
goto out;
@@ -979,8 +1063,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
size = SKB_DATA_ALIGN(size);
- data = kmalloc(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
- gfp_mask);
+ if (skb_pfmemalloc(skb))
+ gfp_mask |= __GFP_MEMALLOC;
+ data = kmalloc_reserve(size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+ gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
goto nodata;
size = SKB_WITH_OVERHEAD(ksize(data));
@@ -1092,8 +1178,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
- gfp_mask);
+ struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
+ gfp_mask, skb_alloc_rx_flag(skb),
+ NUMA_NO_NODE);
int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
int off;
@@ -1582,38 +1669,19 @@ static struct page *linear_to_page(struct page *page, unsigned int *len,
unsigned int *offset,
struct sk_buff *skb, struct sock *sk)
{
- struct page *p = sk->sk_sndmsg_page;
- unsigned int off;
+ struct page_frag *pfrag = sk_page_frag(sk);
- if (!p) {
-new_page:
- p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
- if (!p)
- return NULL;
-
- off = sk->sk_sndmsg_off = 0;
- /* hold one ref to this page until it's full */
- } else {
- unsigned int mlen;
-
- /* If we are the only user of the page, we can reset offset */
- if (page_count(p) == 1)
- sk->sk_sndmsg_off = 0;
- off = sk->sk_sndmsg_off;
- mlen = PAGE_SIZE - off;
- if (mlen < 64 && mlen < *len) {
- put_page(p);
- goto new_page;
- }
+ if (!sk_page_frag_refill(sk, pfrag))
+ return NULL;
- *len = min_t(unsigned int, *len, mlen);
- }
+ *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);
- memcpy(page_address(p) + off, page_address(page) + *offset, *len);
- sk->sk_sndmsg_off += *len;
- *offset = off;
+ memcpy(page_address(pfrag->page) + pfrag->offset,
+ page_address(page) + *offset, *len);
+ *offset = pfrag->offset;
+ pfrag->offset += *len;
- return p;
+ return pfrag->page;
}
static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
@@ -2775,8 +2843,9 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
skb_release_head_state(nskb);
__skb_push(nskb, doffset);
} else {
- nskb = alloc_skb(hsize + doffset + headroom,
- GFP_ATOMIC);
+ nskb = __alloc_skb(hsize + doffset + headroom,
+ GFP_ATOMIC, skb_alloc_rx_flag(skb),
+ NUMA_NO_NODE);
if (unlikely(!nskb))
goto err;
@@ -3414,8 +3483,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
return false;
- delta = from->truesize -
- SKB_TRUESIZE(skb_end_pointer(from) - from->head);
+ delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
}
WARN_ON_ONCE(delta < len);
@@ -3428,7 +3496,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
if (!skb_cloned(from))
skb_shinfo(from)->nr_frags = 0;
- /* if the skb is cloned this does nothing since we set nr_frags to 0 */
+ /* if the skb is not cloned this does nothing
+ * since we set nr_frags to 0.
+ */
for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
skb_frag_ref(from, i);
diff --git a/net/core/sock.c b/net/core/sock.c
index 2676a88f533e..8a146cfcc366 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -142,7 +142,7 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
struct proto *proto;
@@ -271,16 +271,60 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);
-#if defined(CONFIG_CGROUPS)
-#if !defined(CONFIG_NET_CLS_CGROUP)
-int net_cls_subsys_id = -1;
-EXPORT_SYMBOL_GPL(net_cls_subsys_id);
-#endif
-#if !defined(CONFIG_NETPRIO_CGROUP)
-int net_prio_subsys_id = -1;
-EXPORT_SYMBOL_GPL(net_prio_subsys_id);
-#endif
-#endif
+struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(memalloc_socks);
+
+/**
+ * sk_set_memalloc - sets %SOCK_MEMALLOC
+ * @sk: socket to set it on
+ *
+ * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
+ * It's the responsibility of the admin to adjust min_free_kbytes
+ * to meet the requirements
+ */
+void sk_set_memalloc(struct sock *sk)
+{
+ sock_set_flag(sk, SOCK_MEMALLOC);
+ sk->sk_allocation |= __GFP_MEMALLOC;
+ static_key_slow_inc(&memalloc_socks);
+}
+EXPORT_SYMBOL_GPL(sk_set_memalloc);
+
+void sk_clear_memalloc(struct sock *sk)
+{
+ sock_reset_flag(sk, SOCK_MEMALLOC);
+ sk->sk_allocation &= ~__GFP_MEMALLOC;
+ static_key_slow_dec(&memalloc_socks);
+
+ /*
+ * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
+ * progress of swapping. However, if SOCK_MEMALLOC is cleared while
+ * it has rmem allocations there is a risk that the user of the
+ * socket cannot make forward progress due to exceeding the rmem
+ * limits. By rights, sk_clear_memalloc() should only be called
+ * on sockets being torn down but warn and reset the accounting if
+ * that assumption breaks.
+ */
+ if (WARN_ON(sk->sk_forward_alloc))
+ sk_mem_reclaim(sk);
+}
+EXPORT_SYMBOL_GPL(sk_clear_memalloc);
+
+int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+ unsigned long pflags = current->flags;
+
+ /* these should have been dropped before queueing */
+ BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
+
+ current->flags |= PF_MEMALLOC;
+ ret = sk->sk_backlog_rcv(sk, skb);
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
+
+ return ret;
+}
+EXPORT_SYMBOL(__sk_backlog_rcv);
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
@@ -353,7 +397,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (err)
return err;
- if (!sk_rmem_schedule(sk, skb->truesize)) {
+ if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
atomic_inc(&sk->sk_drops);
return -ENOBUFS;
}
@@ -636,7 +680,8 @@ set_rcvbuf:
case SO_KEEPALIVE:
#ifdef CONFIG_INET
- if (sk->sk_protocol == IPPROTO_TCP)
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM)
tcp_set_keepalive(sk, valbool);
#endif
sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
@@ -813,8 +858,8 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred,
if (cred) {
struct user_namespace *current_ns = current_user_ns();
- ucred->uid = from_kuid(current_ns, cred->euid);
- ucred->gid = from_kgid(current_ns, cred->egid);
+ ucred->uid = from_kuid_munged(current_ns, cred->euid);
+ ucred->gid = from_kgid_munged(current_ns, cred->egid);
}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);
@@ -1168,6 +1213,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
}
#ifdef CONFIG_CGROUPS
+#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
void sock_update_classid(struct sock *sk)
{
u32 classid;
@@ -1175,11 +1221,13 @@ void sock_update_classid(struct sock *sk)
rcu_read_lock(); /* doing current task, which cannot vanish. */
classid = task_cls_classid(current);
rcu_read_unlock();
- if (classid && classid != sk->sk_classid)
+ if (classid != sk->sk_classid)
sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
+#endif
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
{
if (in_interrupt())
@@ -1189,6 +1237,7 @@ void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
+#endif
/**
* sk_alloc - All socket objects are allocated here
@@ -1403,24 +1452,12 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
sk->sk_gso_max_size = dst->dev->gso_max_size;
+ sk->sk_gso_max_segs = dst->dev->gso_max_segs;
}
}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
-void __init sk_init(void)
-{
- if (totalram_pages <= 4096) {
- sysctl_wmem_max = 32767;
- sysctl_rmem_max = 32767;
- sysctl_wmem_default = 32767;
- sysctl_rmem_default = 32767;
- } else if (totalram_pages >= 131072) {
- sysctl_wmem_max = 131071;
- sysctl_rmem_max = 131071;
- }
-}
-
/*
* Simple resource managers for sockets.
*/
@@ -1467,16 +1504,23 @@ EXPORT_SYMBOL(sock_rfree);
void sock_edemux(struct sk_buff *skb)
{
- sock_put(skb->sk);
+ struct sock *sk = skb->sk;
+
+#ifdef CONFIG_INET
+ if (sk->sk_state == TCP_TIME_WAIT)
+ inet_twsk_put(inet_twsk(sk));
+ else
+#endif
+ sock_put(sk);
}
EXPORT_SYMBOL(sock_edemux);
-int sock_i_uid(struct sock *sk)
+kuid_t sock_i_uid(struct sock *sk)
{
- int uid;
+ kuid_t uid;
read_lock_bh(&sk->sk_callback_lock);
- uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
+ uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
read_unlock_bh(&sk->sk_callback_lock);
return uid;
}
@@ -1681,6 +1725,45 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
}
EXPORT_SYMBOL(sock_alloc_send_skb);
+/* On 32bit arches, an skb frag is limited to 2^15 */
+#define SKB_FRAG_PAGE_ORDER get_order(32768)
+
+bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+{
+ int order;
+
+ if (pfrag->page) {
+ if (atomic_read(&pfrag->page->_count) == 1) {
+ pfrag->offset = 0;
+ return true;
+ }
+ if (pfrag->offset < pfrag->size)
+ return true;
+ put_page(pfrag->page);
+ }
+
+ /* We restrict high order allocations to users that can afford to wait */
+ order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
+
+ do {
+ gfp_t gfp = sk->sk_allocation;
+
+ if (order)
+ gfp |= __GFP_COMP | __GFP_NOWARN;
+ pfrag->page = alloc_pages(gfp, order);
+ if (likely(pfrag->page)) {
+ pfrag->offset = 0;
+ pfrag->size = PAGE_SIZE << order;
+ return true;
+ }
+ } while (--order >= 0);
+
+ sk_enter_memory_pressure(sk);
+ sk_stream_moderate_sndbuf(sk);
+ return false;
+}
+EXPORT_SYMBOL(sk_page_frag_refill);
+
static void __lock_sock(struct sock *sk)
__releases(&sk->sk_lock.slock)
__acquires(&sk->sk_lock.slock)
@@ -2110,8 +2193,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_error_report = sock_def_error_report;
sk->sk_destruct = sock_def_destruct;
- sk->sk_sndmsg_page = NULL;
- sk->sk_sndmsg_off = 0;
+ sk->sk_frag.page = NULL;
+ sk->sk_frag.offset = 0;
sk->sk_peek_off = -1;
sk->sk_peer_pid = NULL;
@@ -2354,6 +2437,12 @@ void sk_common_release(struct sock *sk)
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
+
+ if (sk->sk_frag.page) {
+ put_page(sk->sk_frag.page);
+ sk->sk_frag.page = NULL;
+ }
+
sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 9d8755e4a7a5..602cd637182e 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -172,8 +172,7 @@ static int __net_init diag_net_init(struct net *net)
.input = sock_diag_rcv,
};
- net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG,
- THIS_MODULE, &cfg);
+ net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
return net->diag_nlsk == NULL ? -ENOMEM : 0;
}
diff --git a/net/core/utils.c b/net/core/utils.c
index 39895a65e54a..f5613d569c23 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -294,6 +294,26 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace4);
+void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
+ const __be32 *from, const __be32 *to,
+ int pseudohdr)
+{
+ __be32 diff[] = {
+ ~from[0], ~from[1], ~from[2], ~from[3],
+ to[0], to[1], to[2], to[3],
+ };
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ *sum = csum_fold(csum_partial(diff, sizeof(diff),
+ ~csum_unfold(*sum)));
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ skb->csum = ~csum_partial(diff, sizeof(diff),
+ ~skb->csum);
+ } else if (pseudohdr)
+ *sum = ~csum_fold(csum_partial(diff, sizeof(diff),
+ csum_unfold(*sum)));
+}
+EXPORT_SYMBOL(inet_proto_csum_replace16);
+
int mac_pton(const char *s, u8 *mac)
{
int i;
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 81f2bb62dea3..70989e672304 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1319,7 +1319,7 @@ nla_put_failure:
}
static int dcbnl_notify(struct net_device *dev, int event, int cmd,
- u32 seq, u32 pid, int dcbx_ver)
+ u32 seq, u32 portid, int dcbx_ver)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -1330,7 +1330,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd,
if (!ops)
return -EOPNOTSUPP;
- skb = dcbnl_newmsg(event, cmd, pid, seq, 0, &nlh);
+ skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh);
if (!skb)
return -ENOBUFS;
@@ -1353,16 +1353,16 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd,
}
int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd,
- u32 seq, u32 pid)
+ u32 seq, u32 portid)
{
- return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE);
+ return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_IEEE);
}
EXPORT_SYMBOL(dcbnl_ieee_notify);
int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
- u32 seq, u32 pid)
+ u32 seq, u32 portid)
{
- return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE);
+ return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_CEE);
}
EXPORT_SYMBOL(dcbnl_cee_notify);
@@ -1656,7 +1656,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
struct net_device *netdev;
struct dcbmsg *dcb = nlmsg_data(nlh);
struct nlattr *tb[DCB_ATTR_MAX + 1];
- u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+ u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = -EINVAL;
struct sk_buff *reply_skb;
struct nlmsghdr *reply_nlh = NULL;
@@ -1690,7 +1690,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
goto out;
}
- reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, pid, nlh->nlmsg_seq,
+ reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq,
nlh->nlmsg_flags, &reply_nlh);
if (!reply_skb) {
ret = -ENOBUFS;
@@ -1705,7 +1705,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
nlmsg_end(reply_skb, reply_nlh);
- ret = rtnl_unicast(reply_skb, &init_net, pid);
+ ret = rtnl_unicast(reply_skb, &init_net, portid);
out:
dev_put(netdev);
return ret;
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 75c3582a7678..fb85d371a8de 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -246,7 +246,7 @@ static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
+ if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
@@ -257,7 +257,7 @@ static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
u32 __user *optval, int __user *optlen)
{
int rc = -ENOPROTOOPT;
- if (ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
+ if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
optval, optlen);
return rc;
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d65e98798eca..119c04317d48 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -535,6 +535,7 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
case DCCP_SOCKOPT_CCID_TX_INFO:
if (len < sizeof(tfrc))
return -EINVAL;
+ memset(&tfrc, 0, sizeof(tfrc));
tfrc.tfrctx_x = hc->tx_x;
tfrc.tfrctx_x_recv = hc->tx_x_recv;
tfrc.tfrctx_x_calc = hc->tx_x_calc;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2ba1a2814c24..307c322d53bb 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1313,10 +1313,10 @@ static int dn_shutdown(struct socket *sock, int how)
if (scp->state == DN_O)
goto out;
- if (how != SHUTDOWN_MASK)
+ if (how != SHUT_RDWR)
goto out;
- sk->sk_shutdown = how;
+ sk->sk_shutdown = SHUTDOWN_MASK;
dn_destroy_sock(sk);
err = 0;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index f3924ab1e019..7b7e561412d3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -667,12 +667,12 @@ static inline size_t dn_ifaddr_nlmsg_size(void)
}
static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -753,7 +753,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
if (dn_idx < skip_naddr)
continue;
- if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
+ if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWADDR,
NLM_F_MULTI) < 0)
goto done;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 85a3604c87c8..b57419cc41a4 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -961,7 +961,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o
.saddr = oldflp->saddr,
.flowidn_scope = RT_SCOPE_UNIVERSE,
.flowidn_mark = oldflp->flowidn_mark,
- .flowidn_iif = init_net.loopback_dev->ifindex,
+ .flowidn_iif = LOOPBACK_IFINDEX,
.flowidn_oif = oldflp->flowidn_oif,
};
struct dn_route *rt = NULL;
@@ -979,7 +979,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o
"dn_route_output_slow: dst=%04x src=%04x mark=%d"
" iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
le16_to_cpu(oldflp->saddr),
- oldflp->flowidn_mark, init_net.loopback_dev->ifindex,
+ oldflp->flowidn_mark, LOOPBACK_IFINDEX,
oldflp->flowidn_oif);
/* If we have an output interface, verify its a DECnet device */
@@ -1042,7 +1042,7 @@ source_ok:
if (!fld.daddr)
goto out;
}
- fld.flowidn_oif = init_net.loopback_dev->ifindex;
+ fld.flowidn_oif = LOOPBACK_IFINDEX;
res.type = RTN_LOCAL;
goto make_route;
}
@@ -1543,7 +1543,7 @@ static int dn_route_input(struct sk_buff *skb)
return dn_route_input_slow(skb);
}
-static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
int event, int nowait, unsigned int flags)
{
struct dn_route *rt = (struct dn_route *)skb_dst(skb);
@@ -1551,7 +1551,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
struct nlmsghdr *nlh;
long expires;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
if (!nlh)
return -EMSGSIZE;
@@ -1685,7 +1685,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
+ err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
if (err == 0)
goto out_free;
@@ -1694,7 +1694,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
goto out_free;
}
- return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid);
out_free:
kfree_skb(skb);
@@ -1737,7 +1737,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (idx < s_idx)
continue;
skb_dst_set(skb, dst_clone(&rt->dst));
- if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
1, NLM_F_MULTI) <= 0) {
skb_dst_drop(skb);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 16c986ab1228..f968c1b58f47 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -291,14 +291,14 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
return payload;
}
-static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
struct dn_fib_info *fi, unsigned int flags)
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
if (!nlh)
return -EMSGSIZE;
@@ -374,14 +374,14 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
struct nlmsghdr *nlh, struct netlink_skb_parms *req)
{
struct sk_buff *skb;
- u32 pid = req ? req->pid : 0;
+ u32 portid = req ? req->portid : 0;
int err = -ENOBUFS;
skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL);
if (skb == NULL)
goto errout;
- err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
+ err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id,
f->fn_type, f->fn_scope, &f->fn_key, z,
DN_FIB_INFO(f), 0);
if (err < 0) {
@@ -390,7 +390,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+ rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
return;
errout:
if (err < 0)
@@ -411,7 +411,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
continue;
if (f->fn_state & DN_S_ZOMBIE)
continue;
- if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
+ if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWROUTE,
tb->n,
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 11db0ecf342f..dfe42012a044 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -130,8 +130,7 @@ static int __init dn_rtmsg_init(void)
.input = dnrmg_receive_user_skb,
};
- dnrmg = netlink_kernel_create(&init_net,
- NETLINK_DNRTMSG, THIS_MODULE, &cfg);
+ dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg);
if (dnrmg == NULL) {
printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
return -ENOMEM;
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index d9507dd05818..9807945a56d9 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -259,7 +259,8 @@ static int __init init_dns_resolver(void)
if (!cred)
return -ENOMEM;
- keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred,
+ keyring = key_alloc(&key_type_keyring, ".dns_resolver",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
(KEY_POS_ALL & ~KEY_POS_SETATTR) |
KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 88e7c2f3fa0d..45295ca09571 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -370,7 +370,7 @@ static int dsa_remove(struct platform_device *pdev)
if (dst->link_poll_needed)
del_timer_sync(&dst->link_poll_timer);
- flush_work_sync(&dst->link_poll_work);
+ flush_work(&dst->link_poll_work);
for (i = 0; i < dst->pd->nr_chips; i++) {
struct dsa_switch *ds = dst->ds[i];
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 6a095225148e..6d42c17af96b 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -1063,12 +1063,6 @@ out:
return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK);
}
-static void lowpan_dev_free(struct net_device *dev)
-{
- dev_put(lowpan_dev_info(dev)->real_dev);
- free_netdev(dev);
-}
-
static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
{
struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
@@ -1118,7 +1112,7 @@ static void lowpan_setup(struct net_device *dev)
dev->netdev_ops = &lowpan_netdev_ops;
dev->header_ops = &lowpan_header_ops;
dev->ml_priv = &lowpan_mlme;
- dev->destructor = lowpan_dev_free;
+ dev->destructor = free_netdev;
}
static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1133,6 +1127,8 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
+ struct sk_buff *local_skb;
+
if (!netif_running(dev))
goto drop;
@@ -1144,7 +1140,12 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */
case LOWPAN_DISPATCH_FRAG1: /* first fragment header */
case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */
- lowpan_process_data(skb);
+ local_skb = skb_clone(skb, GFP_ATOMIC);
+ if (!local_skb)
+ goto drop;
+ lowpan_process_data(local_skb);
+
+ kfree_skb(skb);
break;
default:
break;
@@ -1237,6 +1238,34 @@ static inline void __init lowpan_netlink_fini(void)
rtnl_link_unregister(&lowpan_link_ops);
}
+static int lowpan_device_event(struct notifier_block *unused,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ptr;
+ LIST_HEAD(del_list);
+ struct lowpan_dev_record *entry, *tmp;
+
+ if (dev->type != ARPHRD_IEEE802154)
+ goto out;
+
+ if (event == NETDEV_UNREGISTER) {
+ list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) {
+ if (lowpan_dev_info(entry->ldev)->real_dev == dev)
+ lowpan_dellink(entry->ldev, &del_list);
+ }
+
+ unregister_netdevice_many(&del_list);
+ }
+
+out:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block lowpan_dev_notifier = {
+ .notifier_call = lowpan_device_event,
+};
+
static struct packet_type lowpan_packet_type = {
.type = __constant_htons(ETH_P_IEEE802154),
.func = lowpan_rcv,
@@ -1251,6 +1280,12 @@ static int __init lowpan_init_module(void)
goto out;
dev_add_pack(&lowpan_packet_type);
+
+ err = register_netdevice_notifier(&lowpan_dev_notifier);
+ if (err < 0) {
+ dev_remove_pack(&lowpan_packet_type);
+ lowpan_netlink_fini();
+ }
out:
return err;
}
@@ -1263,6 +1298,8 @@ static void __exit lowpan_cleanup_module(void)
dev_remove_pack(&lowpan_packet_type);
+ unregister_netdevice_notifier(&lowpan_dev_notifier);
+
/* Now 6lowpan packet_type is removed, so no new fragments are
* expected on RX, therefore that's the time to clean incomplete
* fragments.
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 1e9917124e75..96bb08abece2 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -246,7 +246,7 @@ nla_put_failure:
}
EXPORT_SYMBOL(ieee802154_nl_start_confirm);
-static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid,
+static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
u32 seq, int flags, struct net_device *dev)
{
void *hdr;
@@ -534,7 +534,7 @@ static int ieee802154_list_iface(struct sk_buff *skb,
if (!msg)
goto out_dev;
- rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq,
+ rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq,
0, dev);
if (rc < 0)
goto out_free;
@@ -565,7 +565,7 @@ static int ieee802154_dump_iface(struct sk_buff *skb,
if (idx < s_idx || (dev->type != ARPHRD_IEEE802154))
goto cont;
- if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid,
+ if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0)
break;
cont:
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index d54be34cca94..22b1a7058fd3 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -35,7 +35,7 @@
#include "ieee802154.h"
-static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid,
+static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
u32 seq, int flags, struct wpan_phy *phy)
{
void *hdr;
@@ -105,7 +105,7 @@ static int ieee802154_list_phy(struct sk_buff *skb,
if (!msg)
goto out_dev;
- rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq,
+ rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq,
0, phy);
if (rc < 0)
goto out_free;
@@ -138,7 +138,7 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data)
return 0;
rc = ieee802154_nl_fill_phy(data->skb,
- NETLINK_CB(data->cb->skb).pid,
+ NETLINK_CB(data->cb->skb).portid,
data->cb->nlh->nlmsg_seq,
NLM_F_MULTI,
phy);
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ae2ccf2890e4..15ca63ec604e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
+obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fe4582ca969a..766c59658563 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -212,6 +212,26 @@ int inet_listen(struct socket *sock, int backlog)
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
+ /* Check special setups for testing purpose to enable TFO w/o
+ * requiring TCP_FASTOPEN sockopt.
+ * Note that only TCP sockets (SOCK_STREAM) will reach here.
+ * Also fastopenq may already been allocated because this
+ * socket was in TCP_LISTEN state previously but was
+ * shutdown() (rather than close()).
+ */
+ if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
+ inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
+ if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
+ err = fastopen_init_queue(sk, backlog);
+ else if ((sysctl_tcp_fastopen &
+ TFO_SERVER_WO_SOCKOPT2) != 0)
+ err = fastopen_init_queue(sk,
+ ((uint)sysctl_tcp_fastopen) >> 16);
+ else
+ err = 0;
+ if (err)
+ goto out;
+ }
err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
@@ -701,7 +721,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
sock_rps_record_flow(sk2);
WARN_ON(!((1 << sk2->sk_state) &
- (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+ (TCPF_ESTABLISHED | TCPF_SYN_RECV |
+ TCPF_CLOSE_WAIT | TCPF_CLOSE)));
sock_graft(sk2, newsock);
@@ -1364,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
if (*(u8 *)iph != 0x45)
goto out_unlock;
- if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+ if (unlikely(ip_fast_csum((u8 *)iph, 5)))
goto out_unlock;
id = ntohl(*(__be32 *)&iph->id);
@@ -1380,7 +1401,6 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
iph2 = ip_hdr(p);
if ((iph->protocol ^ iph2->protocol) |
- (iph->tos ^ iph2->tos) |
((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
NAPI_GRO_CB(p)->same_flow = 0;
@@ -1390,6 +1410,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
/* All fields must match except length and checksum. */
NAPI_GRO_CB(p)->flush |=
(iph->ttl ^ iph2->ttl) |
+ (iph->tos ^ iph2->tos) |
((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
NAPI_GRO_CB(p)->flush |= flush;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index a0124eb7dbea..47800459e4cb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -827,7 +827,7 @@ static int arp_process(struct sk_buff *skb)
}
if (arp->ar_op == htons(ARPOP_REQUEST) &&
- ip_route_input(skb, tip, sip, 0, dev) == 0) {
+ ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
rt = skb_rtable(skb);
addr_type = rt->rt_type;
@@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&arp_tbl, dev);
- rt_cache_flush(dev_net(dev), 0);
+ rt_cache_flush(dev_net(dev));
break;
default:
break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 44bf82e3aef7..2a6abc163ed2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -94,25 +94,22 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
-/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
- * value. So if you change this define, make appropriate changes to
- * inet_addr_hash as well.
- */
-#define IN4_ADDR_HSIZE 256
+#define IN4_ADDR_HSIZE_SHIFT 8
+#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
+
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
static DEFINE_SPINLOCK(inet_addr_hash_lock);
-static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
+static u32 inet_addr_hash(struct net *net, __be32 addr)
{
- u32 val = (__force u32) addr ^ hash_ptr(net, 8);
+ u32 val = (__force u32) addr ^ net_hash_mix(net);
- return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
- (IN4_ADDR_HSIZE - 1));
+ return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
}
static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
{
- unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
+ u32 hash = inet_addr_hash(net, ifa->ifa_local);
spin_lock(&inet_addr_hash_lock);
hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
@@ -136,18 +133,18 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
*/
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
- unsigned int hash = inet_addr_hash(net, addr);
+ u32 hash = inet_addr_hash(net, addr);
struct net_device *result = NULL;
struct in_ifaddr *ifa;
struct hlist_node *node;
rcu_read_lock();
hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
- struct net_device *dev = ifa->ifa_dev->dev;
-
- if (!net_eq(dev_net(dev), net))
- continue;
if (ifa->ifa_local == addr) {
+ struct net_device *dev = ifa->ifa_dev->dev;
+
+ if (!net_eq(dev_net(dev), net))
+ continue;
result = dev;
break;
}
@@ -182,10 +179,10 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
-static inline void devinet_sysctl_register(struct in_device *idev)
+static void devinet_sysctl_register(struct in_device *idev)
{
}
-static inline void devinet_sysctl_unregister(struct in_device *idev)
+static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
@@ -205,7 +202,7 @@ static void inet_rcu_free_ifa(struct rcu_head *head)
kfree(ifa);
}
-static inline void inet_free_ifa(struct in_ifaddr *ifa)
+static void inet_free_ifa(struct in_ifaddr *ifa)
{
call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
}
@@ -314,7 +311,7 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
}
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
- int destroy, struct nlmsghdr *nlh, u32 pid)
+ int destroy, struct nlmsghdr *nlh, u32 portid)
{
struct in_ifaddr *promote = NULL;
struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -348,7 +345,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
inet_hash_remove(ifa);
*ifap1 = ifa->ifa_next;
- rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
+ rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_DOWN, ifa);
inet_free_ifa(ifa);
@@ -385,7 +382,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
is valid, it will try to restore deleted routes... Grr.
So that, this order is correct.
*/
- rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
+ rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
if (promote) {
@@ -398,7 +395,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
promote->ifa_flags &= ~IFA_F_SECONDARY;
- rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
+ rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
blocking_notifier_call_chain(&inetaddr_chain,
NETDEV_UP, promote);
for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
@@ -420,7 +417,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
}
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
- u32 pid)
+ u32 portid)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -467,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
/* Send message first, then call notifier.
Notifier will trigger FIB update, so that
listeners of netlink will know about new ifaddr */
- rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
+ rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
return 0;
@@ -566,7 +563,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
!inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
continue;
- __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
+ __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
return 0;
}
@@ -652,14 +649,14 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
if (IS_ERR(ifa))
return PTR_ERR(ifa);
- return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
+ return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
}
/*
* Determine a default network mask, based on the IP address.
*/
-static inline int inet_abc_len(__be32 addr)
+static int inet_abc_len(__be32 addr)
{
int rc = -1; /* Something else, probably a multicast. */
@@ -725,7 +722,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
break;
case SIOCSIFFLAGS:
- ret = -EACCES;
+ ret = -EPERM;
if (!capable(CAP_NET_ADMIN))
goto out;
break;
@@ -733,7 +730,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFBRDADDR: /* Set the broadcast address */
case SIOCSIFDSTADDR: /* Set the destination address */
case SIOCSIFNETMASK: /* Set the netmask for the interface */
- ret = -EACCES;
+ ret = -EPERM;
if (!capable(CAP_NET_ADMIN))
goto out;
ret = -EINVAL;
@@ -1124,7 +1121,7 @@ skip:
}
}
-static inline bool inetdev_valid_mtu(unsigned int mtu)
+static bool inetdev_valid_mtu(unsigned int mtu)
{
return mtu >= 68;
}
@@ -1239,7 +1236,7 @@ static struct notifier_block ip_netdev_notifier = {
.notifier_call = inetdev_event,
};
-static inline size_t inet_nlmsg_size(void)
+static size_t inet_nlmsg_size(void)
{
return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
+ nla_total_size(4) /* IFA_ADDRESS */
@@ -1249,12 +1246,12 @@ static inline size_t inet_nlmsg_size(void)
}
static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -1316,7 +1313,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
if (ip_idx < s_ip_idx)
continue;
if (inet_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDR, NLM_F_MULTI) <= 0) {
rcu_read_unlock();
@@ -1338,7 +1335,7 @@ done:
}
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
- u32 pid)
+ u32 portid)
{
struct sk_buff *skb;
u32 seq = nlh ? nlh->nlmsg_seq : 0;
@@ -1350,14 +1347,14 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
if (skb == NULL)
goto errout;
- err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
+ err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+ rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
return;
errout:
if (err < 0)
@@ -1503,7 +1500,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
if ((new_value == 0) && (old_value != 0))
- rt_cache_flush(net, 0);
+ rt_cache_flush(net);
}
return ret;
@@ -1537,7 +1534,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
dev_disable_lro(idev->dev);
}
rtnl_unlock();
- rt_cache_flush(net, 0);
+ rt_cache_flush(net);
}
}
@@ -1554,7 +1551,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
struct net *net = ctl->extra2;
if (write && *valp != val)
- rt_cache_flush(net, 0);
+ rt_cache_flush(net);
return ret;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8732cc7920ed..68c93d1bb03a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -148,7 +148,7 @@ static void fib_flush(struct net *net)
}
if (flushed)
- rt_cache_flush(net, -1);
+ rt_cache_flush(net);
}
/*
@@ -218,7 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
scope = RT_SCOPE_UNIVERSE;
if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = net->loopback_dev->ifindex;
+ fl4.flowi4_iif = LOOPBACK_IFINDEX;
fl4.daddr = ip_hdr(skb)->saddr;
fl4.saddr = 0;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
@@ -557,7 +557,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
cfg->fc_flags = rtm->rtm_flags;
cfg->fc_nlflags = nlh->nlmsg_flags;
- cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = net;
@@ -955,7 +955,7 @@ static void nl_fib_input(struct sk_buff *skb)
struct fib_result_nl *frn;
struct nlmsghdr *nlh;
struct fib_table *tb;
- u32 pid;
+ u32 portid;
net = sock_net(skb->sk);
nlh = nlmsg_hdr(skb);
@@ -973,10 +973,10 @@ static void nl_fib_input(struct sk_buff *skb)
nl_fib_lookup(frn, tb);
- pid = NETLINK_CB(skb).pid; /* pid of sending process */
- NETLINK_CB(skb).pid = 0; /* from kernel */
+ portid = NETLINK_CB(skb).portid; /* pid of sending process */
+ NETLINK_CB(skb).portid = 0; /* from kernel */
NETLINK_CB(skb).dst_group = 0; /* unicast */
- netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
+ netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT);
}
static int __net_init nl_fib_lookup_init(struct net *net)
@@ -986,7 +986,7 @@ static int __net_init nl_fib_lookup_init(struct net *net)
.input = nl_fib_input,
};
- sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, THIS_MODULE, &cfg);
+ sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg);
if (sk == NULL)
return -EAFNOSUPPORT;
net->ipv4.fibnl = sk;
@@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net)
net->ipv4.fibnl = NULL;
}
-static void fib_disable_ip(struct net_device *dev, int force, int delay)
+static void fib_disable_ip(struct net_device *dev, int force)
{
if (fib_sync_down_dev(dev, force))
fib_flush(dev_net(dev));
- rt_cache_flush(dev_net(dev), delay);
+ rt_cache_flush(dev_net(dev));
arp_ifdown(dev);
}
@@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
fib_sync_up(dev);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
- rt_cache_flush(dev_net(dev), -1);
+ rt_cache_flush(dev_net(dev));
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa, NULL);
@@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
/* Last address was deleted from this interface.
* Disable IP.
*/
- fib_disable_ip(dev, 1, 0);
+ fib_disable_ip(dev, 1);
} else {
- rt_cache_flush(dev_net(dev), -1);
+ rt_cache_flush(dev_net(dev));
}
break;
}
@@ -1041,16 +1041,16 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev;
struct net *net = dev_net(dev);
if (event == NETDEV_UNREGISTER) {
- fib_disable_ip(dev, 2, -1);
+ fib_disable_ip(dev, 2);
+ rt_flush_dev(dev);
return NOTIFY_DONE;
}
- if (!in_dev)
- return NOTIFY_DONE;
+ in_dev = __in_dev_get_rtnl(dev);
switch (event) {
case NETDEV_UP:
@@ -1061,16 +1061,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_sync_up(dev);
#endif
atomic_inc(&net->ipv4.dev_addr_genid);
- rt_cache_flush(dev_net(dev), -1);
+ rt_cache_flush(net);
break;
case NETDEV_DOWN:
- fib_disable_ip(dev, 0, 0);
+ fib_disable_ip(dev, 0);
break;
case NETDEV_CHANGEMTU:
case NETDEV_CHANGE:
- rt_cache_flush(dev_net(dev), 0);
- break;
- case NETDEV_UNREGISTER_BATCH:
+ rt_cache_flush(net);
break;
}
return NOTIFY_DONE;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a83d74e498d2..274309d3aded 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
{
- rt_cache_flush(ops->fro_net, -1);
+ rt_cache_flush(ops->fro_net);
}
static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e55171f184f9..267753060ffc 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
},
};
+static void rt_fibinfo_free(struct rtable __rcu **rtp)
+{
+ struct rtable *rt = rcu_dereference_protected(*rtp, 1);
+
+ if (!rt)
+ return;
+
+ /* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
+ * because we waited an RCU grace period before calling
+ * free_fib_info_rcu()
+ */
+
+ dst_free(&rt->dst);
+}
+
static void free_nh_exceptions(struct fib_nh *nh)
{
struct fnhe_hash_bucket *hash = nh->nh_exceptions;
@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
struct fib_nh_exception *next;
next = rcu_dereference_protected(fnhe->fnhe_next, 1);
+
+ rt_fibinfo_free(&fnhe->fnhe_rth);
+
kfree(fnhe);
fnhe = next;
@@ -161,6 +179,23 @@ static void free_nh_exceptions(struct fib_nh *nh)
kfree(hash);
}
+static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
+{
+ int cpu;
+
+ if (!rtp)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct rtable *rt;
+
+ rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
+ if (rt)
+ dst_free(&rt->dst);
+ }
+ free_percpu(rtp);
+}
+
/* Release a nexthop info record */
static void free_fib_info_rcu(struct rcu_head *head)
{
@@ -171,10 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh);
- if (nexthop_nh->nh_rth_output)
- dst_release(&nexthop_nh->nh_rth_output->dst);
- if (nexthop_nh->nh_rth_input)
- dst_release(&nexthop_nh->nh_rth_input->dst);
+ rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
+ rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
release_net(fi->fib_net);
@@ -281,6 +314,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
nfi->fib_scope == fi->fib_scope &&
nfi->fib_prefsrc == fi->fib_prefsrc &&
nfi->fib_priority == fi->fib_priority &&
+ nfi->fib_type == fi->fib_type &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(u32) * RTAX_MAX) == 0 &&
((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
@@ -358,7 +392,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
if (skb == NULL)
goto errout;
- err = fib_dump_info(skb, info->pid, seq, event, tb_id,
+ err = fib_dump_info(skb, info->portid, seq, event, tb_id,
fa->fa_type, key, dst_len,
fa->fa_tos, fa->fa_info, nlm_flags);
if (err < 0) {
@@ -367,7 +401,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
+ rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
info->nlh, GFP_KERNEL);
return;
errout:
@@ -800,10 +834,12 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_flags = cfg->fc_flags;
fi->fib_priority = cfg->fc_priority;
fi->fib_prefsrc = cfg->fc_prefsrc;
+ fi->fib_type = cfg->fc_type;
fi->fib_nhs = nhs;
change_nexthops(fi) {
nexthop_nh->nh_parent = fi;
+ nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
} endfor_nexthops(fi)
if (cfg->fc_mx) {
@@ -955,14 +991,14 @@ failure:
return ERR_PTR(err);
}
-int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
struct fib_info *fi, unsigned int flags)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
if (nlh == NULL)
return -EMSGSIZE;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 18cbc15b20d5..31d771ca9a70 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -159,7 +159,6 @@ struct trie {
#endif
};
-static void put_child(struct trie *t, struct tnode *tn, int i, struct rt_trie_node *n);
static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
int wasfull);
static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
@@ -368,7 +367,7 @@ static void __leaf_free_rcu(struct rcu_head *head)
static inline void free_leaf(struct leaf *l)
{
- call_rcu_bh(&l->rcu, __leaf_free_rcu);
+ call_rcu(&l->rcu, __leaf_free_rcu);
}
static inline void free_leaf_info(struct leaf_info *leaf)
@@ -473,7 +472,7 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
}
pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
- sizeof(struct rt_trie_node) << bits);
+ sizeof(struct rt_trie_node *) << bits);
return tn;
}
@@ -490,7 +489,7 @@ static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *
return ((struct tnode *) n)->pos == tn->pos + tn->bits;
}
-static inline void put_child(struct trie *t, struct tnode *tn, int i,
+static inline void put_child(struct tnode *tn, int i,
struct rt_trie_node *n)
{
tnode_put_child_reorg(tn, i, n, -1);
@@ -754,8 +753,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
goto nomem;
}
- put_child(t, tn, 2*i, (struct rt_trie_node *) left);
- put_child(t, tn, 2*i+1, (struct rt_trie_node *) right);
+ put_child(tn, 2*i, (struct rt_trie_node *) left);
+ put_child(tn, 2*i+1, (struct rt_trie_node *) right);
}
}
@@ -776,9 +775,9 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
if (tkey_extract_bits(node->key,
oldtnode->pos + oldtnode->bits,
1) == 0)
- put_child(t, tn, 2*i, node);
+ put_child(tn, 2*i, node);
else
- put_child(t, tn, 2*i+1, node);
+ put_child(tn, 2*i+1, node);
continue;
}
@@ -786,8 +785,8 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
inode = (struct tnode *) node;
if (inode->bits == 1) {
- put_child(t, tn, 2*i, rtnl_dereference(inode->child[0]));
- put_child(t, tn, 2*i+1, rtnl_dereference(inode->child[1]));
+ put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
+ put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
tnode_free_safe(inode);
continue;
@@ -817,22 +816,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
*/
left = (struct tnode *) tnode_get_child(tn, 2*i);
- put_child(t, tn, 2*i, NULL);
+ put_child(tn, 2*i, NULL);
BUG_ON(!left);
right = (struct tnode *) tnode_get_child(tn, 2*i+1);
- put_child(t, tn, 2*i+1, NULL);
+ put_child(tn, 2*i+1, NULL);
BUG_ON(!right);
size = tnode_child_length(left);
for (j = 0; j < size; j++) {
- put_child(t, left, j, rtnl_dereference(inode->child[j]));
- put_child(t, right, j, rtnl_dereference(inode->child[j + size]));
+ put_child(left, j, rtnl_dereference(inode->child[j]));
+ put_child(right, j, rtnl_dereference(inode->child[j + size]));
}
- put_child(t, tn, 2*i, resize(t, left));
- put_child(t, tn, 2*i+1, resize(t, right));
+ put_child(tn, 2*i, resize(t, left));
+ put_child(tn, 2*i+1, resize(t, right));
tnode_free_safe(inode);
}
@@ -877,7 +876,7 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
if (!newn)
goto nomem;
- put_child(t, tn, i/2, (struct rt_trie_node *)newn);
+ put_child(tn, i/2, (struct rt_trie_node *)newn);
}
}
@@ -892,21 +891,21 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
if (left == NULL) {
if (right == NULL) /* Both are empty */
continue;
- put_child(t, tn, i/2, right);
+ put_child(tn, i/2, right);
continue;
}
if (right == NULL) {
- put_child(t, tn, i/2, left);
+ put_child(tn, i/2, left);
continue;
}
/* Two nonempty children */
newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
- put_child(t, tn, i/2, NULL);
- put_child(t, newBinNode, 0, left);
- put_child(t, newBinNode, 1, right);
- put_child(t, tn, i/2, resize(t, newBinNode));
+ put_child(tn, i/2, NULL);
+ put_child(newBinNode, 0, left);
+ put_child(newBinNode, 1, right);
+ put_child(tn, i/2, resize(t, newBinNode));
}
tnode_free_safe(oldtnode);
return tn;
@@ -1125,7 +1124,7 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
node_set_parent((struct rt_trie_node *)l, tp);
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
- put_child(t, tp, cindex, (struct rt_trie_node *)l);
+ put_child(tp, cindex, (struct rt_trie_node *)l);
} else {
/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
/*
@@ -1155,12 +1154,12 @@ static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
node_set_parent((struct rt_trie_node *)tn, tp);
missbit = tkey_extract_bits(key, newpos, 1);
- put_child(t, tn, missbit, (struct rt_trie_node *)l);
- put_child(t, tn, 1-missbit, n);
+ put_child(tn, missbit, (struct rt_trie_node *)l);
+ put_child(tn, 1-missbit, n);
if (tp) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
- put_child(t, tp, cindex, (struct rt_trie_node *)tn);
+ put_child(tp, cindex, (struct rt_trie_node *)tn);
} else {
rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
tp = tn;
@@ -1287,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
- rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
@@ -1334,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
list_add_tail_rcu(&new_fa->fa_list,
(fa ? &fa->fa_list : fa_head));
- rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
succeeded:
@@ -1551,7 +1550,8 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
* state.directly.
*/
if (pref_mismatch) {
- int mp = KEYLENGTH - fls(pref_mismatch);
+ /* fls(x) = __fls(x) + 1 */
+ int mp = KEYLENGTH - __fls(pref_mismatch) - 1;
if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
goto backtrace;
@@ -1619,7 +1619,7 @@ static void trie_leaf_remove(struct trie *t, struct leaf *l)
if (tp) {
t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
- put_child(t, tp, cindex, NULL);
+ put_child(tp, cindex, NULL);
trie_rebalance(t, tp);
} else
RCU_INIT_POINTER(t->trie, NULL);
@@ -1656,7 +1656,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if (!l)
return -ESRCH;
- fa_head = get_fa_head(l, plen);
+ li = find_leaf_info(l, plen);
+
+ if (!li)
+ return -ESRCH;
+
+ fa_head = &li->falh;
fa = fib_find_alias(fa_head, tos, 0);
if (!fa)
@@ -1692,9 +1697,6 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
- l = fib_find_node(t, key);
- li = find_leaf_info(l, plen);
-
list_del_rcu(&fa->fa_list);
if (!plen)
@@ -1709,7 +1711,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
trie_leaf_remove(t, l);
if (fa->fa_state & FA_S_ACCESSED)
- rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -1871,7 +1873,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
continue;
}
- if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
+ if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWROUTE,
tb->tb_id,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6699f23e6f55..736ab70fd179 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -815,14 +815,15 @@ static int igmp_marksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs)
return 1;
}
-static void igmp_heard_report(struct in_device *in_dev, __be32 group)
+/* return true if packet was dropped */
+static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
{
struct ip_mc_list *im;
/* Timers are only set for non-local groups */
if (group == IGMP_ALL_HOSTS)
- return;
+ return false;
rcu_read_lock();
for_each_pmc_rcu(in_dev, im) {
@@ -832,9 +833,11 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
}
}
rcu_read_unlock();
+ return false;
}
-static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+/* return true if packet was dropped */
+static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
int len)
{
struct igmphdr *ih = igmp_hdr(skb);
@@ -866,7 +869,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
/* clear deleted report items */
igmpv3_clear_delrec(in_dev);
} else if (len < 12) {
- return; /* ignore bogus packet; freed by caller */
+ return true; /* ignore bogus packet; freed by caller */
} else if (IGMP_V1_SEEN(in_dev)) {
/* This is a v3 query with v1 queriers present */
max_delay = IGMP_Query_Response_Interval;
@@ -883,13 +886,13 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
max_delay = 1; /* can't mod w/ 0 */
} else { /* v3 */
if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
- return;
+ return true;
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs) {
if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)
+ ntohs(ih3->nsrcs)*sizeof(__be32)))
- return;
+ return true;
ih3 = igmpv3_query_hdr(skb);
}
@@ -901,9 +904,9 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
in_dev->mr_qrv = ih3->qrv;
if (!group) { /* general query */
if (ih3->nsrcs)
- return; /* no sources allowed */
+ return false; /* no sources allowed */
igmp_gq_start_timer(in_dev);
- return;
+ return false;
}
/* mark sources to include, if group & source-specific */
mark = ih3->nsrcs != 0;
@@ -939,6 +942,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
igmp_mod_timer(im, max_delay);
}
rcu_read_unlock();
+ return false;
}
/* called in rcu_read_lock() section */
@@ -948,6 +952,7 @@ int igmp_rcv(struct sk_buff *skb)
struct igmphdr *ih;
struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
int len = skb->len;
+ bool dropped = true;
if (in_dev == NULL)
goto drop;
@@ -969,7 +974,7 @@ int igmp_rcv(struct sk_buff *skb)
ih = igmp_hdr(skb);
switch (ih->type) {
case IGMP_HOST_MEMBERSHIP_QUERY:
- igmp_heard_query(in_dev, skb, len);
+ dropped = igmp_heard_query(in_dev, skb, len);
break;
case IGMP_HOST_MEMBERSHIP_REPORT:
case IGMPV2_HOST_MEMBERSHIP_REPORT:
@@ -979,7 +984,7 @@ int igmp_rcv(struct sk_buff *skb)
/* don't rely on MC router hearing unicast reports */
if (skb->pkt_type == PACKET_MULTICAST ||
skb->pkt_type == PACKET_BROADCAST)
- igmp_heard_report(in_dev, ih->group);
+ dropped = igmp_heard_report(in_dev, ih->group);
break;
case IGMP_PIM:
#ifdef CONFIG_IP_PIMSM_V1
@@ -997,7 +1002,10 @@ int igmp_rcv(struct sk_buff *skb)
}
drop:
- kfree_skb(skb);
+ if (dropped)
+ kfree_skb(skb);
+ else
+ consume_skb(skb);
return 0;
}
@@ -1896,6 +1904,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
rtnl_unlock();
return ret;
}
+EXPORT_SYMBOL(ip_mc_leave_group);
int ip_mc_source(int add, int omode, struct sock *sk, struct
ip_mreq_source *mreqs, int ifindex)
@@ -2435,6 +2444,8 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
struct ip_mc_list *im = (struct ip_mc_list *)v;
struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
char *querier;
+ long delta;
+
#ifdef CONFIG_IP_MULTICAST
querier = IGMP_V1_SEEN(state->in_dev) ? "V1" :
IGMP_V2_SEEN(state->in_dev) ? "V2" :
@@ -2448,11 +2459,12 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
}
+ delta = im->timer.expires - jiffies;
seq_printf(seq,
"\t\t\t\t%08X %5d %d:%08lX\t\t%d\n",
im->multiaddr, im->users,
- im->tm_running, im->tm_running ?
- jiffies_to_clock_t(im->timer.expires-jiffies) : 0,
+ im->tm_running,
+ im->tm_running ? jiffies_delta_to_clock_t(delta) : 0,
im->reporter);
}
return 0;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index db0cf17c00f7..f0c5b9c1a957 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct sock *newsk;
+ struct request_sock *req;
int error;
lock_sock(sk);
@@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
goto out_err;
/* Find already established connection */
- if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
+ if (reqsk_queue_empty(queue)) {
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
/* If this is a non blocking socket don't sleep */
@@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
if (error)
goto out_err;
}
-
- newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
- WARN_ON(newsk->sk_state == TCP_SYN_RECV);
+ req = reqsk_queue_remove(queue);
+ newsk = req->sk;
+
+ sk_acceptq_removed(sk);
+ if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) {
+ spin_lock_bh(&queue->fastopenq->lock);
+ if (tcp_rsk(req)->listener) {
+ /* We are still waiting for the final ACK from 3WHS
+ * so can't free req now. Instead, we set req->sk to
+ * NULL to signify that the child socket is taken
+ * so reqsk_fastopen_remove() will free the req
+ * when 3WHS finishes (or is aborted).
+ */
+ req->sk = NULL;
+ req = NULL;
+ }
+ spin_unlock_bh(&queue->fastopenq->lock);
+ }
out:
release_sock(sk);
+ if (req)
+ __reqsk_free(req);
return newsk;
out_err:
newsk = NULL;
+ req = NULL;
*err = error;
goto out;
}
@@ -404,12 +424,15 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct inet_sock *newinet = inet_sk(newsk);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct net *net = sock_net(sk);
struct flowi4 *fl4;
struct rtable *rt;
fl4 = &newinet->cork.fl.u.ip4;
+
+ rcu_read_lock();
+ opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -421,11 +444,13 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_gateway)
goto route_err;
+ rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
+ rcu_read_unlock();
IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
@@ -715,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
void inet_csk_listen_stop(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct request_sock *acc_req;
struct request_sock *req;
inet_csk_delete_keepalive_timer(sk);
/* make all the listen_opt local to us */
- acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+ acc_req = reqsk_queue_yank_acceptq(queue);
/* Following specs, it would be better either to send FIN
* (and enter FIN-WAIT-1, it is normal close)
@@ -731,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk)
* To be honest, we are not able to make either
* of the variants now. --ANK
*/
- reqsk_queue_destroy(&icsk->icsk_accept_queue);
+ reqsk_queue_destroy(queue);
while ((req = acc_req) != NULL) {
struct sock *child = req->sk;
@@ -749,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk)
percpu_counter_inc(sk->sk_prot->orphan_count);
+ if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->listener) {
+ BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+ BUG_ON(sk != tcp_rsk(req)->listener);
+
+ /* Paranoid, to prevent race condition if
+ * an inbound pkt destined for child is
+ * blocked by sock lock in tcp_v4_rcv().
+ * Also to satisfy an assertion in
+ * tcp_v4_destroy_sock().
+ */
+ tcp_sk(child)->fastopen_rsk = NULL;
+ sock_put(sk);
+ }
inet_csk_destroy_sock(child);
bh_unlock_sock(child);
@@ -758,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk)
sk_acceptq_removed(sk);
__reqsk_free(req);
}
+ if (queue->fastopenq != NULL) {
+ /* Free all the reqs queued in rskq_rst_head. */
+ spin_lock_bh(&queue->fastopenq->lock);
+ acc_req = queue->fastopenq->rskq_rst_head;
+ queue->fastopenq->rskq_rst_head = NULL;
+ spin_unlock_bh(&queue->fastopenq->lock);
+ while ((req = acc_req) != NULL) {
+ acc_req = req->dl_next;
+ __reqsk_free(req);
+ }
+ }
WARN_ON(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 570e61f9611f..535584c00f91 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -69,7 +69,8 @@ static inline void inet_diag_unlock_handler(
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct sk_buff *skb, struct inet_diag_req_v2 *req,
- u32 pid, u32 seq, u16 nlmsg_flags,
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -83,7 +84,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
handler = inet_diag_table[req->sdiag_protocol];
BUG_ON(handler == NULL);
- nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
+ nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
nlmsg_flags);
if (!nlh)
return -EMSGSIZE;
@@ -124,7 +125,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
}
#endif
- r->idiag_uid = sock_i_uid(sk);
+ r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = sock_i_ino(sk);
if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
@@ -199,23 +200,24 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
static int inet_csk_diag_fill(struct sock *sk,
struct sk_buff *skb, struct inet_diag_req_v2 *req,
- u32 pid, u32 seq, u16 nlmsg_flags,
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
return inet_sk_diag_fill(sk, inet_csk(sk),
- skb, req, pid, seq, nlmsg_flags, unlh);
+ skb, req, user_ns, portid, seq, nlmsg_flags, unlh);
}
static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
struct sk_buff *skb, struct inet_diag_req_v2 *req,
- u32 pid, u32 seq, u16 nlmsg_flags,
+ u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
long tmo;
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
- nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
+ nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
nlmsg_flags);
if (!nlh)
return -EMSGSIZE;
@@ -256,14 +258,16 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
- struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags,
+ struct inet_diag_req_v2 *r,
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
- skb, r, pid, seq, nlmsg_flags,
+ skb, r, portid, seq, nlmsg_flags,
unlh);
- return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
+ return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh);
}
int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
@@ -311,14 +315,15 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
}
err = sk_diag_fill(sk, rep, req,
- NETLINK_CB(in_skb).pid,
+ sk_user_ns(NETLINK_CB(in_skb).ssk),
+ NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, nlh);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
nlmsg_free(rep);
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+ err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
MSG_DONTWAIT);
if (err > 0)
err = 0;
@@ -551,7 +556,8 @@ static int inet_csk_diag_dump(struct sock *sk,
return 0;
return inet_csk_diag_fill(sk, skb, r,
- NETLINK_CB(cb->skb).pid,
+ sk_user_ns(NETLINK_CB(cb->skb).ssk),
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
@@ -586,12 +592,14 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
}
return inet_twsk_diag_fill(tw, skb, r,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
- struct request_sock *req, u32 pid, u32 seq,
+ struct request_sock *req,
+ struct user_namespace *user_ns,
+ u32 portid, u32 seq,
const struct nlmsghdr *unlh)
{
const struct inet_request_sock *ireq = inet_rsk(req);
@@ -600,7 +608,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
struct nlmsghdr *nlh;
long tmo;
- nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
+ nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
NLM_F_MULTI);
if (!nlh)
return -EMSGSIZE;
@@ -625,7 +633,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_expires = jiffies_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
- r->idiag_uid = sock_i_uid(sk);
+ r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = 0;
#if IS_ENABLED(CONFIG_IPV6)
if (r->idiag_family == AF_INET6) {
@@ -702,7 +710,8 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
}
err = inet_diag_fill_req(skb, sk, req,
- NETLINK_CB(cb->skb).pid,
+ sk_user_ns(NETLINK_CB(cb->skb).ssk),
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, cb->nlh);
if (err < 0) {
cb->args[3] = j + 1;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 85190e69297b..4750d2b74d79 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -89,7 +89,7 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
nf->low_thresh = 0;
local_bh_disable();
- inet_frag_evictor(nf, f);
+ inet_frag_evictor(nf, f, true);
local_bh_enable();
}
EXPORT_SYMBOL(inet_frags_exit_net);
@@ -158,11 +158,16 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
}
EXPORT_SYMBOL(inet_frag_destroy);
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
+int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
{
struct inet_frag_queue *q;
int work, evicted = 0;
+ if (!force) {
+ if (atomic_read(&nf->mem) <= nf->high_thresh)
+ return 0;
+ }
+
work = atomic_read(&nf->mem) - nf->low_thresh;
while (work > 0) {
read_lock(&f->lock);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index e1e0a4e8fd34..000e3d239d64 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -194,7 +194,7 @@ void __init inet_initpeers(void)
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
NULL);
- INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
+ INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker);
}
static int addr_compare(const struct inetpeer_addr *a,
@@ -510,7 +510,10 @@ relookup:
secure_ipv6_id(daddr->addr.a6));
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
- p->rate_last = 0;
+ /* 60*HZ is arbitrary, but chosen enough high so that the first
+ * calculation of tokens is at its maximum.
+ */
+ p->rate_last = jiffies - 60*HZ;
INIT_LIST_HEAD(&p->gc_list);
/* Link the node. */
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7ad88e5e7110..448e68546827 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -219,7 +219,7 @@ static void ip_evictor(struct net *net)
{
int evicted;
- evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
+ evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
if (evicted)
IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
}
@@ -258,8 +258,8 @@ static void ip_expire(unsigned long arg)
/* skb dst is stale, drop it, and perform route lookup again */
skb_dst_drop(head);
iph = ip_hdr(head);
- err = ip_route_input(head, iph->daddr, iph->saddr,
- iph->tos, head->dev);
+ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ iph->tos, head->dev);
if (err)
goto out_rcu_unlock;
@@ -523,6 +523,10 @@ found:
if (offset == 0)
qp->q.last_in |= INET_FRAG_FIRST_IN;
+ if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
+ skb->len + ihl > qp->q.max_size)
+ qp->q.max_size = skb->len + ihl;
+
if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
qp->q.meat == qp->q.len)
return ip_frag_reasm(qp, prev, dev);
@@ -646,9 +650,11 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->next = NULL;
head->dev = dev;
head->tstamp = qp->q.stamp;
+ IPCB(head)->frag_max_size = qp->q.max_size;
iph = ip_hdr(head);
- iph->frag_off = 0;
+ /* max_size != 0 implies at least one fragment had IP_DF set */
+ iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
iph->tot_len = htons(len);
iph->tos |= ecn;
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
@@ -678,8 +684,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
/* Start by cleaning up the memory. */
- if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
- ip_evictor(net);
+ ip_evictor(net);
/* Lookup (or create) queue header */
if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b062a98574f2..7240f8e2dd45 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -120,6 +120,10 @@
Alexey Kuznetsov.
*/
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void ipgre_tunnel_setup(struct net_device *dev);
@@ -204,7 +208,9 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
tot->rx_crc_errors = dev->stats.rx_crc_errors;
tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_frame_errors = dev->stats.rx_frame_errors;
tot->rx_errors = dev->stats.rx_errors;
+
tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
tot->tx_dropped = dev->stats.tx_dropped;
@@ -214,11 +220,25 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
return tot;
}
+/* Does key in tunnel parameters match packet */
+static bool ipgre_key_match(const struct ip_tunnel_parm *p,
+ __be16 flags, __be32 key)
+{
+ if (p->i_flags & GRE_KEY) {
+ if (flags & GRE_KEY)
+ return key == p->i_key;
+ else
+ return false; /* key expected, none present */
+ } else
+ return !(flags & GRE_KEY);
+}
+
/* Given src, dst and key, find appropriate for input tunnel. */
static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
__be32 remote, __be32 local,
- __be32 key, __be16 gre_proto)
+ __be16 flags, __be32 key,
+ __be16 gre_proto)
{
struct net *net = dev_net(dev);
int link = dev->ifindex;
@@ -233,10 +253,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
if (local != t->parms.iph.saddr ||
remote != t->parms.iph.daddr ||
- key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
continue;
+ if (!ipgre_key_match(&t->parms, flags, key))
+ continue;
+
if (t->dev->type != ARPHRD_IPGRE &&
t->dev->type != dev_type)
continue;
@@ -257,10 +279,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
if (remote != t->parms.iph.daddr ||
- key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
continue;
+ if (!ipgre_key_match(&t->parms, flags, key))
+ continue;
+
if (t->dev->type != ARPHRD_IPGRE &&
t->dev->type != dev_type)
continue;
@@ -283,10 +307,12 @@ static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
if ((local != t->parms.iph.saddr &&
(local != t->parms.iph.daddr ||
!ipv4_is_multicast(local))) ||
- key != t->parms.i_key ||
!(t->dev->flags & IFF_UP))
continue;
+ if (!ipgre_key_match(&t->parms, flags, key))
+ continue;
+
if (t->dev->type != ARPHRD_IPGRE &&
t->dev->type != dev_type)
continue;
@@ -489,6 +515,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
__be16 flags;
+ __be32 key = 0;
flags = p[0];
if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
@@ -505,6 +532,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
if (skb_headlen(skb) < grehlen)
return;
+ if (flags & GRE_KEY)
+ key = *(((__be32 *)p) + (grehlen / 4) - 1);
+
switch (type) {
default:
case ICMP_PARAMETERPROB:
@@ -533,49 +563,34 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
break;
}
- rcu_read_lock();
t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
- flags & GRE_KEY ?
- *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
- p[1]);
+ flags, key, p[1]);
+
if (t == NULL)
- goto out;
+ return;
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
t->parms.link, 0, IPPROTO_GRE, 0);
- goto out;
+ return;
}
if (type == ICMP_REDIRECT) {
ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_GRE, 0);
- goto out;
+ return;
}
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
- goto out;
+ return;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
- goto out;
+ return;
if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
t->err_time = jiffies;
-out:
- rcu_read_unlock();
-}
-
-static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
-{
- if (INET_ECN_is_ce(iph->tos)) {
- if (skb->protocol == htons(ETH_P_IP)) {
- IP_ECN_set_ce(ip_hdr(skb));
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
- IP6_ECN_set_ce(ipv6_hdr(skb));
- }
- }
}
static inline u8
@@ -600,9 +615,10 @@ static int ipgre_rcv(struct sk_buff *skb)
struct ip_tunnel *tunnel;
int offset = 4;
__be16 gre_proto;
+ int err;
if (!pskb_may_pull(skb, 16))
- goto drop_nolock;
+ goto drop;
iph = ip_hdr(skb);
h = skb->data;
@@ -613,7 +629,7 @@ static int ipgre_rcv(struct sk_buff *skb)
- We do not support routing headers.
*/
if (flags&(GRE_VERSION|GRE_ROUTING))
- goto drop_nolock;
+ goto drop;
if (flags&GRE_CSUM) {
switch (skb->ip_summed) {
@@ -641,10 +657,10 @@ static int ipgre_rcv(struct sk_buff *skb)
gre_proto = *(__be16 *)(h + 2);
- rcu_read_lock();
- if ((tunnel = ipgre_tunnel_lookup(skb->dev,
- iph->saddr, iph->daddr, key,
- gre_proto))) {
+ tunnel = ipgre_tunnel_lookup(skb->dev,
+ iph->saddr, iph->daddr, flags, key,
+ gre_proto);
+ if (tunnel) {
struct pcpu_tstats *tstats;
secpath_reset(skb);
@@ -703,27 +719,33 @@ static int ipgre_rcv(struct sk_buff *skb)
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
}
+ __skb_tunnel_rx(skb, tunnel->dev);
+
+ skb_reset_network_header(skb);
+ err = IP_ECN_decapsulate(iph, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+ &iph->saddr, iph->tos);
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
+ }
+ }
+
tstats = this_cpu_ptr(tunnel->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
- __skb_tunnel_rx(skb, tunnel->dev);
-
- skb_reset_network_header(skb);
- ipgre_ecn_decapsulate(iph, skb);
-
- netif_rx(skb);
-
- rcu_read_unlock();
+ gro_cells_receive(&tunnel->gro_cells, skb);
return 0;
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
drop:
- rcu_read_unlock();
-drop_nolock:
kfree_skb(skb);
return 0;
}
@@ -745,6 +767,10 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
__be32 dst;
int mtu;
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb))
+ goto tx_error;
+
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -1292,10 +1318,18 @@ static const struct net_device_ops ipgre_netdev_ops = {
static void ipgre_dev_free(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ gro_cells_destroy(&tunnel->gro_cells);
free_percpu(dev->tstats);
free_netdev(dev);
}
+#define GRE_FEATURES (NETIF_F_SG | \
+ NETIF_F_FRAGLIST | \
+ NETIF_F_HIGHDMA | \
+ NETIF_F_HW_CSUM)
+
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
@@ -1309,12 +1343,16 @@ static void ipgre_tunnel_setup(struct net_device *dev)
dev->addr_len = 4;
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+
+ dev->features |= GRE_FEATURES;
+ dev->hw_features |= GRE_FEATURES;
}
static int ipgre_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel;
struct iphdr *iph;
+ int err;
tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
@@ -1341,6 +1379,12 @@ static int ipgre_tunnel_init(struct net_device *dev)
if (!dev->tstats)
return -ENOMEM;
+ err = gro_cells_init(&tunnel->gro_cells, dev);
+ if (err) {
+ free_percpu(dev->tstats);
+ return err;
+ }
+
return 0;
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 93134b0eab0c..f1395a6fb35f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -314,6 +314,7 @@ drop:
}
int sysctl_ip_early_demux __read_mostly = 1;
+EXPORT_SYMBOL(sysctl_ip_early_demux);
static int ip_rcv_finish(struct sk_buff *skb)
{
@@ -324,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb)
const struct net_protocol *ipprot;
int protocol = iph->protocol;
- rcu_read_lock();
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->early_demux) {
ipprot->early_demux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
}
- rcu_read_unlock();
}
/*
@@ -339,8 +338,8 @@ static int ip_rcv_finish(struct sk_buff *skb)
* how the packet travels inside Linux networking.
*/
if (!skb_dst(skb)) {
- int err = ip_route_input(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev);
+ int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ iph->tos, skb->dev);
if (unlikely(err)) {
if (err == -EXDEV)
NET_INC_STATS_BH(dev_net(skb->dev),
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ba39a52d18c1..24a29a39e9a8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -197,7 +197,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
- if (neigh) {
+ if (!IS_ERR(neigh)) {
int res = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock_bh();
@@ -467,7 +467,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
iph = ip_hdr(skb);
- if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
+ if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
+ (IPCB(skb)->frag_max_size &&
+ IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(ip_skb_dst_mtu(skb)));
@@ -791,6 +793,7 @@ static int __ip_append_data(struct sock *sk,
struct flowi4 *fl4,
struct sk_buff_head *queue,
struct inet_cork *cork,
+ struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
@@ -985,47 +988,30 @@ alloc_new_skb:
}
} else {
int i = skb_shinfo(skb)->nr_frags;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
- struct page *page = cork->page;
- int off = cork->off;
- unsigned int left;
-
- if (page && (left = PAGE_SIZE - off) > 0) {
- if (copy >= left)
- copy = left;
- if (page != skb_frag_page(frag)) {
- if (i == MAX_SKB_FRAGS) {
- err = -EMSGSIZE;
- goto error;
- }
- skb_fill_page_desc(skb, i, page, off, 0);
- skb_frag_ref(skb, i);
- frag = &skb_shinfo(skb)->frags[i];
- }
- } else if (i < MAX_SKB_FRAGS) {
- if (copy > PAGE_SIZE)
- copy = PAGE_SIZE;
- page = alloc_pages(sk->sk_allocation, 0);
- if (page == NULL) {
- err = -ENOMEM;
- goto error;
- }
- cork->page = page;
- cork->off = 0;
- skb_fill_page_desc(skb, i, page, 0, 0);
- frag = &skb_shinfo(skb)->frags[i];
- } else {
- err = -EMSGSIZE;
- goto error;
- }
- if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag),
- offset, copy, skb->len, skb) < 0) {
- err = -EFAULT;
+ err = -ENOMEM;
+ if (!sk_page_frag_refill(sk, pfrag))
goto error;
+
+ if (!skb_can_coalesce(skb, i, pfrag->page,
+ pfrag->offset)) {
+ err = -EMSGSIZE;
+ if (i == MAX_SKB_FRAGS)
+ goto error;
+
+ __skb_fill_page_desc(skb, i, pfrag->page,
+ pfrag->offset, 0);
+ skb_shinfo(skb)->nr_frags = ++i;
+ get_page(pfrag->page);
}
- cork->off += copy;
- skb_frag_size_add(frag, copy);
+ copy = min_t(int, copy, pfrag->size - pfrag->offset);
+ if (getfrag(from,
+ page_address(pfrag->page) + pfrag->offset,
+ offset, copy, skb->len, skb) < 0)
+ goto error_efault;
+
+ pfrag->offset += copy;
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
@@ -1037,6 +1023,8 @@ alloc_new_skb:
return 0;
+error_efault:
+ err = -EFAULT;
error:
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
@@ -1077,8 +1065,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->dst = &rt->dst;
cork->length = 0;
cork->tx_flags = ipc->tx_flags;
- cork->page = NULL;
- cork->off = 0;
return 0;
}
@@ -1115,7 +1101,8 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4,
transhdrlen = 0;
}
- return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag,
+ return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base,
+ sk_page_frag(sk), getfrag,
from, length, transhdrlen, flags);
}
@@ -1338,10 +1325,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ihl = 5;
iph->tos = inet->tos;
iph->frag_off = df;
- ip_select_ident(iph, &rt->dst, sk);
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
+ ip_select_ident(iph, &rt->dst, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
@@ -1366,9 +1353,8 @@ out:
return skb;
}
-int ip_send_skb(struct sk_buff *skb)
+int ip_send_skb(struct net *net, struct sk_buff *skb)
{
- struct net *net = sock_net(skb->sk);
int err;
err = ip_local_out(skb);
@@ -1391,7 +1377,7 @@ int ip_push_pending_frames(struct sock *sk, struct flowi4 *fl4)
return 0;
/* Netfilter gets whole the not fragmented skb. */
- return ip_send_skb(skb);
+ return ip_send_skb(sock_net(sk), skb);
}
/*
@@ -1438,7 +1424,8 @@ struct sk_buff *ip_make_skb(struct sock *sk,
if (err)
return ERR_PTR(err);
- err = __ip_append_data(sk, fl4, &queue, &cork, getfrag,
+ err = __ip_append_data(sk, fl4, &queue, &cork,
+ &current->task_frag, getfrag,
from, length, transhdrlen, flags);
if (err) {
__ip_flush_pending_frames(sk, &queue, &cork);
@@ -1536,6 +1523,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
+ skb_orphan(nskb);
skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
ip_push_pending_frames(sk, &fl4);
}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 3511ffba7bd4..978bca4818ae 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -304,7 +304,6 @@ static int vti_err(struct sk_buff *skb, u32 info)
err = -ENOENT;
- rcu_read_lock();
t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
if (t == NULL)
goto out;
@@ -326,7 +325,6 @@ static int vti_err(struct sk_buff *skb, u32 info)
t->err_count = 1;
t->err_time = jiffies;
out:
- rcu_read_unlock();
return err;
}
@@ -336,7 +334,6 @@ static int vti_rcv(struct sk_buff *skb)
struct ip_tunnel *tunnel;
const struct iphdr *iph = ip_hdr(skb);
- rcu_read_lock();
tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
if (tunnel != NULL) {
struct pcpu_tstats *tstats;
@@ -348,10 +345,8 @@ static int vti_rcv(struct sk_buff *skb)
u64_stats_update_end(&tstats->syncp);
skb->dev = tunnel->dev;
- rcu_read_unlock();
return 1;
}
- rcu_read_unlock();
return -1;
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 67e8a6b086ea..798358b10717 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -583,6 +583,17 @@ static void __init ic_rarp_send_if(struct ic_device *d)
#endif
/*
+ * Predefine Nameservers
+ */
+static inline void __init ic_nameservers_predef(void)
+{
+ int i;
+
+ for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
+ ic_nameservers[i] = NONE;
+}
+
+/*
* DHCP/BOOTP support.
*/
@@ -747,10 +758,7 @@ static void __init ic_bootp_init_ext(u8 *e)
*/
static inline void __init ic_bootp_init(void)
{
- int i;
-
- for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
- ic_nameservers[i] = NONE;
+ ic_nameservers_predef();
dev_add_pack(&bootp_packet_type);
}
@@ -1379,6 +1387,7 @@ static int __init ip_auto_config(void)
int retries = CONF_OPEN_RETRIES;
#endif
int err;
+ unsigned int i;
#ifdef CONFIG_PROC_FS
proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops);
@@ -1499,7 +1508,15 @@ static int __init ip_auto_config(void)
&ic_servaddr, &root_server_addr, root_server_path);
if (ic_dev_mtu)
pr_cont(", mtu=%d", ic_dev_mtu);
- pr_cont("\n");
+ for (i = 0; i < CONF_NAMESERVERS_MAX; i++)
+ if (ic_nameservers[i] != NONE) {
+ pr_info(" nameserver%u=%pI4",
+ i, &ic_nameservers[i]);
+ break;
+ }
+ for (i++; i < CONF_NAMESERVERS_MAX; i++)
+ if (ic_nameservers[i] != NONE)
+ pr_cont(", nameserver%u=%pI4\n", i, &ic_nameservers[i]);
#endif /* !SILENT */
return 0;
@@ -1570,6 +1587,8 @@ static int __init ip_auto_config_setup(char *addrs)
return 1;
}
+ ic_nameservers_predef();
+
/* Parse string for static IP assignment. */
ip = addrs;
while (ip && *ip) {
@@ -1613,6 +1632,20 @@ static int __init ip_auto_config_setup(char *addrs)
ic_enable = 0;
}
break;
+ case 7:
+ if (CONF_NAMESERVERS_MAX >= 1) {
+ ic_nameservers[0] = in_aton(ip);
+ if (ic_nameservers[0] == ANY)
+ ic_nameservers[0] = NONE;
+ }
+ break;
+ case 8:
+ if (CONF_NAMESERVERS_MAX >= 2) {
+ ic_nameservers[1] = in_aton(ip);
+ if (ic_nameservers[1] == ANY)
+ ic_nameservers[1] = NONE;
+ }
+ break;
}
}
ip = cp;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 99af1f0cc658..e15b45297c09 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -120,6 +120,10 @@
#define HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
static int ipip_net_id __read_mostly;
struct ipip_net {
struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
@@ -365,8 +369,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
}
err = -ENOENT;
-
- rcu_read_lock();
t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
if (t == NULL)
goto out;
@@ -398,34 +400,22 @@ static int ipip_err(struct sk_buff *skb, u32 info)
t->err_count = 1;
t->err_time = jiffies;
out:
- rcu_read_unlock();
- return err;
-}
-
-static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
- struct sk_buff *skb)
-{
- struct iphdr *inner_iph = ip_hdr(skb);
- if (INET_ECN_is_ce(outer_iph->tos))
- IP_ECN_set_ce(inner_iph);
+ return err;
}
static int ipip_rcv(struct sk_buff *skb)
{
struct ip_tunnel *tunnel;
const struct iphdr *iph = ip_hdr(skb);
+ int err;
- rcu_read_lock();
tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
if (tunnel != NULL) {
struct pcpu_tstats *tstats;
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- rcu_read_unlock();
- kfree_skb(skb);
- return 0;
- }
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
secpath_reset(skb);
@@ -434,24 +424,35 @@ static int ipip_rcv(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
+ __skb_tunnel_rx(skb, tunnel->dev);
+
+ err = IP_ECN_decapsulate(iph, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+ &iph->saddr, iph->tos);
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
+ }
+ }
+
tstats = this_cpu_ptr(tunnel->dev->tstats);
u64_stats_update_begin(&tstats->syncp);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
- __skb_tunnel_rx(skb, tunnel->dev);
-
- ipip_ecn_decapsulate(iph, skb);
-
netif_rx(skb);
-
- rcu_read_unlock();
return 0;
}
- rcu_read_unlock();
return -1;
+
+drop:
+ kfree_skb(skb);
+ return 0;
}
/*
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8eec8f4a0536..1daa95c2a0ba 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -124,6 +124,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
+static void ipmr_free_table(struct mr_table *mrt);
+
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc_cache *cache,
int local);
@@ -131,6 +133,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm);
+static void mroute_clean_tables(struct mr_table *mrt);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -271,7 +274,7 @@ static void __net_exit ipmr_rules_exit(struct net *net)
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
- kfree(mrt);
+ ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
}
@@ -299,7 +302,7 @@ static int __net_init ipmr_rules_init(struct net *net)
static void __net_exit ipmr_rules_exit(struct net *net)
{
- kfree(net->ipv4.mrt);
+ ipmr_free_table(net->ipv4.mrt);
}
#endif
@@ -336,6 +339,13 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
return mrt;
}
+static void ipmr_free_table(struct mr_table *mrt)
+{
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt);
+ kfree(mrt);
+}
+
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
@@ -616,7 +626,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
e->error = -ETIMEDOUT;
memset(&e->msg, 0, sizeof(e->msg));
- rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+ rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else {
kfree_skb(skb);
}
@@ -860,7 +870,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
memset(&e->msg, 0, sizeof(e->msg));
}
- rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+ rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else {
ip_mr_forward(net, mrt, skb, c, 0);
}
@@ -1798,7 +1808,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
.flowi4_oif = (rt_is_output_route(rt) ?
skb->dev->ifindex : 0),
.flowi4_iif = (rt_is_output_route(rt) ?
- net->loopback_dev->ifindex :
+ LOOPBACK_IFINDEX :
skb->dev->ifindex),
.flowi4_mark = skb->mark,
};
@@ -2107,12 +2117,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- u32 pid, u32 seq, struct mfc_cache *c)
+ u32 portid, u32 seq, struct mfc_cache *c)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
- nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2166,7 +2176,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (e < s_e)
goto next_entry;
if (ipmr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
mfc) < 0)
goto done;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ed1b36783192..4c0cf63dd92e 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -72,43 +72,6 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
}
EXPORT_SYMBOL(ip_route_me_harder);
-#ifdef CONFIG_XFRM
-int ip_xfrm_me_harder(struct sk_buff *skb)
-{
- struct flowi fl;
- unsigned int hh_len;
- struct dst_entry *dst;
-
- if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
- return 0;
- if (xfrm_decode_session(skb, &fl, AF_INET) < 0)
- return -1;
-
- dst = skb_dst(skb);
- if (dst->xfrm)
- dst = ((struct xfrm_dst *)dst)->route;
- dst_hold(dst);
-
- dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
- if (IS_ERR(dst))
- return -1;
-
- skb_dst_drop(skb);
- skb_dst_set(skb, dst);
-
- /* Change in oif may mean change in hh_len. */
- hh_len = skb_dst(skb)->dev->hard_header_len;
- if (skb_headroom(skb) < hh_len &&
- pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
- return -1;
- return 0;
-}
-EXPORT_SYMBOL(ip_xfrm_me_harder);
-#endif
-
-void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
-EXPORT_SYMBOL(ip_nat_decode_session);
-
/*
* Extra routing may needed on local out, as the QUEUE target never
* returns control to the table.
@@ -225,12 +188,12 @@ static const struct nf_afinfo nf_ip_afinfo = {
.route_key_size = sizeof(struct ip_rt_info),
};
-static int ipv4_netfilter_init(void)
+static int __init ipv4_netfilter_init(void)
{
return nf_register_afinfo(&nf_ip_afinfo);
}
-static void ipv4_netfilter_fini(void)
+static void __exit ipv4_netfilter_fini(void)
{
nf_unregister_afinfo(&nf_ip_afinfo);
}
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fcc543cd987a..d8d6f2a5bf12 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -143,25 +143,22 @@ config IP_NF_TARGET_ULOG
To compile it as a module, choose M here. If unsure, say N.
# NAT + specific targets: nf_conntrack
-config NF_NAT
- tristate "Full NAT"
+config NF_NAT_IPV4
+ tristate "IPv4 NAT"
depends on NF_CONNTRACK_IPV4
default m if NETFILTER_ADVANCED=n
+ select NF_NAT
help
- The Full NAT option allows masquerading, port forwarding and other
+ The IPv4 NAT option allows masquerading, port forwarding and other
forms of full Network Address Port Translation. It is controlled by
the `nat' table in iptables: see the man page for iptables(8).
To compile it as a module, choose M here. If unsure, say N.
-config NF_NAT_NEEDED
- bool
- depends on NF_NAT
- default y
+if NF_NAT_IPV4
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- depends on NF_NAT
default m if NETFILTER_ADVANCED=n
help
Masquerading is a special case of NAT: all outgoing connections are
@@ -174,30 +171,27 @@ config IP_NF_TARGET_MASQUERADE
config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
- depends on NF_NAT
depends on NETFILTER_ADVANCED
- help
- NETMAP is an implementation of static 1:1 NAT mapping of network
- addresses. It maps the network address part, while keeping the host
- address part intact.
-
- To compile it as a module, choose M here. If unsure, say N.
+ select NETFILTER_XT_TARGET_NETMAP
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_NETMAP.
config IP_NF_TARGET_REDIRECT
tristate "REDIRECT target support"
- depends on NF_NAT
depends on NETFILTER_ADVANCED
- help
- REDIRECT is a special case of NAT: all incoming connections are
- mapped onto the incoming interface's address, causing the packets to
- come to the local machine instead of passing through. This is
- useful for transparent proxies.
+ select NETFILTER_XT_TARGET_REDIRECT
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_REDIRECT.
- To compile it as a module, choose M here. If unsure, say N.
+endif
config NF_NAT_SNMP_BASIC
tristate "Basic SNMP-ALG support"
- depends on NF_CONNTRACK_SNMP && NF_NAT
+ depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4
depends on NETFILTER_ADVANCED
default NF_NAT && NF_CONNTRACK_SNMP
---help---
@@ -219,61 +213,21 @@ config NF_NAT_SNMP_BASIC
# <expr> '&&' <expr> (6)
#
# (6) Returns the result of min(/expr/, /expr/).
-config NF_NAT_PROTO_DCCP
- tristate
- depends on NF_NAT && NF_CT_PROTO_DCCP
- default NF_NAT && NF_CT_PROTO_DCCP
config NF_NAT_PROTO_GRE
tristate
- depends on NF_NAT && NF_CT_PROTO_GRE
-
-config NF_NAT_PROTO_UDPLITE
- tristate
- depends on NF_NAT && NF_CT_PROTO_UDPLITE
- default NF_NAT && NF_CT_PROTO_UDPLITE
-
-config NF_NAT_PROTO_SCTP
- tristate
- default NF_NAT && NF_CT_PROTO_SCTP
- depends on NF_NAT && NF_CT_PROTO_SCTP
- select LIBCRC32C
-
-config NF_NAT_FTP
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_FTP
-
-config NF_NAT_IRC
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_IRC
-
-config NF_NAT_TFTP
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_TFTP
-
-config NF_NAT_AMANDA
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_AMANDA
+ depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE
config NF_NAT_PPTP
tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_PPTP
+ depends on NF_CONNTRACK && NF_NAT_IPV4
+ default NF_NAT_IPV4 && NF_CONNTRACK_PPTP
select NF_NAT_PROTO_GRE
config NF_NAT_H323
tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_H323
-
-config NF_NAT_SIP
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_SIP
+ depends on NF_CONNTRACK && NF_NAT_IPV4
+ default NF_NAT_IPV4 && NF_CONNTRACK_H323
# mangle + specific targets
config IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c20674dc9452..007b128eecc9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -10,32 +10,22 @@ nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
endif
endif
-nf_nat-y := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-iptable_nat-y := nf_nat_rule.o nf_nat_standalone.o
-
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
-obj-$(CONFIG_NF_NAT) += nf_nat.o
+nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
# NAT helpers (nf_conntrack)
-obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
-obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
-obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
-obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
@@ -43,7 +33,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
@@ -55,8 +45,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
-obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
-obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index cbb6a1a6f6f7..5d5d4d1be9c2 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -19,9 +19,9 @@
#include <net/ip.h>
#include <net/checksum.h>
#include <net/route.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -49,7 +49,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
struct nf_conn *ct;
struct nf_conn_nat *nat;
enum ip_conntrack_info ctinfo;
- struct nf_nat_ipv4_range newrange;
+ struct nf_nat_range newrange;
const struct nf_nat_ipv4_multi_range_compat *mr;
const struct rtable *rt;
__be32 newsrc, nh;
@@ -80,10 +80,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
nat->masq_index = par->out->ifindex;
/* Transfer from original range. */
- newrange = ((struct nf_nat_ipv4_range)
- { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
- newsrc, newsrc,
- mr->range[0].min, mr->range[0].max });
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = newsrc;
+ newrange.max_addr.ip = newsrc;
+ newrange.min_proto = mr->range[0].min;
+ newrange.max_proto = mr->range[0].max;
/* Hand modified range to generic setup. */
return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
@@ -96,7 +99,8 @@ device_cmp(struct nf_conn *i, void *ifindex)
if (!nat)
return 0;
-
+ if (nf_ct_l3num(i) != NFPROTO_IPV4)
+ return 0;
return nat->masq_index == (int)(long)ifindex;
}
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
deleted file mode 100644
index b5bfbbabf70d..000000000000
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* NETMAP - static NAT mapping of IP network addresses (1:1).
- * The mapping can be applied to source (POSTROUTING),
- * destination (PREROUTING), or both (with separate rules).
- */
-
-/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_rule.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
-MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
-
-static int netmap_tg_check(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
- if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) {
- pr_debug("bad MAP_IPS.\n");
- return -EINVAL;
- }
- if (mr->rangesize != 1) {
- pr_debug("bad rangesize %u.\n", mr->rangesize);
- return -EINVAL;
- }
- return 0;
-}
-
-static unsigned int
-netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- __be32 new_ip, netmask;
- const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
- struct nf_nat_ipv4_range newrange;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_POST_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT ||
- par->hooknum == NF_INET_LOCAL_IN);
- ct = nf_ct_get(skb, &ctinfo);
-
- netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
-
- if (par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT)
- new_ip = ip_hdr(skb)->daddr & ~netmask;
- else
- new_ip = ip_hdr(skb)->saddr & ~netmask;
- new_ip |= mr->range[0].min_ip & netmask;
-
- newrange = ((struct nf_nat_ipv4_range)
- { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
- new_ip, new_ip,
- mr->range[0].min, mr->range[0].max });
-
- /* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
-}
-
-static struct xt_target netmap_tg_reg __read_mostly = {
- .name = "NETMAP",
- .family = NFPROTO_IPV4,
- .target = netmap_tg,
- .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_LOCAL_IN),
- .checkentry = netmap_tg_check,
- .me = THIS_MODULE
-};
-
-static int __init netmap_tg_init(void)
-{
- return xt_register_target(&netmap_tg_reg);
-}
-
-static void __exit netmap_tg_exit(void)
-{
- xt_unregister_target(&netmap_tg_reg);
-}
-
-module_init(netmap_tg_init);
-module_exit(netmap_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
deleted file mode 100644
index 7c0103a5203e..000000000000
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Redirect. Simple mapping which alters dst to a local IP address. */
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-#include <linux/if.h>
-#include <linux/inetdevice.h>
-#include <net/protocol.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_rule.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
-
-/* FIXME: Take multiple ranges --RR */
-static int redirect_tg_check(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
- if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
- pr_debug("bad MAP_IPS.\n");
- return -EINVAL;
- }
- if (mr->rangesize != 1) {
- pr_debug("bad rangesize %u.\n", mr->rangesize);
- return -EINVAL;
- }
- return 0;
-}
-
-static unsigned int
-redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- __be32 newdst;
- const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
- struct nf_nat_ipv4_range newrange;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT);
-
- ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
- /* Local packets: make them go to loopback */
- if (par->hooknum == NF_INET_LOCAL_OUT)
- newdst = htonl(0x7F000001);
- else {
- struct in_device *indev;
- struct in_ifaddr *ifa;
-
- newdst = 0;
-
- rcu_read_lock();
- indev = __in_dev_get_rcu(skb->dev);
- if (indev && (ifa = indev->ifa_list))
- newdst = ifa->ifa_local;
- rcu_read_unlock();
-
- if (!newdst)
- return NF_DROP;
- }
-
- /* Transfer from original range. */
- newrange = ((struct nf_nat_ipv4_range)
- { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
- newdst, newdst,
- mr->range[0].min, mr->range[0].max });
-
- /* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
-}
-
-static struct xt_target redirect_tg_reg __read_mostly = {
- .name = "REDIRECT",
- .family = NFPROTO_IPV4,
- .target = redirect_tg,
- .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
- .checkentry = redirect_tg_check,
- .me = THIS_MODULE,
-};
-
-static int __init redirect_tg_init(void)
-{
- return xt_register_target(&redirect_tg_reg);
-}
-
-static void __exit redirect_tg_exit(void)
-{
- xt_unregister_target(&redirect_tg_reg);
-}
-
-module_init(redirect_tg_init);
-module_exit(redirect_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 1109f7f6c254..b5ef3cba2250 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -396,8 +396,7 @@ static int __init ulog_tg_init(void)
for (i = 0; i < ULOG_MAXNLGROUPS; i++)
setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
- nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
- THIS_MODULE, &cfg);
+ nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
if (!nflognl)
return -ENOMEM;
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 31371be8174b..c30130062cd6 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ipv4_is_local_multicast(iph->daddr) ^ invert;
flow.flowi4_iif = 0;
} else {
- flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex;
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
}
flow.daddr = iph->saddr;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 851acec852d2..6b3da5cf54e9 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -69,9 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
net->ipv4.iptable_filter =
ipt_register_table(net, &packet_filter, repl);
kfree(repl);
- if (IS_ERR(net->ipv4.iptable_filter))
- return PTR_ERR(net->ipv4.iptable_filter);
- return 0;
+ return PTR_RET(net->ipv4.iptable_filter);
}
static void __net_exit iptable_filter_net_exit(struct net *net)
@@ -96,14 +94,10 @@ static int __init iptable_filter_init(void)
filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
if (IS_ERR(filter_ops)) {
ret = PTR_ERR(filter_ops);
- goto cleanup_table;
+ unregister_pernet_subsys(&iptable_filter_net_ops);
}
return ret;
-
- cleanup_table:
- unregister_pernet_subsys(&iptable_filter_net_ops);
- return ret;
}
static void __exit iptable_filter_fini(void)
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index aef5d1fbe77d..85d88f206447 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -104,9 +104,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
net->ipv4.iptable_mangle =
ipt_register_table(net, &packet_mangler, repl);
kfree(repl);
- if (IS_ERR(net->ipv4.iptable_mangle))
- return PTR_ERR(net->ipv4.iptable_mangle);
- return 0;
+ return PTR_RET(net->ipv4.iptable_mangle);
}
static void __net_exit iptable_mangle_net_exit(struct net *net)
@@ -131,14 +129,10 @@ static int __init iptable_mangle_init(void)
mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
if (IS_ERR(mangle_ops)) {
ret = PTR_ERR(mangle_ops);
- goto cleanup_table;
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
}
return ret;
-
- cleanup_table:
- unregister_pernet_subsys(&iptable_mangle_net_ops);
- return ret;
}
static void __exit iptable_mangle_fini(void)
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/iptable_nat.c
index 3828a4229822..9e0ffaf1d942 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -1,84 +1,71 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/gfp.h>
-#include <linux/ip.h>
+
+#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/ip.h>
#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/spinlock.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv4_table = {
+ .name = "nat",
+ .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+};
-#ifdef CONFIG_XFRM
-static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
- struct flowi4 *fl4 = &fl->u.ip4;
- const struct nf_conn *ct;
- const struct nf_conntrack_tuple *t;
- enum ip_conntrack_info ctinfo;
- enum ip_conntrack_dir dir;
- unsigned long statusbit;
-
- ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
- return;
- dir = CTINFO2DIR(ctinfo);
- t = &ct->tuplehash[dir].tuple;
-
- if (dir == IP_CT_DIR_ORIGINAL)
- statusbit = IPS_DST_NAT;
- else
- statusbit = IPS_SRC_NAT;
-
- if (ct->status & statusbit) {
- fl4->daddr = t->dst.u3.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP ||
- t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
- t->dst.protonum == IPPROTO_SCTP)
- fl4->fl4_dport = t->dst.u.tcp.port;
- }
+ /* Force range to this IP; let proto decide mapping for
+ * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ */
+ struct nf_nat_range range;
+
+ range.flags = 0;
+ pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
+ HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
+
+ return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
- statusbit ^= IPS_NAT_MASK;
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ unsigned int ret;
- if (ct->status & statusbit) {
- fl4->saddr = t->src.u3.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP ||
- t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
- t->dst.protonum == IPPROTO_SCTP)
- fl4->fl4_sport = t->src.u.tcp.port;
+ ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
+ if (ret == NF_ACCEPT) {
+ if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+ ret = alloc_null_binding(ct, hooknum);
}
+ return ret;
}
-#endif
static unsigned int
-nf_nat_fn(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+nf_nat_ipv4_fn(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -87,14 +74,16 @@ nf_nat_fn(unsigned int hooknum,
enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
/* We never see fragments: conntrack defrags on pre-routing
- and local-out, and nf_nat_out protects post-routing. */
+ * and local-out, and nf_nat_out protects post-routing.
+ */
NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
ct = nf_ct_get(skb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
- have dropped it. Hence it's the user's responsibilty to
- packet filter it out, or implement conntrack/NAT for that
- protocol. 8) --RR */
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
if (!ct)
return NF_ACCEPT;
@@ -118,17 +107,17 @@ nf_nat_fn(unsigned int hooknum,
case IP_CT_RELATED:
case IP_CT_RELATED_REPLY:
if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(ct, ctinfo,
- hooknum, skb))
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ hooknum))
return NF_DROP;
else
return NF_ACCEPT;
}
/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
case IP_CT_NEW:
-
/* Seen it before? This can happen for loopback, retrans,
- or local packets.. */
+ * or local packets.
+ */
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
@@ -151,16 +140,16 @@ nf_nat_fn(unsigned int hooknum,
}
static unsigned int
-nf_nat_in(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+nf_nat_ipv4_in(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
- ret = nf_nat_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
daddr != ip_hdr(skb)->daddr)
skb_dst_drop(skb);
@@ -169,11 +158,11 @@ nf_nat_in(unsigned int hooknum,
}
static unsigned int
-nf_nat_out(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+nf_nat_ipv4_out(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
@@ -186,29 +175,30 @@ nf_nat_out(unsigned int hooknum,
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if ((ct->tuplehash[dir].tuple.src.u3.ip !=
ct->tuplehash[!dir].tuple.dst.u3.ip) ||
(ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all)
- )
- return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
+ ct->tuplehash[!dir].tuple.dst.u.all))
+ if (nf_xfrm_me_harder(skb, AF_INET) < 0)
+ ret = NF_DROP;
}
#endif
return ret;
}
static unsigned int
-nf_nat_local_fn(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+nf_nat_ipv4_local_fn(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -219,7 +209,7 @@ nf_nat_local_fn(unsigned int hooknum,
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -230,21 +220,20 @@ nf_nat_local_fn(unsigned int hooknum,
ret = NF_DROP;
}
#ifdef CONFIG_XFRM
- else if (ct->tuplehash[dir].tuple.dst.u.all !=
+ else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all)
- if (ip_xfrm_me_harder(skb))
+ if (nf_xfrm_me_harder(skb, AF_INET) < 0)
ret = NF_DROP;
#endif
}
return ret;
}
-/* We must be after connection tracking and before packet filtering. */
-
-static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
+static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
- .hook = nf_nat_in,
+ .hook = nf_nat_ipv4_in,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
@@ -252,7 +241,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_out,
+ .hook = nf_nat_ipv4_out,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
@@ -260,7 +249,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
},
/* Before packet filtering, change destination */
{
- .hook = nf_nat_local_fn,
+ .hook = nf_nat_ipv4_local_fn,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
@@ -268,7 +257,7 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_fn,
+ .hook = nf_nat_ipv4_fn,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
@@ -276,51 +265,56 @@ static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
},
};
-static int __init nf_nat_standalone_init(void)
+static int __net_init iptable_nat_net_init(struct net *net)
{
- int ret = 0;
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
+ kfree(repl);
+ if (IS_ERR(net->ipv4.nat_table))
+ return PTR_ERR(net->ipv4.nat_table);
+ return 0;
+}
- need_ipv4_conntrack();
+static void __net_exit iptable_nat_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.nat_table);
+}
-#ifdef CONFIG_XFRM
- BUG_ON(ip_nat_decode_session != NULL);
- RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session);
-#endif
- ret = nf_nat_rule_init();
- if (ret < 0) {
- pr_err("nf_nat_init: can't setup rules.\n");
- goto cleanup_decode_session;
- }
- ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
- if (ret < 0) {
- pr_err("nf_nat_init: can't register hooks.\n");
- goto cleanup_rule_init;
- }
- return ret;
+static struct pernet_operations iptable_nat_net_ops = {
+ .init = iptable_nat_net_init,
+ .exit = iptable_nat_net_exit,
+};
- cleanup_rule_init:
- nf_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
- RCU_INIT_POINTER(ip_nat_decode_session, NULL);
- synchronize_net();
-#endif
- return ret;
+static int __init iptable_nat_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&iptable_nat_net_ops);
+ if (err < 0)
+ goto err1;
+
+ err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ unregister_pernet_subsys(&iptable_nat_net_ops);
+err1:
+ return err;
}
-static void __exit nf_nat_standalone_fini(void)
+static void __exit iptable_nat_exit(void)
{
- nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
- nf_nat_rule_cleanup();
-#ifdef CONFIG_XFRM
- RCU_INIT_POINTER(ip_nat_decode_session, NULL);
- synchronize_net();
-#endif
- /* Conntrack caches are unregistered in nf_conntrack_cleanup */
+ nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ unregister_pernet_subsys(&iptable_nat_net_ops);
}
-module_init(nf_nat_standalone_init);
-module_exit(nf_nat_standalone_fini);
+module_init(iptable_nat_init);
+module_exit(iptable_nat_exit);
MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat");
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 07fb710cd722..03d9696d3c6e 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -48,9 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
net->ipv4.iptable_raw =
ipt_register_table(net, &packet_raw, repl);
kfree(repl);
- if (IS_ERR(net->ipv4.iptable_raw))
- return PTR_ERR(net->ipv4.iptable_raw);
- return 0;
+ return PTR_RET(net->ipv4.iptable_raw);
}
static void __net_exit iptable_raw_net_exit(struct net *net)
@@ -75,14 +73,10 @@ static int __init iptable_raw_init(void)
rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
if (IS_ERR(rawtable_ops)) {
ret = PTR_ERR(rawtable_ops);
- goto cleanup_table;
+ unregister_pernet_subsys(&iptable_raw_net_ops);
}
return ret;
-
- cleanup_table:
- unregister_pernet_subsys(&iptable_raw_net_ops);
- return ret;
}
static void __exit iptable_raw_fini(void)
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index be45bdc4c602..b283d8e2601a 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -66,10 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net)
net->ipv4.iptable_security =
ipt_register_table(net, &security_table, repl);
kfree(repl);
- if (IS_ERR(net->ipv4.iptable_security))
- return PTR_ERR(net->ipv4.iptable_security);
-
- return 0;
+ return PTR_RET(net->ipv4.iptable_security);
}
static void __net_exit iptable_security_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index e7ff2dcab6ce..fcdd0c2406e6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -29,11 +29,6 @@
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#include <net/netfilter/nf_log.h>
-int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo);
-EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
-
static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
{
@@ -149,7 +144,8 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
typeof(nf_nat_seq_adjust_hook) seq_adjust;
seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) {
+ if (!seq_adjust ||
+ !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index c6784a18c1c4..9c3db10b22d3 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -15,13 +15,12 @@
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <linux/netfilter/nf_conntrack_h323.h>
/****************************************************************************/
-static int set_addr(struct sk_buff *skb,
+static int set_addr(struct sk_buff *skb, unsigned int protoff,
unsigned char **data, int dataoff,
unsigned int addroff, __be32 ip, __be16 port)
{
@@ -40,7 +39,7 @@ static int set_addr(struct sk_buff *skb,
if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
- addroff, sizeof(buf),
+ protoff, addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n");
return -1;
@@ -54,7 +53,7 @@ static int set_addr(struct sk_buff *skb,
*data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
} else {
if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
- addroff, sizeof(buf),
+ protoff, addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
return -1;
@@ -69,22 +68,22 @@ static int set_addr(struct sk_buff *skb,
}
/****************************************************************************/
-static int set_h225_addr(struct sk_buff *skb,
+static int set_h225_addr(struct sk_buff *skb, unsigned int protoff,
unsigned char **data, int dataoff,
TransportAddress *taddr,
union nf_inet_addr *addr, __be16 port)
{
- return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
+ return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip,
addr->ip, port);
}
/****************************************************************************/
-static int set_h245_addr(struct sk_buff *skb,
+static int set_h245_addr(struct sk_buff *skb, unsigned protoff,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
union nf_inet_addr *addr, __be16 port)
{
- return set_addr(skb, data, dataoff,
+ return set_addr(skb, protoff, data, dataoff,
taddr->unicastAddress.iPAddress.network,
addr->ip, port);
}
@@ -92,7 +91,7 @@ static int set_h245_addr(struct sk_buff *skb,
/****************************************************************************/
static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data,
+ unsigned int protoff, unsigned char **data,
TransportAddress *taddr, int count)
{
const struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -118,7 +117,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
&addr.ip, port,
&ct->tuplehash[!dir].tuple.dst.u3.ip,
info->sig_port[!dir]);
- return set_h225_addr(skb, data, 0, &taddr[i],
+ return set_h225_addr(skb, protoff, data, 0,
+ &taddr[i],
&ct->tuplehash[!dir].
tuple.dst.u3,
info->sig_port[!dir]);
@@ -129,7 +129,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
&addr.ip, port,
&ct->tuplehash[!dir].tuple.src.u3.ip,
info->sig_port[!dir]);
- return set_h225_addr(skb, data, 0, &taddr[i],
+ return set_h225_addr(skb, protoff, data, 0,
+ &taddr[i],
&ct->tuplehash[!dir].
tuple.src.u3,
info->sig_port[!dir]);
@@ -143,7 +144,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data,
+ unsigned int protoff, unsigned char **data,
TransportAddress *taddr, int count)
{
int dir = CTINFO2DIR(ctinfo);
@@ -159,7 +160,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
&addr.ip, ntohs(port),
&ct->tuplehash[!dir].tuple.dst.u3.ip,
ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
- return set_h225_addr(skb, data, 0, &taddr[i],
+ return set_h225_addr(skb, protoff, data, 0, &taddr[i],
&ct->tuplehash[!dir].tuple.dst.u3,
ct->tuplehash[!dir].tuple.
dst.u.udp.port);
@@ -172,7 +173,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
__be16 port, __be16 rtp_port,
struct nf_conntrack_expect *rtp_exp,
@@ -244,7 +245,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
}
/* Modify signal */
- if (set_h245_addr(skb, data, dataoff, taddr,
+ if (set_h245_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons((port & htons(1)) ? nated_port + 1 :
nated_port)) == 0) {
@@ -275,7 +276,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
H245_TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp)
{
@@ -307,7 +308,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
}
/* Modify signal */
- if (set_h245_addr(skb, data, dataoff, taddr,
+ if (set_h245_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) < 0) {
nf_ct_unexpect_related(exp);
@@ -326,7 +327,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp)
{
@@ -363,7 +364,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
}
/* Modify signal */
- if (set_h225_addr(skb, data, dataoff, taddr,
+ if (set_h225_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
/* Save ports */
@@ -390,7 +391,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
static void ip_nat_q931_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
- struct nf_nat_ipv4_range range;
+ struct nf_nat_range range;
if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
nf_nat_follow_master(new, this);
@@ -402,21 +403,23 @@ static void ip_nat_q931_expect(struct nf_conn *new,
/* Change src to where master sends to */
range.flags = NF_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
+ range.min_addr = range.max_addr =
+ new->tuplehash[!this->dir].tuple.src.u3;
nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = this->saved_proto;
- range.min_ip = range.max_ip =
- new->master->tuplehash[!this->dir].tuple.src.u3.ip;
+ range.min_proto = range.max_proto = this->saved_proto;
+ range.min_addr = range.max_addr =
+ new->master->tuplehash[!this->dir].tuple.src.u3;
nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
}
/****************************************************************************/
static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, TransportAddress *taddr, int idx,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int idx,
__be16 port, struct nf_conntrack_expect *exp)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -453,7 +456,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
}
/* Modify signal */
- if (set_h225_addr(skb, data, 0, &taddr[idx],
+ if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
/* Save ports */
@@ -464,7 +467,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
if (idx > 0 &&
get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
(ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr(skb, data, 0, &taddr[0],
+ set_h225_addr(skb, protoff, data, 0, &taddr[0],
&ct->tuplehash[!dir].tuple.dst.u3,
info->sig_port[!dir]);
}
@@ -487,26 +490,28 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
static void ip_nat_callforwarding_expect(struct nf_conn *new,
struct nf_conntrack_expect *this)
{
- struct nf_nat_ipv4_range range;
+ struct nf_nat_range range;
/* This must be a fresh one. */
BUG_ON(new->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
range.flags = NF_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
+ range.min_addr = range.max_addr =
+ new->tuplehash[!this->dir].tuple.src.u3;
nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = this->saved_proto;
- range.min_ip = range.max_ip = this->saved_ip;
+ range.min_proto = range.max_proto = this->saved_proto;
+ range.min_addr = range.max_addr = this->saved_addr;
nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
}
/****************************************************************************/
static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp)
@@ -515,7 +520,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
u_int16_t nated_port;
/* Set expectations for NAT */
- exp->saved_ip = exp->tuple.dst.u3.ip;
+ exp->saved_addr = exp->tuple.dst.u3;
exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
exp->expectfn = ip_nat_callforwarding_expect;
@@ -541,7 +546,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
}
/* Modify signal */
- if (!set_h225_addr(skb, data, dataoff, taddr,
+ if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
new file mode 100644
index 000000000000..d8b2e14efddc
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -0,0 +1,281 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (ct->status & statusbit) {
+ fl4->daddr = t->dst.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl4->saddr = t->src.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_sport = t->src.u.all;
+ }
+}
+#endif /* CONFIG_XFRM */
+
+static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
+ const struct nf_nat_range *range)
+{
+ return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
+ ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
+}
+
+static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
+ __be16 dport)
+{
+ return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
+}
+
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_nat_l4proto *l4proto,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph;
+ unsigned int hdroff;
+
+ if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+ return false;
+
+ iph = (void *)skb->data + iphdroff;
+ hdroff = iphdroff + iph->ihl * 4;
+
+ if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
+ target, maniptype))
+ return false;
+ iph = (void *)skb->data + iphdroff;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+ iph->saddr = target->src.u3.ip;
+ } else {
+ csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+ iph->daddr = target->dst.u3.ip;
+ }
+ return true;
+}
+
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ __be32 oldip, newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = iph->saddr;
+ newip = t->src.u3.ip;
+ } else {
+ oldip = iph->daddr;
+ newip = t->dst.u3.ip;
+ }
+ inet_proto_csum_replace4(check, skb, oldip, newip, 1);
+}
+
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb_rtable(skb);
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ ip_hdrlen(skb);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, proto, 0);
+ } else {
+ *check = 0;
+ *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, proto,
+ csum_partial(data, datalen,
+ 0));
+ if (proto == IPPROTO_UDP && !*check)
+ *check = CSUM_MANGLED_0;
+ }
+ } else
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), 1);
+}
+
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range *range)
+{
+ if (tb[CTA_NAT_V4_MINIP]) {
+ range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V4_MAXIP])
+ range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+ else
+ range->max_addr.ip = range->min_addr.ip;
+
+ return 0;
+}
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
+ .l3proto = NFPROTO_IPV4,
+ .in_range = nf_nat_ipv4_in_range,
+ .secure_port = nf_nat_ipv4_secure_port,
+ .manip_pkt = nf_nat_ipv4_manip_pkt,
+ .csum_update = nf_nat_ipv4_csum_update,
+ .csum_recalc = nf_nat_ipv4_csum_recalc,
+ .nlattr_to_range = nf_nat_ipv4_nlattr_to_range,
+#ifdef CONFIG_XFRM
+ .decode_session = nf_nat_ipv4_decode_session,
+#endif
+};
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum)
+{
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ unsigned int hdrlen = ip_hdrlen(skb);
+ const struct nf_nat_l4proto *l4proto;
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply direction */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
+ if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
+ l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt may reallocate */
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum =
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ /* Change outer to look like the reply to an incoming packet */
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
+ if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+static int __init nf_nat_l3proto_ipv4_init(void)
+{
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
+ if (err < 0)
+ goto err2;
+ return err;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+err1:
+ return err;
+}
+
+static void __exit nf_nat_l3proto_ipv4_exit(void)
+{
+ nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
+
+module_init(nf_nat_l3proto_ipv4_init);
+module_exit(nf_nat_l3proto_ipv4_exit);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 388140881ebe..a06d7d74817d 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -22,7 +22,6 @@
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_zones.h>
@@ -47,7 +46,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
struct nf_conntrack_tuple t;
const struct nf_ct_pptp_master *ct_pptp_info;
const struct nf_nat_pptp *nat_pptp_info;
- struct nf_nat_ipv4_range range;
+ struct nf_nat_range range;
ct_pptp_info = nfct_help_data(master);
nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
@@ -89,21 +88,21 @@ static void pptp_nat_expected(struct nf_conn *ct,
/* Change src to where master sends to */
range.flags = NF_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
if (exp->dir == IP_CT_DIR_ORIGINAL) {
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
+ range.min_proto = range.max_proto = exp->saved_proto;
}
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
range.flags = NF_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.src.u3;
if (exp->dir == IP_CT_DIR_REPLY) {
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
+ range.min_proto = range.max_proto = exp->saved_proto;
}
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}
@@ -113,6 +112,7 @@ static int
pptp_outbound_pkt(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq)
@@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff *skb,
ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
/* mangle packet */
- if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
cid_off + sizeof(struct pptp_pkt_hdr) +
sizeof(struct PptpControlHeader),
sizeof(new_callid), (char *)&new_callid,
@@ -216,6 +216,7 @@ static int
pptp_inbound_pkt(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq)
{
@@ -268,7 +269,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
- if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
pcid_off + sizeof(struct pptp_pkt_hdr) +
sizeof(struct PptpControlHeader),
sizeof(new_pcid), (char *)&new_pcid,
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 46ba0b9ab985..ea44f02563b5 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -28,8 +28,7 @@
#include <linux/ip.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
MODULE_LICENSE("GPL");
@@ -38,8 +37,9 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
/* generate unique tuple ... */
static void
-gre_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -62,8 +62,8 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
min = 1;
range_size = 0xffff;
} else {
- min = ntohs(range->min.gre.key);
- range_size = ntohs(range->max.gre.key) - min + 1;
+ min = ntohs(range->min_proto.gre.key);
+ range_size = ntohs(range->max_proto.gre.key) - min + 1;
}
pr_debug("min = %u, range_size = %u\n", min, range_size);
@@ -80,14 +80,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
/* manipulate a GRE packet according to maniptype */
static bool
-gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
+gre_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
const struct gre_hdr *greh;
struct gre_hdr_pptp *pgreh;
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
- unsigned int hdroff = iphdroff + iph->ihl * 4;
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
@@ -117,24 +117,24 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
return true;
}
-static const struct nf_nat_protocol gre = {
- .protonum = IPPROTO_GRE,
+static const struct nf_nat_l4proto gre = {
+ .l4proto = IPPROTO_GRE,
.manip_pkt = gre_manip_pkt,
- .in_range = nf_nat_proto_in_range,
+ .in_range = nf_nat_l4proto_in_range,
.unique_tuple = gre_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
static int __init nf_nat_proto_gre_init(void)
{
- return nf_nat_protocol_register(&gre);
+ return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
}
static void __exit nf_nat_proto_gre_fini(void)
{
- nf_nat_protocol_unregister(&gre);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
}
module_init(nf_nat_proto_gre_init);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index b35172851bae..eb303471bcf6 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -15,8 +15,7 @@
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
static bool
icmp_in_range(const struct nf_conntrack_tuple *tuple,
@@ -29,8 +28,9 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
}
static void
-icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
@@ -38,13 +38,14 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
unsigned int range_size;
unsigned int i;
- range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
/* If no range specified... */
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
range_size = 0xFFFF;
for (i = 0; ; ++id) {
- tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
+ tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
(id % range_size));
if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
return;
@@ -54,13 +55,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
static bool
icmp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct icmphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
@@ -72,12 +72,12 @@ icmp_manip_pkt(struct sk_buff *skb,
return true;
}
-const struct nf_nat_protocol nf_nat_protocol_icmp = {
- .protonum = IPPROTO_ICMP,
+const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
+ .l4proto = IPPROTO_ICMP,
.manip_pkt = icmp_manip_pkt,
.in_range = icmp_in_range,
.unique_tuple = icmp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
deleted file mode 100644
index d2a9dc314e0e..000000000000
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Everything about the rules for NAT. */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/slab.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/bitops.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-
-#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
- (1 << NF_INET_POST_ROUTING) | \
- (1 << NF_INET_LOCAL_OUT) | \
- (1 << NF_INET_LOCAL_IN))
-
-static const struct xt_table nat_table = {
- .name = "nat",
- .valid_hooks = NAT_VALID_HOOKS,
- .me = THIS_MODULE,
- .af = NFPROTO_IPV4,
-};
-
-/* Source NAT */
-static unsigned int
-ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
- par->hooknum == NF_INET_LOCAL_IN);
-
- ct = nf_ct_get(skb, &ctinfo);
-
- /* Connection must be valid and new. */
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY));
- NF_CT_ASSERT(par->out != NULL);
-
- return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC);
-}
-
-static unsigned int
-ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT);
-
- ct = nf_ct_get(skb, &ctinfo);
-
- /* Connection must be valid and new. */
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
- return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST);
-}
-
-static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- pr_info("SNAT: multiple ranges no longer supported\n");
- return -EINVAL;
- }
- return 0;
-}
-
-static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- pr_info("DNAT: multiple ranges no longer supported\n");
- return -EINVAL;
- }
- return 0;
-}
-
-static unsigned int
-alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
- */
- struct nf_nat_ipv4_range range;
-
- range.flags = 0;
- pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
- HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
-
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
-}
-
-int nf_nat_rule_find(struct sk_buff *skb,
- unsigned int hooknum,
- const struct net_device *in,
- const struct net_device *out,
- struct nf_conn *ct)
-{
- struct net *net = nf_ct_net(ct);
- int ret;
-
- ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
-
- if (ret == NF_ACCEPT) {
- if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
- /* NUL mapping */
- ret = alloc_null_binding(ct, hooknum);
- }
- return ret;
-}
-
-static struct xt_target ipt_snat_reg __read_mostly = {
- .name = "SNAT",
- .target = ipt_snat_target,
- .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
- .checkentry = ipt_snat_checkentry,
- .family = AF_INET,
-};
-
-static struct xt_target ipt_dnat_reg __read_mostly = {
- .name = "DNAT",
- .target = ipt_dnat_target,
- .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
- .checkentry = ipt_dnat_checkentry,
- .family = AF_INET,
-};
-
-static int __net_init nf_nat_rule_net_init(struct net *net)
-{
- struct ipt_replace *repl;
-
- repl = ipt_alloc_initial_table(&nat_table);
- if (repl == NULL)
- return -ENOMEM;
- net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl);
- kfree(repl);
- if (IS_ERR(net->ipv4.nat_table))
- return PTR_ERR(net->ipv4.nat_table);
- return 0;
-}
-
-static void __net_exit nf_nat_rule_net_exit(struct net *net)
-{
- ipt_unregister_table(net, net->ipv4.nat_table);
-}
-
-static struct pernet_operations nf_nat_rule_net_ops = {
- .init = nf_nat_rule_net_init,
- .exit = nf_nat_rule_net_exit,
-};
-
-int __init nf_nat_rule_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&nf_nat_rule_net_ops);
- if (ret != 0)
- goto out;
- ret = xt_register_target(&ipt_snat_reg);
- if (ret != 0)
- goto unregister_table;
-
- ret = xt_register_target(&ipt_dnat_reg);
- if (ret != 0)
- goto unregister_snat;
-
- return ret;
-
- unregister_snat:
- xt_unregister_target(&ipt_snat_reg);
- unregister_table:
- unregister_pernet_subsys(&nf_nat_rule_net_ops);
- out:
- return ret;
-}
-
-void nf_nat_rule_cleanup(void)
-{
- xt_unregister_target(&ipt_dnat_reg);
- xt_unregister_target(&ipt_snat_reg);
- unregister_pernet_subsys(&nf_nat_rule_net_ops);
-}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 6232d476f37e..8f3d05424a3e 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -185,10 +185,10 @@ exit:
return sk;
}
-static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
- gid_t *high)
+static void inet_get_ping_group_range_net(struct net *net, kgid_t *low,
+ kgid_t *high)
{
- gid_t *data = net->ipv4.sysctl_ping_group_range;
+ kgid_t *data = net->ipv4.sysctl_ping_group_range;
unsigned int seq;
do {
@@ -203,19 +203,13 @@ static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
static int ping_init_sock(struct sock *sk)
{
struct net *net = sock_net(sk);
- gid_t group = current_egid();
- gid_t range[2];
+ kgid_t group = current_egid();
struct group_info *group_info = get_current_groups();
int i, j, count = group_info->ngroups;
kgid_t low, high;
- inet_get_ping_group_range_net(net, range, range+1);
- low = make_kgid(&init_user_ns, range[0]);
- high = make_kgid(&init_user_ns, range[1]);
- if (!gid_valid(low) || !gid_valid(high) || gid_lt(high, low))
- return -EACCES;
-
- if (range[0] <= group && group <= range[1])
+ inet_get_ping_group_range_net(net, &low, &high);
+ if (gid_lte(low, group) && gid_lte(group, high))
return 0;
for (i = 0; i < group_info->nblocks; i++) {
@@ -845,7 +839,9 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f,
bucket, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
- 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
+ 0, 0L, 0,
+ from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
+ 0, sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
atomic_read(&sp->sk_drops), len);
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 957acd12250b..8de53e1ddd54 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
+ SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE),
+ SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
+ SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
+ SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ff0f071969ea..73d1e4df4bf6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -131,18 +131,20 @@ found:
* 0 - deliver
* 1 - block
*/
-static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
+static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
{
- int type;
+ struct icmphdr _hdr;
+ const struct icmphdr *hdr;
- if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
+ hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_hdr), &_hdr);
+ if (!hdr)
return 1;
- type = icmp_hdr(skb)->type;
- if (type < 32) {
+ if (hdr->type < 32) {
__u32 data = raw_sk(sk)->filter.data;
- return ((1 << type) & data) != 0;
+ return ((1U << hdr->type) & data) != 0;
}
/* Do not block unknown ICMP types */
@@ -992,7 +994,9 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
i, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
- 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
+ 0, 0L, 0,
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+ 0, sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3f7bb7185c50..ff622069fcef 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -70,7 +70,6 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -80,7 +79,6 @@
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
-#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
@@ -88,11 +86,9 @@
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
-#include <linux/jhash.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
-#include <linux/prefetch.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -147,6 +143,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
+static void ipv4_dst_destroy(struct dst_entry *dst);
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int how)
@@ -170,6 +167,7 @@ static struct dst_ops ipv4_dst_ops = {
.default_advmss = ipv4_default_advmss,
.mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics,
+ .destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
@@ -204,11 +202,6 @@ EXPORT_SYMBOL(ip_tos2prio);
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
-static inline int rt_genid(struct net *net)
-{
- return atomic_read(&net->ipv4.rt_genid);
-}
-
#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
@@ -449,27 +442,9 @@ static inline bool rt_is_expired(const struct rtable *rth)
return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
-/*
- * Perturbation of rt_genid by a small quantity [1..256]
- * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
- * many times (2^24) without giving recent rt_genid.
- * Jenkins hash is strong enough that litle changes of rt_genid are OK.
- */
-static void rt_cache_invalidate(struct net *net)
-{
- unsigned char shuffle;
-
- get_random_bytes(&shuffle, sizeof(shuffle));
- atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-}
-
-/*
- * delay < 0 : invalidate cache (fast : entries will be deleted later)
- * delay >= 0 : invalidate & flush cache (can be long)
- */
-void rt_cache_flush(struct net *net, int delay)
+void rt_cache_flush(struct net *net)
{
- rt_cache_invalidate(net);
+ rt_genid_bump(net);
}
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -587,11 +562,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
build_sk_flow_key(fl4, sk);
}
-static DEFINE_SEQLOCK(fnhe_seqlock);
+static inline void rt_free(struct rtable *rt)
+{
+ call_rcu(&rt->dst.rcu_head, dst_rcu_free);
+}
+
+static DEFINE_SPINLOCK(fnhe_lock);
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
struct fib_nh_exception *fnhe, *oldest;
+ struct rtable *orig;
oldest = rcu_dereference(hash->chain);
for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
@@ -599,6 +580,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
oldest = fnhe;
}
+ orig = rcu_dereference(oldest->fnhe_rth);
+ if (orig) {
+ RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
+ rt_free(orig);
+ }
return oldest;
}
@@ -620,7 +606,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
int depth;
u32 hval = fnhe_hashfun(daddr);
- write_seqlock_bh(&fnhe_seqlock);
+ spin_lock_bh(&fnhe_lock);
hash = nh->nh_exceptions;
if (!hash) {
@@ -667,7 +653,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_stamp = jiffies;
out_unlock:
- write_sequnlock_bh(&fnhe_seqlock);
+ spin_unlock_bh(&fnhe_lock);
return;
}
@@ -925,12 +911,14 @@ static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
+ rcu_read_lock();
if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
jiffies + ip_rt_mtu_expires);
}
+ rcu_read_unlock();
return mtu;
}
@@ -947,7 +935,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
dst->obsolete = DST_OBSOLETE_KILL;
} else {
rt->rt_pmtu = mtu;
- dst_set_expires(&rt->dst, ip_rt_mtu_expires);
+ rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
}
}
@@ -1123,10 +1111,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
const struct rtable *rt = (const struct rtable *) dst;
unsigned int mtu = rt->rt_pmtu;
- if (mtu && time_after_eq(jiffies, rt->dst.expires))
- mtu = 0;
-
- if (!mtu)
+ if (!mtu || time_after_eq(jiffies, rt->dst.expires))
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu && rt_is_output_route(rt))
@@ -1164,61 +1149,118 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL;
}
-static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
+static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr)
{
- __be32 fnhe_daddr, gw;
- unsigned long expires;
- unsigned int seq;
- u32 pmtu;
-
-restart:
- seq = read_seqbegin(&fnhe_seqlock);
- fnhe_daddr = fnhe->fnhe_daddr;
- gw = fnhe->fnhe_gw;
- pmtu = fnhe->fnhe_pmtu;
- expires = fnhe->fnhe_expires;
- if (read_seqretry(&fnhe_seqlock, seq))
- goto restart;
-
- if (daddr != fnhe_daddr)
- return;
+ bool ret = false;
+
+ spin_lock_bh(&fnhe_lock);
- if (pmtu) {
- unsigned long diff = expires - jiffies;
+ if (daddr == fnhe->fnhe_daddr) {
+ struct rtable *orig;
- if (time_before(jiffies, expires)) {
- rt->rt_pmtu = pmtu;
- dst_set_expires(&rt->dst, diff);
+ if (fnhe->fnhe_pmtu) {
+ unsigned long expires = fnhe->fnhe_expires;
+ unsigned long diff = expires - jiffies;
+
+ if (time_before(jiffies, expires)) {
+ rt->rt_pmtu = fnhe->fnhe_pmtu;
+ dst_set_expires(&rt->dst, diff);
+ }
}
+ if (fnhe->fnhe_gw) {
+ rt->rt_flags |= RTCF_REDIRECTED;
+ rt->rt_gateway = fnhe->fnhe_gw;
+ }
+
+ orig = rcu_dereference(fnhe->fnhe_rth);
+ rcu_assign_pointer(fnhe->fnhe_rth, rt);
+ if (orig)
+ rt_free(orig);
+
+ fnhe->fnhe_stamp = jiffies;
+ ret = true;
+ } else {
+ /* Routes we intend to cache in nexthop exception have
+ * the DST_NOCACHE bit clear. However, if we are
+ * unsuccessful at storing this route into the cache
+ * we really need to set it.
+ */
+ rt->dst.flags |= DST_NOCACHE;
}
- if (gw) {
- rt->rt_flags |= RTCF_REDIRECTED;
- rt->rt_gateway = gw;
- }
- fnhe->fnhe_stamp = jiffies;
-}
+ spin_unlock_bh(&fnhe_lock);
-static inline void rt_release_rcu(struct rcu_head *head)
-{
- struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
- dst_release(dst);
+ return ret;
}
-static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
+static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
- struct rtable *orig, *prev, **p = &nh->nh_rth_output;
-
- if (rt_is_input_route(rt))
- p = &nh->nh_rth_input;
+ struct rtable *orig, *prev, **p;
+ bool ret = true;
+ if (rt_is_input_route(rt)) {
+ p = (struct rtable **)&nh->nh_rth_input;
+ } else {
+ if (!nh->nh_pcpu_rth_output)
+ goto nocache;
+ p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
+ }
orig = *p;
prev = cmpxchg(p, orig, rt);
if (prev == orig) {
- dst_clone(&rt->dst);
if (orig)
- call_rcu_bh(&orig->dst.rcu_head, rt_release_rcu);
+ rt_free(orig);
+ } else {
+ /* Routes we intend to cache in the FIB nexthop have
+ * the DST_NOCACHE bit clear. However, if we are
+ * unsuccessful at storing this route into the cache
+ * we really need to set it.
+ */
+nocache:
+ rt->dst.flags |= DST_NOCACHE;
+ ret = false;
+ }
+
+ return ret;
+}
+
+static DEFINE_SPINLOCK(rt_uncached_lock);
+static LIST_HEAD(rt_uncached_list);
+
+static void rt_add_uncached_list(struct rtable *rt)
+{
+ spin_lock_bh(&rt_uncached_lock);
+ list_add_tail(&rt->rt_uncached, &rt_uncached_list);
+ spin_unlock_bh(&rt_uncached_lock);
+}
+
+static void ipv4_dst_destroy(struct dst_entry *dst)
+{
+ struct rtable *rt = (struct rtable *) dst;
+
+ if (!list_empty(&rt->rt_uncached)) {
+ spin_lock_bh(&rt_uncached_lock);
+ list_del(&rt->rt_uncached);
+ spin_unlock_bh(&rt_uncached_lock);
+ }
+}
+
+void rt_flush_dev(struct net_device *dev)
+{
+ if (!list_empty(&rt_uncached_list)) {
+ struct net *net = dev_net(dev);
+ struct rtable *rt;
+
+ spin_lock_bh(&rt_uncached_lock);
+ list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
+ if (rt->dst.dev != dev)
+ continue;
+ rt->dst.dev = net->loopback_dev;
+ dev_hold(rt->dst.dev);
+ dev_put(dev);
+ }
+ spin_unlock_bh(&rt_uncached_lock);
}
}
@@ -1234,20 +1276,24 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
struct fib_nh_exception *fnhe,
struct fib_info *fi, u16 type, u32 itag)
{
+ bool cached = false;
+
if (fi) {
struct fib_nh *nh = &FIB_RES_NH(*res);
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw;
- if (unlikely(fnhe))
- rt_bind_exception(rt, fnhe, daddr);
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
- if (!(rt->dst.flags & DST_HOST))
- rt_cache_route(nh, rt);
+ if (unlikely(fnhe))
+ cached = rt_bind_exception(rt, fnhe, daddr);
+ else if (!(rt->dst.flags & DST_NOCACHE))
+ cached = rt_cache_route(nh, rt);
}
+ if (unlikely(!cached))
+ rt_add_uncached_list(rt);
#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1261,7 +1307,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,
bool nopolicy, bool noxfrm, bool will_cache)
{
return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
- (will_cache ? 0 : DST_HOST) | DST_NOCACHE |
+ (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
(nopolicy ? DST_NOPOLICY : 0) |
(noxfrm ? DST_NOXFRM : 0));
}
@@ -1314,6 +1360,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
if (our) {
rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL;
@@ -1366,8 +1413,7 @@ static void ip_handle_martian_source(struct net_device *dev,
static int __mkroute_input(struct sk_buff *skb,
const struct fib_result *res,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos,
- struct rtable **result)
+ __be32 daddr, __be32 saddr, u32 tos)
{
struct rtable *rth;
int err;
@@ -1416,9 +1462,9 @@ static int __mkroute_input(struct sk_buff *skb,
do_cache = false;
if (res->fi) {
if (!itag) {
- rth = FIB_RES_NH(*res).nh_rth_input;
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
+ skb_dst_set_noref(skb, &rth->dst);
goto out;
}
do_cache = true;
@@ -1440,13 +1486,14 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
+ skb_dst_set(skb, &rth->dst);
out:
- *result = rth;
err = 0;
cleanup:
return err;
@@ -1458,21 +1505,13 @@ static int ip_mkroute_input(struct sk_buff *skb,
struct in_device *in_dev,
__be32 daddr, __be32 saddr, u32 tos)
{
- struct rtable *rth = NULL;
- int err;
-
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && res->fi->fib_nhs > 1)
fib_select_multipath(res);
#endif
/* create a routing cache entry */
- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
- if (err)
- return err;
-
- skb_dst_set(skb, &rth->dst);
- return 0;
+ return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
/*
@@ -1524,11 +1563,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (ipv4_is_zeronet(daddr))
goto martian_destination;
- if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
- if (ipv4_is_loopback(daddr))
+ /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
+ * and call it once if daddr or/and saddr are loopback addresses
+ */
+ if (ipv4_is_loopback(daddr)) {
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
goto martian_destination;
-
- if (ipv4_is_loopback(saddr))
+ } else if (ipv4_is_loopback(saddr)) {
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
goto martian_source;
}
@@ -1553,7 +1595,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (res.type == RTN_LOCAL) {
err = fib_validate_source(skb, saddr, daddr, tos,
- net->loopback_dev->ifindex,
+ LOOPBACK_IFINDEX,
dev, in_dev, &itag);
if (err < 0)
goto martian_source_keep_err;
@@ -1586,10 +1628,11 @@ local_input:
do_cache = false;
if (res.fi) {
if (!itag) {
- rth = FIB_RES_NH(res).nh_rth_input;
+ rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
- goto set_and_out;
+ skb_dst_set_noref(skb, &rth->dst);
+ err = 0;
+ goto out;
}
do_cache = true;
}
@@ -1613,6 +1656,7 @@ local_input:
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
@@ -1620,7 +1664,6 @@ local_input:
}
if (do_cache)
rt_cache_route(&FIB_RES_NH(res), rth);
-set_and_out:
skb_dst_set(skb, &rth->dst);
err = 0;
goto out;
@@ -1658,8 +1701,8 @@ martian_source_keep_err:
goto out;
}
-int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev)
{
int res;
@@ -1702,7 +1745,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rcu_read_unlock();
return res;
}
-EXPORT_SYMBOL(ip_route_input);
+EXPORT_SYMBOL(ip_route_input_noref);
/* called with rcu_read_lock() */
static struct rtable *__mkroute_output(const struct fib_result *res,
@@ -1752,19 +1795,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fnhe = NULL;
if (fi) {
+ struct rtable __rcu **prth;
+
fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
- if (!fnhe) {
- rth = FIB_RES_NH(*res).nh_rth_output;
- if (rt_cache_valid(rth)) {
- dst_hold(&rth->dst);
- return rth;
- }
+ if (fnhe)
+ prth = &fnhe->fnhe_rth;
+ else
+ prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
+ rth = rcu_dereference(*prth);
+ if (rt_cache_valid(rth)) {
+ dst_hold(&rth->dst);
+ return rth;
}
}
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
- fi && !fnhe);
+ fi);
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -1777,6 +1824,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ INIT_LIST_HEAD(&rth->rt_uncached);
RT_CACHE_STAT_INC(out_slow_tot);
@@ -1823,7 +1871,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
orig_oif = fl4->flowi4_oif;
- fl4->flowi4_iif = net->loopback_dev->ifindex;
+ fl4->flowi4_iif = LOOPBACK_IFINDEX;
fl4->flowi4_tos = tos & IPTOS_RT_MASK;
fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
@@ -1912,7 +1960,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
if (!fl4->daddr)
fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
dev_out = net->loopback_dev;
- fl4->flowi4_oif = net->loopback_dev->ifindex;
+ fl4->flowi4_oif = LOOPBACK_IFINDEX;
res.type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
@@ -1959,7 +2007,6 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
}
dev_out = net->loopback_dev;
fl4->flowi4_oif = dev_out->ifindex;
- res.fi = NULL;
flags |= RTCF_LOCAL;
goto make_route;
}
@@ -2056,6 +2103,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
+ INIT_LIST_HEAD(&rt->rt_uncached);
+
dst_free(new);
}
@@ -2082,7 +2131,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
EXPORT_SYMBOL_GPL(ip_route_output_flow);
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
- struct flowi4 *fl4, struct sk_buff *skb, u32 pid,
+ struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
u32 seq, int event, int nowait, unsigned int flags)
{
struct rtable *rt = skb_rtable(skb);
@@ -2092,7 +2141,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
u32 error;
u32 metrics[RTAX_MAX];
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2252,12 +2301,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
rt->rt_flags |= RTCF_NOTIFY;
err = rt_fill_info(net, dst, src, &fl4, skb,
- NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
RTM_NEWROUTE, 0, 0);
if (err <= 0)
goto errout_free;
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
return err;
@@ -2273,7 +2322,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
void ip_rt_multicast_event(struct in_device *in_dev)
{
- rt_cache_flush(dev_net(in_dev->dev), 0);
+ rt_cache_flush(dev_net(in_dev->dev));
}
#ifdef CONFIG_SYSCTL
@@ -2282,16 +2331,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
size_t *lenp, loff_t *ppos)
{
if (write) {
- int flush_delay;
- ctl_table ctl;
- struct net *net;
-
- memcpy(&ctl, __ctl, sizeof(ctl));
- ctl.data = &flush_delay;
- proc_dointvec(&ctl, write, buffer, lenp, ppos);
-
- net = (struct net *)__ctl->extra1;
- rt_cache_flush(net, flush_delay);
+ rt_cache_flush((struct net *)__ctl->extra1);
return 0;
}
@@ -2461,8 +2501,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
static __net_init int rt_genid_init(struct net *net)
{
- get_random_bytes(&net->ipv4.rt_genid,
- sizeof(net->ipv4.rt_genid));
+ atomic_set(&net->rt_genid, 0);
get_random_bytes(&net->ipv4.dev_addr_genid,
sizeof(net->ipv4.dev_addr_genid));
return 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 650e1528e1e6..ba48e799b031 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -319,6 +319,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+ treq->listener = NULL;
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5840c3255721..9205e492dc9d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -76,9 +76,9 @@ static int ipv4_local_port_range(ctl_table *table, int write,
}
-static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
+static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low, kgid_t *high)
{
- gid_t *data = table->data;
+ kgid_t *data = table->data;
unsigned int seq;
do {
seq = read_seqbegin(&sysctl_local_ports.lock);
@@ -89,12 +89,12 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low,
}
/* Update system visible IP port range */
-static void set_ping_group_range(struct ctl_table *table, gid_t range[2])
+static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t high)
{
- gid_t *data = table->data;
+ kgid_t *data = table->data;
write_seqlock(&sysctl_local_ports.lock);
- data[0] = range[0];
- data[1] = range[1];
+ data[0] = low;
+ data[1] = high;
write_sequnlock(&sysctl_local_ports.lock);
}
@@ -103,21 +103,33 @@ static int ipv4_ping_group_range(ctl_table *table, int write,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct user_namespace *user_ns = current_user_ns();
int ret;
- gid_t range[2];
+ gid_t urange[2];
+ kgid_t low, high;
ctl_table tmp = {
- .data = &range,
- .maxlen = sizeof(range),
+ .data = &urange,
+ .maxlen = sizeof(urange),
.mode = table->mode,
.extra1 = &ip_ping_group_range_min,
.extra2 = &ip_ping_group_range_max,
};
- inet_get_ping_group_range_table(table, range, range + 1);
+ inet_get_ping_group_range_table(table, &low, &high);
+ urange[0] = from_kgid_munged(user_ns, low);
+ urange[1] = from_kgid_munged(user_ns, high);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
- if (write && ret == 0)
- set_ping_group_range(table, range);
+ if (write && ret == 0) {
+ low = make_kgid(user_ns, urange[0]);
+ high = make_kgid(user_ns, urange[1]);
+ if (!gid_valid(low) || !gid_valid(high) ||
+ (urange[1] < urange[0]) || gid_lt(high, low)) {
+ low = make_kgid(&init_user_ns, 1);
+ high = make_kgid(&init_user_ns, 0);
+ }
+ set_ping_group_range(table, low, high);
+ }
return ret;
}
@@ -184,7 +196,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
int ret;
unsigned long vec[3];
struct net *net = current->nsproxy->net_ns;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
struct mem_cgroup *memcg;
#endif
@@ -203,7 +215,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
if (ret)
return ret;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
rcu_read_lock();
memcg = mem_cgroup_from_task(current);
@@ -220,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
return 0;
}
+int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
+ struct tcp_fastopen_context *ctxt;
+ int ret;
+ u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
+
+ tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
+ if (!tbl.data)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ ctxt = rcu_dereference(tcp_fastopen_ctx);
+ if (ctxt)
+ memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
+ rcu_read_unlock();
+
+ snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
+ user_key[0], user_key[1], user_key[2], user_key[3]);
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+
+ if (write && ret == 0) {
+ if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
+ user_key + 2, user_key + 3) != 4) {
+ ret = -EINVAL;
+ goto bad_key;
+ }
+ tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+ }
+
+bad_key:
+ pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
+ user_key[0], user_key[1], user_key[2], user_key[3],
+ (char *)tbl.data, ret);
+ kfree(tbl.data);
+ return ret;
+}
+
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
@@ -374,6 +425,12 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "tcp_fastopen_key",
+ .mode = 0600,
+ .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+ .proc_handler = proc_tcp_fastopen_key,
+ },
+ {
.procname = "tcp_tw_recycle",
.data = &tcp_death_row.sysctl_tw_recycle,
.maxlen = sizeof(int),
@@ -784,16 +841,9 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "rt_cache_rebuild_count",
- .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "ping_group_range",
.data = &init_net.ipv4.sysctl_ping_group_range,
- .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
+ .maxlen = sizeof(gid_t)*2,
.mode = 0644,
.proc_handler = ipv4_ping_group_range,
},
@@ -829,8 +879,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
table[5].data =
&net->ipv4.sysctl_icmp_ratemask;
table[6].data =
- &net->ipv4.sysctl_rt_cache_rebuild_count;
- table[7].data =
&net->ipv4.sysctl_ping_group_range;
}
@@ -839,10 +887,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net)
* Sane defaults - nobody may create ping sockets.
* Boot scripts should set this to distro-specific group.
*/
- net->ipv4.sysctl_ping_group_range[0] = 1;
- net->ipv4.sysctl_ping_group_range[1] = 0;
-
- net->ipv4.sysctl_rt_cache_rebuild_count = 4;
+ net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1);
+ net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0);
tcp_init_mem(net);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 581ecf02c6b5..f32c02e2a543 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLIN | POLLRDNORM | POLLRDHUP;
- /* Connected? */
- if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+ /* Connected or passive Fast Open socket? */
+ if (sk->sk_state != TCP_SYN_SENT &&
+ (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) {
int target = sock_rcvlowat(sk, 0, INT_MAX);
if (tp->urg_seq == tp->copied_seq &&
@@ -811,7 +812,9 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
old_size_goal + mss_now > xmit_size_goal)) {
xmit_size_goal = old_size_goal;
} else {
- tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
+ tp->xmit_size_goal_segs =
+ min_t(u16, xmit_size_goal / mss_now,
+ sk->sk_gso_max_segs);
xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
}
}
@@ -838,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
ssize_t copied;
long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- /* Wait for a connection to finish. */
- if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+ /* Wait for a connection to finish. One exception is TCP Fast Open
+ * (passive side) where data is allowed to be sent before a connection
+ * is fully established.
+ */
+ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+ !tcp_passive_fastopen(sk)) {
if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
goto out_err;
+ }
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
@@ -1040,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- /* Wait for a connection to finish. */
- if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+ /* Wait for a connection to finish. One exception is TCP Fast Open
+ * (passive side) where data is allowed to be sent before a connection
+ * is fully established.
+ */
+ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+ !tcp_passive_fastopen(sk)) {
if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
goto do_error;
+ }
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
@@ -1137,78 +1150,43 @@ new_segment:
if (err)
goto do_fault;
} else {
- bool merge = false;
+ bool merge = true;
int i = skb_shinfo(skb)->nr_frags;
- struct page *page = sk->sk_sndmsg_page;
- int off;
-
- if (page && page_count(page) == 1)
- sk->sk_sndmsg_off = 0;
-
- off = sk->sk_sndmsg_off;
-
- if (skb_can_coalesce(skb, i, page, off) &&
- off != PAGE_SIZE) {
- /* We can extend the last page
- * fragment. */
- merge = true;
- } else if (i == MAX_SKB_FRAGS || !sg) {
- /* Need to add new fragment and cannot
- * do this because interface is non-SG,
- * or because all the page slots are
- * busy. */
- tcp_mark_push(tp, skb);
- goto new_segment;
- } else if (page) {
- if (off == PAGE_SIZE) {
- put_page(page);
- sk->sk_sndmsg_page = page = NULL;
- off = 0;
+ struct page_frag *pfrag = sk_page_frag(sk);
+
+ if (!sk_page_frag_refill(sk, pfrag))
+ goto wait_for_memory;
+
+ if (!skb_can_coalesce(skb, i, pfrag->page,
+ pfrag->offset)) {
+ if (i == MAX_SKB_FRAGS || !sg) {
+ tcp_mark_push(tp, skb);
+ goto new_segment;
}
- } else
- off = 0;
+ merge = false;
+ }
- if (copy > PAGE_SIZE - off)
- copy = PAGE_SIZE - off;
+ copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (!sk_wmem_schedule(sk, copy))
goto wait_for_memory;
- if (!page) {
- /* Allocate new cache page. */
- if (!(page = sk_stream_alloc_page(sk)))
- goto wait_for_memory;
- }
-
- /* Time to copy data. We are close to
- * the end! */
err = skb_copy_to_page_nocache(sk, from, skb,
- page, off, copy);
- if (err) {
- /* If this page was new, give it to the
- * socket so it does not get leaked.
- */
- if (!sk->sk_sndmsg_page) {
- sk->sk_sndmsg_page = page;
- sk->sk_sndmsg_off = 0;
- }
+ pfrag->page,
+ pfrag->offset,
+ copy);
+ if (err)
goto do_error;
- }
/* Update the skb. */
if (merge) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
- skb_fill_page_desc(skb, i, page, off, copy);
- if (sk->sk_sndmsg_page) {
- get_page(page);
- } else if (off + copy < PAGE_SIZE) {
- get_page(page);
- sk->sk_sndmsg_page = page;
- }
+ skb_fill_page_desc(skb, i, pfrag->page,
+ pfrag->offset, copy);
+ get_page(pfrag->page);
}
-
- sk->sk_sndmsg_off = off + copy;
+ pfrag->offset += copy;
}
if (!copied)
@@ -1760,8 +1738,14 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
#ifdef CONFIG_NET_DMA
- if (tp->ucopy.dma_chan)
- dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+ if (tp->ucopy.dma_chan) {
+ if (tp->rcv_wnd == 0 &&
+ !skb_queue_empty(&sk->sk_async_wait_queue)) {
+ tcp_service_net_dma(sk, true);
+ tcp_cleanup_rbuf(sk, copied);
+ } else
+ dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+ }
#endif
if (copied >= target) {
/* Do not sleep, just process backlog. */
@@ -2142,6 +2126,10 @@ void tcp_close(struct sock *sk, long timeout)
* they look as CLOSING or LAST_ACK for Linux)
* Probably, I missed some more holelets.
* --ANK
+ * XXX (TFO) - To start off we don't support SYN+ACK+FIN
+ * in a single packet! (May consider it later but will
+ * probably need API support or TCP_CORK SYN-ACK until
+ * data is written and socket is closed.)
*/
tcp_send_fin(sk);
}
@@ -2213,8 +2201,16 @@ adjudge_to_death:
}
}
- if (sk->sk_state == TCP_CLOSE)
+ if (sk->sk_state == TCP_CLOSE) {
+ struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+ /* We could get here with a non-NULL req if the socket is
+ * aborted (e.g., closed with unread data) before 3WHS
+ * finishes.
+ */
+ if (req != NULL)
+ reqsk_fastopen_remove(sk, req, false);
inet_csk_destroy_sock(sk);
+ }
/* Otherwise, socket is reprieved until protocol close. */
out:
@@ -2300,6 +2296,13 @@ int tcp_disconnect(struct sock *sk, int flags)
}
EXPORT_SYMBOL(tcp_disconnect);
+void tcp_sock_destruct(struct sock *sk)
+{
+ inet_sock_destruct(sk);
+
+ kfree(inet_csk(sk)->icsk_accept_queue.fastopenq);
+}
+
static inline bool tcp_can_repair_sock(const struct sock *sk)
{
return capable(CAP_NET_ADMIN) &&
@@ -2323,10 +2326,17 @@ static int tcp_repair_options_est(struct tcp_sock *tp,
tp->rx_opt.mss_clamp = opt.opt_val;
break;
case TCPOPT_WINDOW:
- if (opt.opt_val > 14)
- return -EFBIG;
+ {
+ u16 snd_wscale = opt.opt_val & 0xFFFF;
+ u16 rcv_wscale = opt.opt_val >> 16;
+
+ if (snd_wscale > 14 || rcv_wscale > 14)
+ return -EFBIG;
- tp->rx_opt.snd_wscale = opt.opt_val;
+ tp->rx_opt.snd_wscale = snd_wscale;
+ tp->rx_opt.rcv_wscale = rcv_wscale;
+ tp->rx_opt.wscale_ok = 1;
+ }
break;
case TCPOPT_SACK_PERM:
if (opt.opt_val != 0)
@@ -2681,7 +2691,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
/* Cap the max timeout in ms TCP will retry/retrans
* before giving up and aborting (ETIMEDOUT) a connection.
*/
- icsk->icsk_user_timeout = msecs_to_jiffies(val);
+ if (val < 0)
+ err = -EINVAL;
+ else
+ icsk->icsk_user_timeout = msecs_to_jiffies(val);
+ break;
+
+ case TCP_FASTOPEN:
+ if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+ TCPF_LISTEN)))
+ err = fastopen_init_queue(sk, val);
+ else
+ err = -EINVAL;
break;
default:
err = -ENOPROTOOPT;
@@ -3496,11 +3517,15 @@ EXPORT_SYMBOL(tcp_cookie_generator);
void tcp_done(struct sock *sk)
{
+ struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
+ if (req != NULL)
+ reqsk_fastopen_remove(sk, req, false);
sk->sk_shutdown = SHUTDOWN_MASK;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4d4db16e336e..1432cdb0644c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -291,7 +291,8 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size)
+ left * tp->mss_cache < sk->sk_gso_max_size &&
+ left < sk->sk_gso_max_segs)
return true;
return left <= tcp_max_tso_deferred_mss(tp);
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index a7f729c409d7..8f7ef0ad80e5 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,10 +1,91 @@
+#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/tcp.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <net/inetpeer.h>
+#include <net/tcp.h>
-int sysctl_tcp_fastopen;
+int sysctl_tcp_fastopen __read_mostly;
+
+struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+
+static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
+
+static void tcp_fastopen_ctx_free(struct rcu_head *head)
+{
+ struct tcp_fastopen_context *ctx =
+ container_of(head, struct tcp_fastopen_context, rcu);
+ crypto_free_cipher(ctx->tfm);
+ kfree(ctx);
+}
+
+int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+{
+ int err;
+ struct tcp_fastopen_context *ctx, *octx;
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx->tfm = crypto_alloc_cipher("aes", 0, 0);
+
+ if (IS_ERR(ctx->tfm)) {
+ err = PTR_ERR(ctx->tfm);
+error: kfree(ctx);
+ pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
+ return err;
+ }
+ err = crypto_cipher_setkey(ctx->tfm, key, len);
+ if (err) {
+ pr_err("TCP: TFO cipher key error: %d\n", err);
+ crypto_free_cipher(ctx->tfm);
+ goto error;
+ }
+ memcpy(ctx->key, key, len);
+
+ spin_lock(&tcp_fastopen_ctx_lock);
+
+ octx = rcu_dereference_protected(tcp_fastopen_ctx,
+ lockdep_is_held(&tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(tcp_fastopen_ctx, ctx);
+ spin_unlock(&tcp_fastopen_ctx_lock);
+
+ if (octx)
+ call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
+ return err;
+}
+
+/* Computes the fastopen cookie for the peer.
+ * The peer address is a 128 bits long (pad with zeros for IPv4).
+ *
+ * The caller must check foc->len to determine if a valid cookie
+ * has been generated successfully.
+*/
+void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc)
+{
+ __be32 peer_addr[4] = { addr, 0, 0, 0 };
+ struct tcp_fastopen_context *ctx;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(tcp_fastopen_ctx);
+ if (ctx) {
+ crypto_cipher_encrypt_one(ctx->tfm,
+ foc->val,
+ (__u8 *)peer_addr);
+ foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+ }
+ rcu_read_unlock();
+}
static int __init tcp_fastopen_init(void)
{
+ __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+ get_random_bytes(key, sizeof(key));
+ tcp_fastopen_reset_cipher(key, sizeof(key));
return 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e07a64ca44e..432c36649db3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -237,7 +237,11 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s
tcp_enter_quickack_mode((struct sock *)tp);
break;
case INET_ECN_CE:
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
+ /* Better not delay acks, sender can have a very low cwnd */
+ tcp_enter_quickack_mode((struct sock *)tp);
+ tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+ }
/* fallinto */
default:
tp->ecn_flags |= TCP_ECN_SEEN;
@@ -374,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
/* 4. Try to fixup all. It is made immediately after connection enters
* established state.
*/
-static void tcp_init_buffer_space(struct sock *sk)
+void tcp_init_buffer_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
@@ -739,29 +743,6 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
}
-/* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- const struct inet_connection_sock *icsk = inet_csk(sk);
-
- tp->prior_ssthresh = 0;
- tp->bytes_acked = 0;
- if (icsk->icsk_ca_state < TCP_CA_CWR) {
- tp->undo_marker = 0;
- if (set_ssthresh)
- tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
- tp->snd_cwnd = min(tp->snd_cwnd,
- tcp_packets_in_flight(tp) + 1U);
- tp->snd_cwnd_cnt = 0;
- tp->high_seq = tp->snd_nxt;
- tp->snd_cwnd_stamp = tcp_time_stamp;
- TCP_ECN_queue_cwr(tp);
-
- tcp_set_ca_state(sk, TCP_CA_CWR);
- }
-}
-
/*
* Packet counting of FACK is based on in-order assumptions, therefore TCP
* disables it when reordering is detected
@@ -2489,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
tp->snd_cwnd_stamp = tcp_time_stamp;
}
-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
- const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
- return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int decr = tp->snd_cwnd_cnt + 1;
-
- if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
- (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
- tp->snd_cwnd_cnt = decr & 1;
- decr >>= 1;
-
- if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
- tp->snd_cwnd -= decr;
-
- tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
- tp->snd_cwnd_stamp = tcp_time_stamp;
- }
-}
-
/* Nothing was retransmitted or returned timestamp is less
* than timestamp of the first retransmission.
*/
@@ -2719,24 +2671,80 @@ static bool tcp_try_undo_loss(struct sock *sk)
return false;
}
-static inline void tcp_complete_cwr(struct sock *sk)
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
+ * It computes the number of packets to send (sndcnt) based on packets newly
+ * delivered:
+ * 1) If the packets in flight is larger than ssthresh, PRR spreads the
+ * cwnd reductions across a full RTT.
+ * 2) If packets in flight is lower than ssthresh (such as due to excess
+ * losses and/or application stalls), do not perform any further cwnd
+ * reductions, but instead slow start up to ssthresh.
+ */
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tp->high_seq = tp->snd_nxt;
+ tp->bytes_acked = 0;
+ tp->snd_cwnd_cnt = 0;
+ tp->prior_cwnd = tp->snd_cwnd;
+ tp->prr_delivered = 0;
+ tp->prr_out = 0;
+ if (set_ssthresh)
+ tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+ TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+ int fast_rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
+ int sndcnt = 0;
+ int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
- /* Do not moderate cwnd if it's already undone in cwr or recovery. */
- if (tp->undo_marker) {
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
- tp->snd_cwnd_stamp = tcp_time_stamp;
- } else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
- /* PRR algorithm. */
- tp->snd_cwnd = tp->snd_ssthresh;
- tp->snd_cwnd_stamp = tcp_time_stamp;
- }
+ tp->prr_delivered += newly_acked_sacked;
+ if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+ u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
+ tp->prior_cwnd - 1;
+ sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
+ } else {
+ sndcnt = min_t(int, delta,
+ max_t(int, tp->prr_delivered - tp->prr_out,
+ newly_acked_sacked) + 1);
+ }
+
+ sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+ tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
+}
+
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+ if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+ (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->snd_cwnd_stamp = tcp_time_stamp;
}
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tp->prior_ssthresh = 0;
+ tp->bytes_acked = 0;
+ if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+ tp->undo_marker = 0;
+ tcp_init_cwnd_reduction(sk, set_ssthresh);
+ tcp_set_ca_state(sk, TCP_CA_CWR);
+ }
+}
+
static void tcp_try_keep_open(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2751,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
}
}
-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2768,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
tcp_moderate_cwnd(tp);
} else {
- tcp_cwnd_down(sk, flag);
+ tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
}
}
@@ -2850,38 +2858,6 @@ void tcp_simple_retransmit(struct sock *sk)
}
EXPORT_SYMBOL(tcp_simple_retransmit);
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
- * It computes the number of packets to send (sndcnt) based on packets newly
- * delivered:
- * 1) If the packets in flight is larger than ssthresh, PRR spreads the
- * cwnd reductions across a full RTT.
- * 2) If packets in flight is lower than ssthresh (such as due to excess
- * losses and/or application stalls), do not perform any further cwnd
- * reductions, but instead slow start up to ssthresh.
- */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
- int fast_rexmit, int flag)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int sndcnt = 0;
- int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
-
- if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
- u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
- tp->prior_cwnd - 1;
- sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else {
- sndcnt = min_t(int, delta,
- max_t(int, tp->prr_delivered - tp->prr_out,
- newly_acked_sacked) + 1);
- }
-
- sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
- tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
-}
-
static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2894,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
NET_INC_STATS_BH(sock_net(sk), mib_idx);
- tp->high_seq = tp->snd_nxt;
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
tp->undo_retrans = tp->retrans_out;
@@ -2902,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
if (!ece_ack)
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
- TCP_ECN_queue_cwr(tp);
+ tcp_init_cwnd_reduction(sk, true);
}
-
- tp->bytes_acked = 0;
- tp->snd_cwnd_cnt = 0;
- tp->prior_cwnd = tp->snd_cwnd;
- tp->prr_delivered = 0;
- tp->prr_out = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery);
}
@@ -2926,13 +2894,14 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
- int newly_acked_sacked, bool is_dupack,
+ int prior_sacked, bool is_dupack,
int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
+ int newly_acked_sacked = 0;
int fast_rexmit = 0;
if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2969,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
/* CWR is to be held something *above* high_seq
* is ACKed for CWR bit to reach receiver. */
if (tp->snd_una != tp->high_seq) {
- tcp_complete_cwr(sk);
+ tcp_end_cwnd_reduction(sk);
tcp_set_ca_state(sk, TCP_CA_Open);
}
break;
@@ -2979,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tcp_reset_reno_sack(tp);
if (tcp_try_undo_recovery(sk))
return;
- tcp_complete_cwr(sk);
+ tcp_end_cwnd_reduction(sk);
break;
}
}
@@ -2992,6 +2961,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tcp_add_reno_sack(sk);
} else
do_lost = tcp_try_undo_partial(sk, pkts_acked);
+ newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
break;
case TCP_CA_Loss:
if (flag & FLAG_DATA_ACKED)
@@ -3013,12 +2983,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
if (is_dupack)
tcp_add_reno_sack(sk);
}
+ newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk, flag)) {
- tcp_try_to_open(sk, flag);
+ tcp_try_to_open(sk, flag, newly_acked_sacked);
return;
}
@@ -3040,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
tcp_update_scoreboard(sk, fast_rexmit);
- tp->prr_delivered += newly_acked_sacked;
- tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+ tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
tcp_xmit_retransmit_queue(sk);
}
@@ -3120,6 +3090,12 @@ void tcp_rearm_rto(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ /* If the retrans timer is currently being used by Fast Open
+ * for SYN-ACK retrans purpose, stay put.
+ */
+ if (tp->fastopen_rsk)
+ return;
+
if (!tp->packets_out) {
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
} else {
@@ -3381,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
const struct tcp_sock *tp = tcp_sk(sk);
return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
- !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+ !tcp_in_cwnd_reduction(sk);
}
/* Check that window update is acceptable.
@@ -3449,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
}
/* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
*/
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
{
tcp_enter_cwr(sk, 0);
}
@@ -3459,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
static void tcp_undo_spur_to_response(struct sock *sk, int flag)
{
if (flag & FLAG_ECE)
- tcp_ratehalving_spur_to_response(sk);
+ tcp_cwr_spur_to_response(sk);
else
tcp_undo_cwr(sk, true);
}
@@ -3566,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
tcp_conservative_spur_to_response(tp);
break;
default:
- tcp_ratehalving_spur_to_response(sk);
+ tcp_cwr_spur_to_response(sk);
break;
}
tp->frto_counter = 0;
@@ -3590,7 +3566,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
int prior_packets;
int prior_sacked = tp->sacked_out;
int pkts_acked = 0;
- int newly_acked_sacked = 0;
bool frto_cwnd = false;
/* If the ack is older than previous acks
@@ -3666,8 +3641,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
pkts_acked = prior_packets - tp->packets_out;
- newly_acked_sacked = (prior_packets - prior_sacked) -
- (tp->packets_out - tp->sacked_out);
if (tp->frto_counter)
frto_cwnd = tcp_process_frto(sk, flag);
@@ -3681,7 +3654,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
is_dupack, flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
@@ -3698,7 +3671,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK)
- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
is_dupack, flag);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
@@ -3718,8 +3691,7 @@ old_ack:
*/
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
- newly_acked_sacked = tp->sacked_out - prior_sacked;
- tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
is_dupack, flag);
}
@@ -4035,7 +4007,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
}
/* When we get a reset we do this. */
-static void tcp_reset(struct sock *sk)
+void tcp_reset(struct sock *sk)
{
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
@@ -4351,19 +4323,20 @@ static void tcp_ofo_queue(struct sock *sk)
static bool tcp_prune_ofo_queue(struct sock *sk);
static int tcp_prune_queue(struct sock *sk);
-static int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
+ unsigned int size)
{
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- !sk_rmem_schedule(sk, size)) {
+ !sk_rmem_schedule(sk, skb, size)) {
if (tcp_prune_queue(sk) < 0)
return -1;
- if (!sk_rmem_schedule(sk, size)) {
+ if (!sk_rmem_schedule(sk, skb, size)) {
if (!tcp_prune_ofo_queue(sk))
return -1;
- if (!sk_rmem_schedule(sk, size))
+ if (!sk_rmem_schedule(sk, skb, size))
return -1;
}
}
@@ -4418,7 +4391,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
TCP_ECN_check_ce(tp, skb);
- if (unlikely(tcp_try_rmem_schedule(sk, skb->truesize))) {
+ if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
__kfree_skb(skb);
return;
@@ -4552,17 +4525,17 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct tcphdr *th;
bool fragstolen;
- if (tcp_try_rmem_schedule(sk, size + sizeof(*th)))
- goto err;
-
skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
if (!skb)
goto err;
+ if (tcp_try_rmem_schedule(sk, skb, size + sizeof(*th)))
+ goto err_free;
+
th = (struct tcphdr *)skb_put(skb, sizeof(*th));
skb_reset_transport_header(skb);
memset(th, 0, sizeof(*th));
@@ -4633,7 +4606,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (eaten <= 0) {
queue_and_out:
if (eaten < 0 &&
- tcp_try_rmem_schedule(sk, skb->truesize))
+ tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto drop;
eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
@@ -4661,7 +4634,7 @@ queue_and_out:
if (eaten > 0)
kfree_skb_partial(skb, fragstolen);
- else if (!sock_flag(sk, SOCK_DEAD))
+ if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0);
return;
}
@@ -5391,6 +5364,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
+ if (unlikely(sk->sk_rx_dst == NULL))
+ inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
* Header prediction.
* The code loosely follows the one in the famous
@@ -5475,7 +5450,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if (tp->copied_seq == tp->rcv_nxt &&
len - tcp_header_len <= tp->ucopy.len) {
#ifdef CONFIG_NET_DMA
- if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
+ if (tp->ucopy.task == current &&
+ sock_owned_by_user(sk) &&
+ tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
copied_early = 1;
eaten = 1;
}
@@ -5552,8 +5529,7 @@ no_ack:
#endif
if (eaten)
kfree_skb_partial(skb, fragstolen);
- else
- sk->sk_data_ready(sk, 0);
+ sk->sk_data_ready(sk, 0);
return 0;
}
}
@@ -5602,7 +5578,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
tcp_set_state(sk, TCP_ESTABLISHED);
if (skb != NULL) {
- sk->sk_rx_dst = dst_clone(skb_dst(skb));
+ icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
security_inet_conn_established(sk, skb);
}
@@ -5737,7 +5713,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
TCP_ECN_rcv_synack(tp, th);
- tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
+ tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
tcp_ack(sk, skb, FLAG_SLOWPATH);
/* Ok.. it's good. Set up sequence numbers and
@@ -5750,7 +5726,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* never scaled.
*/
tp->snd_wnd = ntohs(th->window);
- tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
@@ -5888,7 +5863,9 @@ discard:
tcp_send_synack(sk);
#if 0
/* Note, we could accept data and URG from this segment.
- * There are no obstacles to make this.
+ * There are no obstacles to make this (except that we must
+ * either change tcp_recvmsg() to prevent it from returning data
+ * before 3WHS completes per RFC793, or employ TCP Fast Open).
*
* However, if we ignore data in ACKless segments sometimes,
* we have no reasons to accept it sometimes.
@@ -5928,6 +5905,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct request_sock *req;
int queued = 0;
tp->rx_opt.saw_tstamp = 0;
@@ -5983,6 +5961,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
return 0;
}
+ req = tp->fastopen_rsk;
+ if (req != NULL) {
+ BUG_ON(sk->sk_state != TCP_SYN_RECV &&
+ sk->sk_state != TCP_FIN_WAIT1);
+
+ if (tcp_check_req(sk, skb, req, NULL, true) == NULL)
+ goto discard;
+ }
if (!tcp_validate_incoming(sk, skb, th, 0))
return 0;
@@ -5993,7 +5979,25 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
switch (sk->sk_state) {
case TCP_SYN_RECV:
if (acceptable) {
- tp->copied_seq = tp->rcv_nxt;
+ /* Once we leave TCP_SYN_RECV, we no longer
+ * need req so release it.
+ */
+ if (req) {
+ tcp_synack_rtt_meas(sk, req);
+ tp->total_retrans = req->retrans;
+
+ reqsk_fastopen_remove(sk, req, false);
+ } else {
+ /* Make sure socket is routed, for
+ * correct metrics.
+ */
+ icsk->icsk_af_ops->rebuild_header(sk);
+ tcp_init_congestion_control(sk);
+
+ tcp_mtup_init(sk);
+ tcp_init_buffer_space(sk);
+ tp->copied_seq = tp->rcv_nxt;
+ }
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
sk->sk_state_change(sk);
@@ -6015,23 +6019,27 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- /* Make sure socket is routed, for
- * correct metrics.
- */
- icsk->icsk_af_ops->rebuild_header(sk);
-
- tcp_init_metrics(sk);
-
- tcp_init_congestion_control(sk);
+ if (req) {
+ /* Re-arm the timer because data may
+ * have been sent out. This is similar
+ * to the regular data transmission case
+ * when new data has just been ack'ed.
+ *
+ * (TFO) - we could try to be more
+ * aggressive and retranmitting any data
+ * sooner based on when they were sent
+ * out.
+ */
+ tcp_rearm_rto(sk);
+ } else
+ tcp_init_metrics(sk);
/* Prevent spurious tcp_cwnd_restart() on
* first data packet.
*/
tp->lsndtime = tcp_time_stamp;
- tcp_mtup_init(sk);
tcp_initialize_rcv_mss(sk);
- tcp_init_buffer_space(sk);
tcp_fast_path_on(tp);
} else {
return 1;
@@ -6039,6 +6047,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
break;
case TCP_FIN_WAIT1:
+ /* If we enter the TCP_FIN_WAIT1 state and we are a
+ * Fast Open socket and this is the first acceptable
+ * ACK we have received, this would have acknowledged
+ * our SYNACK so stop the SYNACK timer.
+ */
+ if (acceptable && req != NULL) {
+ /* We no longer need the request sock. */
+ reqsk_fastopen_remove(sk, req, false);
+ tcp_rearm_rto(sk);
+ }
if (tp->snd_una == tp->write_seq) {
struct dst_entry *dst;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b6b07c93924c..75735c9a6a9d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -352,6 +352,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
const int code = icmp_hdr(icmp_skb)->code;
struct sock *sk;
struct sk_buff *skb;
+ struct request_sock *req;
__u32 seq;
__u32 remaining;
int err;
@@ -394,9 +395,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
icsk = inet_csk(sk);
tp = tcp_sk(sk);
+ req = tp->fastopen_rsk;
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
- !between(seq, tp->snd_una, tp->snd_nxt)) {
+ !between(seq, tp->snd_una, tp->snd_nxt) &&
+ (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
+ /* For a Fast Open socket, allow seq to be snt_isn. */
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -417,10 +421,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
tp->mtu_info = info;
- if (!sock_owned_by_user(sk))
+ if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
- else
- set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
+ } else {
+ if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
+ sock_hold(sk);
+ }
goto out;
}
@@ -433,6 +439,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
!icsk->icsk_backoff)
break;
+ /* XXX (TFO) - revisit the following logic for TFO */
+
if (sock_owned_by_user(sk))
break;
@@ -464,6 +472,14 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
goto out;
}
+ /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
+ * than following the TCP_SYN_RECV case and closing the socket,
+ * we ignore the ICMP error and keep trying like a fully established
+ * socket. Is this the right thing to do?
+ */
+ if (req && req->sk == NULL)
+ goto out;
+
switch (sk->sk_state) {
struct request_sock *req, **prev;
case TCP_LISTEN:
@@ -496,7 +512,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
case TCP_SYN_SENT:
case TCP_SYN_RECV: /* Cannot happen.
- It can f.e. if SYNs crossed.
+ It can f.e. if SYNs crossed,
+ or Fast Open.
*/
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
@@ -807,8 +824,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
- tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
- tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
+ /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+ * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+ */
+ tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+ tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
req->ts_recent,
0,
tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -837,7 +858,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
- skb = tcp_make_synack(sk, dst, req, rvp);
+ skb = tcp_make_synack(sk, dst, req, rvp, NULL);
if (skb) {
__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
@@ -847,6 +868,8 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
ireq->rmt_addr,
ireq->opt);
err = net_xmit_eval(err);
+ if (!tcp_rsk(req)->snt_synack && !err)
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
}
return err;
@@ -902,8 +925,7 @@ EXPORT_SYMBOL(tcp_syn_flood_action);
/*
* Save and compile IPv4 options into the request_sock if needed.
*/
-static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
- struct sk_buff *skb)
+static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
const struct ip_options *opt = &(IPCB(skb)->opt);
struct ip_options_rcu *dopt = NULL;
@@ -1270,6 +1292,182 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
};
#endif
+static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ struct tcp_fastopen_cookie *valid_foc)
+{
+ bool skip_cookie = false;
+ struct fastopen_queue *fastopenq;
+
+ if (likely(!fastopen_cookie_present(foc))) {
+ /* See include/net/tcp.h for the meaning of these knobs */
+ if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
+ ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
+ (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
+ skip_cookie = true; /* no cookie to validate */
+ else
+ return false;
+ }
+ fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+ /* A FO option is present; bump the counter. */
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
+
+ /* Make sure the listener has enabled fastopen, and we don't
+ * exceed the max # of pending TFO requests allowed before trying
+ * to validating the cookie in order to avoid burning CPU cycles
+ * unnecessarily.
+ *
+ * XXX (TFO) - The implication of checking the max_qlen before
+ * processing a cookie request is that clients can't differentiate
+ * between qlen overflow causing Fast Open to be disabled
+ * temporarily vs a server not supporting Fast Open at all.
+ */
+ if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
+ fastopenq == NULL || fastopenq->max_qlen == 0)
+ return false;
+
+ if (fastopenq->qlen >= fastopenq->max_qlen) {
+ struct request_sock *req1;
+ spin_lock(&fastopenq->lock);
+ req1 = fastopenq->rskq_rst_head;
+ if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+ spin_unlock(&fastopenq->lock);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+ /* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
+ foc->len = -1;
+ return false;
+ }
+ fastopenq->rskq_rst_head = req1->dl_next;
+ fastopenq->qlen--;
+ spin_unlock(&fastopenq->lock);
+ reqsk_free(req1);
+ }
+ if (skip_cookie) {
+ tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ return true;
+ }
+ if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
+ if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
+ tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+ if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
+ memcmp(&foc->val[0], &valid_foc->val[0],
+ TCP_FASTOPEN_COOKIE_SIZE) != 0)
+ return false;
+ valid_foc->len = -1;
+ }
+ /* Acknowledge the data received from the peer. */
+ tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ return true;
+ } else if (foc->len == 0) { /* Client requesting a cookie */
+ tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+ } else {
+ /* Client sent a cookie with wrong size. Treat it
+ * the same as invalid and return a valid one.
+ */
+ tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+ }
+ return false;
+}
+
+static int tcp_v4_conn_req_fastopen(struct sock *sk,
+ struct sk_buff *skb,
+ struct sk_buff *skb_synack,
+ struct request_sock *req,
+ struct request_values *rvp)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+ const struct inet_request_sock *ireq = inet_rsk(req);
+ struct sock *child;
+ int err;
+
+ req->retrans = 0;
+ req->sk = NULL;
+
+ child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+ if (child == NULL) {
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+ kfree_skb(skb_synack);
+ return -1;
+ }
+ err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+ ireq->rmt_addr, ireq->opt);
+ err = net_xmit_eval(err);
+ if (!err)
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ /* XXX (TFO) - is it ok to ignore error and continue? */
+
+ spin_lock(&queue->fastopenq->lock);
+ queue->fastopenq->qlen++;
+ spin_unlock(&queue->fastopenq->lock);
+
+ /* Initialize the child socket. Have to fix some values to take
+ * into account the child is a Fast Open socket and is created
+ * only out of the bits carried in the SYN packet.
+ */
+ tp = tcp_sk(child);
+
+ tp->fastopen_rsk = req;
+ /* Do a hold on the listner sk so that if the listener is being
+ * closed, the child that has been accepted can live on and still
+ * access listen_lock.
+ */
+ sock_hold(sk);
+ tcp_rsk(req)->listener = sk;
+
+ /* RFC1323: The window in SYN & SYN/ACK segments is never
+ * scaled. So correct it appropriately.
+ */
+ tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+ /* Activate the retrans timer so that SYNACK can be retransmitted.
+ * The request socket is not added to the SYN table of the parent
+ * because it's been added to the accept queue directly.
+ */
+ inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+ TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+ /* Add the child socket directly into the accept queue */
+ inet_csk_reqsk_queue_add(sk, req, child);
+
+ /* Now finish processing the fastopen child socket. */
+ inet_csk(child)->icsk_af_ops->rebuild_header(child);
+ tcp_init_congestion_control(child);
+ tcp_mtup_init(child);
+ tcp_init_buffer_space(child);
+ tcp_init_metrics(child);
+
+ /* Queue the data carried in the SYN packet. We need to first
+ * bump skb's refcnt because the caller will attempt to free it.
+ *
+ * XXX (TFO) - we honor a zero-payload TFO request for now.
+ * (Any reason not to?)
+ */
+ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
+ /* Don't queue the skb if there is no payload in SYN.
+ * XXX (TFO) - How about SYN+FIN?
+ */
+ tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ } else {
+ skb = skb_get(skb);
+ skb_dst_drop(skb);
+ __skb_pull(skb, tcp_hdr(skb)->doff * 4);
+ skb_set_owner_r(skb, child);
+ __skb_queue_tail(&child->sk_receive_queue, skb);
+ tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ }
+ sk->sk_data_ready(sk, 0);
+ bh_unlock_sock(child);
+ sock_put(child);
+ WARN_ON(req->sk == NULL);
+ return 0;
+}
+
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct tcp_extend_values tmp_ext;
@@ -1283,6 +1481,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
__be32 daddr = ip_hdr(skb)->daddr;
__u32 isn = TCP_SKB_CB(skb)->when;
bool want_cookie = false;
+ struct flowi4 fl4;
+ struct tcp_fastopen_cookie foc = { .len = -1 };
+ struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+ struct sk_buff *skb_synack;
+ int do_fastopen;
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1317,7 +1520,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
+ want_cookie ? NULL : &foc);
if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp &&
@@ -1363,7 +1567,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->loc_addr = daddr;
ireq->rmt_addr = saddr;
ireq->no_srccheck = inet_sk(sk)->transparent;
- ireq->opt = tcp_v4_save_options(sk, skb);
+ ireq->opt = tcp_v4_save_options(skb);
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
@@ -1375,8 +1579,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
isn = cookie_v4_init_sequence(sk, skb, &req->mss);
req->cookie_ts = tmp_opt.tstamp_ok;
} else if (!isn) {
- struct flowi4 fl4;
-
/* VJ's idea. We save last timestamp seen
* from the destination in peer table, when entering
* state TIME-WAIT, and check against it before
@@ -1415,16 +1617,54 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
isn = tcp_v4_init_sequence(skb);
}
tcp_rsk(req)->snt_isn = isn;
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
- if (tcp_v4_send_synack(sk, dst, req,
- (struct request_values *)&tmp_ext,
- skb_get_queue_mapping(skb),
- want_cookie) ||
- want_cookie)
+ if (dst == NULL) {
+ dst = inet_csk_route_req(sk, &fl4, req);
+ if (dst == NULL)
+ goto drop_and_free;
+ }
+ do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
+
+ /* We don't call tcp_v4_send_synack() directly because we need
+ * to make sure a child socket can be created successfully before
+ * sending back synack!
+ *
+ * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
+ * (or better yet, call tcp_send_synack() in the child context
+ * directly, but will have to fix bunch of other code first)
+ * after syn_recv_sock() except one will need to first fix the
+ * latter to remove its dependency on the current implementation
+ * of tcp_v4_send_synack()->tcp_select_initial_window().
+ */
+ skb_synack = tcp_make_synack(sk, dst, req,
+ (struct request_values *)&tmp_ext,
+ fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
+
+ if (skb_synack) {
+ __tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
+ skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
+ } else
+ goto drop_and_free;
+
+ if (likely(!do_fastopen)) {
+ int err;
+ err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+ ireq->rmt_addr, ireq->opt);
+ err = net_xmit_eval(err);
+ if (err || want_cookie)
+ goto drop_and_free;
+
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ tcp_rsk(req)->listener = NULL;
+ /* Add the request_sock to the SYN table */
+ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ if (fastopen_cookie_present(&foc) && foc.len != 0)
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+ } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
+ (struct request_values *)&tmp_ext))
goto drop_and_free;
- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
return 0;
drop_and_release:
@@ -1462,6 +1702,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
goto exit_nonewsk;
newsk->sk_gso_type = SKB_GSO_TCPV4;
+ inet_sk_rx_dst_set(newsk, skb);
newtp = tcp_sk(newsk);
newinet = inet_sk(newsk);
@@ -1497,9 +1738,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
tcp_initialize_rcv_mss(newsk);
- if (tcp_rsk(req)->snt_synack)
- tcp_valid_rtt_meas(newsk,
- tcp_time_stamp - tcp_rsk(req)->snt_synack);
+ tcp_synack_rtt_meas(newsk, req);
newtp->total_retrans = req->retrans;
#ifdef CONFIG_TCP_MD5SIG
@@ -1551,7 +1790,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
iph->saddr, iph->daddr);
if (req)
- return tcp_check_req(sk, skb, req, prev);
+ return tcp_check_req(sk, skb, req, prev, false);
nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
th->source, iph->daddr, th->dest, inet_iif(skb));
@@ -1617,21 +1856,16 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
#endif
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ struct dst_entry *dst = sk->sk_rx_dst;
+
sock_rps_save_rxhash(sk, skb);
- if (sk->sk_rx_dst) {
- struct dst_entry *dst = sk->sk_rx_dst;
- if (dst->ops->check(dst, 0) == NULL) {
+ if (dst) {
+ if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+ dst->ops->check(dst, 0) == NULL) {
dst_release(dst);
sk->sk_rx_dst = NULL;
}
}
- if (unlikely(sk->sk_rx_dst == NULL)) {
- struct inet_sock *icsk = inet_sk(sk);
- struct rtable *rt = skb_rtable(skb);
-
- sk->sk_rx_dst = dst_clone(&rt->dst);
- icsk->rx_dst_ifindex = inet_iif(skb);
- }
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
rsk = sk;
goto reset;
@@ -1709,11 +1943,11 @@ void tcp_v4_early_demux(struct sk_buff *skb)
skb->destructor = sock_edemux;
if (sk->sk_state != TCP_TIME_WAIT) {
struct dst_entry *dst = sk->sk_rx_dst;
- struct inet_sock *icsk = inet_sk(sk);
+
if (dst)
dst = dst_check(dst, 0);
if (dst &&
- icsk->rx_dst_ifindex == skb->skb_iif)
+ inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
@@ -1874,10 +2108,21 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = {
.twsk_destructor= tcp_twsk_destructor,
};
+void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+}
+EXPORT_SYMBOL(inet_sk_rx_dst_set);
+
const struct inet_connection_sock_af_ops ipv4_specific = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
+ .sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v4_conn_request,
.syn_recv_sock = tcp_v4_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
@@ -1954,20 +2199,13 @@ void tcp_v4_destroy_sock(struct sock *sk)
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
- /*
- * If sendmsg cached page exists, toss it.
- */
- if (sk->sk_sndmsg_page) {
- __free_page(sk->sk_sndmsg_page);
- sk->sk_sndmsg_page = NULL;
- }
-
/* TCP Cookie Transactions */
if (tp->cookie_values != NULL) {
kref_put(&tp->cookie_values->kref,
tcp_cookie_values_release);
tp->cookie_values = NULL;
}
+ BUG_ON(tp->fastopen_rsk != NULL);
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
@@ -2384,10 +2622,10 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
EXPORT_SYMBOL(tcp_proc_unregister);
static void get_openreq4(const struct sock *sk, const struct request_sock *req,
- struct seq_file *f, int i, int uid, int *len)
+ struct seq_file *f, int i, kuid_t uid, int *len)
{
const struct inet_request_sock *ireq = inet_rsk(req);
- int ttd = req->expires - jiffies;
+ long delta = req->expires - jiffies;
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
@@ -2399,9 +2637,9 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
TCP_SYN_RECV,
0, 0, /* could print option size, but that is af dependent. */
1, /* timers active (only the expire timer) */
- jiffies_to_clock_t(ttd),
+ jiffies_delta_to_clock_t(delta),
req->retrans,
- uid,
+ from_kuid_munged(seq_user_ns(f), uid),
0, /* non standard timer */
0, /* open_requests have no inode */
atomic_read(&sk->sk_refcnt),
@@ -2416,6 +2654,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct inet_sock *inet = inet_sk(sk);
+ struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
__be32 dest = inet->inet_daddr;
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
@@ -2450,9 +2689,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
tp->write_seq - tp->snd_una,
rx_queue,
timer_active,
- jiffies_to_clock_t(timer_expires - jiffies),
+ jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
- sock_i_uid(sk),
+ from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
icsk->icsk_probes_out,
sock_i_ino(sk),
atomic_read(&sk->sk_refcnt), sk,
@@ -2460,7 +2699,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
- tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
+ sk->sk_state == TCP_LISTEN ?
+ (fastopenq ? fastopenq->max_qlen : 0) :
+ (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
len);
}
@@ -2469,10 +2710,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
{
__be32 dest, src;
__u16 destp, srcp;
- int ttd = tw->tw_ttd - jiffies;
-
- if (ttd < 0)
- ttd = 0;
+ long delta = tw->tw_ttd - jiffies;
dest = tw->tw_daddr;
src = tw->tw_rcv_saddr;
@@ -2482,7 +2720,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
atomic_read(&tw->tw_refcnt), tw, len);
}
@@ -2565,6 +2803,8 @@ void tcp4_proc_exit(void)
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
const struct iphdr *iph = skb_gro_network_header(skb);
+ __wsum wsum;
+ __sum16 sum;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
@@ -2573,11 +2813,22 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
-
- /* fall through */
- case CHECKSUM_NONE:
+flush:
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
+
+ case CHECKSUM_NONE:
+ wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
+ skb_gro_len(skb), IPPROTO_TCP, 0);
+ sum = csum_fold(skb_checksum(skb,
+ skb_gro_offset(skb),
+ skb_gro_len(skb),
+ wsum));
+ if (sum)
+ goto flush;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
}
return tcp_gro_receive(head, skb);
@@ -2635,7 +2886,7 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
.init_cgroup = tcp_init_cgroup,
.destroy_cgroup = tcp_destroy_cgroup,
.proto_cgroup = tcp_proto_cgroup,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 2288a6399e1e..4c752a6e0bcd 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/tcp.h>
#include <linux/hash.h>
+#include <linux/tcp_metrics.h>
#include <net/inet_connection_sock.h>
#include <net/net_namespace.h>
@@ -17,20 +18,10 @@
#include <net/ipv6.h>
#include <net/dst.h>
#include <net/tcp.h>
+#include <net/genetlink.h>
int sysctl_tcp_nometrics_save __read_mostly;
-enum tcp_metric_index {
- TCP_METRIC_RTT,
- TCP_METRIC_RTTVAR,
- TCP_METRIC_SSTHRESH,
- TCP_METRIC_CWND,
- TCP_METRIC_REORDERING,
-
- /* Always last. */
- TCP_METRIC_MAX,
-};
-
struct tcp_fastopen_metrics {
u16 mss;
u16 syn_loss:10; /* Recurring Fast Open SYN losses */
@@ -45,8 +36,10 @@ struct tcp_metrics_block {
u32 tcpm_ts;
u32 tcpm_ts_stamp;
u32 tcpm_lock;
- u32 tcpm_vals[TCP_METRIC_MAX];
+ u32 tcpm_vals[TCP_METRIC_MAX + 1];
struct tcp_fastopen_metrics tcpm_fastopen;
+
+ struct rcu_head rcu_head;
};
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
@@ -690,6 +683,325 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
rcu_read_unlock();
}
+static struct genl_family tcp_metrics_nl_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = TCP_METRICS_GENL_NAME,
+ .version = TCP_METRICS_GENL_VERSION,
+ .maxattr = TCP_METRICS_ATTR_MAX,
+ .netnsok = true,
+};
+
+static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
+ [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, },
+ [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY,
+ .len = sizeof(struct in6_addr), },
+ /* Following attributes are not received for GET/DEL,
+ * we keep them for reference
+ */
+#if 0
+ [TCP_METRICS_ATTR_AGE] = { .type = NLA_MSECS, },
+ [TCP_METRICS_ATTR_TW_TSVAL] = { .type = NLA_U32, },
+ [TCP_METRICS_ATTR_TW_TS_STAMP] = { .type = NLA_S32, },
+ [TCP_METRICS_ATTR_VALS] = { .type = NLA_NESTED, },
+ [TCP_METRICS_ATTR_FOPEN_MSS] = { .type = NLA_U16, },
+ [TCP_METRICS_ATTR_FOPEN_SYN_DROPS] = { .type = NLA_U16, },
+ [TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS] = { .type = NLA_MSECS, },
+ [TCP_METRICS_ATTR_FOPEN_COOKIE] = { .type = NLA_BINARY,
+ .len = TCP_FASTOPEN_COOKIE_MAX, },
+#endif
+};
+
+/* Add attributes, caller cancels its header on failure */
+static int tcp_metrics_fill_info(struct sk_buff *msg,
+ struct tcp_metrics_block *tm)
+{
+ struct nlattr *nest;
+ int i;
+
+ switch (tm->tcpm_addr.family) {
+ case AF_INET:
+ if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4,
+ tm->tcpm_addr.addr.a4) < 0)
+ goto nla_put_failure;
+ break;
+ case AF_INET6:
+ if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16,
+ tm->tcpm_addr.addr.a6) < 0)
+ goto nla_put_failure;
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
+ jiffies - tm->tcpm_stamp) < 0)
+ goto nla_put_failure;
+ if (tm->tcpm_ts_stamp) {
+ if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
+ (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
+ goto nla_put_failure;
+ if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
+ tm->tcpm_ts) < 0)
+ goto nla_put_failure;
+ }
+
+ {
+ int n = 0;
+
+ nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
+ if (!nest)
+ goto nla_put_failure;
+ for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
+ if (!tm->tcpm_vals[i])
+ continue;
+ if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0)
+ goto nla_put_failure;
+ n++;
+ }
+ if (n)
+ nla_nest_end(msg, nest);
+ else
+ nla_nest_cancel(msg, nest);
+ }
+
+ {
+ struct tcp_fastopen_metrics tfom_copy[1], *tfom;
+ unsigned int seq;
+
+ do {
+ seq = read_seqbegin(&fastopen_seqlock);
+ tfom_copy[0] = tm->tcpm_fastopen;
+ } while (read_seqretry(&fastopen_seqlock, seq));
+
+ tfom = tfom_copy;
+ if (tfom->mss &&
+ nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_MSS,
+ tfom->mss) < 0)
+ goto nla_put_failure;
+ if (tfom->syn_loss &&
+ (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS,
+ tfom->syn_loss) < 0 ||
+ nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS,
+ jiffies - tfom->last_syn_loss) < 0))
+ goto nla_put_failure;
+ if (tfom->cookie.len > 0 &&
+ nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE,
+ tfom->cookie.len, tfom->cookie.val) < 0)
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int tcp_metrics_dump_info(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct tcp_metrics_block *tm)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &tcp_metrics_nl_family, NLM_F_MULTI,
+ TCP_METRICS_CMD_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (tcp_metrics_fill_info(skb, tm) < 0)
+ goto nla_put_failure;
+
+ return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int tcp_metrics_nl_dump(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
+ unsigned int row, s_row = cb->args[0];
+ int s_col = cb->args[1], col = s_col;
+
+ for (row = s_row; row < max_rows; row++, s_col = 0) {
+ struct tcp_metrics_block *tm;
+ struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row;
+
+ rcu_read_lock();
+ for (col = 0, tm = rcu_dereference(hb->chain); tm;
+ tm = rcu_dereference(tm->tcpm_next), col++) {
+ if (col < s_col)
+ continue;
+ if (tcp_metrics_dump_info(skb, cb, tm) < 0) {
+ rcu_read_unlock();
+ goto done;
+ }
+ }
+ rcu_read_unlock();
+ }
+
+done:
+ cb->args[0] = row;
+ cb->args[1] = col;
+ return skb->len;
+}
+
+static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
+ unsigned int *hash, int optional)
+{
+ struct nlattr *a;
+
+ a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4];
+ if (a) {
+ addr->family = AF_INET;
+ addr->addr.a4 = nla_get_be32(a);
+ *hash = (__force unsigned int) addr->addr.a4;
+ return 0;
+ }
+ a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6];
+ if (a) {
+ if (nla_len(a) != sizeof(sizeof(struct in6_addr)))
+ return -EINVAL;
+ addr->family = AF_INET6;
+ memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
+ *hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6);
+ return 0;
+ }
+ return optional ? 1 : -EAFNOSUPPORT;
+}
+
+static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct tcp_metrics_block *tm;
+ struct inetpeer_addr addr;
+ unsigned int hash;
+ struct sk_buff *msg;
+ struct net *net = genl_info_net(info);
+ void *reply;
+ int ret;
+
+ ret = parse_nl_addr(info, &addr, &hash, 0);
+ if (ret < 0)
+ return ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ reply = genlmsg_put_reply(msg, info, &tcp_metrics_nl_family, 0,
+ info->genlhdr->cmd);
+ if (!reply)
+ goto nla_put_failure;
+
+ hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+ ret = -ESRCH;
+ rcu_read_lock();
+ for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
+ tm = rcu_dereference(tm->tcpm_next)) {
+ if (addr_same(&tm->tcpm_addr, &addr)) {
+ ret = tcp_metrics_fill_info(msg, tm);
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (ret < 0)
+ goto out_free;
+
+ genlmsg_end(msg, reply);
+ return genlmsg_reply(msg, info);
+
+nla_put_failure:
+ ret = -EMSGSIZE;
+
+out_free:
+ nlmsg_free(msg);
+ return ret;
+}
+
+#define deref_locked_genl(p) \
+ rcu_dereference_protected(p, lockdep_genl_is_held() && \
+ lockdep_is_held(&tcp_metrics_lock))
+
+#define deref_genl(p) rcu_dereference_protected(p, lockdep_genl_is_held())
+
+static int tcp_metrics_flush_all(struct net *net)
+{
+ unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
+ struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash;
+ struct tcp_metrics_block *tm;
+ unsigned int row;
+
+ for (row = 0; row < max_rows; row++, hb++) {
+ spin_lock_bh(&tcp_metrics_lock);
+ tm = deref_locked_genl(hb->chain);
+ if (tm)
+ hb->chain = NULL;
+ spin_unlock_bh(&tcp_metrics_lock);
+ while (tm) {
+ struct tcp_metrics_block *next;
+
+ next = deref_genl(tm->tcpm_next);
+ kfree_rcu(tm, rcu_head);
+ tm = next;
+ }
+ }
+ return 0;
+}
+
+static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct tcpm_hash_bucket *hb;
+ struct tcp_metrics_block *tm;
+ struct tcp_metrics_block __rcu **pp;
+ struct inetpeer_addr addr;
+ unsigned int hash;
+ struct net *net = genl_info_net(info);
+ int ret;
+
+ ret = parse_nl_addr(info, &addr, &hash, 1);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return tcp_metrics_flush_all(net);
+
+ hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+ hb = net->ipv4.tcp_metrics_hash + hash;
+ pp = &hb->chain;
+ spin_lock_bh(&tcp_metrics_lock);
+ for (tm = deref_locked_genl(*pp); tm;
+ pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) {
+ if (addr_same(&tm->tcpm_addr, &addr)) {
+ *pp = tm->tcpm_next;
+ break;
+ }
+ }
+ spin_unlock_bh(&tcp_metrics_lock);
+ if (!tm)
+ return -ESRCH;
+ kfree_rcu(tm, rcu_head);
+ return 0;
+}
+
+static struct genl_ops tcp_metrics_nl_ops[] = {
+ {
+ .cmd = TCP_METRICS_CMD_GET,
+ .doit = tcp_metrics_nl_cmd_get,
+ .dumpit = tcp_metrics_nl_dump,
+ .policy = tcp_metrics_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = TCP_METRICS_CMD_DEL,
+ .doit = tcp_metrics_nl_cmd_del,
+ .policy = tcp_metrics_nl_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+};
+
static unsigned int tcpmhash_entries;
static int __init set_tcpmhash_entries(char *str)
{
@@ -731,6 +1043,18 @@ static int __net_init tcp_net_metrics_init(struct net *net)
static void __net_exit tcp_net_metrics_exit(struct net *net)
{
+ unsigned int i;
+
+ for (i = 0; i < (1U << net->ipv4.tcp_metrics_hash_log) ; i++) {
+ struct tcp_metrics_block *tm, *next;
+
+ tm = rcu_dereference_protected(net->ipv4.tcp_metrics_hash[i].chain, 1);
+ while (tm) {
+ next = rcu_dereference_protected(tm->tcpm_next, 1);
+ kfree(tm);
+ tm = next;
+ }
+ }
kfree(net->ipv4.tcp_metrics_hash);
}
@@ -741,5 +1065,21 @@ static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
void __init tcp_metrics_init(void)
{
- register_pernet_subsys(&tcp_net_metrics_ops);
+ int ret;
+
+ ret = register_pernet_subsys(&tcp_net_metrics_ops);
+ if (ret < 0)
+ goto cleanup;
+ ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
+ tcp_metrics_nl_ops,
+ ARRAY_SIZE(tcp_metrics_nl_ops));
+ if (ret < 0)
+ goto cleanup_subsys;
+ return;
+
+cleanup_subsys:
+ unregister_pernet_subsys(&tcp_net_metrics_ops);
+
+cleanup:
+ return;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 5912ac3fd240..27536ba16c9d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -85,6 +85,8 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
* spinlock it. I do not want! Well, probability of misbehaviour
* is ridiculously low and, seems, we could use some mb() tricks
* to avoid misread sequence numbers, states etc. --ANK
+ *
+ * We don't need to initialize tmp_out.sack_ok as we don't use the results
*/
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
@@ -387,8 +389,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
struct tcp_sock *oldtp = tcp_sk(sk);
struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
- newsk->sk_rx_dst = dst_clone(skb_dst(skb));
-
/* TCP Cookie Transactions require space for the cookie pair,
* as it differs for each connection. There is no need to
* copy any s_data_payload stored at the original socket.
@@ -509,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
+ newtp->fastopen_rsk = NULL;
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
@@ -517,13 +518,20 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
EXPORT_SYMBOL(tcp_create_openreq_child);
/*
- * Process an incoming packet for SYN_RECV sockets represented
- * as a request_sock.
+ * Process an incoming packet for SYN_RECV sockets represented as a
+ * request_sock. Normally sk is the listener socket but for TFO it
+ * points to the child socket.
+ *
+ * XXX (TFO) - The current impl contains a special check for ack
+ * validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
+ *
+ * We don't need to initialize tmp_opt.sack_ok as we don't use the results
*/
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct request_sock **prev)
+ struct request_sock **prev,
+ bool fastopen)
{
struct tcp_options_received tmp_opt;
const u8 *hash_location;
@@ -532,6 +540,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
bool paws_reject = false;
+ BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
+
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
@@ -567,6 +577,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
*
* Enforce "SYN-ACK" according to figure 8, figure 6
* of RFC793, fixed by RFC1122.
+ *
+ * Note that even if there is new data in the SYN packet
+ * they will be thrown away too.
*/
req->rsk_ops->rtx_syn_ack(sk, req, NULL);
return NULL;
@@ -624,9 +637,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* sent (the segment carries an unacceptable ACK) ...
* a reset is sent."
*
- * Invalid ACK: reset will be sent by listening socket
+ * Invalid ACK: reset will be sent by listening socket.
+ * Note that the ACK validity check for a Fast Open socket is done
+ * elsewhere and is checked directly against the child socket rather
+ * than req because user data may have been sent out.
*/
- if ((flg & TCP_FLAG_ACK) &&
+ if ((flg & TCP_FLAG_ACK) && !fastopen &&
(TCP_SKB_CB(skb)->ack_seq !=
tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
return sk;
@@ -639,7 +655,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* RFC793: "first check sequence number". */
if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
+ tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_ack(sk, skb, req);
@@ -650,7 +666,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* In sequence, PAWS is OK. */
- if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
+ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
req->ts_recent = tmp_opt.rcv_tsval;
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
@@ -669,10 +685,25 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* ACK sequence verified above, just make sure ACK is
* set. If ACK not set, just silently drop the packet.
+ *
+ * XXX (TFO) - if we ever allow "data after SYN", the
+ * following check needs to be removed.
*/
if (!(flg & TCP_FLAG_ACK))
return NULL;
+ /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
+ if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
+ tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
+ else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
+ tcp_rsk(req)->snt_synack = 0;
+
+ /* For Fast Open no more processing is needed (sk is the
+ * child socket).
+ */
+ if (fastopen)
+ return sk;
+
/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
@@ -680,10 +711,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
return NULL;
}
- if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
- tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
- else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
- tcp_rsk(req)->snt_synack = 0;
/* OK, ACK is valid, create big socket and
* feed this segment to it. It will repeat all
@@ -708,11 +735,21 @@ listen_overflow:
}
embryonic_reset:
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
- if (!(flg & TCP_FLAG_RST))
+ if (!(flg & TCP_FLAG_RST)) {
+ /* Received a bad SYN pkt - for TFO We try not to reset
+ * the local connection unless it's really necessary to
+ * avoid becoming vulnerable to outside attack aiming at
+ * resetting legit local connections.
+ */
req->rsk_ops->send_reset(sk, skb);
-
- inet_csk_reqsk_queue_drop(sk, req, prev);
+ } else if (fastopen) { /* received a valid RST pkt */
+ reqsk_fastopen_remove(sk, req, true);
+ tcp_reset(sk);
+ }
+ if (!fastopen) {
+ inet_csk_reqsk_queue_drop(sk, req, prev);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+ }
return NULL;
}
EXPORT_SYMBOL(tcp_check_req);
@@ -721,6 +758,12 @@ EXPORT_SYMBOL(tcp_check_req);
* Queue segment on the new socket if the new socket is active,
* otherwise we just shortcircuit this and continue with
* the new socket.
+ *
+ * For the vast majority of cases child->sk_state will be TCP_SYN_RECV
+ * when entering. But other states are possible due to a race condition
+ * where after __inet_lookup_established() fails but before the listener
+ * locked is obtained, other packets cause the same connection to
+ * be created.
*/
int tcp_child_process(struct sock *parent, struct sock *child,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 33cd065cfbd8..cfe6ffe1c177 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -702,7 +702,8 @@ static unsigned int tcp_synack_options(struct sock *sk,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5,
- struct tcp_extend_values *xvp)
+ struct tcp_extend_values *xvp,
+ struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -747,7 +748,15 @@ static unsigned int tcp_synack_options(struct sock *sk,
if (unlikely(!ireq->tstamp_ok))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
-
+ if (foc != NULL) {
+ u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+ need = (need + 3) & ~3U; /* Align to 32 bits */
+ if (remaining >= need) {
+ opts->options |= OPTION_FAST_OPEN_COOKIE;
+ opts->fastopen_cookie = foc;
+ remaining -= need;
+ }
+ }
/* Similar rationale to tcp_syn_options() applies here, too.
* If the <SYN> options fit, the same options should fit now!
*/
@@ -910,14 +919,18 @@ void tcp_release_cb(struct sock *sk)
if (flags & (1UL << TCP_TSQ_DEFERRED))
tcp_tsq_handler(sk);
- if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED))
+ if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
tcp_write_timer_handler(sk);
-
- if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED))
+ __sock_put(sk);
+ }
+ if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
tcp_delack_timer_handler(sk);
-
- if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED))
+ __sock_put(sk);
+ }
+ if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
sk->sk_prot->mtu_reduced(sk);
+ __sock_put(sk);
+ }
}
EXPORT_SYMBOL(tcp_release_cb);
@@ -940,7 +953,7 @@ void __init tcp_tasklet_init(void)
* We cant xmit new skbs from this context, as we might already
* hold qdisc lock.
*/
-void tcp_wfree(struct sk_buff *skb)
+static void tcp_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct tcp_sock *tp = tcp_sk(sk);
@@ -1522,21 +1535,21 @@ static void tcp_cwnd_validate(struct sock *sk)
* when we would be allowed to send the split-due-to-Nagle skb fully.
*/
static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
- unsigned int mss_now, unsigned int cwnd)
+ unsigned int mss_now, unsigned int max_segs)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 needed, window, cwnd_len;
+ u32 needed, window, max_len;
window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
- cwnd_len = mss_now * cwnd;
+ max_len = mss_now * max_segs;
- if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
- return cwnd_len;
+ if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
+ return max_len;
needed = min(skb->len, window);
- if (cwnd_len <= needed)
- return cwnd_len;
+ if (max_len <= needed)
+ return max_len;
return needed - needed % mss_now;
}
@@ -1765,7 +1778,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
limit = min(send_win, cong_win);
/* If a full-sized TSO skb can be sent, do it. */
- if (limit >= sk->sk_gso_max_size)
+ if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
+ sk->sk_gso_max_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1999,7 +2013,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
- cwnd_quota);
+ min_t(unsigned int,
+ cwnd_quota,
+ sk->sk_gso_max_segs));
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -2021,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (push_one)
break;
}
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
- tp->prr_out += sent_pkts;
if (likely(sent_pkts)) {
+ if (tcp_in_cwnd_reduction(sk))
+ tp->prr_out += sent_pkts;
tcp_cwnd_validate(sk);
return false;
}
@@ -2045,7 +2061,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
if (unlikely(sk->sk_state == TCP_CLOSE))
return;
- if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
+ if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
+ sk_gfp_atomic(sk, GFP_ATOMIC)))
tcp_check_probe_timer(sk);
}
@@ -2525,7 +2542,7 @@ begin_fwd:
}
NET_INC_STATS_BH(sock_net(sk), mib_idx);
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+ if (tcp_in_cwnd_reduction(sk))
tp->prr_out += tcp_skb_pcount(skb);
if (skb == tcp_write_queue_head(sk))
@@ -2650,7 +2667,8 @@ int tcp_send_synack(struct sock *sk)
*/
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- struct request_values *rvp)
+ struct request_values *rvp,
+ struct tcp_fastopen_cookie *foc)
{
struct tcp_out_options opts;
struct tcp_extend_values *xvp = tcp_xv(rvp);
@@ -2666,7 +2684,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
s_data_desired = cvp->s_data_desired;
- skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC);
+ skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
@@ -2709,7 +2728,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
#endif
TCP_SKB_CB(skb)->when = tcp_time_stamp;
tcp_header_size = tcp_synack_options(sk, req, mss,
- skb, &opts, &md5, xvp)
+ skb, &opts, &md5, xvp, foc)
+ sizeof(*th);
skb_push(skb, tcp_header_size);
@@ -2763,7 +2782,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
}
th->seq = htonl(TCP_SKB_CB(skb)->seq);
- th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
+ /* XXX data is queued and acked as is. No buffer/window check */
+ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rcv_wnd, 65535U));
@@ -3064,7 +3084,7 @@ void tcp_send_ack(struct sock *sk)
* tcp_transmit_skb() will set the ownership to this
* sock.
*/
- buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+ buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
if (buff == NULL) {
inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
@@ -3079,7 +3099,7 @@ void tcp_send_ack(struct sock *sk)
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
- tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
+ tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC));
}
/* This routine sends a packet with an out of date sequence
@@ -3099,7 +3119,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
struct sk_buff *skb;
/* We don't queue it, tcp_transmit_skb() sets ownership. */
- skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
+ skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
if (skb == NULL)
return -1;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6df36ad55a38..fc04711e80c8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -252,7 +252,8 @@ static void tcp_delack_timer(unsigned long data)
inet_csk(sk)->icsk_ack.blocked = 1;
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* deleguate our work to tcp_release_cb() */
- set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ sock_hold(sk);
}
bh_unlock_sock(sk);
sock_put(sk);
@@ -304,6 +305,35 @@ static void tcp_probe_timer(struct sock *sk)
}
/*
+ * Timer for Fast Open socket to retransmit SYNACK. Note that the
+ * sk here is the child socket, not the parent (listener) socket.
+ */
+static void tcp_fastopen_synack_timer(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ int max_retries = icsk->icsk_syn_retries ? :
+ sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
+ struct request_sock *req;
+
+ req = tcp_sk(sk)->fastopen_rsk;
+ req->rsk_ops->syn_ack_timeout(sk, req);
+
+ if (req->retrans >= max_retries) {
+ tcp_write_err(sk);
+ return;
+ }
+ /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
+ * returned from rtx_syn_ack() to make it more persistent like
+ * regular retransmit because if the child socket has been accepted
+ * it's not good to give up too easily.
+ */
+ req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+ req->retrans++;
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX);
+}
+
+/*
* The TCP retransmit timer.
*/
@@ -316,7 +346,15 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_resume_early_retransmit(sk);
return;
}
-
+ if (tp->fastopen_rsk) {
+ BUG_ON(sk->sk_state != TCP_SYN_RECV &&
+ sk->sk_state != TCP_FIN_WAIT1);
+ tcp_fastopen_synack_timer(sk);
+ /* Before we receive ACK to our SYN-ACK don't retransmit
+ * anything else (e.g., data or FIN segments).
+ */
+ return;
+ }
if (!tp->packets_out)
goto out;
@@ -481,7 +519,8 @@ static void tcp_write_timer(unsigned long data)
tcp_write_timer_handler(sk);
} else {
/* deleguate our work to tcp_release_cb() */
- set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags);
+ if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
+ sock_hold(sk);
}
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b4c3582a991f..79c8dbe59b54 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -758,7 +758,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
uh->check = CSUM_MANGLED_0;
send:
- err = ip_send_skb(skb);
+ err = ip_send_skb(sock_net(sk), skb);
if (err) {
if (err == -ENOBUFS && !inet->recverr) {
UDP_INC_STATS_USER(sock_net(sk),
@@ -1226,6 +1226,11 @@ try_again:
if (unlikely(err)) {
trace_kfree_skb(skb, udp_recvmsg);
+ if (!peeked) {
+ atomic_inc(&sk->sk_drops);
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
+ }
goto out_free;
}
@@ -2110,7 +2115,9 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
bucket, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
- 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
+ 0, 0L, 0,
+ from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
+ 0, sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
atomic_read(&sp->sk_drops), len);
}
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 16d0960062be..505b30ad9182 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -24,7 +24,9 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
if (!inet_diag_bc_sk(bc, sk))
return 0;
- return inet_sk_diag_fill(sk, NULL, skb, req, NETLINK_CB(cb->skb).pid,
+ return inet_sk_diag_fill(sk, NULL, skb, req,
+ sk_user_ns(NETLINK_CB(cb->skb).ssk),
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
@@ -69,14 +71,15 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
goto out;
err = inet_sk_diag_fill(sk, NULL, rep, req,
- NETLINK_CB(in_skb).pid,
+ sk_user_ns(NETLINK_CB(in_skb).ssk),
+ NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, nlh);
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(rep);
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+ err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
MSG_DONTWAIT);
if (err > 0)
err = 0;
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 58d23a572509..06814b6216dc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
if (skb_dst(skb) == NULL) {
const struct iphdr *iph = ip_hdr(skb);
- if (ip_route_input(skb, iph->daddr, iph->saddr,
- iph->tos, skb->dev))
+ if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ iph->tos, skb->dev))
goto drop;
}
return dst_input(skb);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c6281847f16a..681ea2f413e2 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_type = rt->rt_type;
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+ INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
return 0;
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 5728695b5449..4f7fe7270e37 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -201,6 +201,22 @@ config IPV6_TUNNEL
If unsure, say N.
+config IPV6_GRE
+ tristate "IPv6: GRE tunnel"
+ select IPV6_TUNNEL
+ ---help---
+ Tunneling means encapsulating data of one protocol type within
+ another protocol and sending it over a channel that understands the
+ encapsulating protocol. This particular tunneling driver implements
+ GRE (Generic Routing Encapsulation) and at this time allows
+ encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
+ This driver is useful if the other endpoint is a Cisco router: Cisco
+ likes GRE much better than the other Linux tunneling driver ("IP
+ tunneling" above). In addition, GRE allows multicast redistribution
+ through the tunnel.
+
+ Saying M here will produce a module called ip6_gre. If unsure, say N.
+
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
depends on EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 686934acfac1..b6d3f79151e2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_SIT) += sit.o
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
obj-y += addrconf_core.o exthdrs_core.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 79181819a24f..d7c56f8a5b4e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -127,8 +127,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
#endif
#ifdef CONFIG_IPV6_PRIVACY
-static int __ipv6_regen_rndid(struct inet6_dev *idev);
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
+static void __ipv6_regen_rndid(struct inet6_dev *idev);
+static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static void ipv6_regen_rndid(unsigned long data);
#endif
@@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
struct net_device *dev;
struct inet6_dev *idev;
- rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
+ for_each_netdev(net, dev) {
idev = __in6_dev_get(dev);
if (idev) {
int changed = (!idev->cnf.forwarding) ^ (!newf);
@@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
dev_forward_change(idev);
}
}
- rcu_read_unlock();
}
static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
@@ -790,10 +788,16 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
struct in6_addr prefix;
struct rt6_info *rt;
struct net *net = dev_net(ifp->idev->dev);
+ struct flowi6 fl6 = {};
+
ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
- rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1);
+ fl6.flowi6_oif = ifp->idev->dev->ifindex;
+ fl6.daddr = prefix;
+ rt = (struct rt6_info *)ip6_route_lookup(net, &fl6,
+ RT6_LOOKUP_F_IFACE);
- if (rt && addrconf_is_prefix_route(rt)) {
+ if (rt != net->ipv6.ip6_null_entry &&
+ addrconf_is_prefix_route(rt)) {
if (onlink == 0) {
ip6_del_rt(rt);
rt = NULL;
@@ -854,16 +858,7 @@ retry:
}
in6_ifa_hold(ifp);
memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
- if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
- spin_unlock_bh(&ifp->lock);
- write_unlock(&idev->lock);
- pr_warn("%s: regeneration of randomized interface id failed\n",
- __func__);
- in6_ifa_put(ifp);
- in6_dev_put(idev);
- ret = -1;
- goto out;
- }
+ __ipv6_try_regen_rndid(idev, tmpaddr);
memcpy(&addr.s6_addr[8], idev->rndid, 8);
age = (now - ifp->tstamp) / HZ;
tmp_valid_lft = min_t(__u32,
@@ -1081,8 +1076,10 @@ static int ipv6_get_saddr_eval(struct net *net,
break;
case IPV6_SADDR_RULE_PREFIX:
/* Rule 8: Use longest matching prefix */
- score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr,
- dst->addr);
+ ret = ipv6_addr_diff(&score->ifa->addr, dst->addr);
+ if (ret > score->ifa->prefix_len)
+ ret = score->ifa->prefix_len;
+ score->matchlen = ret;
break;
default:
ret = 0;
@@ -1095,7 +1092,7 @@ out:
return ret;
}
-int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
+int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
{
@@ -1602,7 +1599,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
#ifdef CONFIG_IPV6_PRIVACY
/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
-static int __ipv6_regen_rndid(struct inet6_dev *idev)
+static void __ipv6_regen_rndid(struct inet6_dev *idev)
{
regen:
get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -1629,8 +1626,6 @@ regen:
if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
goto regen;
}
-
- return 0;
}
static void ipv6_regen_rndid(unsigned long data)
@@ -1644,8 +1639,7 @@ static void ipv6_regen_rndid(unsigned long data)
if (idev->dead)
goto out;
- if (__ipv6_regen_rndid(idev) < 0)
- goto out;
+ __ipv6_regen_rndid(idev);
expires = jiffies +
idev->cnf.temp_prefered_lft * HZ -
@@ -1666,13 +1660,10 @@ out:
in6_dev_put(idev);
}
-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
{
- int ret = 0;
-
if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
- ret = __ipv6_regen_rndid(idev);
- return ret;
+ __ipv6_regen_rndid(idev);
}
#endif
@@ -1723,7 +1714,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
if (table == NULL)
return NULL;
- write_lock_bh(&table->tb6_lock);
+ read_lock_bh(&table->tb6_lock);
fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
if (!fn)
goto out;
@@ -1738,7 +1729,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
break;
}
out:
- write_unlock_bh(&table->tb6_lock);
+ read_unlock_bh(&table->tb6_lock);
return rt;
}
@@ -1778,14 +1769,6 @@ static void sit_route_add(struct net_device *dev)
}
#endif
-static void addrconf_add_lroute(struct net_device *dev)
-{
- struct in6_addr addr;
-
- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
- addrconf_prefix_route(&addr, 64, dev, 0, 0);
-}
-
static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -1803,8 +1786,6 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
if (!(dev->flags & IFF_LOOPBACK))
addrconf_add_mroute(dev);
- /* Add link local route */
- addrconf_add_lroute(dev);
return idev;
}
@@ -2483,10 +2464,9 @@ static void addrconf_sit_config(struct net_device *dev)
sit_add_v4_addrs(idev);
- if (dev->flags&IFF_POINTOPOINT) {
+ if (dev->flags&IFF_POINTOPOINT)
addrconf_add_mroute(dev);
- addrconf_add_lroute(dev);
- } else
+ else
sit_route_add(dev);
}
#endif
@@ -3551,12 +3531,12 @@ static inline int inet6_ifaddr_msgsize(void)
}
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct nlmsghdr *nlh;
u32 preferred, valid;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3594,7 +3574,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
}
static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
- u32 pid, u32 seq, int event, u16 flags)
+ u32 portid, u32 seq, int event, u16 flags)
{
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
@@ -3603,7 +3583,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3619,7 +3599,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
}
static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct nlmsghdr *nlh;
u8 scope = RT_SCOPE_UNIVERSE;
@@ -3628,7 +3608,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
scope = RT_SCOPE_SITE;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -3669,7 +3649,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (++ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifaddr(skb, ifa,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDR,
NLM_F_MULTI);
@@ -3685,7 +3665,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_GETMULTICAST,
NLM_F_MULTI);
@@ -3700,7 +3680,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifacaddr(skb, ifaca,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_GETANYCAST,
NLM_F_MULTI);
@@ -3822,7 +3802,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout_ifa;
}
- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
+ err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWADDR, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
@@ -3830,7 +3810,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
kfree_skb(skb);
goto errout_ifa;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout_ifa:
in6_ifa_put(ifa);
errout:
@@ -4032,14 +4012,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
}
static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
- u32 pid, u32 seq, int event, unsigned int flags)
+ u32 portid, u32 seq, int event, unsigned int flags)
{
struct net_device *dev = idev->dev;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
void *protoinfo;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -4097,7 +4077,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
if (!idev)
goto cont;
if (inet6_fill_ifinfo(skb, idev,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWLINK, NLM_F_MULTI) <= 0)
goto out;
@@ -4145,14 +4125,14 @@ static inline size_t inet6_prefix_nlmsg_size(void)
}
static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
- struct prefix_info *pinfo, u32 pid, u32 seq,
+ struct prefix_info *pinfo, u32 portid, u32 seq,
int event, unsigned int flags)
{
struct prefixmsg *pmsg;
struct nlmsghdr *nlh;
struct prefix_cacheinfo ci;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags);
if (nlh == NULL)
return -EMSGSIZE;
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index eb6a63632d3c..4be23da32b89 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -57,7 +57,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
}
/*
- * Default policy table (RFC3484 + extensions)
+ * Default policy table (RFC6724 + extensions)
*
* prefix addr_type label
* -------------------------------------------------------------------------
@@ -69,8 +69,12 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
* fc00::/7 N/A 5 ULA (RFC 4193)
* 2001::/32 N/A 6 Teredo (RFC 4380)
* 2001:10::/28 N/A 7 ORCHID (RFC 4843)
+ * fec0::/10 N/A 11 Site-local
+ * (deprecated by RFC3879)
+ * 3ffe::/16 N/A 12 6bone
*
* Note: 0xffffffff is used if we do not have any policies.
+ * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724.
*/
#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
@@ -88,10 +92,18 @@ static const __net_initdata struct ip6addrlbl_init_table
.prefix = &(struct in6_addr){{{ 0xfc }}},
.prefixlen = 7,
.label = 5,
+ },{ /* fec0::/10 */
+ .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}},
+ .prefixlen = 10,
+ .label = 11,
},{ /* 2002::/16 */
.prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
.prefixlen = 16,
.label = 2,
+ },{ /* 3ffe::/16 */
+ .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}},
+ .prefixlen = 16,
+ .label = 12,
},{ /* 2001::/32 */
.prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
.prefixlen = 32,
@@ -470,10 +482,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
static int ip6addrlbl_fill(struct sk_buff *skb,
struct ip6addrlbl_entry *p,
u32 lseq,
- u32 pid, u32 seq, int event,
+ u32 portid, u32 seq, int event,
unsigned int flags)
{
- struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
+ struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event,
sizeof(struct ifaddrlblmsg), flags);
if (!nlh)
return -EMSGSIZE;
@@ -503,7 +515,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
net_eq(ip6addrlbl_net(p), net)) {
if ((err = ip6addrlbl_fill(skb, p,
ip6addrlbl_table.seq,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
NLM_F_MULTI)) <= 0)
@@ -574,7 +586,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
}
err = ip6addrlbl_fill(skb, p, lseq,
- NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
RTM_NEWADDRLABEL, 0);
ip6addrlbl_put(p);
@@ -585,7 +597,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
goto out;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
out:
return err;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6dc7fd353ef5..282f3723ee19 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
struct esp_data *esp = x->data;
/* skb is pure payload to encrypt */
- err = -ENOMEM;
-
aead = esp->aead;
alen = crypto_aead_authsize(aead);
@@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
}
tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
- if (!tmp)
+ if (!tmp) {
+ err = -ENOMEM;
goto error;
+ }
seqhi = esp_tmp_seqhi(tmp);
iv = esp_tmp_iv(aead, tmp, seqhilen);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0251a6005be8..c4f934176cab 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -175,33 +175,12 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
const struct in6_addr *saddr)
{
__ip6_dst_store(sk, dst, daddr, saddr);
-
-#ifdef CONFIG_XFRM
- {
- struct rt6_info *rt = (struct rt6_info *)dst;
- rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
- }
-#endif
}
static inline
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
{
- struct dst_entry *dst;
-
- dst = __sk_dst_check(sk, cookie);
-
-#ifdef CONFIG_XFRM
- if (dst) {
- struct rt6_info *rt = (struct rt6_info *)dst;
- if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) {
- __sk_dst_reset(sk);
- dst = NULL;
- }
- }
-#endif
-
- return dst;
+ return __sk_dst_check(sk, cookie);
}
static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 13690d650c3e..24995a93ef8c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -514,7 +514,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
ln = node_alloc();
if (!ln)
- return NULL;
+ return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
ln->parent = pn;
@@ -561,7 +561,7 @@ insert_above:
node_free(in);
if (ln)
node_free(ln);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
/*
@@ -611,7 +611,7 @@ insert_above:
ln = node_alloc();
if (!ln)
- return NULL;
+ return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
@@ -777,11 +777,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
- fn = NULL;
- }
-
- if (!fn)
goto out;
+ }
pn = fn;
@@ -819,12 +816,13 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required);
- if (!sn) {
+ if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
root, and then (in st_failure) stale node
in main tree.
*/
node_free(sfn);
+ err = PTR_ERR(sn);
goto st_failure;
}
@@ -839,10 +837,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
- sn = NULL;
- }
- if (!sn)
goto st_failure;
+ }
}
if (!fn->leaf) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 9772fbd8a3f5..90bbefb57943 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -22,6 +22,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -91,6 +92,8 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
static void fl_free(struct ip6_flowlabel *fl)
{
if (fl) {
+ if (fl->share == IPV6_FL_S_PROCESS)
+ put_pid(fl->owner.pid);
release_net(fl->fl_net);
kfree(fl->opt);
}
@@ -394,10 +397,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
case IPV6_FL_S_ANY:
break;
case IPV6_FL_S_PROCESS:
- fl->owner = current->pid;
+ fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
break;
case IPV6_FL_S_USER:
- fl->owner = current_euid();
+ fl->owner.uid = current_euid();
break;
default:
err = -EINVAL;
@@ -561,7 +564,10 @@ recheck:
err = -EPERM;
if (fl1->share == IPV6_FL_S_EXCL ||
fl1->share != fl->share ||
- fl1->owner != fl->owner)
+ ((fl1->share == IPV6_FL_S_PROCESS) &&
+ (fl1->owner.pid == fl->owner.pid)) ||
+ ((fl1->share == IPV6_FL_S_USER) &&
+ uid_eq(fl1->owner.uid, fl->owner.uid)))
goto release;
err = -EINVAL;
@@ -621,6 +627,7 @@ done:
struct ip6fl_iter_state {
struct seq_net_private p;
+ struct pid_namespace *pid_ns;
int bucket;
};
@@ -699,6 +706,7 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v)
static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
if (v == SEQ_START_TOKEN)
seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
"Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
@@ -708,7 +716,11 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v)
"%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
(unsigned int)ntohl(fl->label),
fl->share,
- (int)fl->owner,
+ ((fl->share == IPV6_FL_S_PROCESS) ?
+ pid_nr_ns(fl->owner.pid, state->pid_ns) :
+ ((fl->share == IPV6_FL_S_USER) ?
+ from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
+ 0)),
atomic_read(&fl->users),
fl->linger/HZ,
(long)(fl->expires - jiffies)/HZ,
@@ -727,8 +739,29 @@ static const struct seq_operations ip6fl_seq_ops = {
static int ip6fl_seq_open(struct inode *inode, struct file *file)
{
- return seq_open_net(inode, file, &ip6fl_seq_ops,
- sizeof(struct ip6fl_iter_state));
+ struct seq_file *seq;
+ struct ip6fl_iter_state *state;
+ int err;
+
+ err = seq_open_net(inode, file, &ip6fl_seq_ops,
+ sizeof(struct ip6fl_iter_state));
+
+ if (!err) {
+ seq = file->private_data;
+ state = ip6fl_seq_private(seq);
+ rcu_read_lock();
+ state->pid_ns = get_pid_ns(task_active_pid_ns(current));
+ rcu_read_unlock();
+ }
+ return err;
+}
+
+static int ip6fl_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+ put_pid_ns(state->pid_ns);
+ return seq_release_net(inode, file);
}
static const struct file_operations ip6fl_seq_fops = {
@@ -736,7 +769,7 @@ static const struct file_operations ip6fl_seq_fops = {
.open = ip6fl_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_net,
+ .release = ip6fl_seq_release,
};
static int __net_init ip6_flowlabel_proc_init(struct net *net)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
new file mode 100644
index 000000000000..0185679c5f53
--- /dev/null
+++ b/net/ipv6/ip6_gre.c
@@ -0,0 +1,1770 @@
+/*
+ * GRE over IPv6 protocol decoder.
+ *
+ * Authors: Dmitry Kozlov (xeb@mail.ru)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/hash.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
+
+
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
+
+#define HASH_SIZE_SHIFT 5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static int ip6gre_net_id __read_mostly;
+struct ip6gre_net {
+ struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+
+ struct net_device *fb_tunnel_dev;
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static int ip6gre_tunnel_init(struct net_device *dev);
+static void ip6gre_tunnel_setup(struct net_device *dev);
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+
+/* Tunnel hash table */
+
+/*
+ 4 hash tables:
+
+ 3: (remote,local)
+ 2: (remote,*)
+ 1: (*,local)
+ 0: (*,*)
+
+ We require exact key match i.e. if a key is present in packet
+ it will match only tunnel with the same key; if it is not present,
+ it will match only keyless tunnel.
+
+ All keysless packets, if not matched configured keyless tunnels
+ will match fallback tunnel.
+ */
+
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+static u32 HASH_ADDR(const struct in6_addr *addr)
+{
+ u32 hash = ipv6_addr_hash(addr);
+
+ return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+#define tunnels_r_l tunnels[3]
+#define tunnels_r tunnels[2]
+#define tunnels_l tunnels[1]
+#define tunnels_wc tunnels[0]
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
+
+#define for_each_ip_tunnel_rcu(start) \
+ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/* often modified stats are per cpu, other are shared (netdev->stats) */
+struct pcpu_tstats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 tx_packets;
+ u64 tx_bytes;
+ struct u64_stats_sync syncp;
+};
+
+static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ }
+
+ tot->multicast = dev->stats.multicast;
+ tot->rx_crc_errors = dev->stats.rx_crc_errors;
+ tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+ tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_frame_errors = dev->stats.rx_frame_errors;
+ tot->rx_errors = dev->stats.rx_errors;
+
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+
+ return tot;
+}
+
+/* Given src, dst and key, find appropriate for input tunnel. */
+
+static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
+ const struct in6_addr *remote, const struct in6_addr *local,
+ __be32 key, __be16 gre_proto)
+{
+ struct net *net = dev_net(dev);
+ int link = dev->ifindex;
+ unsigned int h0 = HASH_ADDR(remote);
+ unsigned int h1 = HASH_KEY(key);
+ struct ip6_tnl *t, *cand = NULL;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+ ARPHRD_ETHER : ARPHRD_IP6GRE;
+ int score, cand_score = 4;
+
+ for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
+ if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+ !ipv6_addr_equal(remote, &t->parms.raddr) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
+ if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
+ if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
+ (!ipv6_addr_equal(local, &t->parms.raddr) ||
+ !ipv6_addr_is_multicast(local))) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
+ if (t->parms.i_key != key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IP6GRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
+ }
+ }
+
+ if (cand != NULL)
+ return cand;
+
+ dev = ign->fb_tunnel_dev;
+ if (dev->flags & IFF_UP)
+ return netdev_priv(dev);
+
+ return NULL;
+}
+
+static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
+ const struct __ip6_tnl_parm *p)
+{
+ const struct in6_addr *remote = &p->raddr;
+ const struct in6_addr *local = &p->laddr;
+ unsigned int h = HASH_KEY(p->i_key);
+ int prio = 0;
+
+ if (!ipv6_addr_any(local))
+ prio |= 1;
+ if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
+ prio |= 2;
+ h ^= HASH_ADDR(remote);
+ }
+
+ return &ign->tunnels[prio][h];
+}
+
+static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
+ const struct ip6_tnl *t)
+{
+ return __ip6gre_bucket(ign, &t->parms);
+}
+
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+
+ rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+ rcu_assign_pointer(*tp, t);
+}
+
+static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+ struct ip6_tnl __rcu **tp;
+ struct ip6_tnl *iter;
+
+ for (tp = ip6gre_bucket(ign, t);
+ (iter = rtnl_dereference(*tp)) != NULL;
+ tp = &iter->next) {
+ if (t == iter) {
+ rcu_assign_pointer(*tp, t->next);
+ break;
+ }
+ }
+}
+
+static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
+ const struct __ip6_tnl_parm *parms,
+ int type)
+{
+ const struct in6_addr *remote = &parms->raddr;
+ const struct in6_addr *local = &parms->laddr;
+ __be32 key = parms->i_key;
+ int link = parms->link;
+ struct ip6_tnl *t;
+ struct ip6_tnl __rcu **tp;
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ for (tp = __ip6gre_bucket(ign, parms);
+ (t = rtnl_dereference(*tp)) != NULL;
+ tp = &t->next)
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr) &&
+ key == t->parms.i_key &&
+ link == t->parms.link &&
+ type == t->dev->type)
+ break;
+
+ return t;
+}
+
+static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
+ const struct __ip6_tnl_parm *parms, int create)
+{
+ struct ip6_tnl *t, *nt;
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
+ if (t || !create)
+ return t;
+
+ if (parms->name[0])
+ strlcpy(name, parms->name, IFNAMSIZ);
+ else
+ strcpy(name, "ip6gre%d");
+
+ dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
+ if (!dev)
+ return NULL;
+
+ dev_net_set(dev, net);
+
+ nt = netdev_priv(dev);
+ nt->parms = *parms;
+ dev->rtnl_link_ops = &ip6gre_link_ops;
+
+ nt->dev = dev;
+ ip6gre_tnl_link_config(nt, 1);
+
+ if (register_netdevice(dev) < 0)
+ goto failed_free;
+
+ /* Can use a lockless transmit, unless we generate output sequences */
+ if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= NETIF_F_LLTX;
+
+ dev_hold(dev);
+ ip6gre_tunnel_link(ign, nt);
+ return nt;
+
+failed_free:
+ free_netdev(dev);
+ return NULL;
+}
+
+static void ip6gre_tunnel_uninit(struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ ip6gre_tunnel_unlink(ign, netdev_priv(dev));
+ dev_put(dev);
+}
+
+
+static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+ __be16 *p = (__be16 *)(skb->data + offset);
+ int grehlen = offset + 4;
+ struct ip6_tnl *t;
+ __be16 flags;
+
+ flags = p[0];
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ return;
+ if (flags&GRE_KEY) {
+ grehlen += 4;
+ if (flags&GRE_CSUM)
+ grehlen += 4;
+ }
+ }
+
+ /* If only 8 bytes returned, keyed message will be dropped here */
+ if (!pskb_may_pull(skb, grehlen))
+ return;
+ ipv6h = (const struct ipv6hdr *)skb->data;
+ p = (__be16 *)(skb->data + offset);
+
+ t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
+ flags & GRE_KEY ?
+ *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
+ p[1]);
+ if (t == NULL)
+ return;
+
+ switch (type) {
+ __u32 teli;
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ __u32 mtu;
+ case ICMPV6_DEST_UNREACH:
+ net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
+ break;
+ case ICMPV6_TIME_EXCEED:
+ if (code == ICMPV6_EXC_HOPLIMIT) {
+ net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
+ }
+ break;
+ case ICMPV6_PARAMPROB:
+ teli = 0;
+ if (code == ICMPV6_HDR_FIELD)
+ teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
+
+ if (teli && teli == info - 2) {
+ tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+ if (tel->encap_limit == 0) {
+ net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+ t->parms.name);
+ }
+ } else {
+ net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
+ }
+ break;
+ case ICMPV6_PKT_TOOBIG:
+ mtu = info - offset;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ t->dev->mtu = mtu;
+ break;
+ }
+
+ if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
+ t->err_count++;
+ else
+ t->err_count = 1;
+ t->err_time = jiffies;
+}
+
+static int ip6gre_rcv(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ipv6h;
+ u8 *h;
+ __be16 flags;
+ __sum16 csum = 0;
+ __be32 key = 0;
+ u32 seqno = 0;
+ struct ip6_tnl *tunnel;
+ int offset = 4;
+ __be16 gre_proto;
+ int err;
+
+ if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+ goto drop;
+
+ ipv6h = ipv6_hdr(skb);
+ h = skb->data;
+ flags = *(__be16 *)h;
+
+ if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
+ /* - Version must be 0.
+ - We do not support routing headers.
+ */
+ if (flags&(GRE_VERSION|GRE_ROUTING))
+ goto drop;
+
+ if (flags&GRE_CSUM) {
+ switch (skb->ip_summed) {
+ case CHECKSUM_COMPLETE:
+ csum = csum_fold(skb->csum);
+ if (!csum)
+ break;
+ /* fall through */
+ case CHECKSUM_NONE:
+ skb->csum = 0;
+ csum = __skb_checksum_complete(skb);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ }
+ offset += 4;
+ }
+ if (flags&GRE_KEY) {
+ key = *(__be32 *)(h + offset);
+ offset += 4;
+ }
+ if (flags&GRE_SEQ) {
+ seqno = ntohl(*(__be32 *)(h + offset));
+ offset += 4;
+ }
+ }
+
+ gre_proto = *(__be16 *)(h + 2);
+
+ tunnel = ip6gre_tunnel_lookup(skb->dev,
+ &ipv6h->saddr, &ipv6h->daddr, key,
+ gre_proto);
+ if (tunnel) {
+ struct pcpu_tstats *tstats;
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto drop;
+
+ if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
+ tunnel->dev->stats.rx_dropped++;
+ goto drop;
+ }
+
+ secpath_reset(skb);
+
+ skb->protocol = gre_proto;
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IP
+ * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+ */
+ if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
+ skb->protocol = htons(ETH_P_IP);
+ if ((*(h + offset) & 0xF0) != 0x40)
+ offset += 4;
+ }
+
+ skb->mac_header = skb->network_header;
+ __pskb_pull(skb, offset);
+ skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
+ skb->pkt_type = PACKET_HOST;
+
+ if (((flags&GRE_CSUM) && csum) ||
+ (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+ tunnel->dev->stats.rx_crc_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+ if (tunnel->parms.i_flags&GRE_SEQ) {
+ if (!(flags&GRE_SEQ) ||
+ (tunnel->i_seqno &&
+ (s32)(seqno - tunnel->i_seqno) < 0)) {
+ tunnel->dev->stats.rx_fifo_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+ tunnel->i_seqno = seqno + 1;
+ }
+
+ /* Warning: All skb pointers will be invalidated! */
+ if (tunnel->dev->type == ARPHRD_ETHER) {
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ tunnel->dev->stats.rx_length_errors++;
+ tunnel->dev->stats.rx_errors++;
+ goto drop;
+ }
+
+ ipv6h = ipv6_hdr(skb);
+ skb->protocol = eth_type_trans(skb, tunnel->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+ }
+
+ __skb_tunnel_rx(skb, tunnel->dev);
+
+ skb_reset_network_header(skb);
+
+ err = IP6_ECN_decapsulate(ipv6h, skb);
+ if (unlikely(err)) {
+ if (log_ecn_error)
+ net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
+ &ipv6h->saddr,
+ ipv6_get_dsfield(ipv6h));
+ if (err > 1) {
+ ++tunnel->dev->stats.rx_frame_errors;
+ ++tunnel->dev->stats.rx_errors;
+ goto drop;
+ }
+ }
+
+ tstats = this_cpu_ptr(tunnel->dev->tstats);
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += skb->len;
+ u64_stats_update_end(&tstats->syncp);
+
+ netif_rx(skb);
+
+ return 0;
+ }
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+struct ipv6_tel_txoption {
+ struct ipv6_txoptions ops;
+ __u8 dst_opt[8];
+};
+
+static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+{
+ memset(opt, 0, sizeof(struct ipv6_tel_txoption));
+
+ opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
+ opt->dst_opt[3] = 1;
+ opt->dst_opt[4] = encap_limit;
+ opt->dst_opt[5] = IPV6_TLV_PADN;
+ opt->dst_opt[6] = 1;
+
+ opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+ opt->ops.opt_nflen = 8;
+}
+
+static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
+ struct net_device *dev,
+ __u8 dsfield,
+ struct flowi6 *fl6,
+ int encap_limit,
+ __u32 *pmtu)
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ struct net_device *tdev; /* Device to other host */
+ struct ipv6hdr *ipv6h; /* Our new IP header */
+ unsigned int max_headroom; /* The extra header space needed */
+ int gre_hlen;
+ struct ipv6_tel_txoption opt;
+ int mtu;
+ struct dst_entry *dst = NULL, *ndst = NULL;
+ struct net_device_stats *stats = &tunnel->dev->stats;
+ int err = -1;
+ u8 proto;
+ int pkt_len;
+ struct sk_buff *new_skb;
+
+ if (dev->type == ARPHRD_ETHER)
+ IPCB(skb)->flags = 0;
+
+ if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
+ gre_hlen = 0;
+ ipv6h = (struct ipv6hdr *)skb->data;
+ fl6->daddr = ipv6h->daddr;
+ } else {
+ gre_hlen = tunnel->hlen;
+ fl6->daddr = tunnel->parms.raddr;
+ }
+
+ if (!fl6->flowi6_mark)
+ dst = ip6_tnl_dst_check(tunnel);
+
+ if (!dst) {
+ ndst = ip6_route_output(net, NULL, fl6);
+
+ if (ndst->error)
+ goto tx_err_link_failure;
+ ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+ if (IS_ERR(ndst)) {
+ err = PTR_ERR(ndst);
+ ndst = NULL;
+ goto tx_err_link_failure;
+ }
+ dst = ndst;
+ }
+
+ tdev = dst->dev;
+
+ if (tdev == dev) {
+ stats->collisions++;
+ net_warn_ratelimited("%s: Local routing loop detected!\n",
+ tunnel->parms.name);
+ goto tx_err_dst_release;
+ }
+
+ mtu = dst_mtu(dst) - sizeof(*ipv6h);
+ if (encap_limit >= 0) {
+ max_headroom += 8;
+ mtu -= 8;
+ }
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+ if (skb->len > mtu) {
+ *pmtu = mtu;
+ err = -EMSGSIZE;
+ goto tx_err_dst_release;
+ }
+
+ if (tunnel->err_count > 0) {
+ if (time_before(jiffies,
+ tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
+ tunnel->err_count--;
+
+ dst_link_failure(skb);
+ } else
+ tunnel->err_count = 0;
+ }
+
+ max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+
+ if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+ (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
+ new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
+ if (!new_skb)
+ goto tx_err_dst_release;
+
+ if (skb->sk)
+ skb_set_owner_w(new_skb, skb->sk);
+ consume_skb(skb);
+ skb = new_skb;
+ }
+
+ skb_dst_drop(skb);
+
+ if (fl6->flowi6_mark) {
+ skb_dst_set(skb, dst);
+ ndst = NULL;
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+
+ skb->transport_header = skb->network_header;
+
+ proto = NEXTHDR_GRE;
+ if (encap_limit >= 0) {
+ init_tel_txopt(&opt, encap_limit);
+ ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+ }
+
+ skb_push(skb, gre_hlen);
+ skb_reset_network_header(skb);
+
+ /*
+ * Push down and install the IP header.
+ */
+ ipv6h = ipv6_hdr(skb);
+ *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
+ dsfield = INET_ECN_encapsulate(0, dsfield);
+ ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+ ipv6h->hop_limit = tunnel->parms.hop_limit;
+ ipv6h->nexthdr = proto;
+ ipv6h->saddr = fl6->saddr;
+ ipv6h->daddr = fl6->daddr;
+
+ ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
+ ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
+ htons(ETH_P_TEB) : skb->protocol;
+
+ if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+ __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
+
+ if (tunnel->parms.o_flags&GRE_SEQ) {
+ ++tunnel->o_seqno;
+ *ptr = htonl(tunnel->o_seqno);
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_KEY) {
+ *ptr = tunnel->parms.o_key;
+ ptr--;
+ }
+ if (tunnel->parms.o_flags&GRE_CSUM) {
+ *ptr = 0;
+ *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
+ skb->len - sizeof(struct ipv6hdr));
+ }
+ }
+
+ nf_reset(skb);
+ pkt_len = skb->len;
+ err = ip6_local_out(skb);
+
+ if (net_xmit_eval(err) == 0) {
+ struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
+
+ tstats->tx_bytes += pkt_len;
+ tstats->tx_packets++;
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
+ }
+
+ if (ndst)
+ ip6_tnl_dst_store(tunnel, ndst);
+
+ return 0;
+tx_err_link_failure:
+ stats->tx_carrier_errors++;
+ dst_link_failure(skb);
+tx_err_dst_release:
+ dst_release(ndst);
+ return err;
+}
+
+static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ const struct iphdr *iph = ip_hdr(skb);
+ int encap_limit = -1;
+ struct flowi6 fl6;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPIP;
+
+ dsfield = ipv4_get_dsfield(iph);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+ & IPV6_TCLASS_MASK;
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ if (err == -EMSGSIZE)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ int encap_limit = -1;
+ __u16 offset;
+ struct flowi6 fl6;
+ __u8 dsfield;
+ __u32 mtu;
+ int err;
+
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ return -1;
+
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_IPV6;
+
+ dsfield = ipv6_get_dsfield(ipv6h);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6.flowi6_mark = skb->mark;
+
+ err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+ if (err != 0) {
+ if (err == -EMSGSIZE)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * @t: the outgoing tunnel device
+ * @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ * Avoid trivial tunneling loop by checking that tunnel exit-point
+ * doesn't match source of incoming packet.
+ *
+ * Return:
+ * 1 if conflict,
+ * 0 else
+ **/
+
+static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
+ const struct ipv6hdr *hdr)
+{
+ return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ int encap_limit = -1;
+ struct flowi6 fl6;
+ __u32 mtu;
+ int err;
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ encap_limit = t->parms.encap_limit;
+
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ fl6.flowi6_proto = skb->protocol;
+
+ err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
+
+ return err;
+}
+
+static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct net_device_stats *stats = &t->dev->stats;
+ int ret;
+
+ if (!ip6_tnl_xmit_ctl(t))
+ return -1;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ret = ip6gre_xmit_ipv4(skb, dev);
+ break;
+ case htons(ETH_P_IPV6):
+ ret = ip6gre_xmit_ipv6(skb, dev);
+ break;
+ default:
+ ret = ip6gre_xmit_other(skb, dev);
+ break;
+ }
+
+ if (ret < 0)
+ goto tx_err;
+
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+ struct net_device *dev = t->dev;
+ struct __ip6_tnl_parm *p = &t->parms;
+ struct flowi6 *fl6 = &t->fl.u.ip6;
+ int addend = sizeof(struct ipv6hdr) + 4;
+
+ if (dev->type != ARPHRD_ETHER) {
+ memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+ }
+
+ /* Set up flowi template */
+ fl6->saddr = p->laddr;
+ fl6->daddr = p->raddr;
+ fl6->flowi6_oif = p->link;
+ fl6->flowlabel = 0;
+
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+ fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+ if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
+ fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+
+ p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+ p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+ if (p->flags&IP6_TNL_F_CAP_XMIT &&
+ p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
+ dev->flags |= IFF_POINTOPOINT;
+ else
+ dev->flags &= ~IFF_POINTOPOINT;
+
+ dev->iflink = p->link;
+
+ /* Precalculate GRE options length */
+ if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+ if (t->parms.o_flags&GRE_CSUM)
+ addend += 4;
+ if (t->parms.o_flags&GRE_KEY)
+ addend += 4;
+ if (t->parms.o_flags&GRE_SEQ)
+ addend += 4;
+ }
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ int strict = (ipv6_addr_type(&p->raddr) &
+ (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
+
+ struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ &p->raddr, &p->laddr,
+ p->link, strict);
+
+ if (rt == NULL)
+ return;
+
+ if (rt->dst.dev) {
+ dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
+
+ if (set_mtu) {
+ dev->mtu = rt->dst.dev->mtu - addend;
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+
+ if (dev->mtu < IPV6_MIN_MTU)
+ dev->mtu = IPV6_MIN_MTU;
+ }
+ }
+ dst_release(&rt->dst);
+ }
+
+ t->hlen = addend;
+}
+
+static int ip6gre_tnl_change(struct ip6_tnl *t,
+ const struct __ip6_tnl_parm *p, int set_mtu)
+{
+ t->parms.laddr = p->laddr;
+ t->parms.raddr = p->raddr;
+ t->parms.flags = p->flags;
+ t->parms.hop_limit = p->hop_limit;
+ t->parms.encap_limit = p->encap_limit;
+ t->parms.flowinfo = p->flowinfo;
+ t->parms.link = p->link;
+ t->parms.proto = p->proto;
+ t->parms.i_key = p->i_key;
+ t->parms.o_key = p->o_key;
+ t->parms.i_flags = p->i_flags;
+ t->parms.o_flags = p->o_flags;
+ ip6_tnl_dst_reset(t);
+ ip6gre_tnl_link_config(t, set_mtu);
+ return 0;
+}
+
+static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
+ const struct ip6_tnl_parm2 *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->flags = u->flags;
+ p->hop_limit = u->hop_limit;
+ p->encap_limit = u->encap_limit;
+ p->flowinfo = u->flowinfo;
+ p->link = u->link;
+ p->i_key = u->i_key;
+ p->o_key = u->o_key;
+ p->i_flags = u->i_flags;
+ p->o_flags = u->o_flags;
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
+ const struct __ip6_tnl_parm *p)
+{
+ u->proto = IPPROTO_GRE;
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->flags = p->flags;
+ u->hop_limit = p->hop_limit;
+ u->encap_limit = p->encap_limit;
+ u->flowinfo = p->flowinfo;
+ u->link = p->link;
+ u->i_key = p->i_key;
+ u->o_key = p->o_key;
+ u->i_flags = p->i_flags;
+ u->o_flags = p->o_flags;
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
+static int ip6gre_tunnel_ioctl(struct net_device *dev,
+ struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ struct ip6_tnl_parm2 p;
+ struct __ip6_tnl_parm p1;
+ struct ip6_tnl *t;
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ t = NULL;
+ if (dev == ign->fb_tunnel_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ err = -EFAULT;
+ break;
+ }
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, 0);
+ }
+ if (t == NULL)
+ t = netdev_priv(dev);
+ ip6gre_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ break;
+
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+
+ err = -EINVAL;
+ if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
+ goto done;
+
+ if (!(p.i_flags&GRE_KEY))
+ p.i_key = 0;
+ if (!(p.o_flags&GRE_KEY))
+ p.o_key = 0;
+
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
+
+ if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+ if (t != NULL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ } else {
+ t = netdev_priv(dev);
+
+ ip6gre_tunnel_unlink(ign, t);
+ synchronize_net();
+ ip6gre_tnl_change(t, &p1, 1);
+ ip6gre_tunnel_link(ign, t);
+ netdev_state_change(dev);
+ }
+ }
+
+ if (t) {
+ err = 0;
+
+ ip6gre_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ err = -EFAULT;
+ } else
+ err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+ break;
+
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ goto done;
+
+ if (dev == ign->fb_tunnel_dev) {
+ err = -EFAULT;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ goto done;
+ err = -ENOENT;
+ ip6gre_tnl_parm_from_user(&p1, &p);
+ t = ip6gre_tunnel_locate(net, &p1, 0);
+ if (t == NULL)
+ goto done;
+ err = -EPERM;
+ if (t == netdev_priv(ign->fb_tunnel_dev))
+ goto done;
+ dev = t->dev;
+ }
+ unregister_netdevice(dev);
+ err = 0;
+ break;
+
+ default:
+ err = -EINVAL;
+ }
+
+done:
+ return err;
+}
+
+static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+ if (new_mtu < 68 ||
+ new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type,
+ const void *daddr, const void *saddr, unsigned int len)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
+ __be16 *p = (__be16 *)(ipv6h+1);
+
+ *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
+ ipv6h->hop_limit = t->parms.hop_limit;
+ ipv6h->nexthdr = NEXTHDR_GRE;
+ ipv6h->saddr = t->parms.laddr;
+ ipv6h->daddr = t->parms.raddr;
+
+ p[0] = t->parms.o_flags;
+ p[1] = htons(type);
+
+ /*
+ * Set the source hardware address.
+ */
+
+ if (saddr)
+ memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
+ if (daddr)
+ memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
+ if (!ipv6_addr_any(&ipv6h->daddr))
+ return t->hlen;
+
+ return -t->hlen;
+}
+
+static const struct header_ops ip6gre_header_ops = {
+ .create = ip6gre_header,
+};
+
+static const struct net_device_ops ip6gre_netdev_ops = {
+ .ndo_init = ip6gre_tunnel_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6gre_tunnel_xmit,
+ .ndo_do_ioctl = ip6gre_tunnel_ioctl,
+ .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_get_stats64 = ip6gre_get_stats64,
+};
+
+static void ip6gre_dev_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
+static void ip6gre_tunnel_setup(struct net_device *dev)
+{
+ struct ip6_tnl *t;
+
+ dev->netdev_ops = &ip6gre_netdev_ops;
+ dev->destructor = ip6gre_dev_free;
+
+ dev->type = ARPHRD_IP6GRE;
+ dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
+ dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
+ t = netdev_priv(dev);
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+ dev->flags |= IFF_NOARP;
+ dev->iflink = 0;
+ dev->addr_len = sizeof(struct in6_addr);
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+static int ip6gre_tunnel_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ strcpy(tunnel->parms.name, dev->name);
+
+ memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
+ memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
+
+ if (ipv6_addr_any(&tunnel->parms.raddr))
+ dev->header_ops = &ip6gre_header_ops;
+
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void ip6gre_fb_tunnel_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ strcpy(tunnel->parms.name, dev->name);
+
+ tunnel->hlen = sizeof(struct ipv6hdr) + 4;
+
+ dev_hold(dev);
+}
+
+
+static struct inet6_protocol ip6gre_protocol __read_mostly = {
+ .handler = ip6gre_rcv,
+ .err_handler = ip6gre_err,
+ .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
+ struct list_head *head)
+{
+ int prio;
+
+ for (prio = 0; prio < 4; prio++) {
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip6_tnl *t;
+
+ t = rtnl_dereference(ign->tunnels[prio][h]);
+
+ while (t != NULL) {
+ unregister_netdevice_queue(t->dev, head);
+ t = rtnl_dereference(t->next);
+ }
+ }
+ }
+}
+
+static int __net_init ip6gre_init_net(struct net *net)
+{
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int err;
+
+ ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
+ ip6gre_tunnel_setup);
+ if (!ign->fb_tunnel_dev) {
+ err = -ENOMEM;
+ goto err_alloc_dev;
+ }
+ dev_net_set(ign->fb_tunnel_dev, net);
+
+ ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
+ ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
+
+ err = register_netdev(ign->fb_tunnel_dev);
+ if (err)
+ goto err_reg_dev;
+
+ rcu_assign_pointer(ign->tunnels_wc[0],
+ netdev_priv(ign->fb_tunnel_dev));
+ return 0;
+
+err_reg_dev:
+ ip6gre_dev_free(ign->fb_tunnel_dev);
+err_alloc_dev:
+ return err;
+}
+
+static void __net_exit ip6gre_exit_net(struct net *net)
+{
+ struct ip6gre_net *ign;
+ LIST_HEAD(list);
+
+ ign = net_generic(net, ip6gre_net_id);
+ rtnl_lock();
+ ip6gre_destroy_tunnels(ign, &list);
+ unregister_netdevice_many(&list);
+ rtnl_unlock();
+}
+
+static struct pernet_operations ip6gre_net_ops = {
+ .init = ip6gre_init_net,
+ .exit = ip6gre_exit_net,
+ .id = &ip6gre_net_id,
+ .size = sizeof(struct ip6gre_net),
+};
+
+static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ __be16 flags;
+
+ if (!data)
+ return 0;
+
+ flags = 0;
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (flags & (GRE_VERSION|GRE_ROUTING))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ struct in6_addr daddr;
+
+ if (tb[IFLA_ADDRESS]) {
+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+ return -EINVAL;
+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+ return -EADDRNOTAVAIL;
+ }
+
+ if (!data)
+ goto out;
+
+ if (data[IFLA_GRE_REMOTE]) {
+ nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+ if (ipv6_addr_any(&daddr))
+ return -EINVAL;
+ }
+
+out:
+ return ip6gre_tunnel_validate(tb, data);
+}
+
+
+static void ip6gre_netlink_parms(struct nlattr *data[],
+ struct __ip6_tnl_parm *parms)
+{
+ memset(parms, 0, sizeof(*parms));
+
+ if (!data)
+ return;
+
+ if (data[IFLA_GRE_LINK])
+ parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+ if (data[IFLA_GRE_IFLAGS])
+ parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+
+ if (data[IFLA_GRE_OFLAGS])
+ parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+
+ if (data[IFLA_GRE_IKEY])
+ parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+ if (data[IFLA_GRE_OKEY])
+ parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+ if (data[IFLA_GRE_LOCAL])
+ nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
+
+ if (data[IFLA_GRE_REMOTE])
+ nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+
+ if (data[IFLA_GRE_TTL])
+ parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
+
+ if (data[IFLA_GRE_ENCAP_LIMIT])
+ parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
+
+ if (data[IFLA_GRE_FLOWINFO])
+ parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+
+ if (data[IFLA_GRE_FLAGS])
+ parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
+}
+
+static int ip6gre_tap_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ strcpy(tunnel->parms.name, dev->name);
+
+ ip6gre_tnl_link_config(tunnel, 1);
+
+ dev->tstats = alloc_percpu(struct pcpu_tstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static const struct net_device_ops ip6gre_tap_netdev_ops = {
+ .ndo_init = ip6gre_tap_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6gre_tunnel_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip6gre_tunnel_change_mtu,
+ .ndo_get_stats64 = ip6gre_get_stats64,
+};
+
+static void ip6gre_tap_setup(struct net_device *dev)
+{
+
+ ether_setup(dev);
+
+ dev->netdev_ops = &ip6gre_tap_netdev_ops;
+ dev->destructor = ip6gre_dev_free;
+
+ dev->iflink = 0;
+ dev->features |= NETIF_F_NETNS_LOCAL;
+}
+
+static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[])
+{
+ struct ip6_tnl *nt;
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ int err;
+
+ nt = netdev_priv(dev);
+ ip6gre_netlink_parms(data, &nt->parms);
+
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+
+ if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+ eth_hw_addr_random(dev);
+
+ nt->dev = dev;
+ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+ /* Can use a lockless transmit, unless we generate output sequences */
+ if (!(nt->parms.o_flags & GRE_SEQ))
+ dev->features |= NETIF_F_LLTX;
+
+ err = register_netdevice(dev);
+ if (err)
+ goto out;
+
+ dev_hold(dev);
+ ip6gre_tunnel_link(ign, nt);
+
+out:
+ return err;
+}
+
+static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[])
+{
+ struct ip6_tnl *t, *nt;
+ struct net *net = dev_net(dev);
+ struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+ struct __ip6_tnl_parm p;
+
+ if (dev == ign->fb_tunnel_dev)
+ return -EINVAL;
+
+ nt = netdev_priv(dev);
+ ip6gre_netlink_parms(data, &p);
+
+ t = ip6gre_tunnel_locate(net, &p, 0);
+
+ if (t) {
+ if (t->dev != dev)
+ return -EEXIST;
+ } else {
+ t = nt;
+
+ ip6gre_tunnel_unlink(ign, t);
+ ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link(ign, t);
+ netdev_state_change(dev);
+ }
+
+ return 0;
+}
+
+static size_t ip6gre_get_size(const struct net_device *dev)
+{
+ return
+ /* IFLA_GRE_LINK */
+ nla_total_size(4) +
+ /* IFLA_GRE_IFLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_OFLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_IKEY */
+ nla_total_size(4) +
+ /* IFLA_GRE_OKEY */
+ nla_total_size(4) +
+ /* IFLA_GRE_LOCAL */
+ nla_total_size(4) +
+ /* IFLA_GRE_REMOTE */
+ nla_total_size(4) +
+ /* IFLA_GRE_TTL */
+ nla_total_size(1) +
+ /* IFLA_GRE_TOS */
+ nla_total_size(1) +
+ /* IFLA_GRE_ENCAP_LIMIT */
+ nla_total_size(1) +
+ /* IFLA_GRE_FLOWINFO */
+ nla_total_size(4) +
+ /* IFLA_GRE_FLAGS */
+ nla_total_size(4) +
+ 0;
+}
+
+static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct __ip6_tnl_parm *p = &t->parms;
+
+ if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+ nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+ nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+ nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) ||
+ nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) ||
+ nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
+ /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
+ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
+ nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
+ nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
+ [IFLA_GRE_LINK] = { .type = NLA_U32 },
+ [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_IKEY] = { .type = NLA_U32 },
+ [IFLA_GRE_OKEY] = { .type = NLA_U32 },
+ [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+ [IFLA_GRE_TTL] = { .type = NLA_U8 },
+ [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
+ [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
+ [IFLA_GRE_FLAGS] = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
+ .kind = "ip6gre",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6gre_tunnel_setup,
+ .validate = ip6gre_tunnel_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+};
+
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
+ .kind = "ip6gretap",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6gre_tap_setup,
+ .validate = ip6gre_tap_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+};
+
+/*
+ * And now the modules code and kernel interface.
+ */
+
+static int __init ip6gre_init(void)
+{
+ int err;
+
+ pr_info("GRE over IPv6 tunneling driver\n");
+
+ err = register_pernet_device(&ip6gre_net_ops);
+ if (err < 0)
+ return err;
+
+ err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
+ if (err < 0) {
+ pr_info("%s: can't add protocol\n", __func__);
+ goto add_proto_failed;
+ }
+
+ err = rtnl_link_register(&ip6gre_link_ops);
+ if (err < 0)
+ goto rtnl_link_failed;
+
+ err = rtnl_link_register(&ip6gre_tap_ops);
+ if (err < 0)
+ goto tap_ops_failed;
+
+out:
+ return err;
+
+tap_ops_failed:
+ rtnl_link_unregister(&ip6gre_link_ops);
+rtnl_link_failed:
+ inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+add_proto_failed:
+ unregister_pernet_device(&ip6gre_net_ops);
+ goto out;
+}
+
+static void __exit ip6gre_fini(void)
+{
+ rtnl_link_unregister(&ip6gre_tap_ops);
+ rtnl_link_unregister(&ip6gre_link_ops);
+ inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+ unregister_pernet_device(&ip6gre_net_ops);
+}
+
+module_init(ip6gre_init);
+module_exit(ip6gre_fini);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
+MODULE_ALIAS_RTNL_LINK("ip6gre");
+MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 5ab923e51af3..a52d864d562b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -47,9 +47,16 @@
-inline int ip6_rcv_finish( struct sk_buff *skb)
+int ip6_rcv_finish(struct sk_buff *skb)
{
- if (skb_dst(skb) == NULL)
+ if (sysctl_ip_early_demux && !skb_dst(skb)) {
+ const struct inet6_protocol *ipprot;
+
+ ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
+ if (ipprot && ipprot->early_demux)
+ ipprot->early_demux(skb);
+ }
+ if (!skb_dst(skb))
ip6_route_input(skb);
return dst_input(skb);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5b2d63ed793e..aece3e792f84 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -123,16 +123,11 @@ static int ip6_finish_output2(struct sk_buff *skb)
skb->len);
}
- rcu_read_lock();
rt = (struct rt6_info *) dst;
neigh = rt->n;
- if (neigh) {
- int res = dst_neigh_output(dst, neigh, skb);
+ if (neigh)
+ return dst_neigh_output(dst, neigh, skb);
- rcu_read_unlock();
- return res;
- }
- rcu_read_unlock();
IP6_INC_STATS_BH(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
@@ -493,7 +488,8 @@ int ip6_forward(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if (skb->len > mtu && !skb_is_gso(skb)) {
+ if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
+ (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -636,7 +632,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
/* We must not fragment if the socket is set to force MTU discovery
* or if the skb it not generated by a local socket.
*/
- if (unlikely(!skb->local_df && skb->len > mtu)) {
+ if (unlikely(!skb->local_df && skb->len > mtu) ||
+ (IP6CB(skb)->frag_max_size &&
+ IP6CB(skb)->frag_max_size > mtu)) {
if (skb->sk && dst_allfrag(skb_dst(skb)))
sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
@@ -980,7 +978,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* dst entry and replace it instead with the
* dst entry of the nexthop router
*/
- rcu_read_lock();
rt = (struct rt6_info *) *dst;
n = rt->n;
if (n && !(n->nud_state & NUD_VALID)) {
@@ -988,7 +985,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
struct flowi6 fl_gw6;
int redirect;
- rcu_read_unlock();
ifp = ipv6_get_ifaddr(net, &fl6->saddr,
(*dst)->dev, 1);
@@ -1008,8 +1004,6 @@ static int ip6_dst_lookup_tail(struct sock *sk,
if ((err = (*dst)->error))
goto out_err_release;
}
- } else {
- rcu_read_unlock();
}
#endif
@@ -1285,8 +1279,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
if (dst_allfrag(rt->dst.path))
cork->flags |= IPCORK_ALLFRAG;
cork->length = 0;
- sk->sk_sndmsg_page = NULL;
- sk->sk_sndmsg_off = 0;
exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
length += exthdrlen;
transhdrlen += exthdrlen;
@@ -1510,48 +1502,31 @@ alloc_new_skb:
}
} else {
int i = skb_shinfo(skb)->nr_frags;
- skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
- struct page *page = sk->sk_sndmsg_page;
- int off = sk->sk_sndmsg_off;
- unsigned int left;
-
- if (page && (left = PAGE_SIZE - off) > 0) {
- if (copy >= left)
- copy = left;
- if (page != skb_frag_page(frag)) {
- if (i == MAX_SKB_FRAGS) {
- err = -EMSGSIZE;
- goto error;
- }
- skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
- skb_frag_ref(skb, i);
- frag = &skb_shinfo(skb)->frags[i];
- }
- } else if(i < MAX_SKB_FRAGS) {
- if (copy > PAGE_SIZE)
- copy = PAGE_SIZE;
- page = alloc_pages(sk->sk_allocation, 0);
- if (page == NULL) {
- err = -ENOMEM;
- goto error;
- }
- sk->sk_sndmsg_page = page;
- sk->sk_sndmsg_off = 0;
+ struct page_frag *pfrag = sk_page_frag(sk);
- skb_fill_page_desc(skb, i, page, 0, 0);
- frag = &skb_shinfo(skb)->frags[i];
- } else {
- err = -EMSGSIZE;
+ err = -ENOMEM;
+ if (!sk_page_frag_refill(sk, pfrag))
goto error;
+
+ if (!skb_can_coalesce(skb, i, pfrag->page,
+ pfrag->offset)) {
+ err = -EMSGSIZE;
+ if (i == MAX_SKB_FRAGS)
+ goto error;
+
+ __skb_fill_page_desc(skb, i, pfrag->page,
+ pfrag->offset, 0);
+ skb_shinfo(skb)->nr_frags = ++i;
+ get_page(pfrag->page);
}
+ copy = min_t(int, copy, pfrag->size - pfrag->offset);
if (getfrag(from,
- skb_frag_address(frag) + skb_frag_size(frag),
- offset, copy, skb->len, skb) < 0) {
- err = -EFAULT;
- goto error;
- }
- sk->sk_sndmsg_off += copy;
- skb_frag_size_add(frag, copy);
+ page_address(pfrag->page) + pfrag->offset,
+ offset, copy, skb->len, skb) < 0)
+ goto error_efault;
+
+ pfrag->offset += copy;
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
@@ -1560,7 +1535,11 @@ alloc_new_skb:
offset += copy;
length -= copy;
}
+
return 0;
+
+error_efault:
+ err = -EFAULT;
error:
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9a1d5fe6aef8..cb7e2ded6f08 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
* Locking : hash tables are protected by RCU and RTNL
*/
-static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
struct dst_entry *dst = t->dst_cache;
@@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
return dst;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
-static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
+void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
dst_release(t->dst_cache);
t->dst_cache = NULL;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
-static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *) dst;
t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
dst_release(t->dst_cache);
t->dst_cache = dst;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
/**
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
**/
static struct ip6_tnl __rcu **
-ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
{
const struct in6_addr *remote = &p->raddr;
const struct in6_addr *local = &p->laddr;
@@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev)
* created tunnel or NULL
**/
-static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
+static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
{
struct net_device *dev;
struct ip6_tnl *t;
@@ -322,7 +325,7 @@ failed:
**/
static struct ip6_tnl *ip6_tnl_locate(struct net *net,
- struct ip6_tnl_parm *p, int create)
+ struct __ip6_tnl_parm *p, int create)
{
const struct in6_addr *remote = &p->raddr;
const struct in6_addr *local = &p->laddr;
@@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
* else index to encapsulation limit
**/
-static __u16
-parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
{
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
__u8 nexthdr = ipv6h->nexthdr;
@@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
}
return 0;
}
+EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
/**
* ip6_tnl_err - tunnel error handler
@@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
case ICMPV6_PARAMPROB:
teli = 0;
if ((*code) == ICMPV6_HDR_FIELD)
- teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+ teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
if (teli && teli == *info - 2) {
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
@@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
IP6_ECN_set_ce(ipv6_hdr(skb));
}
-static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
+__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
const struct in6_addr *laddr,
const struct in6_addr *raddr)
{
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
int ltype = ipv6_addr_type(laddr);
int rtype = ipv6_addr_type(raddr);
__u32 flags = 0;
@@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
}
return flags;
}
+EXPORT_SYMBOL(ip6_tnl_get_cap);
/* called with rcu_read_lock() */
-static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
+int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
const struct in6_addr *laddr,
const struct in6_addr *raddr)
{
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
struct net *net = dev_net(t->dev);
@@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
}
return ret;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
/**
* ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
@@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
-static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
+int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
{
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
struct net *net = dev_net(t->dev);
@@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
}
return ret;
}
+EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
+
/**
* ip6_tnl_xmit2 - encapsulate packet and send
* @skb: the outgoing socket buffer
@@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
!ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
return -1;
- offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -1152,7 +1159,7 @@ tx_err:
static void ip6_tnl_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
- struct ip6_tnl_parm *p = &t->parms;
+ struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
**/
static int
-ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
{
t->parms.laddr = p->laddr;
t->parms.raddr = p->raddr;
@@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
return 0;
}
+static void
+ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
+{
+ p->laddr = u->laddr;
+ p->raddr = u->raddr;
+ p->flags = u->flags;
+ p->hop_limit = u->hop_limit;
+ p->encap_limit = u->encap_limit;
+ p->flowinfo = u->flowinfo;
+ p->link = u->link;
+ p->proto = u->proto;
+ memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
+{
+ u->laddr = p->laddr;
+ u->raddr = p->raddr;
+ u->flags = p->flags;
+ u->hop_limit = p->hop_limit;
+ u->encap_limit = p->encap_limit;
+ u->flowinfo = p->flowinfo;
+ u->link = p->link;
+ u->proto = p->proto;
+ memcpy(u->name, p->name, sizeof(u->name));
+}
+
/**
* ip6_tnl_ioctl - configure ipv6 tunnels from userspace
* @dev: virtual device associated with tunnel
@@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip6_tnl_parm p;
+ struct __ip6_tnl_parm p1;
struct ip6_tnl *t = NULL;
struct net *net = dev_net(dev);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -1274,11 +1310,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
err = -EFAULT;
break;
}
- t = ip6_tnl_locate(net, &p, 0);
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, 0);
+ } else {
+ memset(&p, 0, sizeof(p));
}
if (t == NULL)
t = netdev_priv(dev);
- memcpy(&p, &t->parms, sizeof (p));
+ ip6_tnl_parm_to_user(&p, &t->parms);
if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
err = -EFAULT;
}
@@ -1295,7 +1334,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
p.proto != 0)
break;
- t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
@@ -1307,13 +1347,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
ip6_tnl_unlink(ip6n, t);
synchronize_net();
- err = ip6_tnl_change(t, &p);
+ err = ip6_tnl_change(t, &p1);
ip6_tnl_link(ip6n, t);
netdev_state_change(dev);
}
if (t) {
err = 0;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
+ ip6_tnl_parm_to_user(&p, &t->parms);
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
err = -EFAULT;
} else
@@ -1329,7 +1370,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
break;
err = -ENOENT;
- if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
+ ip6_tnl_parm_from_user(&p1, &p);
+ t = ip6_tnl_locate(net, &p1, 0);
+ if (t == NULL)
break;
err = -EPERM;
if (t->dev == ip6n->fb_tnl_dev)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 4532973f0dd4..08ea3f0b6e55 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
- rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+ rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else
kfree_skb(skb);
}
@@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
skb_trim(skb, nlh->nlmsg_len);
((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
}
- rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+ rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
} else
ip6_mr_forward(net, mrt, skb, c);
}
@@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net,
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- u32 pid, u32 seq, struct mfc6_cache *c)
+ u32 portid, u32 seq, struct mfc6_cache *c)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
- nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (e < s_e)
goto next_entry;
if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
mfc) < 0)
goto done;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 5b087c31d87b..0f9bdc5ee9f3 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -86,28 +86,30 @@ static int mip6_mh_len(int type)
static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
{
- struct ip6_mh *mh;
+ struct ip6_mh _hdr;
+ const struct ip6_mh *mh;
- if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
- !pskb_may_pull(skb, (skb_transport_offset(skb) +
- ((skb_transport_header(skb)[1] + 1) << 3))))
+ mh = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_hdr), &_hdr);
+ if (!mh)
return -1;
- mh = (struct ip6_mh *)skb_transport_header(skb);
+ if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len)
+ return -1;
if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
- mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
- skb_network_header(skb)));
+ mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) +
+ skb_network_header_len(skb));
return -1;
}
if (mh->ip6mh_proto != IPPROTO_NONE) {
LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
mh->ip6mh_proto);
- mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
- skb_network_header(skb)));
+ mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) +
+ skb_network_header_len(skb));
return -1;
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index db31561cc8df..429089cb073d 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -15,6 +15,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
const struct ipv6hdr *iph = ipv6_hdr(skb);
+ unsigned int hh_len;
struct dst_entry *dst;
struct flowi6 fl6 = {
.flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -47,6 +48,13 @@ int ip6_route_me_harder(struct sk_buff *skb)
}
#endif
+ /* Change in oif may mean change in hh_len. */
+ hh_len = skb_dst(skb)->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
+ 0, GFP_ATOMIC))
+ return -1;
+
return 0;
}
EXPORT_SYMBOL(ip6_route_me_harder);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 10135342799e..c72532a60d88 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -181,9 +181,44 @@ config IP6_NF_SECURITY
help
This option adds a `security' table to iptables, for use
with Mandatory Access Control (MAC) policy.
-
+
If unsure, say N.
+config NF_NAT_IPV6
+ tristate "IPv6 NAT"
+ depends on NF_CONNTRACK_IPV6
+ depends on NETFILTER_ADVANCED
+ select NF_NAT
+ help
+ The IPv6 NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. It is controlled by
+ the `nat' table in ip6tables, see the man page for ip6tables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+if NF_NAT_IPV6
+
+config IP6_NF_TARGET_MASQUERADE
+ tristate "MASQUERADE target support"
+ help
+ Masquerading is a special case of NAT: all outgoing connections are
+ changed to seem to come from a particular interface's address, and
+ if the interface goes down, those connections are lost. This is
+ only useful for dialup accounts with dynamic IP address (ie. your IP
+ address will be different on next dialup).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_TARGET_NPT
+ tristate "NPT (Network Prefix translation) target support"
+ help
+ This option adds the `SNPT' and `DNPT' target, which perform
+ stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+endif # NF_NAT_IPV6
+
endif # IP6_NF_IPTABLES
endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 534d3f216f7b..2d11fcc2cf3c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
+obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -15,6 +16,9 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
+obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
+
# defrag
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
@@ -30,4 +34,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
# targets
+obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
+obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
new file mode 100644
index 000000000000..60e9053bab05
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+
+static unsigned int
+masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr src;
+ struct nf_conn *ct;
+ struct nf_nat_range newrange;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
+ &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+ return NF_DROP;
+
+ nfct_nat(ct)->masq_index = par->out->ifindex;
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.in6 = src;
+ newrange.max_addr.in6 = src;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+
+static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+
+ if (range->flags & NF_NAT_RANGE_MAP_IPS)
+ return -EINVAL;
+ return 0;
+}
+
+static int device_cmp(struct nf_conn *ct, void *ifindex)
+{
+ const struct nf_conn_nat *nat = nfct_nat(ct);
+
+ if (!nat)
+ return 0;
+ if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ const struct net_device *dev = ptr;
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN)
+ nf_ct_iterate_cleanup(net, device_cmp,
+ (void *)(long)dev->ifindex);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+
+ return masq_device_event(this, event, ifa->idev->dev);
+}
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+static struct xt_target masquerade_tg6_reg __read_mostly = {
+ .name = "MASQUERADE",
+ .family = NFPROTO_IPV6,
+ .checkentry = masquerade_tg6_checkentry,
+ .target = masquerade_tg6,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .me = THIS_MODULE,
+};
+
+static int __init masquerade_tg6_init(void)
+{
+ int err;
+
+ err = xt_register_target(&masquerade_tg6_reg);
+ if (err == 0) {
+ register_netdevice_notifier(&masq_dev_notifier);
+ register_inet6addr_notifier(&masq_inet_notifier);
+ }
+
+ return err;
+}
+static void __exit masquerade_tg6_exit(void)
+{
+ unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ xt_unregister_target(&masquerade_tg6_reg);
+}
+
+module_init(masquerade_tg6_init);
+module_exit(masquerade_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: automatic address SNAT");
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
new file mode 100644
index 000000000000..e9486915eff6
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6t_NPT.h>
+#include <linux/netfilter/x_tables.h>
+
+static __sum16 csum16_complement(__sum16 a)
+{
+ return (__force __sum16)(0xffff - (__force u16)a);
+}
+
+static __sum16 csum16_add(__sum16 a, __sum16 b)
+{
+ u16 sum;
+
+ sum = (__force u16)a + (__force u16)b;
+ sum += (__force u16)a < (__force u16)b;
+ return (__force __sum16)sum;
+}
+
+static __sum16 csum16_sub(__sum16 a, __sum16 b)
+{
+ return csum16_add(a, csum16_complement(b));
+}
+
+static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
+{
+ struct ip6t_npt_tginfo *npt = par->targinfo;
+ __sum16 src_sum = 0, dst_sum = 0;
+ unsigned int i;
+
+ if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
+ src_sum = csum16_add(src_sum,
+ (__force __sum16)npt->src_pfx.in6.s6_addr16[i]);
+ dst_sum = csum16_add(dst_sum,
+ (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]);
+ }
+
+ npt->adjustment = csum16_sub(src_sum, dst_sum);
+ return 0;
+}
+
+static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
+ struct in6_addr *addr)
+{
+ unsigned int pfx_len;
+ unsigned int i, idx;
+ __be32 mask;
+ __sum16 sum;
+
+ pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len);
+ for (i = 0; i < pfx_len; i += 32) {
+ if (pfx_len - i >= 32)
+ mask = 0;
+ else
+ mask = htonl(~((1 << (pfx_len - i)) - 1));
+
+ idx = i / 32;
+ addr->s6_addr32[idx] &= mask;
+ addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
+ }
+
+ if (pfx_len <= 48)
+ idx = 3;
+ else {
+ for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) {
+ if ((__force __sum16)addr->s6_addr16[idx] !=
+ CSUM_MANGLED_0)
+ break;
+ }
+ if (idx == ARRAY_SIZE(addr->s6_addr16))
+ return false;
+ }
+
+ sum = csum16_add((__force __sum16)addr->s6_addr16[idx],
+ npt->adjustment);
+ if (sum == CSUM_MANGLED_0)
+ sum = 0;
+ *(__force __sum16 *)&addr->s6_addr16[idx] = sum;
+
+ return true;
+}
+
+static unsigned int
+ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+ if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+ offsetof(struct ipv6hdr, saddr));
+ return NF_DROP;
+ }
+ return XT_CONTINUE;
+}
+
+static unsigned int
+ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+ if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+ offsetof(struct ipv6hdr, daddr));
+ return NF_DROP;
+ }
+ return XT_CONTINUE;
+}
+
+static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
+ {
+ .name = "SNPT",
+ .target = ip6t_snpt_tg,
+ .targetsize = sizeof(struct ip6t_npt_tginfo),
+ .checkentry = ip6t_npt_checkentry,
+ .family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_POST_ROUTING),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DNPT",
+ .target = ip6t_dnpt_tg,
+ .targetsize = sizeof(struct ip6t_npt_tginfo),
+ .checkentry = ip6t_npt_checkentry,
+ .family = NFPROTO_IPV6,
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init ip6t_npt_init(void)
+{
+ return xt_register_targets(ip6t_npt_target_reg,
+ ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+static void __exit ip6t_npt_exit(void)
+{
+ xt_unregister_targets(ip6t_npt_target_reg,
+ ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+module_init(ip6t_npt_init);
+module_exit(ip6t_npt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ip6t_SNPT");
+MODULE_ALIAS("ip6t_DNPT");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 325e59a0224f..beb5777d2043 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
net->ipv6.ip6table_filter =
ip6t_register_table(net, &packet_filter, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_filter))
- return PTR_ERR(net->ipv6.ip6table_filter);
- return 0;
+ return PTR_RET(net->ipv6.ip6table_filter);
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 4d782405f125..7431121b87de 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
net->ipv6.ip6table_mangle =
ip6t_register_table(net, &packet_mangler, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_mangle))
- return PTR_ERR(net->ipv6.ip6table_mangle);
- return 0;
+ return PTR_RET(net->ipv6.ip6table_mangle);
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
new file mode 100644
index 000000000000..e418bd6350a4
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
+ * funded by Astaro.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv6_table = {
+ .name = "nat",
+ .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+};
+
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ /* Force range to this IP; let proto decide mapping for
+ * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ */
+ struct nf_nat_range range;
+
+ range.flags = 0;
+ pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
+ HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
+
+ return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ unsigned int ret;
+
+ ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
+ if (ret == NF_ACCEPT) {
+ if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+ ret = alloc_null_binding(ct, hooknum);
+ }
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_fn(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nfct_nat(ct);
+ if (!nat) {
+ /* NAT module was loaded late. */
+ if (nf_ct_is_confirmed(ct))
+ return NF_ACCEPT;
+ nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+ if (nat == NULL) {
+ pr_debug("failed to add NAT extension\n");
+ return NF_ACCEPT;
+ }
+ }
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ hooknum, hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ }
+
+ return nf_nat_packet(ct, ctinfo, hooknum, skb);
+}
+
+static unsigned int
+nf_nat_ipv6_in(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_out(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all))
+ if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
+ ret = NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_local_fn(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct ipv6hdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ if (ip6_route_me_harder(skb))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET6))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_out,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_local_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+};
+
+static int __net_init ip6table_nat_net_init(struct net *net)
+{
+ struct ip6t_replace *repl;
+
+ repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
+ kfree(repl);
+ if (IS_ERR(net->ipv6.ip6table_nat))
+ return PTR_ERR(net->ipv6.ip6table_nat);
+ return 0;
+}
+
+static void __net_exit ip6table_nat_net_exit(struct net *net)
+{
+ ip6t_unregister_table(net, net->ipv6.ip6table_nat);
+}
+
+static struct pernet_operations ip6table_nat_net_ops = {
+ .init = ip6table_nat_net_init,
+ .exit = ip6table_nat_net_exit,
+};
+
+static int __init ip6table_nat_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&ip6table_nat_net_ops);
+ if (err < 0)
+ goto err1;
+
+ err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
+err1:
+ return err;
+}
+
+static void __exit ip6table_nat_exit(void)
+{
+ nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
+}
+
+module_init(ip6table_nat_init);
+module_exit(ip6table_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 5b9926a011bd..60d1bddff7a0 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
net->ipv6.ip6table_raw =
ip6t_register_table(net, &packet_raw, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_raw))
- return PTR_ERR(net->ipv6.ip6table_raw);
- return 0;
+ return PTR_RET(net->ipv6.ip6table_raw);
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 91aa2b4d83c9..db155351339c 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
net->ipv6.ip6table_security =
ip6t_register_table(net, &security_table, repl);
kfree(repl);
- if (IS_ERR(net->ipv6.ip6table_security))
- return PTR_ERR(net->ipv6.ip6table_security);
-
- return 0;
+ return PTR_RET(net->ipv6.ip6table_security);
}
static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4794f96cf2e0..8860d23e61cf 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#include <net/netfilter/nf_log.h>
@@ -64,82 +65,31 @@ static int ipv6_print_tuple(struct seq_file *s,
tuple->src.u3.ip6, tuple->dst.u3.ip6);
}
-/*
- * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c
- *
- * This function parses (probably truncated) exthdr set "hdr"
- * of length "len". "nexthdrp" initially points to some place,
- * where type of the first header can be found.
- *
- * It skips all well-known exthdrs, and returns pointer to the start
- * of unparsable area i.e. the first header with unknown type.
- * if success, *nexthdr is updated by type/protocol of this header.
- *
- * NOTES: - it may return pointer pointing beyond end of packet,
- * if the last recognized header is truncated in the middle.
- * - if packet is truncated, so that all parsed headers are skipped,
- * it returns -1.
- * - if packet is fragmented, return pointer of the fragment header.
- * - ESP is unparsable for now and considered like
- * normal payload protocol.
- * - Note also special handling of AUTH header. Thanks to IPsec wizards.
- */
-
-static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start,
- u8 *nexthdrp, int len)
-{
- u8 nexthdr = *nexthdrp;
-
- while (ipv6_ext_hdr(nexthdr)) {
- struct ipv6_opt_hdr hdr;
- int hdrlen;
-
- if (len < (int)sizeof(struct ipv6_opt_hdr))
- return -1;
- if (nexthdr == NEXTHDR_NONE)
- break;
- if (nexthdr == NEXTHDR_FRAGMENT)
- break;
- if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
- BUG();
- if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hdr.hdrlen+2)<<2;
- else
- hdrlen = ipv6_optlen(&hdr);
-
- nexthdr = hdr.nexthdr;
- len -= hdrlen;
- start += hdrlen;
- }
-
- *nexthdrp = nexthdr;
- return start;
-}
-
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
unsigned int *dataoff, u_int8_t *protonum)
{
unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
- unsigned char pnum;
+ __be16 frag_off;
int protoff;
+ u8 nexthdr;
if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
- &pnum, sizeof(pnum)) != 0) {
+ &nexthdr, sizeof(nexthdr)) != 0) {
pr_debug("ip6_conntrack_core: can't get nexthdr\n");
return -NF_ACCEPT;
}
- protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff);
+ protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
/*
* (protoff == skb->len) mean that the packet doesn't have no data
* except of IPv6 & ext headers. but it's tracked anyway. - YK
*/
- if ((protoff < 0) || (protoff > skb->len)) {
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
return -NF_ACCEPT;
}
*dataoff = protoff;
- *protonum = pnum;
+ *protonum = nexthdr;
return NF_ACCEPT;
}
@@ -153,10 +103,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
- unsigned int ret, protoff;
- unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
- unsigned char pnum = ipv6_hdr(skb)->nexthdr;
-
+ unsigned int ret;
+ __be16 frag_off;
+ int protoff;
+ u8 nexthdr;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
@@ -171,9 +121,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
if (!helper)
return NF_ACCEPT;
- protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
- skb->len - extoff);
- if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
@@ -192,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+ int protoff;
+ __be16 frag_off;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
+
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ goto out;
+ }
+
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ typeof(nf_nat_seq_adjust_hook) seq_adjust;
+
+ seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
+ if (!seq_adjust ||
+ !seq_adjust(skb, ct, ctinfo, protoff)) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
+ }
+out:
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
@@ -199,9 +180,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
static unsigned int __ipv6_conntrack_in(struct net *net,
unsigned int hooknum,
struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *reasm = skb->nfct_reasm;
+ const struct nf_conn_help *help;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
/* This packet is fragmented and has reassembled packet. */
if (reasm) {
@@ -213,6 +199,25 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
if (ret != NF_ACCEPT)
return ret;
}
+
+ /* Conntrack helpers need the entire reassembled packet in the
+ * POST_ROUTING hook. In case of unconfirmed connections NAT
+ * might reassign a helper, so the entire packet is also
+ * required.
+ */
+ ct = nf_ct_get(reasm, &ctinfo);
+ if (ct != NULL && !nf_ct_is_untracked(ct)) {
+ help = nfct_help(ct);
+ if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
+ nf_conntrack_get_reasm(skb);
+ NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
+ (struct net_device *)in,
+ (struct net_device *)out,
+ okfn, NF_IP6_PRI_CONNTRACK + 1);
+ return NF_DROP_ERR(-ECANCELED);
+ }
+ }
+
nf_conntrack_get(reasm->nfct);
skb->nfct = reasm->nfct;
skb->nfctinfo = reasm->nfctinfo;
@@ -228,7 +233,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn);
+ return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
}
static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -242,7 +247,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
+ return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c9c78c2e666b..18bd9bbbd1c6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -57,41 +57,27 @@ struct nf_ct_frag6_skb_cb
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
-struct nf_ct_frag6_queue
-{
- struct inet_frag_queue q;
-
- __be32 id; /* fragment id */
- u32 user;
- struct in6_addr saddr;
- struct in6_addr daddr;
-
- unsigned int csum;
- __u16 nhoffset;
-};
-
static struct inet_frags nf_frags;
-static struct netns_frags nf_init_frags;
#ifdef CONFIG_SYSCTL
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
- .data = &nf_init_frags.timeout,
+ .data = &init_net.nf_frag.frags.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_frag6_low_thresh",
- .data = &nf_init_frags.low_thresh,
+ .data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "nf_conntrack_frag6_high_thresh",
- .data = &nf_init_frags.high_thresh,
+ .data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
@@ -99,68 +85,86 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{ }
};
-static struct ctl_table_header *nf_ct_frag6_sysctl_header;
-#endif
-
-static unsigned int nf_hashfn(struct inet_frag_queue *q)
+static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
{
- const struct nf_ct_frag6_queue *nq;
+ struct ctl_table *table;
+ struct ctl_table_header *hdr;
+
+ table = nf_ct_frag6_sysctl_table;
+ if (!net_eq(net, &init_net)) {
+ table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
+ GFP_KERNEL);
+ if (table == NULL)
+ goto err_alloc;
+
+ table[0].data = &net->ipv6.frags.high_thresh;
+ table[1].data = &net->ipv6.frags.low_thresh;
+ table[2].data = &net->ipv6.frags.timeout;
+ }
- nq = container_of(q, struct nf_ct_frag6_queue, q);
- return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
+ hdr = register_net_sysctl(net, "net/netfilter", table);
+ if (hdr == NULL)
+ goto err_reg;
+
+ net->nf_frag.sysctl.frags_hdr = hdr;
+ return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(table);
+err_alloc:
+ return -ENOMEM;
}
-static void nf_skb_free(struct sk_buff *skb)
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
- if (NFCT_FRAG6_CB(skb)->orig)
- kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-}
+ struct ctl_table *table;
-/* Destruction primitives. */
+ table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+ if (!net_eq(net, &init_net))
+ kfree(table);
+}
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
+#else
+static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
{
- inet_frag_put(&fq->q, &nf_frags);
+ return 0;
}
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
+{
+}
+#endif
-/* Kill fq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
- inet_frag_kill(&fq->q, &nf_frags);
+ const struct frag_queue *nq;
+
+ nq = container_of(q, struct frag_queue, q);
+ return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
}
-static void nf_ct_frag6_evictor(void)
+static void nf_skb_free(struct sk_buff *skb)
{
- local_bh_disable();
- inet_frag_evictor(&nf_init_frags, &nf_frags);
- local_bh_enable();
+ if (NFCT_FRAG6_CB(skb)->orig)
+ kfree_skb(NFCT_FRAG6_CB(skb)->orig);
}
static void nf_ct_frag6_expire(unsigned long data)
{
- struct nf_ct_frag6_queue *fq;
-
- fq = container_of((struct inet_frag_queue *)data,
- struct nf_ct_frag6_queue, q);
-
- spin_lock(&fq->q.lock);
+ struct frag_queue *fq;
+ struct net *net;
- if (fq->q.last_in & INET_FRAG_COMPLETE)
- goto out;
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, nf_frag.frags);
- fq_kill(fq);
-
-out:
- spin_unlock(&fq->q.lock);
- fq_put(fq);
+ ip6_expire_frag_queue(net, fq, &nf_frags);
}
/* Creation primitives. */
-
-static __inline__ struct nf_ct_frag6_queue *
-fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
+static inline struct frag_queue *fq_find(struct net *net, __be32 id,
+ u32 user, struct in6_addr *src,
+ struct in6_addr *dst)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -174,22 +178,23 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
read_lock_bh(&nf_frags.lock);
hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
- q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
+ q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
local_bh_enable();
if (q == NULL)
goto oom;
- return container_of(q, struct nf_ct_frag6_queue, q);
+ return container_of(q, struct frag_queue, q);
oom:
return NULL;
}
-static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
+static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff)
{
struct sk_buff *prev, *next;
+ unsigned int payload_len;
int offset, end;
if (fq->q.last_in & INET_FRAG_COMPLETE) {
@@ -197,8 +202,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
goto err;
}
+ payload_len = ntohs(ipv6_hdr(skb)->payload_len);
+
offset = ntohs(fhdr->frag_off) & ~0x7;
- end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+ end = offset + (payload_len -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
@@ -307,7 +314,9 @@ found:
skb->dev = NULL;
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
- atomic_add(skb->truesize, &nf_init_frags.mem);
+ if (payload_len > fq->q.max_size)
+ fq->q.max_size = payload_len;
+ atomic_add(skb->truesize, &fq->q.net->mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -317,12 +326,12 @@ found:
fq->q.last_in |= INET_FRAG_FIRST_IN;
}
write_lock(&nf_frags.lock);
- list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
+ list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
write_unlock(&nf_frags.lock);
return 0;
discard_fq:
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &nf_frags);
err:
return -1;
}
@@ -337,12 +346,12 @@ err:
* the last and the first frames arrived and all the bits are here.
*/
static struct sk_buff *
-nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
+nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
{
struct sk_buff *fp, *op, *head = fq->q.fragments;
int payload_len;
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &nf_frags);
WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -386,7 +395,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- atomic_add(clone->truesize, &nf_init_frags.mem);
+ atomic_add(clone->truesize, &fq->q.net->mem);
}
/* We have to remove fragment header from datagram and to relocate
@@ -410,12 +419,14 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
- atomic_sub(head->truesize, &nf_init_frags.mem);
+ atomic_sub(head->truesize, &fq->q.net->mem);
+ head->local_df = 1;
head->next = NULL;
head->dev = dev;
head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
+ IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -520,8 +531,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
{
struct sk_buff *clone;
struct net_device *dev = skb->dev;
+ struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
+ : dev_net(skb->dev);
struct frag_hdr *fhdr;
- struct nf_ct_frag6_queue *fq;
+ struct frag_queue *fq;
struct ipv6hdr *hdr;
int fhoff, nhoff;
u8 prevhdr;
@@ -553,10 +566,11 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(clone);
fhdr = (struct frag_hdr *)skb_transport_header(clone);
- if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
- nf_ct_frag6_evictor();
+ local_bh_disable();
+ inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
+ local_bh_enable();
- fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+ fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
@@ -567,7 +581,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
spin_unlock_bh(&fq->q.lock);
pr_debug("Can't insert skb to queue\n");
- fq_put(fq);
+ inet_frag_put(&fq->q, &nf_frags);
goto ret_orig;
}
@@ -579,7 +593,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
}
spin_unlock_bh(&fq->q.lock);
- fq_put(fq);
+ inet_frag_put(&fq->q, &nf_frags);
return ret_skb;
ret_orig:
@@ -592,6 +606,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
int (*okfn)(struct sk_buff *))
{
struct sk_buff *s, *s2;
+ unsigned int ret = 0;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
nf_conntrack_put_reasm(s->nfct_reasm);
@@ -601,49 +616,62 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
s2 = s->next;
s->next = NULL;
- NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn,
- NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+ if (ret != -ECANCELED)
+ ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
+ in, out, okfn,
+ NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+ else
+ kfree_skb(s);
+
s = s2;
}
nf_conntrack_put_reasm(skb);
}
+static int nf_ct_net_init(struct net *net)
+{
+ net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+ inet_frags_init_net(&net->nf_frag.frags);
+
+ return nf_ct_frag6_sysctl_register(net);
+}
+
+static void nf_ct_net_exit(struct net *net)
+{
+ nf_ct_frags6_sysctl_unregister(net);
+ inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+}
+
+static struct pernet_operations nf_ct_net_ops = {
+ .init = nf_ct_net_init,
+ .exit = nf_ct_net_exit,
+};
+
int nf_ct_frag6_init(void)
{
+ int ret = 0;
+
nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.skb_free = nf_skb_free;
- nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+ nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.secret_interval = 10 * 60 * HZ;
- nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
- nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- inet_frags_init_net(&nf_init_frags);
inet_frags_init(&nf_frags);
-#ifdef CONFIG_SYSCTL
- nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter",
- nf_ct_frag6_sysctl_table);
- if (!nf_ct_frag6_sysctl_header) {
+ ret = register_pernet_subsys(&nf_ct_net_ops);
+ if (ret)
inet_frags_fini(&nf_frags);
- return -ENOMEM;
- }
-#endif
- return 0;
+ return ret;
}
void nf_ct_frag6_cleanup(void)
{
-#ifdef CONFIG_SYSCTL
- unregister_net_sysctl_table(nf_ct_frag6_sysctl_header);
- nf_ct_frag6_sysctl_header = NULL;
-#endif
+ unregister_pernet_subsys(&nf_ct_net_ops);
inet_frags_fini(&nf_frags);
-
- nf_init_frags.low_thresh = 0;
- nf_ct_frag6_evictor();
}
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
new file mode 100644
index 000000000000..abfe75a2e316
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of IPv6 NAT funded by Astaro.
+ */
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (ct->status & statusbit) {
+ fl6->daddr = t->dst.u3.in6;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl6->fl6_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl6->saddr = t->src.u3.in6;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl6->fl6_sport = t->src.u.all;
+ }
+}
+#endif
+
+static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
+ const struct nf_nat_range *range)
+{
+ return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
+ ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
+}
+
+static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
+ __be16 dport)
+{
+ return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
+}
+
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_nat_l4proto *l4proto,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+ struct ipv6hdr *ipv6h;
+ __be16 frag_off;
+ int hdroff;
+ u8 nexthdr;
+
+ if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+ return false;
+
+ ipv6h = (void *)skb->data + iphdroff;
+ nexthdr = ipv6h->nexthdr;
+ hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
+ &nexthdr, &frag_off);
+ if (hdroff < 0)
+ goto manip_addr;
+
+ if ((frag_off & htons(~0x7)) == 0 &&
+ !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
+ target, maniptype))
+ return false;
+manip_addr:
+ if (maniptype == NF_NAT_MANIP_SRC)
+ ipv6h->saddr = target->src.u3.in6;
+ else
+ ipv6h->daddr = target->dst.u3.in6;
+
+ return true;
+}
+
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
+ const struct in6_addr *oldip, *newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = &ipv6h->saddr;
+ newip = &t->src.u3.in6;
+ } else {
+ oldip = &ipv6h->daddr;
+ newip = &t->dst.u3.in6;
+ }
+ inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
+ newip->s6_addr32, 1);
+}
+
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(rt->rt6i_flags & RTF_LOCAL) &&
+ (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ (data - (void *)skb->data);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ datalen, proto, 0);
+ } else {
+ *check = 0;
+ *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ datalen, proto,
+ csum_partial(data, datalen,
+ 0));
+ if (proto == IPPROTO_UDP && !*check)
+ *check = CSUM_MANGLED_0;
+ }
+ } else
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), 1);
+}
+
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range *range)
+{
+ if (tb[CTA_NAT_V6_MINIP]) {
+ nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+ sizeof(struct in6_addr));
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V6_MAXIP])
+ nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+ sizeof(struct in6_addr));
+ else
+ range->max_addr = range->min_addr;
+
+ return 0;
+}
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
+ .l3proto = NFPROTO_IPV6,
+ .secure_port = nf_nat_ipv6_secure_port,
+ .in_range = nf_nat_ipv6_in_range,
+ .manip_pkt = nf_nat_ipv6_manip_pkt,
+ .csum_update = nf_nat_ipv6_csum_update,
+ .csum_recalc = nf_nat_ipv6_csum_recalc,
+ .nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
+#ifdef CONFIG_XFRM
+ .decode_session = nf_nat_ipv6_decode_session,
+#endif
+};
+
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ unsigned int hdrlen)
+{
+ struct {
+ struct icmp6hdr icmp6;
+ struct ipv6hdr ip6;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ const struct nf_nat_l4proto *l4proto;
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply direction */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
+ if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
+ l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp6.icmp6_cksum = 0;
+ inside->icmp6.icmp6_cksum =
+ csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ skb->len - hdrlen, IPPROTO_ICMPV6,
+ csum_partial(&inside->icmp6,
+ skb->len - hdrlen, 0));
+ }
+
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
+ if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+static int __init nf_nat_l3proto_ipv6_init(void)
+{
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
+ if (err < 0)
+ goto err2;
+ return err;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+err1:
+ return err;
+}
+
+static void __exit nf_nat_l3proto_ipv6_exit(void)
+{
+ nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
+ nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
+
+module_init(nf_nat_l3proto_ipv6_init);
+module_exit(nf_nat_l3proto_ipv6_exit);
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
new file mode 100644
index 000000000000..5d6da784305b
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/icmpv6.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static bool
+icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
+{
+ return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+ ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+}
+
+static void
+icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ static u16 id;
+ unsigned int range_size;
+ unsigned int i;
+
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
+
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
+ range_size = 0xffff;
+
+ for (i = 0; ; ++id) {
+ tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
+ (id % range_size));
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+}
+
+static bool
+icmpv6_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct icmp6hdr *hdr;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct icmp6hdr *)(skb->data + hdroff);
+ l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
+ tuple, maniptype);
+ if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
+ hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
+ inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
+ hdr->icmp6_identifier,
+ tuple->src.u.icmp.id, 0);
+ hdr->icmp6_identifier = tuple->src.u.icmp.id;
+ }
+ return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
+ .l4proto = IPPROTO_ICMPV6,
+ .manip_pkt = icmpv6_manip_pkt,
+ .in_range = icmpv6_in_range,
+ .unique_tuple = icmpv6_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
+#endif
+};
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index da2e92d05c15..745a32042950 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net)
goto proc_dev_snmp6_fail;
return 0;
+proc_dev_snmp6_fail:
+ proc_net_remove(net, "snmp6");
proc_snmp6_fail:
proc_net_remove(net, "sockstat6");
-proc_dev_snmp6_fail:
- proc_net_remove(net, "dev_snmp6");
return -ENOMEM;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ef0579d5bca6..d8e95c77db99 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -107,21 +107,20 @@ found:
* 0 - deliver
* 1 - block
*/
-static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
+static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
{
- struct icmp6hdr *icmph;
- struct raw6_sock *rp = raw6_sk(sk);
-
- if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
- __u32 *data = &rp->filter.data[0];
- int bit_nr;
+ struct icmp6hdr *_hdr;
+ const struct icmp6hdr *hdr;
- icmph = (struct icmp6hdr *) skb->data;
- bit_nr = icmph->icmp6_type;
+ hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_hdr), &_hdr);
+ if (hdr) {
+ const __u32 *data = &raw6_sk(sk)->filter.data[0];
+ unsigned int type = hdr->icmp6_type;
- return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0;
+ return (data[type >> 5] & (1U << (type & 31))) != 0;
}
- return 0;
+ return 1;
}
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -1251,7 +1250,8 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
0, 0L, 0,
- sock_i_uid(sp), 0,
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+ 0,
sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops));
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4ff9af628e72..da8a4e301b1b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -65,36 +65,8 @@ struct ip6frag_skb_cb
#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
-/*
- * Equivalent of ipv4 struct ipq
- */
-
-struct frag_queue
-{
- struct inet_frag_queue q;
-
- __be32 id; /* fragment id */
- u32 user;
- struct in6_addr saddr;
- struct in6_addr daddr;
-
- int iif;
- unsigned int csum;
- __u16 nhoffset;
-};
-
static struct inet_frags ip6_frags;
-int ip6_frag_nqueues(struct net *net)
-{
- return net->ipv6.frags.nqueues;
-}
-
-int ip6_frag_mem(struct net *net)
-{
- return atomic_read(&net->ipv6.frags.mem);
-}
-
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
@@ -159,46 +131,18 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
}
EXPORT_SYMBOL(ip6_frag_init);
-/* Destruction primitives. */
-
-static __inline__ void fq_put(struct frag_queue *fq)
-{
- inet_frag_put(&fq->q, &ip6_frags);
-}
-
-/* Kill fq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static __inline__ void fq_kill(struct frag_queue *fq)
-{
- inet_frag_kill(&fq->q, &ip6_frags);
-}
-
-static void ip6_evictor(struct net *net, struct inet6_dev *idev)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+ struct inet_frags *frags)
{
- int evicted;
-
- evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
- if (evicted)
- IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
-}
-
-static void ip6_frag_expire(unsigned long data)
-{
- struct frag_queue *fq;
struct net_device *dev = NULL;
- struct net *net;
-
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
spin_lock(&fq->q.lock);
if (fq->q.last_in & INET_FRAG_COMPLETE)
goto out;
- fq_kill(fq);
+ inet_frag_kill(&fq->q, frags);
- net = container_of(fq->q.net, struct net, ipv6.frags);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
@@ -222,7 +166,19 @@ out_rcu_unlock:
rcu_read_unlock();
out:
spin_unlock(&fq->q.lock);
- fq_put(fq);
+ inet_frag_put(&fq->q, frags);
+}
+EXPORT_SYMBOL(ip6_expire_frag_queue);
+
+static void ip6_frag_expire(unsigned long data)
+{
+ struct frag_queue *fq;
+ struct net *net;
+
+ fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ net = container_of(fq->q.net, struct net, ipv6.frags);
+
+ ip6_expire_frag_queue(net, fq, &ip6_frags);
}
static __inline__ struct frag_queue *
@@ -391,7 +347,7 @@ found:
return -1;
discard_fq:
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &ip6_frags);
err:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
@@ -417,7 +373,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
unsigned int nhoff;
int sum_truesize;
- fq_kill(fq);
+ inet_frag_kill(&fq->q, &ip6_frags);
/* Make the one we just received the head. */
if (prev) {
@@ -550,6 +506,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
+ int evicted;
IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
@@ -574,8 +531,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
- ip6_evictor(net, ip6_dst_idev(skb_dst(skb)));
+ evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
+ if (evicted)
+ IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASMFAILS, evicted);
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
if (fq != NULL) {
@@ -586,7 +545,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
spin_unlock(&fq->q.lock);
- fq_put(fq);
+ inet_frag_put(&fq->q, &ip6_frags);
return ret;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cf02cb97bbdd..7c7e963260e1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -222,11 +222,11 @@ static const u32 ip6_template_metrics[RTAX_MAX] = {
[RTAX_HOPLIMIT - 1] = 255,
};
-static struct rt6_info ip6_null_entry_template = {
+static const struct rt6_info ip6_null_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
- .obsolete = -1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -ENETUNREACH,
.input = ip6_pkt_discard,
.output = ip6_pkt_discard_out,
@@ -242,11 +242,11 @@ static struct rt6_info ip6_null_entry_template = {
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
-static struct rt6_info ip6_prohibit_entry_template = {
+static const struct rt6_info ip6_prohibit_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
- .obsolete = -1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EACCES,
.input = ip6_pkt_prohibit,
.output = ip6_pkt_prohibit_out,
@@ -257,11 +257,11 @@ static struct rt6_info ip6_prohibit_entry_template = {
.rt6i_ref = ATOMIC_INIT(1),
};
-static struct rt6_info ip6_blk_hole_entry_template = {
+static const struct rt6_info ip6_blk_hole_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
- .obsolete = -1,
+ .obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
.output = dst_discard,
@@ -281,13 +281,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
struct fib6_table *table)
{
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
- 0, DST_OBSOLETE_NONE, flags);
+ 0, DST_OBSOLETE_FORCE_CHK, flags);
if (rt) {
struct dst_entry *dst = &rt->dst;
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+ rt->rt6i_genid = rt_genid(net);
}
return rt;
}
@@ -369,15 +370,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static bool rt6_check_expired(const struct rt6_info *rt)
{
- struct rt6_info *ort = NULL;
-
if (rt->rt6i_flags & RTF_EXPIRES) {
if (time_after(jiffies, rt->dst.expires))
return true;
} else if (rt->dst.from) {
- ort = (struct rt6_info *) rt->dst.from;
- return (ort->rt6i_flags & RTF_EXPIRES) &&
- time_after(jiffies, ort->dst.expires);
+ return rt6_check_expired((struct rt6_info *) rt->dst.from);
}
return false;
}
@@ -451,10 +448,9 @@ static void rt6_probe(struct rt6_info *rt)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- rcu_read_lock();
neigh = rt ? rt->n : NULL;
if (!neigh || (neigh->nud_state & NUD_VALID))
- goto out;
+ return;
read_lock_bh(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
@@ -470,8 +466,6 @@ static void rt6_probe(struct rt6_info *rt)
} else {
read_unlock_bh(&neigh->lock);
}
-out:
- rcu_read_unlock();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
@@ -498,7 +492,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
struct neighbour *neigh;
int m;
- rcu_read_lock();
neigh = rt->n;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
@@ -516,7 +509,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
read_unlock_bh(&neigh->lock);
} else
m = 0;
- rcu_read_unlock();
return m;
}
@@ -965,7 +957,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
{
int flags = 0;
- fl6->flowi6_iif = net->loopback_dev->ifindex;
+ fl6->flowi6_iif = LOOPBACK_IFINDEX;
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
flags |= RT6_LOOKUP_F_IFACE;
@@ -1031,6 +1023,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt = (struct rt6_info *) dst;
+ /* All IPV6 dsts are created with ->obsolete set to the value
+ * DST_OBSOLETE_FORCE_CHK which forces validation calls down
+ * into this function always.
+ */
+ if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
+ return NULL;
+
if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
if (rt->rt6i_peer_genid != rt6_peer_genid()) {
if (!rt6_has_peer(rt))
@@ -1397,8 +1396,6 @@ int ip6_route_add(struct fib6_config *cfg)
goto out;
}
- rt->dst.obsolete = -1;
-
if (cfg->fc_flags & RTF_EXPIRES)
rt6_set_expires(rt, jiffies +
clock_t_to_jiffies(cfg->fc_expires));
@@ -1463,8 +1460,21 @@ int ip6_route_add(struct fib6_config *cfg)
}
rt->dst.output = ip6_pkt_discard_out;
rt->dst.input = ip6_pkt_discard;
- rt->dst.error = -ENETUNREACH;
rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+ switch (cfg->fc_type) {
+ case RTN_BLACKHOLE:
+ rt->dst.error = -EINVAL;
+ break;
+ case RTN_PROHIBIT:
+ rt->dst.error = -EACCES;
+ break;
+ case RTN_THROW:
+ rt->dst.error = -EAGAIN;
+ break;
+ default:
+ rt->dst.error = -ENETUNREACH;
+ break;
+ }
goto install_route;
}
@@ -1583,17 +1593,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
struct fib6_table *table;
struct net *net = dev_net(rt->dst.dev);
- if (rt == net->ipv6.ip6_null_entry)
- return -ENOENT;
+ if (rt == net->ipv6.ip6_null_entry) {
+ err = -ENOENT;
+ goto out;
+ }
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
-
err = fib6_del(rt, info);
- dst_release(&rt->dst);
-
write_unlock_bh(&table->tb6_lock);
+out:
+ dst_release(&rt->dst);
return err;
}
@@ -1829,7 +1840,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
if (!table)
return NULL;
- write_lock_bh(&table->tb6_lock);
+ read_lock_bh(&table->tb6_lock);
fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
if (!fn)
goto out;
@@ -1845,7 +1856,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
break;
}
out:
- write_unlock_bh(&table->tb6_lock);
+ read_unlock_bh(&table->tb6_lock);
return rt;
}
@@ -1861,7 +1872,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_dst_len = prefixlen,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
RTF_UP | RTF_PREF(pref),
- .fc_nlinfo.pid = 0,
+ .fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
};
@@ -1888,7 +1899,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
if (!table)
return NULL;
- write_lock_bh(&table->tb6_lock);
+ read_lock_bh(&table->tb6_lock);
for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
if (dev == rt->dst.dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
@@ -1897,7 +1908,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
}
if (rt)
dst_hold(&rt->dst);
- write_unlock_bh(&table->tb6_lock);
+ read_unlock_bh(&table->tb6_lock);
return rt;
}
@@ -1911,7 +1922,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
- .fc_nlinfo.pid = 0,
+ .fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = dev_net(dev),
};
@@ -2080,7 +2091,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->dst.input = ip6_input;
rt->dst.output = ip6_output;
rt->rt6i_idev = idev;
- rt->dst.obsolete = -1;
rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
if (anycast)
@@ -2261,14 +2271,18 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_src_len = rtm->rtm_src_len;
cfg->fc_flags = RTF_UP;
cfg->fc_protocol = rtm->rtm_protocol;
+ cfg->fc_type = rtm->rtm_type;
- if (rtm->rtm_type == RTN_UNREACHABLE)
+ if (rtm->rtm_type == RTN_UNREACHABLE ||
+ rtm->rtm_type == RTN_BLACKHOLE ||
+ rtm->rtm_type == RTN_PROHIBIT ||
+ rtm->rtm_type == RTN_THROW)
cfg->fc_flags |= RTF_REJECT;
if (rtm->rtm_type == RTN_LOCAL)
cfg->fc_flags |= RTF_LOCAL;
- cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2359,7 +2373,7 @@ static inline size_t rt6_nlmsg_size(void)
static int rt6_fill_node(struct net *net,
struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst, struct in6_addr *src,
- int iif, int type, u32 pid, u32 seq,
+ int iif, int type, u32 portid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
struct rtmsg *rtm;
@@ -2375,7 +2389,7 @@ static int rt6_fill_node(struct net *net,
}
}
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
+ nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
if (!nlh)
return -EMSGSIZE;
@@ -2391,8 +2405,22 @@ static int rt6_fill_node(struct net *net,
rtm->rtm_table = table;
if (nla_put_u32(skb, RTA_TABLE, table))
goto nla_put_failure;
- if (rt->rt6i_flags & RTF_REJECT)
- rtm->rtm_type = RTN_UNREACHABLE;
+ if (rt->rt6i_flags & RTF_REJECT) {
+ switch (rt->dst.error) {
+ case -EINVAL:
+ rtm->rtm_type = RTN_BLACKHOLE;
+ break;
+ case -EACCES:
+ rtm->rtm_type = RTN_PROHIBIT;
+ break;
+ case -EAGAIN:
+ rtm->rtm_type = RTN_THROW;
+ break;
+ default:
+ rtm->rtm_type = RTN_UNREACHABLE;
+ break;
+ }
+ }
else if (rt->rt6i_flags & RTF_LOCAL)
rtm->rtm_type = RTN_LOCAL;
else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
@@ -2465,27 +2493,19 @@ static int rt6_fill_node(struct net *net,
if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
goto nla_put_failure;
- rcu_read_lock();
n = rt->n;
if (n) {
- if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
- rcu_read_unlock();
+ if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
goto nla_put_failure;
- }
}
- rcu_read_unlock();
if (rt->dst.dev &&
nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
goto nla_put_failure;
if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
goto nla_put_failure;
- if (!(rt->rt6i_flags & RTF_EXPIRES))
- expires = 0;
- else if (rt->dst.expires - jiffies < INT_MAX)
- expires = rt->dst.expires - jiffies;
- else
- expires = INT_MAX;
+
+ expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
goto nla_put_failure;
@@ -2510,7 +2530,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
return rt6_fill_node(arg->net,
arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
- NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
+ NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
prefix, 0, NLM_F_MULTI);
}
@@ -2590,14 +2610,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
skb_dst_set(skb, &rt->dst);
err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
- RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
+ RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, 0, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
}
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
return err;
}
@@ -2617,14 +2637,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
- event, info->pid, seq, 0, 0, 0);
+ event, info->portid, seq, 0, 0, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
goto errout;
}
- rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
+ rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
info->nlh, gfp_any());
return;
errout:
@@ -2679,14 +2699,12 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
#else
seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
- rcu_read_lock();
n = rt->n;
if (n) {
seq_printf(m, "%pi6", n->primary_key);
} else {
seq_puts(m, "00000000000000000000000000000000");
}
- rcu_read_unlock();
seq_printf(m, " %08x %08x %08x %08x %8s\n",
rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
rt->dst.__use, rt->rt6i_flags,
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3bd1bfc01f85..3ed54ffd8d50 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -545,7 +545,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
err = -ENOENT;
- rcu_read_lock();
t = ipip6_tunnel_lookup(dev_net(skb->dev),
skb->dev,
iph->daddr,
@@ -579,7 +578,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
t->err_count = 1;
t->err_time = jiffies;
out:
- rcu_read_unlock();
return err;
}
@@ -599,7 +597,6 @@ static int ipip6_rcv(struct sk_buff *skb)
iph = ip_hdr(skb);
- rcu_read_lock();
tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
iph->saddr, iph->daddr);
if (tunnel != NULL) {
@@ -615,7 +612,6 @@ static int ipip6_rcv(struct sk_buff *skb)
if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
!isatap_chksrc(skb, iph, tunnel)) {
tunnel->dev->stats.rx_errors++;
- rcu_read_unlock();
kfree_skb(skb);
return 0;
}
@@ -630,12 +626,10 @@ static int ipip6_rcv(struct sk_buff *skb)
netif_rx(skb);
- rcu_read_unlock();
return 0;
}
/* no tunnel matched, let upstream know, ipsec may handle it */
- rcu_read_unlock();
return 1;
out:
kfree_skb(skb);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb46061c813a..182ab9a85d6c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -190,6 +190,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
ireq6 = inet6_rsk(req);
treq = tcp_rsk(req);
+ treq->listener = NULL;
if (security_inet_conn_request(sk, skb, req))
goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f49476e2d884..49c890386ce9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
}
#endif
+static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+ dst_hold(dst);
+ sk->sk_rx_dst = dst;
+ inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
+ if (rt->rt6i_node)
+ inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum;
+}
+
static void tcp_v6_hash(struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
@@ -391,8 +403,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tp->mtu_info = ntohl(info);
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
- else
- set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags);
+ else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
+ &tp->tsq_flags))
+ sock_hold(sk);
goto out;
}
@@ -463,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, rvp);
+ skb = tcp_make_synack(sk, dst, req, rvp, NULL);
if (skb) {
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -750,6 +763,8 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
+ __wsum wsum;
+ __sum16 sum;
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
@@ -758,11 +773,23 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
}
-
- /* fall through */
- case CHECKSUM_NONE:
+flush:
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
+
+ case CHECKSUM_NONE:
+ wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr,
+ skb_gro_len(skb),
+ IPPROTO_TCP, 0));
+ sum = csum_fold(skb_checksum(skb,
+ skb_gro_offset(skb),
+ skb_gro_len(skb),
+ wsum));
+ if (sum)
+ goto flush;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ break;
}
return tcp_gro_receive(head, skb);
@@ -975,7 +1002,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, inet6_iif(skb));
if (req)
- return tcp_check_req(sk, skb, req, prev);
+ return tcp_check_req(sk, skb, req, prev, false);
nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
&ipv6_hdr(skb)->saddr, th->source,
@@ -1156,7 +1183,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
}
have_isn:
tcp_rsk(req)->snt_isn = isn;
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
if (security_inet_conn_request(sk, skb, req))
goto drop_and_release;
@@ -1167,6 +1193,8 @@ have_isn:
want_cookie)
goto drop_and_free;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ tcp_rsk(req)->listener = NULL;
inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
return 0;
@@ -1270,6 +1298,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_gso_type = SKB_GSO_TCPV6;
__ip6_dst_store(newsk, dst, NULL, NULL);
+ inet6_sk_rx_dst_set(newsk, skb);
newtcp6sk = (struct tcp6_sock *)newsk;
inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
@@ -1299,7 +1328,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
/* Clone pktoptions received with SYN */
newnp->pktoptions = NULL;
if (treq->pktopts != NULL) {
- newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
+ newnp->pktoptions = skb_clone(treq->pktopts,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
consume_skb(treq->pktopts);
treq->pktopts = NULL;
if (newnp->pktoptions)
@@ -1332,9 +1362,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
tcp_initialize_rcv_mss(newsk);
- if (tcp_rsk(req)->snt_synack)
- tcp_valid_rtt_meas(newsk,
- tcp_time_stamp - tcp_rsk(req)->snt_synack);
+ tcp_synack_rtt_meas(newsk, req);
newtp->total_retrans = req->retrans;
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
@@ -1349,7 +1377,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr,
- AF_INET6, key->key, key->keylen, GFP_ATOMIC);
+ AF_INET6, key->key, key->keylen,
+ sk_gfp_atomic(sk, GFP_ATOMIC));
}
#endif
@@ -1442,10 +1471,20 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC));
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+ struct dst_entry *dst = sk->sk_rx_dst;
+
sock_rps_save_rxhash(sk, skb);
+ if (dst) {
+ if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
+ dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+ dst_release(dst);
+ sk->sk_rx_dst = NULL;
+ }
+ }
+
if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
goto reset;
if (opt_skb)
@@ -1674,6 +1713,43 @@ do_time_wait:
goto discard_it;
}
+static void tcp_v6_early_demux(struct sk_buff *skb)
+{
+ const struct ipv6hdr *hdr;
+ const struct tcphdr *th;
+ struct sock *sk;
+
+ if (skb->pkt_type != PACKET_HOST)
+ return;
+
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
+ return;
+
+ hdr = ipv6_hdr(skb);
+ th = tcp_hdr(skb);
+
+ if (th->doff < sizeof(struct tcphdr) / 4)
+ return;
+
+ sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+ &hdr->saddr, th->source,
+ &hdr->daddr, ntohs(th->dest),
+ inet6_iif(skb));
+ if (sk) {
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+ if (sk->sk_state != TCP_TIME_WAIT) {
+ struct dst_entry *dst = sk->sk_rx_dst;
+ struct inet_sock *icsk = inet_sk(sk);
+ if (dst)
+ dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+ if (dst &&
+ icsk->rx_dst_ifindex == skb->skb_iif)
+ skb_dst_set_noref(skb, dst);
+ }
+ }
+}
+
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct tcp6_timewait_sock),
.twsk_unique = tcp_twsk_unique,
@@ -1684,6 +1760,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
+ .sk_rx_dst_set = inet6_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct ipv6hdr),
@@ -1715,6 +1792,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
+ .sk_rx_dst_set = inet_sk_rx_dst_set,
.conn_request = tcp_v6_conn_request,
.syn_recv_sock = tcp_v6_syn_recv_sock,
.net_header_len = sizeof(struct iphdr),
@@ -1764,7 +1842,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
- const struct sock *sk, struct request_sock *req, int i, int uid)
+ const struct sock *sk, struct request_sock *req, int i, kuid_t uid)
{
int ttd = req->expires - jiffies;
const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
@@ -1788,7 +1866,7 @@ static void get_openreq6(struct seq_file *seq,
1, /* timers active (only the expire timer) */
jiffies_to_clock_t(ttd),
req->retrans,
- uid,
+ from_kuid_munged(seq_user_ns(seq), uid),
0, /* non standard timer */
0, /* open_requests have no inode */
0, req);
@@ -1836,9 +1914,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
tp->write_seq-tp->snd_una,
(sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
timer_active,
- jiffies_to_clock_t(timer_expires - jiffies),
+ jiffies_delta_to_clock_t(timer_expires - jiffies),
icsk->icsk_retransmits,
- sock_i_uid(sp),
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
icsk->icsk_probes_out,
sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
@@ -1856,10 +1934,7 @@ static void get_timewait6_sock(struct seq_file *seq,
const struct in6_addr *dest, *src;
__u16 destp, srcp;
const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
- int ttd = tw->tw_ttd - jiffies;
-
- if (ttd < 0)
- ttd = 0;
+ long delta = tw->tw_ttd - jiffies;
dest = &tw6->tw_v6_daddr;
src = &tw6->tw_v6_rcv_saddr;
@@ -1875,7 +1950,7 @@ static void get_timewait6_sock(struct seq_file *seq,
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
tw->tw_substate, 0, 0,
- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
atomic_read(&tw->tw_refcnt), tw);
}
@@ -1978,12 +2053,13 @@ struct proto tcpv6_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_MEMCG_KMEM
.proto_cgroup = tcp_proto_cgroup,
#endif
};
static const struct inet6_protocol tcpv6_protocol = {
+ .early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.gso_send_check = tcp_v6_gso_send_check,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 99d0077b56b8..fc9997260a6b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -394,6 +394,17 @@ try_again:
}
if (unlikely(err)) {
trace_kfree_skb(skb, udpv6_recvmsg);
+ if (!peeked) {
+ atomic_inc(&sk->sk_drops);
+ if (is_udp4)
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS,
+ is_udplite);
+ else
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS,
+ is_udplite);
+ }
goto out_free;
}
if (!peeked) {
@@ -1458,7 +1469,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
0, 0L, 0,
- sock_i_uid(sp), 0,
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
+ 0,
sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp,
atomic_read(&sp->sk_drops));
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef39812107b1..f8c4c08ffb60 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl)
return 0;
}
+static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
+{
+ struct rt6_info *rt = (struct rt6_info *)xdst;
+
+ rt6_init_peer(rt, net->ipv6.peers);
+}
+
static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
@@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
+ .init_dst = xfrm6_init_dst,
.init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index f8ba30dfecae..02ff7f2f60d4 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -217,7 +217,8 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v)
seq_printf(seq, "%08X %08X %02X %03d\n",
sk_wmem_alloc_get(s),
sk_rmem_alloc_get(s),
- s->sk_state, SOCK_INODE(s->sk_socket)->i_uid);
+ s->sk_state,
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)));
out:
return 0;
}
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index bb738c9f9146..b833677d83d6 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -468,7 +468,7 @@ static int irda_open_tsap(struct irda_sock *self, __u8 tsap_sel, char *name)
notify_t notify;
if (self->tsap) {
- IRDA_WARNING("%s: busy!\n", __func__);
+ IRDA_DEBUG(0, "%s: busy!\n", __func__);
return -EBUSY;
}
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 8b915f3ac3b9..308939128359 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -99,7 +99,6 @@ pi_param_info_t ircomm_param_info = { pi_major_call_table, 3, 0x0f, 4 };
*/
int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
{
- struct tty_struct *tty;
unsigned long flags;
struct sk_buff *skb;
int count;
@@ -109,10 +108,6 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
IRDA_ASSERT(self != NULL, return -1;);
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
- tty = self->tty;
- if (!tty)
- return 0;
-
/* Make sure we don't send parameters for raw mode */
if (self->service_type == IRCOMM_3_WIRE_RAW)
return 0;
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 6b9d5a0e42f9..95a3a7a336ba 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -52,6 +52,8 @@
#include <net/irda/ircomm_tty_attach.h>
#include <net/irda/ircomm_tty.h>
+static int ircomm_tty_install(struct tty_driver *driver,
+ struct tty_struct *tty);
static int ircomm_tty_open(struct tty_struct *tty, struct file *filp);
static void ircomm_tty_close(struct tty_struct * tty, struct file *filp);
static int ircomm_tty_write(struct tty_struct * tty,
@@ -82,6 +84,7 @@ static struct tty_driver *driver;
static hashbin_t *ircomm_tty = NULL;
static const struct tty_operations ops = {
+ .install = ircomm_tty_install,
.open = ircomm_tty_open,
.close = ircomm_tty_close,
.write = ircomm_tty_write,
@@ -104,6 +107,35 @@ static const struct tty_operations ops = {
#endif /* CONFIG_PROC_FS */
};
+static void ircomm_port_raise_dtr_rts(struct tty_port *port, int raise)
+{
+ struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb,
+ port);
+ /*
+ * Here, we use to lock those two guys, but as ircomm_param_request()
+ * does it itself, I don't see the point (and I see the deadlock).
+ * Jean II
+ */
+ if (raise)
+ self->settings.dte |= IRCOMM_RTS | IRCOMM_DTR;
+ else
+ self->settings.dte &= ~(IRCOMM_RTS | IRCOMM_DTR);
+
+ ircomm_param_request(self, IRCOMM_DTE, TRUE);
+}
+
+static int ircomm_port_carrier_raised(struct tty_port *port)
+{
+ struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb,
+ port);
+ return self->settings.dce & IRCOMM_CD;
+}
+
+static const struct tty_port_operations ircomm_port_ops = {
+ .dtr_rts = ircomm_port_raise_dtr_rts,
+ .carrier_raised = ircomm_port_carrier_raised,
+};
+
/*
* Function ircomm_tty_init()
*
@@ -194,7 +226,7 @@ static int ircomm_tty_startup(struct ircomm_tty_cb *self)
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
/* Check if already open */
- if (test_and_set_bit(ASYNC_B_INITIALIZED, &self->flags)) {
+ if (test_and_set_bit(ASYNCB_INITIALIZED, &self->port.flags)) {
IRDA_DEBUG(2, "%s(), already open so break out!\n", __func__ );
return 0;
}
@@ -231,7 +263,7 @@ static int ircomm_tty_startup(struct ircomm_tty_cb *self)
return 0;
err:
- clear_bit(ASYNC_B_INITIALIZED, &self->flags);
+ clear_bit(ASYNCB_INITIALIZED, &self->port.flags);
return ret;
}
@@ -242,72 +274,62 @@ err:
*
*/
static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
- struct file *filp)
+ struct tty_struct *tty, struct file *filp)
{
+ struct tty_port *port = &self->port;
DECLARE_WAITQUEUE(wait, current);
int retval;
int do_clocal = 0, extra_count = 0;
unsigned long flags;
- struct tty_struct *tty;
IRDA_DEBUG(2, "%s()\n", __func__ );
- tty = self->tty;
-
/*
* If non-blocking mode is set, or the port is not enabled,
* then make the check up front and then exit.
*/
if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
/* nonblock mode is set or port is not enabled */
- self->flags |= ASYNC_NORMAL_ACTIVE;
+ port->flags |= ASYNC_NORMAL_ACTIVE;
IRDA_DEBUG(1, "%s(), O_NONBLOCK requested!\n", __func__ );
return 0;
}
- if (tty->termios->c_cflag & CLOCAL) {
+ if (tty->termios.c_cflag & CLOCAL) {
IRDA_DEBUG(1, "%s(), doing CLOCAL!\n", __func__ );
do_clocal = 1;
}
/* Wait for carrier detect and the line to become
* free (i.e., not in use by the callout). While we are in
- * this loop, self->open_count is dropped by one, so that
+ * this loop, port->count is dropped by one, so that
* mgsl_close() knows when to free things. We restore it upon
* exit, either normal or abnormal.
*/
retval = 0;
- add_wait_queue(&self->open_wait, &wait);
+ add_wait_queue(&port->open_wait, &wait);
IRDA_DEBUG(2, "%s(%d):block_til_ready before block on %s open_count=%d\n",
- __FILE__,__LINE__, tty->driver->name, self->open_count );
+ __FILE__, __LINE__, tty->driver->name, port->count);
- /* As far as I can see, we protect open_count - Jean II */
- spin_lock_irqsave(&self->spinlock, flags);
+ spin_lock_irqsave(&port->lock, flags);
if (!tty_hung_up_p(filp)) {
extra_count = 1;
- self->open_count--;
+ port->count--;
}
- spin_unlock_irqrestore(&self->spinlock, flags);
- self->blocked_open++;
+ spin_unlock_irqrestore(&port->lock, flags);
+ port->blocked_open++;
while (1) {
- if (tty->termios->c_cflag & CBAUD) {
- /* Here, we use to lock those two guys, but
- * as ircomm_param_request() does it itself,
- * I don't see the point (and I see the deadlock).
- * Jean II */
- self->settings.dte |= IRCOMM_RTS + IRCOMM_DTR;
-
- ircomm_param_request(self, IRCOMM_DTE, TRUE);
- }
+ if (tty->termios.c_cflag & CBAUD)
+ tty_port_raise_dtr_rts(port);
current->state = TASK_INTERRUPTIBLE;
if (tty_hung_up_p(filp) ||
- !test_bit(ASYNC_B_INITIALIZED, &self->flags)) {
- retval = (self->flags & ASYNC_HUP_NOTIFY) ?
+ !test_bit(ASYNCB_INITIALIZED, &port->flags)) {
+ retval = (port->flags & ASYNC_HUP_NOTIFY) ?
-EAGAIN : -ERESTARTSYS;
break;
}
@@ -317,8 +339,8 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
* specified, we cannot return before the IrCOMM link is
* ready
*/
- if (!test_bit(ASYNC_B_CLOSING, &self->flags) &&
- (do_clocal || (self->settings.dce & IRCOMM_CD)) &&
+ if (!test_bit(ASYNCB_CLOSING, &port->flags) &&
+ (do_clocal || tty_port_carrier_raised(port)) &&
self->state == IRCOMM_TTY_READY)
{
break;
@@ -330,46 +352,36 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
}
IRDA_DEBUG(1, "%s(%d):block_til_ready blocking on %s open_count=%d\n",
- __FILE__,__LINE__, tty->driver->name, self->open_count );
+ __FILE__, __LINE__, tty->driver->name, port->count);
schedule();
}
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&self->open_wait, &wait);
+ remove_wait_queue(&port->open_wait, &wait);
if (extra_count) {
/* ++ is not atomic, so this should be protected - Jean II */
- spin_lock_irqsave(&self->spinlock, flags);
- self->open_count++;
- spin_unlock_irqrestore(&self->spinlock, flags);
+ spin_lock_irqsave(&port->lock, flags);
+ port->count++;
+ spin_unlock_irqrestore(&port->lock, flags);
}
- self->blocked_open--;
+ port->blocked_open--;
IRDA_DEBUG(1, "%s(%d):block_til_ready after blocking on %s open_count=%d\n",
- __FILE__,__LINE__, tty->driver->name, self->open_count);
+ __FILE__, __LINE__, tty->driver->name, port->count);
if (!retval)
- self->flags |= ASYNC_NORMAL_ACTIVE;
+ port->flags |= ASYNC_NORMAL_ACTIVE;
return retval;
}
-/*
- * Function ircomm_tty_open (tty, filp)
- *
- * This routine is called when a particular tty device is opened. This
- * routine is mandatory; if this routine is not filled in, the attempted
- * open will fail with ENODEV.
- */
-static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
+
+static int ircomm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
{
struct ircomm_tty_cb *self;
unsigned int line = tty->index;
- unsigned long flags;
- int ret;
-
- IRDA_DEBUG(2, "%s()\n", __func__ );
/* Check if instance already exists */
self = hashbin_lock_find(ircomm_tty, line, NULL);
@@ -381,6 +393,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
return -ENOMEM;
}
+ tty_port_init(&self->port);
+ self->port.ops = &ircomm_port_ops;
self->magic = IRCOMM_TTY_MAGIC;
self->flow = FLOW_STOP;
@@ -388,13 +402,9 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
INIT_WORK(&self->tqueue, ircomm_tty_do_softint);
self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED;
self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED;
- self->close_delay = 5*HZ/10;
- self->closing_wait = 30*HZ;
/* Init some important stuff */
init_timer(&self->watchdog_timer);
- init_waitqueue_head(&self->open_wait);
- init_waitqueue_head(&self->close_wait);
spin_lock_init(&self->spinlock);
/*
@@ -404,31 +414,48 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
*
* Note this is completely usafe and doesn't work properly
*/
- tty->termios->c_iflag = 0;
- tty->termios->c_oflag = 0;
+ tty->termios.c_iflag = 0;
+ tty->termios.c_oflag = 0;
/* Insert into hash */
hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL);
}
- /* ++ is not atomic, so this should be protected - Jean II */
- spin_lock_irqsave(&self->spinlock, flags);
- self->open_count++;
- tty->driver_data = self;
- self->tty = tty;
- spin_unlock_irqrestore(&self->spinlock, flags);
+ return tty_port_install(&self->port, driver, tty);
+}
+
+/*
+ * Function ircomm_tty_open (tty, filp)
+ *
+ * This routine is called when a particular tty device is opened. This
+ * routine is mandatory; if this routine is not filled in, the attempted
+ * open will fail with ENODEV.
+ */
+static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
+{
+ struct ircomm_tty_cb *self = tty->driver_data;
+ unsigned long flags;
+ int ret;
+
+ IRDA_DEBUG(2, "%s()\n", __func__ );
+
+ /* ++ is not atomic, so this should be protected - Jean II */
+ spin_lock_irqsave(&self->port.lock, flags);
+ self->port.count++;
+ spin_unlock_irqrestore(&self->port.lock, flags);
+ tty_port_tty_set(&self->port, tty);
IRDA_DEBUG(1, "%s(), %s%d, count = %d\n", __func__ , tty->driver->name,
- self->line, self->open_count);
+ self->line, self->port.count);
/* Not really used by us, but lets do it anyway */
- self->tty->low_latency = (self->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+ tty->low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0;
/*
* If the port is the middle of closing, bail out now
*/
if (tty_hung_up_p(filp) ||
- test_bit(ASYNC_B_CLOSING, &self->flags)) {
+ test_bit(ASYNCB_CLOSING, &self->port.flags)) {
/* Hm, why are we blocking on ASYNC_CLOSING if we
* do return -EAGAIN/-ERESTARTSYS below anyway?
@@ -438,14 +465,15 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
* probably better sleep uninterruptible?
*/
- if (wait_event_interruptible(self->close_wait, !test_bit(ASYNC_B_CLOSING, &self->flags))) {
+ if (wait_event_interruptible(self->port.close_wait,
+ !test_bit(ASYNCB_CLOSING, &self->port.flags))) {
IRDA_WARNING("%s - got signal while blocking on ASYNC_CLOSING!\n",
__func__);
return -ERESTARTSYS;
}
#ifdef SERIAL_DO_RESTART
- return (self->flags & ASYNC_HUP_NOTIFY) ?
+ return (self->port.flags & ASYNC_HUP_NOTIFY) ?
-EAGAIN : -ERESTARTSYS;
#else
return -EAGAIN;
@@ -453,7 +481,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
}
/* Check if this is a "normal" ircomm device, or an irlpt device */
- if (line < 0x10) {
+ if (self->line < 0x10) {
self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE;
self->settings.service_type = IRCOMM_9_WIRE; /* 9 wire as default */
/* Jan Kiszka -> add DSR/RI -> Conform to IrCOMM spec */
@@ -469,7 +497,7 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
if (ret)
return ret;
- ret = ircomm_tty_block_til_ready(self, filp);
+ ret = ircomm_tty_block_til_ready(self, tty, filp);
if (ret) {
IRDA_DEBUG(2,
"%s(), returning after block_til_ready with %d\n", __func__ ,
@@ -489,81 +517,22 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
static void ircomm_tty_close(struct tty_struct *tty, struct file *filp)
{
struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- unsigned long flags;
+ struct tty_port *port = &self->port;
IRDA_DEBUG(0, "%s()\n", __func__ );
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
- spin_lock_irqsave(&self->spinlock, flags);
-
- if (tty_hung_up_p(filp)) {
- spin_unlock_irqrestore(&self->spinlock, flags);
-
- IRDA_DEBUG(0, "%s(), returning 1\n", __func__ );
- return;
- }
-
- if ((tty->count == 1) && (self->open_count != 1)) {
- /*
- * Uh, oh. tty->count is 1, which means that the tty
- * structure will be freed. state->count should always
- * be one in these conditions. If it's greater than
- * one, we've got real problems, since it means the
- * serial port won't be shutdown.
- */
- IRDA_DEBUG(0, "%s(), bad serial port count; "
- "tty->count is 1, state->count is %d\n", __func__ ,
- self->open_count);
- self->open_count = 1;
- }
-
- if (--self->open_count < 0) {
- IRDA_ERROR("%s(), bad serial port count for ttys%d: %d\n",
- __func__, self->line, self->open_count);
- self->open_count = 0;
- }
- if (self->open_count) {
- spin_unlock_irqrestore(&self->spinlock, flags);
-
- IRDA_DEBUG(0, "%s(), open count > 0\n", __func__ );
+ if (tty_port_close_start(port, tty, filp) == 0)
return;
- }
-
- /* Hum... Should be test_and_set_bit ??? - Jean II */
- set_bit(ASYNC_B_CLOSING, &self->flags);
-
- /* We need to unlock here (we were unlocking at the end of this
- * function), because tty_wait_until_sent() may schedule.
- * I don't know if the rest should be protected somehow,
- * so someone should check. - Jean II */
- spin_unlock_irqrestore(&self->spinlock, flags);
-
- /*
- * Now we wait for the transmit buffer to clear; and we notify
- * the line discipline to only process XON/XOFF characters.
- */
- tty->closing = 1;
- if (self->closing_wait != ASYNC_CLOSING_WAIT_NONE)
- tty_wait_until_sent_from_close(tty, self->closing_wait);
ircomm_tty_shutdown(self);
tty_driver_flush_buffer(tty);
- tty_ldisc_flush(tty);
-
- tty->closing = 0;
- self->tty = NULL;
- if (self->blocked_open) {
- if (self->close_delay)
- schedule_timeout_interruptible(self->close_delay);
- wake_up_interruptible(&self->open_wait);
- }
-
- self->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
- wake_up_interruptible(&self->close_wait);
+ tty_port_close_end(port, tty);
+ tty_port_tty_set(port, NULL);
}
/*
@@ -606,7 +575,7 @@ static void ircomm_tty_do_softint(struct work_struct *work)
if (!self || self->magic != IRCOMM_TTY_MAGIC)
return;
- tty = self->tty;
+ tty = tty_port_tty_get(&self->port);
if (!tty)
return;
@@ -627,7 +596,7 @@ static void ircomm_tty_do_softint(struct work_struct *work)
}
if (tty->hw_stopped)
- return;
+ goto put;
/* Unlink transmit buffer */
spin_lock_irqsave(&self->spinlock, flags);
@@ -646,6 +615,8 @@ static void ircomm_tty_do_softint(struct work_struct *work)
/* Check if user (still) wants to be waken up */
tty_wakeup(tty);
+put:
+ tty_kref_put(tty);
}
/*
@@ -880,7 +851,7 @@ static void ircomm_tty_throttle(struct tty_struct *tty)
ircomm_tty_send_xchar(tty, STOP_CHAR(tty));
/* Hardware flow control? */
- if (tty->termios->c_cflag & CRTSCTS) {
+ if (tty->termios.c_cflag & CRTSCTS) {
self->settings.dte &= ~IRCOMM_RTS;
self->settings.dte |= IRCOMM_DELTA_RTS;
@@ -912,7 +883,7 @@ static void ircomm_tty_unthrottle(struct tty_struct *tty)
}
/* Using hardware flow control? */
- if (tty->termios->c_cflag & CRTSCTS) {
+ if (tty->termios.c_cflag & CRTSCTS) {
self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS);
ircomm_param_request(self, IRCOMM_DTE, TRUE);
@@ -955,7 +926,7 @@ static void ircomm_tty_shutdown(struct ircomm_tty_cb *self)
IRDA_DEBUG(0, "%s()\n", __func__ );
- if (!test_and_clear_bit(ASYNC_B_INITIALIZED, &self->flags))
+ if (!test_and_clear_bit(ASYNCB_INITIALIZED, &self->port.flags))
return;
ircomm_tty_detach_cable(self);
@@ -994,6 +965,7 @@ static void ircomm_tty_shutdown(struct ircomm_tty_cb *self)
static void ircomm_tty_hangup(struct tty_struct *tty)
{
struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
+ struct tty_port *port = &self->port;
unsigned long flags;
IRDA_DEBUG(0, "%s()\n", __func__ );
@@ -1004,14 +976,17 @@ static void ircomm_tty_hangup(struct tty_struct *tty)
/* ircomm_tty_flush_buffer(tty); */
ircomm_tty_shutdown(self);
- /* I guess we need to lock here - Jean II */
- spin_lock_irqsave(&self->spinlock, flags);
- self->flags &= ~ASYNC_NORMAL_ACTIVE;
- self->tty = NULL;
- self->open_count = 0;
- spin_unlock_irqrestore(&self->spinlock, flags);
+ spin_lock_irqsave(&port->lock, flags);
+ port->flags &= ~ASYNC_NORMAL_ACTIVE;
+ if (port->tty) {
+ set_bit(TTY_IO_ERROR, &port->tty->flags);
+ tty_kref_put(port->tty);
+ }
+ port->tty = NULL;
+ port->count = 0;
+ spin_unlock_irqrestore(&port->lock, flags);
- wake_up_interruptible(&self->open_wait);
+ wake_up_interruptible(&port->open_wait);
}
/*
@@ -1071,20 +1046,20 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self)
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
- tty = self->tty;
+ tty = tty_port_tty_get(&self->port);
status = self->settings.dce;
if (status & IRCOMM_DCE_DELTA_ANY) {
/*wake_up_interruptible(&self->delta_msr_wait);*/
}
- if ((self->flags & ASYNC_CHECK_CD) && (status & IRCOMM_DELTA_CD)) {
+ if ((self->port.flags & ASYNC_CHECK_CD) && (status & IRCOMM_DELTA_CD)) {
IRDA_DEBUG(2,
"%s(), ircomm%d CD now %s...\n", __func__ , self->line,
(status & IRCOMM_CD) ? "on" : "off");
if (status & IRCOMM_CD) {
- wake_up_interruptible(&self->open_wait);
+ wake_up_interruptible(&self->port.open_wait);
} else {
IRDA_DEBUG(2,
"%s(), Doing serial hangup..\n", __func__ );
@@ -1092,10 +1067,10 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self)
tty_hangup(tty);
/* Hangup will remote the tty, so better break out */
- return;
+ goto put;
}
}
- if (self->flags & ASYNC_CTS_FLOW) {
+ if (tty && tty_port_cts_enabled(&self->port)) {
if (tty->hw_stopped) {
if (status & IRCOMM_CTS) {
IRDA_DEBUG(2,
@@ -1103,10 +1078,10 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self)
tty->hw_stopped = 0;
/* Wake up processes blocked on open */
- wake_up_interruptible(&self->open_wait);
+ wake_up_interruptible(&self->port.open_wait);
schedule_work(&self->tqueue);
- return;
+ goto put;
}
} else {
if (!(status & IRCOMM_CTS)) {
@@ -1116,6 +1091,8 @@ void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self)
}
}
}
+put:
+ tty_kref_put(tty);
}
/*
@@ -1128,6 +1105,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
struct sk_buff *skb)
{
struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+ struct tty_struct *tty;
IRDA_DEBUG(2, "%s()\n", __func__ );
@@ -1135,7 +1113,8 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;);
IRDA_ASSERT(skb != NULL, return -1;);
- if (!self->tty) {
+ tty = tty_port_tty_get(&self->port);
+ if (!tty) {
IRDA_DEBUG(0, "%s(), no tty!\n", __func__ );
return 0;
}
@@ -1146,7 +1125,7 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
* Devices like WinCE can do this, and since they don't send any
* params, we can just as well declare the hardware for running.
*/
- if (self->tty->hw_stopped && (self->flow == FLOW_START)) {
+ if (tty->hw_stopped && (self->flow == FLOW_START)) {
IRDA_DEBUG(0, "%s(), polling for line settings!\n", __func__ );
ircomm_param_request(self, IRCOMM_POLL, TRUE);
@@ -1159,8 +1138,9 @@ static int ircomm_tty_data_indication(void *instance, void *sap,
* Use flip buffer functions since the code may be called from interrupt
* context
*/
- tty_insert_flip_string(self->tty, skb->data, skb->len);
- tty_flip_buffer_push(self->tty);
+ tty_insert_flip_string(tty, skb->data, skb->len);
+ tty_flip_buffer_push(tty);
+ tty_kref_put(tty);
/* No need to kfree_skb - see ircomm_ttp_data_indication() */
@@ -1211,12 +1191,13 @@ static void ircomm_tty_flow_indication(void *instance, void *sap,
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
- tty = self->tty;
+ tty = tty_port_tty_get(&self->port);
switch (cmd) {
case FLOW_START:
IRDA_DEBUG(2, "%s(), hw start!\n", __func__ );
- tty->hw_stopped = 0;
+ if (tty)
+ tty->hw_stopped = 0;
/* ircomm_tty_do_softint will take care of the rest */
schedule_work(&self->tqueue);
@@ -1224,15 +1205,19 @@ static void ircomm_tty_flow_indication(void *instance, void *sap,
default: /* If we get here, something is very wrong, better stop */
case FLOW_STOP:
IRDA_DEBUG(2, "%s(), hw stopped!\n", __func__ );
- tty->hw_stopped = 1;
+ if (tty)
+ tty->hw_stopped = 1;
break;
}
+
+ tty_kref_put(tty);
self->flow = cmd;
}
#ifdef CONFIG_PROC_FS
static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
{
+ struct tty_struct *tty;
char sep;
seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]);
@@ -1328,40 +1313,43 @@ static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
seq_puts(m, "Flags:");
sep = ' ';
- if (self->flags & ASYNC_CTS_FLOW) {
+ if (tty_port_cts_enabled(&self->port)) {
seq_printf(m, "%cASYNC_CTS_FLOW", sep);
sep = '|';
}
- if (self->flags & ASYNC_CHECK_CD) {
+ if (self->port.flags & ASYNC_CHECK_CD) {
seq_printf(m, "%cASYNC_CHECK_CD", sep);
sep = '|';
}
- if (self->flags & ASYNC_INITIALIZED) {
+ if (self->port.flags & ASYNC_INITIALIZED) {
seq_printf(m, "%cASYNC_INITIALIZED", sep);
sep = '|';
}
- if (self->flags & ASYNC_LOW_LATENCY) {
+ if (self->port.flags & ASYNC_LOW_LATENCY) {
seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
sep = '|';
}
- if (self->flags & ASYNC_CLOSING) {
+ if (self->port.flags & ASYNC_CLOSING) {
seq_printf(m, "%cASYNC_CLOSING", sep);
sep = '|';
}
- if (self->flags & ASYNC_NORMAL_ACTIVE) {
+ if (self->port.flags & ASYNC_NORMAL_ACTIVE) {
seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
sep = '|';
}
seq_putc(m, '\n');
seq_printf(m, "Role: %s\n", self->client ? "client" : "server");
- seq_printf(m, "Open count: %d\n", self->open_count);
+ seq_printf(m, "Open count: %d\n", self->port.count);
seq_printf(m, "Max data size: %d\n", self->max_data_size);
seq_printf(m, "Max header size: %d\n", self->max_header_size);
- if (self->tty)
+ tty = tty_port_tty_get(&self->port);
+ if (tty) {
seq_printf(m, "Hardware: %s\n",
- self->tty->hw_stopped ? "Stopped" : "Running");
+ tty->hw_stopped ? "Stopped" : "Running");
+ tty_kref_put(tty);
+ }
}
static int ircomm_tty_proc_show(struct seq_file *m, void *v)
diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
index b65d66e0d817..edab393e0c82 100644
--- a/net/irda/ircomm/ircomm_tty_attach.c
+++ b/net/irda/ircomm/ircomm_tty_attach.c
@@ -130,6 +130,8 @@ static int (*state[])(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event,
*/
int ircomm_tty_attach_cable(struct ircomm_tty_cb *self)
{
+ struct tty_struct *tty;
+
IRDA_DEBUG(0, "%s()\n", __func__ );
IRDA_ASSERT(self != NULL, return -1;);
@@ -142,7 +144,11 @@ int ircomm_tty_attach_cable(struct ircomm_tty_cb *self)
}
/* Make sure nobody tries to write before the link is up */
- self->tty->hw_stopped = 1;
+ tty = tty_port_tty_get(&self->port);
+ if (tty) {
+ tty->hw_stopped = 1;
+ tty_kref_put(tty);
+ }
ircomm_tty_ias_register(self);
@@ -398,23 +404,26 @@ void ircomm_tty_disconnect_indication(void *instance, void *sap,
struct sk_buff *skb)
{
struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance;
+ struct tty_struct *tty;
IRDA_DEBUG(2, "%s()\n", __func__ );
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
- if (!self->tty)
+ tty = tty_port_tty_get(&self->port);
+ if (!tty)
return;
/* This will stop control data transfers */
self->flow = FLOW_STOP;
/* Stop data transfers */
- self->tty->hw_stopped = 1;
+ tty->hw_stopped = 1;
ircomm_tty_do_event(self, IRCOMM_TTY_DISCONNECT_INDICATION, NULL,
NULL);
+ tty_kref_put(tty);
}
/*
@@ -550,12 +559,15 @@ void ircomm_tty_connect_indication(void *instance, void *sap,
*/
void ircomm_tty_link_established(struct ircomm_tty_cb *self)
{
+ struct tty_struct *tty;
+
IRDA_DEBUG(2, "%s()\n", __func__ );
IRDA_ASSERT(self != NULL, return;);
IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;);
- if (!self->tty)
+ tty = tty_port_tty_get(&self->port);
+ if (!tty)
return;
del_timer(&self->watchdog_timer);
@@ -566,19 +578,22 @@ void ircomm_tty_link_established(struct ircomm_tty_cb *self)
* will have to wait for the peer device (DCE) to raise the CTS
* line.
*/
- if ((self->flags & ASYNC_CTS_FLOW) && ((self->settings.dce & IRCOMM_CTS) == 0)) {
+ if (tty_port_cts_enabled(&self->port) &&
+ ((self->settings.dce & IRCOMM_CTS) == 0)) {
IRDA_DEBUG(0, "%s(), waiting for CTS ...\n", __func__ );
- return;
+ goto put;
} else {
IRDA_DEBUG(1, "%s(), starting hardware!\n", __func__ );
- self->tty->hw_stopped = 0;
+ tty->hw_stopped = 0;
/* Wake up processes blocked on open */
- wake_up_interruptible(&self->open_wait);
+ wake_up_interruptible(&self->port.open_wait);
}
schedule_work(&self->tqueue);
+put:
+ tty_kref_put(tty);
}
/*
@@ -977,14 +992,17 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self,
ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH);
ircomm_tty_start_watchdog_timer(self, 3*HZ);
- if (self->flags & ASYNC_CHECK_CD) {
+ if (self->port.flags & ASYNC_CHECK_CD) {
/* Drop carrier */
self->settings.dce = IRCOMM_DELTA_CD;
ircomm_tty_check_modem_status(self);
} else {
+ struct tty_struct *tty = tty_port_tty_get(&self->port);
IRDA_DEBUG(0, "%s(), hanging up!\n", __func__ );
- if (self->tty)
- tty_hangup(self->tty);
+ if (tty) {
+ tty_hangup(tty);
+ tty_kref_put(tty);
+ }
}
break;
default:
diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c
index d0667d68351d..b343f50dc8d7 100644
--- a/net/irda/ircomm/ircomm_tty_ioctl.c
+++ b/net/irda/ircomm/ircomm_tty_ioctl.c
@@ -52,17 +52,18 @@
* Change speed of the driver. If the remote device is a DCE, then this
* should make it change the speed of its serial port
*/
-static void ircomm_tty_change_speed(struct ircomm_tty_cb *self)
+static void ircomm_tty_change_speed(struct ircomm_tty_cb *self,
+ struct tty_struct *tty)
{
unsigned int cflag, cval;
int baud;
IRDA_DEBUG(2, "%s()\n", __func__ );
- if (!self->tty || !self->tty->termios || !self->ircomm)
+ if (!self->ircomm)
return;
- cflag = self->tty->termios->c_cflag;
+ cflag = tty->termios.c_cflag;
/* byte size and parity */
switch (cflag & CSIZE) {
@@ -81,7 +82,7 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self)
cval |= IRCOMM_PARITY_EVEN;
/* Determine divisor based on baud rate */
- baud = tty_get_baud_rate(self->tty);
+ baud = tty_get_baud_rate(tty);
if (!baud)
baud = 9600; /* B0 transition handled in rs_set_termios */
@@ -90,19 +91,19 @@ static void ircomm_tty_change_speed(struct ircomm_tty_cb *self)
/* CTS flow control flag and modem status interrupts */
if (cflag & CRTSCTS) {
- self->flags |= ASYNC_CTS_FLOW;
+ self->port.flags |= ASYNC_CTS_FLOW;
self->settings.flow_control |= IRCOMM_RTS_CTS_IN;
/* This got me. Bummer. Jean II */
if (self->service_type == IRCOMM_3_WIRE_RAW)
IRDA_WARNING("%s(), enabling RTS/CTS on link that doesn't support it (3-wire-raw)\n", __func__);
} else {
- self->flags &= ~ASYNC_CTS_FLOW;
+ self->port.flags &= ~ASYNC_CTS_FLOW;
self->settings.flow_control &= ~IRCOMM_RTS_CTS_IN;
}
if (cflag & CLOCAL)
- self->flags &= ~ASYNC_CHECK_CD;
+ self->port.flags &= ~ASYNC_CHECK_CD;
else
- self->flags |= ASYNC_CHECK_CD;
+ self->port.flags |= ASYNC_CHECK_CD;
#if 0
/*
* Set up parity check flag
@@ -148,18 +149,18 @@ void ircomm_tty_set_termios(struct tty_struct *tty,
struct ktermios *old_termios)
{
struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data;
- unsigned int cflag = tty->termios->c_cflag;
+ unsigned int cflag = tty->termios.c_cflag;
IRDA_DEBUG(2, "%s()\n", __func__ );
if ((cflag == old_termios->c_cflag) &&
- (RELEVANT_IFLAG(tty->termios->c_iflag) ==
+ (RELEVANT_IFLAG(tty->termios.c_iflag) ==
RELEVANT_IFLAG(old_termios->c_iflag)))
{
return;
}
- ircomm_tty_change_speed(self);
+ ircomm_tty_change_speed(self, tty);
/* Handle transition to B0 status */
if ((old_termios->c_cflag & CBAUD) &&
@@ -172,7 +173,7 @@ void ircomm_tty_set_termios(struct tty_struct *tty,
if (!(old_termios->c_cflag & CBAUD) &&
(cflag & CBAUD)) {
self->settings.dte |= IRCOMM_DTR;
- if (!(tty->termios->c_cflag & CRTSCTS) ||
+ if (!(tty->termios.c_cflag & CRTSCTS) ||
!test_bit(TTY_THROTTLED, &tty->flags)) {
self->settings.dte |= IRCOMM_RTS;
}
@@ -181,7 +182,7 @@ void ircomm_tty_set_termios(struct tty_struct *tty,
/* Handle turning off CRTSCTS */
if ((old_termios->c_cflag & CRTSCTS) &&
- !(tty->termios->c_cflag & CRTSCTS))
+ !(tty->termios.c_cflag & CRTSCTS))
{
tty->hw_stopped = 0;
ircomm_tty_start(tty);
@@ -270,10 +271,10 @@ static int ircomm_tty_get_serial_info(struct ircomm_tty_cb *self,
memset(&info, 0, sizeof(info));
info.line = self->line;
- info.flags = self->flags;
+ info.flags = self->port.flags;
info.baud_base = self->settings.data_rate;
- info.close_delay = self->close_delay;
- info.closing_wait = self->closing_wait;
+ info.close_delay = self->port.close_delay;
+ info.closing_wait = self->port.closing_wait;
/* For compatibility */
info.type = PORT_16550A;
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index 6c7c4b92e4f8..c32971269280 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -100,7 +100,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info)
goto err_out;
}
- hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
&irda_nl_family, 0, IRDA_NL_CMD_GET_MODE);
if (hdr == NULL) {
ret = -EMSGSIZE;
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 5c93f2952b08..1002e3396f72 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -440,7 +440,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
*/
lsap = irlmp_open_lsap(stsap_sel, &ttp_notify, 0);
if (lsap == NULL) {
- IRDA_WARNING("%s: unable to allocate LSAP!!\n", __func__);
+ IRDA_DEBUG(0, "%s: unable to allocate LSAP!!\n", __func__);
return NULL;
}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 34e418508a67..08897a3c7ec7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -54,7 +54,7 @@ struct pfkey_sock {
struct {
uint8_t msg_version;
- uint32_t msg_pid;
+ uint32_t msg_portid;
int (*dump)(struct pfkey_sock *sk);
void (*done)(struct pfkey_sock *sk);
union {
@@ -1447,7 +1447,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
hdr->sadb_msg_errno = 0;
hdr->sadb_msg_reserved = 0;
hdr->sadb_msg_seq = c->seq;
- hdr->sadb_msg_pid = c->pid;
+ hdr->sadb_msg_pid = c->portid;
pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x));
@@ -1486,7 +1486,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
else
c.event = XFRM_MSG_UPDSA;
c.seq = hdr->sadb_msg_seq;
- c.pid = hdr->sadb_msg_pid;
+ c.portid = hdr->sadb_msg_pid;
km_state_notify(x, &c);
out:
xfrm_state_put(x);
@@ -1523,7 +1523,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_
goto out;
c.seq = hdr->sadb_msg_seq;
- c.pid = hdr->sadb_msg_pid;
+ c.portid = hdr->sadb_msg_pid;
c.event = XFRM_MSG_DELSA;
km_state_notify(x, &c);
out:
@@ -1701,7 +1701,7 @@ static int key_notify_sa_flush(const struct km_event *c)
hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto);
hdr->sadb_msg_type = SADB_FLUSH;
hdr->sadb_msg_seq = c->seq;
- hdr->sadb_msg_pid = c->pid;
+ hdr->sadb_msg_pid = c->portid;
hdr->sadb_msg_version = PF_KEY_V2;
hdr->sadb_msg_errno = (uint8_t) 0;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
@@ -1736,7 +1736,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
c.data.proto = proto;
c.seq = hdr->sadb_msg_seq;
- c.pid = hdr->sadb_msg_pid;
+ c.portid = hdr->sadb_msg_pid;
c.event = XFRM_MSG_FLUSHSA;
c.net = net;
km_state_notify(NULL, &c);
@@ -1764,7 +1764,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_reserved = 0;
out_hdr->sadb_msg_seq = count + 1;
- out_hdr->sadb_msg_pid = pfk->dump.msg_pid;
+ out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
@@ -1798,7 +1798,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
return -EINVAL;
pfk->dump.msg_version = hdr->sadb_msg_version;
- pfk->dump.msg_pid = hdr->sadb_msg_pid;
+ pfk->dump.msg_portid = hdr->sadb_msg_pid;
pfk->dump.dump = pfkey_dump_sa;
pfk->dump.done = pfkey_dump_sa_done;
xfrm_state_walk_init(&pfk->dump.u.state, proto);
@@ -1923,6 +1923,9 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy);
struct sadb_x_ipsecrequest *rq = (void*)(pol+1);
+ if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy))
+ return -EINVAL;
+
while (len >= sizeof(struct sadb_x_ipsecrequest)) {
if ((err = parse_ipsecrequest(xp, rq)) < 0)
return err;
@@ -2157,7 +2160,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
out_hdr->sadb_msg_type = event2poltype(c->event);
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = c->seq;
- out_hdr->sadb_msg_pid = c->pid;
+ out_hdr->sadb_msg_pid = c->portid;
pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp));
return 0;
@@ -2272,7 +2275,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
c.event = XFRM_MSG_NEWPOLICY;
c.seq = hdr->sadb_msg_seq;
- c.pid = hdr->sadb_msg_pid;
+ c.portid = hdr->sadb_msg_pid;
km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
xfrm_pol_put(xp);
@@ -2351,7 +2354,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
goto out;
c.seq = hdr->sadb_msg_seq;
- c.pid = hdr->sadb_msg_pid;
+ c.portid = hdr->sadb_msg_pid;
c.data.byid = 0;
c.event = XFRM_MSG_DELPOLICY;
km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
@@ -2597,7 +2600,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
if (err)
goto out;
c.seq = hdr->sadb_msg_seq;
- c.pid = hdr->sadb_msg_pid;
+ c.portid = hdr->sadb_msg_pid;
c.data.byid = 1;
c.event = XFRM_MSG_DELPOLICY;
km_policy_notify(xp, dir, &c);
@@ -2634,7 +2637,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = count + 1;
- out_hdr->sadb_msg_pid = pfk->dump.msg_pid;
+ out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
@@ -2663,7 +2666,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb
return -EBUSY;
pfk->dump.msg_version = hdr->sadb_msg_version;
- pfk->dump.msg_pid = hdr->sadb_msg_pid;
+ pfk->dump.msg_portid = hdr->sadb_msg_pid;
pfk->dump.dump = pfkey_dump_sp;
pfk->dump.done = pfkey_dump_sp_done;
xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN);
@@ -2682,7 +2685,7 @@ static int key_notify_policy_flush(const struct km_event *c)
hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
hdr->sadb_msg_type = SADB_X_SPDFLUSH;
hdr->sadb_msg_seq = c->seq;
- hdr->sadb_msg_pid = c->pid;
+ hdr->sadb_msg_pid = c->portid;
hdr->sadb_msg_version = PF_KEY_V2;
hdr->sadb_msg_errno = (uint8_t) 0;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
@@ -2711,7 +2714,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
c.data.type = XFRM_POLICY_TYPE_MAIN;
c.event = XFRM_MSG_FLUSHPOLICY;
- c.pid = hdr->sadb_msg_pid;
+ c.portid = hdr->sadb_msg_pid;
c.seq = hdr->sadb_msg_seq;
c.net = net;
km_policy_notify(NULL, 0, &c);
@@ -3024,7 +3027,7 @@ static u32 get_acqseq(void)
return res;
}
-static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir)
+static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
{
struct sk_buff *skb;
struct sadb_msg *hdr;
@@ -3105,7 +3108,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
- pol->sadb_x_policy_dir = dir+1;
+ pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1;
pol->sadb_x_policy_id = xp->index;
/* Set sadb_comb's. */
@@ -3661,7 +3664,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
atomic_read(&s->sk_refcnt),
sk_rmem_alloc_get(s),
sk_wmem_alloc_get(s),
- sock_i_uid(s),
+ from_kuid_munged(seq_user_ns(f), sock_i_uid(s)),
sock_i_ino(s)
);
return 0;
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index 4b1e71751e10..147a8fd47a17 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -4,6 +4,7 @@
menuconfig L2TP
tristate "Layer Two Tunneling Protocol (L2TP)"
+ depends on (IPV6 || IPV6=n)
depends on INET
---help---
Layer Two Tunneling Protocol
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 393355d37b47..1a9f3723c13c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1347,11 +1347,10 @@ static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
/* Remove from tunnel list */
spin_lock_bh(&pn->l2tp_tunnel_list_lock);
list_del_rcu(&tunnel->list);
+ kfree_rcu(tunnel, rcu);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- synchronize_rcu();
atomic_dec(&l2tp_tunnel_count);
- kfree(tunnel);
}
/* Create a socket for the tunnel, if one isn't set up by
@@ -1502,6 +1501,8 @@ out:
return err;
}
+static struct lock_class_key l2tp_socket_class;
+
int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
struct l2tp_tunnel *tunnel = NULL;
@@ -1606,6 +1607,8 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
tunnel->old_sk_destruct = sk->sk_destruct;
sk->sk_destruct = &l2tp_tunnel_destruct;
tunnel->sock = sk;
+ lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
+
sk->sk_allocation = GFP_ATOMIC;
/* Add tunnel to our list */
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a38ec6cdeee1..56d583e083a7 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -163,6 +163,7 @@ struct l2tp_tunnel_cfg {
struct l2tp_tunnel {
int magic; /* Should be L2TP_TUNNEL_MAGIC */
+ struct rcu_head rcu;
rwlock_t hlist_lock; /* protect session_hlist */
struct hlist_head session_hlist[L2TP_HASH_SIZE];
/* hashed list of sessions,
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index f9ee74deeac2..37b8b8ba31f7 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -67,6 +67,7 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
return net_generic(net, l2tp_eth_net_id);
}
+static struct lock_class_key l2tp_eth_tx_busylock;
static int l2tp_eth_dev_init(struct net_device *dev)
{
struct l2tp_eth *priv = netdev_priv(dev);
@@ -74,7 +75,7 @@ static int l2tp_eth_dev_init(struct net_device *dev)
priv->dev = dev;
eth_hw_addr_random(dev);
memset(&dev->broadcast[0], 0xff, 6);
-
+ dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock;
return 0;
}
@@ -153,7 +154,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, skb->data, length);
}
- if (!pskb_may_pull(skb, sizeof(ETH_HLEN)))
+ if (!pskb_may_pull(skb, ETH_HLEN))
goto error;
secpath_reset(skb);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 35e1e4bde587..927547171bc7 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -410,6 +410,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
lsa->l2tp_family = AF_INET6;
lsa->l2tp_flowinfo = 0;
lsa->l2tp_scope_id = 0;
+ lsa->l2tp_unused = 0;
if (peer) {
if (!lsk->peer_conn_id)
return -ENOTCONN;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index d71cd9229a47..6c4cc12c7414 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -78,16 +78,16 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
&l2tp_nl_family, 0, L2TP_CMD_NOOP);
- if (IS_ERR(hdr)) {
- ret = PTR_ERR(hdr);
+ if (!hdr) {
+ ret = -EMSGSIZE;
goto err_out;
}
genlmsg_end(msg, hdr);
- return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+ return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
err_out:
nlmsg_free(msg);
@@ -235,7 +235,7 @@ out:
return ret;
}
-static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags,
struct l2tp_tunnel *tunnel)
{
void *hdr;
@@ -248,10 +248,10 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
struct l2tp_stats stats;
unsigned int start;
- hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
+ hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags,
L2TP_CMD_TUNNEL_GET);
- if (IS_ERR(hdr))
- return PTR_ERR(hdr);
+ if (!hdr)
+ return -EMSGSIZE;
if (nla_put_u8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version) ||
nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) ||
@@ -359,12 +359,12 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq,
+ ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, tunnel);
if (ret < 0)
goto err_out;
- return genlmsg_unicast(net, msg, info->snd_pid);
+ return genlmsg_unicast(net, msg, info->snd_portid);
err_out:
nlmsg_free(msg);
@@ -384,7 +384,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback
if (tunnel == NULL)
goto out;
- if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid,
+ if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
tunnel) <= 0)
goto out;
@@ -604,7 +604,7 @@ out:
return ret;
}
-static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags,
struct l2tp_session *session)
{
void *hdr;
@@ -616,9 +616,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags
sk = tunnel->sock;
- hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
- if (IS_ERR(hdr))
- return PTR_ERR(hdr);
+ hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
+ if (!hdr)
+ return -EMSGSIZE;
if (nla_put_u32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id) ||
nla_put_u32(skb, L2TP_ATTR_SESSION_ID, session->session_id) ||
@@ -705,12 +705,12 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq,
+ ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
0, session);
if (ret < 0)
goto err_out;
- return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+ return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
err_out:
nlmsg_free(msg);
@@ -742,7 +742,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
continue;
}
- if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid,
+ if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
session) <= 0)
break;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index f6fe4d400502..c2190005a114 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -969,14 +969,13 @@ static int llc_ui_getname(struct socket *sock, struct sockaddr *uaddr,
struct sockaddr_llc sllc;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
- int rc = 0;
+ int rc = -EBADF;
memset(&sllc, 0, sizeof(sllc));
lock_sock(sk);
if (sock_flag(sk, SOCK_ZAPPED))
goto out;
*uaddrlen = sizeof(sllc);
- memset(uaddr, 0, *uaddrlen);
if (peer) {
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
@@ -1206,7 +1205,7 @@ static int __init llc2_init(void)
rc = llc_proc_init();
if (rc != 0) {
printk(llc_proc_err_msg);
- goto out_unregister_llc_proto;
+ goto out_station;
}
rc = llc_sysctl_init();
if (rc) {
@@ -1226,7 +1225,8 @@ out_sysctl:
llc_sysctl_exit();
out_proc:
llc_proc_exit();
-out_unregister_llc_proto:
+out_station:
+ llc_station_exit();
proto_unregister(&llc_proto);
goto out;
}
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index e32cab44ea95..dd3e83328ad5 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -42,6 +42,7 @@ static void (*llc_type_handlers[2])(struct llc_sap *sap,
void llc_add_pack(int type, void (*handler)(struct llc_sap *sap,
struct sk_buff *skb))
{
+ smp_wmb(); /* ensure initialisation is complete before it's called */
if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
llc_type_handlers[type - 1] = handler;
}
@@ -50,11 +51,19 @@ void llc_remove_pack(int type)
{
if (type == LLC_DEST_SAP || type == LLC_DEST_CONN)
llc_type_handlers[type - 1] = NULL;
+ synchronize_net();
}
void llc_set_station_handler(void (*handler)(struct sk_buff *skb))
{
+ /* Ensure initialisation is complete before it's called */
+ if (handler)
+ smp_wmb();
+
llc_station_handler = handler;
+
+ if (!handler)
+ synchronize_net();
}
/**
@@ -150,6 +159,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
int dest;
int (*rcv)(struct sk_buff *, struct net_device *,
struct packet_type *, struct net_device *);
+ void (*sta_handler)(struct sk_buff *skb);
+ void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
if (!net_eq(dev_net(dev), &init_net))
goto drop;
@@ -182,7 +193,8 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
*/
rcv = rcu_dereference(sap->rcv_func);
dest = llc_pdu_type(skb);
- if (unlikely(!dest || !llc_type_handlers[dest - 1])) {
+ sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL;
+ if (unlikely(!sap_handler)) {
if (rcv)
rcv(skb, dev, pt, orig_dev);
else
@@ -193,7 +205,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
if (cskb)
rcv(cskb, dev, pt, orig_dev);
}
- llc_type_handlers[dest - 1](sap, skb);
+ sap_handler(sap, skb);
}
llc_sap_put(sap);
out:
@@ -202,9 +214,10 @@ drop:
kfree_skb(skb);
goto out;
handle_station:
- if (!llc_station_handler)
+ sta_handler = ACCESS_ONCE(llc_station_handler);
+ if (!sta_handler)
goto drop;
- llc_station_handler(skb);
+ sta_handler(skb);
goto out;
}
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index a1839c004357..7b4799cfbf8d 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -151,7 +151,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v)
sk_wmem_alloc_get(sk),
sk_rmem_alloc_get(sk) - llc->copied_seq,
sk->sk_state,
- sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1,
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
llc->link);
out:
return 0;
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 39a8d8924b9c..204a8351efff 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -25,253 +25,26 @@
#include <net/llc_s_st.h>
#include <net/llc_pdu.h>
-/**
- * struct llc_station - LLC station component
- *
- * SAP and connection resource manager, one per adapter.
- *
- * @state: state of station
- * @xid_r_count: XID response PDU counter
- * @mac_sa: MAC source address
- * @sap_list: list of related SAPs
- * @ev_q: events entering state mach.
- * @mac_pdu_q: PDUs ready to send to MAC
- */
-struct llc_station {
- u8 state;
- u8 xid_r_count;
- struct timer_list ack_timer;
- u8 retry_count;
- u8 maximum_retry;
- struct {
- struct sk_buff_head list;
- spinlock_t lock;
- } ev_q;
- struct sk_buff_head mac_pdu_q;
-};
-
-#define LLC_STATION_ACK_TIME (3 * HZ)
-
-int sysctl_llc_station_ack_timeout = LLC_STATION_ACK_TIME;
-
-/* Types of events (possible values in 'ev->type') */
-#define LLC_STATION_EV_TYPE_SIMPLE 1
-#define LLC_STATION_EV_TYPE_CONDITION 2
-#define LLC_STATION_EV_TYPE_PRIM 3
-#define LLC_STATION_EV_TYPE_PDU 4 /* command/response PDU */
-#define LLC_STATION_EV_TYPE_ACK_TMR 5
-#define LLC_STATION_EV_TYPE_RPT_STATUS 6
-
-/* Events */
-#define LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK 1
-#define LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK 2
-#define LLC_STATION_EV_ACK_TMR_EXP_LT_RETRY_CNT_MAX_RETRY 3
-#define LLC_STATION_EV_ACK_TMR_EXP_EQ_RETRY_CNT_MAX_RETRY 4
-#define LLC_STATION_EV_RX_NULL_DSAP_XID_C 5
-#define LLC_STATION_EV_RX_NULL_DSAP_0_XID_R_XID_R_CNT_EQ 6
-#define LLC_STATION_EV_RX_NULL_DSAP_1_XID_R_XID_R_CNT_EQ 7
-#define LLC_STATION_EV_RX_NULL_DSAP_TEST_C 8
-#define LLC_STATION_EV_DISABLE_REQ 9
-
-struct llc_station_state_ev {
- u8 type;
- u8 prim;
- u8 prim_type;
- u8 reason;
- struct list_head node; /* node in station->ev_q.list */
-};
-
-static __inline__ struct llc_station_state_ev *
- llc_station_ev(struct sk_buff *skb)
-{
- return (struct llc_station_state_ev *)skb->cb;
-}
-
-typedef int (*llc_station_ev_t)(struct sk_buff *skb);
-
-#define LLC_STATION_STATE_DOWN 1 /* initial state */
-#define LLC_STATION_STATE_DUP_ADDR_CHK 2
-#define LLC_STATION_STATE_UP 3
-
-#define LLC_NBR_STATION_STATES 3 /* size of state table */
-
-typedef int (*llc_station_action_t)(struct sk_buff *skb);
-
-/* Station component state table structure */
-struct llc_station_state_trans {
- llc_station_ev_t ev;
- u8 next_state;
- llc_station_action_t *ev_actions;
-};
-
-struct llc_station_state {
- u8 curr_state;
- struct llc_station_state_trans **transitions;
-};
-
-static struct llc_station llc_main_station;
-
-static int llc_stat_ev_enable_with_dup_addr_check(struct sk_buff *skb)
-{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
-
- return ev->type == LLC_STATION_EV_TYPE_SIMPLE &&
- ev->prim_type ==
- LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK ? 0 : 1;
-}
-
-static int llc_stat_ev_enable_without_dup_addr_check(struct sk_buff *skb)
-{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
-
- return ev->type == LLC_STATION_EV_TYPE_SIMPLE &&
- ev->prim_type ==
- LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK ? 0 : 1;
-}
-
-static int llc_stat_ev_ack_tmr_exp_lt_retry_cnt_max_retry(struct sk_buff *skb)
-{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
-
- return ev->type == LLC_STATION_EV_TYPE_ACK_TMR &&
- llc_main_station.retry_count <
- llc_main_station.maximum_retry ? 0 : 1;
-}
-
-static int llc_stat_ev_ack_tmr_exp_eq_retry_cnt_max_retry(struct sk_buff *skb)
-{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
-
- return ev->type == LLC_STATION_EV_TYPE_ACK_TMR &&
- llc_main_station.retry_count ==
- llc_main_station.maximum_retry ? 0 : 1;
-}
-
static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb)
{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
- return ev->type == LLC_STATION_EV_TYPE_PDU &&
- LLC_PDU_IS_CMD(pdu) && /* command PDU */
+ return LLC_PDU_IS_CMD(pdu) && /* command PDU */
LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID &&
!pdu->dsap ? 0 : 1; /* NULL DSAP value */
}
-static int llc_stat_ev_rx_null_dsap_0_xid_r_xid_r_cnt_eq(struct sk_buff *skb)
-{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
- struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
-
- return ev->type == LLC_STATION_EV_TYPE_PDU &&
- LLC_PDU_IS_RSP(pdu) && /* response PDU */
- LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
- LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID &&
- !pdu->dsap && /* NULL DSAP value */
- !llc_main_station.xid_r_count ? 0 : 1;
-}
-
-static int llc_stat_ev_rx_null_dsap_1_xid_r_xid_r_cnt_eq(struct sk_buff *skb)
-{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
- struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
-
- return ev->type == LLC_STATION_EV_TYPE_PDU &&
- LLC_PDU_IS_RSP(pdu) && /* response PDU */
- LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
- LLC_U_PDU_RSP(pdu) == LLC_1_PDU_CMD_XID &&
- !pdu->dsap && /* NULL DSAP value */
- llc_main_station.xid_r_count == 1 ? 0 : 1;
-}
-
static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb)
{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
- return ev->type == LLC_STATION_EV_TYPE_PDU &&
- LLC_PDU_IS_CMD(pdu) && /* command PDU */
+ return LLC_PDU_IS_CMD(pdu) && /* command PDU */
LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST &&
!pdu->dsap ? 0 : 1; /* NULL DSAP */
}
-static int llc_stat_ev_disable_req(struct sk_buff *skb)
-{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
-
- return ev->type == LLC_STATION_EV_TYPE_PRIM &&
- ev->prim == LLC_DISABLE_PRIM &&
- ev->prim_type == LLC_PRIM_TYPE_REQ ? 0 : 1;
-}
-
-/**
- * llc_station_send_pdu - queues PDU to send
- * @skb: Address of the PDU
- *
- * Queues a PDU to send to the MAC layer.
- */
-static void llc_station_send_pdu(struct sk_buff *skb)
-{
- skb_queue_tail(&llc_main_station.mac_pdu_q, skb);
- while ((skb = skb_dequeue(&llc_main_station.mac_pdu_q)) != NULL)
- if (dev_queue_xmit(skb))
- break;
-}
-
-static int llc_station_ac_start_ack_timer(struct sk_buff *skb)
-{
- mod_timer(&llc_main_station.ack_timer,
- jiffies + sysctl_llc_station_ack_timeout);
- return 0;
-}
-
-static int llc_station_ac_set_retry_cnt_0(struct sk_buff *skb)
-{
- llc_main_station.retry_count = 0;
- return 0;
-}
-
-static int llc_station_ac_inc_retry_cnt_by_1(struct sk_buff *skb)
-{
- llc_main_station.retry_count++;
- return 0;
-}
-
-static int llc_station_ac_set_xid_r_cnt_0(struct sk_buff *skb)
-{
- llc_main_station.xid_r_count = 0;
- return 0;
-}
-
-static int llc_station_ac_inc_xid_r_cnt_by_1(struct sk_buff *skb)
-{
- llc_main_station.xid_r_count++;
- return 0;
-}
-
-static int llc_station_ac_send_null_dsap_xid_c(struct sk_buff *skb)
-{
- int rc = 1;
- struct sk_buff *nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U,
- sizeof(struct llc_xid_info));
-
- if (!nskb)
- goto out;
- llc_pdu_header_init(nskb, LLC_PDU_TYPE_U, 0, 0, LLC_PDU_CMD);
- llc_pdu_init_as_xid_cmd(nskb, LLC_XID_NULL_CLASS_2, 127);
- rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, skb->dev->dev_addr);
- if (unlikely(rc))
- goto free;
- llc_station_send_pdu(nskb);
-out:
- return rc;
-free:
- kfree_skb(skb);
- goto out;
-}
-
static int llc_station_ac_send_xid_r(struct sk_buff *skb)
{
u8 mac_da[ETH_ALEN], dsap;
@@ -289,11 +62,11 @@ static int llc_station_ac_send_xid_r(struct sk_buff *skb)
rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da);
if (unlikely(rc))
goto free;
- llc_station_send_pdu(nskb);
+ dev_queue_xmit(nskb);
out:
return rc;
free:
- kfree_skb(skb);
+ kfree_skb(nskb);
goto out;
}
@@ -318,360 +91,14 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb)
rc = llc_mac_hdr_init(nskb, skb->dev->dev_addr, mac_da);
if (unlikely(rc))
goto free;
- llc_station_send_pdu(nskb);
+ dev_queue_xmit(nskb);
out:
return rc;
free:
- kfree_skb(skb);
+ kfree_skb(nskb);
goto out;
}
-static int llc_station_ac_report_status(struct sk_buff *skb)
-{
- return 0;
-}
-
-/* COMMON STATION STATE transitions */
-
-/* dummy last-transition indicator; common to all state transition groups
- * last entry for this state
- * all members are zeros, .bss zeroes it
- */
-static struct llc_station_state_trans llc_stat_state_trans_end;
-
-/* DOWN STATE transitions */
-
-/* state transition for LLC_STATION_EV_ENABLE_WITH_DUP_ADDR_CHECK event */
-static llc_station_action_t llc_stat_down_state_actions_1[] = {
- [0] = llc_station_ac_start_ack_timer,
- [1] = llc_station_ac_set_retry_cnt_0,
- [2] = llc_station_ac_set_xid_r_cnt_0,
- [3] = llc_station_ac_send_null_dsap_xid_c,
- [4] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_down_state_trans_1 = {
- .ev = llc_stat_ev_enable_with_dup_addr_check,
- .next_state = LLC_STATION_STATE_DUP_ADDR_CHK,
- .ev_actions = llc_stat_down_state_actions_1,
-};
-
-/* state transition for LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK event */
-static llc_station_action_t llc_stat_down_state_actions_2[] = {
- [0] = llc_station_ac_report_status, /* STATION UP */
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_down_state_trans_2 = {
- .ev = llc_stat_ev_enable_without_dup_addr_check,
- .next_state = LLC_STATION_STATE_UP,
- .ev_actions = llc_stat_down_state_actions_2,
-};
-
-/* array of pointers; one to each transition */
-static struct llc_station_state_trans *llc_stat_dwn_state_trans[] = {
- [0] = &llc_stat_down_state_trans_1,
- [1] = &llc_stat_down_state_trans_2,
- [2] = &llc_stat_state_trans_end,
-};
-
-/* UP STATE transitions */
-/* state transition for LLC_STATION_EV_DISABLE_REQ event */
-static llc_station_action_t llc_stat_up_state_actions_1[] = {
- [0] = llc_station_ac_report_status, /* STATION DOWN */
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_up_state_trans_1 = {
- .ev = llc_stat_ev_disable_req,
- .next_state = LLC_STATION_STATE_DOWN,
- .ev_actions = llc_stat_up_state_actions_1,
-};
-
-/* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */
-static llc_station_action_t llc_stat_up_state_actions_2[] = {
- [0] = llc_station_ac_send_xid_r,
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_up_state_trans_2 = {
- .ev = llc_stat_ev_rx_null_dsap_xid_c,
- .next_state = LLC_STATION_STATE_UP,
- .ev_actions = llc_stat_up_state_actions_2,
-};
-
-/* state transition for LLC_STATION_EV_RX_NULL_DSAP_TEST_C event */
-static llc_station_action_t llc_stat_up_state_actions_3[] = {
- [0] = llc_station_ac_send_test_r,
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_up_state_trans_3 = {
- .ev = llc_stat_ev_rx_null_dsap_test_c,
- .next_state = LLC_STATION_STATE_UP,
- .ev_actions = llc_stat_up_state_actions_3,
-};
-
-/* array of pointers; one to each transition */
-static struct llc_station_state_trans *llc_stat_up_state_trans [] = {
- [0] = &llc_stat_up_state_trans_1,
- [1] = &llc_stat_up_state_trans_2,
- [2] = &llc_stat_up_state_trans_3,
- [3] = &llc_stat_state_trans_end,
-};
-
-/* DUP ADDR CHK STATE transitions */
-/* state transition for LLC_STATION_EV_RX_NULL_DSAP_0_XID_R_XID_R_CNT_EQ
- * event
- */
-static llc_station_action_t llc_stat_dupaddr_state_actions_1[] = {
- [0] = llc_station_ac_inc_xid_r_cnt_by_1,
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_dupaddr_state_trans_1 = {
- .ev = llc_stat_ev_rx_null_dsap_0_xid_r_xid_r_cnt_eq,
- .next_state = LLC_STATION_STATE_DUP_ADDR_CHK,
- .ev_actions = llc_stat_dupaddr_state_actions_1,
-};
-
-/* state transition for LLC_STATION_EV_RX_NULL_DSAP_1_XID_R_XID_R_CNT_EQ
- * event
- */
-static llc_station_action_t llc_stat_dupaddr_state_actions_2[] = {
- [0] = llc_station_ac_report_status, /* DUPLICATE ADDRESS FOUND */
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_dupaddr_state_trans_2 = {
- .ev = llc_stat_ev_rx_null_dsap_1_xid_r_xid_r_cnt_eq,
- .next_state = LLC_STATION_STATE_DOWN,
- .ev_actions = llc_stat_dupaddr_state_actions_2,
-};
-
-/* state transition for LLC_STATION_EV_RX_NULL_DSAP_XID_C event */
-static llc_station_action_t llc_stat_dupaddr_state_actions_3[] = {
- [0] = llc_station_ac_send_xid_r,
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_dupaddr_state_trans_3 = {
- .ev = llc_stat_ev_rx_null_dsap_xid_c,
- .next_state = LLC_STATION_STATE_DUP_ADDR_CHK,
- .ev_actions = llc_stat_dupaddr_state_actions_3,
-};
-
-/* state transition for LLC_STATION_EV_ACK_TMR_EXP_LT_RETRY_CNT_MAX_RETRY
- * event
- */
-static llc_station_action_t llc_stat_dupaddr_state_actions_4[] = {
- [0] = llc_station_ac_start_ack_timer,
- [1] = llc_station_ac_inc_retry_cnt_by_1,
- [2] = llc_station_ac_set_xid_r_cnt_0,
- [3] = llc_station_ac_send_null_dsap_xid_c,
- [4] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_dupaddr_state_trans_4 = {
- .ev = llc_stat_ev_ack_tmr_exp_lt_retry_cnt_max_retry,
- .next_state = LLC_STATION_STATE_DUP_ADDR_CHK,
- .ev_actions = llc_stat_dupaddr_state_actions_4,
-};
-
-/* state transition for LLC_STATION_EV_ACK_TMR_EXP_EQ_RETRY_CNT_MAX_RETRY
- * event
- */
-static llc_station_action_t llc_stat_dupaddr_state_actions_5[] = {
- [0] = llc_station_ac_report_status, /* STATION UP */
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_dupaddr_state_trans_5 = {
- .ev = llc_stat_ev_ack_tmr_exp_eq_retry_cnt_max_retry,
- .next_state = LLC_STATION_STATE_UP,
- .ev_actions = llc_stat_dupaddr_state_actions_5,
-};
-
-/* state transition for LLC_STATION_EV_DISABLE_REQ event */
-static llc_station_action_t llc_stat_dupaddr_state_actions_6[] = {
- [0] = llc_station_ac_report_status, /* STATION DOWN */
- [1] = NULL,
-};
-
-static struct llc_station_state_trans llc_stat_dupaddr_state_trans_6 = {
- .ev = llc_stat_ev_disable_req,
- .next_state = LLC_STATION_STATE_DOWN,
- .ev_actions = llc_stat_dupaddr_state_actions_6,
-};
-
-/* array of pointers; one to each transition */
-static struct llc_station_state_trans *llc_stat_dupaddr_state_trans[] = {
- [0] = &llc_stat_dupaddr_state_trans_6, /* Request */
- [1] = &llc_stat_dupaddr_state_trans_4, /* Timer */
- [2] = &llc_stat_dupaddr_state_trans_5,
- [3] = &llc_stat_dupaddr_state_trans_1, /* Receive frame */
- [4] = &llc_stat_dupaddr_state_trans_2,
- [5] = &llc_stat_dupaddr_state_trans_3,
- [6] = &llc_stat_state_trans_end,
-};
-
-static struct llc_station_state
- llc_station_state_table[LLC_NBR_STATION_STATES] = {
- [LLC_STATION_STATE_DOWN - 1] = {
- .curr_state = LLC_STATION_STATE_DOWN,
- .transitions = llc_stat_dwn_state_trans,
- },
- [LLC_STATION_STATE_DUP_ADDR_CHK - 1] = {
- .curr_state = LLC_STATION_STATE_DUP_ADDR_CHK,
- .transitions = llc_stat_dupaddr_state_trans,
- },
- [LLC_STATION_STATE_UP - 1] = {
- .curr_state = LLC_STATION_STATE_UP,
- .transitions = llc_stat_up_state_trans,
- },
-};
-
-/**
- * llc_exec_station_trans_actions - executes actions for transition
- * @trans: Address of the transition
- * @skb: Address of the event that caused the transition
- *
- * Executes actions of a transition of the station state machine. Returns
- * 0 if all actions complete successfully, nonzero otherwise.
- */
-static u16 llc_exec_station_trans_actions(struct llc_station_state_trans *trans,
- struct sk_buff *skb)
-{
- u16 rc = 0;
- llc_station_action_t *next_action = trans->ev_actions;
-
- for (; next_action && *next_action; next_action++)
- if ((*next_action)(skb))
- rc = 1;
- return rc;
-}
-
-/**
- * llc_find_station_trans - finds transition for this event
- * @skb: Address of the event
- *
- * Search thru events of the current state of the station until list
- * exhausted or it's obvious that the event is not valid for the current
- * state. Returns the address of the transition if cound, %NULL otherwise.
- */
-static struct llc_station_state_trans *
- llc_find_station_trans(struct sk_buff *skb)
-{
- int i = 0;
- struct llc_station_state_trans *rc = NULL;
- struct llc_station_state_trans **next_trans;
- struct llc_station_state *curr_state =
- &llc_station_state_table[llc_main_station.state - 1];
-
- for (next_trans = curr_state->transitions; next_trans[i]->ev; i++)
- if (!next_trans[i]->ev(skb)) {
- rc = next_trans[i];
- break;
- }
- return rc;
-}
-
-/**
- * llc_station_free_ev - frees an event
- * @skb: Address of the event
- *
- * Frees an event.
- */
-static void llc_station_free_ev(struct sk_buff *skb)
-{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
-
- if (ev->type == LLC_STATION_EV_TYPE_PDU)
- kfree_skb(skb);
-}
-
-/**
- * llc_station_next_state - processes event and goes to the next state
- * @skb: Address of the event
- *
- * Processes an event, executes any transitions related to that event and
- * updates the state of the station.
- */
-static u16 llc_station_next_state(struct sk_buff *skb)
-{
- u16 rc = 1;
- struct llc_station_state_trans *trans;
-
- if (llc_main_station.state > LLC_NBR_STATION_STATES)
- goto out;
- trans = llc_find_station_trans(skb);
- if (trans) {
- /* got the state to which we next transition; perform the
- * actions associated with this transition before actually
- * transitioning to the next state
- */
- rc = llc_exec_station_trans_actions(trans, skb);
- if (!rc)
- /* transition station to next state if all actions
- * execute successfully; done; wait for next event
- */
- llc_main_station.state = trans->next_state;
- } else
- /* event not recognized in current state; re-queue it for
- * processing again at a later time; return failure
- */
- rc = 0;
-out:
- llc_station_free_ev(skb);
- return rc;
-}
-
-/**
- * llc_station_service_events - service events in the queue
- *
- * Get an event from the station event queue (if any); attempt to service
- * the event; if event serviced, get the next event (if any) on the event
- * queue; if event not service, re-queue the event on the event queue and
- * attempt to service the next event; when serviced all events in queue,
- * finished; if don't transition to different state, just service all
- * events once; if transition to new state, service all events again.
- * Caller must hold llc_main_station.ev_q.lock.
- */
-static void llc_station_service_events(void)
-{
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&llc_main_station.ev_q.list)) != NULL)
- llc_station_next_state(skb);
-}
-
-/**
- * llc_station_state_process - queue event and try to process queue.
- * @skb: Address of the event
- *
- * Queues an event (on the station event queue) for handling by the
- * station state machine and attempts to process any queued-up events.
- */
-static void llc_station_state_process(struct sk_buff *skb)
-{
- spin_lock_bh(&llc_main_station.ev_q.lock);
- skb_queue_tail(&llc_main_station.ev_q.list, skb);
- llc_station_service_events();
- spin_unlock_bh(&llc_main_station.ev_q.lock);
-}
-
-static void llc_station_ack_tmr_cb(unsigned long timeout_data)
-{
- struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC);
-
- if (skb) {
- struct llc_station_state_ev *ev = llc_station_ev(skb);
-
- ev->type = LLC_STATION_EV_TYPE_ACK_TMR;
- llc_station_state_process(skb);
- }
-}
-
/**
* llc_station_rcv - send received pdu to the station state machine
* @skb: received frame.
@@ -680,43 +107,19 @@ static void llc_station_ack_tmr_cb(unsigned long timeout_data)
*/
static void llc_station_rcv(struct sk_buff *skb)
{
- struct llc_station_state_ev *ev = llc_station_ev(skb);
-
- ev->type = LLC_STATION_EV_TYPE_PDU;
- ev->reason = 0;
- llc_station_state_process(skb);
+ if (llc_stat_ev_rx_null_dsap_xid_c(skb))
+ llc_station_ac_send_xid_r(skb);
+ else if (llc_stat_ev_rx_null_dsap_test_c(skb))
+ llc_station_ac_send_test_r(skb);
+ kfree_skb(skb);
}
-int __init llc_station_init(void)
+void __init llc_station_init(void)
{
- int rc = -ENOBUFS;
- struct sk_buff *skb;
- struct llc_station_state_ev *ev;
-
- skb_queue_head_init(&llc_main_station.mac_pdu_q);
- skb_queue_head_init(&llc_main_station.ev_q.list);
- spin_lock_init(&llc_main_station.ev_q.lock);
- setup_timer(&llc_main_station.ack_timer, llc_station_ack_tmr_cb,
- (unsigned long)&llc_main_station);
- llc_main_station.ack_timer.expires = jiffies +
- sysctl_llc_station_ack_timeout;
- skb = alloc_skb(0, GFP_ATOMIC);
- if (!skb)
- goto out;
- rc = 0;
llc_set_station_handler(llc_station_rcv);
- ev = llc_station_ev(skb);
- memset(ev, 0, sizeof(*ev));
- llc_main_station.maximum_retry = 1;
- llc_main_station.state = LLC_STATION_STATE_DOWN;
- ev->type = LLC_STATION_EV_TYPE_SIMPLE;
- ev->prim_type = LLC_STATION_EV_ENABLE_WITHOUT_DUP_ADDR_CHECK;
- rc = llc_station_next_state(skb);
-out:
- return rc;
}
-void __exit llc_station_exit(void)
+void llc_station_exit(void)
{
llc_set_station_handler(NULL);
}
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index d75306b9c2f3..612a5ddaf93b 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -47,13 +47,6 @@ static struct ctl_table llc2_timeout_table[] = {
};
static struct ctl_table llc_station_table[] = {
- {
- .procname = "ack_timeout",
- .data = &sysctl_llc_station_ack_timeout,
- .maxlen = sizeof(long),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
{ },
};
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index 8dfd70d8fcfb..a04752e91023 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -38,14 +38,10 @@ static void gf_mulx(u8 *pad)
static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
const u8 *addr[], const size_t *len, u8 *mac)
{
- u8 scratch[2 * AES_BLOCK_SIZE];
- u8 *cbc, *pad;
+ u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE];
const u8 *pos, *end;
size_t i, e, left, total_len;
- cbc = scratch;
- pad = scratch + AES_BLOCK_SIZE;
-
memset(cbc, 0, AES_BLOCK_SIZE);
total_len = 0;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index d0deb3edae21..3195a6307f50 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -869,7 +869,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
} else {
___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR,
- true);
+ false);
}
out:
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a58c0b649ba1..05f3a313db88 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -20,7 +20,8 @@
#include "rate.h"
#include "mesh.h"
-static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, char *name,
+static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
+ const char *name,
enum nl80211_iftype type,
u32 *flags,
struct vif_params *params)
@@ -102,6 +103,18 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
return 0;
}
+static int ieee80211_start_p2p_device(struct wiphy *wiphy,
+ struct wireless_dev *wdev)
+{
+ return ieee80211_do_open(wdev, true);
+}
+
+static void ieee80211_stop_p2p_device(struct wiphy *wiphy,
+ struct wireless_dev *wdev)
+{
+ ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev));
+}
+
static int ieee80211_set_noack_map(struct wiphy *wiphy,
struct net_device *dev,
u16 noack_map)
@@ -158,6 +171,38 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
}
}
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_STATION:
+ if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED)
+ key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
+ break;
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_AP_VLAN:
+ /* Keys without a station are used for TX only */
+ if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP))
+ key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ /* no MFP (yet) */
+ break;
+ case NL80211_IFTYPE_MESH_POINT:
+#ifdef CONFIG_MAC80211_MESH
+ if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)
+ key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
+ break;
+#endif
+ case NL80211_IFTYPE_WDS:
+ case NL80211_IFTYPE_MONITOR:
+ case NL80211_IFTYPE_P2P_DEVICE:
+ case NL80211_IFTYPE_UNSPECIFIED:
+ case NUM_NL80211_IFTYPES:
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_P2P_GO:
+ /* shouldn't happen */
+ WARN_ON_ONCE(1);
+ break;
+ }
+
err = ieee80211_key_link(key, sdata, sta);
if (err)
ieee80211_key_free(sdata->local, key);
@@ -330,7 +375,7 @@ static void rate_idx_to_bitrate(struct rate_info *rate, struct sta_info *sta, in
if (!(rate->flags & RATE_INFO_FLAGS_MCS)) {
struct ieee80211_supported_band *sband;
sband = sta->local->hw.wiphy->bands[
- sta->local->hw.conf.channel->band];
+ sta->local->oper_channel->band];
rate->legacy = sband->bitrates[idx].bitrate;
} else
rate->mcs = idx;
@@ -725,25 +770,23 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
const u8 *resp, size_t resp_len)
{
- struct sk_buff *new, *old;
+ struct probe_resp *new, *old;
if (!resp || !resp_len)
return 1;
old = rtnl_dereference(sdata->u.ap.probe_resp);
- new = dev_alloc_skb(resp_len);
+ new = kzalloc(sizeof(struct probe_resp) + resp_len, GFP_KERNEL);
if (!new)
return -ENOMEM;
- memcpy(skb_put(new, resp_len), resp, resp_len);
+ new->len = resp_len;
+ memcpy(new->data, resp, resp_len);
rcu_assign_pointer(sdata->u.ap.probe_resp, new);
- if (old) {
- /* TODO: use call_rcu() */
- synchronize_rcu();
- dev_kfree_skb(old);
- }
+ if (old)
+ kfree_rcu(old, rcu_head);
return 0;
}
@@ -950,7 +993,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta)
/* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID)
* Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */
- memset(msg->da, 0xff, ETH_ALEN);
+ eth_broadcast_addr(msg->da);
memcpy(msg->sa, sta->sta.addr, ETH_ALEN);
msg->len = htons(6);
msg->dsap = 0;
@@ -1285,9 +1328,10 @@ static int ieee80211_change_station(struct wiphy *wiphy,
mutex_unlock(&local->sta_mtx);
if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))
+ params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) {
ieee80211_recalc_ps(local, -1);
-
+ ieee80211_recalc_ps_vif(sdata);
+ }
return 0;
}
@@ -1660,7 +1704,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
}
if (!sdata->vif.bss_conf.use_short_slot &&
- sdata->local->hw.conf.channel->band == IEEE80211_BAND_5GHZ) {
+ sdata->local->oper_channel->band == IEEE80211_BAND_5GHZ) {
sdata->vif.bss_conf.use_short_slot = true;
changed |= BSS_CHANGED_ERP_SLOT;
}
@@ -1774,6 +1818,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_P2P_DEVICE:
break;
case NL80211_IFTYPE_P2P_GO:
if (sdata->local->ops->hw_scan)
@@ -1926,7 +1971,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
enum nl80211_tx_power_setting type, int mbm)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
- struct ieee80211_channel *chan = local->hw.conf.channel;
+ struct ieee80211_channel *chan = local->oper_channel;
u32 changes = 0;
switch (type) {
@@ -2026,9 +2071,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
*/
if (!sdata->u.mgd.associated ||
sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) {
- mutex_lock(&sdata->local->iflist_mtx);
ieee80211_recalc_smps(sdata->local);
- mutex_unlock(&sdata->local->iflist_mtx);
return 0;
}
@@ -2078,6 +2121,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
ieee80211_recalc_ps(local, -1);
+ ieee80211_recalc_ps_vif(sdata);
return 0;
}
@@ -2460,6 +2504,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
if (!sdata->u.mgd.associated)
need_offchan = true;
break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ need_offchan = true;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -2652,6 +2699,7 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
u16 status_code, struct sk_buff *skb)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_tdls_data *tf;
tf = (void *)skb_put(skb, offsetof(struct ieee80211_tdls_data, u));
@@ -2671,8 +2719,10 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
tf->u.setup_req.capability =
cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
- ieee80211_add_srates_ie(sdata, skb, false);
- ieee80211_add_ext_srates_ie(sdata, skb, false);
+ ieee80211_add_srates_ie(sdata, skb, false,
+ local->oper_channel->band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false,
+ local->oper_channel->band);
ieee80211_tdls_add_ext_capab(skb);
break;
case WLAN_TDLS_SETUP_RESPONSE:
@@ -2685,8 +2735,10 @@ ieee80211_prep_tdls_encap_data(struct wiphy *wiphy, struct net_device *dev,
tf->u.setup_resp.capability =
cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
- ieee80211_add_srates_ie(sdata, skb, false);
- ieee80211_add_ext_srates_ie(sdata, skb, false);
+ ieee80211_add_srates_ie(sdata, skb, false,
+ local->oper_channel->band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false,
+ local->oper_channel->band);
ieee80211_tdls_add_ext_capab(skb);
break;
case WLAN_TDLS_SETUP_CONFIRM:
@@ -2724,6 +2776,7 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
u16 status_code, struct sk_buff *skb)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_mgmt *mgmt;
mgmt = (void *)skb_put(skb, 24);
@@ -2746,8 +2799,10 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev,
mgmt->u.action.u.tdls_discover_resp.capability =
cpu_to_le16(ieee80211_get_tdls_sta_capab(sdata));
- ieee80211_add_srates_ie(sdata, skb, false);
- ieee80211_add_ext_srates_ie(sdata, skb, false);
+ ieee80211_add_srates_ie(sdata, skb, false,
+ local->oper_channel->band);
+ ieee80211_add_ext_srates_ie(sdata, skb, false,
+ local->oper_channel->band);
ieee80211_tdls_add_ext_capab(skb);
break;
default:
@@ -3004,6 +3059,8 @@ struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
.change_virtual_intf = ieee80211_change_iface,
+ .start_p2p_device = ieee80211_start_p2p_device,
+ .stop_p2p_device = ieee80211_stop_p2p_device,
.add_key = ieee80211_add_key,
.del_key = ieee80211_del_key,
.get_key = ieee80211_get_key,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index f0f87e5a1d35..0bfc914ddd15 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -68,16 +68,14 @@ ieee80211_get_channel_mode(struct ieee80211_local *local,
return mode;
}
-bool ieee80211_set_channel_type(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- enum nl80211_channel_type chantype)
+static enum nl80211_channel_type
+ieee80211_get_superchan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
{
- struct ieee80211_sub_if_data *tmp;
enum nl80211_channel_type superchan = NL80211_CHAN_NO_HT;
- bool result;
+ struct ieee80211_sub_if_data *tmp;
mutex_lock(&local->iflist_mtx);
-
list_for_each_entry(tmp, &local->interfaces, list) {
if (tmp == sdata)
continue;
@@ -103,39 +101,70 @@ bool ieee80211_set_channel_type(struct ieee80211_local *local,
break;
}
}
+ mutex_unlock(&local->iflist_mtx);
- switch (superchan) {
+ return superchan;
+}
+
+static bool
+ieee80211_channel_types_are_compatible(enum nl80211_channel_type chantype1,
+ enum nl80211_channel_type chantype2,
+ enum nl80211_channel_type *compat)
+{
+ /*
+ * start out with chantype1 being the result,
+ * overwriting later if needed
+ */
+ if (compat)
+ *compat = chantype1;
+
+ switch (chantype1) {
case NL80211_CHAN_NO_HT:
+ if (compat)
+ *compat = chantype2;
+ break;
case NL80211_CHAN_HT20:
/*
* allow any change that doesn't go to no-HT
* (if it already is no-HT no change is needed)
*/
- if (chantype == NL80211_CHAN_NO_HT)
+ if (chantype2 == NL80211_CHAN_NO_HT)
break;
- superchan = chantype;
+ if (compat)
+ *compat = chantype2;
break;
case NL80211_CHAN_HT40PLUS:
case NL80211_CHAN_HT40MINUS:
/* allow smaller bandwidth and same */
- if (chantype == NL80211_CHAN_NO_HT)
+ if (chantype2 == NL80211_CHAN_NO_HT)
break;
- if (chantype == NL80211_CHAN_HT20)
+ if (chantype2 == NL80211_CHAN_HT20)
break;
- if (superchan == chantype)
+ if (chantype2 == chantype1)
break;
- result = false;
- goto out;
+ return false;
}
- local->_oper_channel_type = superchan;
+ return true;
+}
+
+bool ieee80211_set_channel_type(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum nl80211_channel_type chantype)
+{
+ enum nl80211_channel_type superchan;
+ enum nl80211_channel_type compatchan;
+
+ superchan = ieee80211_get_superchan(local, sdata);
+ if (!ieee80211_channel_types_are_compatible(superchan, chantype,
+ &compatchan))
+ return false;
+
+ local->_oper_channel_type = compatchan;
if (sdata)
sdata->vif.bss_conf.channel_type = chantype;
- result = true;
- out:
- mutex_unlock(&local->iflist_mtx);
+ return true;
- return result;
}
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index b8dfb440c8ef..466f4b45dd94 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -63,8 +63,6 @@ DEBUGFS_READONLY_FILE(user_power, "%d",
local->user_power_level);
DEBUGFS_READONLY_FILE(power, "%d",
local->hw.conf.power_level);
-DEBUGFS_READONLY_FILE(frequency, "%d",
- local->hw.conf.channel->center_freq);
DEBUGFS_READONLY_FILE(total_ps_buffered, "%d",
local->total_ps_buffered);
DEBUGFS_READONLY_FILE(wep_iv, "%#08x",
@@ -72,6 +70,7 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x",
DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s",
local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver");
+#ifdef CONFIG_PM
static ssize_t reset_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
@@ -90,33 +89,7 @@ static const struct file_operations reset_ops = {
.open = simple_open,
.llseek = noop_llseek,
};
-
-static ssize_t channel_type_read(struct file *file, char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- struct ieee80211_local *local = file->private_data;
- const char *buf;
-
- switch (local->hw.conf.channel_type) {
- case NL80211_CHAN_NO_HT:
- buf = "no ht\n";
- break;
- case NL80211_CHAN_HT20:
- buf = "ht20\n";
- break;
- case NL80211_CHAN_HT40MINUS:
- buf = "ht40-\n";
- break;
- case NL80211_CHAN_HT40PLUS:
- buf = "ht40+\n";
- break;
- default:
- buf = "???";
- break;
- }
-
- return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
-}
+#endif
static ssize_t hwflags_read(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
@@ -205,7 +178,6 @@ static ssize_t queues_read(struct file *file, char __user *user_buf,
}
DEBUGFS_READONLY_FILE_OPS(hwflags);
-DEBUGFS_READONLY_FILE_OPS(channel_type);
DEBUGFS_READONLY_FILE_OPS(queues);
/* statistics stuff */
@@ -272,12 +244,12 @@ void debugfs_hw_add(struct ieee80211_local *local)
local->debugfs.keys = debugfs_create_dir("keys", phyd);
- DEBUGFS_ADD(frequency);
DEBUGFS_ADD(total_ps_buffered);
DEBUGFS_ADD(wep_iv);
DEBUGFS_ADD(queues);
+#ifdef CONFIG_PM
DEBUGFS_ADD_MODE(reset, 0200);
- DEBUGFS_ADD(channel_type);
+#endif
DEBUGFS_ADD(hwflags);
DEBUGFS_ADD(user_power);
DEBUGFS_ADD(power);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index df9203199102..da9003b20004 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -9,7 +9,7 @@ static inline void check_sdata_in_driver(struct ieee80211_sub_if_data *sdata)
{
WARN(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER),
"%s: Failed check-sdata-in-driver check, flags: 0x%x\n",
- sdata->dev->name, sdata->flags);
+ sdata->dev ? sdata->dev->name : sdata->name, sdata->flags);
}
static inline struct ieee80211_sub_if_data *
@@ -22,9 +22,11 @@ get_bss_sdata(struct ieee80211_sub_if_data *sdata)
return sdata;
}
-static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb)
+static inline void drv_tx(struct ieee80211_local *local,
+ struct ieee80211_tx_control *control,
+ struct sk_buff *skb)
{
- local->ops->tx(&local->hw, skb);
+ local->ops->tx(&local->hw, control, skb);
}
static inline void drv_get_et_strings(struct ieee80211_sub_if_data *sdata,
@@ -526,6 +528,9 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local,
sdata = get_bss_sdata(sdata);
check_sdata_in_driver(sdata);
+ WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
+ sdata->vif.type != NL80211_IFTYPE_ADHOC);
+
trace_drv_sta_rc_update(local, sdata, sta, changed);
if (local->ops->sta_rc_update)
local->ops->sta_rc_update(&local->hw, &sdata->vif,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 5746d62faba1..5f3620f0bc0a 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -109,7 +109,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_PROBE_RESP);
- memset(mgmt->da, 0xff, ETH_ALEN);
+ eth_broadcast_addr(mgmt->da);
memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN);
mgmt->u.beacon.beacon_int = cpu_to_le16(beacon_int);
@@ -205,7 +205,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
mod_timer(&ifibss->timer,
round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
- bss = cfg80211_inform_bss_frame(local->hw.wiphy, local->hw.conf.channel,
+ bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan,
mgmt, skb->len, 0, GFP_KERNEL);
cfg80211_put_bss(bss);
netif_carrier_on(sdata->dev);
@@ -278,7 +278,7 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta,
if (auth && !sdata->u.ibss.auth_frame_registrations) {
ibss_dbg(sdata,
"TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n",
- sdata->vif.addr, sdata->u.ibss.bssid, addr);
+ sdata->vif.addr, addr, sdata->u.ibss.bssid);
ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, NULL, 0,
addr, sdata->u.ibss.bssid, NULL, 0, 0);
}
@@ -294,7 +294,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
- int band = local->hw.conf.channel->band;
+ int band = local->oper_channel->band;
/*
* XXX: Consider removing the least recently used entry and
@@ -332,11 +332,27 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
return ieee80211_ibss_finish_sta(sta, auth);
}
+static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_mgmt *mgmt,
+ size_t len)
+{
+ u16 reason = le16_to_cpu(mgmt->u.deauth.reason_code);
+
+ if (len < IEEE80211_DEAUTH_FRAME_LEN)
+ return;
+
+ ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n",
+ mgmt->sa, mgmt->da, mgmt->bssid, reason);
+ sta_info_destroy_addr(sdata, mgmt->sa);
+}
+
static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt,
size_t len)
{
u16 auth_alg, auth_transaction;
+ struct sta_info *sta;
+ u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
lockdep_assert_held(&sdata->u.ibss.mtx);
@@ -352,10 +368,22 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
"RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n",
mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction);
sta_info_destroy_addr(sdata, mgmt->sa);
- ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false);
+ sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false);
rcu_read_unlock();
/*
+ * if we have any problem in allocating the new station, we reply with a
+ * DEAUTH frame to tell the other end that we had a problem
+ */
+ if (!sta) {
+ ieee80211_send_deauth_disassoc(sdata, sdata->u.ibss.bssid,
+ IEEE80211_STYPE_DEAUTH,
+ WLAN_REASON_UNSPECIFIED, true,
+ deauth_frame_buf);
+ return;
+ }
+
+ /*
* IEEE 802.11 standard does not require authentication in IBSS
* networks and most implementations do not seem to use it.
* However, try to reply to authentication attempts if someone
@@ -459,8 +487,11 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
}
}
- if (sta && rates_updated)
+ if (sta && rates_updated) {
+ drv_sta_rc_update(local, sdata, &sta->sta,
+ IEEE80211_RC_SUPP_RATES_CHANGED);
rate_control_rate_init(sta);
+ }
rcu_read_unlock();
}
@@ -561,7 +592,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
- int band = local->hw.conf.channel->band;
+ int band = local->oper_channel->band;
/*
* XXX: Consider removing the least recently used entry and
@@ -759,7 +790,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
return;
}
sdata_info(sdata, "IBSS not allowed on %d MHz\n",
- local->hw.conf.channel->center_freq);
+ local->oper_channel->center_freq);
/* No IBSS found - decrease scan interval and continue
* scanning. */
@@ -899,6 +930,9 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
case IEEE80211_STYPE_AUTH:
ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len);
break;
+ case IEEE80211_STYPE_DEAUTH:
+ ieee80211_rx_mgmt_deauth_ibss(sdata, mgmt, skb->len);
+ break;
}
mgmt_out:
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index bb61f7718c4c..8c804550465b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -68,6 +68,8 @@ struct ieee80211_local;
#define IEEE80211_DEFAULT_MAX_SP_LEN \
IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
+#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */)
+
struct ieee80211_fragment_entry {
unsigned long first_frag_time;
unsigned int seq;
@@ -193,8 +195,6 @@ struct ieee80211_tx_data {
struct sta_info *sta;
struct ieee80211_key *key;
- struct ieee80211_channel *channel;
-
unsigned int flags;
};
@@ -274,9 +274,15 @@ struct beacon_data {
struct rcu_head rcu_head;
};
+struct probe_resp {
+ struct rcu_head rcu_head;
+ int len;
+ u8 data[0];
+};
+
struct ieee80211_if_ap {
struct beacon_data __rcu *beacon;
- struct sk_buff __rcu *probe_resp;
+ struct probe_resp __rcu *probe_resp;
struct list_head vlans;
@@ -359,6 +365,7 @@ enum ieee80211_sta_flags {
IEEE80211_STA_NULLFUNC_ACKED = BIT(8),
IEEE80211_STA_RESET_SIGNAL_AVE = BIT(9),
IEEE80211_STA_DISABLE_40MHZ = BIT(10),
+ IEEE80211_STA_DISABLE_VHT = BIT(11),
};
struct ieee80211_mgd_auth_data {
@@ -406,6 +413,7 @@ struct ieee80211_if_managed {
struct work_struct monitor_work;
struct work_struct chswitch_work;
struct work_struct beacon_connection_loss_work;
+ struct work_struct csa_connection_drop_work;
unsigned long beacon_timeout;
unsigned long probe_timeout;
@@ -965,7 +973,6 @@ struct ieee80211_local {
int scan_channel_idx;
int scan_ies_len;
- struct ieee80211_sched_scan_ies sched_scan_ies;
struct work_struct sched_scan_stopped_work;
struct ieee80211_sub_if_data __rcu *sched_scan_sdata;
@@ -1052,7 +1059,7 @@ struct ieee80211_local {
bool disable_dynamic_ps;
int user_power_level; /* in dBm */
- int power_constr_level; /* in dBm */
+ int ap_power_level; /* in dBm */
enum ieee80211_smps_mode smps_mode;
@@ -1075,6 +1082,8 @@ struct ieee80211_local {
struct idr ack_status_frames;
spinlock_t ack_status_lock;
+ struct ieee80211_sub_if_data __rcu *p2p_sdata;
+
/* dummy netdev for use w/ NAPI */
struct net_device napi_dev;
@@ -1131,7 +1140,7 @@ struct ieee802_11_elems {
u8 *prep;
u8 *perr;
struct ieee80211_rann_ie *rann;
- u8 *ch_switch_elem;
+ struct ieee80211_channel_sw_ie *ch_switch_ie;
u8 *country_elem;
u8 *pwr_constr_elem;
u8 *quiet_elem; /* first quite element */
@@ -1157,9 +1166,7 @@ struct ieee802_11_elems {
u8 preq_len;
u8 prep_len;
u8 perr_len;
- u8 ch_switch_elem_len;
u8 country_elem_len;
- u8 pwr_constr_elem_len;
u8 quiet_elem_len;
u8 num_of_quiet_elem; /* can be more the one */
u8 timeout_int_len;
@@ -1202,6 +1209,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
void ieee80211_send_pspoll(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata);
void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency);
+void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);
int ieee80211_max_network_latency(struct notifier_block *nb,
unsigned long data, void *dummy);
int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
@@ -1291,6 +1299,8 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local);
void ieee80211_recalc_idle(struct ieee80211_local *local);
void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
const int offset);
+int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up);
+void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata);
static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
{
@@ -1358,7 +1368,6 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
int ieee80211_reconfig(struct ieee80211_local *local);
void ieee80211_stop_device(struct ieee80211_local *local);
-#ifdef CONFIG_PM
int __ieee80211_suspend(struct ieee80211_hw *hw,
struct cfg80211_wowlan *wowlan);
@@ -1372,18 +1381,6 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
return ieee80211_reconfig(hw_to_local(hw));
}
-#else
-static inline int __ieee80211_suspend(struct ieee80211_hw *hw,
- struct cfg80211_wowlan *wowlan)
-{
- return 0;
-}
-
-static inline int __ieee80211_resume(struct ieee80211_hw *hw)
-{
- return 0;
-}
-#endif
/* utility functions/constants */
extern void *mac80211_wiphy_privid; /* for wiphy privid */
@@ -1425,7 +1422,6 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr);
void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr, bool ack);
-void ieee80211_beacon_connection_loss_work(struct work_struct *work);
void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
enum queue_stop_reason reason);
@@ -1451,19 +1447,24 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg,
u8 *extra, size_t extra_len, const u8 *bssid,
const u8 *da, const u8 *key, u8 key_len, u8 key_idx);
+void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
+ const u8 *bssid, u16 stype, u16 reason,
+ bool send_frame, u8 *frame_buf);
int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
const u8 *ie, size_t ie_len,
enum ieee80211_band band, u32 rate_mask,
u8 channel);
struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
u8 *dst, u32 ratemask,
+ struct ieee80211_channel *chan,
const u8 *ssid, size_t ssid_len,
const u8 *ie, size_t ie_len,
bool directed);
void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
const u8 *ssid, size_t ssid_len,
const u8 *ie, size_t ie_len,
- u32 ratemask, bool directed, bool no_cck);
+ u32 ratemask, bool directed, bool no_cck,
+ struct ieee80211_channel *channel);
void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
const size_t supp_rates_len,
@@ -1487,9 +1488,11 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u32 cap);
int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic);
+ struct sk_buff *skb, bool need_basic,
+ enum ieee80211_band band);
int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic);
+ struct sk_buff *skb, bool need_basic,
+ enum ieee80211_band band);
/* channel management */
enum ieee80211_chan_mode {
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index bfb57dcc1538..6f8a73c64fb3 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -100,6 +100,10 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
sdata->vif.bss_conf.idle = true;
continue;
}
+
+ if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE)
+ continue;
+
/* count everything else */
sdata->vif.bss_conf.idle = false;
count++;
@@ -121,7 +125,8 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
list_for_each_entry(sdata, &local->interfaces, list) {
if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE)
continue;
if (sdata->old_idle == sdata->vif.bss_conf.idle)
continue;
@@ -204,6 +209,8 @@ static inline int identical_mac_addr_allowed(int type1, int type2)
{
return type1 == NL80211_IFTYPE_MONITOR ||
type2 == NL80211_IFTYPE_MONITOR ||
+ type1 == NL80211_IFTYPE_P2P_DEVICE ||
+ type2 == NL80211_IFTYPE_P2P_DEVICE ||
(type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) ||
(type1 == NL80211_IFTYPE_WDS &&
(type2 == NL80211_IFTYPE_WDS ||
@@ -271,13 +278,15 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata)
int n_queues = sdata->local->hw.queues;
int i;
- for (i = 0; i < IEEE80211_NUM_ACS; i++) {
- if (WARN_ON_ONCE(sdata->vif.hw_queue[i] ==
- IEEE80211_INVAL_HW_QUEUE))
- return -EINVAL;
- if (WARN_ON_ONCE(sdata->vif.hw_queue[i] >=
- n_queues))
- return -EINVAL;
+ if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) {
+ for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+ if (WARN_ON_ONCE(sdata->vif.hw_queue[i] ==
+ IEEE80211_INVAL_HW_QUEUE))
+ return -EINVAL;
+ if (WARN_ON_ONCE(sdata->vif.hw_queue[i] >=
+ n_queues))
+ return -EINVAL;
+ }
}
if ((sdata->vif.type != NL80211_IFTYPE_AP) ||
@@ -406,9 +415,10 @@ static void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
* an error on interface type changes that have been pre-checked, so most
* checks should be in ieee80211_check_concurrent_iface.
*/
-static int ieee80211_do_open(struct net_device *dev, bool coming_up)
+int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ struct net_device *dev = wdev->netdev;
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
u32 changed = 0;
@@ -443,6 +453,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_MONITOR:
case NL80211_IFTYPE_ADHOC:
+ case NL80211_IFTYPE_P2P_DEVICE:
/* no special treatment */
break;
case NL80211_IFTYPE_UNSPECIFIED:
@@ -471,7 +482,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
* Copy the hopefully now-present MAC address to
* this interface, if it has the special null one.
*/
- if (is_zero_ether_addr(dev->dev_addr)) {
+ if (dev && is_zero_ether_addr(dev->dev_addr)) {
memcpy(dev->dev_addr,
local->hw.wiphy->perm_addr,
ETH_ALEN);
@@ -536,15 +547,23 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
local->fif_probe_req++;
}
- changed |= ieee80211_reset_erp_info(sdata);
+ if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE)
+ changed |= ieee80211_reset_erp_info(sdata);
ieee80211_bss_info_change_notify(sdata, changed);
- if (sdata->vif.type == NL80211_IFTYPE_STATION ||
- sdata->vif.type == NL80211_IFTYPE_ADHOC ||
- sdata->vif.type == NL80211_IFTYPE_AP)
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_STATION:
+ case NL80211_IFTYPE_ADHOC:
+ case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_MESH_POINT:
netif_carrier_off(dev);
- else
+ break;
+ case NL80211_IFTYPE_WDS:
+ case NL80211_IFTYPE_P2P_DEVICE:
+ break;
+ default:
netif_carrier_on(dev);
+ }
/*
* set default queue parameters so drivers don't
@@ -576,6 +595,9 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
}
rate_control_rate_init(sta);
+ netif_carrier_on(dev);
+ } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) {
+ rcu_assign_pointer(local->p2p_sdata, sdata);
}
/*
@@ -601,7 +623,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
ieee80211_recalc_ps(local, -1);
- netif_tx_start_all_queues(dev);
+ if (dev)
+ netif_tx_start_all_queues(dev);
return 0;
err_del_interface:
@@ -631,7 +654,7 @@ static int ieee80211_open(struct net_device *dev)
if (err)
return err;
- return ieee80211_do_open(dev, true);
+ return ieee80211_do_open(&sdata->wdev, true);
}
static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
@@ -652,7 +675,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
/*
* Stop TX on this interface first.
*/
- netif_tx_stop_all_queues(sdata->dev);
+ if (sdata->dev)
+ netif_tx_stop_all_queues(sdata->dev);
ieee80211_roc_purge(sdata);
@@ -691,14 +715,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
local->fif_probe_req--;
}
- netif_addr_lock_bh(sdata->dev);
- spin_lock_bh(&local->filter_lock);
- __hw_addr_unsync(&local->mc_list, &sdata->dev->mc,
- sdata->dev->addr_len);
- spin_unlock_bh(&local->filter_lock);
- netif_addr_unlock_bh(sdata->dev);
+ if (sdata->dev) {
+ netif_addr_lock_bh(sdata->dev);
+ spin_lock_bh(&local->filter_lock);
+ __hw_addr_unsync(&local->mc_list, &sdata->dev->mc,
+ sdata->dev->addr_len);
+ spin_unlock_bh(&local->filter_lock);
+ netif_addr_unlock_bh(sdata->dev);
- ieee80211_configure_filter(local);
+ ieee80211_configure_filter(local);
+ }
del_timer_sync(&local->dynamic_ps_timer);
cancel_work_sync(&local->dynamic_ps_enable_work);
@@ -708,7 +734,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sub_if_data *vlan, *tmpsdata;
struct beacon_data *old_beacon =
rtnl_dereference(sdata->u.ap.beacon);
- struct sk_buff *old_probe_resp =
+ struct probe_resp *old_probe_resp =
rtnl_dereference(sdata->u.ap.probe_resp);
/* sdata_running will return false, so this will disable */
@@ -720,7 +746,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(sdata->u.ap.probe_resp, NULL);
synchronize_rcu();
kfree(old_beacon);
- kfree_skb(old_probe_resp);
+ kfree(old_probe_resp);
/* down all dependent devices, that is VLANs */
list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
@@ -759,24 +785,29 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
ieee80211_adjust_monitor_flags(sdata, -1);
ieee80211_configure_filter(local);
break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ /* relies on synchronize_rcu() below */
+ rcu_assign_pointer(local->p2p_sdata, NULL);
+ /* fall through */
default:
flush_work(&sdata->work);
/*
* When we get here, the interface is marked down.
- * Call synchronize_rcu() to wait for the RX path
+ * Call rcu_barrier() to wait both for the RX path
* should it be using the interface and enqueuing
- * frames at this very time on another CPU.
+ * frames at this very time on another CPU, and
+ * for the sta free call_rcu callbacks.
*/
- synchronize_rcu();
- skb_queue_purge(&sdata->skb_queue);
+ rcu_barrier();
/*
- * Disable beaconing here for mesh only, AP and IBSS
- * are already taken care of.
+ * free_sta_rcu() enqueues a work for the actual
+ * sta cleanup, so we need to flush it while
+ * sdata is still valid.
*/
- if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- ieee80211_bss_info_change_notify(sdata,
- BSS_CHANGED_BEACON_ENABLED);
+ flush_workqueue(local->workqueue);
+
+ skb_queue_purge(&sdata->skb_queue);
/*
* Free all remaining keys, there shouldn't be any,
@@ -877,9 +908,8 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
* Called when the netdev is removed or, by the code below, before
* the interface type changes.
*/
-static void ieee80211_teardown_sdata(struct net_device *dev)
+static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
{
- struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
int flushed;
int i;
@@ -900,6 +930,11 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
WARN_ON(flushed);
}
+static void ieee80211_uninit(struct net_device *dev)
+{
+ ieee80211_teardown_sdata(IEEE80211_DEV_TO_SUB_IF(dev));
+}
+
static u16 ieee80211_netdev_select_queue(struct net_device *dev,
struct sk_buff *skb)
{
@@ -909,7 +944,7 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev,
static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_open = ieee80211_open,
.ndo_stop = ieee80211_stop,
- .ndo_uninit = ieee80211_teardown_sdata,
+ .ndo_uninit = ieee80211_uninit,
.ndo_start_xmit = ieee80211_subif_start_xmit,
.ndo_set_rx_mode = ieee80211_set_multicast_list,
.ndo_change_mtu = ieee80211_change_mtu,
@@ -940,7 +975,7 @@ static u16 ieee80211_monitor_select_queue(struct net_device *dev,
static const struct net_device_ops ieee80211_monitorif_ops = {
.ndo_open = ieee80211_open,
.ndo_stop = ieee80211_stop,
- .ndo_uninit = ieee80211_teardown_sdata,
+ .ndo_uninit = ieee80211_uninit,
.ndo_start_xmit = ieee80211_monitor_start_xmit,
.ndo_set_rx_mode = ieee80211_set_multicast_list,
.ndo_change_mtu = ieee80211_change_mtu,
@@ -1099,7 +1134,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
/* and set some type-dependent values */
sdata->vif.type = type;
sdata->vif.p2p = false;
- sdata->dev->netdev_ops = &ieee80211_dataif_ops;
sdata->wdev.iftype = type;
sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
@@ -1107,8 +1141,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
sdata->noack_map = 0;
- /* only monitor differs */
- sdata->dev->type = ARPHRD_ETHER;
+ /* only monitor/p2p-device differ */
+ if (sdata->dev) {
+ sdata->dev->netdev_ops = &ieee80211_dataif_ops;
+ sdata->dev->type = ARPHRD_ETHER;
+ }
skb_queue_head_init(&sdata->skb_queue);
INIT_WORK(&sdata->work, ieee80211_iface_work);
@@ -1146,6 +1183,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
break;
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_AP_VLAN:
+ case NL80211_IFTYPE_P2P_DEVICE:
break;
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
@@ -1156,18 +1194,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
ieee80211_debugfs_add_netdev(sdata);
}
-static void ieee80211_clean_sdata(struct ieee80211_sub_if_data *sdata)
-{
- switch (sdata->vif.type) {
- case NL80211_IFTYPE_MESH_POINT:
- mesh_path_flush_by_iface(sdata);
- break;
-
- default:
- break;
- }
-}
-
static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
enum nl80211_iftype type)
{
@@ -1225,7 +1251,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
ieee80211_do_stop(sdata, false);
- ieee80211_teardown_sdata(sdata->dev);
+ ieee80211_teardown_sdata(sdata);
ret = drv_change_interface(local, sdata, internal_type, p2p);
if (ret)
@@ -1240,7 +1266,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
ieee80211_setup_sdata(sdata, type);
- err = ieee80211_do_open(sdata->dev, false);
+ err = ieee80211_do_open(&sdata->wdev, false);
WARN(err, "type change: do_open returned %d", err);
return ret;
@@ -1267,14 +1293,14 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
return ret;
} else {
/* Purge and reset type-dependent state. */
- ieee80211_teardown_sdata(sdata->dev);
+ ieee80211_teardown_sdata(sdata);
ieee80211_setup_sdata(sdata, type);
}
/* reset some values that shouldn't be kept across type changes */
sdata->vif.bss_conf.basic_rates =
ieee80211_mandatory_rates(sdata->local,
- sdata->local->hw.conf.channel->band);
+ sdata->local->oper_channel->band);
sdata->drop_unencrypted = 0;
if (type == NL80211_IFTYPE_STATION)
sdata->u.mgd.use_4addr = false;
@@ -1283,8 +1309,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
}
static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
- struct net_device *dev,
- enum nl80211_iftype type)
+ u8 *perm_addr, enum nl80211_iftype type)
{
struct ieee80211_sub_if_data *sdata;
u64 mask, start, addr, val, inc;
@@ -1293,13 +1318,12 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
int i;
/* default ... something at least */
- memcpy(dev->perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
+ memcpy(perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
if (is_zero_ether_addr(local->hw.wiphy->addr_mask) &&
local->hw.wiphy->n_addresses <= 1)
return;
-
mutex_lock(&local->iflist_mtx);
switch (type) {
@@ -1312,11 +1336,24 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
list_for_each_entry(sdata, &local->interfaces, list) {
if (sdata->vif.type != NL80211_IFTYPE_AP)
continue;
- memcpy(dev->perm_addr, sdata->vif.addr, ETH_ALEN);
+ memcpy(perm_addr, sdata->vif.addr, ETH_ALEN);
break;
}
/* keep default if no AP interface present */
break;
+ case NL80211_IFTYPE_P2P_CLIENT:
+ case NL80211_IFTYPE_P2P_GO:
+ if (local->hw.flags & IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF) {
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE)
+ continue;
+ if (!ieee80211_sdata_running(sdata))
+ continue;
+ memcpy(perm_addr, sdata->vif.addr, ETH_ALEN);
+ goto out_unlock;
+ }
+ }
+ /* otherwise fall through */
default:
/* assign a new address if possible -- try n_addresses first */
for (i = 0; i < local->hw.wiphy->n_addresses; i++) {
@@ -1331,7 +1368,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
}
if (!used) {
- memcpy(dev->perm_addr,
+ memcpy(perm_addr,
local->hw.wiphy->addresses[i].addr,
ETH_ALEN);
break;
@@ -1382,7 +1419,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
}
if (!used) {
- memcpy(dev->perm_addr, tmp_addr, ETH_ALEN);
+ memcpy(perm_addr, tmp_addr, ETH_ALEN);
break;
}
addr = (start & ~mask) | (val & mask);
@@ -1391,6 +1428,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
break;
}
+ out_unlock:
mutex_unlock(&local->iflist_mtx);
}
@@ -1398,49 +1436,68 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
struct wireless_dev **new_wdev, enum nl80211_iftype type,
struct vif_params *params)
{
- struct net_device *ndev;
+ struct net_device *ndev = NULL;
struct ieee80211_sub_if_data *sdata = NULL;
int ret, i;
int txqs = 1;
ASSERT_RTNL();
- if (local->hw.queues >= IEEE80211_NUM_ACS)
- txqs = IEEE80211_NUM_ACS;
-
- ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size,
- name, ieee80211_if_setup, txqs, 1);
- if (!ndev)
- return -ENOMEM;
- dev_net_set(ndev, wiphy_net(local->hw.wiphy));
-
- ndev->needed_headroom = local->tx_headroom +
- 4*6 /* four MAC addresses */
- + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */
- + 6 /* mesh */
- + 8 /* rfc1042/bridge tunnel */
- - ETH_HLEN /* ethernet hard_header_len */
- + IEEE80211_ENCRYPT_HEADROOM;
- ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM;
-
- ret = dev_alloc_name(ndev, ndev->name);
- if (ret < 0)
- goto fail;
-
- ieee80211_assign_perm_addr(local, ndev, type);
- memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
- SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
-
- /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */
- sdata = netdev_priv(ndev);
- ndev->ieee80211_ptr = &sdata->wdev;
- memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN);
- memcpy(sdata->name, ndev->name, IFNAMSIZ);
+ if (type == NL80211_IFTYPE_P2P_DEVICE) {
+ struct wireless_dev *wdev;
+
+ sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size,
+ GFP_KERNEL);
+ if (!sdata)
+ return -ENOMEM;
+ wdev = &sdata->wdev;
+
+ sdata->dev = NULL;
+ strlcpy(sdata->name, name, IFNAMSIZ);
+ ieee80211_assign_perm_addr(local, wdev->address, type);
+ memcpy(sdata->vif.addr, wdev->address, ETH_ALEN);
+ } else {
+ if (local->hw.queues >= IEEE80211_NUM_ACS)
+ txqs = IEEE80211_NUM_ACS;
+
+ ndev = alloc_netdev_mqs(sizeof(*sdata) +
+ local->hw.vif_data_size,
+ name, ieee80211_if_setup, txqs, 1);
+ if (!ndev)
+ return -ENOMEM;
+ dev_net_set(ndev, wiphy_net(local->hw.wiphy));
+
+ ndev->needed_headroom = local->tx_headroom +
+ 4*6 /* four MAC addresses */
+ + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */
+ + 6 /* mesh */
+ + 8 /* rfc1042/bridge tunnel */
+ - ETH_HLEN /* ethernet hard_header_len */
+ + IEEE80211_ENCRYPT_HEADROOM;
+ ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM;
+
+ ret = dev_alloc_name(ndev, ndev->name);
+ if (ret < 0) {
+ free_netdev(ndev);
+ return ret;
+ }
+
+ ieee80211_assign_perm_addr(local, ndev->perm_addr, type);
+ memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
+ SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
+
+ /* don't use IEEE80211_DEV_TO_SUB_IF -- it checks too much */
+ sdata = netdev_priv(ndev);
+ ndev->ieee80211_ptr = &sdata->wdev;
+ memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN);
+ memcpy(sdata->name, ndev->name, IFNAMSIZ);
+
+ sdata->dev = ndev;
+ }
/* initialise type-independent data */
sdata->wdev.wiphy = local->hw.wiphy;
sdata->local = local;
- sdata->dev = ndev;
#ifdef CONFIG_INET
sdata->arp_filter_state = true;
#endif
@@ -1469,17 +1526,21 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
/* setup type-dependent data */
ieee80211_setup_sdata(sdata, type);
- if (params) {
- ndev->ieee80211_ptr->use_4addr = params->use_4addr;
- if (type == NL80211_IFTYPE_STATION)
- sdata->u.mgd.use_4addr = params->use_4addr;
- }
+ if (ndev) {
+ if (params) {
+ ndev->ieee80211_ptr->use_4addr = params->use_4addr;
+ if (type == NL80211_IFTYPE_STATION)
+ sdata->u.mgd.use_4addr = params->use_4addr;
+ }
- ndev->features |= local->hw.netdev_features;
+ ndev->features |= local->hw.netdev_features;
- ret = register_netdevice(ndev);
- if (ret)
- goto fail;
+ ret = register_netdevice(ndev);
+ if (ret) {
+ free_netdev(ndev);
+ return ret;
+ }
+ }
mutex_lock(&local->iflist_mtx);
list_add_tail_rcu(&sdata->list, &local->interfaces);
@@ -1489,10 +1550,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
*new_wdev = &sdata->wdev;
return 0;
-
- fail:
- free_netdev(ndev);
- return ret;
}
void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
@@ -1503,11 +1560,22 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
list_del_rcu(&sdata->list);
mutex_unlock(&sdata->local->iflist_mtx);
- /* clean up type-dependent data */
- ieee80211_clean_sdata(sdata);
-
synchronize_rcu();
- unregister_netdevice(sdata->dev);
+
+ if (sdata->dev) {
+ unregister_netdevice(sdata->dev);
+ } else {
+ cfg80211_unregister_wdev(&sdata->wdev);
+ kfree(sdata);
+ }
+}
+
+void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata)
+{
+ if (WARN_ON_ONCE(!test_bit(SDATA_STATE_RUNNING, &sdata->state)))
+ return;
+ ieee80211_do_stop(sdata, true);
+ ieee80211_teardown_sdata(sdata);
}
/*
@@ -1518,6 +1586,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata, *tmp;
LIST_HEAD(unreg_list);
+ LIST_HEAD(wdev_list);
ASSERT_RTNL();
@@ -1525,13 +1594,20 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
list_del(&sdata->list);
- ieee80211_clean_sdata(sdata);
-
- unregister_netdevice_queue(sdata->dev, &unreg_list);
+ if (sdata->dev)
+ unregister_netdevice_queue(sdata->dev, &unreg_list);
+ else
+ list_add(&sdata->list, &wdev_list);
}
mutex_unlock(&local->iflist_mtx);
unregister_netdevice_many(&unreg_list);
list_del(&unreg_list);
+
+ list_for_each_entry_safe(sdata, tmp, &wdev_list, list) {
+ list_del(&sdata->list);
+ cfg80211_unregister_wdev(&sdata->wdev);
+ kfree(sdata);
+ }
}
static int netdev_notify(struct notifier_block *nb,
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 7ae678ba5d67..d27e61aaa71b 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -402,7 +402,7 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
* Synchronize so the TX path can no longer be using
* this key before we free/remove it.
*/
- synchronize_rcu();
+ synchronize_net();
if (key->local)
ieee80211_key_disable_hw_accel(key);
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index 1bf7903496f8..bcffa6903129 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -276,7 +276,7 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
read_lock(&tpt_trig->trig.leddev_list_lock);
list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
- led_brightness_set(led_cdev, LED_OFF);
+ led_set_brightness(led_cdev, LED_OFF);
read_unlock(&tpt_trig->trig.leddev_list_lock);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index c26e231c733a..c80c4490351c 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -150,13 +150,11 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
if (test_bit(SCAN_SW_SCANNING, &local->scanning) ||
test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) ||
- test_bit(SCAN_HW_SCANNING, &local->scanning))
+ test_bit(SCAN_HW_SCANNING, &local->scanning) ||
+ !local->ap_power_level)
power = chan->max_power;
else
- power = local->power_constr_level ?
- min(chan->max_power,
- (chan->max_reg_power - local->power_constr_level)) :
- chan->max_power;
+ power = min(chan->max_power, local->ap_power_level);
if (local->user_power_level >= 0)
power = min(power, local->user_power_level);
@@ -207,6 +205,10 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.bssid = NULL;
else if (ieee80211_vif_is_mesh(&sdata->vif)) {
sdata->vif.bss_conf.bssid = zero;
+ } else if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) {
+ sdata->vif.bss_conf.bssid = sdata->vif.addr;
+ WARN_ONCE(changed & ~(BSS_CHANGED_IDLE),
+ "P2P Device BSS changed %#x", changed);
} else {
WARN_ON(1);
return;
@@ -362,9 +364,7 @@ static void ieee80211_recalc_smps_work(struct work_struct *work)
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, recalc_smps);
- mutex_lock(&local->iflist_mtx);
ieee80211_recalc_smps(local);
- mutex_unlock(&local->iflist_mtx);
}
#ifdef CONFIG_INET
@@ -514,6 +514,11 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
BIT(IEEE80211_STYPE_AUTH >> 4) |
BIT(IEEE80211_STYPE_DEAUTH >> 4),
},
+ [NL80211_IFTYPE_P2P_DEVICE] = {
+ .tx = 0xffff,
+ .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
+ BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
+ },
};
static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = {
@@ -536,6 +541,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
int priv_size, i;
struct wiphy *wiphy;
+ if (WARN_ON(!ops->tx || !ops->start || !ops->stop || !ops->config ||
+ !ops->add_interface || !ops->remove_interface ||
+ !ops->configure_filter))
+ return NULL;
+
if (WARN_ON(ops->sta_state && (ops->sta_add || ops->sta_remove)))
return NULL;
@@ -588,13 +598,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN);
- BUG_ON(!ops->tx);
- BUG_ON(!ops->start);
- BUG_ON(!ops->stop);
- BUG_ON(!ops->config);
- BUG_ON(!ops->add_interface);
- BUG_ON(!ops->remove_interface);
- BUG_ON(!ops->configure_filter);
local->ops = ops;
/* set up some defaults */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 85572353a7e3..ff0296c7bab8 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -109,11 +109,11 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
/* Disallow HT40+/- mismatch */
if (ie->ht_operation &&
- (local->_oper_channel_type == NL80211_CHAN_HT40MINUS ||
- local->_oper_channel_type == NL80211_CHAN_HT40PLUS) &&
+ (sdata->vif.bss_conf.channel_type == NL80211_CHAN_HT40MINUS ||
+ sdata->vif.bss_conf.channel_type == NL80211_CHAN_HT40PLUS) &&
(sta_channel_type == NL80211_CHAN_HT40MINUS ||
sta_channel_type == NL80211_CHAN_HT40PLUS) &&
- local->_oper_channel_type != sta_channel_type)
+ sdata->vif.bss_conf.channel_type != sta_channel_type)
goto mismatch;
return true;
@@ -136,10 +136,13 @@ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie)
* mesh_accept_plinks_update - update accepting_plink in local mesh beacons
*
* @sdata: mesh interface in which mesh beacons are going to be updated
+ *
+ * Returns: beacon changed flag if the beacon content changed.
*/
-void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
+u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
{
bool free_plinks;
+ u32 changed = 0;
/* In case mesh_plink_free_count > 0 and mesh_plinktbl_capacity == 0,
* the mesh interface might be able to establish plinks with peers that
@@ -149,8 +152,12 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
*/
free_plinks = mesh_plink_availables(sdata);
- if (free_plinks != sdata->u.mesh.accepting_plinks)
- ieee80211_mesh_housekeeping_timer((unsigned long) sdata);
+ if (free_plinks != sdata->u.mesh.accepting_plinks) {
+ sdata->u.mesh.accepting_plinks = free_plinks;
+ changed = BSS_CHANGED_BEACON;
+ }
+
+ return changed;
}
int mesh_rmc_init(struct ieee80211_sub_if_data *sdata)
@@ -262,7 +269,6 @@ mesh_add_meshconf_ie(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
neighbors = (neighbors > 15) ? 15 : neighbors;
*pos++ = neighbors << 1;
/* Mesh capability */
- ifmsh->accepting_plinks = mesh_plink_availables(sdata);
*pos = MESHCONF_CAPAB_FORWARDING;
*pos |= ifmsh->accepting_plinks ?
MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
@@ -349,17 +355,18 @@ int mesh_add_ds_params_ie(struct sk_buff *skb,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
+ struct ieee80211_channel *chan = local->oper_channel;
u8 *pos;
if (skb_tailroom(skb) < 3)
return -ENOMEM;
- sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+ sband = local->hw.wiphy->bands[chan->band];
if (sband->band == IEEE80211_BAND_2GHZ) {
pos = skb_put(skb, 2 + 1);
*pos++ = WLAN_EID_DS_PARAMS;
*pos++ = 1;
- *pos++ = ieee80211_frequency_to_channel(local->hw.conf.channel->center_freq);
+ *pos++ = ieee80211_frequency_to_channel(chan->center_freq);
}
return 0;
@@ -374,7 +381,7 @@ int mesh_add_ht_cap_ie(struct sk_buff *skb,
sband = local->hw.wiphy->bands[local->oper_channel->band];
if (!sband->ht_cap.ht_supported ||
- local->_oper_channel_type == NL80211_CHAN_NO_HT)
+ sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap))
@@ -391,7 +398,8 @@ int mesh_add_ht_oper_ie(struct sk_buff *skb,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_channel *channel = local->oper_channel;
- enum nl80211_channel_type channel_type = local->_oper_channel_type;
+ enum nl80211_channel_type channel_type =
+ sdata->vif.bss_conf.channel_type;
struct ieee80211_supported_band *sband =
local->hw.wiphy->bands[channel->band];
struct ieee80211_sta_ht_cap *ht_cap = &sband->ht_cap;
@@ -521,14 +529,13 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh)
{
- bool free_plinks;
+ u32 changed;
ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT);
mesh_path_expire(sdata);
- free_plinks = mesh_plink_availables(sdata);
- if (free_plinks != sdata->u.mesh.accepting_plinks)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+ changed = mesh_accept_plinks_update(sdata);
+ ieee80211_bss_info_change_notify(sdata, changed);
mod_timer(&ifmsh->housekeeping_timer,
round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL));
@@ -603,12 +610,14 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL;
sdata->vif.bss_conf.basic_rates =
ieee80211_mandatory_rates(sdata->local,
- sdata->local->hw.conf.channel->band);
+ sdata->local->oper_channel->band);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON |
BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_HT |
BSS_CHANGED_BASIC_RATES |
BSS_CHANGED_BEACON_INT);
+
+ netif_carrier_on(sdata->dev);
}
void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
@@ -616,9 +625,15 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ netif_carrier_off(sdata->dev);
+
+ /* stop the beacon */
ifmsh->mesh_id_len = 0;
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
- sta_info_flush(local, NULL);
+
+ /* flush STAs and mpaths on this iface */
+ sta_info_flush(sdata->local, sdata);
+ mesh_path_flush_by_iface(sdata);
del_timer_sync(&sdata->u.mesh.housekeeping_timer);
del_timer_sync(&sdata->u.mesh.mesh_path_root_timer);
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index faaa39bcfd10..25d0f17dec71 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -215,6 +215,9 @@ struct mesh_rmc {
/* Maximum number of paths per interface */
#define MESH_MAX_MPATHS 1024
+/* Number of frames buffered per destination for unresolved destinations */
+#define MESH_FRAME_QUEUE_LEN 10
+
/* Public interfaces */
/* Various */
int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
@@ -282,7 +285,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
u8 *hw_addr,
struct ieee802_11_elems *ie);
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
-void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
+u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
void mesh_plink_broken(struct sta_info *sta);
void mesh_plink_deactivate(struct sta_info *sta);
int mesh_plink_open(struct sta_info *sta);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 494bc39f61a4..47aeee2d8db1 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -17,8 +17,6 @@
#define MAX_METRIC 0xffffffff
#define ARITH_SHIFT 8
-/* Number of frames buffered per destination for unresolved destinations */
-#define MESH_FRAME_QUEUE_LEN 10
#define MAX_PREQ_QUEUE_LEN 64
/* Destination only */
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 075bc535c601..aa749818860e 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -203,23 +203,17 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta)
{
struct sk_buff *skb;
struct ieee80211_hdr *hdr;
- struct sk_buff_head tmpq;
unsigned long flags;
rcu_assign_pointer(mpath->next_hop, sta);
- __skb_queue_head_init(&tmpq);
-
spin_lock_irqsave(&mpath->frame_queue.lock, flags);
-
- while ((skb = __skb_dequeue(&mpath->frame_queue)) != NULL) {
+ skb_queue_walk(&mpath->frame_queue, skb) {
hdr = (struct ieee80211_hdr *) skb->data;
memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN);
memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN);
- __skb_queue_tail(&tmpq, skb);
}
- skb_queue_splice(&tmpq, &mpath->frame_queue);
spin_unlock_irqrestore(&mpath->frame_queue.lock, flags);
}
@@ -285,40 +279,42 @@ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath,
struct mesh_path *from_mpath,
bool copy)
{
- struct sk_buff *skb, *cp_skb = NULL;
- struct sk_buff_head gateq, failq;
+ struct sk_buff *skb, *fskb, *tmp;
+ struct sk_buff_head failq;
unsigned long flags;
- int num_skbs;
BUG_ON(gate_mpath == from_mpath);
BUG_ON(!gate_mpath->next_hop);
- __skb_queue_head_init(&gateq);
__skb_queue_head_init(&failq);
spin_lock_irqsave(&from_mpath->frame_queue.lock, flags);
skb_queue_splice_init(&from_mpath->frame_queue, &failq);
spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags);
- num_skbs = skb_queue_len(&failq);
-
- while (num_skbs--) {
- skb = __skb_dequeue(&failq);
- if (copy) {
- cp_skb = skb_copy(skb, GFP_ATOMIC);
- if (cp_skb)
- __skb_queue_tail(&failq, cp_skb);
+ skb_queue_walk_safe(&failq, fskb, tmp) {
+ if (skb_queue_len(&gate_mpath->frame_queue) >=
+ MESH_FRAME_QUEUE_LEN) {
+ mpath_dbg(gate_mpath->sdata, "mpath queue full!\n");
+ break;
}
+ skb = skb_copy(fskb, GFP_ATOMIC);
+ if (WARN_ON(!skb))
+ break;
+
prepare_for_gate(skb, gate_mpath->dst, gate_mpath);
- __skb_queue_tail(&gateq, skb);
+ skb_queue_tail(&gate_mpath->frame_queue, skb);
+
+ if (copy)
+ continue;
+
+ __skb_unlink(fskb, &failq);
+ kfree_skb(fskb);
}
- spin_lock_irqsave(&gate_mpath->frame_queue.lock, flags);
- skb_queue_splice(&gateq, &gate_mpath->frame_queue);
mpath_dbg(gate_mpath->sdata, "Mpath queue for gate %pM has %d frames\n",
gate_mpath->dst, skb_queue_len(&gate_mpath->frame_queue));
- spin_unlock_irqrestore(&gate_mpath->frame_queue.lock, flags);
if (!copy)
return;
@@ -531,7 +527,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
read_lock_bh(&pathtbl_resize_lock);
memcpy(new_mpath->dst, dst, ETH_ALEN);
- memset(new_mpath->rann_snd_addr, 0xff, ETH_ALEN);
+ eth_broadcast_addr(new_mpath->rann_snd_addr);
new_mpath->is_root = false;
new_mpath->sdata = sdata;
new_mpath->flags = 0;
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index af671b984df3..3ab34d816897 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -48,17 +48,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
u8 *da, __le16 llid, __le16 plid, __le16 reason);
static inline
-void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
+u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
{
atomic_inc(&sdata->u.mesh.mshstats.estab_plinks);
- mesh_accept_plinks_update(sdata);
+ return mesh_accept_plinks_update(sdata);
}
static inline
-void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
+u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
{
atomic_dec(&sdata->u.mesh.mshstats.estab_plinks);
- mesh_accept_plinks_update(sdata);
+ return mesh_accept_plinks_update(sdata);
}
/**
@@ -117,7 +117,7 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
u16 ht_opmode;
bool non_ht_sta = false, ht20_sta = false;
- if (local->_oper_channel_type == NL80211_CHAN_NO_HT)
+ if (sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT)
return 0;
rcu_read_lock();
@@ -147,7 +147,8 @@ out:
if (non_ht_sta)
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED;
- else if (ht20_sta && local->_oper_channel_type > NL80211_CHAN_HT20)
+ else if (ht20_sta &&
+ sdata->vif.bss_conf.channel_type > NL80211_CHAN_HT20)
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_20MHZ;
else
ht_opmode = IEEE80211_HT_OP_MODE_PROTECTION_NONE;
@@ -170,22 +171,21 @@ out:
* @sta: mesh peer link to deactivate
*
* All mesh paths with this peer as next hop will be flushed
+ * Returns beacon changed flag if the beacon content changed.
*
* Locking: the caller must hold sta->lock
*/
-static bool __mesh_plink_deactivate(struct sta_info *sta)
+static u32 __mesh_plink_deactivate(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- bool deactivated = false;
+ u32 changed = 0;
- if (sta->plink_state == NL80211_PLINK_ESTAB) {
- mesh_plink_dec_estab_count(sdata);
- deactivated = true;
- }
+ if (sta->plink_state == NL80211_PLINK_ESTAB)
+ changed = mesh_plink_dec_estab_count(sdata);
sta->plink_state = NL80211_PLINK_BLOCKED;
mesh_path_flush_by_nexthop(sta);
- return deactivated;
+ return changed;
}
/**
@@ -198,18 +198,17 @@ static bool __mesh_plink_deactivate(struct sta_info *sta)
void mesh_plink_deactivate(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- bool deactivated;
+ u32 changed;
spin_lock_bh(&sta->lock);
- deactivated = __mesh_plink_deactivate(sta);
+ changed = __mesh_plink_deactivate(sta);
sta->reason = cpu_to_le16(WLAN_REASON_MESH_PEER_CANCELED);
mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
sta->sta.addr, sta->llid, sta->plid,
sta->reason);
spin_unlock_bh(&sta->lock);
- if (deactivated)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+ ieee80211_bss_info_change_notify(sdata, changed);
}
static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
@@ -217,12 +216,14 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
u8 *da, __le16 llid, __le16 plid, __le16 reason) {
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
+ struct ieee80211_tx_info *info;
struct ieee80211_mgmt *mgmt;
bool include_plid = false;
u16 peering_proto = 0;
u8 *pos, ie_len = 4;
int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) +
sizeof(mgmt->u.action.u.self_prot);
+ int err = -ENOMEM;
skb = dev_alloc_skb(local->tx_headroom +
hdr_len +
@@ -238,6 +239,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
sdata->u.mesh.ie_len);
if (!skb)
return -1;
+ info = IEEE80211_SKB_CB(skb);
skb_reserve(skb, local->tx_headroom);
mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len);
memset(mgmt, 0, hdr_len);
@@ -258,15 +260,18 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, 2);
memcpy(pos + 2, &plid, 2);
}
- if (ieee80211_add_srates_ie(sdata, skb, true) ||
- ieee80211_add_ext_srates_ie(sdata, skb, true) ||
+ if (ieee80211_add_srates_ie(sdata, skb, true,
+ local->oper_channel->band) ||
+ ieee80211_add_ext_srates_ie(sdata, skb, true,
+ local->oper_channel->band) ||
mesh_add_rsn_ie(skb, sdata) ||
mesh_add_meshid_ie(skb, sdata) ||
mesh_add_meshconf_ie(skb, sdata))
- return -1;
+ goto free;
} else { /* WLAN_SP_MESH_PEERING_CLOSE */
+ info->flags |= IEEE80211_TX_CTL_NO_ACK;
if (mesh_add_meshid_ie(skb, sdata))
- return -1;
+ goto free;
}
/* Add Mesh Peering Management element */
@@ -285,11 +290,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
ie_len += 2; /* reason code */
break;
default:
- return -EINVAL;
+ err = -EINVAL;
+ goto free;
}
if (WARN_ON(skb_tailroom(skb) < 2 + ie_len))
- return -ENOMEM;
+ goto free;
pos = skb_put(skb, 2 + ie_len);
*pos++ = WLAN_EID_PEER_MGMT;
@@ -310,14 +316,17 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
if (action != WLAN_SP_MESH_PEERING_CLOSE) {
if (mesh_add_ht_cap_ie(skb, sdata) ||
mesh_add_ht_oper_ie(skb, sdata))
- return -1;
+ goto free;
}
if (mesh_add_vendor_ies(skb, sdata))
- return -1;
+ goto free;
ieee80211_tx_skb(sdata, skb);
return 0;
+free:
+ kfree_skb(skb);
+ return err;
}
/**
@@ -362,9 +371,14 @@ static struct sta_info *mesh_peer_init(struct ieee80211_sub_if_data *sdata,
spin_lock_bh(&sta->lock);
sta->last_rx = jiffies;
+ if (sta->plink_state == NL80211_PLINK_ESTAB) {
+ spin_unlock_bh(&sta->lock);
+ return sta;
+ }
+
sta->sta.supp_rates[band] = rates;
if (elems->ht_cap_elem &&
- sdata->local->_oper_channel_type != NL80211_CHAN_NO_HT)
+ sdata->vif.bss_conf.channel_type != NL80211_CHAN_NO_HT)
ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband,
elems->ht_cap_elem,
&sta->sta.ht_cap);
@@ -523,7 +537,8 @@ int mesh_plink_open(struct sta_info *sta)
spin_lock_bh(&sta->lock);
get_random_bytes(&llid, 2);
sta->llid = llid;
- if (sta->plink_state != NL80211_PLINK_LISTEN) {
+ if (sta->plink_state != NL80211_PLINK_LISTEN &&
+ sta->plink_state != NL80211_PLINK_BLOCKED) {
spin_unlock_bh(&sta->lock);
return -EBUSY;
}
@@ -541,15 +556,14 @@ int mesh_plink_open(struct sta_info *sta)
void mesh_plink_block(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- bool deactivated;
+ u32 changed;
spin_lock_bh(&sta->lock);
- deactivated = __mesh_plink_deactivate(sta);
+ changed = __mesh_plink_deactivate(sta);
sta->plink_state = NL80211_PLINK_BLOCKED;
spin_unlock_bh(&sta->lock);
- if (deactivated)
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
+ ieee80211_bss_info_change_notify(sdata, changed);
}
@@ -852,9 +866,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
del_timer(&sta->plink_timer);
sta->plink_state = NL80211_PLINK_ESTAB;
spin_unlock_bh(&sta->lock);
- mesh_plink_inc_estab_count(sdata);
+ changed |= mesh_plink_inc_estab_count(sdata);
changed |= mesh_set_ht_prot_mode(sdata);
- changed |= BSS_CHANGED_BEACON;
mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n",
sta->sta.addr);
break;
@@ -888,9 +901,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
del_timer(&sta->plink_timer);
sta->plink_state = NL80211_PLINK_ESTAB;
spin_unlock_bh(&sta->lock);
- mesh_plink_inc_estab_count(sdata);
+ changed |= mesh_plink_inc_estab_count(sdata);
changed |= mesh_set_ht_prot_mode(sdata);
- changed |= BSS_CHANGED_BEACON;
mpl_dbg(sdata, "Mesh plink with %pM ESTABLISHED\n",
sta->sta.addr);
mesh_plink_frame_tx(sdata,
@@ -908,13 +920,12 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
case CLS_ACPT:
reason = cpu_to_le16(WLAN_REASON_MESH_CLOSE);
sta->reason = reason;
- __mesh_plink_deactivate(sta);
+ changed |= __mesh_plink_deactivate(sta);
sta->plink_state = NL80211_PLINK_HOLDING;
llid = sta->llid;
mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
spin_unlock_bh(&sta->lock);
changed |= mesh_set_ht_prot_mode(sdata);
- changed |= BSS_CHANGED_BEACON;
mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
sta->sta.addr, llid, plid, reason);
break;
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index accfa00ffcdf..a16b7b4b1e02 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -56,7 +56,6 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata)
u64 tsfdelta;
spin_lock_bh(&ifmsh->sync_offset_lock);
-
if (ifmsh->sync_offset_clockdrift_max < beacon_int_fraction) {
msync_dbg(sdata, "TBTT : max clockdrift=%lld; adjusting\n",
(long long) ifmsh->sync_offset_clockdrift_max);
@@ -69,11 +68,11 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata)
tsfdelta = -beacon_int_fraction;
ifmsh->sync_offset_clockdrift_max -= beacon_int_fraction;
}
+ spin_unlock_bh(&ifmsh->sync_offset_lock);
tsf = drv_get_tsf(local, sdata);
if (tsf != -1ULL)
drv_set_tsf(local, sdata, tsf + tsfdelta);
- spin_unlock_bh(&ifmsh->sync_offset_lock);
}
static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f76b83341cf9..e714ed8bb198 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -88,8 +88,6 @@ MODULE_PARM_DESC(probe_wait_ms,
#define TMR_RUNNING_TIMER 0
#define TMR_RUNNING_CHANSW 1
-#define DEAUTH_DISASSOC_LEN (24 /* hdr */ + 2 /* reason */)
-
/*
* All cfg80211 functions have to be called outside a locked
* section so that they can acquire a lock themselves... This
@@ -146,6 +144,9 @@ void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)
return;
+ if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+ return;
+
mod_timer(&sdata->u.mgd.bcn_mon_timer,
round_jiffies_up(jiffies + sdata->u.mgd.beacon_timeout));
}
@@ -182,15 +183,15 @@ static u32 ieee80211_config_ht_tx(struct ieee80211_sub_if_data *sdata,
u16 ht_opmode;
bool disable_40 = false;
- sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+ sband = local->hw.wiphy->bands[local->oper_channel->band];
switch (sdata->vif.bss_conf.channel_type) {
case NL80211_CHAN_HT40PLUS:
- if (local->hw.conf.channel->flags & IEEE80211_CHAN_NO_HT40PLUS)
+ if (local->oper_channel->flags & IEEE80211_CHAN_NO_HT40PLUS)
disable_40 = true;
break;
case NL80211_CHAN_HT40MINUS:
- if (local->hw.conf.channel->flags & IEEE80211_CHAN_NO_HT40MINUS)
+ if (local->oper_channel->flags & IEEE80211_CHAN_NO_HT40MINUS)
disable_40 = true;
break;
default:
@@ -326,6 +327,26 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
ieee80211_ie_build_ht_cap(pos, &ht_cap, cap);
}
+static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb,
+ struct ieee80211_supported_band *sband)
+{
+ u8 *pos;
+ u32 cap;
+ struct ieee80211_sta_vht_cap vht_cap;
+
+ BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap));
+
+ memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap));
+
+ /* determine capability flags */
+ cap = vht_cap.cap;
+
+ /* reserve and fill IE */
+ pos = skb_put(skb, sizeof(struct ieee80211_vht_capabilities) + 2);
+ ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
+}
+
static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
@@ -371,6 +392,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
4 + /* power capability */
2 + 2 * sband->n_channels + /* supported channels */
2 + sizeof(struct ieee80211_ht_cap) + /* HT */
+ 2 + sizeof(struct ieee80211_vht_capabilities) + /* VHT */
assoc_data->ie_len + /* extra IEs */
9, /* WMM */
GFP_KERNEL);
@@ -503,6 +525,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param,
sband, local->oper_channel, ifmgd->ap_smps);
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+ ieee80211_add_vht_ie(sdata, skb, sband);
+
/* if present, add any custom non-vendor IEs that go after HT */
if (assoc_data->ie_len && assoc_data->ie) {
noffset = ieee80211_ie_split_vendor(assoc_data->ie,
@@ -547,48 +572,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
ieee80211_tx_skb(sdata, skb);
}
-static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
- const u8 *bssid, u16 stype,
- u16 reason, bool send_frame,
- u8 *frame_buf)
-{
- struct ieee80211_local *local = sdata->local;
- struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct sk_buff *skb;
- struct ieee80211_mgmt *mgmt = (void *)frame_buf;
-
- /* build frame */
- mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype);
- mgmt->duration = 0; /* initialize only */
- mgmt->seq_ctrl = 0; /* initialize only */
- memcpy(mgmt->da, bssid, ETH_ALEN);
- memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
- memcpy(mgmt->bssid, bssid, ETH_ALEN);
- /* u.deauth.reason_code == u.disassoc.reason_code */
- mgmt->u.deauth.reason_code = cpu_to_le16(reason);
-
- if (send_frame) {
- skb = dev_alloc_skb(local->hw.extra_tx_headroom +
- DEAUTH_DISASSOC_LEN);
- if (!skb)
- return;
-
- skb_reserve(skb, local->hw.extra_tx_headroom);
-
- /* copy in frame */
- memcpy(skb_put(skb, DEAUTH_DISASSOC_LEN),
- mgmt, DEAUTH_DISASSOC_LEN);
-
- if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED))
- IEEE80211_SKB_CB(skb)->flags |=
- IEEE80211_TX_INTFL_DONT_ENCRYPT;
-
- drv_mgd_prepare_tx(local, sdata);
-
- ieee80211_tx_skb(sdata, skb);
- }
-}
-
void ieee80211_send_pspoll(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
@@ -687,6 +670,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
/* XXX: shouldn't really modify cfg80211-owned data! */
ifmgd->associated->channel = sdata->local->oper_channel;
+ /* XXX: wait for a beacon first? */
ieee80211_wake_queues_by_reason(&sdata->local->hw,
IEEE80211_QUEUE_STOP_REASON_CSA);
out:
@@ -704,16 +688,13 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
trace_api_chswitch_done(sdata, success);
if (!success) {
- /*
- * If the channel switch was not successful, stay
- * around on the old channel. We currently lack
- * good handling of this situation, possibly we
- * should just drop the association.
- */
- sdata->local->csa_channel = sdata->local->oper_channel;
+ sdata_info(sdata,
+ "driver channel switch failed, disconnecting\n");
+ ieee80211_queue_work(&sdata->local->hw,
+ &ifmgd->csa_connection_drop_work);
+ } else {
+ ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
}
-
- ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
}
EXPORT_SYMBOL(ieee80211_chswitch_done);
@@ -758,61 +739,111 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
return;
new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
- if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED)
+ if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) {
+ sdata_info(sdata,
+ "AP %pM switches to unsupported channel (%d MHz), disconnecting\n",
+ ifmgd->associated->bssid, new_freq);
+ ieee80211_queue_work(&sdata->local->hw,
+ &ifmgd->csa_connection_drop_work);
return;
+ }
sdata->local->csa_channel = new_ch;
+ ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
+
+ if (sw_elem->mode)
+ ieee80211_stop_queues_by_reason(&sdata->local->hw,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+
if (sdata->local->ops->channel_switch) {
/* use driver's channel switch callback */
- struct ieee80211_channel_switch ch_switch;
- memset(&ch_switch, 0, sizeof(ch_switch));
- ch_switch.timestamp = timestamp;
- if (sw_elem->mode) {
- ch_switch.block_tx = true;
- ieee80211_stop_queues_by_reason(&sdata->local->hw,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- }
- ch_switch.channel = new_ch;
- ch_switch.count = sw_elem->count;
- ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
+ struct ieee80211_channel_switch ch_switch = {
+ .timestamp = timestamp,
+ .block_tx = sw_elem->mode,
+ .channel = new_ch,
+ .count = sw_elem->count,
+ };
+
drv_channel_switch(sdata->local, &ch_switch);
return;
}
/* channel switch handled in software */
- if (sw_elem->count <= 1) {
+ if (sw_elem->count <= 1)
ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
- } else {
- if (sw_elem->mode)
- ieee80211_stop_queues_by_reason(&sdata->local->hw,
- IEEE80211_QUEUE_STOP_REASON_CSA);
- ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
+ else
mod_timer(&ifmgd->chswitch_timer,
- jiffies +
- msecs_to_jiffies(sw_elem->count *
- cbss->beacon_interval));
- }
+ TU_TO_EXP_TIME(sw_elem->count *
+ cbss->beacon_interval));
}
static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
- u16 capab_info, u8 *pwr_constr_elem,
- u8 pwr_constr_elem_len)
+ struct ieee80211_channel *channel,
+ const u8 *country_ie, u8 country_ie_len,
+ const u8 *pwr_constr_elem)
{
- struct ieee80211_conf *conf = &sdata->local->hw.conf;
+ struct ieee80211_country_ie_triplet *triplet;
+ int chan = ieee80211_frequency_to_channel(channel->center_freq);
+ int i, chan_pwr, chan_increment, new_ap_level;
+ bool have_chan_pwr = false;
- if (!(capab_info & WLAN_CAPABILITY_SPECTRUM_MGMT))
+ /* Invalid IE */
+ if (country_ie_len % 2 || country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN)
return;
- /* Power constraint IE length should be 1 octet */
- if (pwr_constr_elem_len != 1)
- return;
+ triplet = (void *)(country_ie + 3);
+ country_ie_len -= 3;
+
+ switch (channel->band) {
+ default:
+ WARN_ON_ONCE(1);
+ /* fall through */
+ case IEEE80211_BAND_2GHZ:
+ case IEEE80211_BAND_60GHZ:
+ chan_increment = 1;
+ break;
+ case IEEE80211_BAND_5GHZ:
+ chan_increment = 4;
+ break;
+ }
+
+ /* find channel */
+ while (country_ie_len >= 3) {
+ u8 first_channel = triplet->chans.first_channel;
- if ((*pwr_constr_elem <= conf->channel->max_reg_power) &&
- (*pwr_constr_elem != sdata->local->power_constr_level)) {
- sdata->local->power_constr_level = *pwr_constr_elem;
- ieee80211_hw_config(sdata->local, 0);
+ if (first_channel >= IEEE80211_COUNTRY_EXTENSION_ID)
+ goto next;
+
+ for (i = 0; i < triplet->chans.num_channels; i++) {
+ if (first_channel + i * chan_increment == chan) {
+ have_chan_pwr = true;
+ chan_pwr = triplet->chans.max_power;
+ break;
+ }
+ }
+ if (have_chan_pwr)
+ break;
+
+ next:
+ triplet++;
+ country_ie_len -= 3;
}
+
+ if (!have_chan_pwr)
+ return;
+
+ new_ap_level = max_t(int, 0, chan_pwr - *pwr_constr_elem);
+
+ if (sdata->local->ap_power_level == new_ap_level)
+ return;
+
+ sdata_info(sdata,
+ "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n",
+ new_ap_level, chan_pwr, *pwr_constr_elem,
+ sdata->u.mgd.bssid);
+ sdata->local->ap_power_level = new_ap_level;
+ ieee80211_hw_config(sdata->local, 0);
}
void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif)
@@ -1007,6 +1038,16 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
ieee80211_change_ps(local);
}
+void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata)
+{
+ bool ps_allowed = ieee80211_powersave_allowed(sdata);
+
+ if (sdata->vif.bss_conf.ps != ps_allowed) {
+ sdata->vif.bss_conf.ps = ps_allowed;
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_PS);
+ }
+}
+
void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
{
struct ieee80211_local *local =
@@ -1239,7 +1280,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
}
use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME);
- if (sdata->local->hw.conf.channel->band == IEEE80211_BAND_5GHZ)
+ if (sdata->local->oper_channel->band == IEEE80211_BAND_5GHZ)
use_short_slot = true;
if (use_protection != bss_conf->use_cts_prot) {
@@ -1307,9 +1348,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
mutex_lock(&local->iflist_mtx);
ieee80211_recalc_ps(local, -1);
- ieee80211_recalc_smps(local);
mutex_unlock(&local->iflist_mtx);
+ ieee80211_recalc_smps(local);
+ ieee80211_recalc_ps_vif(sdata);
+
netif_tx_start_all_queues(sdata->dev);
netif_carrier_on(sdata->dev);
}
@@ -1356,7 +1399,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
sta = sta_info_get(sdata, ifmgd->bssid);
if (sta) {
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
- ieee80211_sta_tear_down_BA_sessions(sta, tx);
+ ieee80211_sta_tear_down_BA_sessions(sta, false);
}
mutex_unlock(&local->sta_mtx);
@@ -1371,6 +1414,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
}
local->ps_sdata = NULL;
+ /* disable per-vif ps */
+ ieee80211_recalc_ps_vif(sdata);
+
/* flush out any pending frame (e.g. DELBA) before deauth/disassoc */
if (tx)
drv_flush(local, false);
@@ -1401,7 +1447,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa));
memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask));
- local->power_constr_level = 0;
+ local->ap_power_level = 0;
del_timer_sync(&local->dynamic_ps_timer);
cancel_work_sync(&local->dynamic_ps_enable_work);
@@ -1542,7 +1588,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
ssid_len = ssid[1];
ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid_len, NULL,
- 0, (u32) -1, true, false);
+ 0, (u32) -1, true, false,
+ ifmgd->associated->channel);
}
ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
@@ -1645,19 +1692,21 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
ssid_len = ssid[1];
skb = ieee80211_build_probe_req(sdata, cbss->bssid,
- (u32) -1, ssid + 2, ssid_len,
+ (u32) -1,
+ sdata->local->oper_channel,
+ ssid + 2, ssid_len,
NULL, 0, true);
return skb;
}
EXPORT_SYMBOL(ieee80211_ap_probereq_get);
-static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
+static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata,
+ bool transmit_frame)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
- u8 bssid[ETH_ALEN];
- u8 frame_buf[DEAUTH_DISASSOC_LEN];
+ u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
mutex_lock(&ifmgd->mtx);
if (!ifmgd->associated) {
@@ -1665,27 +1714,24 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
return;
}
- memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
-
- sdata_info(sdata, "Connection to AP %pM lost\n", bssid);
-
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
- false, frame_buf);
+ transmit_frame, frame_buf);
+ ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
mutex_unlock(&ifmgd->mtx);
/*
* must be outside lock due to cfg80211,
* but that's not a problem.
*/
- cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN);
+ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN);
mutex_lock(&local->mtx);
ieee80211_recalc_idle(local);
mutex_unlock(&local->mtx);
}
-void ieee80211_beacon_connection_loss_work(struct work_struct *work)
+static void ieee80211_beacon_connection_loss_work(struct work_struct *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
@@ -1701,10 +1747,24 @@ void ieee80211_beacon_connection_loss_work(struct work_struct *work)
rcu_read_unlock();
}
- if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
- __ieee80211_connection_loss(sdata);
- else
+ if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) {
+ sdata_info(sdata, "Connection to AP %pM lost\n",
+ ifmgd->bssid);
+ __ieee80211_disconnect(sdata, false);
+ } else {
ieee80211_mgd_probe_ap(sdata, true);
+ }
+}
+
+static void ieee80211_csa_connection_drop_work(struct work_struct *work)
+{
+ struct ieee80211_sub_if_data *sdata =
+ container_of(work, struct ieee80211_sub_if_data,
+ u.mgd.csa_connection_drop_work);
+
+ ieee80211_wake_queues_by_reason(&sdata->local->hw,
+ IEEE80211_QUEUE_STOP_REASON_CSA);
+ __ieee80211_disconnect(sdata, true);
}
void ieee80211_beacon_loss(struct ieee80211_vif *vif)
@@ -2232,14 +2292,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
mutex_unlock(&local->iflist_mtx);
}
- if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) &&
- (memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid,
- ETH_ALEN) == 0)) {
- struct ieee80211_channel_sw_ie *sw_elem =
- (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem;
- ieee80211_sta_process_chanswitch(sdata, sw_elem,
+ if (elems->ch_switch_ie &&
+ memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0)
+ ieee80211_sta_process_chanswitch(sdata, elems->ch_switch_ie,
bss, rx_status->mactime);
- }
}
@@ -2326,7 +2382,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
if (baselen > len)
return;
- if (rx_status->freq != local->hw.conf.channel->center_freq)
+ if (rx_status->freq != local->oper_channel->center_freq)
return;
if (ifmgd->assoc_data && !ifmgd->assoc_data->have_beacon &&
@@ -2490,21 +2546,19 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
!(ifmgd->flags & IEEE80211_STA_DISABLE_11N)) {
struct ieee80211_supported_band *sband;
- sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+ sband = local->hw.wiphy->bands[local->oper_channel->band];
changed |= ieee80211_config_ht_tx(sdata, elems.ht_operation,
bssid, true);
}
- /* Note: country IE parsing is done for us by cfg80211 */
- if (elems.country_elem) {
- /* TODO: IBSS also needs this */
- if (elems.pwr_constr_elem)
- ieee80211_handle_pwr_constr(sdata,
- le16_to_cpu(mgmt->u.probe_resp.capab_info),
- elems.pwr_constr_elem,
- elems.pwr_constr_elem_len);
- }
+ if (elems.country_elem && elems.pwr_constr_elem &&
+ mgmt->u.probe_resp.capab_info &
+ cpu_to_le16(WLAN_CAPABILITY_SPECTRUM_MGMT))
+ ieee80211_handle_pwr_constr(sdata, local->oper_channel,
+ elems.country_elem,
+ elems.country_elem_len,
+ elems.pwr_constr_elem);
ieee80211_bss_info_change_notify(sdata, changed);
}
@@ -2601,7 +2655,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u8 frame_buf[DEAUTH_DISASSOC_LEN];
+ u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason,
false, frame_buf);
@@ -2611,7 +2665,7 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
* must be outside lock due to cfg80211,
* but that's not a problem.
*/
- cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN);
+ cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN);
mutex_lock(&local->mtx);
ieee80211_recalc_idle(local);
@@ -2673,7 +2727,8 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
* will not answer to direct packet in unassociated state.
*/
ieee80211_send_probe_req(sdata, NULL, ssidie + 2, ssidie[1],
- NULL, 0, (u32) -1, true, false);
+ NULL, 0, (u32) -1, true, false,
+ auth_data->bss->channel);
}
auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
@@ -2894,6 +2949,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
cancel_work_sync(&ifmgd->monitor_work);
cancel_work_sync(&ifmgd->beacon_connection_loss_work);
+ cancel_work_sync(&ifmgd->csa_connection_drop_work);
if (del_timer_sync(&ifmgd->timer))
set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
@@ -2950,6 +3006,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
INIT_WORK(&ifmgd->beacon_connection_loss_work,
ieee80211_beacon_connection_loss_work);
+ INIT_WORK(&ifmgd->csa_connection_drop_work,
+ ieee80211_csa_connection_drop_work);
INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work);
setup_timer(&ifmgd->timer, ieee80211_sta_timer,
(unsigned long) sdata);
@@ -3000,41 +3058,17 @@ int ieee80211_max_network_latency(struct notifier_block *nb,
return 0;
}
-static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_bss *cbss, bool assoc)
+static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_bss *cbss)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- struct ieee80211_bss *bss = (void *)cbss->priv;
- struct sta_info *sta = NULL;
- bool have_sta = false;
- int err;
int ht_cfreq;
enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
const u8 *ht_oper_ie;
const struct ieee80211_ht_operation *ht_oper = NULL;
struct ieee80211_supported_band *sband;
- if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data))
- return -EINVAL;
-
- if (assoc) {
- rcu_read_lock();
- have_sta = sta_info_get(sdata, cbss->bssid);
- rcu_read_unlock();
- }
-
- if (!have_sta) {
- sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL);
- if (!sta)
- return -ENOMEM;
- }
-
- mutex_lock(&local->mtx);
- ieee80211_recalc_idle(sdata->local);
- mutex_unlock(&local->mtx);
-
- /* switch to the right channel */
sband = local->hw.wiphy->bands[cbss->channel->band];
ifmgd->flags &= ~IEEE80211_STA_DISABLE_40MHZ;
@@ -3097,10 +3131,51 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
local->oper_channel = cbss->channel;
ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
- if (sta) {
+ return 0;
+}
+
+static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_bss *cbss, bool assoc)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ struct ieee80211_bss *bss = (void *)cbss->priv;
+ struct sta_info *new_sta = NULL;
+ bool have_sta = false;
+ int err;
+
+ if (WARN_ON(!ifmgd->auth_data && !ifmgd->assoc_data))
+ return -EINVAL;
+
+ if (assoc) {
+ rcu_read_lock();
+ have_sta = sta_info_get(sdata, cbss->bssid);
+ rcu_read_unlock();
+ }
+
+ if (!have_sta) {
+ new_sta = sta_info_alloc(sdata, cbss->bssid, GFP_KERNEL);
+ if (!new_sta)
+ return -ENOMEM;
+ }
+
+ mutex_lock(&local->mtx);
+ ieee80211_recalc_idle(sdata->local);
+ mutex_unlock(&local->mtx);
+
+ if (new_sta) {
u32 rates = 0, basic_rates = 0;
bool have_higher_than_11mbit;
int min_rate = INT_MAX, min_rate_index = -1;
+ struct ieee80211_supported_band *sband;
+
+ sband = local->hw.wiphy->bands[cbss->channel->band];
+
+ err = ieee80211_prep_channel(sdata, cbss);
+ if (err) {
+ sta_info_free(local, new_sta);
+ return err;
+ }
ieee80211_get_rates(sband, bss->supp_rates,
bss->supp_rates_len,
@@ -3122,7 +3197,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
basic_rates = BIT(min_rate_index);
}
- sta->sta.supp_rates[cbss->channel->band] = rates;
+ new_sta->sta.supp_rates[cbss->channel->band] = rates;
sdata->vif.bss_conf.basic_rates = basic_rates;
/* cf. IEEE 802.11 9.2.12 */
@@ -3145,10 +3220,10 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
BSS_CHANGED_BEACON_INT);
if (assoc)
- sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
+ sta_info_pre_move_state(new_sta, IEEE80211_STA_AUTH);
- err = sta_info_insert(sta);
- sta = NULL;
+ err = sta_info_insert(new_sta);
+ new_sta = NULL;
if (err) {
sdata_info(sdata,
"failed to insert STA entry for the AP (error %d)\n",
@@ -3302,9 +3377,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
}
/* prepare assoc data */
-
- ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N;
- ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
+
+ /*
+ * keep only the 40 MHz disable bit set as it might have
+ * been set during authentication already, all other bits
+ * should be reset for a new connection
+ */
+ ifmgd->flags &= IEEE80211_STA_DISABLE_40MHZ;
ifmgd->beacon_crc_valid = false;
@@ -3320,21 +3399,34 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP ||
req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) {
ifmgd->flags |= IEEE80211_STA_DISABLE_11N;
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
netdev_info(sdata->dev,
- "disabling HT due to WEP/TKIP use\n");
+ "disabling HT/VHT due to WEP/TKIP use\n");
}
}
- if (req->flags & ASSOC_REQ_DISABLE_HT)
+ if (req->flags & ASSOC_REQ_DISABLE_HT) {
ifmgd->flags |= IEEE80211_STA_DISABLE_11N;
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ }
/* Also disable HT if we don't support it or the AP doesn't use WMM */
sband = local->hw.wiphy->bands[req->bss->channel->band];
if (!sband->ht_cap.ht_supported ||
local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used) {
ifmgd->flags |= IEEE80211_STA_DISABLE_11N;
- netdev_info(sdata->dev,
- "disabling HT as WMM/QoS is not supported\n");
+ if (!bss->wmm_used)
+ netdev_info(sdata->dev,
+ "disabling HT as WMM/QoS is not supported by the AP\n");
+ }
+
+ /* disable VHT if we don't support it or the AP doesn't use WMM */
+ if (!sband->vht_cap.vht_supported ||
+ local->hw.queues < IEEE80211_NUM_ACS || !bss->wmm_used) {
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ if (!bss->wmm_used)
+ netdev_info(sdata->dev,
+ "disabling VHT as WMM/QoS is not supported by the AP\n");
}
memcpy(&ifmgd->ht_capa, &req->ht_capa, sizeof(ifmgd->ht_capa));
@@ -3456,7 +3548,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
struct cfg80211_deauth_request *req)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u8 frame_buf[DEAUTH_DISASSOC_LEN];
+ u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
mutex_lock(&ifmgd->mtx);
@@ -3471,17 +3563,21 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
req->bssid, req->reason_code);
if (ifmgd->associated &&
- ether_addr_equal(ifmgd->associated->bssid, req->bssid))
+ ether_addr_equal(ifmgd->associated->bssid, req->bssid)) {
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
req->reason_code, true, frame_buf);
- else
+ } else {
+ drv_mgd_prepare_tx(sdata->local, sdata);
ieee80211_send_deauth_disassoc(sdata, req->bssid,
IEEE80211_STYPE_DEAUTH,
req->reason_code, true,
frame_buf);
+ }
+
mutex_unlock(&ifmgd->mtx);
- __cfg80211_send_deauth(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN);
+ __cfg80211_send_deauth(sdata->dev, frame_buf,
+ IEEE80211_DEAUTH_FRAME_LEN);
mutex_lock(&sdata->local->mtx);
ieee80211_recalc_idle(sdata->local);
@@ -3495,7 +3591,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 bssid[ETH_ALEN];
- u8 frame_buf[DEAUTH_DISASSOC_LEN];
+ u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
mutex_lock(&ifmgd->mtx);
@@ -3520,7 +3616,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
frame_buf);
mutex_unlock(&ifmgd->mtx);
- __cfg80211_send_disassoc(sdata->dev, frame_buf, DEAUTH_DISASSOC_LEN);
+ __cfg80211_send_disassoc(sdata->dev, frame_buf,
+ IEEE80211_DEAUTH_FRAME_LEN);
mutex_lock(&sdata->local->mtx);
ieee80211_recalc_idle(sdata->local);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 635c3250c668..83608ac16780 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -116,6 +116,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local,
if (!ieee80211_sdata_running(sdata))
continue;
+ if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE)
+ continue;
+
if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
@@ -144,6 +147,9 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
mutex_lock(&local->iflist_mtx);
list_for_each_entry(sdata, &local->interfaces, list) {
+ if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE)
+ continue;
+
if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
@@ -227,8 +233,7 @@ static void ieee80211_hw_roc_start(struct work_struct *work)
u32 dur = dep->duration;
dep->duration = dur - roc->duration;
roc->duration = dur;
- list_del(&dep->list);
- list_add(&dep->list, &roc->list);
+ list_move(&dep->list, &roc->list);
}
}
out_unlock:
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 6e4fd32c6617..10de668eb9f6 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -56,7 +56,7 @@ static inline void rate_control_rate_init(struct sta_info *sta)
if (!ref)
return;
- sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+ sband = local->hw.wiphy->bands[local->oper_channel->band];
ref->ops->rate_init(ref->priv, sband, ista, priv_sta);
set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0cb4edee6af5..61c621e9273f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -60,7 +60,9 @@ static inline int should_drop_frame(struct sk_buff *skb,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
+ if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
+ RX_FLAG_FAILED_PLCP_CRC |
+ RX_FLAG_AMPDU_IS_ZEROLEN))
return 1;
if (unlikely(skb->len < 16 + present_fcs_len))
return 1;
@@ -91,10 +93,17 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
if (status->flag & RX_FLAG_HT) /* HT info */
len += 3;
+ if (status->flag & RX_FLAG_AMPDU_DETAILS) {
+ /* padding */
+ while (len & 3)
+ len++;
+ len += 8;
+ }
+
return len;
}
-/**
+/*
* ieee80211_add_rx_radiotap_header - add radiotap header
*
* add a radiotap header containing all the fields which the hardware provided.
@@ -215,6 +224,37 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
pos++;
*pos++ = status->rate_idx;
}
+
+ if (status->flag & RX_FLAG_AMPDU_DETAILS) {
+ u16 flags = 0;
+
+ /* ensure 4 byte alignment */
+ while ((pos - (u8 *)rthdr) & 3)
+ pos++;
+ rthdr->it_present |=
+ cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS);
+ put_unaligned_le32(status->ampdu_reference, pos);
+ pos += 4;
+ if (status->flag & RX_FLAG_AMPDU_REPORT_ZEROLEN)
+ flags |= IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN;
+ if (status->flag & RX_FLAG_AMPDU_IS_ZEROLEN)
+ flags |= IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN;
+ if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN)
+ flags |= IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN;
+ if (status->flag & RX_FLAG_AMPDU_IS_LAST)
+ flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST;
+ if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR)
+ flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
+ if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
+ flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
+ put_unaligned_le16(flags, pos);
+ pos += 2;
+ if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
+ *pos++ = status->ampdu_delimiter_crc;
+ else
+ *pos++ = 0;
+ *pos++ = 0;
+ }
}
/*
@@ -2268,7 +2308,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
goto queue;
case WLAN_CATEGORY_SPECTRUM_MGMT:
- if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ)
+ if (status->band != IEEE80211_BAND_5GHZ)
break;
if (sdata->vif.type != NL80211_IFTYPE_STATION)
@@ -2772,8 +2812,7 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
if (!bssid) {
if (!ether_addr_equal(sdata->vif.addr, hdr->addr1))
return 0;
- } else if (!ieee80211_bssid_match(bssid,
- sdata->vif.addr)) {
+ } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) {
/*
* Accept public action frames even when the
* BSSID doesn't match, this is used for P2P
@@ -2793,9 +2832,18 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2))
return 0;
break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ if (!ieee80211_is_public_action(hdr, skb->len) &&
+ !ieee80211_is_probe_req(hdr->frame_control) &&
+ !ieee80211_is_probe_resp(hdr->frame_control) &&
+ !ieee80211_is_beacon(hdr->frame_control))
+ return 0;
+ if (!ether_addr_equal(sdata->vif.addr, hdr->addr1))
+ status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
+ break;
default:
/* should never get here */
- WARN_ON(1);
+ WARN_ON_ONCE(1);
break;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 839dd9737989..c4cdbde24fd3 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -407,7 +407,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
enum ieee80211_band band = local->hw.conf.channel->band;
sdata = rcu_dereference_protected(local->scan_sdata,
- lockdep_is_held(&local->mtx));;
+ lockdep_is_held(&local->mtx));
for (i = 0; i < local->scan_req->n_ssids; i++)
ieee80211_send_probe_req(
@@ -416,7 +416,8 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
local->scan_req->ssids[i].ssid_len,
local->scan_req->ie, local->scan_req->ie_len,
local->scan_req->rates[band], false,
- local->scan_req->no_cck);
+ local->scan_req->no_cck,
+ local->hw.conf.channel);
/*
* After sending probe requests, wait for probe responses
@@ -479,11 +480,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
if (local->ops->hw_scan) {
__set_bit(SCAN_HW_SCANNING, &local->scanning);
} else if ((req->n_channels == 1) &&
- (req->channels[0]->center_freq ==
- local->hw.conf.channel->center_freq)) {
-
- /* If we are scanning only on the current channel, then
- * we do not need to stop normal activities
+ (req->channels[0] == local->oper_channel)) {
+ /*
+ * If we are scanning only on the operating channel
+ * then we do not need to stop normal activities
*/
unsigned long next_delay;
@@ -917,6 +917,7 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
struct cfg80211_sched_scan_request *req)
{
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sched_scan_ies sched_scan_ies;
int ret, i;
mutex_lock(&local->mtx);
@@ -935,33 +936,28 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
if (!local->hw.wiphy->bands[i])
continue;
- local->sched_scan_ies.ie[i] = kzalloc(2 +
- IEEE80211_MAX_SSID_LEN +
- local->scan_ies_len +
- req->ie_len,
- GFP_KERNEL);
- if (!local->sched_scan_ies.ie[i]) {
+ sched_scan_ies.ie[i] = kzalloc(2 + IEEE80211_MAX_SSID_LEN +
+ local->scan_ies_len +
+ req->ie_len,
+ GFP_KERNEL);
+ if (!sched_scan_ies.ie[i]) {
ret = -ENOMEM;
goto out_free;
}
- local->sched_scan_ies.len[i] =
- ieee80211_build_preq_ies(local,
- local->sched_scan_ies.ie[i],
+ sched_scan_ies.len[i] =
+ ieee80211_build_preq_ies(local, sched_scan_ies.ie[i],
req->ie, req->ie_len, i,
(u32) -1, 0);
}
- ret = drv_sched_scan_start(local, sdata, req,
- &local->sched_scan_ies);
- if (ret == 0) {
+ ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
+ if (ret == 0)
rcu_assign_pointer(local->sched_scan_sdata, sdata);
- goto out;
- }
out_free:
while (i > 0)
- kfree(local->sched_scan_ies.ie[--i]);
+ kfree(sched_scan_ies.ie[--i]);
out:
mutex_unlock(&local->mtx);
return ret;
@@ -970,7 +966,7 @@ out:
int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
- int ret = 0, i;
+ int ret = 0;
mutex_lock(&local->mtx);
@@ -979,12 +975,9 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata)
goto out;
}
- if (rcu_access_pointer(local->sched_scan_sdata)) {
- for (i = 0; i < IEEE80211_NUM_BANDS; i++)
- kfree(local->sched_scan_ies.ie[i]);
-
+ if (rcu_access_pointer(local->sched_scan_sdata))
drv_sched_scan_stop(local, sdata);
- }
+
out:
mutex_unlock(&local->mtx);
@@ -1006,7 +999,6 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work)
struct ieee80211_local *local =
container_of(work, struct ieee80211_local,
sched_scan_stopped_work);
- int i;
mutex_lock(&local->mtx);
@@ -1015,9 +1007,6 @@ void ieee80211_sched_scan_stopped_work(struct work_struct *work)
return;
}
- for (i = 0; i < IEEE80211_NUM_BANDS; i++)
- kfree(local->sched_scan_ies.ie[i]);
-
rcu_assign_pointer(local->sched_scan_sdata, NULL);
mutex_unlock(&local->mtx);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 06fa75ceb025..797dd36a220d 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -91,6 +91,70 @@ static int sta_info_hash_del(struct ieee80211_local *local,
return -ENOENT;
}
+static void free_sta_work(struct work_struct *wk)
+{
+ struct sta_info *sta = container_of(wk, struct sta_info, free_sta_wk);
+ int ac, i;
+ struct tid_ampdu_tx *tid_tx;
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_local *local = sdata->local;
+
+ /*
+ * At this point, when being called as call_rcu callback,
+ * neither mac80211 nor the driver can reference this
+ * sta struct any more except by still existing timers
+ * associated with this station that we clean up below.
+ */
+
+ if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
+ BUG_ON(!sdata->bss);
+
+ clear_sta_flag(sta, WLAN_STA_PS_STA);
+
+ atomic_dec(&sdata->bss->num_sta_ps);
+ sta_info_recalc_tim(sta);
+ }
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
+ __skb_queue_purge(&sta->ps_tx_buf[ac]);
+ __skb_queue_purge(&sta->tx_filtered[ac]);
+ }
+
+#ifdef CONFIG_MAC80211_MESH
+ if (ieee80211_vif_is_mesh(&sdata->vif)) {
+ mesh_accept_plinks_update(sdata);
+ mesh_plink_deactivate(sta);
+ del_timer_sync(&sta->plink_timer);
+ }
+#endif
+
+ cancel_work_sync(&sta->drv_unblock_wk);
+
+ /*
+ * Destroy aggregation state here. It would be nice to wait for the
+ * driver to finish aggregation stop and then clean up, but for now
+ * drivers have to handle aggregation stop being requested, followed
+ * directly by station destruction.
+ */
+ for (i = 0; i < STA_TID_NUM; i++) {
+ tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]);
+ if (!tid_tx)
+ continue;
+ __skb_queue_purge(&tid_tx->pending);
+ kfree(tid_tx);
+ }
+
+ sta_info_free(local, sta);
+}
+
+static void free_sta_rcu(struct rcu_head *h)
+{
+ struct sta_info *sta = container_of(h, struct sta_info, rcu_head);
+
+ ieee80211_queue_work(&sta->local->hw, &sta->free_sta_wk);
+}
+
/* protected by RCU */
struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
const u8 *addr)
@@ -241,6 +305,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
spin_lock_init(&sta->lock);
INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
+ INIT_WORK(&sta->free_sta_wk, free_sta_work);
INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
mutex_init(&sta->ampdu_mlme.mtx);
@@ -654,8 +719,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
{
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
- int ret, i, ac;
- struct tid_ampdu_tx *tid_tx;
+ int ret, i;
might_sleep();
@@ -674,7 +738,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
* will be sufficient.
*/
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
- ieee80211_sta_tear_down_BA_sessions(sta, true);
+ ieee80211_sta_tear_down_BA_sessions(sta, false);
ret = sta_info_hash_del(local, sta);
if (ret)
@@ -711,65 +775,14 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
WARN_ON_ONCE(ret != 0);
}
- /*
- * At this point, after we wait for an RCU grace period,
- * neither mac80211 nor the driver can reference this
- * sta struct any more except by still existing timers
- * associated with this station that we clean up below.
- */
- synchronize_rcu();
-
- if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
- BUG_ON(!sdata->bss);
-
- clear_sta_flag(sta, WLAN_STA_PS_STA);
-
- atomic_dec(&sdata->bss->num_sta_ps);
- sta_info_recalc_tim(sta);
- }
-
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
- __skb_queue_purge(&sta->ps_tx_buf[ac]);
- __skb_queue_purge(&sta->tx_filtered[ac]);
- }
-
-#ifdef CONFIG_MAC80211_MESH
- if (ieee80211_vif_is_mesh(&sdata->vif))
- mesh_accept_plinks_update(sdata);
-#endif
-
sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr);
- cancel_work_sync(&sta->drv_unblock_wk);
-
cfg80211_del_sta(sdata->dev, sta->sta.addr, GFP_KERNEL);
rate_control_remove_sta_debugfs(sta);
ieee80211_sta_debugfs_remove(sta);
-#ifdef CONFIG_MAC80211_MESH
- if (ieee80211_vif_is_mesh(&sta->sdata->vif)) {
- mesh_plink_deactivate(sta);
- del_timer_sync(&sta->plink_timer);
- }
-#endif
-
- /*
- * Destroy aggregation state here. It would be nice to wait for the
- * driver to finish aggregation stop and then clean up, but for now
- * drivers have to handle aggregation stop being requested, followed
- * directly by station destruction.
- */
- for (i = 0; i < STA_TID_NUM; i++) {
- tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]);
- if (!tid_tx)
- continue;
- __skb_queue_purge(&tid_tx->pending);
- kfree(tid_tx);
- }
-
- sta_info_free(local, sta);
+ call_rcu(&sta->rcu_head, free_sta_rcu);
return 0;
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index a470e1123a55..c88f161f8118 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -287,6 +287,7 @@ struct sta_ampdu_mlme {
struct sta_info {
/* General information, mostly static */
struct list_head list;
+ struct rcu_head rcu_head;
struct sta_info __rcu *hnext;
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
@@ -297,6 +298,7 @@ struct sta_info {
spinlock_t lock;
struct work_struct drv_unblock_wk;
+ struct work_struct free_sta_wk;
u16 listen_interval;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 8cd72914cdaf..3af0cc4130f1 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -34,7 +34,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
skb_queue_len(&local->skb_queue_unreliable);
while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT &&
(skb = skb_dequeue(&local->skb_queue_unreliable))) {
- dev_kfree_skb_irq(skb);
+ ieee80211_free_txskb(hw, skb);
tmp--;
I802_DEBUG_INC(local->tx_status_drop);
}
@@ -159,7 +159,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
"dropped TX filtered frame, queue_len=%d PS=%d @%lu\n",
skb_queue_len(&sta->tx_filtered[ac]),
!!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies);
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
}
static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid)
@@ -517,21 +517,41 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) {
u64 cookie = (unsigned long)skb;
+ bool found = false;
+
acked = info->flags & IEEE80211_TX_STAT_ACK;
- /*
- * TODO: When we have non-netdev frame TX,
- * we cannot use skb->dev->ieee80211_ptr
- */
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (!sdata->dev)
+ continue;
+
+ if (skb->dev != sdata->dev)
+ continue;
- if (ieee80211_is_nullfunc(hdr->frame_control) ||
- ieee80211_is_qos_nullfunc(hdr->frame_control))
- cfg80211_probe_status(skb->dev, hdr->addr1,
+ found = true;
+ break;
+ }
+
+ if (!skb->dev) {
+ sdata = rcu_dereference(local->p2p_sdata);
+ if (sdata)
+ found = true;
+ }
+
+ if (!found)
+ skb->dev = NULL;
+ else if (ieee80211_is_nullfunc(hdr->frame_control) ||
+ ieee80211_is_qos_nullfunc(hdr->frame_control)) {
+ cfg80211_probe_status(sdata->dev, hdr->addr1,
cookie, acked, GFP_ATOMIC);
- else
- cfg80211_mgmt_tx_status(
- skb->dev->ieee80211_ptr, cookie, skb->data,
- skb->len, acked, GFP_ATOMIC);
+ } else {
+ cfg80211_mgmt_tx_status(&sdata->wdev, cookie, skb->data,
+ skb->len, acked, GFP_ATOMIC);
+ }
+
+ rcu_read_unlock();
}
if (unlikely(info->ack_frame_id)) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index c6d33b55b2df..18d9c8a52e9e 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -24,7 +24,7 @@
__string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
__entry->p2p = sdata->vif.p2p; \
- __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
+ __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name)
#define VIF_PR_FMT " vif:%s(%d%s)"
#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
@@ -274,9 +274,12 @@ TRACE_EVENT(drv_config,
__entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout;
__entry->max_sleep_period = local->hw.conf.max_sleep_period;
__entry->listen_interval = local->hw.conf.listen_interval;
- __entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count;
- __entry->short_frame_max_tx_count = local->hw.conf.short_frame_max_tx_count;
- __entry->center_freq = local->hw.conf.channel->center_freq;
+ __entry->long_frame_max_tx_count =
+ local->hw.conf.long_frame_max_tx_count;
+ __entry->short_frame_max_tx_count =
+ local->hw.conf.short_frame_max_tx_count;
+ __entry->center_freq = local->hw.conf.channel ?
+ local->hw.conf.channel->center_freq : 0;
__entry->channel_type = local->hw.conf.channel_type;
__entry->smps = local->hw.conf.smps_mode;
),
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c5e8c9c31f76..c9bf83f36657 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -55,7 +55,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
if (WARN_ON_ONCE(info->control.rates[0].idx < 0))
return 0;
- sband = local->hw.wiphy->bands[tx->channel->band];
+ sband = local->hw.wiphy->bands[info->band];
txrate = &sband->bitrates[info->control.rates[0].idx];
erp = txrate->flags & IEEE80211_RATE_ERP_G;
@@ -354,7 +354,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
total += skb_queue_len(&sta->ps_tx_buf[ac]);
if (skb) {
purged++;
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
break;
}
}
@@ -466,7 +466,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
ps_dbg(tx->sdata,
"STA %pM TX buffer for AC %d full - dropping oldest frame\n",
sta->sta.addr, ac);
- dev_kfree_skb(old);
+ ieee80211_free_txskb(&local->hw, old);
} else
tx->local->total_ps_buffered++;
@@ -580,7 +580,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
tx->key = NULL;
else
skip_hw = (tx->key->conf.flags &
- IEEE80211_KEY_FLAG_SW_MGMT) &&
+ IEEE80211_KEY_FLAG_SW_MGMT_TX) &&
ieee80211_is_mgmt(hdr->frame_control);
break;
case WLAN_CIPHER_SUITE_AES_CMAC:
@@ -615,7 +615,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
memset(&txrc, 0, sizeof(txrc));
- sband = tx->local->hw.wiphy->bands[tx->channel->band];
+ sband = tx->local->hw.wiphy->bands[info->band];
len = min_t(u32, tx->skb->len + FCS_LEN,
tx->local->hw.wiphy->frag_threshold);
@@ -626,13 +626,13 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.bss_conf = &tx->sdata->vif.bss_conf;
txrc.skb = tx->skb;
txrc.reported_rate.idx = -1;
- txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[tx->channel->band];
+ txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band];
if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1)
txrc.max_rate_idx = -1;
else
txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
memcpy(txrc.rate_idx_mcs_mask,
- tx->sdata->rc_rateidx_mcs_mask[tx->channel->band],
+ tx->sdata->rc_rateidx_mcs_mask[info->band],
sizeof(txrc.rate_idx_mcs_mask));
txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
@@ -667,7 +667,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
"scanning and associated. Target station: "
"%pM on %d GHz band\n",
tx->sdata->name, hdr->addr1,
- tx->channel->band ? 5 : 2))
+ info->band ? 5 : 2))
return TX_DROP;
/*
@@ -1103,7 +1103,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
spin_unlock(&tx->sta->lock);
if (purge_skb)
- dev_kfree_skb(purge_skb);
+ ieee80211_free_txskb(&tx->local->hw, purge_skb);
}
/* reset session timer */
@@ -1131,7 +1131,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
tx->skb = skb;
tx->local = local;
tx->sdata = sdata;
- tx->channel = local->hw.conf.channel;
__skb_queue_head_init(&tx->skbs);
/*
@@ -1204,6 +1203,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
struct sk_buff_head *skbs,
bool txpending)
{
+ struct ieee80211_tx_control control;
struct sk_buff *skb, *tmp;
unsigned long flags;
@@ -1214,7 +1214,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
if (WARN_ON_ONCE(q >= local->hw.queues)) {
__skb_unlink(skb, skbs);
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
continue;
}
#endif
@@ -1240,10 +1240,10 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
info->control.vif = vif;
- info->control.sta = sta;
+ control.sta = sta;
__skb_unlink(skb, skbs);
- drv_tx(local, skb);
+ drv_tx(local, &control, skb);
}
return true;
@@ -1356,7 +1356,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
if (unlikely(res == TX_DROP)) {
I802_DEBUG_INC(tx->local->tx_handlers_drop);
if (tx->skb)
- dev_kfree_skb(tx->skb);
+ ieee80211_free_txskb(&tx->local->hw, tx->skb);
else
__skb_queue_purge(&tx->skbs);
return -1;
@@ -1393,14 +1393,13 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
res_prepare = ieee80211_tx_prepare(sdata, &tx, skb);
if (unlikely(res_prepare == TX_DROP)) {
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
goto out;
} else if (unlikely(res_prepare == TX_QUEUED)) {
goto out;
}
- tx.channel = local->hw.conf.channel;
- info->band = tx.channel->band;
+ info->band = local->hw.conf.channel->band;
/* set up hw_queue value early */
if (!(info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) ||
@@ -1466,7 +1465,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
headroom = max_t(int, 0, headroom);
if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) {
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
rcu_read_unlock();
return;
}
@@ -1720,7 +1719,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info;
- int ret = NETDEV_TX_BUSY, head_need;
+ int head_need;
u16 ethertype, hdrlen, meshhdrlen = 0;
__le16 fc;
struct ieee80211_hdr hdr;
@@ -1736,10 +1735,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
u32 info_flags = 0;
u16 info_id = 0;
- if (unlikely(skb->len < ETH_HLEN)) {
- ret = NETDEV_TX_OK;
+ if (unlikely(skb->len < ETH_HLEN))
goto fail;
- }
/* convert Ethernet header to proper 802.11 header (based on
* operation mode) */
@@ -1787,7 +1784,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
if (!sdata->u.mesh.mshcfg.dot11MeshTTL) {
/* Do not send frames with mesh_ttl == 0 */
sdata->u.mesh.mshstats.dropped_frames_ttl++;
- ret = NETDEV_TX_OK;
goto fail;
}
rcu_read_lock();
@@ -1874,10 +1870,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
if (tdls_direct) {
/* link during setup - throw out frames to peer */
- if (!tdls_auth) {
- ret = NETDEV_TX_OK;
+ if (!tdls_auth)
goto fail;
- }
/* DA SA BSSID */
memcpy(hdr.addr1, skb->data, ETH_ALEN);
@@ -1911,7 +1905,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
hdrlen = 24;
break;
default:
- ret = NETDEV_TX_OK;
goto fail;
}
@@ -1956,7 +1949,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
I802_DEBUG_INC(local->tx_handlers_drop_unauth_port);
- ret = NETDEV_TX_OK;
goto fail;
}
@@ -2011,10 +2003,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
skb = skb_clone(skb, GFP_ATOMIC);
kfree_skb(tmp_skb);
- if (!skb) {
- ret = NETDEV_TX_OK;
+ if (!skb)
goto fail;
- }
}
hdr.frame_control = fc;
@@ -2060,8 +2050,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
head_need += IEEE80211_ENCRYPT_HEADROOM;
head_need += local->tx_headroom;
head_need = max_t(int, 0, head_need);
- if (ieee80211_skb_resize(sdata, skb, head_need, true))
- goto fail;
+ if (ieee80211_skb_resize(sdata, skb, head_need, true)) {
+ ieee80211_free_txskb(&local->hw, skb);
+ return NETDEV_TX_OK;
+ }
}
if (encaps_data) {
@@ -2117,10 +2109,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
fail:
- if (ret == NETDEV_TX_OK)
- dev_kfree_skb(skb);
-
- return ret;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
}
@@ -2196,7 +2186,7 @@ void ieee80211_tx_pending(unsigned long data)
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
if (WARN_ON(!info->control.vif)) {
- kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
continue;
}
@@ -2295,12 +2285,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
struct ieee80211_sub_if_data *sdata = NULL;
struct ieee80211_if_ap *ap = NULL;
struct beacon_data *beacon;
- struct ieee80211_supported_band *sband;
- enum ieee80211_band band = local->hw.conf.channel->band;
+ enum ieee80211_band band = local->oper_channel->band;
struct ieee80211_tx_rate_control txrc;
- sband = local->hw.wiphy->bands[band];
-
rcu_read_lock();
sdata = vif_to_sdata(vif);
@@ -2410,7 +2397,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
memset(mgmt, 0, hdr_len);
mgmt->frame_control =
cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON);
- memset(mgmt->da, 0xff, ETH_ALEN);
+ eth_broadcast_addr(mgmt->da);
memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
mgmt->u.beacon.beacon_int =
@@ -2422,9 +2409,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
*pos++ = WLAN_EID_SSID;
*pos++ = 0x0;
- if (ieee80211_add_srates_ie(sdata, skb, true) ||
+ if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
mesh_add_ds_params_ie(skb, sdata) ||
- ieee80211_add_ext_srates_ie(sdata, skb, true) ||
+ ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
mesh_add_rsn_ie(skb, sdata) ||
mesh_add_ht_cap_ie(skb, sdata) ||
mesh_add_ht_oper_ie(skb, sdata) ||
@@ -2447,12 +2434,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
memset(&txrc, 0, sizeof(txrc));
txrc.hw = hw;
- txrc.sband = sband;
+ txrc.sband = local->hw.wiphy->bands[band];
txrc.bss_conf = &sdata->vif.bss_conf;
txrc.skb = skb;
txrc.reported_rate.idx = -1;
txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
- if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1)
+ if (txrc.rate_idx_mask == (1 << txrc.sband->n_bitrates) - 1)
txrc.max_rate_idx = -1;
else
txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
@@ -2476,7 +2463,8 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
struct ieee80211_if_ap *ap = NULL;
- struct sk_buff *presp = NULL, *skb = NULL;
+ struct sk_buff *skb = NULL;
+ struct probe_resp *presp = NULL;
struct ieee80211_hdr *hdr;
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
@@ -2490,10 +2478,12 @@ struct sk_buff *ieee80211_proberesp_get(struct ieee80211_hw *hw,
if (!presp)
goto out;
- skb = skb_copy(presp, GFP_ATOMIC);
+ skb = dev_alloc_skb(presp->len);
if (!skb)
goto out;
+ memcpy(skb_put(skb, presp->len), presp->data, presp->len);
+
hdr = (struct ieee80211_hdr *) skb->data;
memset(hdr->addr1, 0, sizeof(hdr->addr1));
@@ -2604,9 +2594,9 @@ struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw,
memset(hdr, 0, sizeof(*hdr));
hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
IEEE80211_STYPE_PROBE_REQ);
- memset(hdr->addr1, 0xff, ETH_ALEN);
+ eth_broadcast_addr(hdr->addr1);
memcpy(hdr->addr2, vif->addr, ETH_ALEN);
- memset(hdr->addr3, 0xff, ETH_ALEN);
+ eth_broadcast_addr(hdr->addr3);
pos = skb_put(skb, ie_ssid_len);
*pos++ = WLAN_EID_SSID;
@@ -2703,8 +2693,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw,
info = IEEE80211_SKB_CB(skb);
tx.flags |= IEEE80211_TX_PS_BUFFERED;
- tx.channel = local->hw.conf.channel;
- info->band = tx.channel->band;
+ info->band = local->oper_channel->band;
if (invoke_tx_handlers(&tx))
skb = NULL;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 39b82fee4904..22ca35054dd0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -276,6 +276,9 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
int ac;
+ if (!sdata->dev)
+ continue;
+
if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
continue;
@@ -364,6 +367,9 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
int ac;
+ if (!sdata->dev)
+ continue;
+
for (ac = 0; ac < n_acs; ac++) {
if (sdata->vif.hw_queue[ac] == queue ||
sdata->vif.cab_queue == queue)
@@ -768,8 +774,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
elem_parse_failed = true;
break;
case WLAN_EID_CHANNEL_SWITCH:
- elems->ch_switch_elem = pos;
- elems->ch_switch_elem_len = elen;
+ if (elen != sizeof(struct ieee80211_channel_sw_ie)) {
+ elem_parse_failed = true;
+ break;
+ }
+ elems->ch_switch_ie = (void *)pos;
break;
case WLAN_EID_QUIET:
if (!elems->quiet_elem) {
@@ -783,8 +792,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
elems->country_elem_len = elen;
break;
case WLAN_EID_PWR_CONSTRAINT:
+ if (elen != 1) {
+ elem_parse_failed = true;
+ break;
+ }
elems->pwr_constr_elem = pos;
- elems->pwr_constr_elem_len = elen;
break;
case WLAN_EID_TIMEOUT_INTERVAL:
elems->timeout_int = pos;
@@ -832,7 +844,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
memset(&qparam, 0, sizeof(qparam));
- use_11b = (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) &&
+ use_11b = (local->oper_channel->band == IEEE80211_BAND_2GHZ) &&
!(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE);
/*
@@ -899,7 +911,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
drv_conf_tx(local, sdata, ac, &qparam);
}
- if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
+ if (sdata->vif.type != NL80211_IFTYPE_MONITOR &&
+ sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) {
sdata->vif.bss_conf.qos = enable_qos;
if (bss_notify)
ieee80211_bss_info_change_notify(sdata,
@@ -919,7 +932,7 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
if ((supp_rates[i] & 0x7f) * 5 > 110)
have_higher_than_11mbit = 1;
- if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
+ if (local->oper_channel->band == IEEE80211_BAND_2GHZ &&
have_higher_than_11mbit)
sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
else
@@ -994,6 +1007,45 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
ieee80211_tx_skb(sdata, skb);
}
+void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
+ const u8 *bssid, u16 stype, u16 reason,
+ bool send_frame, u8 *frame_buf)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct sk_buff *skb;
+ struct ieee80211_mgmt *mgmt = (void *)frame_buf;
+
+ /* build frame */
+ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype);
+ mgmt->duration = 0; /* initialize only */
+ mgmt->seq_ctrl = 0; /* initialize only */
+ memcpy(mgmt->da, bssid, ETH_ALEN);
+ memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
+ memcpy(mgmt->bssid, bssid, ETH_ALEN);
+ /* u.deauth.reason_code == u.disassoc.reason_code */
+ mgmt->u.deauth.reason_code = cpu_to_le16(reason);
+
+ if (send_frame) {
+ skb = dev_alloc_skb(local->hw.extra_tx_headroom +
+ IEEE80211_DEAUTH_FRAME_LEN);
+ if (!skb)
+ return;
+
+ skb_reserve(skb, local->hw.extra_tx_headroom);
+
+ /* copy in frame */
+ memcpy(skb_put(skb, IEEE80211_DEAUTH_FRAME_LEN),
+ mgmt, IEEE80211_DEAUTH_FRAME_LEN);
+
+ if (sdata->vif.type != NL80211_IFTYPE_STATION ||
+ !(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED))
+ IEEE80211_SKB_CB(skb)->flags |=
+ IEEE80211_TX_INTFL_DONT_ENCRYPT;
+
+ ieee80211_tx_skb(sdata, skb);
+ }
+}
+
int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
const u8 *ie, size_t ie_len,
enum ieee80211_band band, u32 rate_mask,
@@ -1100,6 +1152,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
u8 *dst, u32 ratemask,
+ struct ieee80211_channel *chan,
const u8 *ssid, size_t ssid_len,
const u8 *ie, size_t ie_len,
bool directed)
@@ -1109,7 +1162,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt;
size_t buf_len;
u8 *buf;
- u8 chan;
+ u8 chan_no;
/* FIXME: come up with a proper value */
buf = kmalloc(200 + ie_len, GFP_KERNEL);
@@ -1122,14 +1175,12 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
* badly-behaved APs don't respond when this parameter is included.
*/
if (directed)
- chan = 0;
+ chan_no = 0;
else
- chan = ieee80211_frequency_to_channel(
- local->hw.conf.channel->center_freq);
+ chan_no = ieee80211_frequency_to_channel(chan->center_freq);
- buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len,
- local->hw.conf.channel->band,
- ratemask, chan);
+ buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, chan->band,
+ ratemask, chan_no);
skb = ieee80211_probereq_get(&local->hw, &sdata->vif,
ssid, ssid_len,
@@ -1154,11 +1205,13 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
const u8 *ssid, size_t ssid_len,
const u8 *ie, size_t ie_len,
- u32 ratemask, bool directed, bool no_cck)
+ u32 ratemask, bool directed, bool no_cck,
+ struct ieee80211_channel *channel)
{
struct sk_buff *skb;
- skb = ieee80211_build_probe_req(sdata, dst, ratemask, ssid, ssid_len,
+ skb = ieee80211_build_probe_req(sdata, dst, ratemask, channel,
+ ssid, ssid_len,
ie, ie_len, directed);
if (skb) {
if (no_cck)
@@ -1359,7 +1412,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
changed |= BSS_CHANGED_ASSOC |
- BSS_CHANGED_ARP_FILTER;
+ BSS_CHANGED_ARP_FILTER |
+ BSS_CHANGED_PS;
mutex_lock(&sdata->u.mgd.mtx);
ieee80211_bss_info_change_notify(sdata, changed);
mutex_unlock(&sdata->u.mgd.mtx);
@@ -1385,6 +1439,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
case NL80211_IFTYPE_MONITOR:
/* ignore virtual */
break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ changed = BSS_CHANGED_IDLE;
+ break;
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
case NL80211_IFTYPE_P2P_CLIENT:
@@ -1549,14 +1606,13 @@ static int check_mgd_smps(struct ieee80211_if_managed *ifmgd,
return 0;
}
-/* must hold iflist_mtx */
void ieee80211_recalc_smps(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata;
enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF;
int count = 0;
- lockdep_assert_held(&local->iflist_mtx);
+ mutex_lock(&local->iflist_mtx);
/*
* This function could be improved to handle multiple
@@ -1571,6 +1627,8 @@ void ieee80211_recalc_smps(struct ieee80211_local *local)
list_for_each_entry(sdata, &local->interfaces, list) {
if (!ieee80211_sdata_running(sdata))
continue;
+ if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE)
+ continue;
if (sdata->vif.type != NL80211_IFTYPE_STATION)
goto set;
@@ -1583,12 +1641,14 @@ void ieee80211_recalc_smps(struct ieee80211_local *local)
}
if (smps_mode == local->smps_mode)
- return;
+ goto unlock;
set:
local->smps_mode = smps_mode;
/* changed flag is auto-detected for this */
ieee80211_hw_config(local, 0);
+ unlock:
+ mutex_unlock(&local->iflist_mtx);
}
static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
@@ -1809,7 +1869,8 @@ ieee80211_ht_oper_to_channel_type(struct ieee80211_ht_operation *ht_oper)
}
int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic)
+ struct sk_buff *skb, bool need_basic,
+ enum ieee80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
@@ -1817,7 +1878,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
u8 i, rates, *pos;
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
- sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+ sband = local->hw.wiphy->bands[band];
rates = sband->n_bitrates;
if (rates > 8)
rates = 8;
@@ -1840,7 +1901,8 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
}
int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb, bool need_basic)
+ struct sk_buff *skb, bool need_basic,
+ enum ieee80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
@@ -1848,7 +1910,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
u8 i, exrates, *pos;
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
- sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
+ sband = local->hw.wiphy->bands[band];
exrates = sband->n_bitrates;
if (exrates > 8)
exrates -= 8;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c19b214ffd57..fefa514b9917 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -356,6 +356,55 @@ config NETFILTER_NETLINK_QUEUE_CT
If this option is enabled, NFQUEUE can include Connection Tracking
information together with the packet is the enqueued via NFNETLINK.
+config NF_NAT
+ tristate
+
+config NF_NAT_NEEDED
+ bool
+ depends on NF_NAT
+ default y
+
+config NF_NAT_PROTO_DCCP
+ tristate
+ depends on NF_NAT && NF_CT_PROTO_DCCP
+ default NF_NAT && NF_CT_PROTO_DCCP
+
+config NF_NAT_PROTO_UDPLITE
+ tristate
+ depends on NF_NAT && NF_CT_PROTO_UDPLITE
+ default NF_NAT && NF_CT_PROTO_UDPLITE
+
+config NF_NAT_PROTO_SCTP
+ tristate
+ default NF_NAT && NF_CT_PROTO_SCTP
+ depends on NF_NAT && NF_CT_PROTO_SCTP
+ select LIBCRC32C
+
+config NF_NAT_AMANDA
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_AMANDA
+
+config NF_NAT_FTP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_FTP
+
+config NF_NAT_IRC
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_IRC
+
+config NF_NAT_SIP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_SIP
+
+config NF_NAT_TFTP
+ tristate
+ depends on NF_CONNTRACK && NF_NAT
+ default NF_NAT && NF_CONNTRACK_TFTP
+
endif # NF_CONNTRACK
# transparent proxy support
@@ -599,6 +648,16 @@ config NETFILTER_XT_TARGET_MARK
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
+config NETFILTER_XT_TARGET_NETMAP
+ tristate '"NETMAP" target support'
+ depends on NF_NAT
+ ---help---
+ NETMAP is an implementation of static 1:1 NAT mapping of network
+ addresses. It maps the network address part, while keeping the host
+ address part intact.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_NFLOG
tristate '"NFLOG" target support'
default m if NETFILTER_ADVANCED=n
@@ -621,19 +680,6 @@ config NETFILTER_XT_TARGET_NFQUEUE
To compile it as a module, choose M here. If unsure, say N.
-config NETFILTER_XT_TARGET_NOTRACK
- tristate '"NOTRACK" target support'
- depends on IP_NF_RAW || IP6_NF_RAW
- depends on NF_CONNTRACK
- help
- The NOTRACK target allows a select rule to specify
- which packets *not* to enter the conntrack/NAT
- subsystem with all the consequences (no ICMP error tracking,
- no protocol helpers for the selected packets).
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
-
config NETFILTER_XT_TARGET_RATEEST
tristate '"RATEEST" target support'
depends on NETFILTER_ADVANCED
@@ -644,6 +690,17 @@ config NETFILTER_XT_TARGET_RATEEST
To compile it as a module, choose M here. If unsure, say N.
+config NETFILTER_XT_TARGET_REDIRECT
+ tristate "REDIRECT target support"
+ depends on NF_NAT
+ ---help---
+ REDIRECT is a special case of NAT: all incoming connections are
+ mapped onto the incoming interface's address, causing the packets to
+ come to the local machine instead of passing through. This is
+ useful for transparent proxies.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config NETFILTER_XT_TARGET_TEE
tristate '"TEE" - packet cloning to alternate destination'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1c5160f2278e..32596978df1d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -43,6 +43,23 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
+nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
+ nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
+
+obj-$(CONFIG_NF_NAT) += nf_nat.o
+
+# NAT protocols (nf_nat)
+obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
+obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+
+# NAT helpers
+obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
+obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
+obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
+obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
+obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
+
# transparent proxy support
obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
@@ -53,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
obj-$(CONFIG_NETFILTER_XT_SET) += xt_set.o
+obj-$(CONFIG_NF_NAT) += xt_nat.o
# targets
obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
@@ -65,10 +83,11 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
-obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o
obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 0bc6b60db4df..68912dadf13d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -126,7 +126,7 @@ unsigned int nf_iterate(struct list_head *head,
unsigned int hook,
const struct net_device *indev,
const struct net_device *outdev,
- struct list_head **i,
+ struct nf_hook_ops **elemp,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
@@ -136,22 +136,20 @@ unsigned int nf_iterate(struct list_head *head,
* The caller must not block between calls to this
* function because of risk of continuing from deleted element.
*/
- list_for_each_continue_rcu(*i, head) {
- struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
-
- if (hook_thresh > elem->priority)
+ list_for_each_entry_continue_rcu((*elemp), head, list) {
+ if (hook_thresh > (*elemp)->priority)
continue;
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = elem->hook(hook, skb, indev, outdev, okfn);
+ verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
> NF_MAX_VERDICT)) {
NFDEBUG("Evil return from %p(%u).\n",
- elem->hook, hook);
+ (*elemp)->hook, hook);
continue;
}
#endif
@@ -172,14 +170,14 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
int (*okfn)(struct sk_buff *),
int hook_thresh)
{
- struct list_head *elem;
+ struct nf_hook_ops *elem;
unsigned int verdict;
int ret = 0;
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
- elem = &nf_hooks[pf][hook];
+ elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list);
next_hook:
verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
outdev, &elem, okfn, hook_thresh);
@@ -273,6 +271,11 @@ EXPORT_SYMBOL_GPL(nfq_ct_nat_hook);
#endif /* CONFIG_NF_CONNTRACK */
+#ifdef CONFIG_NF_NAT_NEEDED
+void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
+EXPORT_SYMBOL(nf_nat_decode_session_hook);
+#endif
+
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_net_netfilter;
EXPORT_SYMBOL(proc_net_netfilter);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 7e1b061aeeba..4a92fd47bd4c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -27,9 +27,12 @@
#define IP_SET_BITMAP_TIMEOUT
#include <linux/netfilter/ipset/ip_set_timeout.h>
+#define REVISION_MIN 0
+#define REVISION_MAX 0
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("bitmap:ip type of IP sets");
+IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_bitmap:ip");
/* Type structure */
@@ -284,7 +287,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr > 32)
+ if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
} else
@@ -454,7 +457,8 @@ static int
bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
{
struct bitmap_ip *map;
- u32 first_ip, last_ip, hosts, elements;
+ u32 first_ip, last_ip, hosts;
+ u64 elements;
u8 netmask = 32;
int ret;
@@ -497,7 +501,7 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
if (netmask == 32) {
hosts = 1;
- elements = last_ip - first_ip + 1;
+ elements = (u64)last_ip - first_ip + 1;
} else {
u8 mask_bits;
u32 mask;
@@ -515,7 +519,8 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
- pr_debug("hosts %u, elements %u\n", hosts, elements);
+ pr_debug("hosts %u, elements %llu\n",
+ hosts, (unsigned long long)elements);
map = kzalloc(sizeof(*map), GFP_KERNEL);
if (!map)
@@ -554,8 +559,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
.features = IPSET_TYPE_IP,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_IPV4,
- .revision_min = 0,
- .revision_max = 0,
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = bitmap_ip_create,
.create_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index d7eaf10edb6d..0f92dc24cb89 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -26,9 +26,12 @@
#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_bitmap.h>
+#define REVISION_MIN 0
+#define REVISION_MAX 0
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets");
+IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_bitmap:ip,mac");
enum {
@@ -320,11 +323,11 @@ bitmap_ipmac_tlist(const struct ip_set *set,
(elem->match == MAC_FILLED &&
nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN,
elem->ether)))
- goto nla_put_failure;
+ goto nla_put_failure;
timeout = elem->match == MAC_UNSET ? elem->timeout
: ip_set_timeout_get(elem->timeout);
if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout)))
- goto nla_put_failure;
+ goto nla_put_failure;
ipset_nest_end(skb, nested);
}
ipset_nest_end(skb, atd);
@@ -557,7 +560,8 @@ static int
bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
u32 flags)
{
- u32 first_ip, last_ip, elements;
+ u32 first_ip, last_ip;
+ u64 elements;
struct bitmap_ipmac *map;
int ret;
@@ -588,7 +592,7 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
} else
return -IPSET_ERR_PROTOCOL;
- elements = last_ip - first_ip + 1;
+ elements = (u64)last_ip - first_ip + 1;
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
@@ -629,8 +633,8 @@ static struct ip_set_type bitmap_ipmac_type = {
.features = IPSET_TYPE_IP | IPSET_TYPE_MAC,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_IPV4,
- .revision_min = 0,
- .revision_max = 0,
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = bitmap_ipmac_create,
.create_policy = {
[IPSET_ATTR_IP] = { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index b9f1fce7053b..e6b2db76f4c3 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -22,9 +22,12 @@
#define IP_SET_BITMAP_TIMEOUT
#include <linux/netfilter/ipset/ip_set_timeout.h>
+#define REVISION_MIN 0
+#define REVISION_MAX 0
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("bitmap:port type of IP sets");
+IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_bitmap:port");
/* Type structure */
@@ -487,8 +490,8 @@ static struct ip_set_type bitmap_port_type = {
.features = IPSET_TYPE_PORT,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- .revision_max = 0,
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = bitmap_port_create,
.create_policy = {
[IPSET_ATTR_PORT] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 9730882697aa..778465f217fa 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -69,7 +69,8 @@ find_set_type(const char *name, u8 family, u8 revision)
list_for_each_entry_rcu(type, &ip_set_type_list, list)
if (STREQ(type->name, name) &&
- (type->family == family || type->family == NFPROTO_UNSPEC) &&
+ (type->family == family ||
+ type->family == NFPROTO_UNSPEC) &&
revision >= type->revision_min &&
revision <= type->revision_max)
return type;
@@ -149,7 +150,8 @@ __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
rcu_read_lock();
list_for_each_entry_rcu(type, &ip_set_type_list, list)
if (STREQ(type->name, name) &&
- (type->family == family || type->family == NFPROTO_UNSPEC)) {
+ (type->family == family ||
+ type->family == NFPROTO_UNSPEC)) {
found = true;
if (type->revision_min < *min)
*min = type->revision_min;
@@ -368,6 +370,12 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
write_unlock_bh(&set->lock);
ret = 1;
+ } else {
+ /* --return-nomatch: invert matched element */
+ if ((opt->flags & IPSET_RETURN_NOMATCH) &&
+ (set->type->features & IPSET_TYPE_NOMATCH) &&
+ (ret > 0 || ret == -ENOTEMPTY))
+ ret = -ret;
}
/* Convert error codes to nomatch */
@@ -563,13 +571,13 @@ flag_exist(const struct nlmsghdr *nlh)
}
static struct nlmsghdr *
-start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
+start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
enum ipset_cmd cmd)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
+ nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
sizeof(*nfmsg), flags);
if (nlh == NULL)
return NULL;
@@ -721,7 +729,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
* by the nfnl mutex. Find the first free index in ip_set_list
* and check clashing.
*/
- if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
+ ret = find_free_id(set->name, &index, &clash);
+ if (ret != 0) {
/* If this is the same set and requested, ignore error */
if (ret == -EEXIST &&
(flags & IPSET_FLAG_EXIST) &&
@@ -1045,7 +1054,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
ip_set_id_t index = IPSET_INVALID_ID, max;
struct ip_set *set = NULL;
struct nlmsghdr *nlh = NULL;
- unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
+ unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
u32 dump_type, dump_flags;
int ret = 0;
@@ -1093,7 +1102,7 @@ dump_last:
pr_debug("reference set\n");
__ip_set_get(index);
}
- nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
+ nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags,
IPSET_CMD_LIST);
if (!nlh) {
@@ -1226,7 +1235,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
skb2 = nlmsg_new(payload, GFP_KERNEL);
if (skb2 == NULL)
return -ENOMEM;
- rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
+ rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
errmsg = nlmsg_data(rep);
errmsg->error = ret;
@@ -1241,7 +1250,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
*errline = lineno;
- netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
/* Signal netlink not to send its ACK/errmsg. */
return -EINTR;
}
@@ -1416,7 +1425,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
if (skb2 == NULL)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
IPSET_CMD_HEADER);
if (!nlh2)
goto nlmsg_failure;
@@ -1428,7 +1437,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (ret < 0)
return ret;
@@ -1476,7 +1485,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
if (skb2 == NULL)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
IPSET_CMD_TYPE);
if (!nlh2)
goto nlmsg_failure;
@@ -1489,7 +1498,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
nlmsg_end(skb2, nlh2);
pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (ret < 0)
return ret;
@@ -1525,7 +1534,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
if (skb2 == NULL)
return -ENOMEM;
- nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
IPSET_CMD_PROTOCOL);
if (!nlh2)
goto nlmsg_failure;
@@ -1533,7 +1542,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
goto nla_put_failure;
nlmsg_end(skb2, nlh2);
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (ret < 0)
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index a68dbd4f1e4e..ec3dba5dcd62 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -24,9 +24,12 @@
#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
+#define REVISION_MIN 0
+#define REVISION_MAX 0
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:ip type of IP sets");
+IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:ip");
/* Type specific function prefix */
@@ -114,7 +117,7 @@ nla_put_failure:
static inline void
hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d)
{
- h->next.ip = ntohl(d->ip);
+ h->next.ip = d->ip;
}
static int
@@ -179,7 +182,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr > 32)
+ if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
} else
@@ -188,7 +191,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
if (retried)
- ip = h->next.ip;
+ ip = ntohl(h->next.ip);
for (; !before(ip_to, ip); ip += hosts) {
nip = htonl(ip);
if (nip == 0)
@@ -452,8 +455,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
.features = IPSET_TYPE_IP,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- .revision_max = 0,
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = hash_ip_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 92722bb82eea..0171f7502fa5 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -25,9 +25,12 @@
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
+#define REVISION_MIN 0
+#define REVISION_MAX 1 /* SCTP and UDPLITE support added */
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:ip,port type of IP sets");
+IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:ip,port");
/* Type specific function prefix */
@@ -130,8 +133,8 @@ static inline void
hash_ipport4_data_next(struct ip_set_hash *h,
const struct hash_ipport4_elem *d)
{
- h->next.ip = ntohl(d->ip);
- h->next.port = ntohs(d->port);
+ h->next.ip = d->ip;
+ h->next.port = d->port;
}
static int
@@ -217,7 +220,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr > 32)
+ if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
} else
@@ -231,9 +234,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (retried)
- ip = h->next.ip;
+ ip = ntohl(h->next.ip);
for (; !before(ip_to, ip); ip++) {
- p = retried && ip == h->next.ip ? h->next.port : port;
+ p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+ : port;
for (; p <= port_to; p++) {
data.ip = htonl(ip);
data.port = htons(p);
@@ -349,7 +353,7 @@ static inline void
hash_ipport6_data_next(struct ip_set_hash *h,
const struct hash_ipport6_elem *d)
{
- h->next.port = ntohs(d->port);
+ h->next.port = d->port;
}
static int
@@ -431,7 +435,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
if (retried)
- port = h->next.port;
+ port = ntohs(h->next.port);
for (; port <= port_to; port++) {
data.port = htons(port);
ret = adtfn(set, &data, timeout, flags);
@@ -522,8 +526,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
.features = IPSET_TYPE_IP | IPSET_TYPE_PORT,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- .revision_max = 1, /* SCTP and UDPLITE support added */
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = hash_ipport_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 0637ce096def..6344ef551ec8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -25,9 +25,12 @@
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
+#define REVISION_MIN 0
+#define REVISION_MAX 1 /* SCTP and UDPLITE support added */
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets");
+IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,ip");
/* Type specific function prefix */
@@ -133,8 +136,8 @@ static inline void
hash_ipportip4_data_next(struct ip_set_hash *h,
const struct hash_ipportip4_elem *d)
{
- h->next.ip = ntohl(d->ip);
- h->next.port = ntohs(d->port);
+ h->next.ip = d->ip;
+ h->next.port = d->port;
}
static int
@@ -225,7 +228,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr > 32)
+ if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
} else
@@ -239,9 +242,10 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (retried)
- ip = h->next.ip;
+ ip = ntohl(h->next.ip);
for (; !before(ip_to, ip); ip++) {
- p = retried && ip == h->next.ip ? h->next.port : port;
+ p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+ : port;
for (; p <= port_to; p++) {
data.ip = htonl(ip);
data.port = htons(p);
@@ -362,7 +366,7 @@ static inline void
hash_ipportip6_data_next(struct ip_set_hash *h,
const struct hash_ipportip6_elem *d)
{
- h->next.port = ntohs(d->port);
+ h->next.port = d->port;
}
static int
@@ -449,7 +453,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
if (retried)
- port = h->next.port;
+ port = ntohs(h->next.port);
for (; port <= port_to; port++) {
data.port = htons(port);
ret = adtfn(set, &data, timeout, flags);
@@ -540,8 +544,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
.features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
.dimension = IPSET_DIM_THREE,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- .revision_max = 1, /* SCTP and UDPLITE support added */
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = hash_ipportip_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 1ce21ca976e1..cb71f9a774e7 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -25,9 +25,14 @@
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
+#define REVISION_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Range as input support for IPv4 added */
+#define REVISION_MAX 3 /* nomatch flag support added */
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:ip,port,net type of IP sets");
+IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:ip,port,net");
/* Type specific function prefix */
@@ -99,10 +104,10 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags)
dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
}
-static inline bool
+static inline int
hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem)
{
- return !elem->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
@@ -173,9 +178,9 @@ static inline void
hash_ipportnet4_data_next(struct ip_set_hash *h,
const struct hash_ipportnet4_elem *d)
{
- h->next.ip = ntohl(d->ip);
- h->next.port = ntohs(d->port);
- h->next.ip2 = ntohl(d->ip2);
+ h->next.ip = d->ip;
+ h->next.port = d->port;
+ h->next.ip2 = d->ip2;
}
static int
@@ -290,7 +295,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr > 32)
+ if (!cidr || cidr > 32)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -314,14 +319,17 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (retried)
- ip = h->next.ip;
+ ip = ntohl(h->next.ip);
for (; !before(ip_to, ip); ip++) {
data.ip = htonl(ip);
- p = retried && ip == h->next.ip ? h->next.port : port;
+ p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+ : port;
for (; p <= port_to; p++) {
data.port = htons(p);
- ip2 = retried && ip == h->next.ip && p == h->next.port
- ? h->next.ip2 : ip2_from;
+ ip2 = retried
+ && ip == ntohl(h->next.ip)
+ && p == ntohs(h->next.port)
+ ? ntohl(h->next.ip2) : ip2_from;
while (!after(ip2, ip2_to)) {
data.ip2 = htonl(ip2);
ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
@@ -403,10 +411,10 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags)
dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
}
-static inline bool
+static inline int
hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem)
{
- return !elem->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
@@ -486,7 +494,7 @@ static inline void
hash_ipportnet6_data_next(struct ip_set_hash *h,
const struct hash_ipportnet6_elem *d)
{
- h->next.port = ntohs(d->port);
+ h->next.port = d->port;
}
static int
@@ -598,7 +606,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
if (retried)
- port = h->next.port;
+ port = ntohs(h->next.port);
for (; port <= port_to; port++) {
data.port = htons(port);
ret = adtfn(set, &data, timeout, flags);
@@ -689,13 +697,12 @@ hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
static struct ip_set_type hash_ipportnet_type __read_mostly = {
.name = "hash:ip,port,net",
.protocol = IPSET_PROTOCOL,
- .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 |
+ IPSET_TYPE_NOMATCH,
.dimension = IPSET_DIM_THREE,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- /* 1 SCTP and UDPLITE support added */
- /* 2 Range as input support for IPv4 added */
- .revision_max = 3, /* nomatch flag support added */
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = hash_ipportnet_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index c57a6a09906d..29e94b981f3f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -23,9 +23,13 @@
#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
+#define REVISION_MIN 0
+/* 1 Range as input support for IPv4 added */
+#define REVISION_MAX 2 /* nomatch flag support added */
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:net type of IP sets");
+IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:net");
/* Type specific function prefix */
@@ -86,10 +90,10 @@ hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags)
dst->nomatch = flags & IPSET_FLAG_NOMATCH;
}
-static inline bool
+static inline int
hash_net4_data_match(const struct hash_net4_elem *elem)
{
- return !elem->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
@@ -152,7 +156,7 @@ static inline void
hash_net4_data_next(struct ip_set_hash *h,
const struct hash_net4_elem *d)
{
- h->next.ip = ntohl(d->ip);
+ h->next.ip = d->ip;
}
static int
@@ -235,7 +239,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_HASH_RANGE;
}
if (retried)
- ip = h->next.ip;
+ ip = ntohl(h->next.ip);
while (!after(ip, ip_to)) {
data.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &data.cidr);
@@ -307,10 +311,10 @@ hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags)
dst->nomatch = flags & IPSET_FLAG_NOMATCH;
}
-static inline bool
+static inline int
hash_net6_data_match(const struct hash_net6_elem *elem)
{
- return !elem->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
@@ -532,12 +536,11 @@ hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
static struct ip_set_type hash_net_type __read_mostly = {
.name = "hash:net",
.protocol = IPSET_PROTOCOL,
- .features = IPSET_TYPE_IP,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- /* = 1 Range as input support for IPv4 added */
- .revision_max = 2, /* nomatch flag support added */
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = hash_net_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index d5d3607ae7bc..b9a63381e349 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -24,9 +24,13 @@
#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
+#define REVISION_MIN 0
+/* 1 nomatch flag support added */
+#define REVISION_MAX 2 /* /0 support added */
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:net,iface type of IP sets");
+IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:net,iface");
/* Interface name rbtree */
@@ -140,7 +144,7 @@ struct hash_netiface4_elem_hashed {
u8 physdev;
u8 cidr;
u8 nomatch;
- u8 padding;
+ u8 elem;
};
#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
@@ -151,7 +155,7 @@ struct hash_netiface4_elem {
u8 physdev;
u8 cidr;
u8 nomatch;
- u8 padding;
+ u8 elem;
const char *iface;
};
@@ -161,7 +165,7 @@ struct hash_netiface4_telem {
u8 physdev;
u8 cidr;
u8 nomatch;
- u8 padding;
+ u8 elem;
const char *iface;
unsigned long timeout;
};
@@ -181,18 +185,14 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
static inline bool
hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem)
{
- return elem->cidr == 0;
+ return elem->elem == 0;
}
static inline void
hash_netiface4_data_copy(struct hash_netiface4_elem *dst,
const struct hash_netiface4_elem *src)
{
- dst->ip = src->ip;
- dst->cidr = src->cidr;
- dst->physdev = src->physdev;
- dst->iface = src->iface;
- dst->nomatch = src->nomatch;
+ memcpy(dst, src, sizeof(*dst));
}
static inline void
@@ -201,10 +201,10 @@ hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags)
dst->nomatch = flags & IPSET_FLAG_NOMATCH;
}
-static inline bool
+static inline int
hash_netiface4_data_match(const struct hash_netiface4_elem *elem)
{
- return !elem->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
@@ -217,7 +217,7 @@ hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
static inline void
hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem)
{
- elem->cidr = 0;
+ elem->elem = 0;
}
static bool
@@ -277,7 +277,7 @@ static inline void
hash_netiface4_data_next(struct ip_set_hash *h,
const struct hash_netiface4_elem *d)
{
- h->next.ip = ntohl(d->ip);
+ h->next.ip = d->ip;
}
static int
@@ -288,7 +288,8 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem data = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+ .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+ .elem = 1,
};
int ret;
@@ -339,7 +340,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
{
struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netiface4_elem data = { .cidr = HOST_MASK };
+ struct hash_netiface4_elem data = { .cidr = HOST_MASK, .elem = 1 };
u32 ip = 0, ip_to, last;
u32 timeout = h->timeout;
char iface[IFNAMSIZ];
@@ -360,7 +361,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CIDR]) {
data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!data.cidr || data.cidr > HOST_MASK)
+ if (data.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
@@ -389,7 +390,6 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH))
flags |= (cadt_flags << 16);
}
-
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
data.ip = htonl(ip & ip_set_hostmask(data.cidr));
ret = adtfn(set, &data, timeout, flags);
@@ -409,7 +409,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (retried)
- ip = h->next.ip;
+ ip = ntohl(h->next.ip);
while (!after(ip, ip_to)) {
data.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &data.cidr);
@@ -442,7 +442,7 @@ struct hash_netiface6_elem_hashed {
u8 physdev;
u8 cidr;
u8 nomatch;
- u8 padding;
+ u8 elem;
};
#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
@@ -452,7 +452,7 @@ struct hash_netiface6_elem {
u8 physdev;
u8 cidr;
u8 nomatch;
- u8 padding;
+ u8 elem;
const char *iface;
};
@@ -461,7 +461,7 @@ struct hash_netiface6_telem {
u8 physdev;
u8 cidr;
u8 nomatch;
- u8 padding;
+ u8 elem;
const char *iface;
unsigned long timeout;
};
@@ -481,7 +481,7 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
static inline bool
hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem)
{
- return elem->cidr == 0;
+ return elem->elem == 0;
}
static inline void
@@ -497,16 +497,16 @@ hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags)
dst->nomatch = flags & IPSET_FLAG_NOMATCH;
}
-static inline bool
+static inline int
hash_netiface6_data_match(const struct hash_netiface6_elem *elem)
{
- return !elem->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem)
{
- elem->cidr = 0;
+ elem->elem = 0;
}
static inline void
@@ -590,7 +590,8 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem data = {
- .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
+ .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
+ .elem = 1,
};
int ret;
@@ -637,7 +638,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
{
struct ip_set_hash *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_netiface6_elem data = { .cidr = HOST_MASK };
+ struct hash_netiface6_elem data = { .cidr = HOST_MASK, .elem = 1 };
u32 timeout = h->timeout;
char iface[IFNAMSIZ];
int ret;
@@ -659,7 +660,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CIDR])
data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!data.cidr || data.cidr > HOST_MASK)
+ if (data.cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip6_netmask(&data.ip, data.cidr);
@@ -773,11 +774,12 @@ hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
static struct ip_set_type hash_netiface_type __read_mostly = {
.name = "hash:net,iface",
.protocol = IPSET_PROTOCOL,
- .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_IFACE |
+ IPSET_TYPE_NOMATCH,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- .revision_max = 1, /* nomatch flag support added */
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = hash_netiface_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index fc3143a2d41b..7ef700de596c 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -24,9 +24,14 @@
#include <linux/netfilter/ipset/ip_set_getport.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
+#define REVISION_MIN 0
+/* 1 SCTP and UDPLITE support added */
+/* 2 Range as input support for IPv4 added */
+#define REVISION_MAX 3 /* nomatch flag support added */
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:net,port type of IP sets");
+IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_hash:net,port");
/* Type specific function prefix */
@@ -99,10 +104,10 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags)
dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
}
-static inline bool
+static inline int
hash_netport4_data_match(const struct hash_netport4_elem *elem)
{
- return !elem->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
@@ -171,8 +176,8 @@ static inline void
hash_netport4_data_next(struct ip_set_hash *h,
const struct hash_netport4_elem *d)
{
- h->next.ip = ntohl(d->ip);
- h->next.port = ntohs(d->port);
+ h->next.ip = d->ip;
+ h->next.port = d->port;
}
static int
@@ -289,12 +294,13 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
}
if (retried)
- ip = h->next.ip;
+ ip = ntohl(h->next.ip);
while (!after(ip, ip_to)) {
data.ip = htonl(ip);
last = ip_set_range_to_cidr(ip, ip_to, &cidr);
data.cidr = cidr - 1;
- p = retried && ip == h->next.ip ? h->next.port : port;
+ p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+ : port;
for (; p <= port_to; p++) {
data.port = htons(p);
ret = adtfn(set, &data, timeout, flags);
@@ -369,10 +375,10 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags)
dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
}
-static inline bool
+static inline int
hash_netport6_data_match(const struct hash_netport6_elem *elem)
{
- return !elem->nomatch;
+ return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
@@ -450,7 +456,7 @@ static inline void
hash_netport6_data_next(struct ip_set_hash *h,
const struct hash_netport6_elem *d)
{
- h->next.port = ntohs(d->port);
+ h->next.port = d->port;
}
static int
@@ -554,7 +560,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
if (retried)
- port = h->next.port;
+ port = ntohs(h->next.port);
for (; port <= port_to; port++) {
data.port = htons(port);
ret = adtfn(set, &data, timeout, flags);
@@ -644,13 +650,11 @@ hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
static struct ip_set_type hash_netport_type __read_mostly = {
.name = "hash:net,port",
.protocol = IPSET_PROTOCOL,
- .features = IPSET_TYPE_IP | IPSET_TYPE_PORT,
+ .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH,
.dimension = IPSET_DIM_TWO,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- /* 1 SCTP and UDPLITE support added */
- /* 2, Range as input support for IPv4 added */
- .revision_max = 3, /* nomatch flag support added */
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = hash_netport_create,
.create_policy = {
[IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 6cb1225765f9..8371c2bac2e4 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -16,9 +16,12 @@
#include <linux/netfilter/ipset/ip_set_timeout.h>
#include <linux/netfilter/ipset/ip_set_list.h>
+#define REVISION_MIN 0
+#define REVISION_MAX 0
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("list:set type of IP sets");
+IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);
MODULE_ALIAS("ip_set_list:set");
/* Member elements without and with timeout */
@@ -579,8 +582,8 @@ static struct ip_set_type list_set_type __read_mostly = {
.features = IPSET_TYPE_NAME | IPSET_DUMP_LAST,
.dimension = IPSET_DIM_ONE,
.family = NFPROTO_UNSPEC,
- .revision_min = 0,
- .revision_max = 0,
+ .revision_min = REVISION_MIN,
+ .revision_max = REVISION_MAX,
.create = list_set_create,
.create_policy = {
[IPSET_ATTR_SIZE] = { .type = NLA_U32 },
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index f9871385a65e..8b2cffdfdd99 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -250,7 +250,8 @@ comment 'IPVS application helper'
config IP_VS_FTP
tristate "FTP protocol helper"
- depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT
+ depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \
+ NF_CONNTRACK_FTP
select IP_VS_NFCT
---help---
FTP is a protocol that transfers IP address and/or port number in
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 64f9e8f13207..9713e6e86d47 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
}
-/*
- * ip_vs_app registration routine
- */
-int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
+/* Register application for netns */
+struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
struct netns_ipvs *ipvs = net_ipvs(net);
- /* increase the module use count */
- ip_vs_use_count_inc();
+ struct ip_vs_app *a;
+ int err = 0;
+
+ if (!ipvs)
+ return ERR_PTR(-ENOENT);
mutex_lock(&__ip_vs_app_mutex);
- list_add(&app->a_list, &ipvs->app_list);
+ list_for_each_entry(a, &ipvs->app_list, a_list) {
+ if (!strcmp(app->name, a->name)) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+ }
+ a = kmemdup(app, sizeof(*app), GFP_KERNEL);
+ if (!a) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ INIT_LIST_HEAD(&a->incs_list);
+ list_add(&a->a_list, &ipvs->app_list);
+ /* increase the module use count */
+ ip_vs_use_count_inc();
+out_unlock:
mutex_unlock(&__ip_vs_app_mutex);
- return 0;
+ return err ? ERR_PTR(err) : a;
}
@@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
*/
void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
- struct ip_vs_app *inc, *nxt;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_app *a, *anxt, *inc, *nxt;
+
+ if (!ipvs)
+ return;
mutex_lock(&__ip_vs_app_mutex);
- list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
- ip_vs_app_inc_release(net, inc);
- }
+ list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
+ if (app && strcmp(app->name, a->name))
+ continue;
+ list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
+ ip_vs_app_inc_release(net, inc);
+ }
- list_del(&app->a_list);
+ list_del(&a->a_list);
+ kfree(a);
- mutex_unlock(&__ip_vs_app_mutex);
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
+ }
- /* decrease the module use count */
- ip_vs_use_count_dec();
+ mutex_unlock(&__ip_vs_app_mutex);
}
@@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net)
void __net_exit ip_vs_app_net_cleanup(struct net *net)
{
+ unregister_ip_vs_app(net, NULL /* all */);
proc_net_remove(net, "ip_vs_app");
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b54eccef40b5..58918e20f9d5 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
- unsigned int offset, ihl, verdict;
+ unsigned int offset, offset2, ihl, verdict;
+ bool ipip;
*related = 1;
@@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
net = skb_net(skb);
+ /* Special case for errors for IPIP packets */
+ ipip = false;
+ if (cih->protocol == IPPROTO_IPIP) {
+ if (unlikely(cih->frag_off & htons(IP_OFFSET)))
+ return NF_ACCEPT;
+ /* Error for our IPIP must arrive at LOCAL_IN */
+ if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
+ return NF_ACCEPT;
+ offset += cih->ihl * 4;
+ cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
+ if (cih == NULL)
+ return NF_ACCEPT; /* The packet looks wrong, ignore */
+ ipip = true;
+ }
+
pd = ip_vs_proto_data_get(net, cih->protocol);
if (!pd)
return NF_ACCEPT;
@@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking incoming ICMP for");
+ offset2 = offset;
offset += cih->ihl * 4;
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
- /* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
+ /* The embedded headers contain source and dest in reverse order.
+ * For IPIP this is error for request, not for reply.
+ */
+ cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1);
if (!cp)
return NF_ACCEPT;
@@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
goto out;
}
+ if (ipip) {
+ __be32 info = ic->un.gateway;
+
+ /* Update the MTU */
+ if (ic->type == ICMP_DEST_UNREACH &&
+ ic->code == ICMP_FRAG_NEEDED) {
+ struct ip_vs_dest *dest = cp->dest;
+ u32 mtu = ntohs(ic->un.frag.mtu);
+
+ /* Strip outer IP and ICMP, go to IPIP header */
+ __skb_pull(skb, ihl + sizeof(_icmph));
+ offset2 -= ihl + sizeof(_icmph);
+ skb_reset_network_header(skb);
+ IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
+ &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
+ rcu_read_lock();
+ ipv4_update_pmtu(skb, dev_net(skb->dev),
+ mtu, 0, 0, 0, 0);
+ rcu_read_unlock();
+ /* Client uses PMTUD? */
+ if (!(cih->frag_off & htons(IP_DF)))
+ goto ignore_ipip;
+ /* Prefer the resulting PMTU */
+ if (dest) {
+ spin_lock(&dest->dst_lock);
+ if (dest->dst_cache)
+ mtu = dst_mtu(dest->dst_cache);
+ spin_unlock(&dest->dst_lock);
+ }
+ if (mtu > 68 + sizeof(struct iphdr))
+ mtu -= sizeof(struct iphdr);
+ info = htonl(mtu);
+ }
+ /* Strip outer IP, ICMP and IPIP, go to IP header of
+ * original request.
+ */
+ __skb_pull(skb, offset2);
+ skb_reset_network_header(skb);
+ IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
+ &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
+ ic->type, ic->code, ntohl(info));
+ icmp_send(skb, ic->type, ic->code, info);
+ /* ICMP can be shorter but anyways, account it */
+ ip_vs_out_stats(cp, skb);
+
+ignore_ipip:
+ consume_skb(skb);
+ verdict = NF_STOLEN;
+ goto out;
+ }
+
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 84444dda194b..7e7198b51c06 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -539,8 +539,7 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
* Remove it from the rs_table table.
*/
if (!list_empty(&dest->d_list)) {
- list_del(&dest->d_list);
- INIT_LIST_HEAD(&dest->d_list);
+ list_del_init(&dest->d_list);
}
return 1;
@@ -1171,8 +1170,10 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
goto out_err;
}
svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
- if (!svc->stats.cpustats)
+ if (!svc->stats.cpustats) {
+ ret = -ENOMEM;
goto out_err;
+ }
/* I'm the first user of the service */
atomic_set(&svc->usecnt, 0);
@@ -1801,6 +1802,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "pmtu_disc",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -2759,6 +2766,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_timeout_user t;
+ memset(&t, 0, sizeof(t));
__ip_vs_get_timeouts(net, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
@@ -2930,7 +2938,7 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb,
{
void *hdr;
- hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&ip_vs_genl_family, NLM_F_MULTI,
IPVS_CMD_NEW_SERVICE);
if (!hdr)
@@ -3119,7 +3127,7 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
{
void *hdr;
- hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&ip_vs_genl_family, NLM_F_MULTI,
IPVS_CMD_NEW_DEST);
if (!hdr)
@@ -3248,7 +3256,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
struct netlink_callback *cb)
{
void *hdr;
- hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&ip_vs_genl_family, NLM_F_MULTI,
IPVS_CMD_NEW_DAEMON);
if (!hdr)
@@ -3675,7 +3683,7 @@ static void ip_vs_genl_unregister(void)
* per netns intit/exit func.
*/
#ifdef CONFIG_SYSCTL
-int __net_init ip_vs_control_net_init_sysctl(struct net *net)
+static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
{
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3726,6 +3734,8 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
tbl[idx++].data = &ipvs->sysctl_sync_retries;
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+ ipvs->sysctl_pmtu_disc = 1;
+ tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
@@ -3743,7 +3753,7 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
return 0;
}
-void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3754,8 +3764,8 @@ void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
#else
-int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
-void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
+static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
#endif
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index b20b29c903ef..4f53a5f04437 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -268,6 +268,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* packet.
*/
ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ iph->ihl * 4,
start-data, end-start,
buf, buf_len);
if (ret) {
@@ -441,16 +442,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
if (!ipvs)
return -ENOENT;
- app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL);
- if (!app)
- return -ENOMEM;
- INIT_LIST_HEAD(&app->a_list);
- INIT_LIST_HEAD(&app->incs_list);
- ipvs->ftp_app = app;
- ret = register_ip_vs_app(net, app);
- if (ret)
- goto err_exit;
+ app = register_ip_vs_app(net, &ip_vs_ftp);
+ if (IS_ERR(app))
+ return PTR_ERR(app);
for (i = 0; i < ports_count; i++) {
if (!ports[i])
@@ -464,9 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
return 0;
err_unreg:
- unregister_ip_vs_app(net, app);
-err_exit:
- kfree(ipvs->ftp_app);
+ unregister_ip_vs_app(net, &ip_vs_ftp);
return ret;
}
/*
@@ -474,10 +467,7 @@ err_exit:
*/
static void __ip_vs_ftp_exit(struct net *net)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- unregister_ip_vs_app(net, ipvs->ftp_app);
- kfree(ipvs->ftp_app);
+ unregister_ip_vs_app(net, &ip_vs_ftp);
}
static struct pernet_operations ip_vs_ftp_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 65b616ae1716..56f6d5d81a77 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -49,6 +49,7 @@ enum {
IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
* local
*/
+ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
};
/*
@@ -84,6 +85,58 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
return dst;
}
+static inline bool
+__mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
+{
+ if (IP6CB(skb)->frag_max_size) {
+ /* frag_max_size tell us that, this packet have been
+ * defragmented by netfilter IPv6 conntrack module.
+ */
+ if (IP6CB(skb)->frag_max_size > mtu)
+ return true; /* largest fragment violate MTU */
+ }
+ else if (skb->len > mtu && !skb_is_gso(skb)) {
+ return true; /* Packet size violate MTU size */
+ }
+ return false;
+}
+
+/* Get route to daddr, update *saddr, optionally bind route to saddr */
+static struct rtable *do_output_route4(struct net *net, __be32 daddr,
+ u32 rtos, int rt_mode, __be32 *saddr)
+{
+ struct flowi4 fl4;
+ struct rtable *rt;
+ int loop = 0;
+
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.daddr = daddr;
+ fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
+ fl4.flowi4_tos = rtos;
+
+retry:
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt)) {
+ /* Invalid saddr ? */
+ if (PTR_ERR(rt) == -EINVAL && *saddr &&
+ rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
+ *saddr = 0;
+ flowi4_update_output(&fl4, 0, rtos, daddr, 0);
+ goto retry;
+ }
+ IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
+ return NULL;
+ } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
+ ip_rt_put(rt);
+ *saddr = fl4.saddr;
+ flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
+ loop++;
+ goto retry;
+ }
+ *saddr = fl4.saddr;
+ return rt;
+}
+
/* Get route to destination or remote server */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
@@ -98,20 +151,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
spin_lock(&dest->dst_lock);
if (!(rt = (struct rtable *)
__ip_vs_dst_check(dest, rtos))) {
- struct flowi4 fl4;
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.daddr = dest->addr.ip;
- fl4.flowi4_tos = rtos;
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt)) {
+ rt = do_output_route4(net, dest->addr.ip, rtos,
+ rt_mode, &dest->dst_saddr.ip);
+ if (!rt) {
spin_unlock(&dest->dst_lock);
- IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
- &dest->addr.ip);
return NULL;
}
__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
- dest->dst_saddr.ip = fl4.saddr;
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
"rtos=%X\n",
&dest->addr.ip, &dest->dst_saddr.ip,
@@ -122,19 +168,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
*ret_saddr = dest->dst_saddr.ip;
spin_unlock(&dest->dst_lock);
} else {
- struct flowi4 fl4;
+ __be32 saddr = htonl(INADDR_ANY);
- memset(&fl4, 0, sizeof(fl4));
- fl4.daddr = daddr;
- fl4.flowi4_tos = rtos;
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt)) {
- IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
- &daddr);
+ /* For such unconfigured boxes avoid many route lookups
+ * for performance reasons because we do not remember saddr
+ */
+ rt_mode &= ~IP_VS_RT_MODE_CONNECT;
+ rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
+ if (!rt)
return NULL;
- }
if (ret_saddr)
- *ret_saddr = fl4.saddr;
+ *ret_saddr = saddr;
}
local = rt->rt_flags & RTCF_LOCAL;
@@ -331,6 +375,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
old_dst = dest->dst_cache;
dest->dst_cache = NULL;
dst_release(old_dst);
+ dest->dst_saddr.ip = 0;
}
#define IP_VS_XMIT_TUNNEL(skb, cp) \
@@ -462,7 +507,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu && !skb_is_gso(skb)) {
+ if (__mtu_check_toobig_v6(skb, mtu)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -683,7 +728,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu && !skb_is_gso(skb)) {
+ if (__mtu_check_toobig_v6(skb, mtu)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -766,12 +811,13 @@ int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp)
{
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
struct iphdr *old_iph = ip_hdr(skb);
u8 tos = old_iph->tos;
- __be16 df = old_iph->frag_off;
+ __be16 df;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int mtu;
@@ -781,7 +827,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL,
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_CONNECT,
&saddr)))
goto tx_error_icmp;
if (rt->rt_flags & RTCF_LOCAL) {
@@ -796,13 +843,13 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto tx_error_put;
}
- if (skb_dst(skb))
+ if (rt_is_output_route(skb_rtable(skb)))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- df |= (old_iph->frag_off & htons(IP_DF));
+ /* Copy DF, reset fragment offset and MF */
+ df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
- if ((old_iph->frag_off & htons(IP_DF) &&
- mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
+ if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
IP_VS_DBG_RL("%s(): frag needed\n", __func__);
goto tx_error_put;
@@ -915,8 +962,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
if (skb_dst(skb))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
- !skb_is_gso(skb)) {
+ /* MTU checking: Notice that 'mtu' have been adjusted before hand */
+ if (__mtu_check_toobig_v6(skb, mtu)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -1082,7 +1129,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu) {
+ if (__mtu_check_toobig_v6(skb, mtu)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
@@ -1318,7 +1365,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* MTU checking */
mtu = dst_mtu(&rt->dst);
- if (skb->len > mtu && !skb_is_gso(skb)) {
+ if (__mtu_check_toobig_v6(skb, mtu)) {
if (!skb->dev) {
struct net *net = dev_net(skb_dst(skb)->dev);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index f2de8c55ac50..c514fe6033d2 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -40,6 +40,7 @@ MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conntrack_expect *exp)
@@ -155,8 +156,8 @@ static int amanda_help(struct sk_buff *skb,
nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
- ret = nf_nat_amanda(skb, ctinfo, off - dataoff,
- len, exp);
+ ret = nf_nat_amanda(skb, ctinfo, protoff,
+ off - dataoff, len, exp);
else if (nf_ct_expect_related(exp) != 0)
ret = NF_DROP;
nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index cf4875565d67..0f241be28f9e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -55,6 +55,12 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
const struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
+int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff);
+EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
+
DEFINE_SPINLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
@@ -249,12 +255,15 @@ static void death_by_event(unsigned long ul_conntrack)
{
struct nf_conn *ct = (void *)ul_conntrack;
struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
+
+ BUG_ON(ecache == NULL);
if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
/* bad luck, let's retry again */
- ct->timeout.expires = jiffies +
+ ecache->timeout.expires = jiffies +
(random32() % net->ct.sysctl_events_retry_timeout);
- add_timer(&ct->timeout);
+ add_timer(&ecache->timeout);
return;
}
/* we've got the event delivered, now it's dying */
@@ -268,6 +277,9 @@ static void death_by_event(unsigned long ul_conntrack)
void nf_ct_insert_dying_list(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
+ struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
+
+ BUG_ON(ecache == NULL);
/* add this conntrack to the dying list */
spin_lock_bh(&nf_conntrack_lock);
@@ -275,10 +287,10 @@ void nf_ct_insert_dying_list(struct nf_conn *ct)
&net->ct.dying);
spin_unlock_bh(&nf_conntrack_lock);
/* set a new timer to retry event delivery */
- setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
- ct->timeout.expires = jiffies +
+ setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
+ ecache->timeout.expires = jiffies +
(random32() % net->ct.sysctl_events_retry_timeout);
- add_timer(&ct->timeout);
+ add_timer(&ecache->timeout);
}
EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
@@ -924,7 +936,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
enum ip_conntrack_info ctinfo;
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
- struct nf_conn_timeout *timeout_ext;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
@@ -991,11 +1002,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
NF_CT_ASSERT(skb->nfct);
/* Decide what timeout policy we want to apply to this flow. */
- timeout_ext = nf_ct_timeout_find(ct);
- if (timeout_ext)
- timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
- else
- timeouts = l4proto->get_timeouts(net);
+ timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
if (ret <= 0) {
@@ -1217,6 +1224,8 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
spin_lock_bh(&nf_conntrack_lock);
for (; *bucket < net->ct.htable_size; (*bucket)++) {
hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+ continue;
ct = nf_ct_tuplehash_to_ctrack(h);
if (iter(ct, data))
goto found;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index e7be79e640de..de9781b6464f 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -61,7 +61,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
goto out_unlock;
item.ct = ct;
- item.pid = 0;
+ item.portid = 0;
item.report = 0;
ret = notify->fcn(events | missed, &item);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 45cf602a76bc..527651a53a45 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -361,23 +361,6 @@ static void evict_oldest_expect(struct nf_conn *master,
}
}
-static inline int refresh_timer(struct nf_conntrack_expect *i)
-{
- struct nf_conn_help *master_help = nfct_help(i->master);
- const struct nf_conntrack_expect_policy *p;
-
- if (!del_timer(&i->timeout))
- return 0;
-
- p = &rcu_dereference_protected(
- master_help->helper,
- lockdep_is_held(&nf_conntrack_lock)
- )->expect_policy[i->class];
- i->timeout.expires = jiffies + p->timeout * HZ;
- add_timer(&i->timeout);
- return 1;
-}
-
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
const struct nf_conntrack_expect_policy *p;
@@ -386,7 +369,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
struct nf_conn_help *master_help = nfct_help(master);
struct nf_conntrack_helper *helper;
struct net *net = nf_ct_exp_net(expect);
- struct hlist_node *n;
+ struct hlist_node *n, *next;
unsigned int h;
int ret = 1;
@@ -395,12 +378,12 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
goto out;
}
h = nf_ct_expect_dst_hash(&expect->tuple);
- hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
+ hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) {
if (expect_matches(i, expect)) {
- /* Refresh timer: if it's dying, ignore.. */
- if (refresh_timer(i)) {
- ret = 0;
- goto out;
+ if (del_timer(&i->timeout)) {
+ nf_ct_unlink_expect(i);
+ nf_ct_expect_put(i);
+ break;
}
} else if (expect_clash(i, expect)) {
ret = -EBUSY;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 4bb771d1f57a..1ce3befb7c8a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -48,6 +48,7 @@ module_param(loose, bool, 0600);
unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
enum nf_ct_ftp_type type,
+ unsigned int protoff,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conntrack_expect *exp);
@@ -395,6 +396,12 @@ static int help(struct sk_buff *skb,
/* Look up to see if we're just after a \n. */
if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
+ /* We're picking up this, clear flags and let it continue */
+ if (unlikely(ct_ftp_info->flags[dir] & NF_CT_FTP_SEQ_PICKUP)) {
+ ct_ftp_info->flags[dir] ^= NF_CT_FTP_SEQ_PICKUP;
+ goto skip_nl_seq;
+ }
+
/* Now if this ends in \n, update ftp info. */
pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n",
ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
@@ -405,6 +412,7 @@ static int help(struct sk_buff *skb,
goto out_update_nl;
}
+skip_nl_seq:
/* Initialize IP/IPv6 addr to expected address (it's not mentioned
in EPSV responses) */
cmd.l3num = nf_ct_l3num(ct);
@@ -489,7 +497,7 @@ static int help(struct sk_buff *skb,
nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
- matchoff, matchlen, exp);
+ protoff, matchoff, matchlen, exp);
else {
/* Can't expect this? Best to drop packet now. */
if (nf_ct_expect_related(exp) != 0)
@@ -511,6 +519,19 @@ out_update_nl:
return ret;
}
+static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
+{
+ struct nf_ct_ftp_master *ftp = nfct_help_data(ct);
+
+ /* This conntrack has been injected from user-space, always pick up
+ * sequence tracking. Otherwise, the first FTP command after the
+ * failover breaks.
+ */
+ ftp->flags[IP_CT_DIR_ORIGINAL] |= NF_CT_FTP_SEQ_PICKUP;
+ ftp->flags[IP_CT_DIR_REPLY] |= NF_CT_FTP_SEQ_PICKUP;
+ return 0;
+}
+
static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly;
static const struct nf_conntrack_expect_policy ftp_exp_policy = {
@@ -560,6 +581,7 @@ static int __init nf_conntrack_ftp_init(void)
ftp[i][j].expect_policy = &ftp_exp_policy;
ftp[i][j].me = THIS_MODULE;
ftp[i][j].help = help;
+ ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr;
if (ports[i] == FTP_PORT)
sprintf(ftp[i][j].name, "ftp");
else
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 4283b207e63b..1b30b0dee708 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -49,12 +49,12 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
"(determined by routing information)");
/* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff *skb,
+int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
union nf_inet_addr *addr, __be16 port)
__read_mostly;
-int (*set_h225_addr_hook) (struct sk_buff *skb,
+int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff,
unsigned char **data, int dataoff,
TransportAddress *taddr,
union nf_inet_addr *addr, __be16 port)
@@ -62,16 +62,17 @@ int (*set_h225_addr_hook) (struct sk_buff *skb,
int (*set_sig_addr_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data,
+ unsigned int protoff, unsigned char **data,
TransportAddress *taddr, int count) __read_mostly;
int (*set_ras_addr_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data,
+ unsigned int protoff, unsigned char **data,
TransportAddress *taddr, int count) __read_mostly;
int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
__be16 port, __be16 rtp_port,
@@ -80,24 +81,28 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
int (*nat_t120_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
int (*nat_h245_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
int (*nat_callforwarding_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
int (*nat_q931_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, TransportAddress *taddr, int idx,
__be16 port, struct nf_conntrack_expect *exp)
__read_mostly;
@@ -251,6 +256,7 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
/****************************************************************************/
static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr)
{
@@ -295,9 +301,10 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.dst.u3,
sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
(nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+ ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
taddr, port, rtp_port, rtp_exp, rtcp_exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -324,6 +331,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
static int expect_t120(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr)
{
@@ -353,9 +361,10 @@ static int expect_t120(struct sk_buff *skb,
&ct->tuplehash[!dir].tuple.dst.u3,
sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
(nat_t120 = rcu_dereference(nat_t120_hook)) &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr,
+ ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -374,6 +383,7 @@ static int expect_t120(struct sk_buff *skb,
static int process_h245_channel(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
H2250LogicalChannelParameters *channel)
{
@@ -381,7 +391,7 @@ static int process_h245_channel(struct sk_buff *skb,
if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
/* RTP */
- ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
&channel->mediaChannel);
if (ret < 0)
return -1;
@@ -390,7 +400,7 @@ static int process_h245_channel(struct sk_buff *skb,
if (channel->
options & eH2250LogicalChannelParameters_mediaControlChannel) {
/* RTCP */
- ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
&channel->mediaControlChannel);
if (ret < 0)
return -1;
@@ -402,6 +412,7 @@ static int process_h245_channel(struct sk_buff *skb,
/****************************************************************************/
static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
OpenLogicalChannel *olc)
{
@@ -412,7 +423,8 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
{
- ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
+ ret = process_h245_channel(skb, ct, ctinfo,
+ protoff, data, dataoff,
&olc->
forwardLogicalChannelParameters.
multiplexParameters.
@@ -430,7 +442,8 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
{
ret =
- process_h245_channel(skb, ct, ctinfo, data, dataoff,
+ process_h245_channel(skb, ct, ctinfo,
+ protoff, data, dataoff,
&olc->
reverseLogicalChannelParameters.
multiplexParameters.
@@ -448,7 +461,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
t120.choice == eDataProtocolCapability_separateLANStack &&
olc->separateStack.networkAddress.choice ==
eNetworkAccessParameters_networkAddress_localAreaAddress) {
- ret = expect_t120(skb, ct, ctinfo, data, dataoff,
+ ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff,
&olc->separateStack.networkAddress.
localAreaAddress);
if (ret < 0)
@@ -461,7 +474,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
OpenLogicalChannelAck *olca)
{
H2250LogicalChannelAckParameters *ack;
@@ -477,7 +490,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
choice ==
eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
{
- ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
+ ret = process_h245_channel(skb, ct, ctinfo,
+ protoff, data, dataoff,
&olca->
reverseLogicalChannelParameters.
multiplexParameters.
@@ -496,7 +510,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
if (ack->options &
eH2250LogicalChannelAckParameters_mediaChannel) {
/* RTP */
- ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo,
+ protoff, data, dataoff,
&ack->mediaChannel);
if (ret < 0)
return -1;
@@ -505,7 +520,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
if (ack->options &
eH2250LogicalChannelAckParameters_mediaControlChannel) {
/* RTCP */
- ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo,
+ protoff, data, dataoff,
&ack->mediaControlChannel);
if (ret < 0)
return -1;
@@ -515,7 +531,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
if ((olca->options & eOpenLogicalChannelAck_separateStack) &&
olca->separateStack.networkAddress.choice ==
eNetworkAccessParameters_networkAddress_localAreaAddress) {
- ret = expect_t120(skb, ct, ctinfo, data, dataoff,
+ ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff,
&olca->separateStack.networkAddress.
localAreaAddress);
if (ret < 0)
@@ -528,14 +544,15 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
MultimediaSystemControlMessage *mscm)
{
switch (mscm->choice) {
case eMultimediaSystemControlMessage_request:
if (mscm->request.choice ==
eRequestMessage_openLogicalChannel) {
- return process_olc(skb, ct, ctinfo, data, dataoff,
+ return process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
&mscm->request.openLogicalChannel);
}
pr_debug("nf_ct_h323: H.245 Request %d\n",
@@ -544,7 +561,8 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
case eMultimediaSystemControlMessage_response:
if (mscm->response.choice ==
eResponseMessage_openLogicalChannelAck) {
- return process_olca(skb, ct, ctinfo, data, dataoff,
+ return process_olca(skb, ct, ctinfo,
+ protoff, data, dataoff,
&mscm->response.
openLogicalChannelAck);
}
@@ -595,7 +613,8 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff,
}
/* Process H.245 signal */
- if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0)
+ if (process_h245(skb, ct, ctinfo, protoff,
+ &data, dataoff, &mscm) < 0)
goto drop;
}
@@ -659,7 +678,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data,
/****************************************************************************/
static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
TransportAddress *taddr)
{
int dir = CTINFO2DIR(ctinfo);
@@ -688,9 +707,10 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
&ct->tuplehash[!dir].tuple.dst.u3,
sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
(nat_h245 = rcu_dereference(nat_h245_hook)) &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr,
+ ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -776,6 +796,7 @@ static int callforward_do_filter(const union nf_inet_addr *src,
static int expect_callforwarding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
TransportAddress *taddr)
{
@@ -811,9 +832,11 @@ static int expect_callforwarding(struct sk_buff *skb,
&ct->tuplehash[!dir].tuple.dst.u3,
sizeof(ct->tuplehash[dir].tuple.src.u3)) &&
(nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
ct->status & IPS_NAT_MASK) {
/* Need NAT */
- ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff,
+ ret = nat_callforwarding(skb, ct, ctinfo,
+ protoff, data, dataoff,
taddr, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -831,6 +854,7 @@ static int expect_callforwarding(struct sk_buff *skb,
/****************************************************************************/
static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
Setup_UUIE *setup)
{
@@ -844,7 +868,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_q931: Setup\n");
if (setup->options & eSetup_UUIE_h245Address) {
- ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
&setup->h245Address);
if (ret < 0)
return -1;
@@ -852,14 +876,15 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
set_h225_addr = rcu_dereference(set_h225_addr_hook);
if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
- (set_h225_addr) && ct->status & IPS_NAT_MASK &&
+ (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK &&
get_h225_addr(ct, *data, &setup->destCallSignalAddress,
&addr, &port) &&
memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) {
pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n",
&addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3,
ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
- ret = set_h225_addr(skb, data, dataoff,
+ ret = set_h225_addr(skb, protoff, data, dataoff,
&setup->destCallSignalAddress,
&ct->tuplehash[!dir].tuple.src.u3,
ct->tuplehash[!dir].tuple.src.u.tcp.port);
@@ -868,14 +893,15 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
}
if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
- (set_h225_addr) && ct->status & IPS_NAT_MASK &&
+ (set_h225_addr) && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK &&
get_h225_addr(ct, *data, &setup->sourceCallSignalAddress,
&addr, &port) &&
memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) {
pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n",
&addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3,
ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
- ret = set_h225_addr(skb, data, dataoff,
+ ret = set_h225_addr(skb, protoff, data, dataoff,
&setup->sourceCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
ct->tuplehash[!dir].tuple.dst.u.tcp.port);
@@ -885,7 +911,8 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
if (setup->options & eSetup_UUIE_fastStart) {
for (i = 0; i < setup->fastStart.count; i++) {
- ret = process_olc(skb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
&setup->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -899,6 +926,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
static int process_callproceeding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
CallProceeding_UUIE *callproc)
{
@@ -908,7 +936,7 @@ static int process_callproceeding(struct sk_buff *skb,
pr_debug("nf_ct_q931: CallProceeding\n");
if (callproc->options & eCallProceeding_UUIE_h245Address) {
- ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
&callproc->h245Address);
if (ret < 0)
return -1;
@@ -916,7 +944,8 @@ static int process_callproceeding(struct sk_buff *skb,
if (callproc->options & eCallProceeding_UUIE_fastStart) {
for (i = 0; i < callproc->fastStart.count; i++) {
- ret = process_olc(skb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
&callproc->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -929,6 +958,7 @@ static int process_callproceeding(struct sk_buff *skb,
/****************************************************************************/
static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
Connect_UUIE *connect)
{
@@ -938,7 +968,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_q931: Connect\n");
if (connect->options & eConnect_UUIE_h245Address) {
- ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
&connect->h245Address);
if (ret < 0)
return -1;
@@ -946,7 +976,8 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
if (connect->options & eConnect_UUIE_fastStart) {
for (i = 0; i < connect->fastStart.count; i++) {
- ret = process_olc(skb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
&connect->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -959,6 +990,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
Alerting_UUIE *alert)
{
@@ -968,7 +1000,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_q931: Alerting\n");
if (alert->options & eAlerting_UUIE_h245Address) {
- ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
&alert->h245Address);
if (ret < 0)
return -1;
@@ -976,7 +1008,8 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
if (alert->options & eAlerting_UUIE_fastStart) {
for (i = 0; i < alert->fastStart.count; i++) {
- ret = process_olc(skb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
&alert->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -989,6 +1022,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
Facility_UUIE *facility)
{
@@ -999,15 +1033,15 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
if (facility->reason.choice == eFacilityReason_callForwarded) {
if (facility->options & eFacility_UUIE_alternativeAddress)
- return expect_callforwarding(skb, ct, ctinfo, data,
- dataoff,
+ return expect_callforwarding(skb, ct, ctinfo,
+ protoff, data, dataoff,
&facility->
alternativeAddress);
return 0;
}
if (facility->options & eFacility_UUIE_h245Address) {
- ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
&facility->h245Address);
if (ret < 0)
return -1;
@@ -1015,7 +1049,8 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
if (facility->options & eFacility_UUIE_fastStart) {
for (i = 0; i < facility->fastStart.count; i++) {
- ret = process_olc(skb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
&facility->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -1028,6 +1063,7 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
Progress_UUIE *progress)
{
@@ -1037,7 +1073,7 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_q931: Progress\n");
if (progress->options & eProgress_UUIE_h245Address) {
- ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
&progress->h245Address);
if (ret < 0)
return -1;
@@ -1045,7 +1081,8 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
if (progress->options & eProgress_UUIE_fastStart) {
for (i = 0; i < progress->fastStart.count; i++) {
- ret = process_olc(skb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo,
+ protoff, data, dataoff,
&progress->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -1058,7 +1095,8 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff, Q931 *q931)
+ unsigned int protoff, unsigned char **data, int dataoff,
+ Q931 *q931)
{
H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
int i;
@@ -1066,28 +1104,29 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
switch (pdu->h323_message_body.choice) {
case eH323_UU_PDU_h323_message_body_setup:
- ret = process_setup(skb, ct, ctinfo, data, dataoff,
+ ret = process_setup(skb, ct, ctinfo, protoff, data, dataoff,
&pdu->h323_message_body.setup);
break;
case eH323_UU_PDU_h323_message_body_callProceeding:
- ret = process_callproceeding(skb, ct, ctinfo, data, dataoff,
+ ret = process_callproceeding(skb, ct, ctinfo,
+ protoff, data, dataoff,
&pdu->h323_message_body.
callProceeding);
break;
case eH323_UU_PDU_h323_message_body_connect:
- ret = process_connect(skb, ct, ctinfo, data, dataoff,
+ ret = process_connect(skb, ct, ctinfo, protoff, data, dataoff,
&pdu->h323_message_body.connect);
break;
case eH323_UU_PDU_h323_message_body_alerting:
- ret = process_alerting(skb, ct, ctinfo, data, dataoff,
+ ret = process_alerting(skb, ct, ctinfo, protoff, data, dataoff,
&pdu->h323_message_body.alerting);
break;
case eH323_UU_PDU_h323_message_body_facility:
- ret = process_facility(skb, ct, ctinfo, data, dataoff,
+ ret = process_facility(skb, ct, ctinfo, protoff, data, dataoff,
&pdu->h323_message_body.facility);
break;
case eH323_UU_PDU_h323_message_body_progress:
- ret = process_progress(skb, ct, ctinfo, data, dataoff,
+ ret = process_progress(skb, ct, ctinfo, protoff, data, dataoff,
&pdu->h323_message_body.progress);
break;
default:
@@ -1101,7 +1140,8 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
if (pdu->options & eH323_UU_PDU_h245Control) {
for (i = 0; i < pdu->h245Control.count; i++) {
- ret = process_h245(skb, ct, ctinfo, data, dataoff,
+ ret = process_h245(skb, ct, ctinfo,
+ protoff, data, dataoff,
&pdu->h245Control.item[i]);
if (ret < 0)
return -1;
@@ -1146,7 +1186,8 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff,
}
/* Process Q.931 signal */
- if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0)
+ if (process_q931(skb, ct, ctinfo, protoff,
+ &data, dataoff, &q931) < 0)
goto drop;
}
@@ -1243,7 +1284,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp,
/****************************************************************************/
static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data,
+ unsigned int protoff, unsigned char **data,
TransportAddress *taddr, int count)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1278,8 +1319,10 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
exp->flags = NF_CT_EXPECT_PERMANENT; /* Accept multiple calls */
nat_q931 = rcu_dereference(nat_q931_hook);
- if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */
- ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp);
+ if (nat_q931 && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) { /* Need NAT */
+ ret = nat_q931(skb, ct, ctinfo, protoff, data,
+ taddr, i, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1299,6 +1342,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, GatekeeperRequest *grq)
{
typeof(set_ras_addr_hook) set_ras_addr;
@@ -1306,8 +1350,9 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_ras: GRQ\n");
set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */
- return set_ras_addr(skb, ct, ctinfo, data,
+ if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) /* NATed */
+ return set_ras_addr(skb, ct, ctinfo, protoff, data,
&grq->rasAddress, 1);
return 0;
}
@@ -1315,6 +1360,7 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, GatekeeperConfirm *gcf)
{
int dir = CTINFO2DIR(ctinfo);
@@ -1359,6 +1405,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, RegistrationRequest *rrq)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1367,15 +1414,16 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_ras: RRQ\n");
- ret = expect_q931(skb, ct, ctinfo, data,
+ ret = expect_q931(skb, ct, ctinfo, protoff, data,
rrq->callSignalAddress.item,
rrq->callSignalAddress.count);
if (ret < 0)
return -1;
set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr && ct->status & IPS_NAT_MASK) {
- ret = set_ras_addr(skb, ct, ctinfo, data,
+ if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = set_ras_addr(skb, ct, ctinfo, protoff, data,
rrq->rasAddress.item,
rrq->rasAddress.count);
if (ret < 0)
@@ -1394,6 +1442,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, RegistrationConfirm *rcf)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1405,8 +1454,9 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_ras: RCF\n");
set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(skb, ct, ctinfo, data,
+ if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
rcf->callSignalAddress.item,
rcf->callSignalAddress.count);
if (ret < 0)
@@ -1443,6 +1493,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, UnregistrationRequest *urq)
{
struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1453,8 +1504,9 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_ras: URQ\n");
set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(skb, ct, ctinfo, data,
+ if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
urq->callSignalAddress.item,
urq->callSignalAddress.count);
if (ret < 0)
@@ -1475,6 +1527,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, AdmissionRequest *arq)
{
const struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1491,9 +1544,10 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
&addr, &port) &&
!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
port == info->sig_port[dir] &&
+ nf_ct_l3num(ct) == NFPROTO_IPV4 &&
set_h225_addr && ct->status & IPS_NAT_MASK) {
/* Answering ARQ */
- return set_h225_addr(skb, data, 0,
+ return set_h225_addr(skb, protoff, data, 0,
&arq->destCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
info->sig_port[!dir]);
@@ -1503,9 +1557,10 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
get_h225_addr(ct, *data, &arq->srcCallSignalAddress,
&addr, &port) &&
!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
- set_h225_addr && ct->status & IPS_NAT_MASK) {
+ set_h225_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
/* Calling ARQ */
- return set_h225_addr(skb, data, 0,
+ return set_h225_addr(skb, protoff, data, 0,
&arq->srcCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
port);
@@ -1517,6 +1572,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, AdmissionConfirm *acf)
{
int dir = CTINFO2DIR(ctinfo);
@@ -1535,8 +1591,9 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
if (!memcmp(&addr, &ct->tuplehash[dir].tuple.dst.u3, sizeof(addr))) {
/* Answering ACF */
set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr && ct->status & IPS_NAT_MASK)
- return set_sig_addr(skb, ct, ctinfo, data,
+ if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK)
+ return set_sig_addr(skb, ct, ctinfo, protoff, data,
&acf->destCallSignalAddress, 1);
return 0;
}
@@ -1564,6 +1621,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, LocationRequest *lrq)
{
typeof(set_ras_addr_hook) set_ras_addr;
@@ -1571,8 +1629,9 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_ras: LRQ\n");
set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr && ct->status & IPS_NAT_MASK)
- return set_ras_addr(skb, ct, ctinfo, data,
+ if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK)
+ return set_ras_addr(skb, ct, ctinfo, protoff, data,
&lrq->replyAddress, 1);
return 0;
}
@@ -1580,6 +1639,7 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, LocationConfirm *lcf)
{
int dir = CTINFO2DIR(ctinfo);
@@ -1619,6 +1679,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, InfoRequestResponse *irr)
{
int ret;
@@ -1628,16 +1689,18 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
pr_debug("nf_ct_ras: IRR\n");
set_ras_addr = rcu_dereference(set_ras_addr_hook);
- if (set_ras_addr && ct->status & IPS_NAT_MASK) {
- ret = set_ras_addr(skb, ct, ctinfo, data,
+ if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = set_ras_addr(skb, ct, ctinfo, protoff, data,
&irr->rasAddress, 1);
if (ret < 0)
return -1;
}
set_sig_addr = rcu_dereference(set_sig_addr_hook);
- if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(skb, ct, ctinfo, data,
+ if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
+ ct->status & IPS_NAT_MASK) {
+ ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
irr->callSignalAddress.item,
irr->callSignalAddress.count);
if (ret < 0)
@@ -1650,38 +1713,39 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, RasMessage *ras)
{
switch (ras->choice) {
case eRasMessage_gatekeeperRequest:
- return process_grq(skb, ct, ctinfo, data,
+ return process_grq(skb, ct, ctinfo, protoff, data,
&ras->gatekeeperRequest);
case eRasMessage_gatekeeperConfirm:
- return process_gcf(skb, ct, ctinfo, data,
+ return process_gcf(skb, ct, ctinfo, protoff, data,
&ras->gatekeeperConfirm);
case eRasMessage_registrationRequest:
- return process_rrq(skb, ct, ctinfo, data,
+ return process_rrq(skb, ct, ctinfo, protoff, data,
&ras->registrationRequest);
case eRasMessage_registrationConfirm:
- return process_rcf(skb, ct, ctinfo, data,
+ return process_rcf(skb, ct, ctinfo, protoff, data,
&ras->registrationConfirm);
case eRasMessage_unregistrationRequest:
- return process_urq(skb, ct, ctinfo, data,
+ return process_urq(skb, ct, ctinfo, protoff, data,
&ras->unregistrationRequest);
case eRasMessage_admissionRequest:
- return process_arq(skb, ct, ctinfo, data,
+ return process_arq(skb, ct, ctinfo, protoff, data,
&ras->admissionRequest);
case eRasMessage_admissionConfirm:
- return process_acf(skb, ct, ctinfo, data,
+ return process_acf(skb, ct, ctinfo, protoff, data,
&ras->admissionConfirm);
case eRasMessage_locationRequest:
- return process_lrq(skb, ct, ctinfo, data,
+ return process_lrq(skb, ct, ctinfo, protoff, data,
&ras->locationRequest);
case eRasMessage_locationConfirm:
- return process_lcf(skb, ct, ctinfo, data,
+ return process_lcf(skb, ct, ctinfo, protoff, data,
&ras->locationConfirm);
case eRasMessage_infoRequestResponse:
- return process_irr(skb, ct, ctinfo, data,
+ return process_irr(skb, ct, ctinfo, protoff, data,
&ras->infoRequestResponse);
default:
pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
@@ -1721,7 +1785,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff,
}
/* Process RAS message */
- if (process_ras(skb, ct, ctinfo, &data, &ras) < 0)
+ if (process_ras(skb, ct, ctinfo, protoff, &data, &ras) < 0)
goto drop;
accept:
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 009c52cfd1ec..3b20aa77cfc8 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -33,6 +33,7 @@ static DEFINE_SPINLOCK(irc_buffer_lock);
unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conntrack_expect *exp) __read_mostly;
@@ -205,7 +206,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
if (nf_nat_irc && ct->status & IPS_NAT_MASK)
- ret = nf_nat_irc(skb, ctinfo,
+ ret = nf_nat_irc(skb, ctinfo, protoff,
addr_beg_p - ib_ptr,
addr_end_p - addr_beg_p,
exp);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 14f67a2cbcb5..7bbfb3deea30 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,7 @@
#include <net/netfilter/nf_conntrack_timestamp.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_helper.h>
#endif
@@ -418,16 +418,16 @@ nla_put_failure:
}
static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nf_conn *ct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+ unsigned int flags = portid ? NLM_F_MULTI : 0, event;
event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -604,7 +604,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto errout;
type |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -680,7 +680,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
rcu_read_unlock();
nlmsg_end(skb, nlh);
- err = nfnetlink_send(skb, net, item->pid, group, item->report,
+ err = nfnetlink_send(skb, net, item->portid, group, item->report,
GFP_ATOMIC);
if (err == -ENOBUFS || err == -EAGAIN)
return -ENOBUFS;
@@ -757,7 +757,7 @@ restart:
#endif
rcu_read_lock();
res =
- ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct);
@@ -961,7 +961,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
else {
/* Flush the whole table */
nf_conntrack_flush_report(net,
- NETLINK_CB(skb).pid,
+ NETLINK_CB(skb).portid,
nlmsg_report(nlh));
return 0;
}
@@ -985,7 +985,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (del_timer(&ct->timeout)) {
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
- NETLINK_CB(skb).pid,
+ NETLINK_CB(skb).portid,
nlmsg_report(nlh)) < 0) {
nf_ct_delete_from_lists(ct);
/* we failed to report the event, try later */
@@ -1069,14 +1069,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
rcu_read_lock();
- err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
+ err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
rcu_read_unlock();
nf_ct_put(ct);
if (err <= 0)
goto free;
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (err < 0)
goto out;
@@ -1096,13 +1096,14 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
const struct nlattr *attr)
{
typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup;
+ int err;
parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook);
if (!parse_nat_setup) {
#ifdef CONFIG_MODULES
rcu_read_unlock();
nfnl_unlock();
- if (request_module("nf-nat-ipv4") < 0) {
+ if (request_module("nf-nat") < 0) {
nfnl_lock();
rcu_read_lock();
return -EOPNOTSUPP;
@@ -1115,7 +1116,23 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
return -EOPNOTSUPP;
}
- return parse_nat_setup(ct, manip, attr);
+ err = parse_nat_setup(ct, manip, attr);
+ if (err == -EAGAIN) {
+#ifdef CONFIG_MODULES
+ rcu_read_unlock();
+ nfnl_unlock();
+ if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) {
+ nfnl_lock();
+ rcu_read_lock();
+ return -EOPNOTSUPP;
+ }
+ nfnl_lock();
+ rcu_read_lock();
+#else
+ err = -EOPNOTSUPP;
+#endif
+ }
+ return err;
}
#endif
@@ -1221,7 +1238,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
if (help) {
if (help->helper == helper) {
/* update private helper data if allowed. */
- if (helper->from_nlattr && helpinfo)
+ if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
return 0;
} else
@@ -1450,7 +1467,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
goto err2;
}
/* set private helper data if allowed. */
- if (helper->from_nlattr && helpinfo)
+ if (helper->from_nlattr)
helper->from_nlattr(helpinfo, ct);
/* not in hash table yet so not strictly necessary */
@@ -1596,7 +1613,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
(1 << IPCT_PROTOINFO) |
(1 << IPCT_NATSEQADJ) |
(1 << IPCT_MARK) | events,
- ct, NETLINK_CB(skb).pid,
+ ct, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nf_ct_put(ct);
}
@@ -1618,7 +1635,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
(1 << IPCT_PROTOINFO) |
(1 << IPCT_NATSEQADJ) |
(1 << IPCT_MARK),
- ct, NETLINK_CB(skb).pid,
+ ct, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
}
}
@@ -1628,15 +1645,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
static int
-ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
__u16 cpu, const struct ip_conntrack_stat *st)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+ unsigned int flags = portid ? NLM_F_MULTI : 0, event;
event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU);
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -1688,7 +1705,7 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
st = per_cpu_ptr(net->ct.stat, cpu);
if (ctnetlink_ct_stat_cpu_fill_info(skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
cpu, st) < 0)
break;
@@ -1714,16 +1731,16 @@ ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb,
}
static int
-ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct net *net)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+ unsigned int flags = portid ? NLM_F_MULTI : 0, event;
unsigned int nr_conntracks = atomic_read(&net->ct.count);
event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS);
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -1756,14 +1773,14 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb,
if (skb2 == NULL)
return -ENOMEM;
- err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid,
+ err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
sock_net(skb->sk));
if (err <= 0)
goto free;
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (err < 0)
goto out;
@@ -1896,10 +1913,15 @@ static int
ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
{
struct nlattr *cda[CTA_MAX+1];
+ int ret;
nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy);
- return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+ spin_lock_bh(&nf_conntrack_lock);
+ ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+ spin_unlock_bh(&nf_conntrack_lock);
+
+ return ret;
}
static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
@@ -1974,6 +1996,8 @@ nla_put_failure:
return -1;
}
+static const union nf_inet_addr any_addr;
+
static int
ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
@@ -2000,7 +2024,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
goto nla_put_failure;
#ifdef CONFIG_NF_NAT_NEEDED
- if (exp->saved_ip || exp->saved_proto.all) {
+ if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) ||
+ exp->saved_proto.all) {
nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
@@ -2009,7 +2034,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
goto nla_put_failure;
nat_tuple.src.l3num = nf_ct_l3num(master);
- nat_tuple.src.u3.ip = exp->saved_ip;
+ nat_tuple.src.u3 = exp->saved_addr;
nat_tuple.dst.protonum = nf_ct_protonum(master);
nat_tuple.src.u = exp->saved_proto;
@@ -2045,15 +2070,15 @@ nla_put_failure:
}
static int
-ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
int event, const struct nf_conntrack_expect *exp)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned int flags = pid ? NLM_F_MULTI : 0;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2104,7 +2129,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
goto errout;
type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2119,7 +2144,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
rcu_read_unlock();
nlmsg_end(skb, nlh);
- nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC);
+ nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
return 0;
nla_put_failure:
@@ -2162,7 +2187,7 @@ restart:
cb->args[1] = 0;
}
if (ctnetlink_exp_fill_info(skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
@@ -2255,14 +2280,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
}
rcu_read_lock();
- err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
+ err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
rcu_read_unlock();
nf_ct_expect_put(exp);
if (err <= 0)
goto free;
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
if (err < 0)
goto out;
@@ -2316,7 +2341,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
/* after list removal, usage count == 1 */
spin_lock_bh(&nf_conntrack_lock);
if (del_timer(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid,
+ nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nf_ct_expect_put(exp);
}
@@ -2338,7 +2363,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
if (!strcmp(m_help->helper->name, name) &&
del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).pid,
+ NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nf_ct_expect_put(exp);
}
@@ -2354,7 +2379,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
hnode) {
if (del_timer(&exp->timeout)) {
nf_ct_unlink_expect_report(exp,
- NETLINK_CB(skb).pid,
+ NETLINK_CB(skb).portid,
nlmsg_report(nlh));
nf_ct_expect_put(exp);
}
@@ -2405,7 +2430,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
if (err < 0)
return err;
- exp->saved_ip = nat_tuple.src.u3.ip;
+ exp->saved_addr = nat_tuple.src.u3;
exp->saved_proto = nat_tuple.src.u;
exp->dir = ntohl(nla_get_be32(tb[CTA_EXPECT_NAT_DIR]));
@@ -2419,7 +2444,7 @@ static int
ctnetlink_create_expect(struct net *net, u16 zone,
const struct nlattr * const cda[],
u_int8_t u3,
- u32 pid, int report)
+ u32 portid, int report)
{
struct nf_conntrack_tuple tuple, mask, master_tuple;
struct nf_conntrack_tuple_hash *h = NULL;
@@ -2532,7 +2557,7 @@ ctnetlink_create_expect(struct net *net, u16 zone,
if (err < 0)
goto err_out;
}
- err = nf_ct_expect_related_report(exp, pid, report);
+ err = nf_ct_expect_related_report(exp, portid, report);
err_out:
nf_ct_expect_put(exp);
out:
@@ -2575,7 +2600,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_CREATE) {
err = ctnetlink_create_expect(net, zone, cda,
u3,
- NETLINK_CB(skb).pid,
+ NETLINK_CB(skb).portid,
nlmsg_report(nlh));
}
return err;
@@ -2590,15 +2615,15 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
}
static int
-ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu,
+ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu,
const struct ip_conntrack_stat *st)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+ unsigned int flags = portid ? NLM_F_MULTI : 0, event;
event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU);
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -2637,7 +2662,7 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
st = per_cpu_ptr(net->ct.stat, cpu);
- if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
cpu, st) < 0)
break;
@@ -2785,7 +2810,8 @@ static int __init ctnetlink_init(void)
goto err_unreg_subsys;
}
- if (register_pernet_subsys(&ctnetlink_net_ops)) {
+ ret = register_pernet_subsys(&ctnetlink_net_ops);
+ if (ret < 0) {
pr_err("ctnetlink_init: cannot register pernet operations\n");
goto err_unreg_exp_subsys;
}
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 6fed9ec35248..cc7669ef0b95 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -45,14 +45,14 @@ static DEFINE_SPINLOCK(nf_pptp_lock);
int
(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
+ unsigned int protoff, struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
int
(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
+ unsigned int protoff, struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound);
@@ -262,7 +262,7 @@ out_unexpect_orig:
}
static inline int
-pptp_inbound_pkt(struct sk_buff *skb,
+pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq,
unsigned int reqlen,
@@ -376,7 +376,8 @@ pptp_inbound_pkt(struct sk_buff *skb,
nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq);
+ return nf_nat_pptp_inbound(skb, ct, ctinfo,
+ protoff, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -389,7 +390,7 @@ invalid:
}
static inline int
-pptp_outbound_pkt(struct sk_buff *skb,
+pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq,
unsigned int reqlen,
@@ -471,7 +472,8 @@ pptp_outbound_pkt(struct sk_buff *skb,
nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq);
+ return nf_nat_pptp_outbound(skb, ct, ctinfo,
+ protoff, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -570,11 +572,11 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
* established from PNS->PAC. However, RFC makes no guarantee */
if (dir == IP_CT_DIR_ORIGINAL)
/* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
+ ret = pptp_outbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct,
ctinfo);
else
/* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
+ ret = pptp_inbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct,
ctinfo);
pr_debug("sstate: %d->%d, cstate: %d->%d\n",
oldsstate, info->sstate, oldcstate, info->cstate);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0dc63854390f..51e928db48c8 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -21,7 +21,6 @@
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
-#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
@@ -294,9 +293,7 @@ void nf_conntrack_l3proto_unregister(struct net *net,
nf_ct_l3proto_unregister_sysctl(net, proto);
/* Remove all contrack entries for this protocol */
- rtnl_lock();
nf_ct_iterate_cleanup(net, kill_l3proto, proto);
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
@@ -502,9 +499,7 @@ void nf_conntrack_l4proto_unregister(struct net *net,
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
/* Remove all contrack entries for this protocol */
- rtnl_lock();
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
- rtnl_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index a5ac11ebef33..61f9285111d1 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -158,21 +158,18 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sCL -> sSS
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
-/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
+/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
/*
* sNO -> sIV Too late and no reason to do anything
* sSS -> sIV Client can't send SYN and then SYN/ACK
* sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
- * sSR -> sIG
- * sES -> sIG Error: SYNs in window outside the SYN_SENT state
- * are errors. Receiver will reply with RST
- * and close the connection.
- * Or we are not in sync and hold a dead connection.
- * sFW -> sIG
- * sCW -> sIG
- * sLA -> sIG
- * sTW -> sIG
- * sCL -> sIG
+ * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
+ * sES -> sIV Invalid SYN/ACK packets sent by the client
+ * sFW -> sIV
+ * sCW -> sIV
+ * sLA -> sIV
+ * sTW -> sIV
+ * sCL -> sIV
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
@@ -505,10 +502,10 @@ static inline s16 nat_offset(const struct nf_conn *ct,
return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
}
-#define NAT_OFFSET(pf, ct, dir, seq) \
- (pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
+#define NAT_OFFSET(ct, dir, seq) \
+ (nat_offset(ct, dir, seq))
#else
-#define NAT_OFFSET(pf, ct, dir, seq) 0
+#define NAT_OFFSET(ct, dir, seq) 0
#endif
static bool tcp_in_window(const struct nf_conn *ct,
@@ -541,7 +538,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
tcp_sack(skb, dataoff, tcph, &sack);
/* Take into account NAT sequence number mangling */
- receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
+ receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
ack -= receiver_offset;
sack -= receiver_offset;
@@ -633,15 +630,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
ack = sack = receiver->td_end;
}
- if (seq == end
- && (!tcph->rst
- || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
+ if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
/*
- * Packets contains no data: we assume it is valid
- * and check the ack value only.
- * However RST segments are always validated by their
- * SEQ number, except when seq == 0 (reset sent answering
- * SYN.
+ * RST sent answering SYN.
*/
seq = end = sender->td_end;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 758a1bacc126..df8f4f284481 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -52,15 +52,17 @@ module_param(sip_direct_media, int, 0600);
MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
"endpoints only (default 1)");
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff,
- const char **dptr,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff, const char **dptr,
unsigned int *datalen) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
-void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly;
+void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff,
+ s16 off) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
+ unsigned int protoff,
unsigned int dataoff,
const char **dptr,
unsigned int *datalen,
@@ -69,7 +71,8 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
unsigned int matchlen) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff,
+unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr,
unsigned int *datalen,
unsigned int sdpoff,
@@ -79,7 +82,8 @@ unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff,
__read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff,
+unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr,
unsigned int *datalen,
unsigned int matchoff,
@@ -88,6 +92,7 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff,
EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
+ unsigned int protoff,
unsigned int dataoff,
const char **dptr,
unsigned int *datalen,
@@ -96,7 +101,8 @@ unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
__read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff,
+unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr,
unsigned int *datalen,
struct nf_conntrack_expect *rtp_exp,
@@ -183,12 +189,12 @@ static int media_len(const struct nf_conn *ct, const char *dptr,
return len + digits_len(ct, dptr, limit, shift);
}
-static int parse_addr(const struct nf_conn *ct, const char *cp,
- const char **endp, union nf_inet_addr *addr,
- const char *limit)
+static int sip_parse_addr(const struct nf_conn *ct, const char *cp,
+ const char **endp, union nf_inet_addr *addr,
+ const char *limit, bool delim)
{
const char *end;
- int ret = 0;
+ int ret;
if (!ct)
return 0;
@@ -197,16 +203,28 @@ static int parse_addr(const struct nf_conn *ct, const char *cp,
switch (nf_ct_l3num(ct)) {
case AF_INET:
ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+ if (ret == 0)
+ return 0;
break;
case AF_INET6:
+ if (cp < limit && *cp == '[')
+ cp++;
+ else if (delim)
+ return 0;
+
ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+ if (ret == 0)
+ return 0;
+
+ if (end < limit && *end == ']')
+ end++;
+ else if (delim)
+ return 0;
break;
default:
BUG();
}
- if (ret == 0 || end == cp)
- return 0;
if (endp)
*endp = end;
return 1;
@@ -219,7 +237,7 @@ static int epaddr_len(const struct nf_conn *ct, const char *dptr,
union nf_inet_addr addr;
const char *aux = dptr;
- if (!parse_addr(ct, dptr, &dptr, &addr, limit)) {
+ if (!sip_parse_addr(ct, dptr, &dptr, &addr, limit, true)) {
pr_debug("ip: %s parse failed.!\n", dptr);
return 0;
}
@@ -296,7 +314,7 @@ int ct_sip_parse_request(const struct nf_conn *ct,
return 0;
dptr += shift;
- if (!parse_addr(ct, dptr, &end, addr, limit))
+ if (!sip_parse_addr(ct, dptr, &end, addr, limit, true))
return -1;
if (end < limit && *end == ':') {
end++;
@@ -550,7 +568,7 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr,
if (ret == 0)
return ret;
- if (!parse_addr(ct, dptr + *matchoff, &c, addr, limit))
+ if (!sip_parse_addr(ct, dptr + *matchoff, &c, addr, limit, true))
return -1;
if (*c == ':') {
c++;
@@ -599,7 +617,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
unsigned int dataoff, unsigned int datalen,
const char *name,
unsigned int *matchoff, unsigned int *matchlen,
- union nf_inet_addr *addr)
+ union nf_inet_addr *addr, bool delim)
{
const char *limit = dptr + datalen;
const char *start, *end;
@@ -613,7 +631,7 @@ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr,
return 0;
start += strlen(name);
- if (!parse_addr(ct, start, &end, addr, limit))
+ if (!sip_parse_addr(ct, start, &end, addr, limit, delim))
return 0;
*matchoff = start - dptr;
*matchlen = end - start;
@@ -675,6 +693,47 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
return 1;
}
+static int sdp_parse_addr(const struct nf_conn *ct, const char *cp,
+ const char **endp, union nf_inet_addr *addr,
+ const char *limit)
+{
+ const char *end;
+ int ret;
+
+ memset(addr, 0, sizeof(*addr));
+ switch (nf_ct_l3num(ct)) {
+ case AF_INET:
+ ret = in4_pton(cp, limit - cp, (u8 *)&addr->ip, -1, &end);
+ break;
+ case AF_INET6:
+ ret = in6_pton(cp, limit - cp, (u8 *)&addr->ip6, -1, &end);
+ break;
+ default:
+ BUG();
+ }
+
+ if (ret == 0)
+ return 0;
+ if (endp)
+ *endp = end;
+ return 1;
+}
+
+/* skip ip address. returns its length. */
+static int sdp_addr_len(const struct nf_conn *ct, const char *dptr,
+ const char *limit, int *shift)
+{
+ union nf_inet_addr addr;
+ const char *aux = dptr;
+
+ if (!sdp_parse_addr(ct, dptr, &dptr, &addr, limit)) {
+ pr_debug("ip: %s parse failed.!\n", dptr);
+ return 0;
+ }
+
+ return dptr - aux;
+}
+
/* SDP header parsing: a SDP session description contains an ordered set of
* headers, starting with a section containing general session parameters,
* optionally followed by multiple media descriptions.
@@ -684,13 +743,18 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr,
* be tolerant and also accept records terminated with a single newline
* character". We handle both cases.
*/
-static const struct sip_header ct_sdp_hdrs[] = {
- [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len),
- [SDP_HDR_OWNER_IP4] = SDP_HDR("o=", "IN IP4 ", epaddr_len),
- [SDP_HDR_CONNECTION_IP4] = SDP_HDR("c=", "IN IP4 ", epaddr_len),
- [SDP_HDR_OWNER_IP6] = SDP_HDR("o=", "IN IP6 ", epaddr_len),
- [SDP_HDR_CONNECTION_IP6] = SDP_HDR("c=", "IN IP6 ", epaddr_len),
- [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len),
+static const struct sip_header ct_sdp_hdrs_v4[] = {
+ [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len),
+ [SDP_HDR_OWNER] = SDP_HDR("o=", "IN IP4 ", sdp_addr_len),
+ [SDP_HDR_CONNECTION] = SDP_HDR("c=", "IN IP4 ", sdp_addr_len),
+ [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len),
+};
+
+static const struct sip_header ct_sdp_hdrs_v6[] = {
+ [SDP_HDR_VERSION] = SDP_HDR("v=", NULL, digits_len),
+ [SDP_HDR_OWNER] = SDP_HDR("o=", "IN IP6 ", sdp_addr_len),
+ [SDP_HDR_CONNECTION] = SDP_HDR("c=", "IN IP6 ", sdp_addr_len),
+ [SDP_HDR_MEDIA] = SDP_HDR("m=", NULL, media_len),
};
/* Linear string search within SDP header values */
@@ -716,11 +780,14 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
enum sdp_header_types term,
unsigned int *matchoff, unsigned int *matchlen)
{
- const struct sip_header *hdr = &ct_sdp_hdrs[type];
- const struct sip_header *thdr = &ct_sdp_hdrs[term];
+ const struct sip_header *hdrs, *hdr, *thdr;
const char *start = dptr, *limit = dptr + datalen;
int shift = 0;
+ hdrs = nf_ct_l3num(ct) == NFPROTO_IPV4 ? ct_sdp_hdrs_v4 : ct_sdp_hdrs_v6;
+ hdr = &hdrs[type];
+ thdr = &hdrs[term];
+
for (dptr += dataoff; dptr < limit; dptr++) {
/* Find beginning of line */
if (*dptr != '\r' && *dptr != '\n')
@@ -775,8 +842,8 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr,
if (ret <= 0)
return ret;
- if (!parse_addr(ct, dptr + *matchoff, NULL, addr,
- dptr + *matchoff + *matchlen))
+ if (!sdp_parse_addr(ct, dptr + *matchoff, NULL, addr,
+ dptr + *matchoff + *matchlen))
return -1;
return 1;
}
@@ -830,7 +897,8 @@ static void flush_expectations(struct nf_conn *ct, bool media)
spin_unlock_bh(&nf_conntrack_lock);
}
-static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
+static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
union nf_inet_addr *daddr, __be16 port,
enum sip_expectation_classes class,
@@ -886,12 +954,12 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
exp->class != class)
break;
#ifdef CONFIG_NF_NAT_NEEDED
- if (exp->tuple.src.l3num == AF_INET && !direct_rtp &&
- (exp->saved_ip != exp->tuple.dst.u3.ip ||
+ if (!direct_rtp &&
+ (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) ||
exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) &&
ct->status & IPS_NAT_MASK) {
- daddr->ip = exp->saved_ip;
- tuple.dst.u3.ip = exp->saved_ip;
+ *daddr = exp->saved_addr;
+ tuple.dst.u3 = exp->saved_addr;
tuple.dst.u.udp.port = exp->saved_proto.udp.port;
direct_rtp = 1;
} else
@@ -907,7 +975,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
if (direct_rtp) {
nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
if (nf_nat_sdp_port &&
- !nf_nat_sdp_port(skb, dataoff, dptr, datalen,
+ !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
mediaoff, medialen, ntohs(rtp_port)))
goto err1;
}
@@ -929,7 +997,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
- ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen,
+ ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen,
rtp_exp, rtcp_exp,
mediaoff, medialen, daddr);
else {
@@ -970,7 +1038,8 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr,
return NULL;
}
-static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
+static int process_sdp(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq)
{
@@ -983,15 +1052,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
unsigned int i;
union nf_inet_addr caddr, maddr, rtp_addr;
unsigned int port;
- enum sdp_header_types c_hdr;
const struct sdp_media_type *t;
int ret = NF_ACCEPT;
typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr;
typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session;
nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook);
- c_hdr = nf_ct_l3num(ct) == AF_INET ? SDP_HDR_CONNECTION_IP4 :
- SDP_HDR_CONNECTION_IP6;
/* Find beginning of session description */
if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
@@ -1005,7 +1071,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
* the end of the session description. */
caddr_len = 0;
if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen,
- c_hdr, SDP_HDR_MEDIA,
+ SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
&matchoff, &matchlen, &caddr) > 0)
caddr_len = matchlen;
@@ -1035,7 +1101,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
/* The media description overrides the session description. */
maddr_len = 0;
if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen,
- c_hdr, SDP_HDR_MEDIA,
+ SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
&matchoff, &matchlen, &maddr) > 0) {
maddr_len = matchlen;
memcpy(&rtp_addr, &maddr, sizeof(rtp_addr));
@@ -1044,7 +1110,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
else
return NF_DROP;
- ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen,
+ ret = set_expected_rtp_rtcp(skb, protoff, dataoff,
+ dptr, datalen,
&rtp_addr, htons(port), t->class,
mediaoff, medialen);
if (ret != NF_ACCEPT)
@@ -1052,8 +1119,9 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
/* Update media connection address if present */
if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
- ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen,
- mediaoff, c_hdr, SDP_HDR_MEDIA,
+ ret = nf_nat_sdp_addr(skb, protoff, dataoff,
+ dptr, datalen, mediaoff,
+ SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
&rtp_addr);
if (ret != NF_ACCEPT)
return ret;
@@ -1064,12 +1132,13 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
/* Update session connection and owner addresses */
nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
- ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff,
- &rtp_addr);
+ ret = nf_nat_sdp_session(skb, protoff, dataoff,
+ dptr, datalen, sdpoff, &rtp_addr);
return ret;
}
-static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_invite_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq, unsigned int code)
{
@@ -1079,13 +1148,14 @@ static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
if ((code >= 100 && code <= 199) ||
(code >= 200 && code <= 299))
- return process_sdp(skb, dataoff, dptr, datalen, cseq);
+ return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
else if (ct_sip_info->invite_cseq == cseq)
flush_expectations(ct, true);
return NF_ACCEPT;
}
-static int process_update_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_update_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq, unsigned int code)
{
@@ -1095,13 +1165,14 @@ static int process_update_response(struct sk_buff *skb, unsigned int dataoff,
if ((code >= 100 && code <= 199) ||
(code >= 200 && code <= 299))
- return process_sdp(skb, dataoff, dptr, datalen, cseq);
+ return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
else if (ct_sip_info->invite_cseq == cseq)
flush_expectations(ct, true);
return NF_ACCEPT;
}
-static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_prack_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq, unsigned int code)
{
@@ -1111,13 +1182,14 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
if ((code >= 100 && code <= 199) ||
(code >= 200 && code <= 299))
- return process_sdp(skb, dataoff, dptr, datalen, cseq);
+ return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
else if (ct_sip_info->invite_cseq == cseq)
flush_expectations(ct, true);
return NF_ACCEPT;
}
-static int process_invite_request(struct sk_buff *skb, unsigned int dataoff,
+static int process_invite_request(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq)
{
@@ -1127,13 +1199,14 @@ static int process_invite_request(struct sk_buff *skb, unsigned int dataoff,
unsigned int ret;
flush_expectations(ct, true);
- ret = process_sdp(skb, dataoff, dptr, datalen, cseq);
+ ret = process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
if (ret == NF_ACCEPT)
ct_sip_info->invite_cseq = cseq;
return ret;
}
-static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
+static int process_bye_request(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq)
{
@@ -1148,7 +1221,8 @@ static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
* signalling connections. The expectation is marked inactive and is activated
* when receiving a response indicating success from the registrar.
*/
-static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
+static int process_register_request(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq)
{
@@ -1223,8 +1297,8 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
- ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp,
- matchoff, matchlen);
+ ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen,
+ exp, matchoff, matchlen);
else {
if (nf_ct_expect_related(exp) != 0)
ret = NF_DROP;
@@ -1239,7 +1313,8 @@ store_cseq:
return ret;
}
-static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_register_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int cseq, unsigned int code)
{
@@ -1321,7 +1396,8 @@ static const struct sip_handler sip_handlers[] = {
SIP_HANDLER("REGISTER", process_register_request, process_register_response),
};
-static int process_sip_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_sip_response(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen)
{
enum ip_conntrack_info ctinfo;
@@ -1352,13 +1428,14 @@ static int process_sip_response(struct sk_buff *skb, unsigned int dataoff,
if (*datalen < matchend + handler->len ||
strnicmp(*dptr + matchend, handler->method, handler->len))
continue;
- return handler->response(skb, dataoff, dptr, datalen,
+ return handler->response(skb, protoff, dataoff, dptr, datalen,
cseq, code);
}
return NF_ACCEPT;
}
-static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
+static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen)
{
enum ip_conntrack_info ctinfo;
@@ -1383,26 +1460,28 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
if (!cseq)
return NF_DROP;
- return handler->request(skb, dataoff, dptr, datalen, cseq);
+ return handler->request(skb, protoff, dataoff, dptr, datalen,
+ cseq);
}
return NF_ACCEPT;
}
static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
- unsigned int dataoff, const char **dptr,
- unsigned int *datalen)
+ unsigned int protoff, unsigned int dataoff,
+ const char **dptr, unsigned int *datalen)
{
typeof(nf_nat_sip_hook) nf_nat_sip;
int ret;
if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
- ret = process_sip_request(skb, dataoff, dptr, datalen);
+ ret = process_sip_request(skb, protoff, dataoff, dptr, datalen);
else
- ret = process_sip_response(skb, dataoff, dptr, datalen);
+ ret = process_sip_response(skb, protoff, dataoff, dptr, datalen);
if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
- if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen))
+ if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff,
+ dptr, datalen))
ret = NF_DROP;
}
@@ -1470,7 +1549,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
if (msglen > datalen)
return NF_DROP;
- ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen);
+ ret = process_sip_msg(skb, ct, protoff, dataoff,
+ &dptr, &msglen);
if (ret != NF_ACCEPT)
break;
diff = msglen - origlen;
@@ -1484,7 +1564,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
if (nf_nat_sip_seq_adjust)
- nf_nat_sip_seq_adjust(skb, tdiff);
+ nf_nat_sip_seq_adjust(skb, protoff, tdiff);
}
return ret;
@@ -1511,11 +1591,10 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
if (datalen < strlen("SIP/2.0 200"))
return NF_ACCEPT;
- return process_sip_msg(skb, ct, dataoff, &dptr, &datalen);
+ return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen);
}
static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
-static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly;
static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
[SIP_EXPECT_SIGNALLING] = {
@@ -1585,9 +1664,9 @@ static int __init nf_conntrack_sip_init(void)
sip[i][j].me = THIS_MODULE;
if (ports[i] == SIP_PORT)
- sprintf(sip_names[i][j], "sip");
+ sprintf(sip[i][j].name, "sip");
else
- sprintf(sip_names[i][j], "sip-%u", i);
+ sprintf(sip[i][j].name, "sip-%u", i);
pr_debug("port #%u: %u\n", i, ports[i]);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 770f76432ad0..3deec997be89 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -18,13 +18,13 @@ extern unsigned int nf_iterate(struct list_head *head,
unsigned int hook,
const struct net_device *indev,
const struct net_device *outdev,
- struct list_head **i,
+ struct nf_hook_ops **elemp,
int (*okfn)(struct sk_buff *),
int hook_thresh);
/* nf_queue.c */
extern int nf_queue(struct sk_buff *skb,
- struct list_head *elem,
+ struct nf_hook_ops *elem,
u_int8_t pf, unsigned int hook,
struct net_device *indev,
struct net_device *outdev,
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index 3c04d24e2976..42d337881171 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -16,7 +16,6 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <linux/netfilter/nf_conntrack_amanda.h>
MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
@@ -26,6 +25,7 @@ MODULE_ALIAS("ip_nat_amanda");
static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conntrack_expect *exp)
@@ -61,7 +61,7 @@ static unsigned int help(struct sk_buff *skb,
sprintf(buffer, "%u", port);
ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
- matchoff, matchlen,
+ protoff, matchoff, matchlen,
buffer, strlen(buffer));
if (ret != NF_ACCEPT)
nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 44b082fd48ab..5f2f9109f461 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1,7 +1,7 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -13,38 +13,105 @@
#include <linux/timer.h>
#include <linux/skbuff.h>
#include <linux/gfp.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h> /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
+#include <net/xfrm.h>
#include <linux/jhash.h>
+#include <linux/rtnetlink.h>
-#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/netfilter/nf_nat.h>
static DEFINE_SPINLOCK(nf_nat_lock);
-static struct nf_conntrack_l3proto *l3proto __read_mostly;
-
-#define MAX_IP_NAT_PROTO 256
-static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO]
+static DEFINE_MUTEX(nf_nat_proto_mutex);
+static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
+ __read_mostly;
+static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-static inline const struct nf_nat_protocol *
-__nf_nat_proto_find(u_int8_t protonum)
+
+inline const struct nf_nat_l3proto *
+__nf_nat_l3proto_find(u8 family)
+{
+ return rcu_dereference(nf_nat_l3protos[family]);
+}
+
+inline const struct nf_nat_l4proto *
+__nf_nat_l4proto_find(u8 family, u8 protonum)
+{
+ return rcu_dereference(nf_nat_l4protos[family][protonum]);
+}
+EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find);
+
+#ifdef CONFIG_XFRM
+static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+ const struct nf_nat_l3proto *l3proto;
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ unsigned long statusbit;
+ u8 family;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return;
+
+ family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+ rcu_read_lock();
+ l3proto = __nf_nat_l3proto_find(family);
+ if (l3proto == NULL)
+ goto out;
+
+ dir = CTINFO2DIR(ctinfo);
+ if (dir == IP_CT_DIR_ORIGINAL)
+ statusbit = IPS_DST_NAT;
+ else
+ statusbit = IPS_SRC_NAT;
+
+ l3proto->decode_session(skb, ct, dir, statusbit, fl);
+out:
+ rcu_read_unlock();
+}
+
+int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
{
- return rcu_dereference(nf_nat_protos[protonum]);
+ struct flowi fl;
+ unsigned int hh_len;
+ struct dst_entry *dst;
+
+ if (xfrm_decode_session(skb, &fl, family) < 0)
+ return -1;
+
+ dst = skb_dst(skb);
+ if (dst->xfrm)
+ dst = ((struct xfrm_dst *)dst)->route;
+ dst_hold(dst);
+
+ dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
+ if (IS_ERR(dst))
+ return -1;
+
+ skb_dst_drop(skb);
+ skb_dst_set(skb, dst);
+
+ /* Change in oif may mean change in hh_len. */
+ hh_len = skb_dst(skb)->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+ return -1;
+ return 0;
}
+EXPORT_SYMBOL(nf_xfrm_me_harder);
+#endif /* CONFIG_XFRM */
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
@@ -54,10 +121,9 @@ hash_by_src(const struct net *net, u16 zone,
unsigned int hash;
/* Original src, to ensure we map it consistently if poss. */
- hash = jhash_3words((__force u32)tuple->src.u3.ip,
- (__force u32)tuple->src.u.all ^ zone,
- tuple->dst.protonum, nf_conntrack_hash_rnd);
- return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
+ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+ tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
+ return ((u64)hash * net->ct.nat_htable_size) >> 32;
}
/* Is this tuple already taken? (not by us) */
@@ -66,10 +132,11 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack)
{
/* Conntrack tracking doesn't keep track of outgoing tuples; only
- incoming ones. NAT means they don't have a fixed mapping,
- so we invert the tuple and look for the incoming reply.
-
- We could keep a separate hash if this proves too slow. */
+ * incoming ones. NAT means they don't have a fixed mapping,
+ * so we invert the tuple and look for the incoming reply.
+ *
+ * We could keep a separate hash if this proves too slow.
+ */
struct nf_conntrack_tuple reply;
nf_ct_invert_tuplepr(&reply, tuple);
@@ -78,31 +145,26 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
EXPORT_SYMBOL(nf_nat_used_tuple);
/* If we source map this tuple so reply looks like reply_tuple, will
- * that meet the constraints of range. */
-static int
-in_range(const struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range)
+ * that meet the constraints of range.
+ */
+static int in_range(const struct nf_nat_l3proto *l3proto,
+ const struct nf_nat_l4proto *l4proto,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range)
{
- const struct nf_nat_protocol *proto;
- int ret = 0;
-
/* If we are supposed to map IPs, then we must be in the
- range specified, otherwise let this drag us onto a new src IP. */
- if (range->flags & NF_NAT_RANGE_MAP_IPS) {
- if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
- ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
- return 0;
- }
+ * range specified, otherwise let this drag us onto a new src IP.
+ */
+ if (range->flags & NF_NAT_RANGE_MAP_IPS &&
+ !l3proto->in_range(tuple, range))
+ return 0;
- rcu_read_lock();
- proto = __nf_nat_proto_find(tuple->dst.protonum);
if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
- proto->in_range(tuple, NF_NAT_MANIP_SRC,
- &range->min, &range->max))
- ret = 1;
- rcu_read_unlock();
+ l4proto->in_range(tuple, NF_NAT_MANIP_SRC,
+ &range->min_proto, &range->max_proto))
+ return 1;
- return ret;
+ return 0;
}
static inline int
@@ -113,24 +175,25 @@ same_src(const struct nf_conn *ct,
t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
return (t->dst.protonum == tuple->dst.protonum &&
- t->src.u3.ip == tuple->src.u3.ip &&
+ nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) &&
t->src.u.all == tuple->src.u.all);
}
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net, u16 zone,
+ const struct nf_nat_l3proto *l3proto,
+ const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
- const struct nf_nat_ipv4_range *range)
+ const struct nf_nat_range *range)
{
unsigned int h = hash_by_src(net, zone, tuple);
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
const struct hlist_node *n;
- rcu_read_lock();
- hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
+ hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
/* Copy source part from reply tuple. */
@@ -138,119 +201,148 @@ find_appropriate_src(struct net *net, u16 zone,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
- if (in_range(result, range)) {
- rcu_read_unlock();
+ if (in_range(l3proto, l4proto, result, range))
return 1;
- }
}
}
- rcu_read_unlock();
return 0;
}
/* For [FUTURE] fragmentation handling, we want the least-used
- src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
- if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
- 1-65535, we don't do pro-rata allocation based on ports; we choose
- the ip with the lowest src-ip/dst-ip/proto usage.
-*/
+ * src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
+ * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+ * 1-65535, we don't do pro-rata allocation based on ports; we choose
+ * the ip with the lowest src-ip/dst-ip/proto usage.
+ */
static void
find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+ const struct nf_nat_range *range,
const struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
- __be32 *var_ipp;
+ union nf_inet_addr *var_ipp;
+ unsigned int i, max;
/* Host order */
- u_int32_t minip, maxip, j;
+ u32 minip, maxip, j, dist;
+ bool full_range;
/* No IP mapping? Do nothing. */
if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
return;
if (maniptype == NF_NAT_MANIP_SRC)
- var_ipp = &tuple->src.u3.ip;
+ var_ipp = &tuple->src.u3;
else
- var_ipp = &tuple->dst.u3.ip;
+ var_ipp = &tuple->dst.u3;
/* Fast path: only one choice. */
- if (range->min_ip == range->max_ip) {
- *var_ipp = range->min_ip;
+ if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
+ *var_ipp = range->min_addr;
return;
}
+ if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+ max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
+ else
+ max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;
+
/* Hashing source and destination IPs gives a fairly even
* spread in practice (if there are a small number of IPs
* involved, there usually aren't that many connections
* anyway). The consistency means that servers see the same
* client coming from the same IP (some Internet Banking sites
- * like this), even across reboots. */
- minip = ntohl(range->min_ip);
- maxip = ntohl(range->max_ip);
- j = jhash_2words((__force u32)tuple->src.u3.ip,
- range->flags & NF_NAT_RANGE_PERSISTENT ?
- 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
- j = ((u64)j * (maxip - minip + 1)) >> 32;
- *var_ipp = htonl(minip + j);
+ * like this), even across reboots.
+ */
+ j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
+ range->flags & NF_NAT_RANGE_PERSISTENT ?
+ 0 : (__force u32)tuple->dst.u3.all[max] ^ zone);
+
+ full_range = false;
+ for (i = 0; i <= max; i++) {
+ /* If first bytes of the address are at the maximum, use the
+ * distance. Otherwise use the full range.
+ */
+ if (!full_range) {
+ minip = ntohl((__force __be32)range->min_addr.all[i]);
+ maxip = ntohl((__force __be32)range->max_addr.all[i]);
+ dist = maxip - minip + 1;
+ } else {
+ minip = 0;
+ dist = ~0;
+ }
+
+ var_ipp->all[i] = (__force __u32)
+ htonl(minip + (((u64)j * dist) >> 32));
+ if (var_ipp->all[i] != range->max_addr.all[i])
+ full_range = true;
+
+ if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
+ j ^= (__force u32)tuple->dst.u3.all[i];
+ }
}
-/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
- * we change the source to map into the range. For NF_INET_PRE_ROUTING
+/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
+ * we change the source to map into the range. For NF_INET_PRE_ROUTING
* and NF_INET_LOCAL_OUT, we change the destination to map into the
- * range. It might not be possible to get a unique tuple, but we try.
+ * range. It might not be possible to get a unique tuple, but we try.
* At worst (or if we race), we will end up with a final duplicate in
* __ip_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig_tuple,
- const struct nf_nat_ipv4_range *range,
+ const struct nf_nat_range *range,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
+ const struct nf_nat_l3proto *l3proto;
+ const struct nf_nat_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
- const struct nf_nat_protocol *proto;
u16 zone = nf_ct_zone(ct);
- /* 1) If this srcip/proto/src-proto-part is currently mapped,
- and that same mapping gives a unique tuple within the given
- range, use that.
+ rcu_read_lock();
+ l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
+ l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
+ orig_tuple->dst.protonum);
- This is only required for source (ie. NAT/masq) mappings.
- So far, we don't do local source mappings, so multiple
- manips not an issue. */
+ /* 1) If this srcip/proto/src-proto-part is currently mapped,
+ * and that same mapping gives a unique tuple within the given
+ * range, use that.
+ *
+ * This is only required for source (ie. NAT/masq) mappings.
+ * So far, we don't do local source mappings, so multiple
+ * manips not an issue.
+ */
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
/* try the original tuple first */
- if (in_range(orig_tuple, range)) {
+ if (in_range(l3proto, l4proto, orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
*tuple = *orig_tuple;
- return;
+ goto out;
}
- } else if (find_appropriate_src(net, zone, orig_tuple, tuple,
- range)) {
+ } else if (find_appropriate_src(net, zone, l3proto, l4proto,
+ orig_tuple, tuple, range)) {
pr_debug("get_unique_tuple: Found current src map\n");
if (!nf_nat_used_tuple(tuple, ct))
- return;
+ goto out;
}
}
- /* 2) Select the least-used IP/proto combination in the given
- range. */
+ /* 2) Select the least-used IP/proto combination in the given range */
*tuple = *orig_tuple;
find_best_ips_proto(zone, tuple, range, ct, maniptype);
/* 3) The per-protocol part of the manip is made to map into
- the range to make a unique tuple. */
-
- rcu_read_lock();
- proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
+ * the range to make a unique tuple.
+ */
/* Only bother mapping if it's not already in range and unique */
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
- if (proto->in_range(tuple, maniptype, &range->min,
- &range->max) &&
- (range->min.all == range->max.all ||
+ if (l4proto->in_range(tuple, maniptype,
+ &range->min_proto,
+ &range->max_proto) &&
+ (range->min_proto.all == range->max_proto.all ||
!nf_nat_used_tuple(tuple, ct)))
goto out;
} else if (!nf_nat_used_tuple(tuple, ct)) {
@@ -259,14 +351,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
}
/* Last change: get protocol to try to obtain unique tuple. */
- proto->unique_tuple(tuple, range, maniptype, ct);
+ l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
out:
rcu_read_unlock();
}
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
- const struct nf_nat_ipv4_range *range,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
struct net *net = nf_ct_net(ct);
@@ -288,10 +380,10 @@ nf_nat_setup_info(struct nf_conn *ct,
BUG_ON(nf_nat_initialized(ct, maniptype));
/* What we've got will look like inverse of reply. Normally
- this is what is in the conntrack, except for prior
- manipulations (future optimization: if num_manips == 0,
- orig_tp =
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
+ * this is what is in the conntrack, except for prior
+ * manipulations (future optimization: if num_manips == 0,
+ * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
+ */
nf_ct_invert_tuplepr(&curr_tuple,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
@@ -317,11 +409,11 @@ nf_nat_setup_info(struct nf_conn *ct,
srchash = hash_by_src(net, nf_ct_zone(ct),
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_lock);
- /* nf_conntrack_alter_reply might re-allocate extension area */
+ /* nf_conntrack_alter_reply might re-allocate extension aera */
nat = nfct_nat(ct);
nat->ct = ct;
hlist_add_head_rcu(&nat->bysource,
- &net->ipv4.nat_bysource[srchash]);
+ &net->ct.nat_bysource[srchash]);
spin_unlock_bh(&nf_nat_lock);
}
@@ -335,47 +427,14 @@ nf_nat_setup_info(struct nf_conn *ct,
}
EXPORT_SYMBOL(nf_nat_setup_info);
-/* Returns true if succeeded. */
-static bool
-manip_pkt(u_int16_t proto,
- struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *target,
- enum nf_nat_manip_type maniptype)
-{
- struct iphdr *iph;
- const struct nf_nat_protocol *p;
-
- if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
- return false;
-
- iph = (void *)skb->data + iphdroff;
-
- /* Manipulate protcol part. */
-
- /* rcu_read_lock()ed by nf_hook_slow */
- p = __nf_nat_proto_find(proto);
- if (!p->manip_pkt(skb, iphdroff, target, maniptype))
- return false;
-
- iph = (void *)skb->data + iphdroff;
-
- if (maniptype == NF_NAT_MANIP_SRC) {
- csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
- iph->saddr = target->src.u3.ip;
- } else {
- csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
- iph->daddr = target->dst.u3.ip;
- }
- return true;
-}
-
/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
struct sk_buff *skb)
{
+ const struct nf_nat_l3proto *l3proto;
+ const struct nf_nat_l4proto *l4proto;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
@@ -396,129 +455,176 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
/* We are aiming to look like inverse of other direction. */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
+ l3proto = __nf_nat_l3proto_find(target.src.l3num);
+ l4proto = __nf_nat_l4proto_find(target.src.l3num,
+ target.dst.protonum);
+ if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
return NF_DROP;
}
return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);
-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int nf_nat_icmp_reply_translation(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff *skb)
+struct nf_nat_proto_clean {
+ u8 l3proto;
+ u8 l4proto;
+ bool hash;
+};
+
+/* Clear NAT section of all conntracks, in case we're loaded again. */
+static int nf_nat_proto_clean(struct nf_conn *i, void *data)
{
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } *inside;
- struct nf_conntrack_tuple target;
- int hdrlen = ip_hdrlen(skb);
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned long statusbit;
- enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ const struct nf_nat_proto_clean *clean = data;
+ struct nf_conn_nat *nat = nfct_nat(i);
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ if (!nat)
return 0;
-
- inside = (void *)skb->data + hdrlen;
-
- /* We're actually going to mangle it beyond trivial checksum
- adjustment, so make sure the current checksum is correct. */
- if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+ if (!(i->status & IPS_SRC_NAT_DONE))
+ return 0;
+ if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
+ (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
return 0;
- /* Must be RELATED */
- NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
- skb->nfctinfo == IP_CT_RELATED_REPLY);
-
- /* Redirects on non-null nats must be dropped, else they'll
- start talking to each other without our translation, and be
- confused... --RR */
- if (inside->icmp.type == ICMP_REDIRECT) {
- /* If NAT isn't finished, assume it and drop. */
- if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
- return 0;
-
- if (ct->status & IPS_NAT_MASK)
- return 0;
+ if (clean->hash) {
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&nat->bysource);
+ spin_unlock_bh(&nf_nat_lock);
+ } else {
+ memset(nat, 0, sizeof(*nat));
+ i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK |
+ IPS_SEQ_ADJUST);
}
+ return 0;
+}
- if (manip == NF_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- if (!(ct->status & statusbit))
- return 1;
-
- pr_debug("icmp_reply_translation: translating error %p manip %u "
- "dir %s\n", skb, manip,
- dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
- /* Change inner back to look like incoming packet. We do the
- opposite manip on this hook to normal, because it might not
- pass all hooks (locally-generated ICMP). Consider incoming
- packet: PREROUTING (DST manip), routing produces ICMP, goes
- through POSTROUTING (which must correct the DST manip). */
- if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
- &ct->tuplehash[!dir].tuple, !manip))
- return 0;
+static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
+{
+ struct nf_nat_proto_clean clean = {
+ .l3proto = l3proto,
+ .l4proto = l4proto,
+ };
+ struct net *net;
+
+ rtnl_lock();
+ /* Step 1 - remove from bysource hash */
+ clean.hash = true;
+ for_each_net(net)
+ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+ synchronize_rcu();
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- /* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)skb->data + hdrlen;
- inside->icmp.checksum = 0;
- inside->icmp.checksum =
- csum_fold(skb_checksum(skb, hdrlen,
- skb->len - hdrlen, 0));
- }
+ /* Step 2 - clean NAT section */
+ clean.hash = false;
+ for_each_net(net)
+ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+ rtnl_unlock();
+}
- /* Change outer to look the reply to an incoming packet
- * (proto 0 means don't invert per-proto part). */
- nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(0, skb, 0, &target, manip))
- return 0;
+static void nf_nat_l3proto_clean(u8 l3proto)
+{
+ struct nf_nat_proto_clean clean = {
+ .l3proto = l3proto,
+ };
+ struct net *net;
+
+ rtnl_lock();
+ /* Step 1 - remove from bysource hash */
+ clean.hash = true;
+ for_each_net(net)
+ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+ synchronize_rcu();
- return 1;
+ /* Step 2 - clean NAT section */
+ clean.hash = false;
+ for_each_net(net)
+ nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+ rtnl_unlock();
}
-EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
/* Protocol registration. */
-int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
+int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
+ const struct nf_nat_l4proto **l4protos;
+ unsigned int i;
int ret = 0;
- spin_lock_bh(&nf_nat_lock);
+ mutex_lock(&nf_nat_proto_mutex);
+ if (nf_nat_l4protos[l3proto] == NULL) {
+ l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *),
+ GFP_KERNEL);
+ if (l4protos == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < IPPROTO_MAX; i++)
+ RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);
+
+ /* Before making proto_array visible to lockless readers,
+ * we must make sure its content is committed to memory.
+ */
+ smp_wmb();
+
+ nf_nat_l4protos[l3proto] = l4protos;
+ }
+
if (rcu_dereference_protected(
- nf_nat_protos[proto->protonum],
- lockdep_is_held(&nf_nat_lock)
- ) != &nf_nat_unknown_protocol) {
+ nf_nat_l4protos[l3proto][l4proto->l4proto],
+ lockdep_is_held(&nf_nat_proto_mutex)
+ ) != &nf_nat_l4proto_unknown) {
ret = -EBUSY;
goto out;
}
- RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto);
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
out:
- spin_unlock_bh(&nf_nat_lock);
+ mutex_unlock(&nf_nat_proto_mutex);
return ret;
}
-EXPORT_SYMBOL(nf_nat_protocol_register);
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);
/* No one stores the protocol anywhere; simply delete it. */
-void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
+void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
- spin_lock_bh(&nf_nat_lock);
- RCU_INIT_POINTER(nf_nat_protos[proto->protonum],
- &nf_nat_unknown_protocol);
- spin_unlock_bh(&nf_nat_lock);
+ mutex_lock(&nf_nat_proto_mutex);
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
+ &nf_nat_l4proto_unknown);
+ mutex_unlock(&nf_nat_proto_mutex);
synchronize_rcu();
+
+ nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
+
+int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
+{
+ int err;
+
+ err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
+ if (err < 0)
+ return err;
+
+ mutex_lock(&nf_nat_proto_mutex);
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
+ &nf_nat_l4proto_tcp);
+ RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
+ &nf_nat_l4proto_udp);
+ mutex_unlock(&nf_nat_proto_mutex);
+
+ RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_register);
+
+void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
+{
+ mutex_lock(&nf_nat_proto_mutex);
+ RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL);
+ mutex_unlock(&nf_nat_proto_mutex);
+ synchronize_rcu();
+
+ nf_nat_l3proto_clean(l3proto->l3proto);
+ nf_ct_l3proto_module_put(l3proto->l3proto);
}
-EXPORT_SYMBOL(nf_nat_protocol_unregister);
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
@@ -570,34 +676,36 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
static int nfnetlink_parse_nat_proto(struct nlattr *attr,
const struct nf_conn *ct,
- struct nf_nat_ipv4_range *range)
+ struct nf_nat_range *range)
{
struct nlattr *tb[CTA_PROTONAT_MAX+1];
- const struct nf_nat_protocol *npt;
+ const struct nf_nat_l4proto *l4proto;
int err;
err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
if (err < 0)
return err;
- rcu_read_lock();
- npt = __nf_nat_proto_find(nf_ct_protonum(ct));
- if (npt->nlattr_to_range)
- err = npt->nlattr_to_range(tb, range);
- rcu_read_unlock();
+ l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ if (l4proto->nlattr_to_range)
+ err = l4proto->nlattr_to_range(tb, range);
+
return err;
}
static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
- [CTA_NAT_MINIP] = { .type = NLA_U32 },
- [CTA_NAT_MAXIP] = { .type = NLA_U32 },
+ [CTA_NAT_V4_MINIP] = { .type = NLA_U32 },
+ [CTA_NAT_V4_MAXIP] = { .type = NLA_U32 },
+ [CTA_NAT_V6_MINIP] = { .len = sizeof(struct in6_addr) },
+ [CTA_NAT_V6_MAXIP] = { .len = sizeof(struct in6_addr) },
[CTA_NAT_PROTO] = { .type = NLA_NESTED },
};
static int
nfnetlink_parse_nat(const struct nlattr *nat,
- const struct nf_conn *ct, struct nf_nat_ipv4_range *range)
+ const struct nf_conn *ct, struct nf_nat_range *range)
{
+ const struct nf_nat_l3proto *l3proto;
struct nlattr *tb[CTA_NAT_MAX+1];
int err;
@@ -607,25 +715,23 @@ nfnetlink_parse_nat(const struct nlattr *nat,
if (err < 0)
return err;
- if (tb[CTA_NAT_MINIP])
- range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
-
- if (!tb[CTA_NAT_MAXIP])
- range->max_ip = range->min_ip;
- else
- range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
-
- if (range->min_ip)
- range->flags |= NF_NAT_RANGE_MAP_IPS;
+ rcu_read_lock();
+ l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+ if (l3proto == NULL) {
+ err = -EAGAIN;
+ goto out;
+ }
+ err = l3proto->nlattr_to_range(tb, range);
+ if (err < 0)
+ goto out;
if (!tb[CTA_NAT_PROTO])
- return 0;
+ goto out;
err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
- if (err < 0)
- return err;
-
- return 0;
+out:
+ rcu_read_unlock();
+ return err;
}
static int
@@ -633,10 +739,12 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr)
{
- struct nf_nat_ipv4_range range;
+ struct nf_nat_range range;
+ int err;
- if (nfnetlink_parse_nat(attr, ct, &range) < 0)
- return -EINVAL;
+ err = nfnetlink_parse_nat(attr, ct, &range);
+ if (err < 0)
+ return err;
if (nf_nat_initialized(ct, manip))
return -EEXIST;
@@ -655,30 +763,20 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
static int __net_init nf_nat_net_init(struct net *net)
{
/* Leave them the same for the moment. */
- net->ipv4.nat_htable_size = net->ct.htable_size;
- net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
- if (!net->ipv4.nat_bysource)
+ net->ct.nat_htable_size = net->ct.htable_size;
+ net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
+ if (!net->ct.nat_bysource)
return -ENOMEM;
return 0;
}
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct nf_conn *i, void *data)
-{
- struct nf_conn_nat *nat = nfct_nat(i);
-
- if (!nat)
- return 0;
- memset(nat, 0, sizeof(*nat));
- i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
- return 0;
-}
-
static void __net_exit nf_nat_net_exit(struct net *net)
{
- nf_ct_iterate_cleanup(net, &clean_nat, NULL);
+ struct nf_nat_proto_clean clean = {};
+
+ nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean);
synchronize_rcu();
- nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
+ nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
static struct pernet_operations nf_nat_net_ops = {
@@ -697,11 +795,8 @@ static struct nfq_ct_nat_hook nfq_ct_nat = {
static int __init nf_nat_init(void)
{
- size_t i;
int ret;
- need_ipv4_conntrack();
-
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
@@ -712,22 +807,11 @@ static int __init nf_nat_init(void)
if (ret < 0)
goto cleanup_extend;
- /* Sew in builtin protocols. */
- spin_lock_bh(&nf_nat_lock);
- for (i = 0; i < MAX_IP_NAT_PROTO; i++)
- RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol);
- RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
- RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
- RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
- spin_unlock_bh(&nf_nat_lock);
+ nf_ct_helper_expectfn_register(&follow_master_nat);
/* Initialize fake conntrack so that NAT will skip it */
nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
- l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
-
- nf_ct_helper_expectfn_register(&follow_master_nat);
-
BUG_ON(nf_nat_seq_adjust_hook != NULL);
RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
@@ -736,6 +820,10 @@ static int __init nf_nat_init(void)
BUG_ON(nf_ct_nat_offset != NULL);
RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
+#ifdef CONFIG_XFRM
+ BUG_ON(nf_nat_decode_session_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
+#endif
return 0;
cleanup_extend:
@@ -745,19 +833,24 @@ static int __init nf_nat_init(void)
static void __exit nf_nat_cleanup(void)
{
+ unsigned int i;
+
unregister_pernet_subsys(&nf_nat_net_ops);
- nf_ct_l3proto_put(l3proto);
nf_ct_extend_unregister(&nat_extend);
nf_ct_helper_expectfn_unregister(&follow_master_nat);
RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
+#ifdef CONFIG_XFRM
+ RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
+#endif
+ for (i = 0; i < NFPROTO_NUMPROTO; i++)
+ kfree(nf_nat_l4protos[i]);
synchronize_net();
}
MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-ipv4");
module_init(nf_nat_init);
module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index e462a957d080..e839b97b2863 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -10,12 +10,11 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
-#include <linux/ip.h>
+#include <linux/inet.h>
#include <linux/tcp.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <linux/netfilter/nf_conntrack_ftp.h>
@@ -27,22 +26,27 @@ MODULE_ALIAS("ip_nat_ftp");
/* FIXME: Time out? --RR */
-static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type,
+static int nf_nat_ftp_fmt_cmd(struct nf_conn *ct, enum nf_ct_ftp_type type,
char *buffer, size_t buflen,
- __be32 addr, u16 port)
+ union nf_inet_addr *addr, u16 port)
{
switch (type) {
case NF_CT_FTP_PORT:
case NF_CT_FTP_PASV:
return snprintf(buffer, buflen, "%u,%u,%u,%u,%u,%u",
- ((unsigned char *)&addr)[0],
- ((unsigned char *)&addr)[1],
- ((unsigned char *)&addr)[2],
- ((unsigned char *)&addr)[3],
+ ((unsigned char *)&addr->ip)[0],
+ ((unsigned char *)&addr->ip)[1],
+ ((unsigned char *)&addr->ip)[2],
+ ((unsigned char *)&addr->ip)[3],
port >> 8,
port & 0xFF);
case NF_CT_FTP_EPRT:
- return snprintf(buffer, buflen, "|1|%pI4|%u|", &addr, port);
+ if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+ return snprintf(buffer, buflen, "|1|%pI4|%u|",
+ &addr->ip, port);
+ else
+ return snprintf(buffer, buflen, "|2|%pI6|%u|",
+ &addr->ip6, port);
case NF_CT_FTP_EPSV:
return snprintf(buffer, buflen, "|||%u|", port);
}
@@ -55,21 +59,22 @@ static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type,
static unsigned int nf_nat_ftp(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
enum nf_ct_ftp_type type,
+ unsigned int protoff,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conntrack_expect *exp)
{
- __be32 newip;
+ union nf_inet_addr newaddr;
u_int16_t port;
int dir = CTINFO2DIR(ctinfo);
struct nf_conn *ct = exp->master;
- char buffer[sizeof("|1|255.255.255.255|65535|")];
+ char buffer[sizeof("|1||65535|") + INET6_ADDRSTRLEN];
unsigned int buflen;
pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
/* Connection will come from wherever this packet goes, hence !dir */
- newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ newaddr = ct->tuplehash[!dir].tuple.dst.u3;
exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
exp->dir = !dir;
@@ -94,13 +99,14 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
if (port == 0)
return NF_DROP;
- buflen = nf_nat_ftp_fmt_cmd(type, buffer, sizeof(buffer), newip, port);
+ buflen = nf_nat_ftp_fmt_cmd(ct, type, buffer, sizeof(buffer),
+ &newaddr, port);
if (!buflen)
goto out;
pr_debug("calling nf_nat_mangle_tcp_packet\n");
- if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
matchlen, buffer, buflen))
goto out;
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 2e59ad0b90ca..23c2b38676a6 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -1,4 +1,4 @@
-/* ip_nat_helper.c - generic support functions for NAT helpers
+/* nf_nat_helper.c - generic support functions for NAT helpers
*
* (C) 2000-2002 Harald Welte <laforge@netfilter.org>
* (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
@@ -9,23 +9,19 @@
*/
#include <linux/module.h>
#include <linux/gfp.h>
-#include <linux/kmod.h>
#include <linux/types.h>
-#include <linux/timer.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <linux/udp.h>
-#include <net/checksum.h>
#include <net/tcp.h>
-#include <net/route.h>
-#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
@@ -90,7 +86,6 @@ s16 nf_nat_get_offset(const struct nf_conn *ct,
return offset;
}
-EXPORT_SYMBOL_GPL(nf_nat_get_offset);
/* Frobs data inside this packet, which is linear. */
static void mangle_contents(struct sk_buff *skb,
@@ -125,9 +120,13 @@ static void mangle_contents(struct sk_buff *skb,
__skb_trim(skb, skb->len + rep_len - match_len);
}
- /* fix IP hdr checksum information */
- ip_hdr(skb)->tot_len = htons(skb->len);
- ip_send_check(ip_hdr(skb));
+ if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) {
+ /* fix IP hdr checksum information */
+ ip_hdr(skb)->tot_len = htons(skb->len);
+ ip_send_check(ip_hdr(skb));
+ } else
+ ipv6_hdr(skb)->payload_len =
+ htons(skb->len - sizeof(struct ipv6hdr));
}
/* Unusual, but possible case. */
@@ -166,35 +165,6 @@ void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
-static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
- int datalen, __sum16 *check, int oldlen)
-{
- struct rtable *rt = skb_rtable(skb);
-
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- if (!(rt->rt_flags & RTCF_LOCAL) &&
- (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_headroom(skb) +
- skb_network_offset(skb) +
- iph->ihl * 4;
- skb->csum_offset = (void *)check - data;
- *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, iph->protocol, 0);
- } else {
- *check = 0;
- *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, iph->protocol,
- csum_partial(data, datalen,
- 0));
- if (iph->protocol == IPPROTO_UDP && !*check)
- *check = CSUM_MANGLED_0;
- }
- } else
- inet_proto_csum_replace2(check, skb,
- htons(oldlen), htons(datalen), 1);
-}
-
/* Generic function for mangling variable-length address changes inside
* NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
* command in FTP).
@@ -206,12 +176,13 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data
int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned int match_offset,
unsigned int match_len,
const char *rep_buffer,
unsigned int rep_len, bool adjust)
{
- struct iphdr *iph;
+ const struct nf_nat_l3proto *l3proto;
struct tcphdr *tcph;
int oldlen, datalen;
@@ -225,15 +196,17 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
SKB_LINEAR_ASSERT(skb);
- iph = ip_hdr(skb);
- tcph = (void *)iph + iph->ihl*4;
+ tcph = (void *)skb->data + protoff;
- oldlen = skb->len - iph->ihl*4;
- mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
+ oldlen = skb->len - protoff;
+ mangle_contents(skb, protoff + tcph->doff*4,
match_offset, match_len, rep_buffer, rep_len);
- datalen = skb->len - iph->ihl*4;
- nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen);
+ datalen = skb->len - protoff;
+
+ l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+ l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check,
+ datalen, oldlen);
if (adjust && rep_len != match_len)
nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
@@ -257,12 +230,13 @@ int
nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned int match_offset,
unsigned int match_len,
const char *rep_buffer,
unsigned int rep_len)
{
- struct iphdr *iph;
+ const struct nf_nat_l3proto *l3proto;
struct udphdr *udph;
int datalen, oldlen;
@@ -274,22 +248,23 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
!enlarge_skb(skb, rep_len - match_len))
return 0;
- iph = ip_hdr(skb);
- udph = (void *)iph + iph->ihl*4;
+ udph = (void *)skb->data + protoff;
- oldlen = skb->len - iph->ihl*4;
- mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
+ oldlen = skb->len - protoff;
+ mangle_contents(skb, protoff + sizeof(*udph),
match_offset, match_len, rep_buffer, rep_len);
/* update the length of the UDP packet */
- datalen = skb->len - iph->ihl*4;
+ datalen = skb->len - protoff;
udph->len = htons(datalen);
/* fix udp checksum if udp checksum was previously calculated */
if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
return 1;
- nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen);
+ l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+ l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
+ datalen, oldlen);
return 1;
}
@@ -341,6 +316,7 @@ sack_adjust(struct sk_buff *skb,
/* TCP SACK sequence number adjustment */
static inline unsigned int
nf_nat_sack_adjust(struct sk_buff *skb,
+ unsigned int protoff,
struct tcphdr *tcph,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -348,8 +324,8 @@ nf_nat_sack_adjust(struct sk_buff *skb,
unsigned int dir, optoff, optend;
struct nf_conn_nat *nat = nfct_nat(ct);
- optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
- optend = ip_hdrlen(skb) + tcph->doff * 4;
+ optoff = protoff + sizeof(struct tcphdr);
+ optend = protoff + tcph->doff * 4;
if (!skb_make_writable(skb, optend))
return 0;
@@ -387,7 +363,8 @@ nf_nat_sack_adjust(struct sk_buff *skb,
int
nf_nat_seq_adjust(struct sk_buff *skb,
struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff)
{
struct tcphdr *tcph;
int dir;
@@ -401,10 +378,10 @@ nf_nat_seq_adjust(struct sk_buff *skb,
this_way = &nat->seq[dir];
other_way = &nat->seq[!dir];
- if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
+ if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
return 0;
- tcph = (void *)skb->data + ip_hdrlen(skb);
+ tcph = (void *)skb->data + protoff;
if (after(ntohl(tcph->seq), this_way->correction_pos))
seqoff = this_way->offset_after;
else
@@ -429,7 +406,7 @@ nf_nat_seq_adjust(struct sk_buff *skb,
tcph->seq = newseq;
tcph->ack_seq = newack;
- return nf_nat_sack_adjust(skb, tcph, ct, ctinfo);
+ return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
}
/* Setup NAT on this expected conntrack so it follows master. */
@@ -437,22 +414,22 @@ nf_nat_seq_adjust(struct sk_buff *skb,
void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
- struct nf_nat_ipv4_range range;
+ struct nf_nat_range range;
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
range.flags = NF_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = exp->saved_proto;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
+ range.min_proto = range.max_proto = exp->saved_proto;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.src.u3;
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}
EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index 979ae165f4ef..1fedee6e7fb6 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -17,7 +17,6 @@
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <linux/netfilter/nf_conntrack_irc.h>
@@ -29,12 +28,12 @@ MODULE_ALIAS("ip_nat_irc");
static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned int matchoff,
unsigned int matchlen,
struct nf_conntrack_expect *exp)
{
char buffer[sizeof("4294967296 65635")];
- u_int32_t ip;
u_int16_t port;
unsigned int ret;
@@ -60,13 +59,8 @@ static unsigned int help(struct sk_buff *skb,
if (port == 0)
return NF_DROP;
- ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip);
- sprintf(buffer, "%u %u", ip, port);
- pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n",
- buffer, &ip, port);
-
ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
- matchoff, matchlen, buffer,
+ protoff, matchoff, matchlen, buffer,
strlen(buffer));
if (ret != NF_ACCEPT)
nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index 9993bc93e102..9baaf734c142 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -9,20 +9,18 @@
#include <linux/types.h>
#include <linux/random.h>
-#include <linux/ip.h>
-
#include <linux/netfilter.h>
#include <linux/export.h>
-#include <net/secure_seq.h>
+
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
-bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
+bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
{
__be16 port;
@@ -34,13 +32,14 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
return ntohs(port) >= ntohs(min->all) &&
ntohs(port) <= ntohs(max->all);
}
-EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
-void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct,
- u_int16_t *rover)
+void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct,
+ u16 *rover)
{
unsigned int range_size, min, i;
__be16 *portptr;
@@ -71,15 +70,14 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
range_size = 65535 - 1024 + 1;
}
} else {
- min = ntohs(range->min.all);
- range_size = ntohs(range->max.all) - min + 1;
+ min = ntohs(range->min_proto.all);
+ range_size = ntohs(range->max_proto.all) - min + 1;
}
if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
- off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
- maniptype == NF_NAT_MANIP_SRC
- ? tuple->dst.u.all
- : tuple->src.u.all);
+ off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC
+ ? tuple->dst.u.all
+ : tuple->src.u.all);
else
off = *rover;
@@ -93,22 +91,22 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
}
return;
}
-EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
- struct nf_nat_ipv4_range *range)
+int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range *range)
{
if (tb[CTA_PROTONAT_PORT_MIN]) {
- range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
- range->max.all = range->min.tcp.port;
+ range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
+ range->max_proto.all = range->min_proto.all;
range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
if (tb[CTA_PROTONAT_PORT_MAX]) {
- range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
+ range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
return 0;
}
-EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range);
#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 3f67138d187c..c8be2cdac0bf 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -1,7 +1,7 @@
/*
* DCCP NAT protocol helper
*
- * Copyright (c) 2005, 2006. 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -13,35 +13,34 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <linux/ip.h>
#include <linux/dccp.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
static u_int16_t dccp_port_rover;
static void
-dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
- &dccp_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+ &dccp_port_rover);
}
static bool
dccp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- const struct iphdr *iph = (const void *)(skb->data + iphdroff);
struct dccp_hdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl * 4;
- __be32 oldip, newip;
__be16 *portptr, oldport, newport;
int hdrsize = 8; /* DCCP connection tracking guarantees this much */
@@ -51,17 +50,12 @@ dccp_manip_pkt(struct sk_buff *skb,
if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
- iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct dccp_hdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
newport = tuple->src.u.dccp.port;
portptr = &hdr->dccph_sport;
} else {
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
newport = tuple->dst.u.dccp.port;
portptr = &hdr->dccph_dport;
}
@@ -72,30 +66,46 @@ dccp_manip_pkt(struct sk_buff *skb,
if (hdrsize < sizeof(*hdr))
return true;
- inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1);
+ l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
+ tuple, maniptype);
inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
0);
return true;
}
-static const struct nf_nat_protocol nf_nat_protocol_dccp = {
- .protonum = IPPROTO_DCCP,
+static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
+ .l4proto = IPPROTO_DCCP,
.manip_pkt = dccp_manip_pkt,
- .in_range = nf_nat_proto_in_range,
+ .in_range = nf_nat_l4proto_in_range,
.unique_tuple = dccp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
static int __init nf_nat_proto_dccp_init(void)
{
- return nf_nat_protocol_register(&nf_nat_protocol_dccp);
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
+err1:
+ return err;
}
static void __exit nf_nat_proto_dccp_fini(void)
{
- nf_nat_protocol_unregister(&nf_nat_protocol_dccp);
+ nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
+
}
module_init(nf_nat_proto_dccp_init);
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 3cce9b6c1c29..e64faa5ca893 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -8,53 +8,46 @@
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/ip.h>
#include <linux/sctp.h>
#include <linux/module.h>
#include <net/sctp/checksum.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
static u_int16_t nf_sctp_port_rover;
static void
-sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
- &nf_sctp_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+ &nf_sctp_port_rover);
}
static bool
sctp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct sk_buff *frag;
sctp_sctphdr_t *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
__be32 crc32;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
- iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct sctphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
+ /* Get rid of src port */
hdr->source = tuple->src.u.sctp.port;
} else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
+ /* Get rid of dst port */
hdr->dest = tuple->dst.u.sctp.port;
}
@@ -68,24 +61,38 @@ sctp_manip_pkt(struct sk_buff *skb,
return true;
}
-static const struct nf_nat_protocol nf_nat_protocol_sctp = {
- .protonum = IPPROTO_SCTP,
+static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
+ .l4proto = IPPROTO_SCTP,
.manip_pkt = sctp_manip_pkt,
- .in_range = nf_nat_proto_in_range,
+ .in_range = nf_nat_l4proto_in_range,
.unique_tuple = sctp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
static int __init nf_nat_proto_sctp_init(void)
{
- return nf_nat_protocol_register(&nf_nat_protocol_sctp);
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
+err1:
+ return err;
}
static void __exit nf_nat_proto_sctp_exit(void)
{
- nf_nat_protocol_unregister(&nf_nat_protocol_sctp);
+ nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
}
module_init(nf_nat_proto_sctp_init);
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index 9fb4b4e72bbf..83ec8a6e4c36 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -9,37 +9,36 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/export.h>
-#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
-static u_int16_t tcp_port_rover;
+static u16 tcp_port_rover;
static void
-tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+ &tcp_port_rover);
}
static bool
tcp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct tcphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
__be16 *portptr, newport, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
@@ -52,19 +51,14 @@ tcp_manip_pkt(struct sk_buff *skb,
if (!skb_make_writable(skb, hdroff + hdrsize))
return false;
- iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct tcphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
+ /* Get rid of src port */
newport = tuple->src.u.tcp.port;
portptr = &hdr->source;
} else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
+ /* Get rid of dst port */
newport = tuple->dst.u.tcp.port;
portptr = &hdr->dest;
}
@@ -75,17 +69,17 @@ tcp_manip_pkt(struct sk_buff *skb,
if (hdrsize < sizeof(*hdr))
return true;
- inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+ l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
return true;
}
-const struct nf_nat_protocol nf_nat_protocol_tcp = {
- .protonum = IPPROTO_TCP,
+const struct nf_nat_l4proto nf_nat_l4proto_tcp = {
+ .l4proto = IPPROTO_TCP,
.manip_pkt = tcp_manip_pkt,
- .in_range = nf_nat_proto_in_range,
+ .in_range = nf_nat_l4proto_in_range,
.unique_tuple = tcp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index 9883336e628f..7df613fb34a2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -9,59 +9,53 @@
#include <linux/types.h>
#include <linux/export.h>
#include <linux/init.h>
-#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
-static u_int16_t udp_port_rover;
+static u16 udp_port_rover;
static void
-udp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+ &udp_port_rover);
}
static bool
udp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct udphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
__be16 *portptr, newport;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
-
- iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct udphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
+ /* Get rid of src port */
newport = tuple->src.u.udp.port;
portptr = &hdr->source;
} else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
+ /* Get rid of dst port */
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
- inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+ l3proto->csum_update(skb, iphdroff, &hdr->check,
+ tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
0);
if (!hdr->check)
@@ -71,12 +65,12 @@ udp_manip_pkt(struct sk_buff *skb,
return true;
}
-const struct nf_nat_protocol nf_nat_protocol_udp = {
- .protonum = IPPROTO_UDP,
+const struct nf_nat_l4proto nf_nat_l4proto_udp = {
+ .l4proto = IPPROTO_UDP,
.manip_pkt = udp_manip_pkt,
- .in_range = nf_nat_proto_in_range,
+ .in_range = nf_nat_l4proto_in_range,
.unique_tuple = udp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
index d24d10a7beb2..776a0d1317b1 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -9,59 +9,53 @@
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
-static u_int16_t udplite_port_rover;
+static u16 udplite_port_rover;
static void
-udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
- &udplite_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+ &udplite_port_rover);
}
static bool
udplite_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct udphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
__be16 *portptr, newport;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
- iph = (struct iphdr *)(skb->data + iphdroff);
hdr = (struct udphdr *)(skb->data + hdroff);
if (maniptype == NF_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
+ /* Get rid of source port */
newport = tuple->src.u.udp.port;
portptr = &hdr->source;
} else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
+ /* Get rid of dst port */
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
- inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+ l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
@@ -70,24 +64,38 @@ udplite_manip_pkt(struct sk_buff *skb,
return true;
}
-static const struct nf_nat_protocol nf_nat_protocol_udplite = {
- .protonum = IPPROTO_UDPLITE,
+static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
+ .l4proto = IPPROTO_UDPLITE,
.manip_pkt = udplite_manip_pkt,
- .in_range = nf_nat_proto_in_range,
+ .in_range = nf_nat_l4proto_in_range,
.unique_tuple = udplite_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
static int __init nf_nat_proto_udplite_init(void)
{
- return nf_nat_protocol_register(&nf_nat_protocol_udplite);
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
+err1:
+ return err;
}
static void __exit nf_nat_proto_udplite_fini(void)
{
- nf_nat_protocol_unregister(&nf_nat_protocol_udplite);
+ nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
}
module_init(nf_nat_proto_udplite_init);
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c
index e0afe8112b1c..6e494d584412 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/netfilter/nf_nat_proto_unknown.c
@@ -15,8 +15,7 @@
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type manip_type,
@@ -26,26 +25,29 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
return true;
}
-static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_ipv4_range *range,
+static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
/* Sorry: we can't help you; if it's not unique, we can't frob
- anything. */
+ * anything.
+ */
return;
}
static bool
unknown_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
return true;
}
-const struct nf_nat_protocol nf_nat_unknown_protocol = {
+const struct nf_nat_l4proto nf_nat_l4proto_unknown = {
.manip_pkt = unknown_manip_pkt,
.in_range = unknown_in_range,
.unique_tuple = unknown_unique_tuple,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index ea4a23813d26..16303c752213 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -3,7 +3,7 @@
* (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
* based on RR's ip_nat_ftp.c and other modules.
* (C) 2007 United Security Providers
- * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
+ * (C) 2007, 2008, 2011, 2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -12,14 +12,12 @@
#include <linux/module.h>
#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/ip.h>
+#include <linux/inet.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <linux/netfilter/nf_conntrack_sip.h>
@@ -30,7 +28,8 @@ MODULE_DESCRIPTION("SIP NAT helper");
MODULE_ALIAS("ip_nat_sip");
-static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int matchoff, unsigned int matchlen,
const char *buffer, unsigned int buflen)
@@ -41,20 +40,20 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
unsigned int baseoff;
if (nf_ct_protonum(ct) == IPPROTO_TCP) {
- th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
- baseoff = ip_hdrlen(skb) + th->doff * 4;
+ th = (struct tcphdr *)(skb->data + protoff);
+ baseoff = protoff + th->doff * 4;
matchoff += dataoff - baseoff;
if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
- matchoff, matchlen,
+ protoff, matchoff, matchlen,
buffer, buflen, false))
return 0;
} else {
- baseoff = ip_hdrlen(skb) + sizeof(struct udphdr);
+ baseoff = protoff + sizeof(struct udphdr);
matchoff += dataoff - baseoff;
if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
- matchoff, matchlen,
+ protoff, matchoff, matchlen,
buffer, buflen))
return 0;
}
@@ -65,7 +64,30 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
return 1;
}
-static int map_addr(struct sk_buff *skb, unsigned int dataoff,
+static int sip_sprintf_addr(const struct nf_conn *ct, char *buffer,
+ const union nf_inet_addr *addr, bool delim)
+{
+ if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+ return sprintf(buffer, "%pI4", &addr->ip);
+ else {
+ if (delim)
+ return sprintf(buffer, "[%pI6c]", &addr->ip6);
+ else
+ return sprintf(buffer, "%pI6c", &addr->ip6);
+ }
+}
+
+static int sip_sprintf_addr_port(const struct nf_conn *ct, char *buffer,
+ const union nf_inet_addr *addr, u16 port)
+{
+ if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+ return sprintf(buffer, "%pI4:%u", &addr->ip, port);
+ else
+ return sprintf(buffer, "[%pI6c]:%u", &addr->ip6, port);
+}
+
+static int map_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int matchoff, unsigned int matchlen,
union nf_inet_addr *addr, __be16 port)
@@ -73,32 +95,32 @@ static int map_addr(struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
unsigned int buflen;
- __be32 newaddr;
+ union nf_inet_addr newaddr;
__be16 newport;
- if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip &&
+ if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, addr) &&
ct->tuplehash[dir].tuple.src.u.udp.port == port) {
- newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ newaddr = ct->tuplehash[!dir].tuple.dst.u3;
newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
- } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip &&
+ } else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) &&
ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
- newaddr = ct->tuplehash[!dir].tuple.src.u3.ip;
+ newaddr = ct->tuplehash[!dir].tuple.src.u3;
newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
} else
return 1;
- if (newaddr == addr->ip && newport == port)
+ if (nf_inet_addr_cmp(&newaddr, addr) && newport == port)
return 1;
- buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport));
-
- return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
- buffer, buflen);
+ buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, ntohs(newport));
+ return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen);
}
-static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff,
+static int map_sip_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
enum sip_header_types type)
{
@@ -111,11 +133,12 @@ static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff,
if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
&matchoff, &matchlen, &addr, &port) <= 0)
return 1;
- return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
- &addr, port);
+ return map_addr(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, &addr, port);
}
-static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen)
{
enum ip_conntrack_info ctinfo;
@@ -132,8 +155,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
if (ct_sip_parse_request(ct, *dptr, *datalen,
&matchoff, &matchlen,
&addr, &port) > 0 &&
- !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
- &addr, port))
+ !map_addr(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, &addr, port))
return NF_DROP;
request = 1;
} else
@@ -148,37 +171,41 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
hdr, NULL, &matchoff, &matchlen,
&addr, &port) > 0) {
- unsigned int matchend, poff, plen, buflen, n;
- char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ unsigned int olen, matchend, poff, plen, buflen, n;
+ char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
/* We're only interested in headers related to this
* connection */
if (request) {
- if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
+ if (!nf_inet_addr_cmp(&addr,
+ &ct->tuplehash[dir].tuple.src.u3) ||
port != ct->tuplehash[dir].tuple.src.u.udp.port)
goto next;
} else {
- if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
+ if (!nf_inet_addr_cmp(&addr,
+ &ct->tuplehash[dir].tuple.dst.u3) ||
port != ct->tuplehash[dir].tuple.dst.u.udp.port)
goto next;
}
- if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
- &addr, port))
+ olen = *datalen;
+ if (!map_addr(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, &addr, port))
return NF_DROP;
- matchend = matchoff + matchlen;
+ matchend = matchoff + matchlen + *datalen - olen;
/* The maddr= parameter (RFC 2361) specifies where to send
* the reply. */
if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
"maddr=", &poff, &plen,
- &addr) > 0 &&
- addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
- addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
- buflen = sprintf(buffer, "%pI4",
- &ct->tuplehash[!dir].tuple.dst.u3.ip);
- if (!mangle_packet(skb, dataoff, dptr, datalen,
+ &addr, true) > 0 &&
+ nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3) &&
+ !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3)) {
+ buflen = sip_sprintf_addr(ct, buffer,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ true);
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
poff, plen, buffer, buflen))
return NF_DROP;
}
@@ -187,12 +214,13 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
* from which the server received the request. */
if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
"received=", &poff, &plen,
- &addr) > 0 &&
- addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
- addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
- buflen = sprintf(buffer, "%pI4",
- &ct->tuplehash[!dir].tuple.src.u3.ip);
- if (!mangle_packet(skb, dataoff, dptr, datalen,
+ &addr, false) > 0 &&
+ nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.dst.u3) &&
+ !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.src.u3)) {
+ buflen = sip_sprintf_addr(ct, buffer,
+ &ct->tuplehash[!dir].tuple.src.u3,
+ false);
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
poff, plen, buffer, buflen))
return NF_DROP;
}
@@ -206,7 +234,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
__be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
buflen = sprintf(buffer, "%u", ntohs(p));
- if (!mangle_packet(skb, dataoff, dptr, datalen,
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
poff, plen, buffer, buflen))
return NF_DROP;
}
@@ -220,19 +248,21 @@ next:
SIP_HDR_CONTACT, &in_header,
&matchoff, &matchlen,
&addr, &port) > 0) {
- if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
+ if (!map_addr(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen,
&addr, port))
return NF_DROP;
}
- if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) ||
- !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO))
+ if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) ||
+ !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO))
return NF_DROP;
return NF_ACCEPT;
}
-static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
+static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
+ s16 off)
{
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
@@ -241,37 +271,38 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0)
return;
- th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
+ th = (struct tcphdr *)(skb->data + protoff);
nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
}
/* Handles expected signalling connections and media streams */
-static void ip_nat_sip_expected(struct nf_conn *ct,
+static void nf_nat_sip_expected(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
- struct nf_nat_ipv4_range range;
+ struct nf_nat_range range;
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);
/* For DST manip, map port here to where it's expected. */
range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = exp->saved_proto;
- range.min_ip = range.max_ip = exp->saved_ip;
+ range.min_proto = range.max_proto = exp->saved_proto;
+ range.min_addr = range.max_addr = exp->saved_addr;
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
/* Change src to where master sends to, but only if the connection
* actually came from the same source. */
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
- ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
+ if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
+ &ct->master->tuplehash[exp->dir].tuple.src.u3)) {
range.flags = NF_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
}
}
-static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
struct nf_conntrack_expect *exp,
unsigned int matchoff,
@@ -280,16 +311,17 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- __be32 newip;
+ union nf_inet_addr newaddr;
u_int16_t port;
- char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
+ char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
unsigned int buflen;
/* Connection will come from reply */
- if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
- newip = exp->tuple.dst.u3.ip;
+ if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3))
+ newaddr = exp->tuple.dst.u3;
else
- newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ newaddr = ct->tuplehash[!dir].tuple.dst.u3;
/* If the signalling port matches the connection's source port in the
* original direction, try to use the destination port in the opposite
@@ -300,11 +332,11 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
else
port = ntohs(exp->tuple.dst.u.udp.port);
- exp->saved_ip = exp->tuple.dst.u3.ip;
- exp->tuple.dst.u3.ip = newip;
+ exp->saved_addr = exp->tuple.dst.u3;
+ exp->tuple.dst.u3 = newaddr;
exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
exp->dir = !dir;
- exp->expectfn = ip_nat_sip_expected;
+ exp->expectfn = nf_nat_sip_expected;
for (; port != 0; port++) {
int ret;
@@ -322,10 +354,10 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
if (port == 0)
return NF_DROP;
- if (exp->tuple.dst.u3.ip != exp->saved_ip ||
+ if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) ||
exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
- buflen = sprintf(buffer, "%pI4:%u", &newip, port);
- if (!mangle_packet(skb, dataoff, dptr, datalen,
+ buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port);
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
matchoff, matchlen, buffer, buflen))
goto err;
}
@@ -336,7 +368,8 @@ err:
return NF_DROP;
}
-static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff,
+static int mangle_content_len(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen)
{
enum ip_conntrack_info ctinfo;
@@ -358,11 +391,12 @@ static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff,
return 0;
buflen = sprintf(buffer, "%u", c_len);
- return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
- buffer, buflen);
+ return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen);
}
-static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff,
+static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int sdpoff,
enum sdp_header_types type,
@@ -376,29 +410,33 @@ static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff,
if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
&matchoff, &matchlen) <= 0)
return -ENOENT;
- return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
- buffer, buflen) ? 0 : -EINVAL;
+ return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL;
}
-static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int nf_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int sdpoff,
enum sdp_header_types type,
enum sdp_header_types term,
const union nf_inet_addr *addr)
{
- char buffer[sizeof("nnn.nnn.nnn.nnn")];
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ char buffer[INET6_ADDRSTRLEN];
unsigned int buflen;
- buflen = sprintf(buffer, "%pI4", &addr->ip);
- if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term,
- buffer, buflen))
+ buflen = sip_sprintf_addr(ct, buffer, addr, false);
+ if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen,
+ sdpoff, type, term, buffer, buflen))
return 0;
- return mangle_content_len(skb, dataoff, dptr, datalen);
+ return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
}
-static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int nf_nat_sdp_port(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int matchoff,
unsigned int matchlen,
@@ -408,30 +446,32 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff,
unsigned int buflen;
buflen = sprintf(buffer, "%u", port);
- if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
- buffer, buflen))
+ if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+ matchoff, matchlen, buffer, buflen))
return 0;
- return mangle_content_len(skb, dataoff, dptr, datalen);
+ return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
}
-static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int nf_nat_sdp_session(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
unsigned int sdpoff,
const union nf_inet_addr *addr)
{
- char buffer[sizeof("nnn.nnn.nnn.nnn")];
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ char buffer[INET6_ADDRSTRLEN];
unsigned int buflen;
/* Mangle session description owner and contact addresses */
- buflen = sprintf(buffer, "%pI4", &addr->ip);
- if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
- SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
- buffer, buflen))
+ buflen = sip_sprintf_addr(ct, buffer, addr, false);
+ if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
+ SDP_HDR_OWNER, SDP_HDR_MEDIA, buffer, buflen))
return 0;
- switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
- SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
+ switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
+ SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
buffer, buflen)) {
case 0:
/*
@@ -447,12 +487,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff
return 0;
}
- return mangle_content_len(skb, dataoff, dptr, datalen);
+ return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
}
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
+ unsigned int dataoff,
const char **dptr, unsigned int *datalen,
struct nf_conntrack_expect *rtp_exp,
struct nf_conntrack_expect *rtcp_exp,
@@ -466,23 +507,23 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
u_int16_t port;
/* Connection will come from reply */
- if (ct->tuplehash[dir].tuple.src.u3.ip ==
- ct->tuplehash[!dir].tuple.dst.u3.ip)
- rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
+ if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3))
+ *rtp_addr = rtp_exp->tuple.dst.u3;
else
- rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ *rtp_addr = ct->tuplehash[!dir].tuple.dst.u3;
- rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
- rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
+ rtp_exp->saved_addr = rtp_exp->tuple.dst.u3;
+ rtp_exp->tuple.dst.u3 = *rtp_addr;
rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
rtp_exp->dir = !dir;
- rtp_exp->expectfn = ip_nat_sip_expected;
+ rtp_exp->expectfn = nf_nat_sip_expected;
- rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
- rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
+ rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3;
+ rtcp_exp->tuple.dst.u3 = *rtp_addr;
rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
rtcp_exp->dir = !dir;
- rtcp_exp->expectfn = ip_nat_sip_expected;
+ rtcp_exp->expectfn = nf_nat_sip_expected;
/* Try to get same pair of ports: if not, try to change them. */
for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
@@ -501,7 +542,10 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
ret = nf_ct_expect_related(rtcp_exp);
if (ret == 0)
break;
- else if (ret != -EBUSY) {
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
nf_ct_unexpect_related(rtp_exp);
port = 0;
break;
@@ -513,7 +557,7 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
/* Update media port. */
if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
- !ip_nat_sdp_port(skb, dataoff, dptr, datalen,
+ !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
mediaoff, medialen, port))
goto err2;
@@ -527,8 +571,8 @@ err1:
}
static struct nf_ct_helper_expectfn sip_nat = {
- .name = "sip",
- .expectfn = ip_nat_sip_expected,
+ .name = "sip",
+ .expectfn = nf_nat_sip_expected,
};
static void __exit nf_nat_sip_fini(void)
@@ -553,13 +597,13 @@ static int __init nf_nat_sip_init(void)
BUG_ON(nf_nat_sdp_port_hook != NULL);
BUG_ON(nf_nat_sdp_session_hook != NULL);
BUG_ON(nf_nat_sdp_media_hook != NULL);
- RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip);
- RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
- RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect);
- RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
- RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port);
- RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session);
- RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media);
+ RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip);
+ RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust);
+ RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect);
+ RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr);
+ RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port);
+ RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session);
+ RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media);
nf_ct_helper_expectfn_register(&sip_nat);
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c
index 9dbb8d284f99..ccabbda71a3e 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/netfilter/nf_nat_tftp.c
@@ -11,7 +11,6 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <linux/netfilter/nf_conntrack_tftp.h>
MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index ce60cf0f6c11..8d2cf9ec37a8 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -118,7 +118,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
* through nf_reinject().
*/
static int __nf_queue(struct sk_buff *skb,
- struct list_head *elem,
+ struct nf_hook_ops *elem,
u_int8_t pf, unsigned int hook,
struct net_device *indev,
struct net_device *outdev,
@@ -155,7 +155,7 @@ static int __nf_queue(struct sk_buff *skb,
*entry = (struct nf_queue_entry) {
.skb = skb,
- .elem = list_entry(elem, struct nf_hook_ops, list),
+ .elem = elem,
.pf = pf,
.hook = hook,
.indev = indev,
@@ -225,7 +225,7 @@ static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
#endif
int nf_queue(struct sk_buff *skb,
- struct list_head *elem,
+ struct nf_hook_ops *elem,
u_int8_t pf, unsigned int hook,
struct net_device *indev,
struct net_device *outdev,
@@ -287,7 +287,7 @@ int nf_queue(struct sk_buff *skb,
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
struct sk_buff *skb = entry->skb;
- struct list_head *elem = &entry->elem->list;
+ struct nf_hook_ops *elem = entry->elem;
const struct nf_afinfo *afinfo;
int err;
@@ -297,7 +297,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
/* Continue traversal iff userspace said ok... */
if (verdict == NF_REPEAT) {
- elem = elem->prev;
+ elem = list_entry(elem->list.prev, struct nf_hook_ops, list);
verdict = NF_ACCEPT;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a26503342e71..ffb92c03a358 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -241,7 +241,7 @@ static int __net_init nfnetlink_net_init(struct net *net)
#endif
};
- nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, THIS_MODULE, &cfg);
+ nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
if (!nfnl)
return -ENOMEM;
net->nfnl_stash = nfnl;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index b2e7310ca0b8..589d686f0b4c 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -79,11 +79,11 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
if (tb[NFACCT_BYTES]) {
atomic64_set(&nfacct->bytes,
- be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES])));
+ be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
}
if (tb[NFACCT_PKTS]) {
atomic64_set(&nfacct->pkts,
- be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS])));
+ be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
}
atomic_set(&nfacct->refcnt, 1);
list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
@@ -91,16 +91,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
}
static int
-nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
int event, struct nf_acct *acct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned int flags = pid ? NLM_F_MULTI : 0;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
u64 pkts, bytes;
event |= NFNL_SUBSYS_ACCT << 8;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -150,7 +150,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (last && cur != last)
continue;
- if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
NFNL_MSG_ACCT_NEW, cur) < 0) {
@@ -195,7 +195,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
break;
}
- ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
+ ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
NFNL_MSG_ACCT_NEW, cur);
@@ -203,7 +203,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
kfree_skb(skb2);
break;
}
- ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid,
+ ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
MSG_DONTWAIT);
if (ret > 0)
ret = 0;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index d6836193d479..945950a8b1f1 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -74,7 +74,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
return -EINVAL;
- tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM]));
+ tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
return 0;
@@ -85,6 +85,9 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
{
const struct nf_conn_help *help = nfct_help(ct);
+ if (attr == NULL)
+ return -EINVAL;
+
if (help->helper->data_len == 0)
return -EINVAL;
@@ -395,16 +398,16 @@ nla_put_failure:
}
static int
-nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
int event, struct nf_conntrack_helper *helper)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned int flags = pid ? NLM_F_MULTI : 0;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
int status;
event |= NFNL_SUBSYS_CTHELPER << 8;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -468,7 +471,7 @@ restart:
cb->args[1] = 0;
}
if (nfnl_cthelper_fill_info(skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
NFNL_MSG_CTHELPER_NEW, cur) < 0) {
@@ -538,7 +541,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
break;
}
- ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid,
+ ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
NFNL_MSG_CTHELPER_NEW, cur);
@@ -547,7 +550,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
break;
}
- ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid,
+ ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
MSG_DONTWAIT);
if (ret > 0)
ret = 0;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index cdecbc8fe965..8847b4d8be06 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -155,16 +155,16 @@ err_proto_put:
}
static int
-ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
int event, struct ctnl_timeout *timeout)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned int flags = pid ? NLM_F_MULTI : 0;
+ unsigned int flags = portid ? NLM_F_MULTI : 0;
struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
if (nlh == NULL)
goto nlmsg_failure;
@@ -222,7 +222,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (last && cur != last)
continue;
- if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) {
@@ -268,7 +268,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
break;
}
- ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid,
+ ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid,
nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type),
IPCTNL_MSG_TIMEOUT_NEW, cur);
@@ -276,7 +276,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
kfree_skb(skb2);
break;
}
- ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid,
+ ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
MSG_DONTWAIT);
if (ret > 0)
ret = 0;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 169ab59ed9d4..9f199f2e31fa 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -55,7 +55,8 @@ struct nfulnl_instance {
unsigned int qlen; /* number of nlmsgs in skb */
struct sk_buff *skb; /* pre-allocatd skb */
struct timer_list timer;
- int peer_pid; /* PID of the peer process */
+ struct user_namespace *peer_user_ns; /* User namespace of the peer process */
+ int peer_portid; /* PORTID of the peer process */
/* configurable parameters */
unsigned int flushtimeout; /* timeout until queue flush */
@@ -132,7 +133,7 @@ instance_put(struct nfulnl_instance *inst)
static void nfulnl_timer(unsigned long data);
static struct nfulnl_instance *
-instance_create(u_int16_t group_num, int pid)
+instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
int err;
@@ -162,7 +163,8 @@ instance_create(u_int16_t group_num, int pid)
setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
- inst->peer_pid = pid;
+ inst->peer_user_ns = user_ns;
+ inst->peer_portid = portid;
inst->group_num = group_num;
inst->qthreshold = NFULNL_QTHRESH_DEFAULT;
@@ -334,7 +336,7 @@ __nfulnl_send(struct nfulnl_instance *inst)
if (!nlh)
goto out;
}
- status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
+ status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid,
MSG_DONTWAIT);
inst->qlen = 0;
@@ -381,6 +383,7 @@ __build_packet_message(struct nfulnl_instance *inst,
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
sk_buff_data_t old_tail = inst->skb->tail;
+ struct sock *sk;
nlh = nlmsg_put(inst->skb, 0, 0,
NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
@@ -480,7 +483,7 @@ __build_packet_message(struct nfulnl_instance *inst,
}
if (indev && skb_mac_header_was_set(skb)) {
- if (nla_put_be32(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
+ if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
nla_put_be16(inst->skb, NFULA_HWLEN,
htons(skb->dev->hard_header_len)) ||
nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len,
@@ -499,18 +502,21 @@ __build_packet_message(struct nfulnl_instance *inst,
}
/* UID */
- if (skb->sk) {
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket && skb->sk->sk_socket->file) {
- struct file *file = skb->sk->sk_socket->file;
- __be32 uid = htonl(file->f_cred->fsuid);
- __be32 gid = htonl(file->f_cred->fsgid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ sk = skb->sk;
+ if (sk && sk->sk_state != TCP_TIME_WAIT) {
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ struct file *file = sk->sk_socket->file;
+ const struct cred *cred = file->f_cred;
+ struct user_namespace *user_ns = inst->peer_user_ns;
+ __be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid));
+ __be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid));
+ read_unlock_bh(&sk->sk_callback_lock);
if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
nla_put_be32(inst->skb, NFULA_GID, gid))
goto nla_put_failure;
} else
- read_unlock_bh(&skb->sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
/* local sequence number */
@@ -698,7 +704,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
int i;
- /* destroy all instances for this pid */
+ /* destroy all instances for this portid */
spin_lock_bh(&instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *tmp, *t2;
@@ -707,7 +713,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
if ((net_eq(n->net, &init_net)) &&
- (n->pid == inst->peer_pid))
+ (n->portid == inst->peer_portid))
__instance_destroy(inst);
}
}
@@ -769,7 +775,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
inst = instance_lookup_get(group_num);
- if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
+ if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
ret = -EPERM;
goto out_put;
}
@@ -783,7 +789,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
}
inst = instance_create(group_num,
- NETLINK_CB(skb).pid);
+ NETLINK_CB(skb).portid,
+ sk_user_ns(NETLINK_CB(skb).ssk));
if (IS_ERR(inst)) {
ret = PTR_ERR(inst);
goto out;
@@ -941,7 +948,7 @@ static int seq_show(struct seq_file *s, void *v)
return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
inst->group_num,
- inst->peer_pid, inst->qlen,
+ inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
inst->flushtimeout, atomic_read(&inst->use));
}
@@ -996,8 +1003,10 @@ static int __init nfnetlink_log_init(void)
#ifdef CONFIG_PROC_FS
if (!proc_create("nfnetlink_log", 0440,
- proc_net_netfilter, &nful_file_ops))
+ proc_net_netfilter, &nful_file_ops)) {
+ status = -ENOMEM;
goto cleanup_logger;
+ }
#endif
return status;
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index c0496a55ad0c..e12d44e75b21 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -44,7 +44,7 @@ struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
- int peer_pid;
+ int peer_portid;
unsigned int queue_maxlen;
unsigned int copy_range;
unsigned int queue_dropped;
@@ -92,7 +92,7 @@ instance_lookup(u_int16_t queue_num)
}
static struct nfqnl_instance *
-instance_create(u_int16_t queue_num, int pid)
+instance_create(u_int16_t queue_num, int portid)
{
struct nfqnl_instance *inst;
unsigned int h;
@@ -111,7 +111,7 @@ instance_create(u_int16_t queue_num, int pid)
}
inst->queue_num = queue_num;
- inst->peer_pid = pid;
+ inst->peer_portid = portid;
inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
inst->copy_range = 0xfffff;
inst->copy_mode = NFQNL_COPY_NONE;
@@ -225,7 +225,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
{
sk_buff_data_t old_tail;
size_t size;
- size_t data_len = 0;
+ size_t data_len = 0, cap_len = 0;
struct sk_buff *skb;
struct nlattr *nla;
struct nfqnl_msg_packet_hdr *pmsg;
@@ -247,7 +247,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
#endif
+ nla_total_size(sizeof(u_int32_t)) /* mark */
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
- + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
+ + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)
+ + nla_total_size(sizeof(u_int32_t))); /* cap_len */
outdev = entry->outdev;
@@ -266,6 +267,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
data_len = entskb->len;
size += nla_total_size(data_len);
+ cap_len = entskb->len;
break;
}
@@ -402,12 +404,14 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
goto nla_put_failure;
+ if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+ goto nla_put_failure;
+
nlh->nlmsg_len = skb->tail - old_tail;
return skb;
nla_put_failure:
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
net_err_ratelimited("nf_queue: error creating packet message\n");
return NULL;
}
@@ -440,7 +444,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
}
spin_lock_bh(&queue->lock);
- if (!queue->peer_pid) {
+ if (!queue->peer_portid) {
err = -EINVAL;
goto err_out_free_nskb;
}
@@ -459,7 +463,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
*packet_id_ptr = htonl(entry->id);
/* nfnetlink_unicast will either free the nskb or add it to a socket */
- err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
+ err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT);
if (err < 0) {
queue->queue_user_dropped++;
goto err_out_unlock;
@@ -527,9 +531,13 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
case NFQNL_COPY_PACKET:
queue->copy_mode = mode;
- /* we're using struct nlattr which has 16bit nla_len */
- if (range > 0xffff)
- queue->copy_range = 0xffff;
+ /* We're using struct nlattr which has 16bit nla_len. Note that
+ * nla_len includes the header length. Thus, the maximum packet
+ * length that we support is 65531 bytes. We send truncated
+ * packets if the specified length is larger than that.
+ */
+ if (range > 0xffff - NLA_HDRLEN)
+ queue->copy_range = 0xffff - NLA_HDRLEN;
else
queue->copy_range = range;
break;
@@ -616,7 +624,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
int i;
- /* destroy all instances for this pid */
+ /* destroy all instances for this portid */
spin_lock(&instances_lock);
for (i = 0; i < INSTANCE_BUCKETS; i++) {
struct hlist_node *tmp, *t2;
@@ -625,7 +633,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
if ((n->net == &init_net) &&
- (n->pid == inst->peer_pid))
+ (n->portid == inst->peer_portid))
__instance_destroy(inst);
}
}
@@ -650,7 +658,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
[NFQA_MARK] = { .type = NLA_U32 },
};
-static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid)
+static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid)
{
struct nfqnl_instance *queue;
@@ -658,7 +666,7 @@ static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid)
if (!queue)
return ERR_PTR(-ENODEV);
- if (queue->peer_pid != nlpid)
+ if (queue->peer_portid != nlportid)
return ERR_PTR(-EPERM);
return queue;
@@ -698,7 +706,7 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
LIST_HEAD(batch_list);
u16 queue_num = ntohs(nfmsg->res_id);
- queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
+ queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
@@ -749,7 +757,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
queue = instance_lookup(queue_num);
if (!queue)
- queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
+ queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
if (IS_ERR(queue))
return PTR_ERR(queue);
@@ -832,7 +840,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
rcu_read_lock();
queue = instance_lookup(queue_num);
- if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
+ if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
ret = -EPERM;
goto err_out_unlock;
}
@@ -844,7 +852,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ret = -EBUSY;
goto err_out_unlock;
}
- queue = instance_create(queue_num, NETLINK_CB(skb).pid);
+ queue = instance_create(queue_num, NETLINK_CB(skb).portid);
if (IS_ERR(queue)) {
ret = PTR_ERR(queue);
goto err_out_unlock;
@@ -1016,7 +1024,7 @@ static int seq_show(struct seq_file *s, void *v)
return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
inst->queue_num,
- inst->peer_pid, inst->queue_total,
+ inst->peer_portid, inst->queue_total,
inst->copy_mode, inst->copy_range,
inst->queue_dropped, inst->queue_user_dropped,
inst->id_sequence, 1);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 116018560c60..16c712563860 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -72,14 +72,44 @@ static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
return 0;
}
+static int
+xt_ct_set_helper(struct nf_conn *ct, const char *helper_name,
+ const struct xt_tgchk_param *par)
+{
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help;
+ u8 proto;
+
+ proto = xt_ct_find_proto(par);
+ if (!proto) {
+ pr_info("You must specify a L4 protocol, and not use "
+ "inversions on it.\n");
+ return -ENOENT;
+ }
+
+ helper = nf_conntrack_helper_try_module_get(helper_name, par->family,
+ proto);
+ if (helper == NULL) {
+ pr_info("No such helper \"%s\"\n", helper_name);
+ return -ENOENT;
+ }
+
+ help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
+ if (help == NULL) {
+ module_put(helper->me);
+ return -ENOMEM;
+ }
+
+ help->helper = helper;
+ return 0;
+}
+
static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
{
struct xt_ct_target_info *info = par->targinfo;
struct nf_conntrack_tuple t;
- struct nf_conn_help *help;
struct nf_conn *ct;
- int ret = 0;
- u8 proto;
+ int ret;
if (info->flags & ~XT_CT_NOTRACK)
return -EINVAL;
@@ -112,31 +142,9 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
goto err3;
if (info->helper[0]) {
- struct nf_conntrack_helper *helper;
-
- ret = -ENOENT;
- proto = xt_ct_find_proto(par);
- if (!proto) {
- pr_info("You must specify a L4 protocol, "
- "and not use inversions on it.\n");
- goto err3;
- }
-
- ret = -ENOENT;
- helper = nf_conntrack_helper_try_module_get(info->helper,
- par->family,
- proto);
- if (helper == NULL) {
- pr_info("No such helper \"%s\"\n", info->helper);
- goto err3;
- }
-
- ret = -ENOMEM;
- help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
- if (help == NULL)
+ ret = xt_ct_set_helper(ct, info->helper, par);
+ if (ret < 0)
goto err3;
-
- help->helper = helper;
}
__set_bit(IPS_TEMPLATE_BIT, &ct->status);
@@ -164,17 +172,77 @@ static void __xt_ct_tg_timeout_put(struct ctnl_timeout *timeout)
}
#endif
+static int
+xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
+ const char *timeout_name)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+ typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
+ struct ctnl_timeout *timeout;
+ struct nf_conn_timeout *timeout_ext;
+ const struct ipt_entry *e = par->entryinfo;
+ struct nf_conntrack_l4proto *l4proto;
+ int ret = 0;
+
+ rcu_read_lock();
+ timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook);
+ if (timeout_find_get == NULL) {
+ ret = -ENOENT;
+ pr_info("Timeout policy base is empty\n");
+ goto out;
+ }
+
+ if (e->ip.invflags & IPT_INV_PROTO) {
+ ret = -EINVAL;
+ pr_info("You cannot use inversion on L4 protocol\n");
+ goto out;
+ }
+
+ timeout = timeout_find_get(timeout_name);
+ if (timeout == NULL) {
+ ret = -ENOENT;
+ pr_info("No such timeout policy \"%s\"\n", timeout_name);
+ goto out;
+ }
+
+ if (timeout->l3num != par->family) {
+ ret = -EINVAL;
+ pr_info("Timeout policy `%s' can only be used by L3 protocol "
+ "number %d\n", timeout_name, timeout->l3num);
+ goto err_put_timeout;
+ }
+ /* Make sure the timeout policy matches any existing protocol tracker,
+ * otherwise default to generic.
+ */
+ l4proto = __nf_ct_l4proto_find(par->family, e->ip.proto);
+ if (timeout->l4proto->l4proto != l4proto->l4proto) {
+ ret = -EINVAL;
+ pr_info("Timeout policy `%s' can only be used by L4 protocol "
+ "number %d\n",
+ timeout_name, timeout->l4proto->l4proto);
+ goto err_put_timeout;
+ }
+ timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC);
+ if (timeout_ext == NULL)
+ ret = -ENOMEM;
+
+err_put_timeout:
+ __xt_ct_tg_timeout_put(timeout);
+out:
+ rcu_read_unlock();
+ return ret;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
{
struct xt_ct_target_info_v1 *info = par->targinfo;
struct nf_conntrack_tuple t;
- struct nf_conn_help *help;
struct nf_conn *ct;
- int ret = 0;
- u8 proto;
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- struct ctnl_timeout *timeout;
-#endif
+ int ret;
+
if (info->flags & ~XT_CT_NOTRACK)
return -EINVAL;
@@ -206,93 +274,16 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
goto err3;
if (info->helper[0]) {
- struct nf_conntrack_helper *helper;
-
- ret = -ENOENT;
- proto = xt_ct_find_proto(par);
- if (!proto) {
- pr_info("You must specify a L4 protocol, "
- "and not use inversions on it.\n");
- goto err3;
- }
-
- ret = -ENOENT;
- helper = nf_conntrack_helper_try_module_get(info->helper,
- par->family,
- proto);
- if (helper == NULL) {
- pr_info("No such helper \"%s\"\n", info->helper);
- goto err3;
- }
-
- ret = -ENOMEM;
- help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL);
- if (help == NULL)
+ ret = xt_ct_set_helper(ct, info->helper, par);
+ if (ret < 0)
goto err3;
-
- help->helper = helper;
}
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
if (info->timeout[0]) {
- typeof(nf_ct_timeout_find_get_hook) timeout_find_get;
- struct nf_conn_timeout *timeout_ext;
-
- rcu_read_lock();
- timeout_find_get =
- rcu_dereference(nf_ct_timeout_find_get_hook);
-
- if (timeout_find_get) {
- const struct ipt_entry *e = par->entryinfo;
- struct nf_conntrack_l4proto *l4proto;
-
- if (e->ip.invflags & IPT_INV_PROTO) {
- ret = -EINVAL;
- pr_info("You cannot use inversion on "
- "L4 protocol\n");
- goto err4;
- }
- timeout = timeout_find_get(info->timeout);
- if (timeout == NULL) {
- ret = -ENOENT;
- pr_info("No such timeout policy \"%s\"\n",
- info->timeout);
- goto err4;
- }
- if (timeout->l3num != par->family) {
- ret = -EINVAL;
- pr_info("Timeout policy `%s' can only be "
- "used by L3 protocol number %d\n",
- info->timeout, timeout->l3num);
- goto err5;
- }
- /* Make sure the timeout policy matches any existing
- * protocol tracker, otherwise default to generic.
- */
- l4proto = __nf_ct_l4proto_find(par->family,
- e->ip.proto);
- if (timeout->l4proto->l4proto != l4proto->l4proto) {
- ret = -EINVAL;
- pr_info("Timeout policy `%s' can only be "
- "used by L4 protocol number %d\n",
- info->timeout,
- timeout->l4proto->l4proto);
- goto err5;
- }
- timeout_ext = nf_ct_timeout_ext_add(ct, timeout,
- GFP_ATOMIC);
- if (timeout_ext == NULL) {
- ret = -ENOMEM;
- goto err5;
- }
- } else {
- ret = -ENOENT;
- pr_info("Timeout policy base is empty\n");
- goto err4;
- }
- rcu_read_unlock();
+ ret = xt_ct_set_timeout(ct, par, info->timeout);
+ if (ret < 0)
+ goto err3;
}
-#endif
__set_bit(IPS_TEMPLATE_BIT, &ct->status);
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
@@ -300,12 +291,6 @@ out:
info->ct = ct;
return 0;
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-err5:
- __xt_ct_tg_timeout_put(timeout);
-err4:
- rcu_read_unlock();
-#endif
err3:
nf_conntrack_free(ct);
err2:
@@ -330,15 +315,30 @@ static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par)
nf_ct_put(info->ct);
}
-static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par)
+static void xt_ct_destroy_timeout(struct nf_conn *ct)
{
- struct xt_ct_target_info_v1 *info = par->targinfo;
- struct nf_conn *ct = info->ct;
- struct nf_conn_help *help;
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
struct nf_conn_timeout *timeout_ext;
typeof(nf_ct_timeout_put_hook) timeout_put;
+
+ rcu_read_lock();
+ timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
+
+ if (timeout_put) {
+ timeout_ext = nf_ct_timeout_find(ct);
+ if (timeout_ext)
+ timeout_put(timeout_ext->timeout);
+ }
+ rcu_read_unlock();
#endif
+}
+
+static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par)
+{
+ struct xt_ct_target_info_v1 *info = par->targinfo;
+ struct nf_conn *ct = info->ct;
+ struct nf_conn_help *help;
+
if (!nf_ct_is_untracked(ct)) {
help = nfct_help(ct);
if (help)
@@ -346,17 +346,7 @@ static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par)
nf_ct_l3proto_module_put(par->family);
-#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
- rcu_read_lock();
- timeout_put = rcu_dereference(nf_ct_timeout_put_hook);
-
- if (timeout_put) {
- timeout_ext = nf_ct_timeout_find(ct);
- if (timeout_ext)
- timeout_put(timeout_ext->timeout);
- }
- rcu_read_unlock();
-#endif
+ xt_ct_destroy_timeout(ct);
}
nf_ct_put(info->ct);
}
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index ff5f75fddb15..fa40096940a1 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -145,6 +145,21 @@ static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb,
return 0;
}
+static void dump_sk_uid_gid(struct sbuff *m, struct sock *sk)
+{
+ if (!sk || sk->sk_state == TCP_TIME_WAIT)
+ return;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ if (sk->sk_socket && sk->sk_socket->file) {
+ const struct cred *cred = sk->sk_socket->file->f_cred;
+ sb_add(m, "UID=%u GID=%u ",
+ from_kuid_munged(&init_user_ns, cred->fsuid),
+ from_kgid_munged(&init_user_ns, cred->fsgid));
+ }
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
/* One level of recursion won't kill us */
static void dump_ipv4_packet(struct sbuff *m,
const struct nf_loginfo *info,
@@ -361,14 +376,8 @@ static void dump_ipv4_packet(struct sbuff *m,
}
/* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && !iphoff && skb->sk) {
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- sb_add(m, "UID=%u GID=%u ",
- skb->sk->sk_socket->file->f_cred->fsuid,
- skb->sk->sk_socket->file->f_cred->fsgid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
- }
+ if ((logflags & XT_LOG_UID) && !iphoff)
+ dump_sk_uid_gid(m, skb->sk);
/* Max length: 16 "MARK=0xFFFFFFFF " */
if (!iphoff && skb->mark)
@@ -436,8 +445,8 @@ log_packet_common(struct sbuff *m,
const struct nf_loginfo *loginfo,
const char *prefix)
{
- sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
- prefix,
+ sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
+ '0' + loginfo->u.log.level, prefix,
in ? in->name : "",
out ? out->name : "");
#ifdef CONFIG_BRIDGE_NETFILTER
@@ -717,14 +726,8 @@ static void dump_ipv6_packet(struct sbuff *m,
}
/* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && recurse && skb->sk) {
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- sb_add(m, "UID=%u GID=%u ",
- skb->sk->sk_socket->file->f_cred->fsuid,
- skb->sk->sk_socket->file->f_cred->fsgid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
- }
+ if ((logflags & XT_LOG_UID) && recurse)
+ dump_sk_uid_gid(m, skb->sk);
/* Max length: 16 "MARK=0xFFFFFFFF " */
if (!recurse && skb->mark)
diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c
new file mode 100644
index 000000000000..b253e07cb1c5
--- /dev/null
+++ b/net/netfilter/xt_NETMAP.c
@@ -0,0 +1,165 @@
+/*
+ * (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+
+static unsigned int
+netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+ struct nf_nat_range newrange;
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ union nf_inet_addr new_addr, netmask;
+ unsigned int i;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++)
+ netmask.ip6[i] = ~(range->min_addr.ip6[i] ^
+ range->max_addr.ip6[i]);
+
+ if (par->hooknum == NF_INET_PRE_ROUTING ||
+ par->hooknum == NF_INET_LOCAL_OUT)
+ new_addr.in6 = ipv6_hdr(skb)->daddr;
+ else
+ new_addr.in6 = ipv6_hdr(skb)->saddr;
+
+ for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) {
+ new_addr.ip6[i] &= ~netmask.ip6[i];
+ new_addr.ip6[i] |= range->min_addr.ip6[i] &
+ netmask.ip6[i];
+ }
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr = new_addr;
+ newrange.max_addr = new_addr;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+}
+
+static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+
+ if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
+ return -EINVAL;
+ return 0;
+}
+
+static unsigned int
+netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ __be32 new_ip, netmask;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_range newrange;
+
+ NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
+ par->hooknum == NF_INET_POST_ROUTING ||
+ par->hooknum == NF_INET_LOCAL_OUT ||
+ par->hooknum == NF_INET_LOCAL_IN);
+ ct = nf_ct_get(skb, &ctinfo);
+
+ netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
+
+ if (par->hooknum == NF_INET_PRE_ROUTING ||
+ par->hooknum == NF_INET_LOCAL_OUT)
+ new_ip = ip_hdr(skb)->daddr & ~netmask;
+ else
+ new_ip = ip_hdr(skb)->saddr & ~netmask;
+ new_ip |= mr->range[0].min_ip & netmask;
+
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = new_ip;
+ newrange.max_addr.ip = new_ip;
+ newrange.min_proto = mr->range[0].min;
+ newrange.max_proto = mr->range[0].max;
+
+ /* Hand modified range to generic setup. */
+ return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+}
+
+static int netmap_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+ if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
+ }
+ if (mr->rangesize != 1) {
+ pr_debug("bad rangesize %u.\n", mr->rangesize);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct xt_target netmap_tg_reg[] __read_mostly = {
+ {
+ .name = "NETMAP",
+ .family = NFPROTO_IPV6,
+ .revision = 0,
+ .target = netmap_tg6,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .checkentry = netmap_tg6_checkentry,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "NETMAP",
+ .family = NFPROTO_IPV4,
+ .revision = 0,
+ .target = netmap_tg4,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .checkentry = netmap_tg4_check,
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init netmap_tg_init(void)
+{
+ return xt_register_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg));
+}
+
+static void netmap_tg_exit(void)
+{
+ xt_unregister_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg));
+}
+
+module_init(netmap_tg_init);
+module_exit(netmap_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of subnets");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ip6t_NETMAP");
+MODULE_ALIAS("ipt_NETMAP");
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 7babe7d68716..817f9e9f2b16 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -43,7 +43,7 @@ static u32 hash_v4(const struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
/* packets in either direction go into same queue */
- if (iph->saddr < iph->daddr)
+ if ((__force u32)iph->saddr < (__force u32)iph->daddr)
return jhash_3words((__force u32)iph->saddr,
(__force u32)iph->daddr, iph->protocol, jhash_initval);
@@ -57,7 +57,8 @@ static u32 hash_v6(const struct sk_buff *skb)
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
u32 a, b, c;
- if (ip6h->saddr.s6_addr32[3] < ip6h->daddr.s6_addr32[3]) {
+ if ((__force u32)ip6h->saddr.s6_addr32[3] <
+ (__force u32)ip6h->daddr.s6_addr32[3]) {
a = (__force u32) ip6h->saddr.s6_addr32[3];
b = (__force u32) ip6h->daddr.s6_addr32[3];
} else {
@@ -65,7 +66,8 @@ static u32 hash_v6(const struct sk_buff *skb)
a = (__force u32) ip6h->daddr.s6_addr32[3];
}
- if (ip6h->saddr.s6_addr32[1] < ip6h->daddr.s6_addr32[1])
+ if ((__force u32)ip6h->saddr.s6_addr32[1] <
+ (__force u32)ip6h->daddr.s6_addr32[1])
c = (__force u32) ip6h->saddr.s6_addr32[1];
else
c = (__force u32) ip6h->daddr.s6_addr32[1];
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
deleted file mode 100644
index 9d782181b6c8..000000000000
--- a/net/netfilter/xt_NOTRACK.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/* This is a module which is used for setting up fake conntracks
- * on packets so that they are not seen by the conntrack/NAT code.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_conntrack.h>
-
-MODULE_DESCRIPTION("Xtables: Disabling connection tracking for packets");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ipt_NOTRACK");
-MODULE_ALIAS("ip6t_NOTRACK");
-
-static unsigned int
-notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
- /* Previously seen (loopback)? Ignore. */
- if (skb->nfct != NULL)
- return XT_CONTINUE;
-
- /* Attach fake conntrack entry.
- If there is a real ct entry correspondig to this packet,
- it'll hang aroun till timing out. We don't deal with it
- for performance reasons. JK */
- skb->nfct = &nf_ct_untracked_get()->ct_general;
- skb->nfctinfo = IP_CT_NEW;
- nf_conntrack_get(skb->nfct);
-
- return XT_CONTINUE;
-}
-
-static struct xt_target notrack_tg_reg __read_mostly = {
- .name = "NOTRACK",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .target = notrack_tg,
- .table = "raw",
- .me = THIS_MODULE,
-};
-
-static int __init notrack_tg_init(void)
-{
- return xt_register_target(&notrack_tg_reg);
-}
-
-static void __exit notrack_tg_exit(void)
-{
- xt_unregister_target(&notrack_tg_reg);
-}
-
-module_init(notrack_tg_init);
-module_exit(notrack_tg_exit);
diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c
new file mode 100644
index 000000000000..22a10309297c
--- /dev/null
+++ b/net/netfilter/xt_REDIRECT.c
@@ -0,0 +1,190 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/types.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/addrconf.h>
+#include <net/checksum.h>
+#include <net/protocol.h>
+#include <net/netfilter/nf_nat.h>
+
+static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
+
+static unsigned int
+redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+ struct nf_nat_range newrange;
+ struct in6_addr newdst;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (par->hooknum == NF_INET_LOCAL_OUT)
+ newdst = loopback_addr;
+ else {
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ bool addr = false;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(skb->dev);
+ if (idev != NULL) {
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ newdst = ifa->addr;
+ addr = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!addr)
+ return NF_DROP;
+ }
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.in6 = newdst;
+ newrange.max_addr.in6 = newdst;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
+}
+
+static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+
+ if (range->flags & NF_NAT_RANGE_MAP_IPS)
+ return -EINVAL;
+ return 0;
+}
+
+/* FIXME: Take multiple ranges --RR */
+static int redirect_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+ if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
+ }
+ if (mr->rangesize != 1) {
+ pr_debug("bad rangesize %u.\n", mr->rangesize);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static unsigned int
+redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ __be32 newdst;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_range newrange;
+
+ NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
+ par->hooknum == NF_INET_LOCAL_OUT);
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+ /* Local packets: make them go to loopback */
+ if (par->hooknum == NF_INET_LOCAL_OUT)
+ newdst = htonl(0x7F000001);
+ else {
+ struct in_device *indev;
+ struct in_ifaddr *ifa;
+
+ newdst = 0;
+
+ rcu_read_lock();
+ indev = __in_dev_get_rcu(skb->dev);
+ if (indev && (ifa = indev->ifa_list))
+ newdst = ifa->ifa_local;
+ rcu_read_unlock();
+
+ if (!newdst)
+ return NF_DROP;
+ }
+
+ /* Transfer from original range. */
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = newdst;
+ newrange.max_addr.ip = newdst;
+ newrange.min_proto = mr->range[0].min;
+ newrange.max_proto = mr->range[0].max;
+
+ /* Hand modified range to generic setup. */
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
+}
+
+static struct xt_target redirect_tg_reg[] __read_mostly = {
+ {
+ .name = "REDIRECT",
+ .family = NFPROTO_IPV6,
+ .revision = 0,
+ .table = "nat",
+ .checkentry = redirect_tg6_checkentry,
+ .target = redirect_tg6,
+ .targetsize = sizeof(struct nf_nat_range),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "REDIRECT",
+ .family = NFPROTO_IPV4,
+ .revision = 0,
+ .table = "nat",
+ .target = redirect_tg4,
+ .checkentry = redirect_tg4_check,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init redirect_tg_init(void)
+{
+ return xt_register_targets(redirect_tg_reg,
+ ARRAY_SIZE(redirect_tg_reg));
+}
+
+static void __exit redirect_tg_exit(void)
+{
+ xt_unregister_targets(redirect_tg_reg, ARRAY_SIZE(redirect_tg_reg));
+}
+
+module_init(redirect_tg_init);
+module_exit(redirect_tg_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
+MODULE_ALIAS("ip6t_REDIRECT");
+MODULE_ALIAS("ipt_REDIRECT");
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 5c22ce8ab309..a4c1e4528cac 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -117,11 +117,11 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
/* For SMP, we only want to use one set of state. */
r->master = priv;
+ /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
+ 128. */
+ priv->prev = jiffies;
+ priv->credit = user2credits(r->avg * r->burst); /* Credits full. */
if (r->cost == 0) {
- /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
- 128. */
- priv->prev = jiffies;
- priv->credit = user2credits(r->avg * r->burst); /* Credits full. */
r->credit_cap = priv->credit; /* Credits full. */
r->cost = user2credits(r->avg);
}
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
new file mode 100644
index 000000000000..81aafa8e4fef
--- /dev/null
+++ b/net/netfilter/xt_nat.c
@@ -0,0 +1,170 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat_core.h>
+
+static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+ if (mr->rangesize != 1) {
+ pr_info("%s: multiple ranges no longer supported\n",
+ par->target->name);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void xt_nat_convert_range(struct nf_nat_range *dst,
+ const struct nf_nat_ipv4_range *src)
+{
+ memset(&dst->min_addr, 0, sizeof(dst->min_addr));
+ memset(&dst->max_addr, 0, sizeof(dst->max_addr));
+
+ dst->flags = src->flags;
+ dst->min_addr.ip = src->min_ip;
+ dst->max_addr.ip = src->max_ip;
+ dst->min_proto = src->min;
+ dst->max_proto = src->max;
+}
+
+static unsigned int
+xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_range range;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ xt_nat_convert_range(&range, &mr->range[0]);
+ return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+ struct nf_nat_range range;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+ xt_nat_convert_range(&range, &mr->range[0]);
+ return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+
+static unsigned int
+xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
+
+ return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct nf_nat_range *range = par->targinfo;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ NF_CT_ASSERT(ct != NULL &&
+ (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+ return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
+}
+
+static struct xt_target xt_nat_target_reg[] __read_mostly = {
+ {
+ .name = "SNAT",
+ .revision = 0,
+ .checkentry = xt_nat_checkentry_v0,
+ .target = xt_snat_target_v0,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .family = NFPROTO_IPV4,
+ .table = "nat",
+ .hooks = (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DNAT",
+ .revision = 0,
+ .checkentry = xt_nat_checkentry_v0,
+ .target = xt_dnat_target_v0,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .family = NFPROTO_IPV4,
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SNAT",
+ .revision = 1,
+ .target = xt_snat_target_v1,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "DNAT",
+ .revision = 1,
+ .target = xt_dnat_target_v1,
+ .targetsize = sizeof(struct nf_nat_range),
+ .table = "nat",
+ .hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ },
+};
+
+static int __init xt_nat_init(void)
+{
+ return xt_register_targets(xt_nat_target_reg,
+ ARRAY_SIZE(xt_nat_target_reg));
+}
+
+static void __exit xt_nat_exit(void)
+{
+ xt_unregister_targets(xt_nat_target_reg, ARRAY_SIZE(xt_nat_target_reg));
+}
+
+module_init(xt_nat_init);
+module_exit(xt_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ipt_SNAT");
+MODULE_ALIAS("ipt_DNAT");
+MODULE_ALIAS("ip6t_SNAT");
+MODULE_ALIAS("ip6t_DNAT");
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 846f895cb656..a5e673d32bda 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -269,7 +269,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
mss <<= 8;
mss |= optp[2];
- mss = ntohs(mss);
+ mss = ntohs((__force __be16)mss);
break;
case OSFOPT_TS:
loop_cont = 1;
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 772d7389b337..ca2e577ed8ac 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -17,6 +17,17 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_owner.h>
+static int owner_check(const struct xt_mtchk_param *par)
+{
+ struct xt_owner_match_info *info = par->matchinfo;
+
+ /* For now only allow adding matches from the initial user namespace */
+ if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) &&
+ (current_user_ns() != &init_user_ns))
+ return -EINVAL;
+ return 0;
+}
+
static bool
owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -37,17 +48,23 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ((info->match ^ info->invert) &
(XT_OWNER_UID | XT_OWNER_GID)) == 0;
- if (info->match & XT_OWNER_UID)
- if ((filp->f_cred->fsuid >= info->uid_min &&
- filp->f_cred->fsuid <= info->uid_max) ^
+ if (info->match & XT_OWNER_UID) {
+ kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
+ kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);
+ if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
+ uid_lte(filp->f_cred->fsuid, uid_max)) ^
!(info->invert & XT_OWNER_UID))
return false;
+ }
- if (info->match & XT_OWNER_GID)
- if ((filp->f_cred->fsgid >= info->gid_min &&
- filp->f_cred->fsgid <= info->gid_max) ^
+ if (info->match & XT_OWNER_GID) {
+ kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
+ kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
+ if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
+ gid_lte(filp->f_cred->fsgid, gid_max)) ^
!(info->invert & XT_OWNER_GID))
return false;
+ }
return true;
}
@@ -56,6 +73,7 @@ static struct xt_match owner_mt_reg __read_mostly = {
.name = "owner",
.revision = 1,
.family = NFPROTO_UNSPEC,
+ .checkentry = owner_check,
.match = owner_mt,
.matchsize = sizeof(struct xt_owner_match_info),
.hooks = (1 << NF_INET_LOCAL_OUT) |
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index ae2ad1eec8d0..4635c9b00459 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -317,6 +317,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
struct recent_table *t;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *pde;
+ kuid_t uid;
+ kgid_t gid;
#endif
unsigned int i;
int ret = -EINVAL;
@@ -372,6 +374,13 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
for (i = 0; i < ip_list_hash_size; i++)
INIT_LIST_HEAD(&t->iphash[i]);
#ifdef CONFIG_PROC_FS
+ uid = make_kuid(&init_user_ns, ip_list_uid);
+ gid = make_kgid(&init_user_ns, ip_list_gid);
+ if (!uid_valid(uid) || !gid_valid(gid)) {
+ kfree(t);
+ ret = -EINVAL;
+ goto out;
+ }
pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent,
&recent_mt_fops, t);
if (pde == NULL) {
@@ -379,8 +388,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
ret = -ENOMEM;
goto out;
}
- pde->uid = ip_list_uid;
- pde->gid = ip_list_gid;
+ pde->uid = uid;
+ pde->gid = gid;
#endif
spin_lock_bh(&recent_lock);
list_add_tail(&t->list, &recent_net->tables);
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index c6f7db720d84..865a9e54f3ad 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -356,6 +356,27 @@ static struct xt_match set_matches[] __read_mostly = {
.destroy = set_match_v1_destroy,
.me = THIS_MODULE
},
+ /* --return-nomatch flag support */
+ {
+ .name = "set",
+ .family = NFPROTO_IPV4,
+ .revision = 2,
+ .match = set_match_v1,
+ .matchsize = sizeof(struct xt_set_info_match_v1),
+ .checkentry = set_match_v1_checkentry,
+ .destroy = set_match_v1_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "set",
+ .family = NFPROTO_IPV6,
+ .revision = 2,
+ .match = set_match_v1,
+ .matchsize = sizeof(struct xt_set_info_match_v1),
+ .checkentry = set_match_v1_checkentry,
+ .destroy = set_match_v1_destroy,
+ .me = THIS_MODULE
+ },
};
static struct xt_target set_targets[] __read_mostly = {
@@ -389,6 +410,7 @@ static struct xt_target set_targets[] __read_mostly = {
.destroy = set_target_v1_destroy,
.me = THIS_MODULE
},
+ /* --timeout and --exist flags support */
{
.name = "SET",
.revision = 2,
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 9ea482d08cf7..63b2bdb59e95 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -108,9 +108,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct iphdr *iph = ip_hdr(skb);
struct udphdr _hdr, *hp = NULL;
struct sock *sk;
- __be32 daddr, saddr;
- __be16 dport, sport;
- u8 protocol;
+ __be32 uninitialized_var(daddr), uninitialized_var(saddr);
+ __be16 uninitialized_var(dport), uninitialized_var(sport);
+ u8 uninitialized_var(protocol);
#ifdef XT_SOCKET_HAVE_CONNTRACK
struct nf_conn const *ct;
enum ip_conntrack_info ctinfo;
@@ -261,9 +261,9 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
struct ipv6hdr *iph = ipv6_hdr(skb);
struct udphdr _hdr, *hp = NULL;
struct sock *sk;
- struct in6_addr *daddr, *saddr;
- __be16 dport, sport;
- int thoff = 0, tproto;
+ struct in6_addr *daddr = NULL, *saddr = NULL;
+ __be16 uninitialized_var(dport), uninitialized_var(sport);
+ int thoff = 0, uninitialized_var(tproto);
const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index c48975ff8ea2..0ae55a36f492 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -42,6 +42,7 @@ static const u_int16_t days_since_leapyear[] = {
*/
enum {
DSE_FIRST = 2039,
+ SECONDS_PER_DAY = 86400,
};
static const u_int16_t days_since_epoch[] = {
/* 2039 - 2030 */
@@ -78,7 +79,7 @@ static inline unsigned int localtime_1(struct xtm *r, time_t time)
unsigned int v, w;
/* Each day has 86400s, so finding the hour/minute is actually easy. */
- v = time % 86400;
+ v = time % SECONDS_PER_DAY;
r->second = v % 60;
w = v / 60;
r->minute = w % 60;
@@ -199,6 +200,18 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (packet_time < info->daytime_start &&
packet_time > info->daytime_stop)
return false;
+
+ /** if user asked to ignore 'next day', then e.g.
+ * '1 PM Wed, August 1st' should be treated
+ * like 'Tue 1 PM July 31st'.
+ *
+ * This also causes
+ * 'Monday, "23:00 to 01:00", to match for 2 hours, starting
+ * Monday 23:00 to Tuesday 01:00.
+ */
+ if ((info->flags & XT_TIME_CONTIGUOUS) &&
+ packet_time <= info->daytime_stop)
+ stamp -= SECONDS_PER_DAY;
}
localtime_2(&current_time, stamp);
@@ -227,6 +240,15 @@ static int time_mt_check(const struct xt_mtchk_param *par)
return -EDOM;
}
+ if (info->flags & ~XT_TIME_ALL_FLAGS) {
+ pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS);
+ return -EINVAL;
+ }
+
+ if ((info->flags & XT_TIME_CONTIGUOUS) &&
+ info->daytime_start < info->daytime_stop)
+ return -EINVAL;
+
return 0;
}
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 6bf878335d94..c15042f987bd 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -627,7 +627,7 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg)
struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg;
void *data;
- data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid,
cb_arg->seq, &netlbl_cipsov4_gnl_family,
NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL);
if (data == NULL)
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 4809e2e48b02..c5384ffc6146 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -448,7 +448,7 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
struct netlbl_domhsh_walk_arg *cb_arg = arg;
void *data;
- data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid,
cb_arg->seq, &netlbl_mgmt_gnl_family,
NLM_F_MULTI, NLBL_MGMT_C_LISTALL);
if (data == NULL)
@@ -613,7 +613,7 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb,
int ret_val = -ENOMEM;
void *data;
- data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ data = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&netlbl_mgmt_gnl_family, NLM_F_MULTI,
NLBL_MGMT_C_PROTOCOLS);
if (data == NULL)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index e7ff694f1049..847d495cd4de 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1096,7 +1096,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
char *secctx;
u32 secctx_len;
- data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+ data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid,
cb_arg->seq, &netlbl_unlabel_gnl_family,
NLM_F_MULTI, cmd);
if (data == NULL)
@@ -1541,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void)
* it is called is at bootup before the audit subsystem is reporting
* messages so don't worry to much about these values. */
security_task_getsecid(current, &audit_info.secid);
- audit_info.loginuid = 0;
+ audit_info.loginuid = GLOBAL_ROOT_UID;
audit_info.sessionid = 0;
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c
index 9fae63f10298..9650c4ad5f88 100644
--- a/net/netlabel/netlabel_user.c
+++ b/net/netlabel/netlabel_user.c
@@ -109,7 +109,7 @@ struct audit_buffer *netlbl_audit_start_common(int type,
return NULL;
audit_log_format(audit_buf, "netlabel: auid=%u ses=%u",
- audit_info->loginuid,
+ from_kuid(&init_user_ns, audit_info->loginuid),
audit_info->sessionid);
if (audit_info->secid != 0 &&
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5463969da45b..0f2e3ad69c47 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -67,8 +67,8 @@
struct netlink_sock {
/* struct sock has to be the first member of netlink_sock */
struct sock sk;
- u32 pid;
- u32 dst_pid;
+ u32 portid;
+ u32 dst_portid;
u32 dst_group;
u32 flags;
u32 subscriptions;
@@ -104,7 +104,7 @@ static inline int netlink_is_kernel(struct sock *sk)
return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}
-struct nl_pid_hash {
+struct nl_portid_hash {
struct hlist_head *table;
unsigned long rehash_time;
@@ -118,10 +118,10 @@ struct nl_pid_hash {
};
struct netlink_table {
- struct nl_pid_hash hash;
+ struct nl_portid_hash hash;
struct hlist_head mc_list;
struct listeners __rcu *listeners;
- unsigned int nl_nonroot;
+ unsigned int flags;
unsigned int groups;
struct mutex *cb_mutex;
struct module *module;
@@ -145,9 +145,9 @@ static inline u32 netlink_group_mask(u32 group)
return group ? 1 << (group - 1) : 0;
}
-static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
+static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
{
- return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
+ return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
}
static void netlink_destroy_callback(struct netlink_callback *cb)
@@ -239,17 +239,17 @@ netlink_unlock_table(void)
wake_up(&nl_table_wait);
}
-static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
+static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
- struct nl_pid_hash *hash = &nl_table[protocol].hash;
+ struct nl_portid_hash *hash = &nl_table[protocol].hash;
struct hlist_head *head;
struct sock *sk;
struct hlist_node *node;
read_lock(&nl_table_lock);
- head = nl_pid_hashfn(hash, pid);
+ head = nl_portid_hashfn(hash, portid);
sk_for_each(sk, node, head) {
- if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
+ if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
sock_hold(sk);
goto found;
}
@@ -260,7 +260,7 @@ found:
return sk;
}
-static struct hlist_head *nl_pid_hash_zalloc(size_t size)
+static struct hlist_head *nl_portid_hash_zalloc(size_t size)
{
if (size <= PAGE_SIZE)
return kzalloc(size, GFP_ATOMIC);
@@ -270,7 +270,7 @@ static struct hlist_head *nl_pid_hash_zalloc(size_t size)
get_order(size));
}
-static void nl_pid_hash_free(struct hlist_head *table, size_t size)
+static void nl_portid_hash_free(struct hlist_head *table, size_t size)
{
if (size <= PAGE_SIZE)
kfree(table);
@@ -278,7 +278,7 @@ static void nl_pid_hash_free(struct hlist_head *table, size_t size)
free_pages((unsigned long)table, get_order(size));
}
-static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
+static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
{
unsigned int omask, mask, shift;
size_t osize, size;
@@ -296,7 +296,7 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
size *= 2;
}
- table = nl_pid_hash_zalloc(size);
+ table = nl_portid_hash_zalloc(size);
if (!table)
return 0;
@@ -311,23 +311,23 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
struct hlist_node *node, *tmp;
sk_for_each_safe(sk, node, tmp, &otable[i])
- __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
+ __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
}
- nl_pid_hash_free(otable, osize);
+ nl_portid_hash_free(otable, osize);
hash->rehash_time = jiffies + 10 * 60 * HZ;
return 1;
}
-static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
+static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
{
int avg = hash->entries >> hash->shift;
- if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
+ if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
return 1;
if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
- nl_pid_hash_rehash(hash, 0);
+ nl_portid_hash_rehash(hash, 0);
return 1;
}
@@ -356,9 +356,9 @@ netlink_update_listeners(struct sock *sk)
* makes sure updates are visible before bind or setsockopt return. */
}
-static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
+static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
{
- struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+ struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
struct hlist_head *head;
int err = -EADDRINUSE;
struct sock *osk;
@@ -366,10 +366,10 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
int len;
netlink_table_grab();
- head = nl_pid_hashfn(hash, pid);
+ head = nl_portid_hashfn(hash, portid);
len = 0;
sk_for_each(osk, node, head) {
- if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
+ if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
break;
len++;
}
@@ -377,17 +377,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
goto err;
err = -EBUSY;
- if (nlk_sk(sk)->pid)
+ if (nlk_sk(sk)->portid)
goto err;
err = -ENOMEM;
if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
goto err;
- if (len && nl_pid_hash_dilute(hash, len))
- head = nl_pid_hashfn(hash, pid);
+ if (len && nl_portid_hash_dilute(hash, len))
+ head = nl_portid_hashfn(hash, portid);
hash->entries++;
- nlk_sk(sk)->pid = pid;
+ nlk_sk(sk)->portid = portid;
sk_add_node(sk, head);
err = 0;
@@ -518,11 +518,11 @@ static int netlink_release(struct socket *sock)
skb_queue_purge(&sk->sk_write_queue);
- if (nlk->pid) {
+ if (nlk->portid) {
struct netlink_notify n = {
.net = sock_net(sk),
.protocol = sk->sk_protocol,
- .pid = nlk->pid,
+ .portid = nlk->portid,
};
atomic_notifier_call_chain(&netlink_chain,
NETLINK_URELEASE, &n);
@@ -536,6 +536,8 @@ static int netlink_release(struct socket *sock)
if (--nl_table[sk->sk_protocol].registered == 0) {
kfree(nl_table[sk->sk_protocol].listeners);
nl_table[sk->sk_protocol].module = NULL;
+ nl_table[sk->sk_protocol].bind = NULL;
+ nl_table[sk->sk_protocol].flags = 0;
nl_table[sk->sk_protocol].registered = 0;
}
} else if (nlk->subscriptions) {
@@ -557,24 +559,24 @@ static int netlink_autobind(struct socket *sock)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
- struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+ struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
struct hlist_head *head;
struct sock *osk;
struct hlist_node *node;
- s32 pid = task_tgid_vnr(current);
+ s32 portid = task_tgid_vnr(current);
int err;
static s32 rover = -4097;
retry:
cond_resched();
netlink_table_grab();
- head = nl_pid_hashfn(hash, pid);
+ head = nl_portid_hashfn(hash, portid);
sk_for_each(osk, node, head) {
if (!net_eq(sock_net(osk), net))
continue;
- if (nlk_sk(osk)->pid == pid) {
- /* Bind collision, search negative pid values. */
- pid = rover--;
+ if (nlk_sk(osk)->portid == portid) {
+ /* Bind collision, search negative portid values. */
+ portid = rover--;
if (rover > -4097)
rover = -4097;
netlink_table_ungrab();
@@ -583,7 +585,7 @@ retry:
}
netlink_table_ungrab();
- err = netlink_insert(sk, net, pid);
+ err = netlink_insert(sk, net, portid);
if (err == -EADDRINUSE)
goto retry;
@@ -596,7 +598,7 @@ retry:
static inline int netlink_capable(const struct socket *sock, unsigned int flag)
{
- return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
+ return (nl_table[sock->sk->sk_protocol].flags & flag) ||
capable(CAP_NET_ADMIN);
}
@@ -659,15 +661,15 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
/* Only superuser is allowed to listen multicasts */
if (nladdr->nl_groups) {
- if (!netlink_capable(sock, NL_NONROOT_RECV))
+ if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
if (err)
return err;
}
- if (nlk->pid) {
- if (nladdr->nl_pid != nlk->pid)
+ if (nlk->portid) {
+ if (nladdr->nl_pid != nlk->portid)
return -EINVAL;
} else {
err = nladdr->nl_pid ?
@@ -713,7 +715,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
if (addr->sa_family == AF_UNSPEC) {
sk->sk_state = NETLINK_UNCONNECTED;
- nlk->dst_pid = 0;
+ nlk->dst_portid = 0;
nlk->dst_group = 0;
return 0;
}
@@ -721,15 +723,15 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
/* Only superuser is allowed to send multicasts */
- if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
+ if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
return -EPERM;
- if (!nlk->pid)
+ if (!nlk->portid)
err = netlink_autobind(sock);
if (err == 0) {
sk->sk_state = NETLINK_CONNECTED;
- nlk->dst_pid = nladdr->nl_pid;
+ nlk->dst_portid = nladdr->nl_pid;
nlk->dst_group = ffs(nladdr->nl_groups);
}
@@ -748,10 +750,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
*addr_len = sizeof(*nladdr);
if (peer) {
- nladdr->nl_pid = nlk->dst_pid;
+ nladdr->nl_pid = nlk->dst_portid;
nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
} else {
- nladdr->nl_pid = nlk->pid;
+ nladdr->nl_pid = nlk->portid;
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
}
return 0;
@@ -770,19 +772,19 @@ static void netlink_overrun(struct sock *sk)
atomic_inc(&sk->sk_drops);
}
-static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
+static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
struct sock *sock;
struct netlink_sock *nlk;
- sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
+ sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
if (!sock)
return ERR_PTR(-ECONNREFUSED);
/* Don't bother queuing skb if kernel socket has no input function */
nlk = nlk_sk(sock);
if (sock->sk_state == NETLINK_CONNECTED &&
- nlk->dst_pid != nlk_sk(ssk)->pid) {
+ nlk->dst_portid != nlk_sk(ssk)->portid) {
sock_put(sock);
return ERR_PTR(-ECONNREFUSED);
}
@@ -912,7 +914,8 @@ static void netlink_rcv_wake(struct sock *sk)
wake_up_interruptible(&nlk->wait);
}
-static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
+static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
+ struct sock *ssk)
{
int ret;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -921,6 +924,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
if (nlk->netlink_rcv != NULL) {
ret = skb->len;
skb_set_owner_r(skb, sk);
+ NETLINK_CB(skb).ssk = ssk;
nlk->netlink_rcv(skb);
consume_skb(skb);
} else {
@@ -931,7 +935,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
}
int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
- u32 pid, int nonblock)
+ u32 portid, int nonblock)
{
struct sock *sk;
int err;
@@ -941,13 +945,13 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
timeo = sock_sndtimeo(ssk, nonblock);
retry:
- sk = netlink_getsockbypid(ssk, pid);
+ sk = netlink_getsockbyportid(ssk, portid);
if (IS_ERR(sk)) {
kfree_skb(skb);
return PTR_ERR(sk);
}
if (netlink_is_kernel(sk))
- return netlink_unicast_kernel(sk, skb);
+ return netlink_unicast_kernel(sk, skb, ssk);
if (sk_filter(sk, skb)) {
err = skb->len;
@@ -1001,7 +1005,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
struct netlink_broadcast_data {
struct sock *exclude_sk;
struct net *net;
- u32 pid;
+ u32 portid;
u32 group;
int failure;
int delivery_failure;
@@ -1022,7 +1026,7 @@ static int do_one_broadcast(struct sock *sk,
if (p->exclude_sk == sk)
goto out;
- if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
+ if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
!test_bit(p->group - 1, nlk->groups))
goto out;
@@ -1074,7 +1078,7 @@ out:
return 0;
}
-int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
u32 group, gfp_t allocation,
int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
void *filter_data)
@@ -1088,7 +1092,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
info.exclude_sk = ssk;
info.net = net;
- info.pid = pid;
+ info.portid = portid;
info.group = group;
info.failure = 0;
info.delivery_failure = 0;
@@ -1126,17 +1130,17 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
}
EXPORT_SYMBOL(netlink_broadcast_filtered);
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
u32 group, gfp_t allocation)
{
- return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
+ return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);
struct netlink_set_err_data {
struct sock *exclude_sk;
- u32 pid;
+ u32 portid;
u32 group;
int code;
};
@@ -1152,7 +1156,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
goto out;
- if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
+ if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
!test_bit(p->group - 1, nlk->groups))
goto out;
@@ -1170,14 +1174,14 @@ out:
/**
* netlink_set_err - report error to broadcast listeners
* @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
- * @pid: the PID of a process that we want to skip (if any)
+ * @portid: the PORTID of a process that we want to skip (if any)
* @groups: the broadcast group that will notice the error
* @code: error code, must be negative (as usual in kernelspace)
*
* This function returns the number of broadcast listeners that have set the
* NETLINK_RECV_NO_ENOBUFS socket option.
*/
-int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
+int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
struct netlink_set_err_data info;
struct hlist_node *node;
@@ -1185,7 +1189,7 @@ int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
int ret = 0;
info.exclude_sk = ssk;
- info.pid = pid;
+ info.portid = portid;
info.group = group;
/* sk->sk_err wants a positive error value */
info.code = -code;
@@ -1242,7 +1246,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
break;
case NETLINK_ADD_MEMBERSHIP:
case NETLINK_DROP_MEMBERSHIP: {
- if (!netlink_capable(sock, NL_NONROOT_RECV))
+ if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
return -EPERM;
err = netlink_realloc_groups(sk);
if (err)
@@ -1350,7 +1354,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
struct sockaddr_nl *addr = msg->msg_name;
- u32 dst_pid;
+ u32 dst_portid;
u32 dst_group;
struct sk_buff *skb;
int err;
@@ -1362,7 +1366,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (NULL == siocb->scm)
siocb->scm = &scm;
- err = scm_send(sock, msg, siocb->scm);
+ err = scm_send(sock, msg, siocb->scm, true);
if (err < 0)
return err;
@@ -1370,17 +1374,18 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
err = -EINVAL;
if (addr->nl_family != AF_NETLINK)
goto out;
- dst_pid = addr->nl_pid;
+ dst_portid = addr->nl_pid;
dst_group = ffs(addr->nl_groups);
err = -EPERM;
- if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
+ if ((dst_group || dst_portid) &&
+ !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
goto out;
} else {
- dst_pid = nlk->dst_pid;
+ dst_portid = nlk->dst_portid;
dst_group = nlk->dst_group;
}
- if (!nlk->pid) {
+ if (!nlk->portid) {
err = netlink_autobind(sock);
if (err)
goto out;
@@ -1394,9 +1399,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (skb == NULL)
goto out;
- NETLINK_CB(skb).pid = nlk->pid;
+ NETLINK_CB(skb).portid = nlk->portid;
NETLINK_CB(skb).dst_group = dst_group;
- memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+ NETLINK_CB(skb).creds = siocb->scm->creds;
err = -EFAULT;
if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
@@ -1412,9 +1417,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (dst_group) {
atomic_inc(&skb->users);
- netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
+ netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
}
- err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+ err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
out:
scm_destroy(siocb->scm);
@@ -1477,7 +1482,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
addr->nl_family = AF_NETLINK;
addr->nl_pad = 0;
- addr->nl_pid = NETLINK_CB(skb).pid;
+ addr->nl_pid = NETLINK_CB(skb).portid;
addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
msg->msg_namelen = sizeof(*addr);
}
@@ -1521,9 +1526,8 @@ static void netlink_data_ready(struct sock *sk, int len)
*/
struct sock *
-netlink_kernel_create(struct net *net, int unit,
- struct module *module,
- struct netlink_kernel_cfg *cfg)
+__netlink_kernel_create(struct net *net, int unit, struct module *module,
+ struct netlink_kernel_cfg *cfg)
{
struct socket *sock;
struct sock *sk;
@@ -1577,7 +1581,10 @@ netlink_kernel_create(struct net *net, int unit,
rcu_assign_pointer(nl_table[unit].listeners, listeners);
nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
- nl_table[unit].bind = cfg ? cfg->bind : NULL;
+ if (cfg) {
+ nl_table[unit].bind = cfg->bind;
+ nl_table[unit].flags = cfg->flags;
+ }
nl_table[unit].registered = 1;
} else {
kfree(listeners);
@@ -1595,8 +1602,7 @@ out_sock_release_nosk:
sock_release(sock);
return NULL;
}
-EXPORT_SYMBOL(netlink_kernel_create);
-
+EXPORT_SYMBOL(__netlink_kernel_create);
void
netlink_kernel_release(struct sock *sk)
@@ -1676,15 +1682,8 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
netlink_table_ungrab();
}
-void netlink_set_nonroot(int protocol, unsigned int flags)
-{
- if ((unsigned int)protocol < MAX_LINKS)
- nl_table[protocol].nl_nonroot = flags;
-}
-EXPORT_SYMBOL(netlink_set_nonroot);
-
struct nlmsghdr *
-__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
+__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
{
struct nlmsghdr *nlh;
int size = NLMSG_LENGTH(len);
@@ -1693,7 +1692,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
nlh->nlmsg_type = type;
nlh->nlmsg_len = size;
nlh->nlmsg_flags = flags;
- nlh->nlmsg_pid = pid;
+ nlh->nlmsg_pid = portid;
nlh->nlmsg_seq = seq;
if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
@@ -1789,7 +1788,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
atomic_inc(&skb->users);
cb->skb = skb;
- sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
+ sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
if (sk == NULL) {
netlink_destroy_callback(cb);
return -ECONNREFUSED;
@@ -1837,7 +1836,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
sk = netlink_lookup(sock_net(in_skb->sk),
in_skb->sk->sk_protocol,
- NETLINK_CB(in_skb).pid);
+ NETLINK_CB(in_skb).portid);
if (sk) {
sk->sk_err = ENOBUFS;
sk->sk_error_report(sk);
@@ -1846,12 +1845,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
return;
}
- rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
NLMSG_ERROR, payload, 0);
errmsg = nlmsg_data(rep);
errmsg->error = err;
memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
- netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+ netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);
@@ -1901,33 +1900,33 @@ EXPORT_SYMBOL(netlink_rcv_skb);
* nlmsg_notify - send a notification netlink message
* @sk: netlink socket to use
* @skb: notification message
- * @pid: destination netlink pid for reports or 0
+ * @portid: destination netlink portid for reports or 0
* @group: destination multicast group or 0
* @report: 1 to report back, 0 to disable
* @flags: allocation flags
*/
-int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
+int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
unsigned int group, int report, gfp_t flags)
{
int err = 0;
if (group) {
- int exclude_pid = 0;
+ int exclude_portid = 0;
if (report) {
atomic_inc(&skb->users);
- exclude_pid = pid;
+ exclude_portid = portid;
}
/* errors reported via destination sk->sk_err, but propagate
* delivery errors if NETLINK_BROADCAST_ERROR flag is set */
- err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
+ err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
}
if (report) {
int err2;
- err2 = nlmsg_unicast(sk, skb, pid);
+ err2 = nlmsg_unicast(sk, skb, portid);
if (!err || err == -ESRCH)
err = err2;
}
@@ -1952,7 +1951,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
loff_t off = 0;
for (i = 0; i < MAX_LINKS; i++) {
- struct nl_pid_hash *hash = &nl_table[i].hash;
+ struct nl_portid_hash *hash = &nl_table[i].hash;
for (j = 0; j <= hash->mask; j++) {
sk_for_each(s, node, &hash->table[j]) {
@@ -2000,7 +1999,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
j = iter->hash_idx + 1;
do {
- struct nl_pid_hash *hash = &nl_table[i].hash;
+ struct nl_portid_hash *hash = &nl_table[i].hash;
for (; j <= hash->mask; j++) {
s = sk_head(&hash->table[j]);
@@ -2039,7 +2038,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
s,
s->sk_protocol,
- nlk->pid,
+ nlk->portid,
nlk->groups ? (u32)nlk->groups[0] : 0,
sk_rmem_alloc_get(s),
sk_wmem_alloc_get(s),
@@ -2147,6 +2146,7 @@ static void __init netlink_add_usersock_entry(void)
rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
nl_table[NETLINK_USERSOCK].registered = 1;
+ nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
netlink_table_ungrab();
}
@@ -2183,12 +2183,12 @@ static int __init netlink_proto_init(void)
order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
for (i = 0; i < MAX_LINKS; i++) {
- struct nl_pid_hash *hash = &nl_table[i].hash;
+ struct nl_portid_hash *hash = &nl_table[i].hash;
- hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
+ hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
if (!hash->table) {
while (i-- > 0)
- nl_pid_hash_free(nl_table[i].hash.table,
+ nl_portid_hash_free(nl_table[i].hash.table,
1 * sizeof(*hash->table));
kfree(nl_table);
goto panic;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index fda497412fc3..f2aabb6f4105 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -501,7 +501,7 @@ EXPORT_SYMBOL(genl_unregister_family);
/**
* genlmsg_put - Add generic netlink header to netlink message
* @skb: socket buffer holding the message
- * @pid: netlink pid the message is addressed to
+ * @portid: netlink portid the message is addressed to
* @seq: sequence number (usually the one of the sender)
* @family: generic netlink family
* @flags: netlink message flags
@@ -509,13 +509,13 @@ EXPORT_SYMBOL(genl_unregister_family);
*
* Returns pointer to user specific header
*/
-void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
+void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
struct genl_family *family, int flags, u8 cmd)
{
struct nlmsghdr *nlh;
struct genlmsghdr *hdr;
- nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN +
+ nlh = nlmsg_put(skb, portid, seq, family->id, GENL_HDRLEN +
family->hdrsize, flags);
if (nlh == NULL)
return NULL;
@@ -585,7 +585,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
info.snd_seq = nlh->nlmsg_seq;
- info.snd_pid = NETLINK_CB(skb).pid;
+ info.snd_portid = NETLINK_CB(skb).portid;
info.nlhdr = nlh;
info.genlhdr = nlmsg_data(nlh);
info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
@@ -626,12 +626,12 @@ static struct genl_family genl_ctrl = {
.netnsok = true,
};
-static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
+static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
u32 flags, struct sk_buff *skb, u8 cmd)
{
void *hdr;
- hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd);
+ hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd);
if (hdr == NULL)
return -1;
@@ -701,7 +701,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
+static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid,
u32 seq, u32 flags, struct sk_buff *skb,
u8 cmd)
{
@@ -709,7 +709,7 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
struct nlattr *nla_grps;
struct nlattr *nest;
- hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd);
+ hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd);
if (hdr == NULL)
return -1;
@@ -756,7 +756,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (++n < fams_to_skip)
continue;
- if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid,
+ if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
skb, CTRL_CMD_NEWFAMILY) < 0)
goto errout;
@@ -773,7 +773,7 @@ errout:
}
static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
- u32 pid, int seq, u8 cmd)
+ u32 portid, int seq, u8 cmd)
{
struct sk_buff *skb;
int err;
@@ -782,7 +782,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
if (skb == NULL)
return ERR_PTR(-ENOBUFS);
- err = ctrl_fill_info(family, pid, seq, 0, skb, cmd);
+ err = ctrl_fill_info(family, portid, seq, 0, skb, cmd);
if (err < 0) {
nlmsg_free(skb);
return ERR_PTR(err);
@@ -792,7 +792,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
}
static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp,
- u32 pid, int seq, u8 cmd)
+ u32 portid, int seq, u8 cmd)
{
struct sk_buff *skb;
int err;
@@ -801,7 +801,7 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp,
if (skb == NULL)
return ERR_PTR(-ENOBUFS);
- err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd);
+ err = ctrl_fill_mcgrp_info(grp, portid, seq, 0, skb, cmd);
if (err < 0) {
nlmsg_free(skb);
return ERR_PTR(err);
@@ -853,7 +853,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
return -ENOENT;
}
- msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq,
+ msg = ctrl_build_family_msg(res, info->snd_portid, info->snd_seq,
CTRL_CMD_NEWFAMILY);
if (IS_ERR(msg))
return PTR_ERR(msg);
@@ -918,11 +918,11 @@ static int __net_init genl_pernet_init(struct net *net)
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.cb_mutex = &genl_mutex,
+ .flags = NL_CFG_F_NONROOT_RECV,
};
/* we'll bump the group number right afterwards */
- net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC,
- THIS_MODULE, &cfg);
+ net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, &cfg);
if (!net->genl_sock && net_eq(net, &init_net))
panic("GENL: Cannot initialize generic netlink\n");
@@ -955,8 +955,6 @@ static int __init genl_init(void)
if (err < 0)
goto problem;
- netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
-
err = register_pernet_subsys(&genl_pernet_ops);
if (err)
goto problem;
@@ -973,7 +971,7 @@ problem:
subsys_initcall(genl_init);
-static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group,
+static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
gfp_t flags)
{
struct sk_buff *tmp;
@@ -988,7 +986,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group,
goto error;
}
err = nlmsg_multicast(prev->genl_sock, tmp,
- pid, group, flags);
+ portid, group, flags);
if (err)
goto error;
}
@@ -996,20 +994,20 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group,
prev = net;
}
- return nlmsg_multicast(prev->genl_sock, skb, pid, group, flags);
+ return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags);
error:
kfree_skb(skb);
return err;
}
-int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group,
+int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group,
gfp_t flags)
{
- return genlmsg_mcast(skb, pid, group, flags);
+ return genlmsg_mcast(skb, portid, group, flags);
}
EXPORT_SYMBOL(genlmsg_multicast_allns);
-void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group,
struct nlmsghdr *nlh, gfp_t flags)
{
struct sock *sk = net->genl_sock;
@@ -1018,6 +1016,6 @@ void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
if (nlh)
report = nlmsg_report(nlh);
- nlmsg_notify(sk, skb, pid, group, report, flags);
+ nlmsg_notify(sk, skb, portid, group, report, flags);
}
EXPORT_SYMBOL(genl_notify);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 06592d8b4a2b..7261eb81974f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -601,7 +601,7 @@ static int nr_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!capable(CAP_NET_BIND_SERVICE)) {
dev_put(dev);
release_sock(sk);
- return -EACCES;
+ return -EPERM;
}
nr->user_addr = addr->fsa_digipeater[0];
nr->source_addr = addr->fsa_ax25.sax25_call;
@@ -1169,7 +1169,12 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_flags |= MSG_TRUNC;
}
- skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ if (er < 0) {
+ skb_free_datagram(sk, skb);
+ release_sock(sk);
+ return er;
+ }
if (sax != NULL) {
sax->sax25_family = AF_NETROM;
diff --git a/net/nfc/core.c b/net/nfc/core.c
index ff749794bc5b..479bee36dc3e 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -679,7 +679,7 @@ static void nfc_release(struct device *d)
if (dev->ops->check_presence) {
del_timer_sync(&dev->check_pres_timer);
- destroy_workqueue(dev->check_pres_wq);
+ cancel_work_sync(&dev->check_pres_work);
}
nfc_genl_data_exit(&dev->genl_data);
@@ -715,7 +715,7 @@ static void nfc_check_pres_timeout(unsigned long data)
{
struct nfc_dev *dev = (struct nfc_dev *)data;
- queue_work(dev->check_pres_wq, &dev->check_pres_work);
+ schedule_work(&dev->check_pres_work);
}
struct class nfc_class = {
@@ -784,20 +784,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
dev->targets_generation = 1;
if (ops->check_presence) {
- char name[32];
init_timer(&dev->check_pres_timer);
dev->check_pres_timer.data = (unsigned long)dev;
dev->check_pres_timer.function = nfc_check_pres_timeout;
INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
- snprintf(name, sizeof(name), "nfc%d_check_pres_wq", dev->idx);
- dev->check_pres_wq = alloc_workqueue(name, WQ_NON_REENTRANT |
- WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1);
- if (dev->check_pres_wq == NULL) {
- kfree(dev);
- return NULL;
- }
}
return dev;
diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile
index f9c44b2fb065..c5dbb6891b24 100644
--- a/net/nfc/hci/Makefile
+++ b/net/nfc/hci/Makefile
@@ -4,5 +4,5 @@
obj-$(CONFIG_NFC_HCI) += hci.o
-hci-y := core.o hcp.o command.o
-hci-$(CONFIG_NFC_SHDLC) += shdlc.o
+hci-y := core.o hcp.o command.o llc.o llc_nop.o
+hci-$(CONFIG_NFC_SHDLC) += llc_shdlc.o
diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index 46362ef979db..71c6a7086b8f 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c
@@ -28,10 +28,29 @@
#include "hci.h"
-static void nfc_hci_execute_cb(struct nfc_hci_dev *hdev, int err,
- struct sk_buff *skb, void *cb_data)
+static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
+ const u8 *param, size_t param_len,
+ data_exchange_cb_t cb, void *cb_context)
{
- struct hcp_exec_waiter *hcp_ew = (struct hcp_exec_waiter *)cb_data;
+ pr_debug("exec cmd async through pipe=%d, cmd=%d, plen=%zd\n", pipe,
+ cmd, param_len);
+
+ /* TODO: Define hci cmd execution delay. Should it be the same
+ * for all commands?
+ */
+ return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd,
+ param, param_len, cb, cb_context, 3000);
+}
+
+/*
+ * HCI command execution completion callback.
+ * err will be a standard linux error (may be converted from HCI response)
+ * skb contains the response data and must be disposed, or may be NULL if
+ * an error occured
+ */
+static void nfc_hci_execute_cb(void *context, struct sk_buff *skb, int err)
+{
+ struct hcp_exec_waiter *hcp_ew = (struct hcp_exec_waiter *)context;
pr_debug("HCI Cmd completed with result=%d\n", err);
@@ -55,7 +74,8 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
hcp_ew.exec_complete = false;
hcp_ew.result_skb = NULL;
- pr_debug("through pipe=%d, cmd=%d, plen=%zd\n", pipe, cmd, param_len);
+ pr_debug("exec cmd sync through pipe=%d, cmd=%d, plen=%zd\n", pipe,
+ cmd, param_len);
/* TODO: Define hci cmd execution delay. Should it be the same
* for all commands?
@@ -133,6 +153,23 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
}
EXPORT_SYMBOL(nfc_hci_send_cmd);
+int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
+ const u8 *param, size_t param_len,
+ data_exchange_cb_t cb, void *cb_context)
+{
+ u8 pipe;
+
+ pr_debug("\n");
+
+ pipe = hdev->gate2pipe[gate];
+ if (pipe == NFC_HCI_INVALID_PIPE)
+ return -EADDRNOTAVAIL;
+
+ return nfc_hci_execute_cmd_async(hdev, pipe, cmd, param, param_len,
+ cb, cb_context);
+}
+EXPORT_SYMBOL(nfc_hci_send_cmd_async);
+
int nfc_hci_set_param(struct nfc_hci_dev *hdev, u8 gate, u8 idx,
const u8 *param, size_t param_len)
{
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 1ac7b3fac6c9..5fbb6e40793e 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -26,6 +26,7 @@
#include <net/nfc/nfc.h>
#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
#include "hci.h"
@@ -57,12 +58,11 @@ static void nfc_hci_msg_tx_work(struct work_struct *work)
if (hdev->cmd_pending_msg) {
if (timer_pending(&hdev->cmd_timer) == 0) {
if (hdev->cmd_pending_msg->cb)
- hdev->cmd_pending_msg->cb(hdev,
- -ETIME,
- NULL,
- hdev->
+ hdev->cmd_pending_msg->cb(hdev->
cmd_pending_msg->
- cb_context);
+ cb_context,
+ NULL,
+ -ETIME);
kfree(hdev->cmd_pending_msg);
hdev->cmd_pending_msg = NULL;
} else
@@ -78,12 +78,12 @@ next_msg:
pr_debug("msg_tx_queue has a cmd to send\n");
while ((skb = skb_dequeue(&msg->msg_frags)) != NULL) {
- r = hdev->ops->xmit(hdev, skb);
+ r = nfc_llc_xmit_from_hci(hdev->llc, skb);
if (r < 0) {
kfree_skb(skb);
skb_queue_purge(&msg->msg_frags);
if (msg->cb)
- msg->cb(hdev, r, NULL, msg->cb_context);
+ msg->cb(msg->cb_context, NULL, r);
kfree(msg);
break;
}
@@ -133,15 +133,15 @@ static void __nfc_hci_cmd_completion(struct nfc_hci_dev *hdev, int err,
del_timer_sync(&hdev->cmd_timer);
if (hdev->cmd_pending_msg->cb)
- hdev->cmd_pending_msg->cb(hdev, err, skb,
- hdev->cmd_pending_msg->cb_context);
+ hdev->cmd_pending_msg->cb(hdev->cmd_pending_msg->cb_context,
+ skb, err);
else
kfree_skb(skb);
kfree(hdev->cmd_pending_msg);
hdev->cmd_pending_msg = NULL;
- queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work);
+ schedule_work(&hdev->msg_tx_work);
}
void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result,
@@ -326,7 +326,7 @@ static void nfc_hci_cmd_timeout(unsigned long data)
{
struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data;
- queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work);
+ schedule_work(&hdev->msg_tx_work);
}
static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count,
@@ -398,8 +398,7 @@ disconnect_all:
nfc_hci_disconnect_all_gates(hdev);
exit:
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
return r;
}
@@ -470,29 +469,38 @@ static int hci_dev_up(struct nfc_dev *nfc_dev)
return r;
}
+ r = nfc_llc_start(hdev->llc);
+ if (r < 0)
+ goto exit_close;
+
r = hci_dev_session_init(hdev);
if (r < 0)
- goto exit;
+ goto exit_llc;
r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
NFC_HCI_EVT_END_OPERATION, NULL, 0);
if (r < 0)
- goto exit;
+ goto exit_llc;
if (hdev->ops->hci_ready) {
r = hdev->ops->hci_ready(hdev);
if (r < 0)
- goto exit;
+ goto exit_llc;
}
r = hci_dev_version(hdev);
if (r < 0)
- goto exit;
+ goto exit_llc;
+
+ return 0;
+
+exit_llc:
+ nfc_llc_stop(hdev->llc);
+
+exit_close:
+ if (hdev->ops->close)
+ hdev->ops->close(hdev);
-exit:
- if (r < 0)
- if (hdev->ops->close)
- hdev->ops->close(hdev);
return r;
}
@@ -500,6 +508,8 @@ static int hci_dev_down(struct nfc_dev *nfc_dev)
{
struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
+ nfc_llc_stop(hdev->llc);
+
if (hdev->ops->close)
hdev->ops->close(hdev);
@@ -539,13 +549,37 @@ static void hci_deactivate_target(struct nfc_dev *nfc_dev,
{
}
+#define HCI_CB_TYPE_TRANSCEIVE 1
+
+static void hci_transceive_cb(void *context, struct sk_buff *skb, int err)
+{
+ struct nfc_hci_dev *hdev = context;
+
+ switch (hdev->async_cb_type) {
+ case HCI_CB_TYPE_TRANSCEIVE:
+ /*
+ * TODO: Check RF Error indicator to make sure data is valid.
+ * It seems that HCI cmd can complete without error, but data
+ * can be invalid if an RF error occured? Ignore for now.
+ */
+ if (err == 0)
+ skb_trim(skb, skb->len - 1); /* RF Err ind */
+
+ hdev->async_cb(hdev->async_cb_context, skb, err);
+ break;
+ default:
+ if (err == 0)
+ kfree_skb(skb);
+ break;
+ }
+}
+
static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
struct sk_buff *skb, data_exchange_cb_t cb,
void *cb_context)
{
struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
int r;
- struct sk_buff *res_skb = NULL;
pr_debug("target_idx=%d\n", target->idx);
@@ -553,40 +587,37 @@ static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
case NFC_HCI_RF_READER_A_GATE:
case NFC_HCI_RF_READER_B_GATE:
if (hdev->ops->data_exchange) {
- r = hdev->ops->data_exchange(hdev, target, skb,
- &res_skb);
+ r = hdev->ops->data_exchange(hdev, target, skb, cb,
+ cb_context);
if (r <= 0) /* handled */
break;
}
*skb_push(skb, 1) = 0; /* CTR, see spec:10.2.2.1 */
- r = nfc_hci_send_cmd(hdev, target->hci_reader_gate,
- NFC_HCI_WR_XCHG_DATA,
- skb->data, skb->len, &res_skb);
- /*
- * TODO: Check RF Error indicator to make sure data is valid.
- * It seems that HCI cmd can complete without error, but data
- * can be invalid if an RF error occured? Ignore for now.
- */
- if (r == 0)
- skb_trim(res_skb, res_skb->len - 1); /* RF Err ind */
+
+ hdev->async_cb_type = HCI_CB_TYPE_TRANSCEIVE;
+ hdev->async_cb = cb;
+ hdev->async_cb_context = cb_context;
+
+ r = nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+ NFC_HCI_WR_XCHG_DATA, skb->data,
+ skb->len, hci_transceive_cb, hdev);
break;
default:
if (hdev->ops->data_exchange) {
- r = hdev->ops->data_exchange(hdev, target, skb,
- &res_skb);
+ r = hdev->ops->data_exchange(hdev, target, skb, cb,
+ cb_context);
if (r == 1)
r = -ENOTSUPP;
}
else
r = -ENOTSUPP;
+ break;
}
kfree_skb(skb);
- cb(cb_context, res_skb, r);
-
- return 0;
+ return r;
}
static int hci_check_presence(struct nfc_dev *nfc_dev,
@@ -600,6 +631,93 @@ static int hci_check_presence(struct nfc_dev *nfc_dev,
return 0;
}
+static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err)
+{
+ mutex_lock(&hdev->msg_tx_mutex);
+
+ if (hdev->cmd_pending_msg == NULL) {
+ nfc_driver_failure(hdev->ndev, err);
+ goto exit;
+ }
+
+ __nfc_hci_cmd_completion(hdev, err, NULL);
+
+exit:
+ mutex_unlock(&hdev->msg_tx_mutex);
+}
+
+static void nfc_hci_llc_failure(struct nfc_hci_dev *hdev, int err)
+{
+ nfc_hci_failure(hdev, err);
+}
+
+static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hcp_packet *packet;
+ u8 type;
+ u8 instruction;
+ struct sk_buff *hcp_skb;
+ u8 pipe;
+ struct sk_buff *frag_skb;
+ int msg_len;
+
+ packet = (struct hcp_packet *)skb->data;
+ if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) {
+ skb_queue_tail(&hdev->rx_hcp_frags, skb);
+ return;
+ }
+
+ /* it's the last fragment. Does it need re-aggregation? */
+ if (skb_queue_len(&hdev->rx_hcp_frags)) {
+ pipe = packet->header & NFC_HCI_FRAGMENT;
+ skb_queue_tail(&hdev->rx_hcp_frags, skb);
+
+ msg_len = 0;
+ skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
+ msg_len += (frag_skb->len -
+ NFC_HCI_HCP_PACKET_HEADER_LEN);
+ }
+
+ hcp_skb = nfc_alloc_recv_skb(NFC_HCI_HCP_PACKET_HEADER_LEN +
+ msg_len, GFP_KERNEL);
+ if (hcp_skb == NULL) {
+ nfc_hci_failure(hdev, -ENOMEM);
+ return;
+ }
+
+ *skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe;
+
+ skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
+ msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN;
+ memcpy(skb_put(hcp_skb, msg_len),
+ frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN,
+ msg_len);
+ }
+
+ skb_queue_purge(&hdev->rx_hcp_frags);
+ } else {
+ packet->header &= NFC_HCI_FRAGMENT;
+ hcp_skb = skb;
+ }
+
+ /* if this is a response, dispatch immediately to
+ * unblock waiting cmd context. Otherwise, enqueue to dispatch
+ * in separate context where handler can also execute command.
+ */
+ packet = (struct hcp_packet *)hcp_skb->data;
+ type = HCP_MSG_GET_TYPE(packet->message.header);
+ if (type == NFC_HCI_HCP_RESPONSE) {
+ pipe = packet->header;
+ instruction = HCP_MSG_GET_CMD(packet->message.header);
+ skb_pull(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN +
+ NFC_HCI_HCP_MESSAGE_HEADER_LEN);
+ nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb);
+ } else {
+ skb_queue_tail(&hdev->msg_rx_queue, hcp_skb);
+ schedule_work(&hdev->msg_rx_work);
+ }
+}
+
static struct nfc_ops hci_nfc_ops = {
.dev_up = hci_dev_up,
.dev_down = hci_dev_down,
@@ -614,6 +732,7 @@ static struct nfc_ops hci_nfc_ops = {
struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
struct nfc_hci_init_data *init_data,
u32 protocols,
+ const char *llc_name,
int tx_headroom,
int tx_tailroom,
int max_link_payload)
@@ -630,10 +749,19 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
if (hdev == NULL)
return NULL;
+ hdev->llc = nfc_llc_allocate(llc_name, hdev, ops->xmit,
+ nfc_hci_recv_from_llc, tx_headroom,
+ tx_tailroom, nfc_hci_llc_failure);
+ if (hdev->llc == NULL) {
+ kfree(hdev);
+ return NULL;
+ }
+
hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols,
tx_headroom + HCI_CMDS_HEADROOM,
tx_tailroom);
if (!hdev->ndev) {
+ nfc_llc_free(hdev->llc);
kfree(hdev);
return NULL;
}
@@ -653,29 +781,18 @@ EXPORT_SYMBOL(nfc_hci_allocate_device);
void nfc_hci_free_device(struct nfc_hci_dev *hdev)
{
nfc_free_device(hdev->ndev);
+ nfc_llc_free(hdev->llc);
kfree(hdev);
}
EXPORT_SYMBOL(nfc_hci_free_device);
int nfc_hci_register_device(struct nfc_hci_dev *hdev)
{
- struct device *dev = &hdev->ndev->dev;
- const char *devname = dev_name(dev);
- char name[32];
- int r = 0;
-
mutex_init(&hdev->msg_tx_mutex);
INIT_LIST_HEAD(&hdev->msg_tx_queue);
INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
- snprintf(name, sizeof(name), "%s_hci_msg_tx_wq", devname);
- hdev->msg_tx_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1);
- if (hdev->msg_tx_wq == NULL) {
- r = -ENOMEM;
- goto exit;
- }
init_timer(&hdev->cmd_timer);
hdev->cmd_timer.data = (unsigned long)hdev;
@@ -684,27 +801,10 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
skb_queue_head_init(&hdev->rx_hcp_frags);
INIT_WORK(&hdev->msg_rx_work, nfc_hci_msg_rx_work);
- snprintf(name, sizeof(name), "%s_hci_msg_rx_wq", devname);
- hdev->msg_rx_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1);
- if (hdev->msg_rx_wq == NULL) {
- r = -ENOMEM;
- goto exit;
- }
skb_queue_head_init(&hdev->msg_rx_queue);
- r = nfc_register_device(hdev->ndev);
-
-exit:
- if (r < 0) {
- if (hdev->msg_tx_wq)
- destroy_workqueue(hdev->msg_tx_wq);
- if (hdev->msg_rx_wq)
- destroy_workqueue(hdev->msg_rx_wq);
- }
-
- return r;
+ return nfc_register_device(hdev->ndev);
}
EXPORT_SYMBOL(nfc_hci_register_device);
@@ -725,9 +825,8 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev)
nfc_unregister_device(hdev->ndev);
- destroy_workqueue(hdev->msg_tx_wq);
-
- destroy_workqueue(hdev->msg_rx_wq);
+ cancel_work_sync(&hdev->msg_tx_work);
+ cancel_work_sync(&hdev->msg_rx_work);
}
EXPORT_SYMBOL(nfc_hci_unregister_device);
@@ -743,93 +842,30 @@ void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev)
}
EXPORT_SYMBOL(nfc_hci_get_clientdata);
-static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err)
-{
- mutex_lock(&hdev->msg_tx_mutex);
-
- if (hdev->cmd_pending_msg == NULL) {
- nfc_driver_failure(hdev->ndev, err);
- goto exit;
- }
-
- __nfc_hci_cmd_completion(hdev, err, NULL);
-
-exit:
- mutex_unlock(&hdev->msg_tx_mutex);
-}
-
void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err)
{
nfc_hci_failure(hdev, err);
}
EXPORT_SYMBOL(nfc_hci_driver_failure);
-void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+void inline nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb)
{
- struct hcp_packet *packet;
- u8 type;
- u8 instruction;
- struct sk_buff *hcp_skb;
- u8 pipe;
- struct sk_buff *frag_skb;
- int msg_len;
-
- packet = (struct hcp_packet *)skb->data;
- if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) {
- skb_queue_tail(&hdev->rx_hcp_frags, skb);
- return;
- }
-
- /* it's the last fragment. Does it need re-aggregation? */
- if (skb_queue_len(&hdev->rx_hcp_frags)) {
- pipe = packet->header & NFC_HCI_FRAGMENT;
- skb_queue_tail(&hdev->rx_hcp_frags, skb);
-
- msg_len = 0;
- skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
- msg_len += (frag_skb->len -
- NFC_HCI_HCP_PACKET_HEADER_LEN);
- }
-
- hcp_skb = nfc_alloc_recv_skb(NFC_HCI_HCP_PACKET_HEADER_LEN +
- msg_len, GFP_KERNEL);
- if (hcp_skb == NULL) {
- nfc_hci_failure(hdev, -ENOMEM);
- return;
- }
-
- *skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe;
-
- skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
- msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN;
- memcpy(skb_put(hcp_skb, msg_len),
- frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN,
- msg_len);
- }
+ nfc_llc_rcv_from_drv(hdev->llc, skb);
+}
+EXPORT_SYMBOL(nfc_hci_recv_frame);
- skb_queue_purge(&hdev->rx_hcp_frags);
- } else {
- packet->header &= NFC_HCI_FRAGMENT;
- hcp_skb = skb;
- }
+static int __init nfc_hci_init(void)
+{
+ return nfc_llc_init();
+}
- /* if this is a response, dispatch immediately to
- * unblock waiting cmd context. Otherwise, enqueue to dispatch
- * in separate context where handler can also execute command.
- */
- packet = (struct hcp_packet *)hcp_skb->data;
- type = HCP_MSG_GET_TYPE(packet->message.header);
- if (type == NFC_HCI_HCP_RESPONSE) {
- pipe = packet->header;
- instruction = HCP_MSG_GET_CMD(packet->message.header);
- skb_pull(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN +
- NFC_HCI_HCP_MESSAGE_HEADER_LEN);
- nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb);
- } else {
- skb_queue_tail(&hdev->msg_rx_queue, hcp_skb);
- queue_work(hdev->msg_rx_wq, &hdev->msg_rx_work);
- }
+static void __exit nfc_hci_exit(void)
+{
+ nfc_llc_exit();
}
-EXPORT_SYMBOL(nfc_hci_recv_frame);
+
+subsys_initcall(nfc_hci_init);
+module_exit(nfc_hci_exit);
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("NFC HCI Core");
diff --git a/net/nfc/hci/hci.h b/net/nfc/hci/hci.h
index fa9a21e92239..b274d12c18ac 100644
--- a/net/nfc/hci/hci.h
+++ b/net/nfc/hci/hci.h
@@ -20,6 +20,8 @@
#ifndef __LOCAL_HCI_H
#define __LOCAL_HCI_H
+#include <net/nfc/hci.h>
+
struct gate_pipe_map {
u8 gate;
u8 pipe;
@@ -35,15 +37,6 @@ struct hcp_packet {
struct hcp_message message;
} __packed;
-/*
- * HCI command execution completion callback.
- * result will be a standard linux error (may be converted from HCI response)
- * skb contains the response data and must be disposed, or may be NULL if
- * an error occured
- */
-typedef void (*hci_cmd_cb_t) (struct nfc_hci_dev *hdev, int result,
- struct sk_buff *skb, void *cb_data);
-
struct hcp_exec_waiter {
wait_queue_head_t *wq;
bool exec_complete;
@@ -55,7 +48,7 @@ struct hci_msg {
struct list_head msg_l;
struct sk_buff_head msg_frags;
bool wait_response;
- hci_cmd_cb_t cb;
+ data_exchange_cb_t cb;
void *cb_context;
unsigned long completion_delay;
};
@@ -83,7 +76,7 @@ struct hci_create_pipe_resp {
int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
u8 type, u8 instruction,
const u8 *payload, size_t payload_len,
- hci_cmd_cb_t cb, void *cb_data,
+ data_exchange_cb_t cb, void *cb_context,
unsigned long completion_delay);
u8 nfc_hci_pipe2gate(struct nfc_hci_dev *hdev, u8 pipe);
diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c
index f4dad1a89740..bc308a7ca609 100644
--- a/net/nfc/hci/hcp.c
+++ b/net/nfc/hci/hcp.c
@@ -35,7 +35,7 @@
int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
u8 type, u8 instruction,
const u8 *payload, size_t payload_len,
- hci_cmd_cb_t cb, void *cb_data,
+ data_exchange_cb_t cb, void *cb_context,
unsigned long completion_delay)
{
struct nfc_dev *ndev = hdev->ndev;
@@ -52,7 +52,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
skb_queue_head_init(&cmd->msg_frags);
cmd->wait_response = (type == NFC_HCI_HCP_COMMAND) ? true : false;
cmd->cb = cb;
- cmd->cb_context = cb_data;
+ cmd->cb_context = cb_context;
cmd->completion_delay = completion_delay;
hci_len = payload_len + 1;
@@ -108,7 +108,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue);
mutex_unlock(&hdev->msg_tx_mutex);
- queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work);
+ schedule_work(&hdev->msg_tx_work);
return 0;
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
new file mode 100644
index 000000000000..ae1205ded87f
--- /dev/null
+++ b/net/nfc/hci/llc.c
@@ -0,0 +1,170 @@
+/*
+ * Link Layer Control manager
+ *
+ * Copyright (C) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <net/nfc/llc.h>
+
+#include "llc.h"
+
+static struct list_head llc_engines;
+
+int nfc_llc_init(void)
+{
+ int r;
+
+ INIT_LIST_HEAD(&llc_engines);
+
+ r = nfc_llc_nop_register();
+ if (r)
+ goto exit;
+
+ r = nfc_llc_shdlc_register();
+ if (r)
+ goto exit;
+
+ return 0;
+
+exit:
+ nfc_llc_exit();
+ return r;
+}
+
+void nfc_llc_exit(void)
+{
+ struct nfc_llc_engine *llc_engine, *n;
+
+ list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) {
+ list_del(&llc_engine->entry);
+ kfree(llc_engine->name);
+ kfree(llc_engine);
+ }
+}
+
+int nfc_llc_register(const char *name, struct nfc_llc_ops *ops)
+{
+ struct nfc_llc_engine *llc_engine;
+
+ llc_engine = kzalloc(sizeof(struct nfc_llc_engine), GFP_KERNEL);
+ if (llc_engine == NULL)
+ return -ENOMEM;
+
+ llc_engine->name = kstrdup(name, GFP_KERNEL);
+ if (llc_engine->name == NULL) {
+ kfree(llc_engine);
+ return -ENOMEM;
+ }
+ llc_engine->ops = ops;
+
+ INIT_LIST_HEAD(&llc_engine->entry);
+ list_add_tail (&llc_engine->entry, &llc_engines);
+
+ return 0;
+}
+
+static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name)
+{
+ struct nfc_llc_engine *llc_engine;
+
+ list_for_each_entry(llc_engine, &llc_engines, entry) {
+ if (strcmp(llc_engine->name, name) == 0)
+ return llc_engine;
+ }
+
+ return NULL;
+}
+
+void nfc_llc_unregister(const char *name)
+{
+ struct nfc_llc_engine *llc_engine;
+
+ llc_engine = nfc_llc_name_to_engine(name);
+ if (llc_engine == NULL)
+ return;
+
+ list_del(&llc_engine->entry);
+ kfree(llc_engine->name);
+ kfree(llc_engine);
+}
+
+struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev,
+ xmit_to_drv_t xmit_to_drv,
+ rcv_to_hci_t rcv_to_hci, int tx_headroom,
+ int tx_tailroom, llc_failure_t llc_failure)
+{
+ struct nfc_llc_engine *llc_engine;
+ struct nfc_llc *llc;
+
+ llc_engine = nfc_llc_name_to_engine(name);
+ if (llc_engine == NULL)
+ return NULL;
+
+ llc = kzalloc(sizeof(struct nfc_llc), GFP_KERNEL);
+ if (llc == NULL)
+ return NULL;
+
+ llc->data = llc_engine->ops->init(hdev, xmit_to_drv, rcv_to_hci,
+ tx_headroom, tx_tailroom,
+ &llc->rx_headroom, &llc->rx_tailroom,
+ llc_failure);
+ if (llc->data == NULL) {
+ kfree(llc);
+ return NULL;
+ }
+ llc->ops = llc_engine->ops;
+
+ return llc;
+}
+
+void nfc_llc_free(struct nfc_llc *llc)
+{
+ llc->ops->deinit(llc);
+ kfree(llc);
+}
+
+inline void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom,
+ int *rx_tailroom)
+{
+ *rx_headroom = llc->rx_headroom;
+ *rx_tailroom = llc->rx_tailroom;
+}
+
+inline int nfc_llc_start(struct nfc_llc *llc)
+{
+ return llc->ops->start(llc);
+}
+
+inline int nfc_llc_stop(struct nfc_llc *llc)
+{
+ return llc->ops->stop(llc);
+}
+
+inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb)
+{
+ llc->ops->rcv_from_drv(llc, skb);
+}
+
+inline int nfc_llc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
+{
+ return llc->ops->xmit_from_hci(llc, skb);
+}
+
+inline void *nfc_llc_get_data(struct nfc_llc *llc)
+{
+ return llc->data;
+}
diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h
new file mode 100644
index 000000000000..7be0b7f3ceb6
--- /dev/null
+++ b/net/nfc/hci/llc.h
@@ -0,0 +1,69 @@
+/*
+ * Link Layer Control manager
+ *
+ * Copyright (C) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LOCAL_LLC_H_
+#define __LOCAL_LLC_H_
+
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+#include <linux/skbuff.h>
+
+struct nfc_llc_ops {
+ void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+ rcv_to_hci_t rcv_to_hci, int tx_headroom,
+ int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+ llc_failure_t llc_failure);
+ void (*deinit) (struct nfc_llc *llc);
+ int (*start) (struct nfc_llc *llc);
+ int (*stop) (struct nfc_llc *llc);
+ void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb);
+ int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb);
+};
+
+struct nfc_llc_engine {
+ const char *name;
+ struct nfc_llc_ops *ops;
+ struct list_head entry;
+};
+
+struct nfc_llc {
+ void *data;
+ struct nfc_llc_ops *ops;
+ int rx_headroom;
+ int rx_tailroom;
+};
+
+void *nfc_llc_get_data(struct nfc_llc *llc);
+
+int nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+void nfc_llc_unregister(const char *name);
+
+int nfc_llc_nop_register(void);
+
+#if defined(CONFIG_NFC_SHDLC)
+int nfc_llc_shdlc_register(void);
+#else
+static inline int nfc_llc_shdlc_register(void)
+{
+ return 0;
+}
+#endif
+
+#endif /* __LOCAL_LLC_H_ */
diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c
new file mode 100644
index 000000000000..87b10291b40f
--- /dev/null
+++ b/net/nfc/hci/llc_nop.c
@@ -0,0 +1,99 @@
+/*
+ * nop (passthrough) Link Layer Control
+ *
+ * Copyright (C) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/types.h>
+
+#include "llc.h"
+
+struct llc_nop {
+ struct nfc_hci_dev *hdev;
+ xmit_to_drv_t xmit_to_drv;
+ rcv_to_hci_t rcv_to_hci;
+ int tx_headroom;
+ int tx_tailroom;
+ llc_failure_t llc_failure;
+};
+
+static void *llc_nop_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+ rcv_to_hci_t rcv_to_hci, int tx_headroom,
+ int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+ llc_failure_t llc_failure)
+{
+ struct llc_nop *llc_nop;
+
+ *rx_headroom = 0;
+ *rx_tailroom = 0;
+
+ llc_nop = kzalloc(sizeof(struct llc_nop), GFP_KERNEL);
+ if (llc_nop == NULL)
+ return NULL;
+
+ llc_nop->hdev = hdev;
+ llc_nop->xmit_to_drv = xmit_to_drv;
+ llc_nop->rcv_to_hci = rcv_to_hci;
+ llc_nop->tx_headroom = tx_headroom;
+ llc_nop->tx_tailroom = tx_tailroom;
+ llc_nop->llc_failure = llc_failure;
+
+ return llc_nop;
+}
+
+static void llc_nop_deinit(struct nfc_llc *llc)
+{
+ kfree(nfc_llc_get_data(llc));
+}
+
+static int llc_nop_start(struct nfc_llc *llc)
+{
+ return 0;
+}
+
+static int llc_nop_stop(struct nfc_llc *llc)
+{
+ return 0;
+}
+
+static void llc_nop_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb)
+{
+ struct llc_nop *llc_nop = nfc_llc_get_data(llc);
+
+ llc_nop->rcv_to_hci(llc_nop->hdev, skb);
+}
+
+static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
+{
+ struct llc_nop *llc_nop = nfc_llc_get_data(llc);
+
+ return llc_nop->xmit_to_drv(llc_nop->hdev, skb);
+}
+
+static struct nfc_llc_ops llc_nop_ops = {
+ .init = llc_nop_init,
+ .deinit = llc_nop_deinit,
+ .start = llc_nop_start,
+ .stop = llc_nop_stop,
+ .rcv_from_drv = llc_nop_rcv_from_drv,
+ .xmit_from_hci = llc_nop_xmit_from_hci,
+};
+
+int nfc_llc_nop_register(void)
+{
+ return nfc_llc_register(LLC_NOP_NAME, &llc_nop_ops);
+}
diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/llc_shdlc.c
index 6f840c18c892..01cbc72943cd 100644
--- a/net/nfc/hci/shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -1,10 +1,11 @@
/*
+ * shdlc Link Layer Control
+ *
* Copyright (C) 2012 Intel Corporation. All rights reserved.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -19,18 +20,65 @@
#define pr_fmt(fmt) "shdlc: %s: " fmt, __func__
+#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/export.h>
#include <linux/wait.h>
-#include <linux/crc-ccitt.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
-#include <net/nfc/hci.h>
-#include <net/nfc/shdlc.h>
+#include "llc.h"
+
+enum shdlc_state {
+ SHDLC_DISCONNECTED = 0,
+ SHDLC_CONNECTING = 1,
+ SHDLC_NEGOTIATING = 2,
+ SHDLC_HALF_CONNECTED = 3,
+ SHDLC_CONNECTED = 4
+};
+
+struct llc_shdlc {
+ struct nfc_hci_dev *hdev;
+ xmit_to_drv_t xmit_to_drv;
+ rcv_to_hci_t rcv_to_hci;
+
+ struct mutex state_mutex;
+ enum shdlc_state state;
+ int hard_fault;
+
+ wait_queue_head_t *connect_wq;
+ int connect_tries;
+ int connect_result;
+ struct timer_list connect_timer;/* aka T3 in spec 10.6.1 */
+
+ u8 w; /* window size */
+ bool srej_support;
+
+ struct timer_list t1_timer; /* send ack timeout */
+ bool t1_active;
+
+ struct timer_list t2_timer; /* guard/retransmit timeout */
+ bool t2_active;
+
+ int ns; /* next seq num for send */
+ int nr; /* next expected seq num for receive */
+ int dnr; /* oldest sent unacked seq num */
+
+ struct sk_buff_head rcv_q;
+
+ struct sk_buff_head send_q;
+ bool rnr; /* other side is not ready to receive */
+
+ struct sk_buff_head ack_pending_q;
+
+ struct work_struct sm_work;
+
+ int tx_headroom;
+ int tx_tailroom;
+
+ llc_failure_t llc_failure;
+};
#define SHDLC_LLC_HEAD_ROOM 2
-#define SHDLC_LLC_TAIL_ROOM 2
#define SHDLC_MAX_WINDOW 4
#define SHDLC_SREJ_SUPPORT false
@@ -71,7 +119,7 @@ do { \
} while (0)
/* checks x < y <= z modulo 8 */
-static bool nfc_shdlc_x_lt_y_lteq_z(int x, int y, int z)
+static bool llc_shdlc_x_lt_y_lteq_z(int x, int y, int z)
{
if (x < z)
return ((x < y) && (y <= z)) ? true : false;
@@ -80,7 +128,7 @@ static bool nfc_shdlc_x_lt_y_lteq_z(int x, int y, int z)
}
/* checks x <= y < z modulo 8 */
-static bool nfc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
+static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
{
if (x <= z)
return ((x <= y) && (y < z)) ? true : false;
@@ -88,36 +136,21 @@ static bool nfc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
return ((y >= x) || (y < z)) ? true : false;
}
-static struct sk_buff *nfc_shdlc_alloc_skb(struct nfc_shdlc *shdlc,
+static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
int payload_len)
{
struct sk_buff *skb;
- skb = alloc_skb(shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM +
- shdlc->client_tailroom + SHDLC_LLC_TAIL_ROOM +
- payload_len, GFP_KERNEL);
+ skb = alloc_skb(shdlc->tx_headroom + SHDLC_LLC_HEAD_ROOM +
+ shdlc->tx_tailroom + payload_len, GFP_KERNEL);
if (skb)
- skb_reserve(skb, shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM);
+ skb_reserve(skb, shdlc->tx_headroom + SHDLC_LLC_HEAD_ROOM);
return skb;
}
-static void nfc_shdlc_add_len_crc(struct sk_buff *skb)
-{
- u16 crc;
- int len;
-
- len = skb->len + 2;
- *skb_push(skb, 1) = len;
-
- crc = crc_ccitt(0xffff, skb->data, skb->len);
- crc = ~crc;
- *skb_put(skb, 1) = crc & 0xff;
- *skb_put(skb, 1) = crc >> 8;
-}
-
/* immediately sends an S frame. */
-static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc,
+static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
enum sframe_type sframe_type, int nr)
{
int r;
@@ -125,15 +158,13 @@ static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc,
pr_debug("sframe_type=%d nr=%d\n", sframe_type, nr);
- skb = nfc_shdlc_alloc_skb(shdlc, 0);
+ skb = llc_shdlc_alloc_skb(shdlc, 0);
if (skb == NULL)
return -ENOMEM;
*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr;
- nfc_shdlc_add_len_crc(skb);
-
- r = shdlc->ops->xmit(shdlc, skb);
+ r = shdlc->xmit_to_drv(shdlc->hdev, skb);
kfree_skb(skb);
@@ -141,7 +172,7 @@ static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc,
}
/* immediately sends an U frame. skb may contain optional payload */
-static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc,
+static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc,
struct sk_buff *skb,
enum uframe_modifier uframe_modifier)
{
@@ -151,9 +182,7 @@ static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc,
*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier;
- nfc_shdlc_add_len_crc(skb);
-
- r = shdlc->ops->xmit(shdlc, skb);
+ r = shdlc->xmit_to_drv(shdlc->hdev, skb);
kfree_skb(skb);
@@ -164,7 +193,7 @@ static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc,
* Free ack_pending frames until y_nr - 1, and reset t2 according to
* the remaining oldest ack_pending frame sent time
*/
-static void nfc_shdlc_reset_t2(struct nfc_shdlc *shdlc, int y_nr)
+static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr)
{
struct sk_buff *skb;
int dnr = shdlc->dnr; /* MUST initially be < y_nr */
@@ -204,7 +233,7 @@ static void nfc_shdlc_reset_t2(struct nfc_shdlc *shdlc, int y_nr)
* Receive validated frames from lower layer. skb contains HCI payload only.
* Handle according to algorithm at spec:10.8.2
*/
-static void nfc_shdlc_rcv_i_frame(struct nfc_shdlc *shdlc,
+static void llc_shdlc_rcv_i_frame(struct llc_shdlc *shdlc,
struct sk_buff *skb, int ns, int nr)
{
int x_ns = ns;
@@ -216,66 +245,64 @@ static void nfc_shdlc_rcv_i_frame(struct nfc_shdlc *shdlc,
goto exit;
if (x_ns != shdlc->nr) {
- nfc_shdlc_send_s_frame(shdlc, S_FRAME_REJ, shdlc->nr);
+ llc_shdlc_send_s_frame(shdlc, S_FRAME_REJ, shdlc->nr);
goto exit;
}
if (shdlc->t1_active == false) {
shdlc->t1_active = true;
- mod_timer(&shdlc->t1_timer,
+ mod_timer(&shdlc->t1_timer, jiffies +
msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w)));
pr_debug("(re)Start T1(send ack)\n");
}
if (skb->len) {
- nfc_hci_recv_frame(shdlc->hdev, skb);
+ shdlc->rcv_to_hci(shdlc->hdev, skb);
skb = NULL;
}
shdlc->nr = (shdlc->nr + 1) % 8;
- if (nfc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) {
- nfc_shdlc_reset_t2(shdlc, y_nr);
+ if (llc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) {
+ llc_shdlc_reset_t2(shdlc, y_nr);
shdlc->dnr = y_nr;
}
exit:
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
}
-static void nfc_shdlc_rcv_ack(struct nfc_shdlc *shdlc, int y_nr)
+static void llc_shdlc_rcv_ack(struct llc_shdlc *shdlc, int y_nr)
{
pr_debug("remote acked up to frame %d excluded\n", y_nr);
- if (nfc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) {
- nfc_shdlc_reset_t2(shdlc, y_nr);
+ if (llc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) {
+ llc_shdlc_reset_t2(shdlc, y_nr);
shdlc->dnr = y_nr;
}
}
-static void nfc_shdlc_requeue_ack_pending(struct nfc_shdlc *shdlc)
+static void llc_shdlc_requeue_ack_pending(struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
pr_debug("ns reset to %d\n", shdlc->dnr);
while ((skb = skb_dequeue_tail(&shdlc->ack_pending_q))) {
- skb_pull(skb, 2); /* remove len+control */
- skb_trim(skb, skb->len - 2); /* remove crc */
+ skb_pull(skb, 1); /* remove control field */
skb_queue_head(&shdlc->send_q, skb);
}
shdlc->ns = shdlc->dnr;
}
-static void nfc_shdlc_rcv_rej(struct nfc_shdlc *shdlc, int y_nr)
+static void llc_shdlc_rcv_rej(struct llc_shdlc *shdlc, int y_nr)
{
struct sk_buff *skb;
pr_debug("remote asks retransmition from frame %d\n", y_nr);
- if (nfc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) {
+ if (llc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) {
if (shdlc->t2_active) {
del_timer_sync(&shdlc->t2_timer);
shdlc->t2_active = false;
@@ -289,12 +316,12 @@ static void nfc_shdlc_rcv_rej(struct nfc_shdlc *shdlc, int y_nr)
}
}
- nfc_shdlc_requeue_ack_pending(shdlc);
+ llc_shdlc_requeue_ack_pending(shdlc);
}
}
/* See spec RR:10.8.3 REJ:10.8.4 */
-static void nfc_shdlc_rcv_s_frame(struct nfc_shdlc *shdlc,
+static void llc_shdlc_rcv_s_frame(struct llc_shdlc *shdlc,
enum sframe_type s_frame_type, int nr)
{
struct sk_buff *skb;
@@ -304,21 +331,21 @@ static void nfc_shdlc_rcv_s_frame(struct nfc_shdlc *shdlc,
switch (s_frame_type) {
case S_FRAME_RR:
- nfc_shdlc_rcv_ack(shdlc, nr);
+ llc_shdlc_rcv_ack(shdlc, nr);
if (shdlc->rnr == true) { /* see SHDLC 10.7.7 */
shdlc->rnr = false;
if (shdlc->send_q.qlen == 0) {
- skb = nfc_shdlc_alloc_skb(shdlc, 0);
+ skb = llc_shdlc_alloc_skb(shdlc, 0);
if (skb)
skb_queue_tail(&shdlc->send_q, skb);
}
}
break;
case S_FRAME_REJ:
- nfc_shdlc_rcv_rej(shdlc, nr);
+ llc_shdlc_rcv_rej(shdlc, nr);
break;
case S_FRAME_RNR:
- nfc_shdlc_rcv_ack(shdlc, nr);
+ llc_shdlc_rcv_ack(shdlc, nr);
shdlc->rnr = true;
break;
default:
@@ -326,7 +353,7 @@ static void nfc_shdlc_rcv_s_frame(struct nfc_shdlc *shdlc,
}
}
-static void nfc_shdlc_connect_complete(struct nfc_shdlc *shdlc, int r)
+static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r)
{
pr_debug("result=%d\n", r);
@@ -337,7 +364,7 @@ static void nfc_shdlc_connect_complete(struct nfc_shdlc *shdlc, int r)
shdlc->nr = 0;
shdlc->dnr = 0;
- shdlc->state = SHDLC_CONNECTED;
+ shdlc->state = SHDLC_HALF_CONNECTED;
} else {
shdlc->state = SHDLC_DISCONNECTED;
}
@@ -347,36 +374,36 @@ static void nfc_shdlc_connect_complete(struct nfc_shdlc *shdlc, int r)
wake_up(shdlc->connect_wq);
}
-static int nfc_shdlc_connect_initiate(struct nfc_shdlc *shdlc)
+static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
pr_debug("\n");
- skb = nfc_shdlc_alloc_skb(shdlc, 2);
+ skb = llc_shdlc_alloc_skb(shdlc, 2);
if (skb == NULL)
return -ENOMEM;
*skb_put(skb, 1) = SHDLC_MAX_WINDOW;
*skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0;
- return nfc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
+ return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
}
-static int nfc_shdlc_connect_send_ua(struct nfc_shdlc *shdlc)
+static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
pr_debug("\n");
- skb = nfc_shdlc_alloc_skb(shdlc, 0);
+ skb = llc_shdlc_alloc_skb(shdlc, 0);
if (skb == NULL)
return -ENOMEM;
- return nfc_shdlc_send_u_frame(shdlc, skb, U_FRAME_UA);
+ return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_UA);
}
-static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc,
+static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc,
struct sk_buff *skb,
enum uframe_modifier u_frame_modifier)
{
@@ -388,8 +415,13 @@ static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc,
switch (u_frame_modifier) {
case U_FRAME_RSET:
- if (shdlc->state == SHDLC_NEGOCIATING) {
- /* we sent RSET, but chip wants to negociate */
+ switch (shdlc->state) {
+ case SHDLC_NEGOTIATING:
+ case SHDLC_CONNECTING:
+ /*
+ * We sent RSET, but chip wants to negociate or we
+ * got RSET before we managed to send out our.
+ */
if (skb->len > 0)
w = skb->data[0];
@@ -401,22 +433,34 @@ static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc,
(SHDLC_SREJ_SUPPORT || (srej_support == false))) {
shdlc->w = w;
shdlc->srej_support = srej_support;
- r = nfc_shdlc_connect_send_ua(shdlc);
- nfc_shdlc_connect_complete(shdlc, r);
+ r = llc_shdlc_connect_send_ua(shdlc);
+ llc_shdlc_connect_complete(shdlc, r);
}
- } else if (shdlc->state == SHDLC_CONNECTED) {
+ break;
+ case SHDLC_HALF_CONNECTED:
+ /*
+ * Chip resent RSET due to its timeout - Ignote it
+ * as we already sent UA.
+ */
+ break;
+ case SHDLC_CONNECTED:
/*
* Chip wants to reset link. This is unexpected and
* unsupported.
*/
shdlc->hard_fault = -ECONNRESET;
+ break;
+ default:
+ break;
}
break;
case U_FRAME_UA:
if ((shdlc->state == SHDLC_CONNECTING &&
shdlc->connect_tries > 0) ||
- (shdlc->state == SHDLC_NEGOCIATING))
- nfc_shdlc_connect_complete(shdlc, 0);
+ (shdlc->state == SHDLC_NEGOTIATING)) {
+ llc_shdlc_connect_complete(shdlc, 0);
+ shdlc->state = SHDLC_CONNECTED;
+ }
break;
default:
break;
@@ -425,7 +469,7 @@ static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc,
kfree_skb(skb);
}
-static void nfc_shdlc_handle_rcv_queue(struct nfc_shdlc *shdlc)
+static void llc_shdlc_handle_rcv_queue(struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
u8 control;
@@ -443,19 +487,25 @@ static void nfc_shdlc_handle_rcv_queue(struct nfc_shdlc *shdlc)
switch (control & SHDLC_CONTROL_HEAD_MASK) {
case SHDLC_CONTROL_HEAD_I:
case SHDLC_CONTROL_HEAD_I2:
+ if (shdlc->state == SHDLC_HALF_CONNECTED)
+ shdlc->state = SHDLC_CONNECTED;
+
ns = (control & SHDLC_CONTROL_NS_MASK) >> 3;
nr = control & SHDLC_CONTROL_NR_MASK;
- nfc_shdlc_rcv_i_frame(shdlc, skb, ns, nr);
+ llc_shdlc_rcv_i_frame(shdlc, skb, ns, nr);
break;
case SHDLC_CONTROL_HEAD_S:
+ if (shdlc->state == SHDLC_HALF_CONNECTED)
+ shdlc->state = SHDLC_CONNECTED;
+
s_frame_type = (control & SHDLC_CONTROL_TYPE_MASK) >> 3;
nr = control & SHDLC_CONTROL_NR_MASK;
- nfc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr);
+ llc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr);
kfree_skb(skb);
break;
case SHDLC_CONTROL_HEAD_U:
u_frame_modifier = control & SHDLC_CONTROL_M_MASK;
- nfc_shdlc_rcv_u_frame(shdlc, skb, u_frame_modifier);
+ llc_shdlc_rcv_u_frame(shdlc, skb, u_frame_modifier);
break;
default:
pr_err("UNKNOWN Control=%d\n", control);
@@ -465,7 +515,7 @@ static void nfc_shdlc_handle_rcv_queue(struct nfc_shdlc *shdlc)
}
}
-static int nfc_shdlc_w_used(int ns, int dnr)
+static int llc_shdlc_w_used(int ns, int dnr)
{
int unack_count;
@@ -478,7 +528,7 @@ static int nfc_shdlc_w_used(int ns, int dnr)
}
/* Send frames according to algorithm at spec:10.8.1 */
-static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc)
+static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
int r;
@@ -489,7 +539,7 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc)
("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n",
shdlc->send_q.qlen, shdlc->ns, shdlc->dnr,
shdlc->rnr == false ? "false" : "true",
- shdlc->w - nfc_shdlc_w_used(shdlc->ns, shdlc->dnr),
+ shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr),
shdlc->ack_pending_q.qlen);
while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w &&
@@ -508,11 +558,9 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc)
pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns,
shdlc->nr);
- /* SHDLC_DUMP_SKB("shdlc frame written", skb); */
-
- nfc_shdlc_add_len_crc(skb);
+ SHDLC_DUMP_SKB("shdlc frame written", skb);
- r = shdlc->ops->xmit(shdlc, skb);
+ r = shdlc->xmit_to_drv(shdlc->hdev, skb);
if (r < 0) {
shdlc->hard_fault = r;
break;
@@ -534,36 +582,36 @@ static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc)
}
}
-static void nfc_shdlc_connect_timeout(unsigned long data)
+static void llc_shdlc_connect_timeout(unsigned long data)
{
- struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data;
+ struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
pr_debug("\n");
- queue_work(shdlc->sm_wq, &shdlc->sm_work);
+ schedule_work(&shdlc->sm_work);
}
-static void nfc_shdlc_t1_timeout(unsigned long data)
+static void llc_shdlc_t1_timeout(unsigned long data)
{
- struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data;
+ struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
pr_debug("SoftIRQ: need to send ack\n");
- queue_work(shdlc->sm_wq, &shdlc->sm_work);
+ schedule_work(&shdlc->sm_work);
}
-static void nfc_shdlc_t2_timeout(unsigned long data)
+static void llc_shdlc_t2_timeout(unsigned long data)
{
- struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data;
+ struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
pr_debug("SoftIRQ: need to retransmit\n");
- queue_work(shdlc->sm_wq, &shdlc->sm_work);
+ schedule_work(&shdlc->sm_work);
}
-static void nfc_shdlc_sm_work(struct work_struct *work)
+static void llc_shdlc_sm_work(struct work_struct *work)
{
- struct nfc_shdlc *shdlc = container_of(work, struct nfc_shdlc, sm_work);
+ struct llc_shdlc *shdlc = container_of(work, struct llc_shdlc, sm_work);
int r;
pr_debug("\n");
@@ -578,46 +626,47 @@ static void nfc_shdlc_sm_work(struct work_struct *work)
break;
case SHDLC_CONNECTING:
if (shdlc->hard_fault) {
- nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault);
+ llc_shdlc_connect_complete(shdlc, shdlc->hard_fault);
break;
}
if (shdlc->connect_tries++ < 5)
- r = nfc_shdlc_connect_initiate(shdlc);
+ r = llc_shdlc_connect_initiate(shdlc);
else
r = -ETIME;
if (r < 0)
- nfc_shdlc_connect_complete(shdlc, r);
+ llc_shdlc_connect_complete(shdlc, r);
else {
mod_timer(&shdlc->connect_timer, jiffies +
msecs_to_jiffies(SHDLC_CONNECT_VALUE_MS));
- shdlc->state = SHDLC_NEGOCIATING;
+ shdlc->state = SHDLC_NEGOTIATING;
}
break;
- case SHDLC_NEGOCIATING:
+ case SHDLC_NEGOTIATING:
if (timer_pending(&shdlc->connect_timer) == 0) {
shdlc->state = SHDLC_CONNECTING;
- queue_work(shdlc->sm_wq, &shdlc->sm_work);
+ schedule_work(&shdlc->sm_work);
}
- nfc_shdlc_handle_rcv_queue(shdlc);
+ llc_shdlc_handle_rcv_queue(shdlc);
if (shdlc->hard_fault) {
- nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault);
+ llc_shdlc_connect_complete(shdlc, shdlc->hard_fault);
break;
}
break;
+ case SHDLC_HALF_CONNECTED:
case SHDLC_CONNECTED:
- nfc_shdlc_handle_rcv_queue(shdlc);
- nfc_shdlc_handle_send_queue(shdlc);
+ llc_shdlc_handle_rcv_queue(shdlc);
+ llc_shdlc_handle_send_queue(shdlc);
if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) {
pr_debug
("Handle T1(send ack) elapsed (T1 now inactive)\n");
shdlc->t1_active = false;
- r = nfc_shdlc_send_s_frame(shdlc, S_FRAME_RR,
+ r = llc_shdlc_send_s_frame(shdlc, S_FRAME_RR,
shdlc->nr);
if (r < 0)
shdlc->hard_fault = r;
@@ -629,12 +678,12 @@ static void nfc_shdlc_sm_work(struct work_struct *work)
shdlc->t2_active = false;
- nfc_shdlc_requeue_ack_pending(shdlc);
- nfc_shdlc_handle_send_queue(shdlc);
+ llc_shdlc_requeue_ack_pending(shdlc);
+ llc_shdlc_handle_send_queue(shdlc);
}
if (shdlc->hard_fault) {
- nfc_hci_driver_failure(shdlc->hdev, shdlc->hard_fault);
+ shdlc->llc_failure(shdlc->hdev, shdlc->hard_fault);
}
break;
default:
@@ -647,7 +696,7 @@ static void nfc_shdlc_sm_work(struct work_struct *work)
* Called from syscall context to establish shdlc link. Sleeps until
* link is ready or failure.
*/
-static int nfc_shdlc_connect(struct nfc_shdlc *shdlc)
+static int llc_shdlc_connect(struct llc_shdlc *shdlc)
{
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq);
@@ -662,14 +711,14 @@ static int nfc_shdlc_connect(struct nfc_shdlc *shdlc)
mutex_unlock(&shdlc->state_mutex);
- queue_work(shdlc->sm_wq, &shdlc->sm_work);
+ schedule_work(&shdlc->sm_work);
wait_event(connect_wq, shdlc->connect_result != 1);
return shdlc->connect_result;
}
-static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc)
+static void llc_shdlc_disconnect(struct llc_shdlc *shdlc)
{
pr_debug("\n");
@@ -679,7 +728,7 @@ static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc)
mutex_unlock(&shdlc->state_mutex);
- queue_work(shdlc->sm_wq, &shdlc->sm_work);
+ schedule_work(&shdlc->sm_work);
}
/*
@@ -687,7 +736,7 @@ static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc)
* skb contains only LLC header and payload.
* If skb == NULL, it is a notification that the link below is dead.
*/
-void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb)
+static void llc_shdlc_recv_frame(struct llc_shdlc *shdlc, struct sk_buff *skb)
{
if (skb == NULL) {
pr_err("NULL Frame -> link is dead\n");
@@ -697,176 +746,37 @@ void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb)
skb_queue_tail(&shdlc->rcv_q, skb);
}
- queue_work(shdlc->sm_wq, &shdlc->sm_work);
-}
-EXPORT_SYMBOL(nfc_shdlc_recv_frame);
-
-static int nfc_shdlc_open(struct nfc_hci_dev *hdev)
-{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
- int r;
-
- pr_debug("\n");
-
- if (shdlc->ops->open) {
- r = shdlc->ops->open(shdlc);
- if (r < 0)
- return r;
- }
-
- r = nfc_shdlc_connect(shdlc);
- if (r < 0 && shdlc->ops->close)
- shdlc->ops->close(shdlc);
-
- return r;
-}
-
-static void nfc_shdlc_close(struct nfc_hci_dev *hdev)
-{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
- pr_debug("\n");
-
- nfc_shdlc_disconnect(shdlc);
-
- if (shdlc->ops->close)
- shdlc->ops->close(shdlc);
+ schedule_work(&shdlc->sm_work);
}
-static int nfc_shdlc_hci_ready(struct nfc_hci_dev *hdev)
+static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+ rcv_to_hci_t rcv_to_hci, int tx_headroom,
+ int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+ llc_failure_t llc_failure)
{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
- int r = 0;
-
- pr_debug("\n");
+ struct llc_shdlc *shdlc;
- if (shdlc->ops->hci_ready)
- r = shdlc->ops->hci_ready(shdlc);
-
- return r;
-}
-
-static int nfc_shdlc_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
-{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
- SHDLC_DUMP_SKB("queuing HCP packet to shdlc", skb);
-
- skb_queue_tail(&shdlc->send_q, skb);
+ *rx_headroom = SHDLC_LLC_HEAD_ROOM;
+ *rx_tailroom = 0;
- queue_work(shdlc->sm_wq, &shdlc->sm_work);
-
- return 0;
-}
-
-static int nfc_shdlc_start_poll(struct nfc_hci_dev *hdev,
- u32 im_protocols, u32 tm_protocols)
-{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
- pr_debug("\n");
-
- if (shdlc->ops->start_poll)
- return shdlc->ops->start_poll(shdlc,
- im_protocols, tm_protocols);
-
- return 0;
-}
-
-static int nfc_shdlc_target_from_gate(struct nfc_hci_dev *hdev, u8 gate,
- struct nfc_target *target)
-{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
- if (shdlc->ops->target_from_gate)
- return shdlc->ops->target_from_gate(shdlc, gate, target);
-
- return -EPERM;
-}
-
-static int nfc_shdlc_complete_target_discovered(struct nfc_hci_dev *hdev,
- u8 gate,
- struct nfc_target *target)
-{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
- pr_debug("\n");
-
- if (shdlc->ops->complete_target_discovered)
- return shdlc->ops->complete_target_discovered(shdlc, gate,
- target);
-
- return 0;
-}
-
-static int nfc_shdlc_data_exchange(struct nfc_hci_dev *hdev,
- struct nfc_target *target,
- struct sk_buff *skb,
- struct sk_buff **res_skb)
-{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
- if (shdlc->ops->data_exchange)
- return shdlc->ops->data_exchange(shdlc, target, skb, res_skb);
-
- return -EPERM;
-}
-
-static int nfc_shdlc_check_presence(struct nfc_hci_dev *hdev,
- struct nfc_target *target)
-{
- struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
- if (shdlc->ops->check_presence)
- return shdlc->ops->check_presence(shdlc, target);
-
- return 0;
-}
-
-static struct nfc_hci_ops shdlc_ops = {
- .open = nfc_shdlc_open,
- .close = nfc_shdlc_close,
- .hci_ready = nfc_shdlc_hci_ready,
- .xmit = nfc_shdlc_xmit,
- .start_poll = nfc_shdlc_start_poll,
- .target_from_gate = nfc_shdlc_target_from_gate,
- .complete_target_discovered = nfc_shdlc_complete_target_discovered,
- .data_exchange = nfc_shdlc_data_exchange,
- .check_presence = nfc_shdlc_check_presence,
-};
-
-struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops,
- struct nfc_hci_init_data *init_data,
- u32 protocols,
- int tx_headroom, int tx_tailroom,
- int max_link_payload, const char *devname)
-{
- struct nfc_shdlc *shdlc;
- int r;
- char name[32];
-
- if (ops->xmit == NULL)
- return NULL;
-
- shdlc = kzalloc(sizeof(struct nfc_shdlc), GFP_KERNEL);
+ shdlc = kzalloc(sizeof(struct llc_shdlc), GFP_KERNEL);
if (shdlc == NULL)
return NULL;
mutex_init(&shdlc->state_mutex);
- shdlc->ops = ops;
shdlc->state = SHDLC_DISCONNECTED;
init_timer(&shdlc->connect_timer);
shdlc->connect_timer.data = (unsigned long)shdlc;
- shdlc->connect_timer.function = nfc_shdlc_connect_timeout;
+ shdlc->connect_timer.function = llc_shdlc_connect_timeout;
init_timer(&shdlc->t1_timer);
shdlc->t1_timer.data = (unsigned long)shdlc;
- shdlc->t1_timer.function = nfc_shdlc_t1_timeout;
+ shdlc->t1_timer.function = llc_shdlc_t1_timeout;
init_timer(&shdlc->t2_timer);
shdlc->t2_timer.data = (unsigned long)shdlc;
- shdlc->t2_timer.function = nfc_shdlc_t2_timeout;
+ shdlc->t2_timer.function = llc_shdlc_t2_timeout;
shdlc->w = SHDLC_MAX_WINDOW;
shdlc->srej_support = SHDLC_SREJ_SUPPORT;
@@ -875,77 +785,73 @@ struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops,
skb_queue_head_init(&shdlc->send_q);
skb_queue_head_init(&shdlc->ack_pending_q);
- INIT_WORK(&shdlc->sm_work, nfc_shdlc_sm_work);
- snprintf(name, sizeof(name), "%s_shdlc_sm_wq", devname);
- shdlc->sm_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND |
- WQ_MEM_RECLAIM, 1);
- if (shdlc->sm_wq == NULL)
- goto err_allocwq;
+ INIT_WORK(&shdlc->sm_work, llc_shdlc_sm_work);
- shdlc->client_headroom = tx_headroom;
- shdlc->client_tailroom = tx_tailroom;
-
- shdlc->hdev = nfc_hci_allocate_device(&shdlc_ops, init_data, protocols,
- tx_headroom + SHDLC_LLC_HEAD_ROOM,
- tx_tailroom + SHDLC_LLC_TAIL_ROOM,
- max_link_payload);
- if (shdlc->hdev == NULL)
- goto err_allocdev;
-
- nfc_hci_set_clientdata(shdlc->hdev, shdlc);
-
- r = nfc_hci_register_device(shdlc->hdev);
- if (r < 0)
- goto err_regdev;
+ shdlc->hdev = hdev;
+ shdlc->xmit_to_drv = xmit_to_drv;
+ shdlc->rcv_to_hci = rcv_to_hci;
+ shdlc->tx_headroom = tx_headroom;
+ shdlc->tx_tailroom = tx_tailroom;
+ shdlc->llc_failure = llc_failure;
return shdlc;
+}
-err_regdev:
- nfc_hci_free_device(shdlc->hdev);
+static void llc_shdlc_deinit(struct nfc_llc *llc)
+{
+ struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
-err_allocdev:
- destroy_workqueue(shdlc->sm_wq);
+ skb_queue_purge(&shdlc->rcv_q);
+ skb_queue_purge(&shdlc->send_q);
+ skb_queue_purge(&shdlc->ack_pending_q);
-err_allocwq:
kfree(shdlc);
-
- return NULL;
}
-EXPORT_SYMBOL(nfc_shdlc_allocate);
-void nfc_shdlc_free(struct nfc_shdlc *shdlc)
+static int llc_shdlc_start(struct nfc_llc *llc)
{
- pr_debug("\n");
+ struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
- nfc_hci_unregister_device(shdlc->hdev);
- nfc_hci_free_device(shdlc->hdev);
+ return llc_shdlc_connect(shdlc);
+}
- destroy_workqueue(shdlc->sm_wq);
+static int llc_shdlc_stop(struct nfc_llc *llc)
+{
+ struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
- skb_queue_purge(&shdlc->rcv_q);
- skb_queue_purge(&shdlc->send_q);
- skb_queue_purge(&shdlc->ack_pending_q);
+ llc_shdlc_disconnect(shdlc);
- kfree(shdlc);
+ return 0;
}
-EXPORT_SYMBOL(nfc_shdlc_free);
-void nfc_shdlc_set_clientdata(struct nfc_shdlc *shdlc, void *clientdata)
+static void llc_shdlc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb)
{
- pr_debug("\n");
+ struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
- shdlc->clientdata = clientdata;
+ llc_shdlc_recv_frame(shdlc, skb);
}
-EXPORT_SYMBOL(nfc_shdlc_set_clientdata);
-void *nfc_shdlc_get_clientdata(struct nfc_shdlc *shdlc)
+static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
{
- return shdlc->clientdata;
+ struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
+
+ skb_queue_tail(&shdlc->send_q, skb);
+
+ schedule_work(&shdlc->sm_work);
+
+ return 0;
}
-EXPORT_SYMBOL(nfc_shdlc_get_clientdata);
-struct nfc_hci_dev *nfc_shdlc_get_hci_dev(struct nfc_shdlc *shdlc)
+static struct nfc_llc_ops llc_shdlc_ops = {
+ .init = llc_shdlc_init,
+ .deinit = llc_shdlc_deinit,
+ .start = llc_shdlc_start,
+ .stop = llc_shdlc_stop,
+ .rcv_from_drv = llc_shdlc_rcv_from_drv,
+ .xmit_from_hci = llc_shdlc_xmit_from_hci,
+};
+
+int nfc_llc_shdlc_register(void)
{
- return shdlc->hdev;
+ return nfc_llc_register(LLC_SHDLC_NAME, &llc_shdlc_ops);
}
-EXPORT_SYMBOL(nfc_shdlc_get_hci_dev);
diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c
index b982b5b890d7..c45ccd6c094c 100644
--- a/net/nfc/llcp/commands.c
+++ b/net/nfc/llcp/commands.c
@@ -312,6 +312,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM);
+ nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX);
+
return nfc_data_exchange(dev, local->target_idx, skb,
nfc_llcp_recv, local);
}
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index 82f0f7588b46..cc10d073c338 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -56,7 +56,7 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen)
sk_for_each_safe(sk, node, tmp, &local->sockets.head) {
llcp_sock = nfc_llcp_sock(sk);
- lock_sock(sk);
+ bh_lock_sock(sk);
if (sk->sk_state == LLCP_CONNECTED)
nfc_put_device(llcp_sock->dev);
@@ -68,26 +68,26 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen)
list_for_each_entry_safe(lsk, n, &llcp_sock->accept_queue,
accept_queue) {
accept_sk = &lsk->sk;
- lock_sock(accept_sk);
+ bh_lock_sock(accept_sk);
nfc_llcp_accept_unlink(accept_sk);
accept_sk->sk_state = LLCP_CLOSED;
- release_sock(accept_sk);
+ bh_unlock_sock(accept_sk);
sock_orphan(accept_sk);
}
if (listen == true) {
- release_sock(sk);
+ bh_unlock_sock(sk);
continue;
}
}
sk->sk_state = LLCP_CLOSED;
- release_sock(sk);
+ bh_unlock_sock(sk);
sock_orphan(sk);
@@ -114,9 +114,9 @@ static void local_release(struct kref *ref)
nfc_llcp_socket_release(local, false);
del_timer_sync(&local->link_timer);
skb_queue_purge(&local->tx_queue);
- destroy_workqueue(local->tx_wq);
- destroy_workqueue(local->rx_wq);
- destroy_workqueue(local->timeout_wq);
+ cancel_work_sync(&local->tx_work);
+ cancel_work_sync(&local->rx_work);
+ cancel_work_sync(&local->timeout_work);
kfree_skb(local->rx_pending);
kfree(local);
}
@@ -181,7 +181,7 @@ static void nfc_llcp_symm_timer(unsigned long data)
pr_err("SYMM timeout\n");
- queue_work(local->timeout_wq, &local->timeout_work);
+ schedule_work(&local->timeout_work);
}
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
@@ -426,6 +426,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
u8 *miux_tlv, miux_length;
__be16 miux;
u8 gb_len = 0;
+ int ret = 0;
version = LLCP_VERSION_11;
version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version,
@@ -450,8 +451,8 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
gb_len += ARRAY_SIZE(llcp_magic);
if (gb_len > NFC_MAX_GT_LEN) {
- kfree(version_tlv);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
gb_cur = local->gb;
@@ -471,12 +472,15 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
memcpy(gb_cur, miux_tlv, miux_length);
gb_cur += miux_length;
+ local->gb_len = gb_len;
+
+out:
kfree(version_tlv);
kfree(lto_tlv);
+ kfree(wks_tlv);
+ kfree(miux_tlv);
- local->gb_len = gb_len;
-
- return 0;
+ return ret;
}
u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len)
@@ -554,6 +558,46 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu)
sock->recv_ack_n = (sock->recv_n - 1) % 16;
}
+void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
+ struct sk_buff *skb, u8 direction)
+{
+ struct hlist_node *node;
+ struct sk_buff *skb_copy = NULL, *nskb;
+ struct sock *sk;
+ u8 *data;
+
+ read_lock(&local->raw_sockets.lock);
+
+ sk_for_each(sk, node, &local->raw_sockets.head) {
+ if (sk->sk_state != LLCP_BOUND)
+ continue;
+
+ if (skb_copy == NULL) {
+ skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE,
+ GFP_ATOMIC);
+
+ if (skb_copy == NULL)
+ continue;
+
+ data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE);
+
+ data[0] = local->dev ? local->dev->idx : 0xFF;
+ data[1] = direction;
+ }
+
+ nskb = skb_clone(skb_copy, GFP_ATOMIC);
+ if (!nskb)
+ continue;
+
+ if (sock_queue_rcv_skb(sk, nskb))
+ kfree_skb(nskb);
+ }
+
+ read_unlock(&local->raw_sockets.lock);
+
+ kfree_skb(skb_copy);
+}
+
static void nfc_llcp_tx_work(struct work_struct *work)
{
struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
@@ -574,6 +618,9 @@ static void nfc_llcp_tx_work(struct work_struct *work)
DUMP_PREFIX_OFFSET, 16, 1,
skb->data, skb->len, true);
+ nfc_llcp_send_to_raw_sock(local, skb,
+ NFC_LLCP_DIRECTION_TX);
+
ret = nfc_data_exchange(local->dev, local->target_idx,
skb, nfc_llcp_recv, local);
@@ -1018,6 +1065,8 @@ static void nfc_llcp_rx_work(struct work_struct *work)
print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET,
16, 1, skb->data, skb->len, true);
+ nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
+
switch (ptype) {
case LLCP_PDU_SYMM:
pr_debug("SYMM\n");
@@ -1052,7 +1101,7 @@ static void nfc_llcp_rx_work(struct work_struct *work)
}
- queue_work(local->tx_wq, &local->tx_work);
+ schedule_work(&local->tx_work);
kfree_skb(local->rx_pending);
local->rx_pending = NULL;
@@ -1071,7 +1120,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err)
local->rx_pending = skb_get(skb);
del_timer(&local->link_timer);
- queue_work(local->rx_wq, &local->rx_work);
+ schedule_work(&local->rx_work);
return;
}
@@ -1086,7 +1135,7 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb)
local->rx_pending = skb_get(skb);
del_timer(&local->link_timer);
- queue_work(local->rx_wq, &local->rx_work);
+ schedule_work(&local->rx_work);
return 0;
}
@@ -1121,7 +1170,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
if (rf_mode == NFC_RF_INITIATOR) {
pr_debug("Queueing Tx work\n");
- queue_work(local->tx_wq, &local->tx_work);
+ schedule_work(&local->tx_work);
} else {
mod_timer(&local->link_timer,
jiffies + msecs_to_jiffies(local->remote_lto));
@@ -1130,10 +1179,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
int nfc_llcp_register_device(struct nfc_dev *ndev)
{
- struct device *dev = &ndev->dev;
struct nfc_llcp_local *local;
- char name[32];
- int err;
local = kzalloc(sizeof(struct nfc_llcp_local), GFP_KERNEL);
if (local == NULL)
@@ -1149,41 +1195,15 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
skb_queue_head_init(&local->tx_queue);
INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
- snprintf(name, sizeof(name), "%s_llcp_tx_wq", dev_name(dev));
- local->tx_wq =
- alloc_workqueue(name,
- WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM,
- 1);
- if (local->tx_wq == NULL) {
- err = -ENOMEM;
- goto err_local;
- }
local->rx_pending = NULL;
INIT_WORK(&local->rx_work, nfc_llcp_rx_work);
- snprintf(name, sizeof(name), "%s_llcp_rx_wq", dev_name(dev));
- local->rx_wq =
- alloc_workqueue(name,
- WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM,
- 1);
- if (local->rx_wq == NULL) {
- err = -ENOMEM;
- goto err_tx_wq;
- }
INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work);
- snprintf(name, sizeof(name), "%s_llcp_timeout_wq", dev_name(dev));
- local->timeout_wq =
- alloc_workqueue(name,
- WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM,
- 1);
- if (local->timeout_wq == NULL) {
- err = -ENOMEM;
- goto err_rx_wq;
- }
- local->sockets.lock = __RW_LOCK_UNLOCKED(local->sockets.lock);
- local->connecting_sockets.lock = __RW_LOCK_UNLOCKED(local->connecting_sockets.lock);
+ rwlock_init(&local->sockets.lock);
+ rwlock_init(&local->connecting_sockets.lock);
+ rwlock_init(&local->raw_sockets.lock);
nfc_llcp_build_gb(local);
@@ -1193,17 +1213,6 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
list_add(&llcp_devices, &local->list);
return 0;
-
-err_rx_wq:
- destroy_workqueue(local->rx_wq);
-
-err_tx_wq:
- destroy_workqueue(local->tx_wq);
-
-err_local:
- kfree(local);
-
- return 0;
}
void nfc_llcp_unregister_device(struct nfc_dev *dev)
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h
index 83b8bba5a280..fdb2d24e60bd 100644
--- a/net/nfc/llcp/llcp.h
+++ b/net/nfc/llcp/llcp.h
@@ -56,12 +56,9 @@ struct nfc_llcp_local {
struct timer_list link_timer;
struct sk_buff_head tx_queue;
- struct workqueue_struct *tx_wq;
struct work_struct tx_work;
- struct workqueue_struct *rx_wq;
struct work_struct rx_work;
struct sk_buff *rx_pending;
- struct workqueue_struct *timeout_wq;
struct work_struct timeout_work;
u32 target_idx;
@@ -89,6 +86,7 @@ struct nfc_llcp_local {
/* sockets array */
struct llcp_sock_list sockets;
struct llcp_sock_list connecting_sockets;
+ struct llcp_sock_list raw_sockets;
};
struct nfc_llcp_sock {
@@ -187,6 +185,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local);
void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap);
int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock);
+void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
+ struct sk_buff *skb, u8 direction);
/* Sock API */
struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp);
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index ddeb9aa398f0..63e4cdc92376 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -142,6 +142,60 @@ error:
return ret;
}
+static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr,
+ int alen)
+{
+ struct sock *sk = sock->sk;
+ struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+ struct nfc_llcp_local *local;
+ struct nfc_dev *dev;
+ struct sockaddr_nfc_llcp llcp_addr;
+ int len, ret = 0;
+
+ if (!addr || addr->sa_family != AF_NFC)
+ return -EINVAL;
+
+ pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
+
+ memset(&llcp_addr, 0, sizeof(llcp_addr));
+ len = min_t(unsigned int, sizeof(llcp_addr), alen);
+ memcpy(&llcp_addr, addr, len);
+
+ lock_sock(sk);
+
+ if (sk->sk_state != LLCP_CLOSED) {
+ ret = -EBADFD;
+ goto error;
+ }
+
+ dev = nfc_get_device(llcp_addr.dev_idx);
+ if (dev == NULL) {
+ ret = -ENODEV;
+ goto error;
+ }
+
+ local = nfc_llcp_find_local(dev);
+ if (local == NULL) {
+ ret = -ENODEV;
+ goto put_dev;
+ }
+
+ llcp_sock->dev = dev;
+ llcp_sock->local = nfc_llcp_local_get(local);
+ llcp_sock->nfc_protocol = llcp_addr.nfc_protocol;
+
+ nfc_llcp_sock_link(&local->raw_sockets, sk);
+
+ sk->sk_state = LLCP_BOUND;
+
+put_dev:
+ nfc_put_device(dev);
+
+error:
+ release_sock(sk);
+ return ret;
+}
+
static int llcp_sock_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
@@ -300,9 +354,6 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
pr_debug("%p %d %d %d\n", sk, llcp_sock->target_idx,
llcp_sock->dsap, llcp_sock->ssap);
- if (llcp_sock == NULL || llcp_sock->dev == NULL)
- return -EBADFD;
-
uaddr->sa_family = AF_NFC;
*len = sizeof(struct sockaddr_nfc_llcp);
@@ -421,7 +472,10 @@ static int llcp_sock_release(struct socket *sock)
release_sock(sk);
- nfc_llcp_sock_unlink(&local->sockets, sk);
+ if (sock->type == SOCK_RAW)
+ nfc_llcp_sock_unlink(&local->raw_sockets, sk);
+ else
+ nfc_llcp_sock_unlink(&local->sockets, sk);
out:
sock_orphan(sk);
@@ -443,15 +497,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags);
if (!addr || len < sizeof(struct sockaddr_nfc) ||
- addr->sa_family != AF_NFC) {
- pr_err("Invalid socket\n");
+ addr->sa_family != AF_NFC)
return -EINVAL;
- }
- if (addr->service_name_len == 0 && addr->dsap == 0) {
- pr_err("Missing service name or dsap\n");
+ if (addr->service_name_len == 0 && addr->dsap == 0)
return -EINVAL;
- }
pr_debug("addr dev_idx=%u target_idx=%u protocol=%u\n", addr->dev_idx,
addr->target_idx, addr->nfc_protocol);
@@ -617,7 +667,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!(flags & MSG_PEEK)) {
/* SOCK_STREAM: re-queue skb if it contains unreceived data */
- if (sk->sk_type == SOCK_STREAM) {
+ if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_RAW) {
skb_pull(skb, copied);
if (skb->len) {
skb_queue_head(&sk->sk_receive_queue, skb);
@@ -658,6 +708,26 @@ static const struct proto_ops llcp_sock_ops = {
.mmap = sock_no_mmap,
};
+static const struct proto_ops llcp_rawsock_ops = {
+ .family = PF_NFC,
+ .owner = THIS_MODULE,
+ .bind = llcp_raw_sock_bind,
+ .connect = sock_no_connect,
+ .release = llcp_sock_release,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = llcp_sock_getname,
+ .poll = llcp_sock_poll,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = sock_no_setsockopt,
+ .getsockopt = sock_no_getsockopt,
+ .sendmsg = sock_no_sendmsg,
+ .recvmsg = llcp_sock_recvmsg,
+ .mmap = sock_no_mmap,
+};
+
static void llcp_sock_destruct(struct sock *sk)
{
struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -735,10 +805,15 @@ static int llcp_sock_create(struct net *net, struct socket *sock,
pr_debug("%p\n", sock);
- if (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM)
+ if (sock->type != SOCK_STREAM &&
+ sock->type != SOCK_DGRAM &&
+ sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sock->ops = &llcp_sock_ops;
+ if (sock->type == SOCK_RAW)
+ sock->ops = &llcp_rawsock_ops;
+ else
+ sock->ops = &llcp_sock_ops;
sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC);
if (sk == NULL)
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index f81efe13985a..acf9abb7d99b 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -176,6 +176,27 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
(1 + ((*num) * sizeof(struct disc_map_config))), &cmd);
}
+struct nci_set_config_param {
+ __u8 id;
+ size_t len;
+ __u8 *val;
+};
+
+static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt)
+{
+ struct nci_set_config_param *param = (struct nci_set_config_param *)opt;
+ struct nci_core_set_config_cmd cmd;
+
+ BUG_ON(param->len > NCI_MAX_PARAM_LEN);
+
+ cmd.num_params = 1;
+ cmd.param.id = param->id;
+ cmd.param.len = param->len;
+ memcpy(cmd.param.val, param->val, param->len);
+
+ nci_send_cmd(ndev, NCI_OP_CORE_SET_CONFIG_CMD, (3 + param->len), &cmd);
+}
+
static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
{
struct nci_rf_disc_cmd cmd;
@@ -388,6 +409,32 @@ static int nci_dev_down(struct nfc_dev *nfc_dev)
return nci_close_device(ndev);
}
+static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
+{
+ struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
+ struct nci_set_config_param param;
+ __u8 local_gb[NFC_MAX_GT_LEN];
+ int i, rc = 0;
+
+ param.val = nfc_get_local_general_bytes(nfc_dev, &param.len);
+ if ((param.val == NULL) || (param.len == 0))
+ return rc;
+
+ if (param.len > NCI_MAX_PARAM_LEN)
+ return -EINVAL;
+
+ for (i = 0; i < param.len; i++)
+ local_gb[param.len-1-i] = param.val[i];
+
+ param.id = NCI_PN_ATR_REQ_GEN_BYTES;
+ param.val = local_gb;
+
+ rc = nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+ msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
+
+ return rc;
+}
+
static int nci_start_poll(struct nfc_dev *nfc_dev,
__u32 im_protocols, __u32 tm_protocols)
{
@@ -415,6 +462,14 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
return -EBUSY;
}
+ if (im_protocols & NFC_PROTO_NFC_DEP_MASK) {
+ rc = nci_set_local_general_bytes(nfc_dev);
+ if (rc) {
+ pr_err("failed to set local general bytes\n");
+ return rc;
+ }
+ }
+
rc = nci_request(ndev, nci_rf_discover_req, im_protocols,
msecs_to_jiffies(NCI_RF_DISC_TIMEOUT));
@@ -509,7 +564,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
{
struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
- pr_debug("target_idx %d\n", target->idx);
+ pr_debug("entry\n");
if (!ndev->target_active_prot) {
pr_err("unable to deactivate target, no active target\n");
@@ -524,6 +579,38 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
}
}
+
+static int nci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
+ __u8 comm_mode, __u8 *gb, size_t gb_len)
+{
+ struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
+ int rc;
+
+ pr_debug("target_idx %d, comm_mode %d\n", target->idx, comm_mode);
+
+ rc = nci_activate_target(nfc_dev, target, NFC_PROTO_NFC_DEP);
+ if (rc)
+ return rc;
+
+ rc = nfc_set_remote_general_bytes(nfc_dev, ndev->remote_gb,
+ ndev->remote_gb_len);
+ if (!rc)
+ rc = nfc_dep_link_is_up(nfc_dev, target->idx, NFC_COMM_PASSIVE,
+ NFC_RF_INITIATOR);
+
+ return rc;
+}
+
+static int nci_dep_link_down(struct nfc_dev *nfc_dev)
+{
+ pr_debug("entry\n");
+
+ nci_deactivate_target(nfc_dev, NULL);
+
+ return 0;
+}
+
+
static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
struct sk_buff *skb,
data_exchange_cb_t cb, void *cb_context)
@@ -557,6 +644,8 @@ static struct nfc_ops nci_nfc_ops = {
.dev_down = nci_dev_down,
.start_poll = nci_start_poll,
.stop_poll = nci_stop_poll,
+ .dep_link_up = nci_dep_link_up,
+ .dep_link_down = nci_dep_link_down,
.activate_target = nci_activate_target,
.deactivate_target = nci_deactivate_target,
.im_transceive = nci_transceive,
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index af7a93b04393..b2aa98ef0927 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -176,6 +176,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
protocol = NFC_PROTO_ISO14443_B_MASK;
else if (rf_protocol == NCI_RF_PROTOCOL_T3T)
protocol = NFC_PROTO_FELICA_MASK;
+ else if (rf_protocol == NCI_RF_PROTOCOL_NFC_DEP)
+ protocol = NFC_PROTO_NFC_DEP_MASK;
else
protocol = 0;
@@ -361,6 +363,33 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
return NCI_STATUS_OK;
}
+static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
+ struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
+{
+ struct activation_params_poll_nfc_dep *poll;
+ int i;
+
+ switch (ntf->activation_rf_tech_and_mode) {
+ case NCI_NFC_A_PASSIVE_POLL_MODE:
+ case NCI_NFC_F_PASSIVE_POLL_MODE:
+ poll = &ntf->activation_params.poll_nfc_dep;
+ poll->atr_res_len = min_t(__u8, *data++, 63);
+ pr_debug("atr_res_len %d\n", poll->atr_res_len);
+ if (poll->atr_res_len > 0) {
+ for (i = 0; i < poll->atr_res_len; i++)
+ poll->atr_res[poll->atr_res_len-1-i] = data[i];
+ }
+ break;
+
+ default:
+ pr_err("unsupported activation_rf_tech_and_mode 0x%x\n",
+ ntf->activation_rf_tech_and_mode);
+ return NCI_STATUS_RF_PROTOCOL_ERROR;
+ }
+
+ return NCI_STATUS_OK;
+}
+
static void nci_target_auto_activated(struct nci_dev *ndev,
struct nci_rf_intf_activated_ntf *ntf)
{
@@ -454,6 +483,11 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev,
&ntf, data);
break;
+ case NCI_RF_INTERFACE_NFC_DEP:
+ err = nci_extract_activation_params_nfc_dep(ndev,
+ &ntf, data);
+ break;
+
case NCI_RF_INTERFACE_FRAME:
/* no activation params */
break;
@@ -473,6 +507,24 @@ exit:
/* set the available credits to initial value */
atomic_set(&ndev->credits_cnt, ndev->initial_num_credits);
+
+ /* store general bytes to be reported later in dep_link_up */
+ if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) {
+ ndev->remote_gb_len = 0;
+
+ if (ntf.activation_params_len > 0) {
+ /* ATR_RES general bytes at offset 15 */
+ ndev->remote_gb_len = min_t(__u8,
+ (ntf.activation_params
+ .poll_nfc_dep.atr_res_len
+ - NFC_ATR_RES_GT_OFFSET),
+ NFC_MAX_GT_LEN);
+ memcpy(ndev->remote_gb,
+ (ntf.activation_params.poll_nfc_dep
+ .atr_res + NFC_ATR_RES_GT_OFFSET),
+ ndev->remote_gb_len);
+ }
+ }
}
if (atomic_read(&ndev->state) == NCI_DISCOVERY) {
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 3003c3390e49..dd072f38ad00 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -119,6 +119,16 @@ exit:
nci_req_complete(ndev, rsp_1->status);
}
+static void nci_core_set_config_rsp_packet(struct nci_dev *ndev,
+ struct sk_buff *skb)
+{
+ struct nci_core_set_config_rsp *rsp = (void *) skb->data;
+
+ pr_debug("status 0x%x\n", rsp->status);
+
+ nci_req_complete(ndev, rsp->status);
+}
+
static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev,
struct sk_buff *skb)
{
@@ -194,6 +204,10 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
nci_core_init_rsp_packet(ndev, skb);
break;
+ case NCI_OP_CORE_SET_CONFIG_RSP:
+ nci_core_set_config_rsp_packet(ndev, skb);
+ break;
+
case NCI_OP_RF_DISCOVER_MAP_RSP:
nci_rf_disc_map_rsp_packet(ndev, skb);
break;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 4c51714ee741..c1b5285cbde7 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -58,7 +58,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
{
void *hdr;
- hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&nfc_genl_family, flags, NFC_CMD_GET_TARGET);
if (!hdr)
return -EMSGSIZE;
@@ -165,7 +165,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev)
struct sk_buff *msg;
void *hdr;
- dev->genl_data.poll_req_pid = 0;
+ dev->genl_data.poll_req_portid = 0;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!msg)
@@ -347,13 +347,13 @@ free_msg:
}
static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
- u32 pid, u32 seq,
+ u32 portid, u32 seq,
struct netlink_callback *cb,
int flags)
{
void *hdr;
- hdr = genlmsg_put(msg, pid, seq, &nfc_genl_family, flags,
+ hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags,
NFC_CMD_GET_DEVICE);
if (!hdr)
return -EMSGSIZE;
@@ -401,7 +401,7 @@ static int nfc_genl_dump_devices(struct sk_buff *skb,
while (dev) {
int rc;
- rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).pid,
+ rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, cb, NLM_F_MULTI);
if (rc < 0)
break;
@@ -520,7 +520,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info)
goto out_putdev;
}
- rc = nfc_genl_send_device(msg, dev, info->snd_pid, info->snd_seq,
+ rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq,
NULL, 0);
if (rc < 0)
goto out_free;
@@ -611,7 +611,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info)
rc = nfc_start_poll(dev, im_protocols, tm_protocols);
if (!rc)
- dev->genl_data.poll_req_pid = info->snd_pid;
+ dev->genl_data.poll_req_portid = info->snd_portid;
mutex_unlock(&dev->genl_data.genl_data_mutex);
@@ -645,13 +645,13 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&dev->genl_data.genl_data_mutex);
- if (dev->genl_data.poll_req_pid != info->snd_pid) {
+ if (dev->genl_data.poll_req_portid != info->snd_portid) {
rc = -EBUSY;
goto out;
}
rc = nfc_stop_poll(dev);
- dev->genl_data.poll_req_pid = 0;
+ dev->genl_data.poll_req_portid = 0;
out:
mutex_unlock(&dev->genl_data.genl_data_mutex);
@@ -761,38 +761,70 @@ static struct genl_ops nfc_genl_ops[] = {
},
};
-static int nfc_genl_rcv_nl_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+
+struct urelease_work {
+ struct work_struct w;
+ int portid;
+};
+
+static void nfc_urelease_event_work(struct work_struct *work)
{
- struct netlink_notify *n = ptr;
+ struct urelease_work *w = container_of(work, struct urelease_work, w);
struct class_dev_iter iter;
struct nfc_dev *dev;
- if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC)
- goto out;
+ pr_debug("portid %d\n", w->portid);
- pr_debug("NETLINK_URELEASE event from id %d\n", n->pid);
+ mutex_lock(&nfc_devlist_mutex);
nfc_device_iter_init(&iter);
dev = nfc_device_iter_next(&iter);
while (dev) {
- if (dev->genl_data.poll_req_pid == n->pid) {
+ mutex_lock(&dev->genl_data.genl_data_mutex);
+
+ if (dev->genl_data.poll_req_portid == w->portid) {
nfc_stop_poll(dev);
- dev->genl_data.poll_req_pid = 0;
+ dev->genl_data.poll_req_portid = 0;
}
+
+ mutex_unlock(&dev->genl_data.genl_data_mutex);
+
dev = nfc_device_iter_next(&iter);
}
nfc_device_iter_exit(&iter);
+ mutex_unlock(&nfc_devlist_mutex);
+
+ kfree(w);
+}
+
+static int nfc_genl_rcv_nl_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct netlink_notify *n = ptr;
+ struct urelease_work *w;
+
+ if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC)
+ goto out;
+
+ pr_debug("NETLINK_URELEASE event from id %d\n", n->portid);
+
+ w = kmalloc(sizeof(*w), GFP_ATOMIC);
+ if (w) {
+ INIT_WORK((struct work_struct *) w, nfc_urelease_event_work);
+ w->portid = n->portid;
+ schedule_work((struct work_struct *) w);
+ }
+
out:
return NOTIFY_DONE;
}
void nfc_genl_data_init(struct nfc_genl_data *genl_data)
{
- genl_data->poll_req_pid = 0;
+ genl_data->poll_req_portid = 0;
mutex_init(&genl_data->genl_data_mutex);
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 320fa0e6951a..08114478cb85 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -45,7 +45,7 @@ static int make_writable(struct sk_buff *skb, int write_len)
return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
-/* remove VLAN header from packet and update csum accrodingly. */
+/* remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
struct vlan_hdr *vhdr;
@@ -266,7 +266,7 @@ static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
if (unlikely(!skb))
return -ENOMEM;
- vport = rcu_dereference(dp->ports[out_port]);
+ vport = ovs_vport_rcu(dp, out_port);
if (unlikely(!vport)) {
kfree_skb(skb);
return -ENODEV;
@@ -286,7 +286,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
upcall.cmd = OVS_PACKET_CMD_ACTION;
upcall.key = &OVS_CB(skb)->flow->key;
upcall.userdata = NULL;
- upcall.pid = 0;
+ upcall.portid = 0;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
@@ -296,7 +296,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
break;
case OVS_USERSPACE_ATTR_PID:
- upcall.pid = nla_get_u32(a);
+ upcall.portid = nla_get_u32(a);
break;
}
}
@@ -325,9 +325,6 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
}
}
- if (!acts_list)
- return 0;
-
return do_execute_actions(dp, skb, nla_data(acts_list),
nla_len(acts_list), true);
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d8277d29e710..4c4b62ccc7d7 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -49,12 +49,29 @@
#include <linux/dmi.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include "datapath.h"
#include "flow.h"
#include "vport-internal_dev.h"
/**
+ * struct ovs_net - Per net-namespace data for ovs.
+ * @dps: List of datapaths to enable dumping them all out.
+ * Protected by genl_mutex.
+ */
+struct ovs_net {
+ struct list_head dps;
+};
+
+static int ovs_net_id __read_mostly;
+
+#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
+static void rehash_flow_table(struct work_struct *work);
+static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+
+/**
* DOC: Locking:
*
* Writes to device state (add/remove datapath, port, set operations on vports,
@@ -71,29 +88,21 @@
* each other.
*/
-/* Global list of datapaths to enable dumping them all out.
- * Protected by genl_mutex.
- */
-static LIST_HEAD(dps);
-
-#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
-static void rehash_flow_table(struct work_struct *work);
-static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
-
static struct vport *new_vport(const struct vport_parms *);
-static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
+static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
const struct dp_upcall_info *);
-static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
+static int queue_userspace_packet(struct net *, int dp_ifindex,
+ struct sk_buff *,
const struct dp_upcall_info *);
/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
-static struct datapath *get_dp(int dp_ifindex)
+static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
struct datapath *dp = NULL;
struct net_device *dev;
rcu_read_lock();
- dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
+ dev = dev_get_by_index_rcu(net, dp_ifindex);
if (dev) {
struct vport *vport = ovs_internal_dev_get_vport(dev);
if (vport)
@@ -107,7 +116,7 @@ static struct datapath *get_dp(int dp_ifindex)
/* Must be called with rcu_read_lock or RTNL lock. */
const char *ovs_dp_name(const struct datapath *dp)
{
- struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]);
+ struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
return vport->ops->get_name(vport);
}
@@ -118,7 +127,7 @@ static int get_dpifindex(struct datapath *dp)
rcu_read_lock();
- local = rcu_dereference(dp->ports[OVSP_LOCAL]);
+ local = ovs_vport_rcu(dp, OVSP_LOCAL);
if (local)
ifindex = local->ops->get_ifindex(local);
else
@@ -135,9 +144,31 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
free_percpu(dp->stats_percpu);
+ release_net(ovs_dp_get_net(dp));
+ kfree(dp->ports);
kfree(dp);
}
+static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
+ u16 port_no)
+{
+ return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
+}
+
+struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
+{
+ struct vport *vport;
+ struct hlist_node *n;
+ struct hlist_head *head;
+
+ head = vport_hash_bucket(dp, port_no);
+ hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) {
+ if (vport->port_no == port_no)
+ return vport;
+ }
+ return NULL;
+}
+
/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
@@ -146,9 +177,9 @@ static struct vport *new_vport(const struct vport_parms *parms)
vport = ovs_vport_add(parms);
if (!IS_ERR(vport)) {
struct datapath *dp = parms->dp;
+ struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
- rcu_assign_pointer(dp->ports[parms->port_no], vport);
- list_add(&vport->node, &dp->port_list);
+ hlist_add_head_rcu(&vport->dp_hash_node, head);
}
return vport;
@@ -160,8 +191,7 @@ void ovs_dp_detach_port(struct vport *p)
ASSERT_RTNL();
/* First drop references to device. */
- list_del(&p->node);
- rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
+ hlist_del_rcu(&p->dp_hash_node);
/* Then destroy it. */
ovs_vport_del(p);
@@ -195,7 +225,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
upcall.cmd = OVS_PACKET_CMD_MISS;
upcall.key = &key;
upcall.userdata = NULL;
- upcall.pid = p->upcall_pid;
+ upcall.portid = p->upcall_portid;
ovs_dp_upcall(dp, skb, &upcall);
consume_skb(skb);
stats_counter = &stats->n_missed;
@@ -220,17 +250,18 @@ static struct genl_family dp_packet_genl_family = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_PACKET_FAMILY,
.version = OVS_PACKET_VERSION,
- .maxattr = OVS_PACKET_ATTR_MAX
+ .maxattr = OVS_PACKET_ATTR_MAX,
+ .netnsok = true
};
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
- const struct dp_upcall_info *upcall_info)
+ const struct dp_upcall_info *upcall_info)
{
struct dp_stats_percpu *stats;
int dp_ifindex;
int err;
- if (upcall_info->pid == 0) {
+ if (upcall_info->portid == 0) {
err = -ENOTCONN;
goto err;
}
@@ -242,9 +273,9 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
}
if (!skb_is_gso(skb))
- err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+ err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
else
- err = queue_gso_packets(dp_ifindex, skb, upcall_info);
+ err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
if (err)
goto err;
@@ -260,7 +291,8 @@ err:
return err;
}
-static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
+static int queue_gso_packets(struct net *net, int dp_ifindex,
+ struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
unsigned short gso_type = skb_shinfo(skb)->gso_type;
@@ -276,7 +308,7 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
/* Queue all of the segments. */
skb = segs;
do {
- err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+ err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
if (err)
break;
@@ -306,7 +338,8 @@ static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
return err;
}
-static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
+static int queue_userspace_packet(struct net *net, int dp_ifindex,
+ struct sk_buff *skb,
const struct dp_upcall_info *upcall_info)
{
struct ovs_header *upcall;
@@ -362,7 +395,7 @@ static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
skb_copy_and_csum_dev(skb, nla_data(nla));
- err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
+ err = genlmsg_unicast(net, user_skb, upcall_info->portid);
out:
kfree_skb(nskb);
@@ -370,15 +403,10 @@ out:
}
/* Called with genl_mutex. */
-static int flush_flows(int dp_ifindex)
+static int flush_flows(struct datapath *dp)
{
struct flow_table *old_table;
struct flow_table *new_table;
- struct datapath *dp;
-
- dp = get_dp(dp_ifindex);
- if (!dp)
- return -ENODEV;
old_table = genl_dereference(dp->table);
new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
@@ -425,10 +453,10 @@ static int validate_sample(const struct nlattr *attr,
static int validate_tp_port(const struct sw_flow_key *flow_key)
{
if (flow_key->eth.type == htons(ETH_P_IP)) {
- if (flow_key->ipv4.tp.src && flow_key->ipv4.tp.dst)
+ if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
return 0;
} else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
- if (flow_key->ipv6.tp.src && flow_key->ipv6.tp.dst)
+ if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
return 0;
}
@@ -460,7 +488,7 @@ static int validate_set(const struct nlattr *a,
if (flow_key->eth.type != htons(ETH_P_IP))
return -EINVAL;
- if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst)
+ if (!flow_key->ip.proto)
return -EINVAL;
ipv4_key = nla_data(ovs_key);
@@ -668,7 +696,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->priority = flow->key.phy.priority;
rcu_read_lock();
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
@@ -742,7 +770,8 @@ static struct genl_family dp_flow_genl_family = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_FLOW_FAMILY,
.version = OVS_FLOW_VERSION,
- .maxattr = OVS_FLOW_ATTR_MAX
+ .maxattr = OVS_FLOW_ATTR_MAX,
+ .netnsok = true
};
static struct genl_multicast_group ovs_dp_flow_multicast_group = {
@@ -751,7 +780,7 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
/* Called with genl_lock. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
- struct sk_buff *skb, u32 pid,
+ struct sk_buff *skb, u32 portid,
u32 seq, u32 flags, u8 cmd)
{
const int skb_orig_len = skb->len;
@@ -766,7 +795,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
sf_acts = rcu_dereference_protected(flow->sf_acts,
lockdep_genl_is_held());
- ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
+ ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
if (!ovs_header)
return -EMSGSIZE;
@@ -850,7 +879,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
struct datapath *dp,
- u32 pid, u32 seq, u8 cmd)
+ u32 portid, u32 seq, u8 cmd)
{
struct sk_buff *skb;
int retval;
@@ -859,7 +888,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
+ retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
BUG_ON(retval < 0);
return skb;
}
@@ -894,7 +923,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error;
}
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
error = -ENODEV;
if (!dp)
goto error;
@@ -941,7 +970,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
flow->hash = ovs_flow_hash(&key, key_len);
ovs_flow_tbl_insert(table, flow);
- reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+ reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
info->snd_seq,
OVS_FLOW_CMD_NEW);
} else {
@@ -979,7 +1008,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
ovs_flow_deferred_free_acts(old_acts);
}
- reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+ reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
info->snd_seq, OVS_FLOW_CMD_NEW);
/* Clear stats. */
@@ -991,11 +1020,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
}
if (!IS_ERR(reply))
- genl_notify(reply, genl_info_net(info), info->snd_pid,
+ genl_notify(reply, genl_info_net(info), info->snd_portid,
ovs_dp_flow_multicast_group.id, info->nlhdr,
GFP_KERNEL);
else
- netlink_set_err(init_net.genl_sock, 0,
+ netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
return 0;
@@ -1023,7 +1052,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp)
return -ENODEV;
@@ -1032,7 +1061,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
if (!flow)
return -ENOENT;
- reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+ reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
info->snd_seq, OVS_FLOW_CMD_NEW);
if (IS_ERR(reply))
return PTR_ERR(reply);
@@ -1052,16 +1081,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
int err;
int key_len;
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ if (!dp)
+ return -ENODEV;
+
if (!a[OVS_FLOW_ATTR_KEY])
- return flush_flows(ovs_header->dp_ifindex);
+ return flush_flows(dp);
+
err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
if (err)
return err;
- dp = get_dp(ovs_header->dp_ifindex);
- if (!dp)
- return -ENODEV;
-
table = genl_dereference(dp->table);
flow = ovs_flow_tbl_lookup(table, &key, key_len);
if (!flow)
@@ -1073,13 +1103,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
ovs_flow_tbl_remove(table, flow);
- err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
+ err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_FLOW_CMD_DEL);
BUG_ON(err < 0);
ovs_flow_deferred_free(flow);
- genl_notify(reply, genl_info_net(info), info->snd_pid,
+ genl_notify(reply, genl_info_net(info), info->snd_portid,
ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
return 0;
}
@@ -1090,7 +1120,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct datapath *dp;
struct flow_table *table;
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp)
return -ENODEV;
@@ -1107,7 +1137,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
break;
if (ovs_flow_cmd_fill_info(flow, dp, skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
OVS_FLOW_CMD_NEW) < 0)
break;
@@ -1152,7 +1182,8 @@ static struct genl_family dp_datapath_genl_family = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
- .maxattr = OVS_DP_ATTR_MAX
+ .maxattr = OVS_DP_ATTR_MAX,
+ .netnsok = true
};
static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
@@ -1160,13 +1191,13 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
};
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
- u32 pid, u32 seq, u32 flags, u8 cmd)
+ u32 portid, u32 seq, u32 flags, u8 cmd)
{
struct ovs_header *ovs_header;
struct ovs_dp_stats dp_stats;
int err;
- ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
+ ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
flags, cmd);
if (!ovs_header)
goto error;
@@ -1191,7 +1222,7 @@ error:
return -EMSGSIZE;
}
-static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
+static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
u32 seq, u8 cmd)
{
struct sk_buff *skb;
@@ -1201,7 +1232,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
+ retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
if (retval < 0) {
kfree_skb(skb);
return ERR_PTR(retval);
@@ -1210,18 +1241,19 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
}
/* Called with genl_mutex and optionally with RTNL lock also. */
-static struct datapath *lookup_datapath(struct ovs_header *ovs_header,
+static struct datapath *lookup_datapath(struct net *net,
+ struct ovs_header *ovs_header,
struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
struct datapath *dp;
if (!a[OVS_DP_ATTR_NAME])
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
else {
struct vport *vport;
rcu_read_lock();
- vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
+ vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
rcu_read_unlock();
}
@@ -1235,22 +1267,21 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *reply;
struct datapath *dp;
struct vport *vport;
- int err;
+ struct ovs_net *ovs_net;
+ int err, i;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
goto err;
rtnl_lock();
- err = -ENODEV;
- if (!try_module_get(THIS_MODULE))
- goto err_unlock_rtnl;
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_put_module;
- INIT_LIST_HEAD(&dp->port_list);
+ goto err_unlock_rtnl;
+
+ ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
/* Allocate table. */
err = -ENOMEM;
@@ -1264,13 +1295,23 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_table;
}
+ dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dp->ports) {
+ err = -ENOMEM;
+ goto err_destroy_percpu;
+ }
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->ports[i]);
+
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL;
parms.options = NULL;
parms.dp = dp;
parms.port_no = OVSP_LOCAL;
- parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+ parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
vport = new_vport(&parms);
if (IS_ERR(vport)) {
@@ -1278,64 +1319,59 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (err == -EBUSY)
err = -EEXIST;
- goto err_destroy_percpu;
+ goto err_destroy_ports_array;
}
- reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+ reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_NEW);
err = PTR_ERR(reply);
if (IS_ERR(reply))
goto err_destroy_local_port;
- list_add_tail(&dp->list_node, &dps);
+ ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
+ list_add_tail(&dp->list_node, &ovs_net->dps);
rtnl_unlock();
- genl_notify(reply, genl_info_net(info), info->snd_pid,
+ genl_notify(reply, genl_info_net(info), info->snd_portid,
ovs_dp_datapath_multicast_group.id, info->nlhdr,
GFP_KERNEL);
return 0;
err_destroy_local_port:
- ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+ ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
+err_destroy_ports_array:
+ kfree(dp->ports);
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(genl_dereference(dp->table));
err_free_dp:
+ release_net(ovs_dp_get_net(dp));
kfree(dp);
-err_put_module:
- module_put(THIS_MODULE);
err_unlock_rtnl:
rtnl_unlock();
err:
return err;
}
-static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
+/* Called with genl_mutex. */
+static void __dp_destroy(struct datapath *dp)
{
- struct vport *vport, *next_vport;
- struct sk_buff *reply;
- struct datapath *dp;
- int err;
+ int i;
rtnl_lock();
- dp = lookup_datapath(info->userhdr, info->attrs);
- err = PTR_ERR(dp);
- if (IS_ERR(dp))
- goto exit_unlock;
- reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
- info->snd_seq, OVS_DP_CMD_DEL);
- err = PTR_ERR(reply);
- if (IS_ERR(reply))
- goto exit_unlock;
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+ struct vport *vport;
+ struct hlist_node *node, *n;
- list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
- if (vport->port_no != OVSP_LOCAL)
- ovs_dp_detach_port(vport);
+ hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node)
+ if (vport->port_no != OVSP_LOCAL)
+ ovs_dp_detach_port(vport);
+ }
list_del(&dp->list_node);
- ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+ ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
/* rtnl_unlock() will wait until all the references to devices that
* are pending unregistration have been dropped. We do it here to
@@ -1345,17 +1381,32 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
rtnl_unlock();
call_rcu(&dp->rcu, destroy_dp_rcu);
- module_put(THIS_MODULE);
+}
+
+static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *reply;
+ struct datapath *dp;
+ int err;
+
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
+ err = PTR_ERR(dp);
+ if (IS_ERR(dp))
+ return err;
- genl_notify(reply, genl_info_net(info), info->snd_pid,
+ reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
+ info->snd_seq, OVS_DP_CMD_DEL);
+ err = PTR_ERR(reply);
+ if (IS_ERR(reply))
+ return err;
+
+ __dp_destroy(dp);
+
+ genl_notify(reply, genl_info_net(info), info->snd_portid,
ovs_dp_datapath_multicast_group.id, info->nlhdr,
GFP_KERNEL);
return 0;
-
-exit_unlock:
- rtnl_unlock();
- return err;
}
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1364,20 +1415,20 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
int err;
- dp = lookup_datapath(info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
if (IS_ERR(dp))
return PTR_ERR(dp);
- reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+ reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_NEW);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
- netlink_set_err(init_net.genl_sock, 0,
+ netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
ovs_dp_datapath_multicast_group.id, err);
return 0;
}
- genl_notify(reply, genl_info_net(info), info->snd_pid,
+ genl_notify(reply, genl_info_net(info), info->snd_portid,
ovs_dp_datapath_multicast_group.id, info->nlhdr,
GFP_KERNEL);
@@ -1389,11 +1440,11 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *reply;
struct datapath *dp;
- dp = lookup_datapath(info->userhdr, info->attrs);
+ dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
if (IS_ERR(dp))
return PTR_ERR(dp);
- reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+ reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
info->snd_seq, OVS_DP_CMD_NEW);
if (IS_ERR(reply))
return PTR_ERR(reply);
@@ -1403,13 +1454,14 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
struct datapath *dp;
int skip = cb->args[0];
int i = 0;
- list_for_each_entry(dp, &dps, list_node) {
+ list_for_each_entry(dp, &ovs_net->dps, list_node) {
if (i >= skip &&
- ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
+ ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
OVS_DP_CMD_NEW) < 0)
break;
@@ -1459,7 +1511,8 @@ static struct genl_family dp_vport_genl_family = {
.hdrsize = sizeof(struct ovs_header),
.name = OVS_VPORT_FAMILY,
.version = OVS_VPORT_VERSION,
- .maxattr = OVS_VPORT_ATTR_MAX
+ .maxattr = OVS_VPORT_ATTR_MAX,
+ .netnsok = true
};
struct genl_multicast_group ovs_dp_vport_multicast_group = {
@@ -1468,13 +1521,13 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = {
/* Called with RTNL lock or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
- u32 pid, u32 seq, u32 flags, u8 cmd)
+ u32 portid, u32 seq, u32 flags, u8 cmd)
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
int err;
- ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
+ ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
flags, cmd);
if (!ovs_header)
return -EMSGSIZE;
@@ -1484,7 +1537,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
- nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid))
+ nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
goto nla_put_failure;
ovs_vport_get_stats(vport, &vport_stats);
@@ -1506,7 +1559,7 @@ error:
}
/* Called with RTNL lock or RCU read lock. */
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
u32 seq, u8 cmd)
{
struct sk_buff *skb;
@@ -1516,7 +1569,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
+ retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
if (retval < 0) {
kfree_skb(skb);
return ERR_PTR(retval);
@@ -1525,14 +1578,15 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
}
/* Called with RTNL lock or RCU read lock. */
-static struct vport *lookup_vport(struct ovs_header *ovs_header,
+static struct vport *lookup_vport(struct net *net,
+ struct ovs_header *ovs_header,
struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
struct datapath *dp;
struct vport *vport;
if (a[OVS_VPORT_ATTR_NAME]) {
- vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
+ vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
if (!vport)
return ERR_PTR(-ENODEV);
if (ovs_header->dp_ifindex &&
@@ -1545,11 +1599,11 @@ static struct vport *lookup_vport(struct ovs_header *ovs_header,
if (port_no >= DP_MAX_PORTS)
return ERR_PTR(-EFBIG);
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
if (!dp)
return ERR_PTR(-ENODEV);
- vport = rcu_dereference_rtnl(dp->ports[port_no]);
+ vport = ovs_vport_rtnl_rcu(dp, port_no);
if (!vport)
return ERR_PTR(-ENOENT);
return vport;
@@ -1574,7 +1628,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto exit;
rtnl_lock();
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto exit_unlock;
@@ -1586,7 +1640,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (port_no >= DP_MAX_PORTS)
goto exit_unlock;
- vport = rtnl_dereference(dp->ports[port_no]);
+ vport = ovs_vport_rtnl_rcu(dp, port_no);
err = -EBUSY;
if (vport)
goto exit_unlock;
@@ -1596,7 +1650,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -EFBIG;
goto exit_unlock;
}
- vport = rtnl_dereference(dp->ports[port_no]);
+ vport = ovs_vport_rtnl(dp, port_no);
if (!vport)
break;
}
@@ -1607,21 +1661,21 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.options = a[OVS_VPORT_ATTR_OPTIONS];
parms.dp = dp;
parms.port_no = port_no;
- parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+ parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
vport = new_vport(&parms);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock;
- reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+ reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
OVS_VPORT_CMD_NEW);
if (IS_ERR(reply)) {
err = PTR_ERR(reply);
ovs_dp_detach_port(vport);
goto exit_unlock;
}
- genl_notify(reply, genl_info_net(info), info->snd_pid,
+ genl_notify(reply, genl_info_net(info), info->snd_portid,
ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
exit_unlock:
@@ -1638,7 +1692,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
int err;
rtnl_lock();
- vport = lookup_vport(info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock;
@@ -1653,17 +1707,17 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
if (err)
goto exit_unlock;
if (a[OVS_VPORT_ATTR_UPCALL_PID])
- vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+ vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
- reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+ reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
OVS_VPORT_CMD_NEW);
if (IS_ERR(reply)) {
- netlink_set_err(init_net.genl_sock, 0,
+ netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
goto exit_unlock;
}
- genl_notify(reply, genl_info_net(info), info->snd_pid,
+ genl_notify(reply, genl_info_net(info), info->snd_portid,
ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
exit_unlock:
@@ -1679,7 +1733,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
int err;
rtnl_lock();
- vport = lookup_vport(info->userhdr, a);
+ vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock;
@@ -1689,7 +1743,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock;
}
- reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+ reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
OVS_VPORT_CMD_DEL);
err = PTR_ERR(reply);
if (IS_ERR(reply))
@@ -1697,7 +1751,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
ovs_dp_detach_port(vport);
- genl_notify(reply, genl_info_net(info), info->snd_pid,
+ genl_notify(reply, genl_info_net(info), info->snd_portid,
ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
exit_unlock:
@@ -1714,12 +1768,12 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
int err;
rcu_read_lock();
- vport = lookup_vport(ovs_header, a);
+ vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
err = PTR_ERR(vport);
if (IS_ERR(vport))
goto exit_unlock;
- reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+ reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
OVS_VPORT_CMD_NEW);
err = PTR_ERR(reply);
if (IS_ERR(reply))
@@ -1738,54 +1792,39 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
struct datapath *dp;
- u32 port_no;
- int retval;
+ int bucket = cb->args[0], skip = cb->args[1];
+ int i, j = 0;
- dp = get_dp(ovs_header->dp_ifindex);
+ dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp)
return -ENODEV;
rcu_read_lock();
- for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
+ for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
-
- vport = rcu_dereference(dp->ports[port_no]);
- if (!vport)
- continue;
-
- if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- OVS_VPORT_CMD_NEW) < 0)
- break;
- }
- rcu_read_unlock();
-
- cb->args[0] = port_no;
- retval = skb->len;
-
- return retval;
-}
-
-static void rehash_flow_table(struct work_struct *work)
-{
- struct datapath *dp;
-
- genl_lock();
-
- list_for_each_entry(dp, &dps, list_node) {
- struct flow_table *old_table = genl_dereference(dp->table);
- struct flow_table *new_table;
-
- new_table = ovs_flow_tbl_rehash(old_table);
- if (!IS_ERR(new_table)) {
- rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(old_table);
+ struct hlist_node *n;
+
+ j = 0;
+ hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) {
+ if (j >= skip &&
+ ovs_vport_cmd_fill_info(vport, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ OVS_VPORT_CMD_NEW) < 0)
+ goto out;
+
+ j++;
}
+ skip = 0;
}
+out:
+ rcu_read_unlock();
- genl_unlock();
+ cb->args[0] = i;
+ cb->args[1] = j;
- schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+ return skb->len;
}
static struct genl_ops dp_vport_genl_ops[] = {
@@ -1872,6 +1911,59 @@ error:
return err;
}
+static void rehash_flow_table(struct work_struct *work)
+{
+ struct datapath *dp;
+ struct net *net;
+
+ genl_lock();
+ rtnl_lock();
+ for_each_net(net) {
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ list_for_each_entry(dp, &ovs_net->dps, list_node) {
+ struct flow_table *old_table = genl_dereference(dp->table);
+ struct flow_table *new_table;
+
+ new_table = ovs_flow_tbl_rehash(old_table);
+ if (!IS_ERR(new_table)) {
+ rcu_assign_pointer(dp->table, new_table);
+ ovs_flow_tbl_deferred_destroy(old_table);
+ }
+ }
+ }
+ rtnl_unlock();
+ genl_unlock();
+
+ schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+}
+
+static int __net_init ovs_init_net(struct net *net)
+{
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ INIT_LIST_HEAD(&ovs_net->dps);
+ return 0;
+}
+
+static void __net_exit ovs_exit_net(struct net *net)
+{
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+ struct datapath *dp, *dp_next;
+
+ genl_lock();
+ list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
+ __dp_destroy(dp);
+ genl_unlock();
+}
+
+static struct pernet_operations ovs_net_ops = {
+ .init = ovs_init_net,
+ .exit = ovs_exit_net,
+ .id = &ovs_net_id,
+ .size = sizeof(struct ovs_net),
+};
+
static int __init dp_init(void)
{
struct sk_buff *dummy_skb;
@@ -1889,10 +1981,14 @@ static int __init dp_init(void)
if (err)
goto error_flow_exit;
- err = register_netdevice_notifier(&ovs_dp_device_notifier);
+ err = register_pernet_device(&ovs_net_ops);
if (err)
goto error_vport_exit;
+ err = register_netdevice_notifier(&ovs_dp_device_notifier);
+ if (err)
+ goto error_netns_exit;
+
err = dp_register_genl();
if (err < 0)
goto error_unreg_notifier;
@@ -1903,6 +1999,8 @@ static int __init dp_init(void)
error_unreg_notifier:
unregister_netdevice_notifier(&ovs_dp_device_notifier);
+error_netns_exit:
+ unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
ovs_vport_exit();
error_flow_exit:
@@ -1914,9 +2012,10 @@ error:
static void dp_cleanup(void)
{
cancel_delayed_work_sync(&rehash_flow_wq);
- rcu_barrier();
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
unregister_netdevice_notifier(&ovs_dp_device_notifier);
+ unregister_pernet_device(&ovs_net_ops);
+ rcu_barrier();
ovs_vport_exit();
ovs_flow_exit();
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index c1105c147531..031dfbf37c93 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -27,10 +27,11 @@
#include <linux/u64_stats_sync.h>
#include "flow.h"
+#include "vport.h"
-struct vport;
+#define DP_MAX_PORTS USHRT_MAX
+#define DP_VPORT_HASH_BUCKETS 1024
-#define DP_MAX_PORTS 1024
#define SAMPLE_ACTION_DEPTH 3
/**
@@ -58,11 +59,10 @@ struct dp_stats_percpu {
* @list_node: Element in global 'dps' list.
* @n_flows: Number of flows currently in flow table.
* @table: Current flow table. Protected by genl_lock and RCU.
- * @ports: Map from port number to &struct vport. %OVSP_LOCAL port
- * always exists, other ports may be %NULL. Protected by RTNL and RCU.
- * @port_list: List of all ports in @ports in arbitrary order. RTNL required
- * to iterate or modify.
+ * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
+ * RTNL and RCU.
* @stats_percpu: Per-CPU datapath statistics.
+ * @net: Reference to net namespace.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
@@ -75,13 +75,37 @@ struct datapath {
struct flow_table __rcu *table;
/* Switch ports. */
- struct vport __rcu *ports[DP_MAX_PORTS];
- struct list_head port_list;
+ struct hlist_head *ports;
/* Stats. */
struct dp_stats_percpu __percpu *stats_percpu;
+
+#ifdef CONFIG_NET_NS
+ /* Network namespace ref. */
+ struct net *net;
+#endif
};
+struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
+
+static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
+ return ovs_lookup_vport(dp, port_no);
+}
+
+static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
+{
+ ASSERT_RTNL();
+ return ovs_lookup_vport(dp, port_no);
+}
+
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -105,9 +129,19 @@ struct dp_upcall_info {
u8 cmd;
const struct sw_flow_key *key;
const struct nlattr *userdata;
- u32 pid;
+ u32 portid;
};
+static inline struct net *ovs_dp_get_net(struct datapath *dp)
+{
+ return read_pnet(&dp->net);
+}
+
+static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
+{
+ write_pnet(&dp->net, net);
+}
+
extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_multicast_group ovs_dp_vport_multicast_group;
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 36dcee8fc84a..5558350e0d33 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -41,19 +41,21 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_UNREGISTER:
if (!ovs_is_internal_dev(dev)) {
struct sk_buff *notify;
+ struct datapath *dp = vport->dp;
notify = ovs_vport_cmd_build_info(vport, 0, 0,
OVS_VPORT_CMD_DEL);
ovs_dp_detach_port(vport);
if (IS_ERR(notify)) {
- netlink_set_err(init_net.genl_sock, 0,
+ netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
ovs_dp_vport_multicast_group.id,
PTR_ERR(notify));
break;
}
- genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id,
- GFP_KERNEL);
+ genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
+ ovs_dp_vport_multicast_group.id,
+ GFP_KERNEL);
}
break;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index b7f38b161909..98c70630ad06 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -203,10 +203,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
int actions_len = nla_len(actions);
struct sw_flow_actions *sfa;
- /* At least DP_MAX_PORTS actions are required to be able to flood a
- * packet to every port. Factor of 2 allows for setting VLAN tags,
- * etc. */
- if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4))
+ if (actions_len > MAX_ACTIONS_BUFSIZE)
return ERR_PTR(-EINVAL);
sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL);
@@ -427,19 +424,11 @@ void ovs_flow_deferred_free(struct sw_flow *flow)
call_rcu(&flow->rcu, rcu_free_flow_callback);
}
-/* RCU callback used by ovs_flow_deferred_free_acts. */
-static void rcu_free_acts_callback(struct rcu_head *rcu)
-{
- struct sw_flow_actions *sf_acts = container_of(rcu,
- struct sw_flow_actions, rcu);
- kfree(sf_acts);
-}
-
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
* The caller must hold rcu_read_lock for this to be sensible. */
void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
{
- call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
+ kfree_rcu(sf_acts, rcu);
}
static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
@@ -1000,7 +989,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
swkey->phy.in_port = in_port;
attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
} else {
- swkey->phy.in_port = USHRT_MAX;
+ swkey->phy.in_port = DP_MAX_PORTS;
}
/* Data attributes. */
@@ -1143,7 +1132,7 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
const struct nlattr *nla;
int rem;
- *in_port = USHRT_MAX;
+ *in_port = DP_MAX_PORTS;
*priority = 0;
nla_for_each_nested(nla, attr, rem) {
@@ -1180,7 +1169,7 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
goto nla_put_failure;
- if (swkey->phy.in_port != USHRT_MAX &&
+ if (swkey->phy.in_port != DP_MAX_PORTS &&
nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
goto nla_put_failure;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 9b75617ca4e0..14a324eb017b 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -43,7 +43,7 @@ struct sw_flow_actions {
struct sw_flow_key {
struct {
u32 priority; /* Packet QoS priority. */
- u16 in_port; /* Input switch port (or USHRT_MAX). */
+ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} phy;
struct {
u8 src[ETH_ALEN]; /* Ethernet source address. */
@@ -145,15 +145,17 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies);
* OVS_KEY_ATTR_PRIORITY 4 -- 4 8
* OVS_KEY_ATTR_IN_PORT 4 -- 4 8
* OVS_KEY_ATTR_ETHERNET 12 -- 4 16
+ * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
* OVS_KEY_ATTR_8021Q 4 -- 4 8
- * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8
+ * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation)
+ * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype)
* OVS_KEY_ATTR_IPV6 40 -- 4 44
* OVS_KEY_ATTR_ICMPV6 2 2 4 8
* OVS_KEY_ATTR_ND 28 -- 4 32
* -------------------------------------------------
- * total 132
+ * total 144
*/
-#define FLOW_BUFSIZE 132
+#define FLOW_BUFSIZE 144
int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
@@ -161,6 +163,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
const struct nlattr *);
+#define MAX_ACTIONS_BUFSIZE (16 * 1024)
#define TBL_MIN_BUCKETS 1024
struct flow_table {
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 4061b9ee07f7..5d460c37df07 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -144,7 +144,7 @@ static void do_setup(struct net_device *netdev)
netdev->tx_queue_len = 0;
netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
- NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
+ NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
netdev->vlan_features = netdev->features;
netdev->features |= NETIF_F_HW_VLAN_TX;
@@ -175,9 +175,14 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
goto error_free_vport;
}
+ dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(netdev_vport->dev);
internal_dev->vport = vport;
+ /* Restrict bridge port to current netns. */
+ if (vport->port_no == OVSP_LOCAL)
+ netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
+
err = register_netdevice(netdev_vport->dev);
if (err)
goto error_free_netdev;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6ea3551cc78c..3c1e58ba714b 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -83,7 +83,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
netdev_vport = netdev_vport_priv(vport);
- netdev_vport->dev = dev_get_by_name(&init_net, parms->name);
+ netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
if (!netdev_vport->dev) {
err = -ENODEV;
goto error_free_vport;
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6140336e79d7..03779e8a2622 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -16,10 +16,10 @@
* 02110-1301, USA
*/
-#include <linux/dcache.h>
#include <linux/etherdevice.h>
#include <linux/if.h>
#include <linux/if_vlan.h>
+#include <linux/jhash.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -27,7 +27,9 @@
#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/compat.h>
+#include <net/net_namespace.h>
+#include "datapath.h"
#include "vport.h"
#include "vport-internal_dev.h"
@@ -67,9 +69,9 @@ void ovs_vport_exit(void)
kfree(dev_table);
}
-static struct hlist_head *hash_bucket(const char *name)
+static struct hlist_head *hash_bucket(struct net *net, const char *name)
{
- unsigned int hash = full_name_hash(name, strlen(name));
+ unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
}
@@ -80,14 +82,15 @@ static struct hlist_head *hash_bucket(const char *name)
*
* Must be called with RTNL or RCU read lock.
*/
-struct vport *ovs_vport_locate(const char *name)
+struct vport *ovs_vport_locate(struct net *net, const char *name)
{
- struct hlist_head *bucket = hash_bucket(name);
+ struct hlist_head *bucket = hash_bucket(net, name);
struct vport *vport;
struct hlist_node *node;
hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
- if (!strcmp(name, vport->ops->get_name(vport)))
+ if (!strcmp(name, vport->ops->get_name(vport)) &&
+ net_eq(ovs_dp_get_net(vport->dp), net))
return vport;
return NULL;
@@ -122,8 +125,9 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
vport->dp = parms->dp;
vport->port_no = parms->port_no;
- vport->upcall_pid = parms->upcall_pid;
+ vport->upcall_portid = parms->upcall_portid;
vport->ops = ops;
+ INIT_HLIST_NODE(&vport->dp_hash_node);
vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
if (!vport->percpu_stats) {
@@ -170,14 +174,17 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
if (vport_ops_list[i]->type == parms->type) {
+ struct hlist_head *bucket;
+
vport = vport_ops_list[i]->create(parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto out;
}
- hlist_add_head_rcu(&vport->hash_node,
- hash_bucket(vport->ops->get_name(vport)));
+ bucket = hash_bucket(ovs_dp_get_net(vport->dp),
+ vport->ops->get_name(vport));
+ hlist_add_head_rcu(&vport->hash_node, bucket);
return vport;
}
}
@@ -391,7 +398,7 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
case VPORT_E_TX_ERROR:
vport->err_stats.tx_errors++;
break;
- };
+ }
spin_unlock(&vport->stats_lock);
}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index aac680ca2b06..3f7961ea3c56 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -20,6 +20,7 @@
#define VPORT_H 1
#include <linux/list.h>
+#include <linux/netlink.h>
#include <linux/openvswitch.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
@@ -38,7 +39,7 @@ void ovs_vport_exit(void);
struct vport *ovs_vport_add(const struct vport_parms *);
void ovs_vport_del(struct vport *);
-struct vport *ovs_vport_locate(const char *name);
+struct vport *ovs_vport_locate(struct net *net, const char *name);
void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
@@ -69,10 +70,10 @@ struct vport_err_stats {
* @rcu: RCU callback head for deferred destruction.
* @port_no: Index into @dp's @ports array.
* @dp: Datapath to which this port belongs.
- * @node: Element in @dp's @port_list.
- * @upcall_pid: The Netlink port to use for packets received on this port that
+ * @upcall_portid: The Netlink port to use for packets received on this port that
* miss the flow table.
* @hash_node: Element in @dev_table hash table in vport.c.
+ * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
* @ops: Class structure.
* @percpu_stats: Points to per-CPU statistics used and maintained by vport
* @stats_lock: Protects @err_stats;
@@ -82,10 +83,10 @@ struct vport {
struct rcu_head rcu;
u16 port_no;
struct datapath *dp;
- struct list_head node;
- u32 upcall_pid;
+ u32 upcall_portid;
struct hlist_node hash_node;
+ struct hlist_node dp_hash_node;
const struct vport_ops *ops;
struct vport_percpu_stats __percpu *percpu_stats;
@@ -112,7 +113,7 @@ struct vport_parms {
/* For ovs_vport_alloc(). */
struct datapath *dp;
u16 port_no;
- u32 upcall_pid;
+ u32 upcall_portid;
};
/**
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
index 0060e3b396b7..cc55b35f80e5 100644
--- a/net/packet/Kconfig
+++ b/net/packet/Kconfig
@@ -14,3 +14,11 @@ config PACKET
be called af_packet.
If unsure, say Y.
+
+config PACKET_DIAG
+ tristate "Packet: sockets monitoring interface"
+ depends on PACKET
+ default n
+ ---help---
+ Support for PF_PACKET sockets monitoring interface used by the ss tool.
+ If unsure, say Y.
diff --git a/net/packet/Makefile b/net/packet/Makefile
index 81183eabfdec..9df61347a3c3 100644
--- a/net/packet/Makefile
+++ b/net/packet/Makefile
@@ -3,3 +3,5 @@
#
obj-$(CONFIG_PACKET) += af_packet.o
+obj-$(CONFIG_PACKET_DIAG) += af_packet_diag.o
+af_packet_diag-y += diag.o
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ceaca7c134a0..94060edbbd70 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -93,6 +93,8 @@
#include <net/inet_common.h>
#endif
+#include "internal.h"
+
/*
Assumptions:
- if device has no dev->hard_header routine, it adds and removes ll header
@@ -146,14 +148,6 @@ dev->hard_header == NULL (ll header is added by device, we cannot control it)
/* Private packet socket structures. */
-struct packet_mclist {
- struct packet_mclist *next;
- int ifindex;
- int count;
- unsigned short type;
- unsigned short alen;
- unsigned char addr[MAX_ADDR_LEN];
-};
/* identical to struct packet_mreq except it has
* a longer address field.
*/
@@ -175,63 +169,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
#define BLK_PLUS_PRIV(sz_of_priv) \
(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
-/* kbdq - kernel block descriptor queue */
-struct tpacket_kbdq_core {
- struct pgv *pkbdq;
- unsigned int feature_req_word;
- unsigned int hdrlen;
- unsigned char reset_pending_on_curr_blk;
- unsigned char delete_blk_timer;
- unsigned short kactive_blk_num;
- unsigned short blk_sizeof_priv;
-
- /* last_kactive_blk_num:
- * trick to see if user-space has caught up
- * in order to avoid refreshing timer when every single pkt arrives.
- */
- unsigned short last_kactive_blk_num;
-
- char *pkblk_start;
- char *pkblk_end;
- int kblk_size;
- unsigned int knum_blocks;
- uint64_t knxt_seq_num;
- char *prev;
- char *nxt_offset;
- struct sk_buff *skb;
-
- atomic_t blk_fill_in_prog;
-
- /* Default is set to 8ms */
-#define DEFAULT_PRB_RETIRE_TOV (8)
-
- unsigned short retire_blk_tov;
- unsigned short version;
- unsigned long tov_in_jiffies;
-
- /* timer to retire an outstanding block */
- struct timer_list retire_blk_timer;
-};
-
#define PGV_FROM_VMALLOC 1
-struct pgv {
- char *buffer;
-};
-
-struct packet_ring_buffer {
- struct pgv *pg_vec;
- unsigned int head;
- unsigned int frames_per_block;
- unsigned int frame_size;
- unsigned int frame_max;
-
- unsigned int pg_vec_order;
- unsigned int pg_vec_pages;
- unsigned int pg_vec_len;
-
- struct tpacket_kbdq_core prb_bdqc;
- atomic_t pending;
-};
#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
@@ -269,52 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
-struct packet_fanout;
-struct packet_sock {
- /* struct sock has to be the first member of packet_sock */
- struct sock sk;
- struct packet_fanout *fanout;
- struct tpacket_stats stats;
- union tpacket_stats_u stats_u;
- struct packet_ring_buffer rx_ring;
- struct packet_ring_buffer tx_ring;
- int copy_thresh;
- spinlock_t bind_lock;
- struct mutex pg_vec_lock;
- unsigned int running:1, /* prot_hook is attached*/
- auxdata:1,
- origdev:1,
- has_vnet_hdr:1;
- int ifindex; /* bound device */
- __be16 num;
- struct packet_mclist *mclist;
- atomic_t mapped;
- enum tpacket_versions tp_version;
- unsigned int tp_hdrlen;
- unsigned int tp_reserve;
- unsigned int tp_loss:1;
- unsigned int tp_tstamp;
- struct packet_type prot_hook ____cacheline_aligned_in_smp;
-};
-
-#define PACKET_FANOUT_MAX 256
-
-struct packet_fanout {
-#ifdef CONFIG_NET_NS
- struct net *net;
-#endif
- unsigned int num_members;
- u16 id;
- u8 type;
- u8 defrag;
- atomic_t rr_cur;
- struct list_head list;
- struct sock *arr[PACKET_FANOUT_MAX];
- spinlock_t lock;
- atomic_t sk_ref;
- struct packet_type prot_hook ____cacheline_aligned_in_smp;
-};
-
struct packet_skb_cb {
unsigned int origlen;
union {
@@ -334,11 +226,6 @@ struct packet_skb_cb {
(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
((x)->kactive_blk_num+1) : 0)
-static struct packet_sock *pkt_sk(struct sock *sk)
-{
- return (struct packet_sock *)sk;
-}
-
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
@@ -968,7 +855,8 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
ppd->tp_status = TP_STATUS_VLAN_VALID;
} else {
- ppd->hv1.tp_vlan_tci = ppd->tp_status = 0;
+ ppd->hv1.tp_vlan_tci = 0;
+ ppd->tp_status = TP_STATUS_AVAILABLE;
}
}
@@ -1079,7 +967,7 @@ static void *packet_current_rx_frame(struct packet_sock *po,
default:
WARN(1, "TPACKET version not supported\n");
BUG();
- return 0;
+ return NULL;
}
}
@@ -1243,7 +1131,8 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
-static DEFINE_MUTEX(fanout_mutex);
+DEFINE_MUTEX(fanout_mutex);
+EXPORT_SYMBOL_GPL(fanout_mutex);
static LIST_HEAD(fanout_list);
static void __fanout_link(struct sock *sk, struct packet_sock *po)
@@ -1273,6 +1162,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
spin_unlock(&f->lock);
}
+static bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+{
+ if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
+ return true;
+
+ return false;
+}
+
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
struct packet_sock *po = pkt_sk(sk);
@@ -1325,6 +1222,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.id_match = match_fanout_group;
dev_add_pack(&match->prot_hook);
list_add(&match->list, &fanout_list);
}
@@ -1355,9 +1253,9 @@ static void fanout_release(struct sock *sk)
if (!f)
return;
+ mutex_lock(&fanout_mutex);
po->fanout = NULL;
- mutex_lock(&fanout_mutex);
if (atomic_dec_and_test(&f->sk_ref)) {
list_del(&f->list);
dev_remove_pack(&f->prot_hook);
@@ -1936,7 +1834,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
if (likely(po->tx_ring.pg_vec)) {
ph = skb_shinfo(skb)->destructor_arg;
- BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
atomic_dec(&po->tx_ring.pending);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
@@ -2055,7 +1952,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
int tp_len, size_max;
unsigned char *addr;
int len_sum = 0;
- int status = 0;
+ int status = TP_STATUS_AVAILABLE;
int hlen, tlen;
mutex_lock(&po->pg_vec_lock);
@@ -2420,10 +2317,13 @@ static int packet_release(struct socket *sock)
net = sock_net(sk);
po = pkt_sk(sk);
- spin_lock_bh(&net->packet.sklist_lock);
+ mutex_lock(&net->packet.sklist_lock);
sk_del_node_init_rcu(sk);
+ mutex_unlock(&net->packet.sklist_lock);
+
+ preempt_disable();
sock_prot_inuse_add(net, sk->sk_prot, -1);
- spin_unlock_bh(&net->packet.sklist_lock);
+ preempt_enable();
spin_lock(&po->bind_lock);
unregister_prot_hook(sk, false);
@@ -2622,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
register_prot_hook(sk);
}
- spin_lock_bh(&net->packet.sklist_lock);
+ mutex_lock(&net->packet.sklist_lock);
sk_add_node_rcu(sk, &net->packet.sklist);
+ mutex_unlock(&net->packet.sklist_lock);
+
+ preempt_disable();
sock_prot_inuse_add(net, &packet_proto, 1);
- spin_unlock_bh(&net->packet.sklist_lock);
+ preempt_enable();
return 0;
out:
@@ -3846,7 +3749,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
po->ifindex,
po->running,
atomic_read(&s->sk_rmem_alloc),
- sock_i_uid(s),
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
sock_i_ino(s));
}
@@ -3878,7 +3781,7 @@ static const struct file_operations packet_seq_fops = {
static int __net_init packet_net_init(struct net *net)
{
- spin_lock_init(&net->packet.sklist_lock);
+ mutex_init(&net->packet.sklist_lock);
INIT_HLIST_HEAD(&net->packet.sklist);
if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
diff --git a/net/packet/diag.c b/net/packet/diag.c
new file mode 100644
index 000000000000..8db6e21c46bd
--- /dev/null
+++ b/net/packet/diag.c
@@ -0,0 +1,242 @@
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/packet_diag.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "internal.h"
+
+static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
+{
+ struct packet_diag_info pinfo;
+
+ pinfo.pdi_index = po->ifindex;
+ pinfo.pdi_version = po->tp_version;
+ pinfo.pdi_reserve = po->tp_reserve;
+ pinfo.pdi_copy_thresh = po->copy_thresh;
+ pinfo.pdi_tstamp = po->tp_tstamp;
+
+ pinfo.pdi_flags = 0;
+ if (po->running)
+ pinfo.pdi_flags |= PDI_RUNNING;
+ if (po->auxdata)
+ pinfo.pdi_flags |= PDI_AUXDATA;
+ if (po->origdev)
+ pinfo.pdi_flags |= PDI_ORIGDEV;
+ if (po->has_vnet_hdr)
+ pinfo.pdi_flags |= PDI_VNETHDR;
+ if (po->tp_loss)
+ pinfo.pdi_flags |= PDI_LOSS;
+
+ return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo);
+}
+
+static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb)
+{
+ struct nlattr *mca;
+ struct packet_mclist *ml;
+
+ mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST);
+ if (!mca)
+ return -EMSGSIZE;
+
+ rtnl_lock();
+ for (ml = po->mclist; ml; ml = ml->next) {
+ struct packet_diag_mclist *dml;
+
+ dml = nla_reserve_nohdr(nlskb, sizeof(*dml));
+ if (!dml) {
+ rtnl_unlock();
+ nla_nest_cancel(nlskb, mca);
+ return -EMSGSIZE;
+ }
+
+ dml->pdmc_index = ml->ifindex;
+ dml->pdmc_type = ml->type;
+ dml->pdmc_alen = ml->alen;
+ dml->pdmc_count = ml->count;
+ BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr));
+ memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr));
+ }
+
+ rtnl_unlock();
+ nla_nest_end(nlskb, mca);
+
+ return 0;
+}
+
+static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type,
+ struct sk_buff *nlskb)
+{
+ struct packet_diag_ring pdr;
+
+ if (!ring->pg_vec || ((ver > TPACKET_V2) &&
+ (nl_type == PACKET_DIAG_TX_RING)))
+ return 0;
+
+ pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
+ pdr.pdr_block_nr = ring->pg_vec_len;
+ pdr.pdr_frame_size = ring->frame_size;
+ pdr.pdr_frame_nr = ring->frame_max + 1;
+
+ if (ver > TPACKET_V2) {
+ pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov;
+ pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv;
+ pdr.pdr_features = ring->prb_bdqc.feature_req_word;
+ } else {
+ pdr.pdr_retire_tmo = 0;
+ pdr.pdr_sizeof_priv = 0;
+ pdr.pdr_features = 0;
+ }
+
+ return nla_put(nlskb, nl_type, sizeof(pdr), &pdr);
+}
+
+static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb)
+{
+ int ret;
+
+ mutex_lock(&po->pg_vec_lock);
+ ret = pdiag_put_ring(&po->rx_ring, po->tp_version,
+ PACKET_DIAG_RX_RING, skb);
+ if (!ret)
+ ret = pdiag_put_ring(&po->tx_ring, po->tp_version,
+ PACKET_DIAG_TX_RING, skb);
+ mutex_unlock(&po->pg_vec_lock);
+
+ return ret;
+}
+
+static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
+{
+ int ret = 0;
+
+ mutex_lock(&fanout_mutex);
+ if (po->fanout) {
+ u32 val;
+
+ val = (u32)po->fanout->id | ((u32)po->fanout->type << 16);
+ ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val);
+ }
+ mutex_unlock(&fanout_mutex);
+
+ return ret;
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
+ u32 portid, u32 seq, u32 flags, int sk_ino)
+{
+ struct nlmsghdr *nlh;
+ struct packet_diag_msg *rp;
+ struct packet_sock *po = pkt_sk(sk);
+
+ nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rp = nlmsg_data(nlh);
+ rp->pdiag_family = AF_PACKET;
+ rp->pdiag_type = sk->sk_type;
+ rp->pdiag_num = ntohs(po->num);
+ rp->pdiag_ino = sk_ino;
+ sock_diag_save_cookie(sk, rp->pdiag_cookie);
+
+ if ((req->pdiag_show & PACKET_SHOW_INFO) &&
+ pdiag_put_info(po, skb))
+ goto out_nlmsg_trim;
+
+ if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&
+ pdiag_put_mclist(po, skb))
+ goto out_nlmsg_trim;
+
+ if ((req->pdiag_show & PACKET_SHOW_RING_CFG) &&
+ pdiag_put_rings_cfg(po, skb))
+ goto out_nlmsg_trim;
+
+ if ((req->pdiag_show & PACKET_SHOW_FANOUT) &&
+ pdiag_put_fanout(po, skb))
+ goto out_nlmsg_trim;
+
+ return nlmsg_end(skb, nlh);
+
+out_nlmsg_trim:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int num = 0, s_num = cb->args[0];
+ struct packet_diag_req *req;
+ struct net *net;
+ struct sock *sk;
+ struct hlist_node *node;
+
+ net = sock_net(skb->sk);
+ req = nlmsg_data(cb->nlh);
+
+ mutex_lock(&net->packet.sklist_lock);
+ sk_for_each(sk, node, &net->packet.sklist) {
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (num < s_num)
+ goto next;
+
+ if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ sock_i_ino(sk)) < 0)
+ goto done;
+next:
+ num++;
+ }
+done:
+ mutex_unlock(&net->packet.sklist_lock);
+ cb->args[0] = num;
+
+ return skb->len;
+}
+
+static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct packet_diag_req);
+ struct net *net = sock_net(skb->sk);
+ struct packet_diag_req *req;
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ req = nlmsg_data(h);
+ /* Make it possible to support protocol filtering later */
+ if (req->sdiag_protocol)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = packet_diag_dump,
+ };
+ return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ } else
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler packet_diag_handler = {
+ .family = AF_PACKET,
+ .dump = packet_diag_handler_dump,
+};
+
+static int __init packet_diag_init(void)
+{
+ return sock_diag_register(&packet_diag_handler);
+}
+
+static void __exit packet_diag_exit(void)
+{
+ sock_diag_unregister(&packet_diag_handler);
+}
+
+module_init(packet_diag_init);
+module_exit(packet_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */);
diff --git a/net/packet/internal.h b/net/packet/internal.h
new file mode 100644
index 000000000000..44945f6b7252
--- /dev/null
+++ b/net/packet/internal.h
@@ -0,0 +1,121 @@
+#ifndef __PACKET_INTERNAL_H__
+#define __PACKET_INTERNAL_H__
+
+struct packet_mclist {
+ struct packet_mclist *next;
+ int ifindex;
+ int count;
+ unsigned short type;
+ unsigned short alen;
+ unsigned char addr[MAX_ADDR_LEN];
+};
+
+/* kbdq - kernel block descriptor queue */
+struct tpacket_kbdq_core {
+ struct pgv *pkbdq;
+ unsigned int feature_req_word;
+ unsigned int hdrlen;
+ unsigned char reset_pending_on_curr_blk;
+ unsigned char delete_blk_timer;
+ unsigned short kactive_blk_num;
+ unsigned short blk_sizeof_priv;
+
+ /* last_kactive_blk_num:
+ * trick to see if user-space has caught up
+ * in order to avoid refreshing timer when every single pkt arrives.
+ */
+ unsigned short last_kactive_blk_num;
+
+ char *pkblk_start;
+ char *pkblk_end;
+ int kblk_size;
+ unsigned int knum_blocks;
+ uint64_t knxt_seq_num;
+ char *prev;
+ char *nxt_offset;
+ struct sk_buff *skb;
+
+ atomic_t blk_fill_in_prog;
+
+ /* Default is set to 8ms */
+#define DEFAULT_PRB_RETIRE_TOV (8)
+
+ unsigned short retire_blk_tov;
+ unsigned short version;
+ unsigned long tov_in_jiffies;
+
+ /* timer to retire an outstanding block */
+ struct timer_list retire_blk_timer;
+};
+
+struct pgv {
+ char *buffer;
+};
+
+struct packet_ring_buffer {
+ struct pgv *pg_vec;
+ unsigned int head;
+ unsigned int frames_per_block;
+ unsigned int frame_size;
+ unsigned int frame_max;
+
+ unsigned int pg_vec_order;
+ unsigned int pg_vec_pages;
+ unsigned int pg_vec_len;
+
+ struct tpacket_kbdq_core prb_bdqc;
+ atomic_t pending;
+};
+
+extern struct mutex fanout_mutex;
+#define PACKET_FANOUT_MAX 256
+
+struct packet_fanout {
+#ifdef CONFIG_NET_NS
+ struct net *net;
+#endif
+ unsigned int num_members;
+ u16 id;
+ u8 type;
+ u8 defrag;
+ atomic_t rr_cur;
+ struct list_head list;
+ struct sock *arr[PACKET_FANOUT_MAX];
+ spinlock_t lock;
+ atomic_t sk_ref;
+ struct packet_type prot_hook ____cacheline_aligned_in_smp;
+};
+
+struct packet_sock {
+ /* struct sock has to be the first member of packet_sock */
+ struct sock sk;
+ struct packet_fanout *fanout;
+ struct tpacket_stats stats;
+ union tpacket_stats_u stats_u;
+ struct packet_ring_buffer rx_ring;
+ struct packet_ring_buffer tx_ring;
+ int copy_thresh;
+ spinlock_t bind_lock;
+ struct mutex pg_vec_lock;
+ unsigned int running:1, /* prot_hook is attached*/
+ auxdata:1,
+ origdev:1,
+ has_vnet_hdr:1;
+ int ifindex; /* bound device */
+ __be16 num;
+ struct packet_mclist *mclist;
+ atomic_t mapped;
+ enum tpacket_versions tp_version;
+ unsigned int tp_hdrlen;
+ unsigned int tp_reserve;
+ unsigned int tp_loss:1;
+ unsigned int tp_tstamp;
+ struct packet_type prot_hook ____cacheline_aligned_in_smp;
+};
+
+static struct packet_sock *pkt_sk(struct sock *sk)
+{
+ return (struct packet_sock *)sk;
+}
+
+#endif
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 7dd762a464e5..83a8389619aa 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -33,7 +33,7 @@
/* Device address handling */
static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
- u32 pid, u32 seq, int event);
+ u32 portid, u32 seq, int event);
void phonet_address_notify(int event, struct net_device *dev, u8 addr)
{
@@ -101,12 +101,12 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr)
}
static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
- u32 pid, u32 seq, int event)
+ u32 portid, u32 seq, int event)
{
struct ifaddrmsg *ifm;
struct nlmsghdr *nlh;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), 0);
if (nlh == NULL)
return -EMSGSIZE;
@@ -148,7 +148,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (fill_addr(skb, pnd->netdev, addr << 2,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0)
goto out;
}
@@ -165,12 +165,12 @@ out:
/* Routes handling */
static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
- u32 pid, u32 seq, int event)
+ u32 portid, u32 seq, int event)
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), 0);
if (nlh == NULL)
return -EMSGSIZE;
@@ -276,7 +276,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (addr_idx++ < addr_start_idx)
continue;
- if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid,
+ if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE))
goto out;
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 0acc943f713a..b7e982782255 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -612,7 +612,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
sk->sk_protocol, pn->sobject, pn->dobject,
pn->resource, sk->sk_state,
sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk),
- sock_i_uid(sk), sock_i_ino(sk),
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ sock_i_ino(sk),
atomic_read(&sk->sk_refcnt), sk,
atomic_read(&sk->sk_drops), &len);
}
@@ -796,7 +797,8 @@ static int pn_res_seq_show(struct seq_file *seq, void *v)
struct sock *sk = *psk;
seq_printf(seq, "%02X %5d %lu%n",
- (int) (psk - pnres.sk), sock_i_uid(sk),
+ (int) (psk - pnres.sk),
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk), &len);
}
seq_printf(seq, "%*s\n", 63 - len, "");
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index af95c8e058fc..a65ee78db0c5 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk)
struct rds_connection *conn;
struct rds_tcp_connection *tc;
- read_lock_bh(&sk->sk_callback_lock);
+ read_lock(&sk->sk_callback_lock);
conn = sk->sk_user_data;
if (!conn) {
state_change = sk->sk_state_change;
@@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk)
break;
}
out:
- read_unlock_bh(&sk->sk_callback_lock);
+ read_unlock(&sk->sk_callback_lock);
state_change(sk);
}
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 72981375f47c..7787537e9c2e 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
rdsdebug("listen data ready sk %p\n", sk);
- read_lock_bh(&sk->sk_callback_lock);
+ read_lock(&sk->sk_callback_lock);
ready = sk->sk_user_data;
if (!ready) { /* check for teardown race */
ready = sk->sk_data_ready;
@@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
queue_work(rds_wq, &rds_tcp_listen_work);
out:
- read_unlock_bh(&sk->sk_callback_lock);
+ read_unlock(&sk->sk_callback_lock);
ready(sk, bytes);
}
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 6243258f840f..4fac4f2bb9dc 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -322,7 +322,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
rdsdebug("data ready sk %p bytes %d\n", sk, bytes);
- read_lock_bh(&sk->sk_callback_lock);
+ read_lock(&sk->sk_callback_lock);
conn = sk->sk_user_data;
if (!conn) { /* check for teardown race */
ready = sk->sk_data_ready;
@@ -336,7 +336,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM)
queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
out:
- read_unlock_bh(&sk->sk_callback_lock);
+ read_unlock(&sk->sk_callback_lock);
ready(sk, bytes);
}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 1b4fd68f0c7c..81cf5a4c5e40 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -174,7 +174,7 @@ void rds_tcp_write_space(struct sock *sk)
struct rds_connection *conn;
struct rds_tcp_connection *tc;
- read_lock_bh(&sk->sk_callback_lock);
+ read_lock(&sk->sk_callback_lock);
conn = sk->sk_user_data;
if (!conn) {
write_space = sk->sk_write_space;
@@ -194,7 +194,7 @@ void rds_tcp_write_space(struct sock *sk)
queue_delayed_work(rds_wq, &conn->c_send_w, 0);
out:
- read_unlock_bh(&sk->sk_callback_lock);
+ read_unlock(&sk->sk_callback_lock);
/*
* write_space is only called when data leaves tcp's send queue if
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 752b72360ebc..a5c952741279 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -150,6 +150,20 @@ static void rfkill_led_trigger_activate(struct led_classdev *led)
rfkill_led_trigger_event(rfkill);
}
+const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
+{
+ return rfkill->led_trigger.name;
+}
+EXPORT_SYMBOL(rfkill_get_led_trigger_name);
+
+void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
+{
+ BUG_ON(!rfkill);
+
+ rfkill->ledtrigname = name;
+}
+EXPORT_SYMBOL(rfkill_set_led_trigger_name);
+
static int rfkill_led_trigger_register(struct rfkill *rfkill)
{
rfkill->led_trigger.name = rfkill->ledtrigname
@@ -256,6 +270,7 @@ static bool __rfkill_set_hw_state(struct rfkill *rfkill,
static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
{
unsigned long flags;
+ bool prev, curr;
int err;
if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP))
@@ -270,6 +285,8 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
rfkill->ops->query(rfkill, rfkill->data);
spin_lock_irqsave(&rfkill->lock, flags);
+ prev = rfkill->state & RFKILL_BLOCK_SW;
+
if (rfkill->state & RFKILL_BLOCK_SW)
rfkill->state |= RFKILL_BLOCK_SW_PREV;
else
@@ -299,10 +316,13 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked)
}
rfkill->state &= ~RFKILL_BLOCK_SW_SETCALL;
rfkill->state &= ~RFKILL_BLOCK_SW_PREV;
+ curr = rfkill->state & RFKILL_BLOCK_SW;
spin_unlock_irqrestore(&rfkill->lock, flags);
rfkill_led_trigger_event(rfkill);
- rfkill_event(rfkill);
+
+ if (prev != curr)
+ rfkill_event(rfkill);
}
#ifdef CONFIG_RFKILL_INPUT
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index 24c55c53e6a2..c9d931e7ffec 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -164,8 +164,7 @@ static void rfkill_schedule_global_op(enum rfkill_sched_op op)
rfkill_op_pending = true;
if (op == RFKILL_GLOBAL_OP_EPO && !rfkill_is_epo_lock_active()) {
/* bypass the limiter for EPO */
- cancel_delayed_work(&rfkill_op_work);
- schedule_delayed_work(&rfkill_op_work, 0);
+ mod_delayed_work(system_wq, &rfkill_op_work, 0);
rfkill_last_scheduled = jiffies;
} else
rfkill_schedule_ratelimited();
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 8b1f9f49960f..011d2384b115 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -948,7 +948,8 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
_enter("");
- key = key_alloc(&key_type_rxrpc, "x", 0, 0, cred, 0,
+ key = key_alloc(&key_type_rxrpc, "x",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, 0,
KEY_ALLOC_NOT_IN_QUOTA);
if (IS_ERR(key)) {
_leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
@@ -994,7 +995,8 @@ struct key *rxrpc_get_null_key(const char *keyname)
struct key *key;
int ret;
- key = key_alloc(&key_type_rxrpc, keyname, 0, 0, cred,
+ key = key_alloc(&key_type_rxrpc, keyname,
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA);
if (IS_ERR(key))
return key;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e3d2c78cb52c..102761d294cb 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -644,7 +644,7 @@ errout:
}
static int
-tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
+tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq,
u16 flags, int event, int bind, int ref)
{
struct tcamsg *t;
@@ -652,7 +652,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags);
if (!nlh)
goto out_nlmsg_trim;
t = nlmsg_data(nlh);
@@ -678,7 +678,7 @@ out_nlmsg_trim:
}
static int
-act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
+act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
struct tc_action *a, int event)
{
struct sk_buff *skb;
@@ -686,16 +686,16 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+ if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
- return rtnl_unicast(skb, net, pid);
+ return rtnl_unicast(skb, net, portid);
}
static struct tc_action *
-tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
+tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
{
struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_action *a;
@@ -762,7 +762,7 @@ static struct tc_action *create_a(int i)
}
static int tca_action_flush(struct net *net, struct nlattr *nla,
- struct nlmsghdr *n, u32 pid)
+ struct nlmsghdr *n, u32 portid)
{
struct sk_buff *skb;
unsigned char *b;
@@ -799,7 +799,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
if (a->ops == NULL)
goto err_out;
- nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
+ nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
t = nlmsg_data(nlh);
@@ -823,7 +823,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
nlh->nlmsg_flags |= NLM_F_ROOT;
module_put(a->ops->owner);
kfree(a);
- err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
return 0;
@@ -841,7 +841,7 @@ noflush_out:
static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
- u32 pid, int event)
+ u32 portid, int event)
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -853,13 +853,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
if (tb[1] != NULL)
- return tca_action_flush(net, tb[1], n, pid);
+ return tca_action_flush(net, tb[1], n, portid);
else
return -EINVAL;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
- act = tcf_action_get_1(tb[i], n, pid);
+ act = tcf_action_get_1(tb[i], n, portid);
if (IS_ERR(act)) {
ret = PTR_ERR(act);
goto err;
@@ -874,7 +874,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
}
if (event == RTM_GETACTION)
- ret = act_get_notify(net, pid, n, head, event);
+ ret = act_get_notify(net, portid, n, head, event);
else { /* delete */
struct sk_buff *skb;
@@ -884,7 +884,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
goto err;
}
- if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event,
+ if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event,
0, 1) <= 0) {
kfree_skb(skb);
ret = -EINVAL;
@@ -893,7 +893,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
/* now do the delete */
tcf_action_destroy(head, 0);
- ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+ ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (ret > 0)
return 0;
@@ -905,7 +905,7 @@ err:
}
static int tcf_add_notify(struct net *net, struct tc_action *a,
- u32 pid, u32 seq, int event, u16 flags)
+ u32 portid, u32 seq, int event, u16 flags)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
@@ -920,7 +920,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
b = skb_tail_pointer(skb);
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags);
if (!nlh)
goto out_kfree_skb;
t = nlmsg_data(nlh);
@@ -940,7 +940,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
NETLINK_CB(skb).dst_group = RTNLGRP_TC;
- err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
if (err > 0)
err = 0;
return err;
@@ -953,7 +953,7 @@ out_kfree_skb:
static int
tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
- u32 pid, int ovr)
+ u32 portid, int ovr)
{
int ret = 0;
struct tc_action *act;
@@ -971,7 +971,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
/* dump then free all the actions after update; inserted policy
* stays intact
*/
- ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
+ ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags);
for (a = act; a; a = act) {
act = a->next;
kfree(a);
@@ -984,7 +984,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ACT_MAX + 1];
- u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+ u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
@@ -1008,17 +1008,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
replay:
- ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
+ ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
if (ret == -EAGAIN)
goto replay;
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- pid, RTM_DELACTION);
+ portid, RTM_DELACTION);
break;
case RTM_GETACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
- pid, RTM_GETACTION);
+ portid, RTM_GETACTION);
break;
default:
BUG();
@@ -1085,7 +1085,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
goto out_module_put;
}
- nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
@@ -1109,7 +1109,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
nla_nest_cancel(skb, nest);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
- if (NETLINK_CB(cb->skb).pid && ret)
+ if (NETLINK_CB(cb->skb).portid && ret)
nlh->nlmsg_flags |= NLM_F_MULTI;
module_put(a_o->owner);
return skb->len;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index f10fb8256442..05d60859d8e3 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -67,6 +67,9 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
struct tcf_common *pc;
int ret = 0;
int err;
+#ifdef CONFIG_GACT_PROB
+ struct tc_gact_p *p_parm = NULL;
+#endif
if (nla == NULL)
return -EINVAL;
@@ -82,6 +85,12 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
#ifndef CONFIG_GACT_PROB
if (tb[TCA_GACT_PROB] != NULL)
return -EOPNOTSUPP;
+#else
+ if (tb[TCA_GACT_PROB]) {
+ p_parm = nla_data(tb[TCA_GACT_PROB]);
+ if (p_parm->ptype >= MAX_RAND)
+ return -EINVAL;
+ }
#endif
pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
@@ -103,8 +112,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
spin_lock_bh(&gact->tcf_lock);
gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
- if (tb[TCA_GACT_PROB] != NULL) {
- struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
+ if (p_parm) {
gact->tcfg_paction = p_parm->paction;
gact->tcfg_pval = p_parm->pval;
gact->tcfg_ptype = p_parm->ptype;
@@ -133,7 +141,7 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
- if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+ if (gact->tcfg_ptype)
action = gact_rand[gact->tcfg_ptype](gact);
else
action = gact->tcf_action;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 60e281ad0f07..58fb3c7aab9e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -185,7 +185,12 @@ err3:
err2:
kfree(tname);
err1:
- kfree(pc);
+ if (ret == ACT_P_CREATED) {
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
+ }
return err;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index fe81cc18e9e0..9c0fd0c78814 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -200,13 +200,12 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
out:
if (err) {
m->tcf_qstats.overlimits++;
- /* should we be asking for packet to be dropped?
- * may make sense for redirect case only
- */
- retval = TC_ACT_SHOT;
- } else {
+ if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ retval = TC_ACT_SHOT;
+ else
+ retval = m->tcf_action;
+ } else
retval = m->tcf_action;
- }
spin_unlock(&m->tcf_lock);
return retval;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 26aa2f6ce257..45c53ab067a6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -74,7 +74,10 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
p = to_pedit(pc);
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL) {
- kfree(pc);
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
return -ENOMEM;
}
ret = ACT_P_CREATED;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 3922f2a2821b..3714f60f0b3c 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -131,7 +131,10 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
d = to_defact(pc);
ret = alloc_defdata(d, defdata);
if (ret < 0) {
- kfree(pc);
+ if (est)
+ gen_kill_estimator(&pc->tcfc_bstats,
+ &pc->tcfc_rate_est);
+ kfree_rcu(pc, tcfc_rcu);
return ret;
}
d->tcf_action = parm->action;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6dd1131f2ec1..7ae02892437c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -319,7 +319,7 @@ replay:
}
}
- err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
+ err = tp->ops->change(skb, tp, cl, t->tcm_handle, tca, &fh);
if (err == 0) {
if (tp_created) {
spin_lock_bh(root_lock);
@@ -343,13 +343,13 @@ errout:
}
static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
- unsigned long fh, u32 pid, u32 seq, u16 flags, int event)
+ unsigned long fh, u32 portid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
tcm = nlmsg_data(nlh);
@@ -381,18 +381,18 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
unsigned long fh, int event)
{
struct sk_buff *skb;
- u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) {
+ if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
- return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
}
@@ -407,7 +407,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
{
struct tcf_dump_args *a = (void *)arg;
- return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
+ return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
}
@@ -465,7 +465,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (t > s_t)
memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
+ if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
break;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 590960a22a77..344a11b342e5 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -162,7 +162,8 @@ errout:
return err;
}
-static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+static int basic_change(struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, unsigned long *arg)
{
int err;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 7743ea8d1d38..2ecde225ae60 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -77,11 +77,18 @@ struct cgroup_subsys net_cls_subsys = {
.name = "net_cls",
.create = cgrp_create,
.destroy = cgrp_destroy,
-#ifdef CONFIG_NET_CLS_CGROUP
.subsys_id = net_cls_subsys_id,
-#endif
.base_cftypes = ss_files,
.module = THIS_MODULE,
+
+ /*
+ * While net_cls cgroup has the rudimentary hierarchy support of
+ * inheriting the parent's classid on cgroup creation, it doesn't
+ * properly propagates config changes in ancestors to their
+ * descendents. A child should follow the parent's configuration
+ * but be allowed to override it. Fix it and remove the following.
+ */
+ .broken_hierarchy = true,
};
struct cls_cgroup_head {
@@ -151,7 +158,8 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
[TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
};
-static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
+static int cls_cgroup_change(struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
unsigned long *arg)
{
@@ -283,12 +291,6 @@ static int __init init_cgroup_cls(void)
if (ret)
goto out;
-#ifndef CONFIG_NET_CLS_CGROUP
- /* We can't use rcu_assign_pointer because this is an int. */
- smp_wmb();
- net_cls_subsys_id = net_cls_subsys.subsys_id;
-#endif
-
ret = register_tcf_proto_ops(&cls_cgroup_ops);
if (ret)
cgroup_unload_subsys(&net_cls_subsys);
@@ -301,11 +303,6 @@ static void __exit exit_cgroup_cls(void)
{
unregister_tcf_proto_ops(&cls_cgroup_ops);
-#ifndef CONFIG_NET_CLS_CGROUP
- net_cls_subsys_id = -1;
- synchronize_rcu();
-#endif
-
cgroup_unload_subsys(&net_cls_subsys);
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index ccd08c8dc6a7..ce82d0cb1b47 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -193,15 +193,19 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb)
static u32 flow_get_skuid(const struct sk_buff *skb)
{
- if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
- return skb->sk->sk_socket->file->f_cred->fsuid;
+ if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) {
+ kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid;
+ return from_kuid(&init_user_ns, skuid);
+ }
return 0;
}
static u32 flow_get_skgid(const struct sk_buff *skb)
{
- if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
- return skb->sk->sk_socket->file->f_cred->fsgid;
+ if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) {
+ kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid;
+ return from_kgid(&init_user_ns, skgid);
+ }
return 0;
}
@@ -347,7 +351,8 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_PERTURB] = { .type = NLA_U32 },
};
-static int flow_change(struct tcf_proto *tp, unsigned long base,
+static int flow_change(struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
unsigned long *arg)
{
@@ -386,6 +391,10 @@ static int flow_change(struct tcf_proto *tp, unsigned long base,
if (fls(keymask) - 1 > FLOW_KEY_MAX)
return -EOPNOTSUPP;
+
+ if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
+ sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns)
+ return -EOPNOTSUPP;
}
err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 8384a4797240..4075a0aef2aa 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -233,7 +233,8 @@ errout:
return err;
}
-static int fw_change(struct tcf_proto *tp, unsigned long base,
+static int fw_change(struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
unsigned long *arg)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 44f405cb9aaf..c10d57bf98f2 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -427,7 +427,8 @@ errout:
return err;
}
-static int route4_change(struct tcf_proto *tp, unsigned long base,
+static int route4_change(struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
unsigned long *arg)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 18ab93ec8d7e..494bbb90924a 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -416,7 +416,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
[TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
};
-static int rsvp_change(struct tcf_proto *tp, unsigned long base,
+static int rsvp_change(struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
unsigned long *arg)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index fe29420d0b0e..a1293b4ab7a1 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -332,7 +332,8 @@ errout:
}
static int
-tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+tcindex_change(struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, unsigned long *arg)
{
struct nlattr *opt = tca[TCA_OPTIONS];
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d45373fb00b9..c7c27bc91b5a 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -544,7 +544,8 @@ errout:
return err;
}
-static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+static int u32_change(struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca,
unsigned long *arg)
{
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 4ab6e3325573..7c3de6ffa516 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -461,7 +461,7 @@ META_COLLECTOR(int_sk_sndtimeo)
META_COLLECTOR(int_sk_sendmsg_off)
{
SKIP_NONLOCAL(skb);
- dst->value = skb->sk->sk_sndmsg_off;
+ dst->value = skb->sk->sk_frag.offset;
}
META_COLLECTOR(int_sk_write_pend)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a08b4ab3e421..a18d975db59c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1185,7 +1185,7 @@ graft:
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- u32 pid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1193,7 +1193,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
struct gnet_dump d;
struct qdisc_size_table *stab;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
tcm = nlmsg_data(nlh);
@@ -1248,25 +1248,25 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
struct Qdisc *old, struct Qdisc *new)
{
struct sk_buff *skb;
- u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (old && !tc_qdisc_dump_ignore(old)) {
- if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
+ if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
0, RTM_DELQDISC) < 0)
goto err_out;
}
if (new && !tc_qdisc_dump_ignore(new)) {
- if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
+ if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
goto err_out;
}
if (skb->len)
- return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
err_out:
@@ -1289,7 +1289,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
q_idx++;
} else {
if (!tc_qdisc_dump_ignore(q) &&
- tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+ tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
@@ -1300,7 +1300,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
continue;
}
if (!tc_qdisc_dump_ignore(q) &&
- tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+ tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
@@ -1375,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
const struct Qdisc_class_ops *cops;
unsigned long cl = 0;
unsigned long new_cl;
- u32 pid = tcm->tcm_parent;
+ u32 portid = tcm->tcm_parent;
u32 clid = tcm->tcm_handle;
u32 qid = TC_H_MAJ(clid);
int err;
@@ -1403,8 +1403,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
/* Step 1. Determine qdisc handle X:0 */
- if (pid != TC_H_ROOT) {
- u32 qid1 = TC_H_MAJ(pid);
+ if (portid != TC_H_ROOT) {
+ u32 qid1 = TC_H_MAJ(portid);
if (qid && qid1) {
/* If both majors are known, they must be identical. */
@@ -1418,10 +1418,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
/* Now qid is genuine qdisc handle consistent
* both with parent and child.
*
- * TC_H_MAJ(pid) still may be unspecified, complete it now.
+ * TC_H_MAJ(portid) still may be unspecified, complete it now.
*/
- if (pid)
- pid = TC_H_MAKE(qid, pid);
+ if (portid)
+ portid = TC_H_MAKE(qid, portid);
} else {
if (qid == 0)
qid = dev->qdisc->handle;
@@ -1439,7 +1439,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
/* Now try to get class */
if (clid == 0) {
- if (pid == TC_H_ROOT)
+ if (portid == TC_H_ROOT)
clid = qid;
} else
clid = TC_H_MAKE(qid, clid);
@@ -1478,7 +1478,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
new_cl = cl;
err = -EOPNOTSUPP;
if (cops->change)
- err = cops->change(q, clid, pid, tca, &new_cl);
+ err = cops->change(q, clid, portid, tca, &new_cl);
if (err == 0)
tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
@@ -1492,7 +1492,7 @@ out:
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
unsigned long cl,
- u32 pid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1500,7 +1500,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
struct gnet_dump d;
const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
goto out_nlmsg_trim;
tcm = nlmsg_data(nlh);
@@ -1540,18 +1540,18 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
unsigned long cl, int event)
{
struct sk_buff *skb;
- u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
kfree_skb(skb);
return -EINVAL;
}
- return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
}
@@ -1565,7 +1565,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
{
struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
- return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
+ return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 6aabd77d1cfd..564b9fc8efd3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -250,10 +250,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
cl = defmap[TC_PRIO_BESTEFFORT];
- if (cl == NULL || cl->level >= head->level)
+ if (cl == NULL)
goto fallback;
}
-
+ if (cl->level >= head->level)
+ goto fallback;
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 9ce0b4fe23ff..71e50c80315f 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -352,7 +352,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
- int err;
+ int err = 0;
cl = drr_classify(skb, sch, &err);
if (cl == NULL) {
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 9fc1c62ec80e..4e606fcb2534 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -191,7 +191,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (list_empty(&flow->flowchain)) {
list_add_tail(&flow->flowchain, &q->new_flows);
- codel_vars_init(&flow->cvars);
q->new_flow_count++;
flow->deficit = q->quantum;
flow->dropped = 0;
@@ -418,6 +417,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
struct fq_codel_flow *flow = q->flows + i;
INIT_LIST_HEAD(&flow->flowchain);
+ codel_vars_init(&flow->cvars);
}
}
if (sch->limit >= 1)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 511323e89cec..aefc1504dc88 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -324,24 +324,6 @@ void netif_carrier_off(struct net_device *dev)
}
EXPORT_SYMBOL(netif_carrier_off);
-/**
- * netif_notify_peers - notify network peers about existence of @dev
- * @dev: network device
- *
- * Generate traffic such that interested network peers are aware of
- * @dev, such as by generating a gratuitous ARP. This may be used when
- * a device wants to inform the rest of the network about some sort of
- * reconfiguration such as a failover event or virtual machine
- * migration.
- */
-void netif_notify_peers(struct net_device *dev)
-{
- rtnl_lock();
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
- rtnl_unlock();
-}
-EXPORT_SYMBOL(netif_notify_peers);
-
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
under all circumstances. It is difficult to invent anything faster or
cheaper.
@@ -545,6 +527,8 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
};
EXPORT_SYMBOL(pfifo_fast_ops);
+static struct lock_class_key qdisc_tx_busylock;
+
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
struct Qdisc_ops *ops)
{
@@ -552,6 +536,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
struct Qdisc *sch;
unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
int err = -ENOBUFS;
+ struct net_device *dev = dev_queue->dev;
p = kzalloc_node(size, GFP_KERNEL,
netdev_queue_numa_node_read(dev_queue));
@@ -571,12 +556,16 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
}
INIT_LIST_HEAD(&sch->list);
skb_queue_head_init(&sch->q);
+
spin_lock_init(&sch->busylock);
+ lockdep_set_class(&sch->busylock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
sch->ops = ops;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
- dev_hold(qdisc_dev(sch));
+ dev_hold(dev);
atomic_set(&sch->refcnt, 1);
return sch;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index e901583e4ea5..d42234c0f13b 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -102,9 +102,8 @@ static inline int gred_wred_mode_check(struct Qdisc *sch)
if (q == NULL)
continue;
- for (n = 0; n < table->DPs; n++)
- if (table->tab[n] && table->tab[n] != q &&
- table->tab[n]->prio == q->prio)
+ for (n = i + 1; n < table->DPs; n++)
+ if (table->tab[n] && table->tab[n]->prio == q->prio)
return 1;
}
@@ -137,6 +136,7 @@ static inline void gred_store_wred_set(struct gred_sched *table,
struct gred_sched_data *q)
{
table->wred_set.qavg = q->vars.qavg;
+ table->wred_set.qidlestart = q->vars.qidlestart;
}
static inline int gred_use_ecn(struct gred_sched *t)
@@ -176,7 +176,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
}
- /* sum up all the qaves of prios <= to ours to get the new qave */
+ /* sum up all the qaves of prios < ours to get the new qave */
if (!gred_wred_mode(t) && gred_rio_mode(t)) {
int i;
@@ -260,16 +260,18 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch)
} else {
q->backlog -= qdisc_pkt_len(skb);
- if (!q->backlog && !gred_wred_mode(t))
- red_start_of_idle_period(&q->vars);
+ if (gred_wred_mode(t)) {
+ if (!sch->qstats.backlog)
+ red_start_of_idle_period(&t->wred_set);
+ } else {
+ if (!q->backlog)
+ red_start_of_idle_period(&q->vars);
+ }
}
return skb;
}
- if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
- red_start_of_idle_period(&t->wred_set);
-
return NULL;
}
@@ -291,19 +293,20 @@ static unsigned int gred_drop(struct Qdisc *sch)
q->backlog -= len;
q->stats.other++;
- if (!q->backlog && !gred_wred_mode(t))
- red_start_of_idle_period(&q->vars);
+ if (gred_wred_mode(t)) {
+ if (!sch->qstats.backlog)
+ red_start_of_idle_period(&t->wred_set);
+ } else {
+ if (!q->backlog)
+ red_start_of_idle_period(&q->vars);
+ }
}
qdisc_drop(skb, sch);
return len;
}
- if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
- red_start_of_idle_period(&t->wred_set);
-
return 0;
-
}
static void gred_reset(struct Qdisc *sch)
@@ -535,6 +538,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
for (i = 0; i < MAX_DPs; i++) {
struct gred_sched_data *q = table->tab[i];
struct tc_gred_qopt opt;
+ unsigned long qavg;
memset(&opt, 0, sizeof(opt));
@@ -566,7 +570,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
if (gred_wred_mode(table))
gred_load_wred_set(table, q);
- opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg);
+ qavg = red_calc_qavg(&q->parms, &q->vars,
+ q->vars.qavg >> q->parms.Wlog);
+ opt.qave = qavg >> q->parms.Wlog;
append_opt:
if (nla_append(skb, sizeof(opt), &opt) < 0)
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 9af01f3df18c..f0dd83cff906 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -203,6 +203,34 @@ out:
return index;
}
+/* Length of the next packet (0 if the queue is empty). */
+static unsigned int qdisc_peek_len(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ skb = sch->ops->peek(sch);
+ return skb ? qdisc_pkt_len(skb) : 0;
+}
+
+static void qfq_deactivate_class(struct qfq_sched *, struct qfq_class *);
+static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl,
+ unsigned int len);
+
+static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
+ u32 lmax, u32 inv_w, int delta_w)
+{
+ int i;
+
+ /* update qfq-specific data */
+ cl->lmax = lmax;
+ cl->inv_w = inv_w;
+ i = qfq_calc_index(cl->inv_w, cl->lmax);
+
+ cl->grp = &q->groups[i];
+
+ q->wsum += delta_w;
+}
+
static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
struct nlattr **tca, unsigned long *arg)
{
@@ -250,6 +278,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
lmax = 1UL << QFQ_MTU_SHIFT;
if (cl != NULL) {
+ bool need_reactivation = false;
+
if (tca[TCA_RATE]) {
err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
qdisc_root_sleeping_lock(sch),
@@ -258,12 +288,29 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return err;
}
- if (inv_w != cl->inv_w) {
- sch_tree_lock(sch);
- q->wsum += delta_w;
- cl->inv_w = inv_w;
- sch_tree_unlock(sch);
+ if (lmax == cl->lmax && inv_w == cl->inv_w)
+ return 0; /* nothing to update */
+
+ i = qfq_calc_index(inv_w, lmax);
+ sch_tree_lock(sch);
+ if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
+ /*
+ * shift cl->F back, to not charge the
+ * class for the not-yet-served head
+ * packet
+ */
+ cl->F = cl->S;
+ /* remove class from its slot in the old group */
+ qfq_deactivate_class(q, cl);
+ need_reactivation = true;
}
+
+ qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
+
+ if (need_reactivation) /* activate in new group */
+ qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
+ sch_tree_unlock(sch);
+
return 0;
}
@@ -273,11 +320,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->refcnt = 1;
cl->common.classid = classid;
- cl->lmax = lmax;
- cl->inv_w = inv_w;
- i = qfq_calc_index(cl->inv_w, cl->lmax);
- cl->grp = &q->groups[i];
+ qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
cl->qdisc = qdisc_create_dflt(sch->dev_queue,
&pfifo_qdisc_ops, classid);
@@ -294,7 +338,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
return err;
}
}
- q->wsum += weight;
sch_tree_lock(sch);
qdisc_class_hash_insert(&q->clhash, &cl->common);
@@ -711,15 +754,6 @@ static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
}
}
-/* What is length of next packet in queue (0 if queue is empty) */
-static unsigned int qdisc_peek_len(struct Qdisc *sch)
-{
- struct sk_buff *skb;
-
- skb = sch->ops->peek(sch);
- return skb ? qdisc_pkt_len(skb) : 0;
-}
-
/*
* Updates the class, returns true if also the group needs to be updated.
*/
@@ -831,7 +865,10 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
if (mask) {
struct qfq_group *next = qfq_ffs(q, mask);
if (qfq_gt(roundedF, next->F)) {
- cl->S = next->F;
+ if (qfq_gt(limit, next->F))
+ cl->S = next->F;
+ else /* preserve timestamp correctness */
+ cl->S = limit;
return;
}
}
@@ -843,11 +880,8 @@ static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct qfq_sched *q = qdisc_priv(sch);
- struct qfq_group *grp;
struct qfq_class *cl;
- int err;
- u64 roundedS;
- int s;
+ int err = 0;
cl = qfq_classify(skb, sch, &err);
if (cl == NULL) {
@@ -876,11 +910,25 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
/* If reach this point, queue q was idle */
- grp = cl->grp;
+ qfq_activate_class(q, cl, qdisc_pkt_len(skb));
+
+ return err;
+}
+
+/*
+ * Handle class switch from idle to backlogged.
+ */
+static void qfq_activate_class(struct qfq_sched *q, struct qfq_class *cl,
+ unsigned int pkt_len)
+{
+ struct qfq_group *grp = cl->grp;
+ u64 roundedS;
+ int s;
+
qfq_update_start(q, cl);
/* compute new finish time and rounded start. */
- cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
+ cl->F = cl->S + (u64)pkt_len * cl->inv_w;
roundedS = qfq_round_down(cl->S, grp->slot_shift);
/*
@@ -917,8 +965,6 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
skip_update:
qfq_slot_insert(grp, cl, roundedS);
-
- return err;
}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ebaef3ed6065..b1ef3bc301a5 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -82,6 +82,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
sctp_scope_t scope,
gfp_t gfp)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
int i;
sctp_paramhdr_t *p;
@@ -124,7 +125,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
* socket values.
*/
asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
- asoc->pf_retrans = sctp_pf_retrans;
+ asoc->pf_retrans = net->sctp.pf_retrans;
asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
@@ -175,7 +176,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
- min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ;
+ min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ;
/* Initializes the timers */
for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
@@ -281,7 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
* and will revert old behavior.
*/
asoc->peer.asconf_capable = 0;
- if (sctp_addip_noauth)
+ if (net->sctp.addip_noauth)
asoc->peer.asconf_capable = 1;
asoc->asconf_addr_del_pending = NULL;
asoc->src_out_of_asoc_ok = 0;
@@ -641,6 +642,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
const gfp_t gfp,
const int peer_state)
{
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_transport *peer;
struct sctp_sock *sp;
unsigned short port;
@@ -674,7 +676,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
return peer;
}
- peer = sctp_transport_new(addr, gfp);
+ peer = sctp_transport_new(net, addr, gfp);
if (!peer)
return NULL;
@@ -1089,13 +1091,15 @@ out:
/* Is this the association we are looking for? */
struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
+ struct net *net,
const union sctp_addr *laddr,
const union sctp_addr *paddr)
{
struct sctp_transport *transport;
if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
- (htons(asoc->peer.port) == paddr->v4.sin_port)) {
+ (htons(asoc->peer.port) == paddr->v4.sin_port) &&
+ net_eq(sock_net(asoc->base.sk), net)) {
transport = sctp_assoc_lookup_paddr(asoc, paddr);
if (!transport)
goto out;
@@ -1116,6 +1120,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
struct sctp_association *asoc =
container_of(work, struct sctp_association,
base.inqueue.immediate);
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_endpoint *ep;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
@@ -1148,13 +1153,13 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
if (sctp_chunk_is_data(chunk))
asoc->peer.last_data_from = chunk->transport;
else
- SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_INCTRLCHUNKS);
if (chunk->transport)
chunk->transport->last_time_heard = jiffies;
/* Run through the state machine. */
- error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype,
+ error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype,
state, ep, asoc, chunk, GFP_ATOMIC);
/* Check to see if the association is freed in response to
@@ -1414,6 +1419,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
/* Should we send a SACK to update our peer? */
static inline int sctp_peer_needs_update(struct sctp_association *asoc)
{
+ struct net *net = sock_net(asoc->base.sk);
switch (asoc->state) {
case SCTP_STATE_ESTABLISHED:
case SCTP_STATE_SHUTDOWN_PENDING:
@@ -1421,7 +1427,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc)
case SCTP_STATE_SHUTDOWN_SENT:
if ((asoc->rwnd > asoc->a_rwnd) &&
((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32,
- (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift),
+ (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift),
asoc->pathmtu)))
return 1;
break;
@@ -1542,7 +1548,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
if (asoc->peer.ipv6_address)
flags |= SCTP_ADDR6_PEERSUPP;
- return sctp_bind_addr_copy(&asoc->base.bind_addr,
+ return sctp_bind_addr_copy(sock_net(asoc->base.sk),
+ &asoc->base.bind_addr,
&asoc->ep->base.bind_addr,
scope, gfp, flags);
}
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index bf812048cf6f..159b9bc5d633 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -392,13 +392,14 @@ nomem:
*/
int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
{
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_auth_bytes *secret;
struct sctp_shared_key *ep_key;
/* If we don't support AUTH, or peer is not capable
* we don't need to do anything.
*/
- if (!sctp_auth_enable || !asoc->peer.auth_capable)
+ if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
return 0;
/* If the key_id is non-zero and we couldn't find an
@@ -445,11 +446,12 @@ struct sctp_shared_key *sctp_auth_get_shkey(
*/
int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
{
+ struct net *net = sock_net(ep->base.sk);
struct crypto_hash *tfm = NULL;
__u16 id;
/* if the transforms are already allocted, we are done */
- if (!sctp_auth_enable) {
+ if (!net->sctp.auth_enable) {
ep->auth_hmacs = NULL;
return 0;
}
@@ -674,7 +676,12 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
/* Check if peer requested that this chunk is authenticated */
int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
{
- if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable)
+ struct net *net;
+ if (!asoc)
+ return 0;
+
+ net = sock_net(asoc->base.sk);
+ if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
return 0;
return __sctp_auth_cid(chunk, asoc->peer.peer_chunks);
@@ -683,7 +690,12 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
/* Check if we requested that peer authenticate this chunk. */
int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
{
- if (!sctp_auth_enable || !asoc)
+ struct net *net;
+ if (!asoc)
+ return 0;
+
+ net = sock_net(asoc->base.sk);
+ if (!net->sctp.auth_enable)
return 0;
return __sctp_auth_cid(chunk,
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 4ece451c8d27..d886b3bf84f5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -52,8 +52,8 @@
#include <net/sctp/sm.h>
/* Forward declarations for internal helpers. */
-static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *,
- sctp_scope_t scope, gfp_t gfp,
+static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
+ union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
int flags);
static void sctp_bind_addr_clean(struct sctp_bind_addr *);
@@ -62,7 +62,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
/* Copy 'src' to 'dest' taking 'scope' into account. Omit addresses
* in 'src' which have a broader scope than 'scope'.
*/
-int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
+int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
const struct sctp_bind_addr *src,
sctp_scope_t scope, gfp_t gfp,
int flags)
@@ -75,7 +75,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
/* Extract the addresses which are relevant for this scope. */
list_for_each_entry(addr, &src->address_list, list) {
- error = sctp_copy_one_addr(dest, &addr->a, scope,
+ error = sctp_copy_one_addr(net, dest, &addr->a, scope,
gfp, flags);
if (error < 0)
goto out;
@@ -87,7 +87,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
*/
if (list_empty(&dest->address_list) && (SCTP_SCOPE_GLOBAL == scope)) {
list_for_each_entry(addr, &src->address_list, list) {
- error = sctp_copy_one_addr(dest, &addr->a,
+ error = sctp_copy_one_addr(net, dest, &addr->a,
SCTP_SCOPE_LINK, gfp,
flags);
if (error < 0)
@@ -448,7 +448,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
}
/* Copy out addresses from the global local address list. */
-static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
+static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
union sctp_addr *addr,
sctp_scope_t scope, gfp_t gfp,
int flags)
@@ -456,8 +456,8 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
int error = 0;
if (sctp_is_any(NULL, addr)) {
- error = sctp_copy_local_addr_list(dest, scope, gfp, flags);
- } else if (sctp_in_scope(addr, scope)) {
+ error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags);
+ } else if (sctp_in_scope(net, addr, scope)) {
/* Now that the address is in scope, check to see if
* the address type is supported by local sock as
* well as the remote peer.
@@ -494,7 +494,7 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
}
/* Is 'addr' valid for 'scope'? */
-int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope)
+int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
{
sctp_scope_t addr_scope = sctp_scope(addr);
@@ -512,7 +512,7 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope)
* Address scoping can be selectively controlled via sysctl
* option
*/
- switch (sctp_scope_policy) {
+ switch (net->sctp.scope_policy) {
case SCTP_SCOPE_POLICY_DISABLE:
return 1;
case SCTP_SCOPE_POLICY_ENABLE:
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 6c8556459a75..7c2df9c33df3 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -257,7 +257,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
offset = 0;
if ((whole > 1) || (whole && over))
- SCTP_INC_STATS_USER(SCTP_MIB_FRAGUSRMSGS);
+ SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS);
/* Create chunks for all the full sized DATA chunks. */
for (i=0, len=first_len; i < whole; i++) {
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 68a385d7c3bd..1859e2bc83d1 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -65,6 +65,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
struct sock *sk,
gfp_t gfp)
{
+ struct net *net = sock_net(sk);
struct sctp_hmac_algo_param *auth_hmacs = NULL;
struct sctp_chunks_param *auth_chunks = NULL;
struct sctp_shared_key *null_key;
@@ -74,7 +75,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
if (!ep->digest)
return NULL;
- if (sctp_auth_enable) {
+ if (net->sctp.auth_enable) {
/* Allocate space for HMACS and CHUNKS authentication
* variables. There are arrays that we encode directly
* into parameters to make the rest of the operations easier.
@@ -106,7 +107,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* If the Add-IP functionality is enabled, we must
* authenticate, ASCONF and ASCONF-ACK chunks
*/
- if (sctp_addip_enable) {
+ if (net->sctp.addip_enable) {
auth_chunks->chunks[0] = SCTP_CID_ASCONF;
auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
auth_chunks->param_hdr.length =
@@ -140,14 +141,14 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
INIT_LIST_HEAD(&ep->asocs);
/* Use SCTP specific send buffer space queues. */
- ep->sndbuf_policy = sctp_sndbuf_policy;
+ ep->sndbuf_policy = net->sctp.sndbuf_policy;
sk->sk_data_ready = sctp_data_ready;
sk->sk_write_space = sctp_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
/* Get the receive buffer policy for this endpoint */
- ep->rcvbuf_policy = sctp_rcvbuf_policy;
+ ep->rcvbuf_policy = net->sctp.rcvbuf_policy;
/* Initialize the secret key used with cookie. */
get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE);
@@ -302,11 +303,13 @@ void sctp_endpoint_put(struct sctp_endpoint *ep)
/* Is this the endpoint we are looking for? */
struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
+ struct net *net,
const union sctp_addr *laddr)
{
struct sctp_endpoint *retval = NULL;
- if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
+ if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) &&
+ net_eq(sock_net(ep->base.sk), net)) {
if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
sctp_sk(ep->base.sk)))
retval = ep;
@@ -343,7 +346,8 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
rport = ntohs(paddr->v4.sin_port);
- hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport);
+ hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port,
+ rport);
head = &sctp_assoc_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, node, &head->chain) {
@@ -386,13 +390,14 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
{
struct sctp_sockaddr_entry *addr;
struct sctp_bind_addr *bp;
+ struct net *net = sock_net(ep->base.sk);
bp = &ep->base.bind_addr;
/* This function is called with the socket lock held,
* so the address_list can not change.
*/
list_for_each_entry(addr, &bp->address_list, list) {
- if (sctp_has_association(&addr->a, paddr))
+ if (sctp_has_association(net, &addr->a, paddr))
return 1;
}
@@ -409,6 +414,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
base.inqueue.immediate);
struct sctp_association *asoc;
struct sock *sk;
+ struct net *net;
struct sctp_transport *transport;
struct sctp_chunk *chunk;
struct sctp_inq *inqueue;
@@ -423,6 +429,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
asoc = NULL;
inqueue = &ep->base.inqueue;
sk = ep->base.sk;
+ net = sock_net(sk);
while (NULL != (chunk = sctp_inq_pop(inqueue))) {
subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
@@ -474,12 +481,12 @@ normal:
if (asoc && sctp_chunk_is_data(chunk))
asoc->peer.last_data_from = chunk->transport;
else
- SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS);
+ SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS);
if (chunk->transport)
chunk->transport->last_time_heard = jiffies;
- error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, state,
+ error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state,
ep, asoc, chunk, GFP_ATOMIC);
if (error && chunk)
diff --git a/net/sctp/input.c b/net/sctp/input.c
index e64d5210ed13..8bd3c279427e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -66,12 +66,15 @@
/* Forward declarations for internal helpers. */
static int sctp_rcv_ootb(struct sk_buff *);
-static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
- const union sctp_addr *laddr,
+static struct sctp_association *__sctp_rcv_lookup(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *paddr,
+ const union sctp_addr *laddr,
struct sctp_transport **transportp);
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr);
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
+ const union sctp_addr *laddr);
static struct sctp_association *__sctp_lookup_association(
+ struct net *net,
const union sctp_addr *local,
const union sctp_addr *peer,
struct sctp_transport **pt);
@@ -80,7 +83,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
/* Calculate the SCTP checksum of an SCTP packet. */
-static inline int sctp_rcv_checksum(struct sk_buff *skb)
+static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
{
struct sctphdr *sh = sctp_hdr(skb);
__le32 cmp = sh->checksum;
@@ -96,7 +99,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
if (val != cmp) {
/* CRC failure, dump it. */
- SCTP_INC_STATS_BH(SCTP_MIB_CHECKSUMERRORS);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS);
return -1;
}
return 0;
@@ -129,11 +132,12 @@ int sctp_rcv(struct sk_buff *skb)
union sctp_addr dest;
int family;
struct sctp_af *af;
+ struct net *net = dev_net(skb->dev);
if (skb->pkt_type!=PACKET_HOST)
goto discard_it;
- SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS);
if (skb_linearize(skb))
goto discard_it;
@@ -145,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb)
if (skb->len < sizeof(struct sctphdr))
goto discard_it;
if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
- sctp_rcv_checksum(skb) < 0)
+ sctp_rcv_checksum(net, skb) < 0)
goto discard_it;
skb_pull(skb, sizeof(struct sctphdr));
@@ -178,10 +182,10 @@ int sctp_rcv(struct sk_buff *skb)
!af->addr_valid(&dest, NULL, skb))
goto discard_it;
- asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport);
+ asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport);
if (!asoc)
- ep = __sctp_rcv_lookup_endpoint(&dest);
+ ep = __sctp_rcv_lookup_endpoint(net, &dest);
/* Retrieve the common input handling substructure. */
rcvr = asoc ? &asoc->base : &ep->base;
@@ -200,7 +204,7 @@ int sctp_rcv(struct sk_buff *skb)
sctp_endpoint_put(ep);
ep = NULL;
}
- sk = sctp_get_ctl_sock();
+ sk = net->sctp.ctl_sock;
ep = sctp_sk(sk)->ep;
sctp_endpoint_hold(ep);
rcvr = &ep->base;
@@ -216,7 +220,7 @@ int sctp_rcv(struct sk_buff *skb)
*/
if (!asoc) {
if (sctp_rcv_ootb(skb)) {
- SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES);
goto discard_release;
}
}
@@ -272,9 +276,9 @@ int sctp_rcv(struct sk_buff *skb)
skb = NULL; /* sctp_chunk_free already freed the skb */
goto discard_release;
}
- SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG);
} else {
- SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ);
sctp_inq_push(&chunk->rcvr->inqueue, chunk);
}
@@ -289,7 +293,7 @@ int sctp_rcv(struct sk_buff *skb)
return 0;
discard_it:
- SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS);
+ SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS);
kfree_skb(skb);
return 0;
@@ -462,11 +466,13 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
}
} else {
+ struct net *net = sock_net(sk);
+
if (timer_pending(&t->proto_unreach_timer) &&
del_timer(&t->proto_unreach_timer))
sctp_association_put(asoc);
- sctp_do_sm(SCTP_EVENT_T_OTHER,
+ sctp_do_sm(net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
asoc->state, asoc->ep, asoc, t,
GFP_ATOMIC);
@@ -474,7 +480,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
}
/* Common lookup code for icmp/icmpv6 error handler. */
-struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
+struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
struct sctphdr *sctphdr,
struct sctp_association **app,
struct sctp_transport **tpp)
@@ -503,7 +509,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
/* Look for an association that matches the incoming ICMP error
* packet.
*/
- asoc = __sctp_lookup_association(&saddr, &daddr, &transport);
+ asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport);
if (!asoc)
return NULL;
@@ -539,7 +545,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
*app = asoc;
*tpp = transport;
@@ -586,9 +592,10 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
struct inet_sock *inet;
sk_buff_data_t saveip, savesctp;
int err;
+ struct net *net = dev_net(skb->dev);
if (skb->len < ihlen + 8) {
- ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
@@ -597,12 +604,12 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
savesctp = skb->transport_header;
skb_reset_network_header(skb);
skb_set_transport_header(skb, ihlen);
- sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
+ sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back, the original values. */
skb->network_header = saveip;
skb->transport_header = savesctp;
if (!sk) {
- ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
/* Warning: The sock lock is held. Remember to call
@@ -723,12 +730,13 @@ discard:
/* Insert endpoint into the hash table. */
static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
{
+ struct net *net = sock_net(ep->base.sk);
struct sctp_ep_common *epb;
struct sctp_hashbucket *head;
epb = &ep->base;
- epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
+ epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
head = &sctp_ep_hashtable[epb->hashent];
sctp_write_lock(&head->lock);
@@ -747,12 +755,13 @@ void sctp_hash_endpoint(struct sctp_endpoint *ep)
/* Remove endpoint from the hash table. */
static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
{
+ struct net *net = sock_net(ep->base.sk);
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
epb = &ep->base;
- epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
+ epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
head = &sctp_ep_hashtable[epb->hashent];
@@ -770,7 +779,8 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep)
}
/* Look up an endpoint. */
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr)
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
+ const union sctp_addr *laddr)
{
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
@@ -778,16 +788,16 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l
struct hlist_node *node;
int hash;
- hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port));
+ hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port));
head = &sctp_ep_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, node, &head->chain) {
ep = sctp_ep(epb);
- if (sctp_endpoint_is_match(ep, laddr))
+ if (sctp_endpoint_is_match(ep, net, laddr))
goto hit;
}
- ep = sctp_sk((sctp_get_ctl_sock()))->ep;
+ ep = sctp_sk(net->sctp.ctl_sock)->ep;
hit:
sctp_endpoint_hold(ep);
@@ -798,13 +808,15 @@ hit:
/* Insert association into the hash table. */
static void __sctp_hash_established(struct sctp_association *asoc)
{
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_ep_common *epb;
struct sctp_hashbucket *head;
epb = &asoc->base;
/* Calculate which chain this entry will belong to. */
- epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, asoc->peer.port);
+ epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
+ asoc->peer.port);
head = &sctp_assoc_hashtable[epb->hashent];
@@ -827,12 +839,13 @@ void sctp_hash_established(struct sctp_association *asoc)
/* Remove association from the hash table. */
static void __sctp_unhash_established(struct sctp_association *asoc)
{
+ struct net *net = sock_net(asoc->base.sk);
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
epb = &asoc->base;
- epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port,
+ epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
asoc->peer.port);
head = &sctp_assoc_hashtable[epb->hashent];
@@ -855,6 +868,7 @@ void sctp_unhash_established(struct sctp_association *asoc)
/* Look up an association. */
static struct sctp_association *__sctp_lookup_association(
+ struct net *net,
const union sctp_addr *local,
const union sctp_addr *peer,
struct sctp_transport **pt)
@@ -869,12 +883,13 @@ static struct sctp_association *__sctp_lookup_association(
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port));
+ hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port),
+ ntohs(peer->v4.sin_port));
head = &sctp_assoc_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, node, &head->chain) {
asoc = sctp_assoc(epb);
- transport = sctp_assoc_is_match(asoc, local, peer);
+ transport = sctp_assoc_is_match(asoc, net, local, peer);
if (transport)
goto hit;
}
@@ -892,27 +907,29 @@ hit:
/* Look up an association. BH-safe. */
SCTP_STATIC
-struct sctp_association *sctp_lookup_association(const union sctp_addr *laddr,
+struct sctp_association *sctp_lookup_association(struct net *net,
+ const union sctp_addr *laddr,
const union sctp_addr *paddr,
struct sctp_transport **transportp)
{
struct sctp_association *asoc;
sctp_local_bh_disable();
- asoc = __sctp_lookup_association(laddr, paddr, transportp);
+ asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
sctp_local_bh_enable();
return asoc;
}
/* Is there an association matching the given local and peer addresses? */
-int sctp_has_association(const union sctp_addr *laddr,
+int sctp_has_association(struct net *net,
+ const union sctp_addr *laddr,
const union sctp_addr *paddr)
{
struct sctp_association *asoc;
struct sctp_transport *transport;
- if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) {
+ if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
sctp_association_put(asoc);
return 1;
}
@@ -938,7 +955,8 @@ int sctp_has_association(const union sctp_addr *laddr,
* in certain circumstances.
*
*/
-static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *laddr, struct sctp_transport **transportp)
{
struct sctp_association *asoc;
@@ -978,7 +996,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
af->from_addr_param(paddr, params.addr, sh->source, 0);
- asoc = __sctp_lookup_association(laddr, paddr, &transport);
+ asoc = __sctp_lookup_association(net, laddr, paddr, &transport);
if (asoc)
return asoc;
}
@@ -1001,6 +1019,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
* subsequent ASCONF Chunks. If found, proceed to rule D4.
*/
static struct sctp_association *__sctp_rcv_asconf_lookup(
+ struct net *net,
sctp_chunkhdr_t *ch,
const union sctp_addr *laddr,
__be16 peer_port,
@@ -1020,7 +1039,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
af->from_addr_param(&paddr, param, peer_port, 0);
- return __sctp_lookup_association(laddr, &paddr, transportp);
+ return __sctp_lookup_association(net, laddr, &paddr, transportp);
}
@@ -1033,7 +1052,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
* This means that any chunks that can help us identify the association need
* to be looked at to find this association.
*/
-static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *laddr,
struct sctp_transport **transportp)
{
@@ -1074,8 +1094,9 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
break;
case SCTP_CID_ASCONF:
- if (have_auth || sctp_addip_noauth)
- asoc = __sctp_rcv_asconf_lookup(ch, laddr,
+ if (have_auth || net->sctp.addip_noauth)
+ asoc = __sctp_rcv_asconf_lookup(
+ net, ch, laddr,
sctp_hdr(skb)->source,
transportp);
default:
@@ -1098,7 +1119,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
* include looking inside of INIT/INIT-ACK chunks or after the AUTH
* chunks.
*/
-static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *laddr,
struct sctp_transport **transportp)
{
@@ -1118,11 +1140,11 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
switch (ch->type) {
case SCTP_CID_INIT:
case SCTP_CID_INIT_ACK:
- return __sctp_rcv_init_lookup(skb, laddr, transportp);
+ return __sctp_rcv_init_lookup(net, skb, laddr, transportp);
break;
default:
- return __sctp_rcv_walk_lookup(skb, laddr, transportp);
+ return __sctp_rcv_walk_lookup(net, skb, laddr, transportp);
break;
}
@@ -1131,21 +1153,22 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
}
/* Lookup an association for an inbound skb. */
-static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_lookup(struct net *net,
+ struct sk_buff *skb,
const union sctp_addr *paddr,
const union sctp_addr *laddr,
struct sctp_transport **transportp)
{
struct sctp_association *asoc;
- asoc = __sctp_lookup_association(laddr, paddr, transportp);
+ asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
/* Further lookup for INIT/INIT-ACK packets.
* SCTP Implementors Guide, 2.18 Handling of address
* parameters within the INIT or INIT-ACK.
*/
if (!asoc)
- asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp);
+ asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp);
return asoc;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ed7139ea7978..ea14cb445295 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -99,6 +99,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
struct sctp_sockaddr_entry *addr = NULL;
struct sctp_sockaddr_entry *temp;
+ struct net *net = dev_net(ifa->idev->dev);
int found = 0;
switch (ev) {
@@ -110,27 +111,27 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
addr->a.v6.sin6_addr = ifa->addr;
addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
addr->valid = 1;
- spin_lock_bh(&sctp_local_addr_lock);
- list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
- sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW);
- spin_unlock_bh(&sctp_local_addr_lock);
+ spin_lock_bh(&net->sctp.local_addr_lock);
+ list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW);
+ spin_unlock_bh(&net->sctp.local_addr_lock);
}
break;
case NETDEV_DOWN:
- spin_lock_bh(&sctp_local_addr_lock);
+ spin_lock_bh(&net->sctp.local_addr_lock);
list_for_each_entry_safe(addr, temp,
- &sctp_local_addr_list, list) {
+ &net->sctp.local_addr_list, list) {
if (addr->a.sa.sa_family == AF_INET6 &&
ipv6_addr_equal(&addr->a.v6.sin6_addr,
&ifa->addr)) {
- sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
list_del_rcu(&addr->list);
break;
}
}
- spin_unlock_bh(&sctp_local_addr_lock);
+ spin_unlock_bh(&net->sctp.local_addr_lock);
if (found)
kfree_rcu(addr, rcu);
break;
@@ -154,6 +155,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct ipv6_pinfo *np;
sk_buff_data_t saveip, savesctp;
int err;
+ struct net *net = dev_net(skb->dev);
idev = in6_dev_get(skb->dev);
@@ -162,12 +164,12 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
savesctp = skb->transport_header;
skb_reset_network_header(skb);
skb_set_transport_header(skb, offset);
- sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
+ sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
/* Put back, the original pointers. */
skb->network_header = saveip;
skb->transport_header = savesctp;
if (!sk) {
- ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS);
+ ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS);
goto out;
}
@@ -241,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
__func__, skb, skb->len,
&fl6.saddr, &fl6.daddr);
- SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
+ SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
if (!(transport->param_flags & SPP_PMTUD_ENABLE))
skb->local_df = 1;
@@ -580,7 +582,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
if (!(type & IPV6_ADDR_UNICAST))
return 0;
- return ipv6_chk_addr(&init_net, in6, NULL, 0);
+ return ipv6_chk_addr(sock_net(&sp->inet.sk), in6, NULL, 0);
}
/* This function checks if the address is a valid address to be used for
@@ -857,14 +859,14 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
struct net_device *dev;
if (type & IPV6_ADDR_LINKLOCAL) {
+ struct net *net;
if (!addr->v6.sin6_scope_id)
return 0;
+ net = sock_net(&opt->inet.sk);
rcu_read_lock();
- dev = dev_get_by_index_rcu(&init_net,
- addr->v6.sin6_scope_id);
+ dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
if (!dev ||
- !ipv6_chk_addr(&init_net, &addr->v6.sin6_addr,
- dev, 0)) {
+ !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
rcu_read_unlock();
return 0;
}
@@ -897,7 +899,7 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
if (!addr->v6.sin6_scope_id)
return 0;
rcu_read_lock();
- dev = dev_get_by_index_rcu(&init_net,
+ dev = dev_get_by_index_rcu(sock_net(&opt->inet.sk),
addr->v6.sin6_scope_id);
rcu_read_unlock();
if (!dev)
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 8ef8e7d9eb61..fe012c44f8df 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -129,20 +129,20 @@ static const struct file_operations sctp_objcnt_ops = {
};
/* Initialize the objcount in the proc filesystem. */
-void sctp_dbg_objcnt_init(void)
+void sctp_dbg_objcnt_init(struct net *net)
{
struct proc_dir_entry *ent;
ent = proc_create("sctp_dbg_objcnt", 0,
- proc_net_sctp, &sctp_objcnt_ops);
+ net->sctp.proc_net_sctp, &sctp_objcnt_ops);
if (!ent)
pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
}
/* Cleanup the objcount entry in the proc filesystem. */
-void sctp_dbg_objcnt_exit(void)
+void sctp_dbg_objcnt_exit(struct net *net)
{
- remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp);
+ remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
}
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 838e18b4d7ea..4e90188bf489 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -364,6 +364,25 @@ finish:
return retval;
}
+static void sctp_packet_release_owner(struct sk_buff *skb)
+{
+ sk_free(skb->sk);
+}
+
+static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = sctp_packet_release_owner;
+
+ /*
+ * The data chunks have already been accounted for in sctp_sendmsg(),
+ * therefore only reserve a single byte to keep socket around until
+ * the packet has been transmitted.
+ */
+ atomic_inc(&sk->sk_wmem_alloc);
+}
+
/* All packets are sent to the network through this function from
* sctp_outq_tail().
*
@@ -405,7 +424,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
/* Set the owning socket so that we know where to get the
* destination IP address.
*/
- skb_set_owner_w(nskb, sk);
+ sctp_packet_set_owner_w(nskb, sk);
if (!sctp_transport_dst_check(tp)) {
sctp_transport_route(tp, NULL, sctp_sk(sk));
@@ -597,7 +616,7 @@ out:
return err;
no_route:
kfree_skb(nskb);
- IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
/* FIXME: Returning the 'err' will effect all the associations
* associated with a socket, although only one of the paths of the
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e7aa177c9522..1b4a7f8ec3fd 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -63,6 +63,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
static void sctp_check_transmitted(struct sctp_outq *q,
struct list_head *transmitted_queue,
struct sctp_transport *transport,
+ union sctp_addr *saddr,
struct sctp_sackhdr *sack,
__u32 *highest_new_tsn);
@@ -299,6 +300,7 @@ void sctp_outq_free(struct sctp_outq *q)
/* Put a new chunk in an sctp_outq. */
int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
{
+ struct net *net = sock_net(q->asoc->base.sk);
int error = 0;
SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n",
@@ -337,15 +339,15 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
sctp_outq_tail_data(q, chunk);
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
- SCTP_INC_STATS(SCTP_MIB_OUTUNORDERCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
else
- SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
q->empty = 0;
break;
}
} else {
list_add_tail(&chunk->list, &q->control_chunk_list);
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
}
if (error < 0)
@@ -478,11 +480,12 @@ void sctp_retransmit_mark(struct sctp_outq *q,
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
sctp_retransmit_reason_t reason)
{
+ struct net *net = sock_net(q->asoc->base.sk);
int error = 0;
switch(reason) {
case SCTP_RTXR_T3_RTX:
- SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS);
+ SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS);
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
/* Update the retran path if the T3-rtx timer has expired for
* the current retran path.
@@ -493,15 +496,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
transport->asoc->unack_data;
break;
case SCTP_RTXR_FAST_RTX:
- SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
+ SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS);
sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
q->fast_rtx = 1;
break;
case SCTP_RTXR_PMTUD:
- SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
+ SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS);
break;
case SCTP_RTXR_T1_RTX:
- SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS);
+ SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS);
transport->asoc->init_retries++;
break;
default:
@@ -589,9 +592,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
* next chunk.
*/
if (chunk->tsn_gap_acked) {
- list_del(&chunk->transmitted_list);
- list_add_tail(&chunk->transmitted_list,
- &transport->transmitted);
+ list_move_tail(&chunk->transmitted_list,
+ &transport->transmitted);
continue;
}
@@ -655,9 +657,8 @@ redo:
/* The append was successful, so add this chunk to
* the transmitted list.
*/
- list_del(&chunk->transmitted_list);
- list_add_tail(&chunk->transmitted_list,
- &transport->transmitted);
+ list_move_tail(&chunk->transmitted_list,
+ &transport->transmitted);
/* Mark the chunk as ineligible for fast retransmit
* after it is retransmitted.
@@ -1139,9 +1140,10 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc,
* Process the SACK against the outqueue. Mostly, this just frees
* things off the transmitted queue.
*/
-int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
+int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
{
struct sctp_association *asoc = q->asoc;
+ struct sctp_sackhdr *sack = chunk->subh.sack_hdr;
struct sctp_transport *transport;
struct sctp_chunk *tchunk = NULL;
struct list_head *lchunk, *transport_list, *temp;
@@ -1210,7 +1212,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
/* Run through the retransmit queue. Credit bytes received
* and free those chunks that we can.
*/
- sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn);
+ sctp_check_transmitted(q, &q->retransmit, NULL, NULL, sack, &highest_new_tsn);
/* Run through the transmitted queue.
* Credit bytes received and free those chunks which we can.
@@ -1219,7 +1221,8 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
*/
list_for_each_entry(transport, transport_list, transports) {
sctp_check_transmitted(q, &transport->transmitted,
- transport, sack, &highest_new_tsn);
+ transport, &chunk->source, sack,
+ &highest_new_tsn);
/*
* SFR-CACC algorithm:
* C) Let count_of_newacks be the number of
@@ -1326,6 +1329,7 @@ int sctp_outq_is_empty(const struct sctp_outq *q)
static void sctp_check_transmitted(struct sctp_outq *q,
struct list_head *transmitted_queue,
struct sctp_transport *transport,
+ union sctp_addr *saddr,
struct sctp_sackhdr *sack,
__u32 *highest_new_tsn_in_sack)
{
@@ -1633,8 +1637,9 @@ static void sctp_check_transmitted(struct sctp_outq *q,
/* Mark the destination transport address as
* active if it is not so marked.
*/
- if ((transport->state == SCTP_INACTIVE) ||
- (transport->state == SCTP_UNCONFIRMED)) {
+ if ((transport->state == SCTP_INACTIVE ||
+ transport->state == SCTP_UNCONFIRMED) &&
+ sctp_cmp_addr_exact(&transport->ipaddr, saddr)) {
sctp_assoc_control_transport(
transport->asoc,
transport,
@@ -1914,6 +1919,6 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
if (ftsn_chunk) {
list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
}
}
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index 534c7eae9d15..794bb14decde 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -57,7 +57,7 @@
#define DECLARE_PRIMITIVE(name) \
/* This is called in the code as sctp_primitive_ ## name. */ \
-int sctp_primitive_ ## name(struct sctp_association *asoc, \
+int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
void *arg) { \
int error = 0; \
sctp_event_t event_type; sctp_subtype_t subtype; \
@@ -69,7 +69,7 @@ int sctp_primitive_ ## name(struct sctp_association *asoc, \
state = asoc ? asoc->state : SCTP_STATE_CLOSED; \
ep = asoc ? asoc->ep : NULL; \
\
- error = sctp_do_sm(event_type, subtype, state, ep, asoc, \
+ error = sctp_do_sm(net, event_type, subtype, state, ep, asoc, \
arg, GFP_KERNEL); \
return error; \
}
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 1e2eee88c3ea..c3bea269faf4 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -80,11 +80,12 @@ static const struct snmp_mib sctp_snmp_list[] = {
/* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
{
+ struct net *net = seq->private;
int i;
for (i = 0; sctp_snmp_list[i].name != NULL; i++)
seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
- snmp_fold_field((void __percpu **)sctp_statistics,
+ snmp_fold_field((void __percpu **)net->sctp.sctp_statistics,
sctp_snmp_list[i].entry));
return 0;
@@ -93,7 +94,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
/* Initialize the seq file operations for 'snmp' object. */
static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, sctp_snmp_seq_show, NULL);
+ return single_open_net(inode, file, sctp_snmp_seq_show);
}
static const struct file_operations sctp_snmp_seq_fops = {
@@ -105,11 +106,12 @@ static const struct file_operations sctp_snmp_seq_fops = {
};
/* Set up the proc fs entry for 'snmp' object. */
-int __init sctp_snmp_proc_init(void)
+int __net_init sctp_snmp_proc_init(struct net *net)
{
struct proc_dir_entry *p;
- p = proc_create("snmp", S_IRUGO, proc_net_sctp, &sctp_snmp_seq_fops);
+ p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_snmp_seq_fops);
if (!p)
return -ENOMEM;
@@ -117,9 +119,9 @@ int __init sctp_snmp_proc_init(void)
}
/* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(void)
+void sctp_snmp_proc_exit(struct net *net)
{
- remove_proc_entry("snmp", proc_net_sctp);
+ remove_proc_entry("snmp", net->sctp.proc_net_sctp);
}
/* Dump local addresses of an association/endpoint. */
@@ -213,10 +215,13 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
sctp_for_each_hentry(epb, node, &head->chain) {
ep = sctp_ep(epb);
sk = epb->sk;
+ if (!net_eq(sock_net(sk), seq_file_net(seq)))
+ continue;
seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
sctp_sk(sk)->type, sk->sk_state, hash,
epb->bind_addr.port,
- sock_i_uid(sk), sock_i_ino(sk));
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ sock_i_ino(sk));
sctp_seq_dump_local_addrs(seq, epb);
seq_printf(seq, "\n");
@@ -238,7 +243,8 @@ static const struct seq_operations sctp_eps_ops = {
/* Initialize the seq file operations for 'eps' object. */
static int sctp_eps_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &sctp_eps_ops);
+ return seq_open_net(inode, file, &sctp_eps_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations sctp_eps_seq_fops = {
@@ -249,11 +255,12 @@ static const struct file_operations sctp_eps_seq_fops = {
};
/* Set up the proc fs entry for 'eps' object. */
-int __init sctp_eps_proc_init(void)
+int __net_init sctp_eps_proc_init(struct net *net)
{
struct proc_dir_entry *p;
- p = proc_create("eps", S_IRUGO, proc_net_sctp, &sctp_eps_seq_fops);
+ p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_eps_seq_fops);
if (!p)
return -ENOMEM;
@@ -261,9 +268,9 @@ int __init sctp_eps_proc_init(void)
}
/* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(void)
+void sctp_eps_proc_exit(struct net *net)
{
- remove_proc_entry("eps", proc_net_sctp);
+ remove_proc_entry("eps", net->sctp.proc_net_sctp);
}
@@ -316,6 +323,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sctp_for_each_hentry(epb, node, &head->chain) {
assoc = sctp_assoc(epb);
sk = epb->sk;
+ if (!net_eq(sock_net(sk), seq_file_net(seq)))
+ continue;
seq_printf(seq,
"%8pK %8pK %-3d %-3d %-2d %-4d "
"%4d %8d %8d %7d %5lu %-5d %5d ",
@@ -324,7 +333,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
assoc->assoc_id,
assoc->sndbuf_used,
atomic_read(&assoc->rmem_alloc),
- sock_i_uid(sk), sock_i_ino(sk),
+ from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
+ sock_i_ino(sk),
epb->bind_addr.port,
assoc->peer.port);
seq_printf(seq, " ");
@@ -354,7 +364,8 @@ static const struct seq_operations sctp_assoc_ops = {
/* Initialize the seq file operations for 'assocs' object. */
static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &sctp_assoc_ops);
+ return seq_open_net(inode, file, &sctp_assoc_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations sctp_assocs_seq_fops = {
@@ -365,11 +376,11 @@ static const struct file_operations sctp_assocs_seq_fops = {
};
/* Set up the proc fs entry for 'assocs' object. */
-int __init sctp_assocs_proc_init(void)
+int __net_init sctp_assocs_proc_init(struct net *net)
{
struct proc_dir_entry *p;
- p = proc_create("assocs", S_IRUGO, proc_net_sctp,
+ p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
&sctp_assocs_seq_fops);
if (!p)
return -ENOMEM;
@@ -378,9 +389,9 @@ int __init sctp_assocs_proc_init(void)
}
/* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(void)
+void sctp_assocs_proc_exit(struct net *net)
{
- remove_proc_entry("assocs", proc_net_sctp);
+ remove_proc_entry("assocs", net->sctp.proc_net_sctp);
}
static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos)
@@ -426,6 +437,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
sctp_local_bh_disable();
read_lock(&head->lock);
sctp_for_each_hentry(epb, node, &head->chain) {
+ if (!net_eq(sock_net(epb->sk), seq_file_net(seq)))
+ continue;
assoc = sctp_assoc(epb);
list_for_each_entry(tsp, &assoc->peer.transport_addr_list,
transports) {
@@ -489,14 +502,15 @@ static const struct seq_operations sctp_remaddr_ops = {
};
/* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(void)
+void sctp_remaddr_proc_exit(struct net *net)
{
- remove_proc_entry("remaddr", proc_net_sctp);
+ remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
}
static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &sctp_remaddr_ops);
+ return seq_open_net(inode, file, &sctp_remaddr_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations sctp_remaddr_seq_fops = {
@@ -506,11 +520,12 @@ static const struct file_operations sctp_remaddr_seq_fops = {
.release = seq_release,
};
-int __init sctp_remaddr_proc_init(void)
+int __net_init sctp_remaddr_proc_init(struct net *net)
{
struct proc_dir_entry *p;
- p = proc_create("remaddr", S_IRUGO, proc_net_sctp, &sctp_remaddr_seq_fops);
+ p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
+ &sctp_remaddr_seq_fops);
if (!p)
return -ENOMEM;
return 0;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1f89c4e69645..2d518425d598 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -69,21 +69,10 @@
/* Global data structures. */
struct sctp_globals sctp_globals __read_mostly;
-DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
-
-#ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_net_sctp;
-#endif
struct idr sctp_assocs_id;
DEFINE_SPINLOCK(sctp_assocs_id_lock);
-/* This is the global socket data structure used for responding to
- * the Out-of-the-blue (OOTB) packets. A control sock will be created
- * for this socket at the initialization time.
- */
-static struct sock *sctp_ctl_sock;
-
static struct sctp_pf *sctp_pf_inet6_specific;
static struct sctp_pf *sctp_pf_inet_specific;
static struct sctp_af *sctp_af_v4_specific;
@@ -96,74 +85,54 @@ long sysctl_sctp_mem[3];
int sysctl_sctp_rmem[3];
int sysctl_sctp_wmem[3];
-/* Return the address of the control sock. */
-struct sock *sctp_get_ctl_sock(void)
-{
- return sctp_ctl_sock;
-}
-
/* Set up the proc fs entry for the SCTP protocol. */
-static __init int sctp_proc_init(void)
+static __net_init int sctp_proc_init(struct net *net)
{
- if (percpu_counter_init(&sctp_sockets_allocated, 0))
- goto out_nomem;
#ifdef CONFIG_PROC_FS
- if (!proc_net_sctp) {
- proc_net_sctp = proc_mkdir("sctp", init_net.proc_net);
- if (!proc_net_sctp)
- goto out_free_percpu;
- }
-
- if (sctp_snmp_proc_init())
+ net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+ if (!net->sctp.proc_net_sctp)
+ goto out_proc_net_sctp;
+ if (sctp_snmp_proc_init(net))
goto out_snmp_proc_init;
- if (sctp_eps_proc_init())
+ if (sctp_eps_proc_init(net))
goto out_eps_proc_init;
- if (sctp_assocs_proc_init())
+ if (sctp_assocs_proc_init(net))
goto out_assocs_proc_init;
- if (sctp_remaddr_proc_init())
+ if (sctp_remaddr_proc_init(net))
goto out_remaddr_proc_init;
return 0;
out_remaddr_proc_init:
- sctp_assocs_proc_exit();
+ sctp_assocs_proc_exit(net);
out_assocs_proc_init:
- sctp_eps_proc_exit();
+ sctp_eps_proc_exit(net);
out_eps_proc_init:
- sctp_snmp_proc_exit();
+ sctp_snmp_proc_exit(net);
out_snmp_proc_init:
- if (proc_net_sctp) {
- proc_net_sctp = NULL;
- remove_proc_entry("sctp", init_net.proc_net);
- }
-out_free_percpu:
- percpu_counter_destroy(&sctp_sockets_allocated);
-#else
- return 0;
-#endif /* CONFIG_PROC_FS */
-
-out_nomem:
+ remove_proc_entry("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
+out_proc_net_sctp:
return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
+ return 0;
}
/* Clean up the proc fs entry for the SCTP protocol.
* Note: Do not make this __exit as it is used in the init error
* path.
*/
-static void sctp_proc_exit(void)
+static void sctp_proc_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
- sctp_snmp_proc_exit();
- sctp_eps_proc_exit();
- sctp_assocs_proc_exit();
- sctp_remaddr_proc_exit();
-
- if (proc_net_sctp) {
- proc_net_sctp = NULL;
- remove_proc_entry("sctp", init_net.proc_net);
- }
+ sctp_snmp_proc_exit(net);
+ sctp_eps_proc_exit(net);
+ sctp_assocs_proc_exit(net);
+ sctp_remaddr_proc_exit(net);
+
+ remove_proc_entry("sctp", net->proc_net);
+ net->sctp.proc_net_sctp = NULL;
#endif
- percpu_counter_destroy(&sctp_sockets_allocated);
}
/* Private helper to extract ipv4 address and stash them in
@@ -201,29 +170,29 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
/* Extract our IP addresses from the system and stash them in the
* protocol structure.
*/
-static void sctp_get_local_addr_list(void)
+static void sctp_get_local_addr_list(struct net *net)
{
struct net_device *dev;
struct list_head *pos;
struct sctp_af *af;
rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
+ for_each_netdev_rcu(net, dev) {
__list_for_each(pos, &sctp_address_families) {
af = list_entry(pos, struct sctp_af, list);
- af->copy_addrlist(&sctp_local_addr_list, dev);
+ af->copy_addrlist(&net->sctp.local_addr_list, dev);
}
}
rcu_read_unlock();
}
/* Free the existing local addresses. */
-static void sctp_free_local_addr_list(void)
+static void sctp_free_local_addr_list(struct net *net)
{
struct sctp_sockaddr_entry *addr;
struct list_head *pos, *temp;
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
+ list_for_each_safe(pos, temp, &net->sctp.local_addr_list) {
addr = list_entry(pos, struct sctp_sockaddr_entry, list);
list_del(pos);
kfree(addr);
@@ -231,17 +200,17 @@ static void sctp_free_local_addr_list(void)
}
/* Copy the local addresses which are valid for 'scope' into 'bp'. */
-int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
- gfp_t gfp, int copy_flags)
+int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
+ sctp_scope_t scope, gfp_t gfp, int copy_flags)
{
struct sctp_sockaddr_entry *addr;
int error = 0;
rcu_read_lock();
- list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
if (!addr->valid)
continue;
- if (sctp_in_scope(&addr->a, scope)) {
+ if (sctp_in_scope(net, &addr->a, scope)) {
/* Now that the address is in scope, check to see if
* the address type is really supported by the local
* sock as well as the remote peer.
@@ -397,7 +366,8 @@ static int sctp_v4_addr_valid(union sctp_addr *addr,
/* Should this be available for binding? */
static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
{
- int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr);
+ struct net *net = sock_net(&sp->inet.sk);
+ int ret = inet_addr_type(net, addr->v4.sin_addr.s_addr);
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
@@ -484,7 +454,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
__func__, &fl4->daddr, &fl4->saddr);
- rt = ip_route_output_key(&init_net, fl4);
+ rt = ip_route_output_key(sock_net(sk), fl4);
if (!IS_ERR(rt))
dst = &rt->dst;
@@ -530,7 +500,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
(AF_INET == laddr->a.sa.sa_family)) {
fl4->saddr = laddr->a.v4.sin_addr.s_addr;
fl4->fl4_sport = laddr->a.v4.sin_port;
- rt = ip_route_output_key(&init_net, fl4);
+ rt = ip_route_output_key(sock_net(sk), fl4);
if (!IS_ERR(rt)) {
dst = &rt->dst;
goto out_unlock;
@@ -627,14 +597,15 @@ static void sctp_v4_ecn_capable(struct sock *sk)
void sctp_addr_wq_timeout_handler(unsigned long arg)
{
+ struct net *net = (struct net *)arg;
struct sctp_sockaddr_entry *addrw, *temp;
struct sctp_sock *sp;
- spin_lock_bh(&sctp_addr_wq_lock);
+ spin_lock_bh(&net->sctp.addr_wq_lock);
- list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) {
+ list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) {
SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ",
- " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state,
+ " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state,
addrw);
#if IS_ENABLED(CONFIG_IPV6)
@@ -648,7 +619,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
goto free_next;
in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr;
- if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 &&
+ if (ipv6_chk_addr(net, in6, NULL, 0) == 0 &&
addrw->state == SCTP_ADDR_NEW) {
unsigned long timeo_val;
@@ -656,12 +627,12 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
SCTP_ADDRESS_TICK_DELAY);
timeo_val = jiffies;
timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
- mod_timer(&sctp_addr_wq_timer, timeo_val);
+ mod_timer(&net->sctp.addr_wq_timer, timeo_val);
break;
}
}
#endif
- list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) {
+ list_for_each_entry(sp, &net->sctp.auto_asconf_splist, auto_asconf_list) {
struct sock *sk;
sk = sctp_opt2sk(sp);
@@ -679,31 +650,32 @@ free_next:
list_del(&addrw->list);
kfree(addrw);
}
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
}
-static void sctp_free_addr_wq(void)
+static void sctp_free_addr_wq(struct net *net)
{
struct sctp_sockaddr_entry *addrw;
struct sctp_sockaddr_entry *temp;
- spin_lock_bh(&sctp_addr_wq_lock);
- del_timer(&sctp_addr_wq_timer);
- list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) {
+ spin_lock_bh(&net->sctp.addr_wq_lock);
+ del_timer(&net->sctp.addr_wq_timer);
+ list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) {
list_del(&addrw->list);
kfree(addrw);
}
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
}
/* lookup the entry for the same address in the addr_waitq
* sctp_addr_wq MUST be locked
*/
-static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr)
+static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct net *net,
+ struct sctp_sockaddr_entry *addr)
{
struct sctp_sockaddr_entry *addrw;
- list_for_each_entry(addrw, &sctp_addr_waitq, list) {
+ list_for_each_entry(addrw, &net->sctp.addr_waitq, list) {
if (addrw->a.sa.sa_family != addr->a.sa.sa_family)
continue;
if (addrw->a.sa.sa_family == AF_INET) {
@@ -719,7 +691,7 @@ static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entr
return NULL;
}
-void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd)
+void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cmd)
{
struct sctp_sockaddr_entry *addrw;
unsigned long timeo_val;
@@ -730,38 +702,38 @@ void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd)
* new address after a couple of addition and deletion of that address
*/
- spin_lock_bh(&sctp_addr_wq_lock);
+ spin_lock_bh(&net->sctp.addr_wq_lock);
/* Offsets existing events in addr_wq */
- addrw = sctp_addr_wq_lookup(addr);
+ addrw = sctp_addr_wq_lookup(net, addr);
if (addrw) {
if (addrw->state != cmd) {
SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ",
" in wq %p\n", addrw->state, &addrw->a,
- &sctp_addr_waitq);
+ &net->sctp.addr_waitq);
list_del(&addrw->list);
kfree(addrw);
}
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
return;
}
/* OK, we have to add the new address to the wait queue */
addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
if (addrw == NULL) {
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
return;
}
addrw->state = cmd;
- list_add_tail(&addrw->list, &sctp_addr_waitq);
+ list_add_tail(&addrw->list, &net->sctp.addr_waitq);
SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ",
- " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq);
+ " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq);
- if (!timer_pending(&sctp_addr_wq_timer)) {
+ if (!timer_pending(&net->sctp.addr_wq_timer)) {
timeo_val = jiffies;
timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
- mod_timer(&sctp_addr_wq_timer, timeo_val);
+ mod_timer(&net->sctp.addr_wq_timer, timeo_val);
}
- spin_unlock_bh(&sctp_addr_wq_lock);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
}
/* Event handler for inet address addition/deletion events.
@@ -776,11 +748,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
struct sctp_sockaddr_entry *addr = NULL;
struct sctp_sockaddr_entry *temp;
+ struct net *net = dev_net(ifa->ifa_dev->dev);
int found = 0;
- if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net))
- return NOTIFY_DONE;
-
switch (ev) {
case NETDEV_UP:
addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
@@ -789,27 +759,27 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
addr->a.v4.sin_port = 0;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
addr->valid = 1;
- spin_lock_bh(&sctp_local_addr_lock);
- list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
- sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW);
- spin_unlock_bh(&sctp_local_addr_lock);
+ spin_lock_bh(&net->sctp.local_addr_lock);
+ list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW);
+ spin_unlock_bh(&net->sctp.local_addr_lock);
}
break;
case NETDEV_DOWN:
- spin_lock_bh(&sctp_local_addr_lock);
+ spin_lock_bh(&net->sctp.local_addr_lock);
list_for_each_entry_safe(addr, temp,
- &sctp_local_addr_list, list) {
+ &net->sctp.local_addr_list, list) {
if (addr->a.sa.sa_family == AF_INET &&
addr->a.v4.sin_addr.s_addr ==
ifa->ifa_local) {
- sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL);
+ sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
found = 1;
addr->valid = 0;
list_del_rcu(&addr->list);
break;
}
}
- spin_unlock_bh(&sctp_local_addr_lock);
+ spin_unlock_bh(&net->sctp.local_addr_lock);
if (found)
kfree_rcu(addr, rcu);
break;
@@ -822,7 +792,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
* Initialize the control inode/socket with a control endpoint data
* structure. This endpoint is reserved exclusively for the OOTB processing.
*/
-static int sctp_ctl_sock_init(void)
+static int sctp_ctl_sock_init(struct net *net)
{
int err;
sa_family_t family = PF_INET;
@@ -830,14 +800,14 @@ static int sctp_ctl_sock_init(void)
if (sctp_get_pf_specific(PF_INET6))
family = PF_INET6;
- err = inet_ctl_sock_create(&sctp_ctl_sock, family,
- SOCK_SEQPACKET, IPPROTO_SCTP, &init_net);
+ err = inet_ctl_sock_create(&net->sctp.ctl_sock, family,
+ SOCK_SEQPACKET, IPPROTO_SCTP, net);
/* If IPv6 socket could not be created, try the IPv4 socket */
if (err < 0 && family == PF_INET6)
- err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET,
+ err = inet_ctl_sock_create(&net->sctp.ctl_sock, AF_INET,
SOCK_SEQPACKET, IPPROTO_SCTP,
- &init_net);
+ net);
if (err < 0) {
pr_err("Failed to create the SCTP control socket\n");
@@ -990,7 +960,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
- SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
+ SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
return ip_queue_xmit(skb, &transport->fl);
}
@@ -1063,6 +1033,7 @@ static const struct net_protocol sctp_protocol = {
.handler = sctp_rcv,
.err_handler = sctp_v4_err,
.no_policy = 1,
+ .netns_ok = 1,
};
/* IPv4 address related functions. */
@@ -1130,16 +1101,16 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
return 1;
}
-static inline int init_sctp_mibs(void)
+static inline int init_sctp_mibs(struct net *net)
{
- return snmp_mib_init((void __percpu **)sctp_statistics,
+ return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics,
sizeof(struct sctp_mib),
__alignof__(struct sctp_mib));
}
-static inline void cleanup_sctp_mibs(void)
+static inline void cleanup_sctp_mibs(struct net *net)
{
- snmp_mib_free((void __percpu **)sctp_statistics);
+ snmp_mib_free((void __percpu **)net->sctp.sctp_statistics);
}
static void sctp_v4_pf_init(void)
@@ -1194,6 +1165,143 @@ static void sctp_v4_del_protocol(void)
unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
}
+static int sctp_net_init(struct net *net)
+{
+ int status;
+
+ /*
+ * 14. Suggested SCTP Protocol Parameter Values
+ */
+ /* The following protocol parameters are RECOMMENDED: */
+ /* RTO.Initial - 3 seconds */
+ net->sctp.rto_initial = SCTP_RTO_INITIAL;
+ /* RTO.Min - 1 second */
+ net->sctp.rto_min = SCTP_RTO_MIN;
+ /* RTO.Max - 60 seconds */
+ net->sctp.rto_max = SCTP_RTO_MAX;
+ /* RTO.Alpha - 1/8 */
+ net->sctp.rto_alpha = SCTP_RTO_ALPHA;
+ /* RTO.Beta - 1/4 */
+ net->sctp.rto_beta = SCTP_RTO_BETA;
+
+ /* Valid.Cookie.Life - 60 seconds */
+ net->sctp.valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE;
+
+ /* Whether Cookie Preservative is enabled(1) or not(0) */
+ net->sctp.cookie_preserve_enable = 1;
+
+ /* Max.Burst - 4 */
+ net->sctp.max_burst = SCTP_DEFAULT_MAX_BURST;
+
+ /* Association.Max.Retrans - 10 attempts
+ * Path.Max.Retrans - 5 attempts (per destination address)
+ * Max.Init.Retransmits - 8 attempts
+ */
+ net->sctp.max_retrans_association = 10;
+ net->sctp.max_retrans_path = 5;
+ net->sctp.max_retrans_init = 8;
+
+ /* Sendbuffer growth - do per-socket accounting */
+ net->sctp.sndbuf_policy = 0;
+
+ /* Rcvbuffer growth - do per-socket accounting */
+ net->sctp.rcvbuf_policy = 0;
+
+ /* HB.interval - 30 seconds */
+ net->sctp.hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
+
+ /* delayed SACK timeout */
+ net->sctp.sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK;
+
+ /* Disable ADDIP by default. */
+ net->sctp.addip_enable = 0;
+ net->sctp.addip_noauth = 0;
+ net->sctp.default_auto_asconf = 0;
+
+ /* Enable PR-SCTP by default. */
+ net->sctp.prsctp_enable = 1;
+
+ /* Disable AUTH by default. */
+ net->sctp.auth_enable = 0;
+
+ /* Set SCOPE policy to enabled */
+ net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE;
+
+ /* Set the default rwnd update threshold */
+ net->sctp.rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT;
+
+ /* Initialize maximum autoclose timeout. */
+ net->sctp.max_autoclose = INT_MAX / HZ;
+
+ status = sctp_sysctl_net_register(net);
+ if (status)
+ goto err_sysctl_register;
+
+ /* Allocate and initialise sctp mibs. */
+ status = init_sctp_mibs(net);
+ if (status)
+ goto err_init_mibs;
+
+ /* Initialize proc fs directory. */
+ status = sctp_proc_init(net);
+ if (status)
+ goto err_init_proc;
+
+ sctp_dbg_objcnt_init(net);
+
+ /* Initialize the control inode/socket for handling OOTB packets. */
+ if ((status = sctp_ctl_sock_init(net))) {
+ pr_err("Failed to initialize the SCTP control sock\n");
+ goto err_ctl_sock_init;
+ }
+
+ /* Initialize the local address list. */
+ INIT_LIST_HEAD(&net->sctp.local_addr_list);
+ spin_lock_init(&net->sctp.local_addr_lock);
+ sctp_get_local_addr_list(net);
+
+ /* Initialize the address event list */
+ INIT_LIST_HEAD(&net->sctp.addr_waitq);
+ INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
+ spin_lock_init(&net->sctp.addr_wq_lock);
+ net->sctp.addr_wq_timer.expires = 0;
+ setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler,
+ (unsigned long)net);
+
+ return 0;
+
+err_ctl_sock_init:
+ sctp_dbg_objcnt_exit(net);
+ sctp_proc_exit(net);
+err_init_proc:
+ cleanup_sctp_mibs(net);
+err_init_mibs:
+ sctp_sysctl_net_unregister(net);
+err_sysctl_register:
+ return status;
+}
+
+static void sctp_net_exit(struct net *net)
+{
+ /* Free the local address list */
+ sctp_free_addr_wq(net);
+ sctp_free_local_addr_list(net);
+
+ /* Free the control endpoint. */
+ inet_ctl_sock_destroy(net->sctp.ctl_sock);
+
+ sctp_dbg_objcnt_exit(net);
+
+ sctp_proc_exit(net);
+ cleanup_sctp_mibs(net);
+ sctp_sysctl_net_unregister(net);
+}
+
+static struct pernet_operations sctp_net_ops = {
+ .init = sctp_net_init,
+ .exit = sctp_net_exit,
+};
+
/* Initialize the universe into something sensible. */
SCTP_STATIC __init int sctp_init(void)
{
@@ -1224,62 +1332,9 @@ SCTP_STATIC __init int sctp_init(void)
if (!sctp_chunk_cachep)
goto err_chunk_cachep;
- /* Allocate and initialise sctp mibs. */
- status = init_sctp_mibs();
+ status = percpu_counter_init(&sctp_sockets_allocated, 0);
if (status)
- goto err_init_mibs;
-
- /* Initialize proc fs directory. */
- status = sctp_proc_init();
- if (status)
- goto err_init_proc;
-
- /* Initialize object count debugging. */
- sctp_dbg_objcnt_init();
-
- /*
- * 14. Suggested SCTP Protocol Parameter Values
- */
- /* The following protocol parameters are RECOMMENDED: */
- /* RTO.Initial - 3 seconds */
- sctp_rto_initial = SCTP_RTO_INITIAL;
- /* RTO.Min - 1 second */
- sctp_rto_min = SCTP_RTO_MIN;
- /* RTO.Max - 60 seconds */
- sctp_rto_max = SCTP_RTO_MAX;
- /* RTO.Alpha - 1/8 */
- sctp_rto_alpha = SCTP_RTO_ALPHA;
- /* RTO.Beta - 1/4 */
- sctp_rto_beta = SCTP_RTO_BETA;
-
- /* Valid.Cookie.Life - 60 seconds */
- sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE;
-
- /* Whether Cookie Preservative is enabled(1) or not(0) */
- sctp_cookie_preserve_enable = 1;
-
- /* Max.Burst - 4 */
- sctp_max_burst = SCTP_DEFAULT_MAX_BURST;
-
- /* Association.Max.Retrans - 10 attempts
- * Path.Max.Retrans - 5 attempts (per destination address)
- * Max.Init.Retransmits - 8 attempts
- */
- sctp_max_retrans_association = 10;
- sctp_max_retrans_path = 5;
- sctp_max_retrans_init = 8;
-
- /* Sendbuffer growth - do per-socket accounting */
- sctp_sndbuf_policy = 0;
-
- /* Rcvbuffer growth - do per-socket accounting */
- sctp_rcvbuf_policy = 0;
-
- /* HB.interval - 30 seconds */
- sctp_hb_interval = SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
-
- /* delayed SACK timeout */
- sctp_sack_timeout = SCTP_DEFAULT_TIMEOUT_SACK;
+ goto err_percpu_counter_init;
/* Implementation specific variables. */
@@ -1287,9 +1342,6 @@ SCTP_STATIC __init int sctp_init(void)
sctp_max_instreams = SCTP_DEFAULT_INSTREAMS;
sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS;
- /* Initialize maximum autoclose timeout. */
- sctp_max_autoclose = INT_MAX / HZ;
-
/* Initialize handle used for association ids. */
idr_init(&sctp_assocs_id);
@@ -1376,41 +1428,12 @@ SCTP_STATIC __init int sctp_init(void)
pr_info("Hash tables configured (established %d bind %d)\n",
sctp_assoc_hashsize, sctp_port_hashsize);
- /* Disable ADDIP by default. */
- sctp_addip_enable = 0;
- sctp_addip_noauth = 0;
- sctp_default_auto_asconf = 0;
-
- /* Enable PR-SCTP by default. */
- sctp_prsctp_enable = 1;
-
- /* Disable AUTH by default. */
- sctp_auth_enable = 0;
-
- /* Set SCOPE policy to enabled */
- sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE;
-
- /* Set the default rwnd update threshold */
- sctp_rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT;
-
sctp_sysctl_register();
INIT_LIST_HEAD(&sctp_address_families);
sctp_v4_pf_init();
sctp_v6_pf_init();
- /* Initialize the local address list. */
- INIT_LIST_HEAD(&sctp_local_addr_list);
- spin_lock_init(&sctp_local_addr_lock);
- sctp_get_local_addr_list();
-
- /* Initialize the address event list */
- INIT_LIST_HEAD(&sctp_addr_waitq);
- INIT_LIST_HEAD(&sctp_auto_asconf_splist);
- spin_lock_init(&sctp_addr_wq_lock);
- sctp_addr_wq_timer.expires = 0;
- setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
-
status = sctp_v4_protosw_init();
if (status)
@@ -1420,11 +1443,9 @@ SCTP_STATIC __init int sctp_init(void)
if (status)
goto err_v6_protosw_init;
- /* Initialize the control inode/socket for handling OOTB packets. */
- if ((status = sctp_ctl_sock_init())) {
- pr_err("Failed to initialize the SCTP control sock\n");
- goto err_ctl_sock_init;
- }
+ status = register_pernet_subsys(&sctp_net_ops);
+ if (status)
+ goto err_register_pernet_subsys;
status = sctp_v4_add_protocol();
if (status)
@@ -1441,13 +1462,12 @@ out:
err_v6_add_protocol:
sctp_v4_del_protocol();
err_add_protocol:
- inet_ctl_sock_destroy(sctp_ctl_sock);
-err_ctl_sock_init:
+ unregister_pernet_subsys(&sctp_net_ops);
+err_register_pernet_subsys:
sctp_v6_protosw_exit();
err_v6_protosw_init:
sctp_v4_protosw_exit();
err_protosw_init:
- sctp_free_local_addr_list();
sctp_v4_pf_exit();
sctp_v6_pf_exit();
sctp_sysctl_unregister();
@@ -1461,11 +1481,8 @@ err_ehash_alloc:
get_order(sctp_assoc_hashsize *
sizeof(struct sctp_hashbucket)));
err_ahash_alloc:
- sctp_dbg_objcnt_exit();
- sctp_proc_exit();
-err_init_proc:
- cleanup_sctp_mibs();
-err_init_mibs:
+ percpu_counter_destroy(&sctp_sockets_allocated);
+err_percpu_counter_init:
kmem_cache_destroy(sctp_chunk_cachep);
err_chunk_cachep:
kmem_cache_destroy(sctp_bucket_cachep);
@@ -1482,18 +1499,13 @@ SCTP_STATIC __exit void sctp_exit(void)
/* Unregister with inet6/inet layers. */
sctp_v6_del_protocol();
sctp_v4_del_protocol();
- sctp_free_addr_wq();
- /* Free the control endpoint. */
- inet_ctl_sock_destroy(sctp_ctl_sock);
+ unregister_pernet_subsys(&sctp_net_ops);
/* Free protosw registrations */
sctp_v6_protosw_exit();
sctp_v4_protosw_exit();
- /* Free the local address list. */
- sctp_free_local_addr_list();
-
/* Unregister with socket layer. */
sctp_v6_pf_exit();
sctp_v4_pf_exit();
@@ -1508,9 +1520,7 @@ SCTP_STATIC __exit void sctp_exit(void)
get_order(sctp_port_hashsize *
sizeof(struct sctp_bind_hashbucket)));
- sctp_dbg_objcnt_exit();
- sctp_proc_exit();
- cleanup_sctp_mibs();
+ percpu_counter_destroy(&sctp_sockets_allocated);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 479a70ef6ff8..fbe1636309a7 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -198,6 +198,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
const struct sctp_bind_addr *bp,
gfp_t gfp, int vparam_len)
{
+ struct net *net = sock_net(asoc->base.sk);
sctp_inithdr_t init;
union sctp_params addrs;
size_t chunksize;
@@ -237,7 +238,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
chunksize += sizeof(ecap_param);
- if (sctp_prsctp_enable)
+ if (net->sctp.prsctp_enable)
chunksize += sizeof(prsctp_param);
/* ADDIP: Section 4.2.7:
@@ -245,7 +246,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
* the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and
* INIT-ACK parameters.
*/
- if (sctp_addip_enable) {
+ if (net->sctp.addip_enable) {
extensions[num_ext] = SCTP_CID_ASCONF;
extensions[num_ext+1] = SCTP_CID_ASCONF_ACK;
num_ext += 2;
@@ -257,7 +258,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
chunksize += vparam_len;
/* Account for AUTH related parameters */
- if (sctp_auth_enable) {
+ if (net->sctp.auth_enable) {
/* Add random parameter length*/
chunksize += sizeof(asoc->c.auth_random);
@@ -331,7 +332,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
sctp_addto_param(retval, num_ext, extensions);
}
- if (sctp_prsctp_enable)
+ if (net->sctp.prsctp_enable)
sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
if (sp->adaptation_ind) {
@@ -342,7 +343,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
}
/* Add SCTP-AUTH chunks to the parameter list */
- if (sctp_auth_enable) {
+ if (net->sctp.auth_enable) {
sctp_addto_chunk(retval, sizeof(asoc->c.auth_random),
asoc->c.auth_random);
if (auth_hmacs)
@@ -1940,7 +1941,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
return 0;
}
-static int sctp_verify_ext_param(union sctp_params param)
+static int sctp_verify_ext_param(struct net *net, union sctp_params param)
{
__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
int have_auth = 0;
@@ -1964,10 +1965,10 @@ static int sctp_verify_ext_param(union sctp_params param)
* only if ADD-IP is turned on and we are not backward-compatible
* mode.
*/
- if (sctp_addip_noauth)
+ if (net->sctp.addip_noauth)
return 1;
- if (sctp_addip_enable && !have_auth && have_asconf)
+ if (net->sctp.addip_enable && !have_auth && have_asconf)
return 0;
return 1;
@@ -1976,13 +1977,14 @@ static int sctp_verify_ext_param(union sctp_params param)
static void sctp_process_ext_param(struct sctp_association *asoc,
union sctp_params param)
{
+ struct net *net = sock_net(asoc->base.sk);
__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
int i;
for (i = 0; i < num_ext; i++) {
switch (param.ext->chunks[i]) {
case SCTP_CID_FWD_TSN:
- if (sctp_prsctp_enable &&
+ if (net->sctp.prsctp_enable &&
!asoc->peer.prsctp_capable)
asoc->peer.prsctp_capable = 1;
break;
@@ -1990,12 +1992,12 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
/* if the peer reports AUTH, assume that he
* supports AUTH.
*/
- if (sctp_auth_enable)
+ if (net->sctp.auth_enable)
asoc->peer.auth_capable = 1;
break;
case SCTP_CID_ASCONF:
case SCTP_CID_ASCONF_ACK:
- if (sctp_addip_enable)
+ if (net->sctp.addip_enable)
asoc->peer.asconf_capable = 1;
break;
default:
@@ -2081,7 +2083,8 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
* SCTP_IERROR_ERROR - stop processing, trigger an ERROR
* SCTP_IERROR_NO_ERROR - continue with the chunk
*/
-static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
+static sctp_ierror_t sctp_verify_param(struct net *net,
+ const struct sctp_association *asoc,
union sctp_params param,
sctp_cid_t cid,
struct sctp_chunk *chunk,
@@ -2110,12 +2113,12 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
break;
case SCTP_PARAM_SUPPORTED_EXT:
- if (!sctp_verify_ext_param(param))
+ if (!sctp_verify_ext_param(net, param))
return SCTP_IERROR_ABORT;
break;
case SCTP_PARAM_SET_PRIMARY:
- if (sctp_addip_enable)
+ if (net->sctp.addip_enable)
break;
goto fallthrough;
@@ -2126,12 +2129,12 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (sctp_prsctp_enable)
+ if (net->sctp.prsctp_enable)
break;
goto fallthrough;
case SCTP_PARAM_RANDOM:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fallthrough;
/* SCTP-AUTH: Secion 6.1
@@ -2148,7 +2151,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
break;
case SCTP_PARAM_CHUNKS:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fallthrough;
/* SCTP-AUTH: Section 3.2
@@ -2164,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
break;
case SCTP_PARAM_HMAC_ALGO:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fallthrough;
hmacs = (struct sctp_hmac_algo_param *)param.p;
@@ -2198,7 +2201,7 @@ fallthrough:
}
/* Verify the INIT packet before we process it. */
-int sctp_verify_init(const struct sctp_association *asoc,
+int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
sctp_cid_t cid,
sctp_init_chunk_t *peer_init,
struct sctp_chunk *chunk,
@@ -2245,7 +2248,7 @@ int sctp_verify_init(const struct sctp_association *asoc,
/* Verify all the variable length parameters */
sctp_walk_params(param, peer_init, init_hdr.params) {
- result = sctp_verify_param(asoc, param, cid, chunk, errp);
+ result = sctp_verify_param(net, asoc, param, cid, chunk, errp);
switch (result) {
case SCTP_IERROR_ABORT:
case SCTP_IERROR_NOMEM:
@@ -2270,6 +2273,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
const union sctp_addr *peer_addr,
sctp_init_chunk_t *peer_init, gfp_t gfp)
{
+ struct net *net = sock_net(asoc->base.sk);
union sctp_params param;
struct sctp_transport *transport;
struct list_head *pos, *temp;
@@ -2326,7 +2330,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
* also give us an option to silently ignore the packet, which
* is what we'll do here.
*/
- if (!sctp_addip_noauth &&
+ if (!net->sctp.addip_noauth &&
(asoc->peer.asconf_capable && !asoc->peer.auth_capable)) {
asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP |
SCTP_PARAM_DEL_IP |
@@ -2466,6 +2470,7 @@ static int sctp_process_param(struct sctp_association *asoc,
const union sctp_addr *peer_addr,
gfp_t gfp)
{
+ struct net *net = sock_net(asoc->base.sk);
union sctp_addr addr;
int i;
__u16 sat;
@@ -2494,13 +2499,13 @@ do_addr_param:
af = sctp_get_af_specific(param_type2af(param.p->type));
af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
scope = sctp_scope(peer_addr);
- if (sctp_in_scope(&addr, scope))
+ if (sctp_in_scope(net, &addr, scope))
if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
return 0;
break;
case SCTP_PARAM_COOKIE_PRESERVATIVE:
- if (!sctp_cookie_preserve_enable)
+ if (!net->sctp.cookie_preserve_enable)
break;
stale = ntohl(param.life->lifespan_increment);
@@ -2580,7 +2585,7 @@ do_addr_param:
break;
case SCTP_PARAM_SET_PRIMARY:
- if (!sctp_addip_enable)
+ if (!net->sctp.addip_enable)
goto fall_through;
addr_param = param.v + sizeof(sctp_addip_param_t);
@@ -2607,7 +2612,7 @@ do_addr_param:
break;
case SCTP_PARAM_FWD_TSN_SUPPORT:
- if (sctp_prsctp_enable) {
+ if (net->sctp.prsctp_enable) {
asoc->peer.prsctp_capable = 1;
break;
}
@@ -2615,7 +2620,7 @@ do_addr_param:
goto fall_through;
case SCTP_PARAM_RANDOM:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fall_through;
/* Save peer's random parameter */
@@ -2628,7 +2633,7 @@ do_addr_param:
break;
case SCTP_PARAM_HMAC_ALGO:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fall_through;
/* Save peer's HMAC list */
@@ -2644,7 +2649,7 @@ do_addr_param:
break;
case SCTP_PARAM_CHUNKS:
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
goto fall_through;
asoc->peer.peer_chunks = kmemdup(param.p,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index fe99628e1257..57f7de839b03 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -251,6 +251,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
int error;
struct sctp_transport *transport = (struct sctp_transport *) peer;
struct sctp_association *asoc = transport->asoc;
+ struct net *net = sock_net(asoc->base.sk);
/* Check whether a task is in the sock. */
@@ -271,7 +272,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
goto out_unlock;
/* Run through the state machine. */
- error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
asoc->state,
asoc->ep, asoc,
@@ -291,6 +292,7 @@ out_unlock:
static void sctp_generate_timeout_event(struct sctp_association *asoc,
sctp_event_timeout_t timeout_type)
{
+ struct net *net = sock_net(asoc->base.sk);
int error = 0;
sctp_bh_lock_sock(asoc->base.sk);
@@ -312,7 +314,7 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc,
goto out_unlock;
/* Run through the state machine. */
- error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(timeout_type),
asoc->state, asoc->ep, asoc,
(void *)timeout_type, GFP_ATOMIC);
@@ -371,6 +373,7 @@ void sctp_generate_heartbeat_event(unsigned long data)
int error = 0;
struct sctp_transport *transport = (struct sctp_transport *) data;
struct sctp_association *asoc = transport->asoc;
+ struct net *net = sock_net(asoc->base.sk);
sctp_bh_lock_sock(asoc->base.sk);
if (sock_owned_by_user(asoc->base.sk)) {
@@ -388,7 +391,7 @@ void sctp_generate_heartbeat_event(unsigned long data)
if (transport->dead)
goto out_unlock;
- error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+ error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
asoc->state, asoc->ep, asoc,
transport, GFP_ATOMIC);
@@ -408,6 +411,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
{
struct sctp_transport *transport = (struct sctp_transport *) data;
struct sctp_association *asoc = transport->asoc;
+ struct net *net = sock_net(asoc->base.sk);
sctp_bh_lock_sock(asoc->base.sk);
if (sock_owned_by_user(asoc->base.sk)) {
@@ -426,7 +430,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
if (asoc->base.dead)
goto out_unlock;
- sctp_do_sm(SCTP_EVENT_T_OTHER,
+ sctp_do_sm(net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
@@ -748,13 +752,15 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
/* Helper function to process the process SACK command. */
static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
struct sctp_association *asoc,
- struct sctp_sackhdr *sackh)
+ struct sctp_chunk *chunk)
{
int err = 0;
- if (sctp_outq_sack(&asoc->outqueue, sackh)) {
+ if (sctp_outq_sack(&asoc->outqueue, chunk)) {
+ struct net *net = sock_net(asoc->base.sk);
+
/* There are no more TSNs awaiting SACK. */
- err = sctp_do_sm(SCTP_EVENT_T_OTHER,
+ err = sctp_do_sm(net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN),
asoc->state, asoc->ep, asoc, NULL,
GFP_ATOMIC);
@@ -1042,6 +1048,8 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc,
*/
static void sctp_cmd_send_asconf(struct sctp_association *asoc)
{
+ struct net *net = sock_net(asoc->base.sk);
+
/* Send the next asconf chunk from the addip chunk
* queue.
*/
@@ -1053,7 +1061,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
/* Hold the chunk until an ASCONF_ACK is received. */
sctp_chunk_hold(asconf);
- if (sctp_primitive_ASCONF(asoc, asconf))
+ if (sctp_primitive_ASCONF(net, asoc, asconf))
sctp_chunk_free(asconf);
else
asoc->addip_last_asconf = asconf;
@@ -1089,7 +1097,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
* If you want to understand all of lksctp, this is a
* good place to start.
*/
-int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
+int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
sctp_state_t state,
struct sctp_endpoint *ep,
struct sctp_association *asoc,
@@ -1110,12 +1118,12 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
/* Look up the state function, run it, and then process the
* side effects. These three steps are the heart of lksctp.
*/
- state_fn = sctp_sm_lookup_event(event_type, state, subtype);
+ state_fn = sctp_sm_lookup_event(net, event_type, state, subtype);
sctp_init_cmd_seq(&commands);
DEBUG_PRE;
- status = (*state_fn->fn)(ep, asoc, subtype, event_arg, &commands);
+ status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands);
DEBUG_POST;
error = sctp_side_effects(event_type, subtype, state,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 9fca10357350..b6adef8a1e93 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -66,7 +66,8 @@
#include <net/sctp/sm.h>
#include <net/sctp/structs.h>
-static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
+static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
const void *payload,
@@ -74,36 +75,43 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
static int sctp_eat_data(const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands);
-static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc,
+static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
+ const struct sctp_association *asoc,
const struct sctp_chunk *chunk);
-static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
+static void sctp_send_stale_cookie_err(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
struct sctp_chunk *err_chunk);
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands);
static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
-static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
+static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
+ sctp_cmd_seq_t *commands,
__be16 error, int sk_err,
const struct sctp_association *asoc,
struct sctp_transport *transport);
static sctp_disposition_t sctp_sf_abort_violation(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
void *arg,
@@ -112,6 +120,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
const size_t paylen);
static sctp_disposition_t sctp_sf_violation_chunklen(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -119,6 +128,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
sctp_cmd_seq_t *commands);
static sctp_disposition_t sctp_sf_violation_paramlen(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -126,6 +136,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
sctp_cmd_seq_t *commands);
static sctp_disposition_t sctp_sf_violation_ctsn(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -133,18 +144,21 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
sctp_cmd_seq_t *commands);
static sctp_disposition_t sctp_sf_violation_chunk(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands);
-static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
+static sctp_ierror_t sctp_sf_authenticate(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
struct sctp_chunk *chunk);
-static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -204,7 +218,8 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_4_C(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -214,7 +229,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
struct sctp_ulpevent *ev;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* RFC 2960 6.10 Bundling
*
@@ -222,11 +237,11 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
* SHUTDOWN COMPLETE with any other chunks.
*/
if (!chunk->singleton)
- return sctp_sf_violation_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* RFC 2960 10.2 SCTP-to-ULP
@@ -259,8 +274,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
@@ -289,7 +304,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -313,21 +329,21 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
* with an INIT chunk that is bundled with other chunks.
*/
if (!chunk->singleton)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
- if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) {
- SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
+ SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
}
/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
* Tag.
*/
if (chunk->sctp_hdr->vtag != 0)
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
/* Make sure that the INIT chunk has a valid length.
* Normally, this would cause an ABORT with a Protocol Violation
@@ -335,7 +351,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
* just discard the packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* If the INIT is coming toward a closing socket, we'll send back
* and ABORT. Essentially, this catches the race of INIT being
@@ -344,18 +360,18 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
* can treat this OOTB
*/
if (sctp_sstate(ep->base.sk, CLOSING))
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
* Send an ABORT, with causes if there is any.
*/
if (err_chunk) {
- packet = sctp_abort_pkt_new(ep, asoc, arg,
+ packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
sizeof(sctp_chunkhdr_t),
ntohs(err_chunk->chunk_hdr->length) -
@@ -366,13 +382,13 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
if (packet) {
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
return SCTP_DISPOSITION_CONSUME;
} else {
return SCTP_DISPOSITION_NOMEM;
}
} else {
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg,
commands);
}
}
@@ -484,7 +500,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -496,25 +513,25 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
struct sctp_packet *packet;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* 6.10 Bundling
* An endpoint MUST NOT bundle INIT, INIT ACK or
* SHUTDOWN COMPLETE with any other chunks.
*/
if (!chunk->singleton)
- return sctp_sf_violation_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the INIT-ACK chunk has a valid length */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Grab the INIT header. */
chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
@@ -526,7 +543,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
* the association.
*/
if (err_chunk) {
- packet = sctp_abort_pkt_new(ep, asoc, arg,
+ packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
sizeof(sctp_chunkhdr_t),
ntohs(err_chunk->chunk_hdr->length) -
@@ -537,7 +554,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
if (packet) {
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
error = SCTP_ERROR_INV_PARAM;
}
}
@@ -554,10 +571,10 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
* was malformed.
*/
if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED,
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED,
asoc, chunk->transport);
}
@@ -633,7 +650,8 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type, void *arg,
sctp_cmd_seq_t *commands)
@@ -650,9 +668,9 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
- if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) {
- SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
+ SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
}
/* Make sure that the COOKIE_ECHO chunk has a valid length.
@@ -661,7 +679,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
* in sctp_unpack_cookie().
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* If the endpoint is not listening or if the number of associations
* on the TCP-style socket exceed the max backlog, respond with an
@@ -670,7 +688,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
sk = ep->base.sk;
if (!sctp_sstate(sk, LISTENING) ||
(sctp_style(sk, TCP) && sk_acceptq_is_full(sk)))
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
/* "Decode" the chunk. We have no optional parameters so we
* are in good shape.
@@ -703,13 +721,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
goto nomem;
case -SCTP_IERROR_STALE_COOKIE:
- sctp_send_stale_cookie_err(ep, asoc, chunk, commands,
+ sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands,
err_chk_p);
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
case -SCTP_IERROR_BAD_SIG:
default:
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
}
@@ -756,14 +774,14 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t));
auth.transport = chunk->transport;
- ret = sctp_sf_authenticate(ep, new_asoc, type, &auth);
+ ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
/* We can now safely free the auth_chunk clone */
kfree_skb(chunk->auth_chunk);
if (ret != SCTP_IERROR_NO_ERROR) {
sctp_association_free(new_asoc);
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
}
@@ -804,8 +822,8 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
- SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
if (new_asoc->autoclose)
@@ -856,7 +874,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type, void *arg,
sctp_cmd_seq_t *commands)
@@ -865,13 +884,13 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
struct sctp_ulpevent *ev;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Verify that the chunk length for the COOKIE-ACK is OK.
* If we don't do this, any bundled chunks may be junked.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Reset init error count upon receipt of COOKIE-ACK,
@@ -892,8 +911,8 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
- SCTP_INC_STATS(SCTP_MIB_ACTIVEESTABS);
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
if (asoc->autoclose)
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
@@ -958,7 +977,8 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
}
/* Generate a HEARTBEAT packet on the given transport. */
-sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -972,8 +992,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
}
@@ -1028,7 +1048,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1039,11 +1060,11 @@ sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep,
size_t paylen = 0;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* 8.3 The receiver of the HEARTBEAT should immediately
@@ -1095,7 +1116,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1108,12 +1130,12 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
unsigned long max_interval;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the HEARTBEAT-ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) +
sizeof(sctp_sender_hb_info_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
@@ -1171,7 +1193,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
/* Helper function to send out an abort for the restart
* condition.
*/
-static int sctp_sf_send_restart_abort(union sctp_addr *ssa,
+static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
struct sctp_chunk *init,
sctp_cmd_seq_t *commands)
{
@@ -1197,18 +1219,18 @@ static int sctp_sf_send_restart_abort(union sctp_addr *ssa,
errhdr->length = htons(len);
/* Assign to the control socket. */
- ep = sctp_sk((sctp_get_ctl_sock()))->ep;
+ ep = sctp_sk(net->sctp.ctl_sock)->ep;
/* Association is NULL since this may be a restart attack and we
* want to send back the attacker's vtag.
*/
- pkt = sctp_abort_pkt_new(ep, NULL, init, errhdr, len);
+ pkt = sctp_abort_pkt_new(net, ep, NULL, init, errhdr, len);
if (!pkt)
goto out;
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
/* Discard the rest of the inbound packet. */
sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
@@ -1240,6 +1262,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
struct sctp_chunk *init,
sctp_cmd_seq_t *commands)
{
+ struct net *net = sock_net(new_asoc->base.sk);
struct sctp_transport *new_addr;
int ret = 1;
@@ -1258,7 +1281,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
transports) {
if (!list_has_sctp_addr(&asoc->peer.transport_addr_list,
&new_addr->ipaddr)) {
- sctp_sf_send_restart_abort(&new_addr->ipaddr, init,
+ sctp_sf_send_restart_abort(net, &new_addr->ipaddr, init,
commands);
ret = 0;
break;
@@ -1358,6 +1381,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc,
* chunk handling.
*/
static sctp_disposition_t sctp_sf_do_unexpected_init(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -1382,20 +1406,20 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
* with an INIT chunk that is bundled with other chunks.
*/
if (!chunk->singleton)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
* Tag.
*/
if (chunk->sctp_hdr->vtag != 0)
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
/* Make sure that the INIT chunk has a valid length.
* In this case, we generate a protocol violation since we have
* an association established.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Grab the INIT header. */
chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
@@ -1405,14 +1429,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
/* Verify the INIT chunk before processing it. */
err_chunk = NULL;
- if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+ if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
(sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
&err_chunk)) {
/* This chunk contains fatal error. It is to be discarded.
* Send an ABORT, with causes if there is any.
*/
if (err_chunk) {
- packet = sctp_abort_pkt_new(ep, asoc, arg,
+ packet = sctp_abort_pkt_new(net, ep, asoc, arg,
(__u8 *)(err_chunk->chunk_hdr) +
sizeof(sctp_chunkhdr_t),
ntohs(err_chunk->chunk_hdr->length) -
@@ -1421,14 +1445,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
if (packet) {
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
retval = SCTP_DISPOSITION_CONSUME;
} else {
retval = SCTP_DISPOSITION_NOMEM;
}
goto cleanup;
} else {
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg,
commands);
}
}
@@ -1570,7 +1594,8 @@ cleanup:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1579,7 +1604,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
/* Call helper to do the real work for both simulataneous and
* duplicate INIT chunk handling.
*/
- return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands);
+ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands);
}
/*
@@ -1623,7 +1648,8 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1632,7 +1658,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
/* Call helper to do the real work for both simulataneous and
* duplicate INIT chunk handling.
*/
- return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands);
+ return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands);
}
@@ -1645,7 +1671,8 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
* An unexpected INIT ACK usually indicates the processing of an old or
* duplicated INIT chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg, sctp_cmd_seq_t *commands)
@@ -1653,10 +1680,10 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
/* Per the above section, we'll discard the chunk if we have an
* endpoint. If this is an OOTB INIT-ACK, treat it as such.
*/
- if (ep == sctp_sk((sctp_get_ctl_sock()))->ep)
- return sctp_sf_ootb(ep, asoc, type, arg, commands);
+ if (ep == sctp_sk(net->sctp.ctl_sock)->ep)
+ return sctp_sf_ootb(net, ep, asoc, type, arg, commands);
else
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
}
/* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A')
@@ -1664,7 +1691,8 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
* Section 5.2.4
* A) In this case, the peer may have restarted.
*/
-static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -1700,7 +1728,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
* its peer.
*/
if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) {
- disposition = sctp_sf_do_9_2_reshutack(ep, asoc,
+ disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc,
SCTP_ST_CHUNK(chunk->chunk_hdr->type),
chunk, commands);
if (SCTP_DISPOSITION_NOMEM == disposition)
@@ -1763,7 +1791,8 @@ nomem:
* after responding to the local endpoint's INIT
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -1784,7 +1813,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -1833,7 +1862,8 @@ nomem:
* but a new tag of its own.
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -1854,7 +1884,8 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep,
* enter the ESTABLISHED state, if it has not already done so.
*/
/* This case represents an initialization collision. */
-static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -1876,7 +1907,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START,
SCTP_NULL());
@@ -1948,7 +1979,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -1967,7 +1999,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
* done later.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* "Decode" the chunk. We have no optional parameters so we
@@ -2001,12 +2033,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
goto nomem;
case -SCTP_IERROR_STALE_COOKIE:
- sctp_send_stale_cookie_err(ep, asoc, chunk, commands,
+ sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands,
err_chk_p);
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
case -SCTP_IERROR_BAD_SIG:
default:
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
}
@@ -2017,27 +2049,27 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
switch (action) {
case 'A': /* Association restart. */
- retval = sctp_sf_do_dupcook_a(ep, asoc, chunk, commands,
+ retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands,
new_asoc);
break;
case 'B': /* Collision case B. */
- retval = sctp_sf_do_dupcook_b(ep, asoc, chunk, commands,
+ retval = sctp_sf_do_dupcook_b(net, ep, asoc, chunk, commands,
new_asoc);
break;
case 'C': /* Collision case C. */
- retval = sctp_sf_do_dupcook_c(ep, asoc, chunk, commands,
+ retval = sctp_sf_do_dupcook_c(net, ep, asoc, chunk, commands,
new_asoc);
break;
case 'D': /* Collision case D. */
- retval = sctp_sf_do_dupcook_d(ep, asoc, chunk, commands,
+ retval = sctp_sf_do_dupcook_d(net, ep, asoc, chunk, commands,
new_asoc);
break;
default: /* Discard packet for all others. */
- retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ retval = sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
break;
}
@@ -2063,6 +2095,7 @@ nomem:
* See sctp_sf_do_9_1_abort().
*/
sctp_disposition_t sctp_sf_shutdown_pending_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -2072,7 +2105,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
struct sctp_chunk *chunk = arg;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ABORT chunk has a valid length.
* Since this is an ABORT chunk, we have to discard it
@@ -2085,7 +2118,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
* packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
* F4) One special consideration is that ABORT Chunks arriving
@@ -2094,9 +2127,9 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
- return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+ return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -2104,7 +2137,8 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
*
* See sctp_sf_do_9_1_abort().
*/
-sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2113,7 +2147,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ABORT chunk has a valid length.
* Since this is an ABORT chunk, we have to discard it
@@ -2126,7 +2160,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
* packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
* F4) One special consideration is that ABORT Chunks arriving
@@ -2135,7 +2169,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
/* Stop the T2-shutdown timer. */
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -2145,7 +2179,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
- return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+ return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -2154,6 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
* See sctp_sf_do_9_1_abort().
*/
sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -2163,7 +2198,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
/* The same T2 timer, so we should be able to use
* common function with the SHUTDOWN-SENT state.
*/
- return sctp_sf_shutdown_sent_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_shutdown_sent_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -2180,7 +2215,8 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2190,13 +2226,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
sctp_errhdr_t *err;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ERROR chunk has a valid length.
* The parameter walking depends on this as well.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Process the error here */
@@ -2206,7 +2242,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
*/
sctp_walk_errors(err, chunk->chunk_hdr) {
if (SCTP_ERROR_STALE_COOKIE == err->cause)
- return sctp_sf_do_5_2_6_stale(ep, asoc, type,
+ return sctp_sf_do_5_2_6_stale(net, ep, asoc, type,
arg, commands);
}
@@ -2215,7 +2251,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
* we are discarding the packet, there should be no adverse
* affects.
*/
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/*
@@ -2243,7 +2279,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2365,7 +2402,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2374,7 +2412,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ABORT chunk has a valid length.
* Since this is an ABORT chunk, we have to discard it
@@ -2387,7 +2425,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
* packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* ADD-IP: Special case for ABORT chunks
* F4) One special consideration is that ABORT Chunks arriving
@@ -2396,12 +2434,13 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
- return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+ return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
}
-static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2418,7 +2457,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
sctp_errhdr_t *err;
sctp_walk_errors(err, chunk->chunk_hdr);
if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
}
@@ -2426,8 +2465,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
/* ASSOC_FAILED will DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
}
@@ -2437,7 +2476,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
*
* See sctp_sf_do_9_1_abort() above.
*/
-sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2448,7 +2488,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
__be16 error = SCTP_ERROR_NO_ERROR;
if (!sctp_vtag_verify_either(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ABORT chunk has a valid length.
* Since this is an ABORT chunk, we have to discard it
@@ -2461,27 +2501,28 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
* packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* See if we have an error cause code in the chunk. */
len = ntohs(chunk->chunk_hdr->length);
if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
- return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc,
+ return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc,
chunk->transport);
}
/*
* Process an incoming ICMP as an ABORT. (COOKIE-WAIT state)
*/
-sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands)
{
- return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR,
+ return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR,
ENOPROTOOPT, asoc,
(struct sctp_transport *)arg);
}
@@ -2489,7 +2530,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep
/*
* Process an ABORT. (COOKIE-ECHOED state)
*/
-sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2498,7 +2540,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
*/
- return sctp_sf_cookie_wait_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_cookie_wait_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -2506,7 +2548,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
*
* This is common code called by several sctp_sf_*_abort() functions above.
*/
-static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
+static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
+ sctp_cmd_seq_t *commands,
__be16 error, int sk_err,
const struct sctp_association *asoc,
struct sctp_transport *transport)
@@ -2514,7 +2557,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
SCTP_DEBUG_PRINTK("ABORT received (INIT).\n");
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err));
@@ -2557,7 +2600,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2570,12 +2614,12 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk,
sizeof(struct sctp_shutdown_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Convert the elaborate header. */
@@ -2595,7 +2639,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
* sender with an ABORT.
*/
if (!TSN_lt(ctsn, asoc->next_tsn))
- return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
/* API 5.3.1.5 SCTP_SHUTDOWN_EVENT
* When a peer sends a SHUTDOWN, SCTP delivers this notification to
@@ -2619,7 +2663,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
disposition = SCTP_DISPOSITION_CONSUME;
if (sctp_outq_is_empty(&asoc->outqueue)) {
- disposition = sctp_sf_do_9_2_shutdown_ack(ep, asoc, type,
+ disposition = sctp_sf_do_9_2_shutdown_ack(net, ep, asoc, type,
arg, commands);
}
@@ -2645,7 +2689,8 @@ out:
* The Cumulative TSN Ack of the received SHUTDOWN chunk
* MUST be processed.
*/
-sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2656,12 +2701,12 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk,
sizeof(struct sctp_shutdown_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
@@ -2678,7 +2723,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
* sender with an ABORT.
*/
if (!TSN_lt(ctsn, asoc->next_tsn))
- return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
/* verify, by checking the Cumulative TSN Ack field of the
* chunk, that all its outstanding DATA chunks have been
@@ -2697,7 +2742,8 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
* that belong to this association, it should discard the INIT chunk and
* retransmit the SHUTDOWN ACK chunk.
*/
-sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2708,7 +2754,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep,
/* Make sure that the chunk has a valid length */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Since we are not going to really process this INIT, there
@@ -2760,7 +2806,8 @@ nomem:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2771,10 +2818,10 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
u32 lowest_tsn;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
cwr = (sctp_cwrhdr_t *) chunk->skb->data;
@@ -2815,7 +2862,8 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_ecne(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2825,10 +2873,10 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
ecne = (sctp_ecnehdr_t *) chunk->skb->data;
@@ -2871,7 +2919,8 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -2884,11 +2933,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
error = sctp_eat_data(asoc, chunk, commands );
@@ -2897,16 +2946,16 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
break;
case SCTP_IERROR_HIGH_TSN:
case SCTP_IERROR_BAD_STREAM:
- SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
+ SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
goto discard_noforce;
case SCTP_IERROR_DUP_TSN:
case SCTP_IERROR_IGNORE_TSN:
- SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
+ SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
goto discard_force;
case SCTP_IERROR_NO_DATA:
goto consume;
case SCTP_IERROR_PROTO_VIOLATION:
- return sctp_sf_abort_violation(ep, asoc, chunk, commands,
+ return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
default:
BUG();
@@ -2992,7 +3041,8 @@ consume:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3004,11 +3054,11 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
error = sctp_eat_data(asoc, chunk, commands );
@@ -3022,7 +3072,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
case SCTP_IERROR_NO_DATA:
goto consume;
case SCTP_IERROR_PROTO_VIOLATION:
- return sctp_sf_abort_violation(ep, asoc, chunk, commands,
+ return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
default:
BUG();
@@ -3082,7 +3132,8 @@ consume:
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3093,18 +3144,18 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
__u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Pull the SACK chunk from the data buffer */
sackh = sctp_sm_pull_sack(chunk);
/* Was this a bogus SACK? */
if (!sackh)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
chunk->subh.sack_hdr = sackh;
ctsn = ntohl(sackh->cum_tsn_ack);
@@ -3125,10 +3176,10 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
* sender with an ABORT.
*/
if (!TSN_lt(ctsn, asoc->next_tsn))
- return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+ return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
/* Return this SACK for further processing. */
- sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh));
+ sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_CHUNK(chunk));
/* Note: We do the rest of the work on the PROCESS_SACK
* sideeffect.
@@ -3154,7 +3205,8 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3164,7 +3216,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
struct sctp_chunk *abort;
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (packet) {
/* Make an ABORT. The T bit will be set if the asoc
@@ -3188,9 +3240,9 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
- sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
return SCTP_DISPOSITION_CONSUME;
}
@@ -3205,7 +3257,8 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_operr_notify(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3215,15 +3268,15 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
sctp_errhdr_t *err;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the ERROR chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
sctp_walk_errors(err, chunk->chunk_hdr);
if ((void *)err != (void *)chunk->chunk_end)
- return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+ return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err, commands);
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR,
@@ -3242,7 +3295,8 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3253,11 +3307,11 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
struct sctp_ulpevent *ev;
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* 10.2 H) SHUTDOWN COMPLETE notification
*
@@ -3290,8 +3344,8 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
/* ...and remove all record of the association. */
@@ -3324,7 +3378,8 @@ nomem:
* receiver of the OOTB packet shall discard the OOTB packet and take
* no further action.
*/
-sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_ootb(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3338,13 +3393,13 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
int ootb_shut_ack = 0;
int ootb_cookie_ack = 0;
- SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
do {
/* Report violation if the chunk is less then minimal */
if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Now that we know we at least have a chunk header,
@@ -3359,7 +3414,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
* sending an ABORT of its own.
*/
if (SCTP_CID_ABORT == ch->type)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR
* or a COOKIE ACK the SCTP Packet should be silently
@@ -3381,18 +3436,18 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
/* Report violation if chunk len overflows */
ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
if (ch_end > skb_tail_pointer(skb))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
ch = (sctp_chunkhdr_t *) ch_end;
} while (ch_end < skb_tail_pointer(skb));
if (ootb_shut_ack)
- return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
+ return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands);
else if (ootb_cookie_ack)
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
else
- return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
}
/*
@@ -3416,7 +3471,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3426,7 +3482,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
struct sctp_chunk *chunk = arg;
struct sctp_chunk *shut;
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (packet) {
/* Make an SHUTDOWN_COMPLETE.
@@ -3450,19 +3506,19 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
/* If the chunk length is invalid, we don't want to process
* the reset of the packet.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* We need to discard the rest of the packet to prevent
* potential bomming attacks from additional bundled chunks.
* This is documented in SCTP Threats ID.
*/
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
return SCTP_DISPOSITION_NOMEM;
@@ -3479,7 +3535,8 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
* chunks. --piggy ]
*
*/
-sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3489,7 +3546,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
/* Although we do have an association in this case, it corresponds
@@ -3497,13 +3554,14 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
* packet and the state function that handles OOTB SHUTDOWN_ACK is
* called with a NULL association.
*/
- SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
- return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands);
+ return sctp_sf_shut_8_4_5(net, ep, NULL, type, arg, commands);
}
/* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. */
-sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_asconf(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type, void *arg,
sctp_cmd_seq_t *commands)
@@ -3519,7 +3577,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* ADD-IP: Section 4.1.1
@@ -3528,12 +3586,12 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
* is received unauthenticated it MUST be silently discarded as
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
- if (!sctp_addip_noauth && !chunk->auth)
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ if (!net->sctp.addip_noauth && !chunk->auth)
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the ASCONF ADDIP chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
hdr = (sctp_addiphdr_t *)chunk->skb->data;
@@ -3542,7 +3600,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
addr_param = (union sctp_addr_param *)hdr->params;
length = ntohs(addr_param->p.length);
if (length < sizeof(sctp_paramhdr_t))
- return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+ return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)addr_param, commands);
/* Verify the ASCONF chunk before processing it. */
@@ -3550,7 +3608,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
(sctp_paramhdr_t *)((void *)addr_param + length),
(void *)chunk->chunk_end,
&err_param))
- return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+ return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err_param, commands);
/* ADDIP 5.2 E1) Compare the value of the serial number to the value
@@ -3630,7 +3688,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
* When building TLV parameters for the ASCONF Chunk that will add or
* delete IP addresses the D0 to D13 rules should be applied:
*/
-sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type, void *arg,
sctp_cmd_seq_t *commands)
@@ -3645,7 +3704,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(asconf_ack, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* ADD-IP, Section 4.1.2:
@@ -3654,12 +3713,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
* is received unauthenticated it MUST be silently discarded as
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
- if (!sctp_addip_noauth && !asconf_ack->auth)
- return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+ if (!net->sctp.addip_noauth && !asconf_ack->auth)
+ return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
/* Make sure that the ADDIP chunk has a valid length. */
if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data;
@@ -3670,7 +3729,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
(sctp_paramhdr_t *)addip_hdr->params,
(void *)asconf_ack->chunk_end,
&err_param))
- return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+ return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err_param, commands);
if (last_asconf) {
@@ -3705,8 +3764,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
}
@@ -3739,8 +3798,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
}
@@ -3761,7 +3820,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -3776,12 +3836,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* Make sure that the FORWARD_TSN chunk has valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data;
@@ -3828,6 +3888,7 @@ discard_noforce:
}
sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -3843,12 +3904,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* Make sure that the FORWARD_TSN chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data;
@@ -3915,7 +3976,8 @@ gen_shutdown:
*
* The return value is the disposition of the chunk.
*/
-static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
+static sctp_ierror_t sctp_sf_authenticate(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
struct sctp_chunk *chunk)
@@ -3988,7 +4050,8 @@ nomem:
return SCTP_IERROR_NOMEM;
}
-sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_auth(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4001,21 +4064,21 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
/* Make sure that the peer has AUTH capable */
if (!asoc->peer.auth_capable)
- return sctp_sf_unk_chunk(ep, asoc, type, arg, commands);
+ return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
SCTP_NULL());
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
/* Make sure that the AUTH chunk has valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_auth_chunk)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
- error = sctp_sf_authenticate(ep, asoc, type, chunk);
+ error = sctp_sf_authenticate(net, ep, asoc, type, chunk);
switch (error) {
case SCTP_IERROR_AUTH_BAD_HMAC:
/* Generate the ERROR chunk and discard the rest
@@ -4032,10 +4095,10 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
/* Fall Through */
case SCTP_IERROR_AUTH_BAD_KEYID:
case SCTP_IERROR_BAD_SIG:
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
case SCTP_IERROR_PROTO_VIOLATION:
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
case SCTP_IERROR_NOMEM:
@@ -4084,7 +4147,8 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4097,20 +4161,20 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk);
if (!sctp_vtag_verify(unk_chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the chunk has a valid length.
* Since we don't know the chunk type, we use a general
* chunkhdr structure to make a comparison.
*/
if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
switch (type.chunk & SCTP_CID_ACTION_MASK) {
case SCTP_CID_ACTION_DISCARD:
/* Discard the packet. */
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
break;
case SCTP_CID_ACTION_DISCARD_ERR:
/* Generate an ERROR chunk as response. */
@@ -4125,7 +4189,7 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
}
/* Discard the packet. */
- sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
return SCTP_DISPOSITION_CONSUME;
break;
case SCTP_CID_ACTION_SKIP:
@@ -4167,7 +4231,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4180,7 +4245,7 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
* chunkhdr structure to make a comparison.
*/
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk);
@@ -4205,13 +4270,14 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_pdiscard(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands)
{
- SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS);
+ SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
return SCTP_DISPOSITION_CONSUME;
@@ -4232,7 +4298,8 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
* We simply tag the chunk as a violation. The state machine will log
* the violation and continue.
*/
-sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_violation(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4242,7 +4309,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
/* Make sure that the chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
return SCTP_DISPOSITION_VIOLATION;
@@ -4252,6 +4319,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
* Common function to handle a protocol violation.
*/
static sctp_disposition_t sctp_sf_abort_violation(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
void *arg,
@@ -4302,7 +4370,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
}
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) {
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -4316,10 +4384,10 @@ static sctp_disposition_t sctp_sf_abort_violation(
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
}
} else {
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (!packet)
goto nomem_pkt;
@@ -4334,13 +4402,13 @@ static sctp_disposition_t sctp_sf_abort_violation(
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
}
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
discard:
- sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+ sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
return SCTP_DISPOSITION_ABORT;
nomem_pkt:
@@ -4369,6 +4437,7 @@ nomem:
* Generate an ABORT chunk and terminate the association.
*/
static sctp_disposition_t sctp_sf_violation_chunklen(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4377,7 +4446,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
{
static const char err_str[]="The following chunk had invalid length:";
- return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+ return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
sizeof(err_str));
}
@@ -4388,6 +4457,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
* the length is considered as invalid.
*/
static sctp_disposition_t sctp_sf_violation_paramlen(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4407,17 +4477,17 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
goto nomem;
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
discard:
- sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+ sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
return SCTP_DISPOSITION_ABORT;
nomem:
return SCTP_DISPOSITION_NOMEM;
@@ -4430,6 +4500,7 @@ nomem:
* error code.
*/
static sctp_disposition_t sctp_sf_violation_ctsn(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4438,7 +4509,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
{
static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:";
- return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+ return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
sizeof(err_str));
}
@@ -4449,6 +4520,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
* on the path and we may not want to continue this communication.
*/
static sctp_disposition_t sctp_sf_violation_chunk(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4458,9 +4530,9 @@ static sctp_disposition_t sctp_sf_violation_chunk(
static const char err_str[]="The following chunk violates protocol:";
if (!asoc)
- return sctp_sf_violation(ep, asoc, type, arg, commands);
+ return sctp_sf_violation(net, ep, asoc, type, arg, commands);
- return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+ return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
sizeof(err_str));
}
/***************************************************************************
@@ -4523,7 +4595,8 @@ static sctp_disposition_t sctp_sf_violation_chunk(
*
* The return value is a disposition.
*/
-sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4634,7 +4707,8 @@ nomem:
*
* The return value is the disposition.
*/
-sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4673,6 +4747,7 @@ sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep,
* The return value is the disposition.
*/
sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4694,7 +4769,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
disposition = SCTP_DISPOSITION_CONSUME;
if (sctp_outq_is_empty(&asoc->outqueue)) {
- disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
+ disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
arg, commands);
}
return disposition;
@@ -4728,6 +4803,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
* The return value is the disposition.
*/
sctp_disposition_t sctp_sf_do_9_1_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4759,14 +4835,15 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_USER_ABORT));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return retval;
}
/* We tried an illegal operation on an association which is closed. */
-sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_error_closed(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4779,7 +4856,8 @@ sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep,
/* We tried an illegal operation on an association which is shutting
* down.
*/
-sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -4805,6 +4883,7 @@ sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep,
* (timers)
*/
sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4817,7 +4896,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
+ SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
@@ -4839,6 +4918,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
* (timers)
*/
sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4847,7 +4927,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
*/
- return sctp_sf_cookie_wait_prm_shutdown(ep, asoc, type, arg, commands);
+ return sctp_sf_cookie_wait_prm_shutdown(net, ep, asoc, type, arg, commands);
}
/*
@@ -4865,6 +4945,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
* (timers)
*/
sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4884,7 +4965,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
/* Even if we can't send the ABORT due to low memory delete the
* TCB. This is a departure from our typical NOMEM handling.
@@ -4914,6 +4995,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
* (timers)
*/
sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4923,7 +5005,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
/* There is a single T1 timer, so we should be able to use
* common function with the COOKIE-WAIT state.
*/
- return sctp_sf_cookie_wait_prm_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_cookie_wait_prm_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -4939,6 +5021,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
* (timers)
*/
sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4949,7 +5032,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
- return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -4965,6 +5048,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
* (timers)
*/
sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -4979,7 +5063,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
- return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -4995,6 +5079,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
* (timers)
*/
sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5004,7 +5089,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
/* The same T2 timer, so we should be able to use
* common function with the SHUTDOWN-SENT state.
*/
- return sctp_sf_shutdown_sent_prm_abort(ep, asoc, type, arg, commands);
+ return sctp_sf_shutdown_sent_prm_abort(net, ep, asoc, type, arg, commands);
}
/*
@@ -5030,6 +5115,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
* association on which a heartbeat should be issued.
*/
sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5061,7 +5147,8 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
* When an endpoint has an ASCONF signaled change to be sent to the
* remote endpoint it should do A1 to A9
*/
-sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5082,6 +5169,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep,
* The return value is the disposition of the primitive.
*/
sctp_disposition_t sctp_sf_ignore_primitive(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5103,6 +5191,7 @@ sctp_disposition_t sctp_sf_ignore_primitive(
* retransmit, the stack will immediately send up this notification.
*/
sctp_disposition_t sctp_sf_do_no_pending_tsn(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5134,6 +5223,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
* The return value is the disposition.
*/
sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5203,6 +5293,7 @@ nomem:
* The return value is the disposition.
*/
sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5221,11 +5312,11 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
*/
if (chunk) {
if (!sctp_vtag_verify(chunk, asoc))
- return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
/* Make sure that the SHUTDOWN chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t)))
- return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
commands);
}
@@ -5273,7 +5364,8 @@ nomem:
*
* The return value is the disposition of the event.
*/
-sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_ignore_other(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5298,7 +5390,8 @@ sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5306,7 +5399,7 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
{
struct sctp_transport *transport = arg;
- SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS);
if (asoc->overall_error_count >= asoc->max_retrans) {
if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
@@ -5327,8 +5420,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
}
}
@@ -5384,13 +5477,14 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
* allow. However, an SCTP transmitter MUST NOT be more aggressive than
* the following algorithms allow.
*/
-sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
sctp_cmd_seq_t *commands)
{
- SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
return SCTP_DISPOSITION_CONSUME;
}
@@ -5414,7 +5508,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
* (timers, events)
*
*/
-sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5425,7 +5520,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
int attempts = asoc->init_err_counter + 1;
SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
- SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS);
if (attempts <= asoc->max_init_attempts) {
bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
@@ -5475,7 +5570,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
* (timers, events)
*
*/
-sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5485,7 +5581,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
int attempts = asoc->init_err_counter + 1;
SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
- SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS);
if (attempts <= asoc->max_init_attempts) {
repl = sctp_make_cookie_echo(asoc, NULL);
@@ -5523,7 +5619,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
* the T2-Shutdown timer, giving its peer ample opportunity to transmit
* all of its queued DATA chunks that have not yet been sent.
*/
-sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5532,7 +5629,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
struct sctp_chunk *reply = NULL;
SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
- SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
((struct sctp_association *)asoc)->shutdown_retries++;
@@ -5542,8 +5639,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
/* Note: CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
}
@@ -5592,6 +5689,7 @@ nomem:
* If the T4 RTO timer expires the endpoint should do B1 to B5
*/
sctp_disposition_t sctp_sf_t4_timer_expire(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5601,7 +5699,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
struct sctp_chunk *chunk = asoc->addip_last_asconf;
struct sctp_transport *transport = chunk->transport;
- SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS);
/* ADDIP 4.1 B1) Increment the error counters and perform path failure
* detection on the appropriate destination address as defined in
@@ -5626,8 +5724,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
SCTP_ERROR(ETIMEDOUT));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_ABORT;
}
@@ -5662,7 +5760,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
* At the expiration of this timer the sender SHOULD abort the association
* by sending an ABORT chunk.
*/
-sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5671,7 +5770,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
struct sctp_chunk *reply = NULL;
SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
- SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
reply = sctp_make_abort(asoc, NULL, 0);
if (!reply)
@@ -5683,8 +5782,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_ERROR));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_DISPOSITION_DELETE_TCB;
nomem:
@@ -5697,6 +5796,7 @@ nomem:
* the user. So this routine looks same as sctp_sf_do_9_2_prm_shutdown().
*/
sctp_disposition_t sctp_sf_autoclose_timer_expire(
+ struct net *net,
const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
@@ -5705,7 +5805,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
{
int disposition;
- SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS);
+ SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS);
/* From 9.2 Shutdown of an Association
* Upon receipt of the SHUTDOWN primitive from its upper
@@ -5720,7 +5820,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
disposition = SCTP_DISPOSITION_CONSUME;
if (sctp_outq_is_empty(&asoc->outqueue)) {
- disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
+ disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
arg, commands);
}
return disposition;
@@ -5738,7 +5838,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_not_impl(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5755,7 +5856,8 @@ sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_bug(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5775,7 +5877,8 @@ sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep,
*
* The return value is the disposition of the chunk.
*/
-sctp_disposition_t sctp_sf_timer_ignore(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const sctp_subtype_t type,
void *arg,
@@ -5817,7 +5920,8 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
/* Create an ABORT packet to be sent as a response, with the specified
* error causes.
*/
-static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
+static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
struct sctp_chunk *chunk,
const void *payload,
@@ -5826,7 +5930,7 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
struct sctp_packet *packet;
struct sctp_chunk *abort;
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (packet) {
/* Make an ABORT.
@@ -5858,7 +5962,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
}
/* Allocate a packet for responding in the OOTB conditions. */
-static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc,
+static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
+ const struct sctp_association *asoc,
const struct sctp_chunk *chunk)
{
struct sctp_packet *packet;
@@ -5911,7 +6016,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc
}
/* Make a transport for the bucket, Eliza... */
- transport = sctp_transport_new(sctp_source(chunk), GFP_ATOMIC);
+ transport = sctp_transport_new(net, sctp_source(chunk), GFP_ATOMIC);
if (!transport)
goto nomem;
@@ -5919,7 +6024,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc
* the source address.
*/
sctp_transport_route(transport, (union sctp_addr *)&chunk->dest,
- sctp_sk(sctp_get_ctl_sock()));
+ sctp_sk(net->sctp.ctl_sock));
packet = sctp_packet_init(&transport->packet, transport, sport, dport);
packet = sctp_packet_config(packet, vtag, 0);
@@ -5937,7 +6042,8 @@ void sctp_ootb_pkt_free(struct sctp_packet *packet)
}
/* Send a stale cookie error when a invalid COOKIE ECHO chunk is found */
-static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
+static void sctp_send_stale_cookie_err(struct net *net,
+ const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *chunk,
sctp_cmd_seq_t *commands,
@@ -5946,7 +6052,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
struct sctp_packet *packet;
if (err_chunk) {
- packet = sctp_ootb_pkt_new(asoc, chunk);
+ packet = sctp_ootb_pkt_new(net, asoc, chunk);
if (packet) {
struct sctp_signed_cookie *cookie;
@@ -5959,7 +6065,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
sctp_packet_append_chunk(packet, err_chunk);
sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
SCTP_PACKET(packet));
- SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
} else
sctp_chunk_free (err_chunk);
}
@@ -5979,6 +6085,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
__u32 tsn;
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
struct sock *sk = asoc->base.sk;
+ struct net *net = sock_net(sk);
u16 ssn;
u16 sid;
u8 ordered = 0;
@@ -6109,8 +6216,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
SCTP_ERROR(ECONNABORTED));
sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
SCTP_PERR(SCTP_ERROR_NO_DATA));
- SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
return SCTP_IERROR_NO_DATA;
}
@@ -6120,9 +6227,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* if we renege and the chunk arrives again.
*/
if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
- SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_INUNORDERCHUNKS);
else {
- SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS);
+ SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS);
ordered = 1;
}
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 7c211a7f90f4..84d98d8a5a74 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -59,7 +59,8 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
static const sctp_sm_table_entry_t
timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
+ sctp_cid_t cid,
sctp_state_t state);
@@ -82,13 +83,14 @@ static const sctp_sm_table_entry_t bug = {
rtn; \
})
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
+ sctp_event_t event_type,
sctp_state_t state,
sctp_subtype_t event_subtype)
{
switch (event_type) {
case SCTP_EVENT_T_CHUNK:
- return sctp_chunk_event_lookup(event_subtype.chunk, state);
+ return sctp_chunk_event_lookup(net, event_subtype.chunk, state);
case SCTP_EVENT_T_TIMEOUT:
return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
timeout_event_table);
@@ -906,7 +908,8 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
};
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
+ sctp_cid_t cid,
sctp_state_t state)
{
if (state > SCTP_STATE_MAX)
@@ -915,12 +918,12 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
if (cid <= SCTP_CID_BASE_MAX)
return &chunk_event_table[cid][state];
- if (sctp_prsctp_enable) {
+ if (net->sctp.prsctp_enable) {
if (cid == SCTP_CID_FWD_TSN)
return &prsctp_chunk_event_table[0][state];
}
- if (sctp_addip_enable) {
+ if (net->sctp.addip_enable) {
if (cid == SCTP_CID_ASCONF)
return &addip_chunk_event_table[0][state];
@@ -928,7 +931,7 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
return &addip_chunk_event_table[1][state];
}
- if (sctp_auth_enable) {
+ if (net->sctp.auth_enable) {
if (cid == SCTP_CID_AUTH)
return &auth_chunk_event_table[0][state];
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 5e259817a7f3..d37d24ff197f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -427,6 +427,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
static int sctp_send_asconf(struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
+ struct net *net = sock_net(asoc->base.sk);
int retval = 0;
/* If there is an outstanding ASCONF chunk, queue it for later
@@ -439,7 +440,7 @@ static int sctp_send_asconf(struct sctp_association *asoc,
/* Hold the chunk until an ASCONF_ACK is received. */
sctp_chunk_hold(chunk);
- retval = sctp_primitive_ASCONF(asoc, chunk);
+ retval = sctp_primitive_ASCONF(net, asoc, chunk);
if (retval)
sctp_chunk_free(chunk);
else
@@ -515,6 +516,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
@@ -529,7 +531,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
int i;
int retval = 0;
- if (!sctp_addip_enable)
+ if (!net->sctp.addip_enable)
return retval;
sp = sctp_sk(sk);
@@ -717,6 +719,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
@@ -732,7 +735,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
int stored = 0;
chunk = NULL;
- if (!sctp_addip_enable)
+ if (!net->sctp.addip_enable)
return retval;
sp = sctp_sk(sk);
@@ -1050,6 +1053,7 @@ static int __sctp_connect(struct sock* sk,
int addrs_size,
sctp_assoc_t *assoc_id)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc = NULL;
@@ -1200,7 +1204,7 @@ static int __sctp_connect(struct sock* sk,
goto out_free;
}
- err = sctp_primitive_ASSOCIATE(asoc, NULL);
+ err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
if (err < 0) {
goto out_free;
}
@@ -1458,6 +1462,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len,
*/
SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
{
+ struct net *net = sock_net(sk);
struct sctp_endpoint *ep;
struct sctp_association *asoc;
struct list_head *pos, *temp;
@@ -1499,9 +1504,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
chunk = sctp_make_abort_user(asoc, NULL, 0);
if (chunk)
- sctp_primitive_ABORT(asoc, chunk);
+ sctp_primitive_ABORT(net, asoc, chunk);
} else
- sctp_primitive_SHUTDOWN(asoc, NULL);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
/* On a TCP-style socket, block for at most linger_time if set. */
@@ -1569,6 +1574,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t msg_len)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *new_asoc=NULL, *asoc=NULL;
@@ -1714,7 +1720,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
if (sinfo_flags & SCTP_EOF) {
SCTP_DEBUG_PRINTK("Shutting down association: %p\n",
asoc);
- sctp_primitive_SHUTDOWN(asoc, NULL);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
err = 0;
goto out_unlock;
}
@@ -1727,7 +1733,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
}
SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
- sctp_primitive_ABORT(asoc, chunk);
+ sctp_primitive_ABORT(net, asoc, chunk);
err = 0;
goto out_unlock;
}
@@ -1900,7 +1906,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
/* Auto-connect, if we aren't connected already. */
if (sctp_state(asoc, CLOSED)) {
- err = sctp_primitive_ASSOCIATE(asoc, NULL);
+ err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
if (err < 0)
goto out_free;
SCTP_DEBUG_PRINTK("We associated primitively.\n");
@@ -1928,7 +1934,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
* works that way today. Keep it that way or this
* breaks.
*/
- err = sctp_primitive_SEND(asoc, datamsg);
+ err = sctp_primitive_SEND(net, asoc, datamsg);
/* Did the lower layer accept the chunk? */
if (err)
sctp_datamsg_free(datamsg);
@@ -2320,7 +2326,9 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
int error;
if (params->spp_flags & SPP_HB_DEMAND && trans) {
- error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans);
+ struct net *net = sock_net(trans->asoc->base.sk);
+
+ error = sctp_primitive_REQUESTHEARTBEAT(net, trans->asoc, trans);
if (error)
return error;
}
@@ -3033,6 +3041,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_association *asoc = NULL;
struct sctp_setpeerprim prim;
@@ -3042,7 +3051,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
sp = sctp_sk(sk);
- if (!sctp_addip_enable)
+ if (!net->sctp.addip_enable)
return -EPERM;
if (optlen != sizeof(struct sctp_setpeerprim))
@@ -3279,9 +3288,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authchunk val;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authchunk))
@@ -3311,11 +3321,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_hmacalgo *hmacs;
u32 idents;
int err;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen < sizeof(struct sctp_hmacalgo))
@@ -3348,11 +3359,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authkey *authkey;
struct sctp_association *asoc;
int ret;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen <= sizeof(struct sctp_authkey))
@@ -3389,10 +3401,11 @@ static int sctp_setsockopt_active_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authkeyid))
@@ -3417,10 +3430,11 @@ static int sctp_setsockopt_del_key(struct sock *sk,
char __user *optval,
unsigned int optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (optlen != sizeof(struct sctp_authkeyid))
@@ -3471,7 +3485,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
sp->do_auto_asconf = 0;
} else if (val && !sp->do_auto_asconf) {
list_add_tail(&sp->auto_asconf_list,
- &sctp_auto_asconf_splist);
+ &sock_net(sk)->sctp.auto_asconf_splist);
sp->do_auto_asconf = 1;
}
return 0;
@@ -3843,6 +3857,7 @@ out:
*/
SCTP_STATIC int sctp_init_sock(struct sock *sk)
{
+ struct net *net = sock_net(sk);
struct sctp_endpoint *ep;
struct sctp_sock *sp;
@@ -3872,7 +3887,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
sp->default_timetolive = 0;
sp->default_rcv_context = 0;
- sp->max_burst = sctp_max_burst;
+ sp->max_burst = net->sctp.max_burst;
/* Initialize default setup parameters. These parameters
* can be modified with the SCTP_INITMSG socket option or
@@ -3880,24 +3895,24 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
*/
sp->initmsg.sinit_num_ostreams = sctp_max_outstreams;
sp->initmsg.sinit_max_instreams = sctp_max_instreams;
- sp->initmsg.sinit_max_attempts = sctp_max_retrans_init;
- sp->initmsg.sinit_max_init_timeo = sctp_rto_max;
+ sp->initmsg.sinit_max_attempts = net->sctp.max_retrans_init;
+ sp->initmsg.sinit_max_init_timeo = net->sctp.rto_max;
/* Initialize default RTO related parameters. These parameters can
* be modified for with the SCTP_RTOINFO socket option.
*/
- sp->rtoinfo.srto_initial = sctp_rto_initial;
- sp->rtoinfo.srto_max = sctp_rto_max;
- sp->rtoinfo.srto_min = sctp_rto_min;
+ sp->rtoinfo.srto_initial = net->sctp.rto_initial;
+ sp->rtoinfo.srto_max = net->sctp.rto_max;
+ sp->rtoinfo.srto_min = net->sctp.rto_min;
/* Initialize default association related parameters. These parameters
* can be modified with the SCTP_ASSOCINFO socket option.
*/
- sp->assocparams.sasoc_asocmaxrxt = sctp_max_retrans_association;
+ sp->assocparams.sasoc_asocmaxrxt = net->sctp.max_retrans_association;
sp->assocparams.sasoc_number_peer_destinations = 0;
sp->assocparams.sasoc_peer_rwnd = 0;
sp->assocparams.sasoc_local_rwnd = 0;
- sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life;
+ sp->assocparams.sasoc_cookie_life = net->sctp.valid_cookie_life;
/* Initialize default event subscriptions. By default, all the
* options are off.
@@ -3907,10 +3922,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
/* Default Peer Address Parameters. These defaults can
* be modified via SCTP_PEER_ADDR_PARAMS
*/
- sp->hbinterval = sctp_hb_interval;
- sp->pathmaxrxt = sctp_max_retrans_path;
+ sp->hbinterval = net->sctp.hb_interval;
+ sp->pathmaxrxt = net->sctp.max_retrans_path;
sp->pathmtu = 0; // allow default discovery
- sp->sackdelay = sctp_sack_timeout;
+ sp->sackdelay = net->sctp.sack_timeout;
sp->sackfreq = 2;
sp->param_flags = SPP_HB_ENABLE |
SPP_PMTUD_ENABLE |
@@ -3961,10 +3976,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
local_bh_disable();
percpu_counter_inc(&sctp_sockets_allocated);
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- if (sctp_default_auto_asconf) {
+ sock_prot_inuse_add(net, sk->sk_prot, 1);
+ if (net->sctp.default_auto_asconf) {
list_add_tail(&sp->auto_asconf_list,
- &sctp_auto_asconf_splist);
+ &net->sctp.auto_asconf_splist);
sp->do_auto_asconf = 1;
} else
sp->do_auto_asconf = 0;
@@ -4011,6 +4026,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
*/
SCTP_STATIC void sctp_shutdown(struct sock *sk, int how)
{
+ struct net *net = sock_net(sk);
struct sctp_endpoint *ep;
struct sctp_association *asoc;
@@ -4022,7 +4038,7 @@ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how)
if (!list_empty(&ep->asocs)) {
asoc = list_entry(ep->asocs.next,
struct sctp_association, asocs);
- sctp_primitive_SHUTDOWN(asoc, NULL);
+ sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
}
}
@@ -4653,9 +4669,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
union sctp_addr temp;
int cnt = 0;
int addrlen;
+ struct net *net = sock_net(sk);
rcu_read_lock();
- list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
if (!addr->valid)
continue;
@@ -5299,12 +5316,13 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_hmacalgo __user *p = (void __user *)optval;
struct sctp_hmac_algo_param *hmacs;
__u16 data_len = 0;
u32 num_idents;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
hmacs = sctp_sk(sk)->ep->auth_hmacs_list;
@@ -5328,10 +5346,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
static int sctp_getsockopt_active_key(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authkeyid val;
struct sctp_association *asoc;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authkeyid))
@@ -5360,6 +5379,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -5367,7 +5387,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authchunks))
@@ -5403,6 +5423,7 @@ num:
static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct net *net = sock_net(sk);
struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
@@ -5410,7 +5431,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
u32 num_chunks = 0;
char __user *to;
- if (!sctp_auth_enable)
+ if (!net->sctp.auth_enable)
return -EACCES;
if (len < sizeof(struct sctp_authchunks))
@@ -5769,7 +5790,7 @@ static void sctp_unhash(struct sock *sk)
* a fastreuse flag (FIXME: NPI ipg).
*/
static struct sctp_bind_bucket *sctp_bucket_create(
- struct sctp_bind_hashbucket *head, unsigned short snum);
+ struct sctp_bind_hashbucket *head, struct net *, unsigned short snum);
static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
{
@@ -5799,11 +5820,12 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
rover = low;
if (inet_is_reserved_local_port(rover))
continue;
- index = sctp_phashfn(rover);
+ index = sctp_phashfn(sock_net(sk), rover);
head = &sctp_port_hashtable[index];
sctp_spin_lock(&head->lock);
sctp_for_each_hentry(pp, node, &head->chain)
- if (pp->port == rover)
+ if ((pp->port == rover) &&
+ net_eq(sock_net(sk), pp->net))
goto next;
break;
next:
@@ -5827,10 +5849,10 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
* to the port number (snum) - we detect that with the
* port iterator, pp being NULL.
*/
- head = &sctp_port_hashtable[sctp_phashfn(snum)];
+ head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)];
sctp_spin_lock(&head->lock);
sctp_for_each_hentry(pp, node, &head->chain) {
- if (pp->port == snum)
+ if ((pp->port == snum) && net_eq(pp->net, sock_net(sk)))
goto pp_found;
}
}
@@ -5881,7 +5903,7 @@ pp_found:
pp_not_found:
/* If there was a hash table miss, create a new port. */
ret = 1;
- if (!pp && !(pp = sctp_bucket_create(head, snum)))
+ if (!pp && !(pp = sctp_bucket_create(head, sock_net(sk), snum)))
goto fail_unlock;
/* In either case (hit or miss), make sure fastreuse is 1 only
@@ -6113,7 +6135,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
********************************************************************/
static struct sctp_bind_bucket *sctp_bucket_create(
- struct sctp_bind_hashbucket *head, unsigned short snum)
+ struct sctp_bind_hashbucket *head, struct net *net, unsigned short snum)
{
struct sctp_bind_bucket *pp;
@@ -6123,6 +6145,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
pp->port = snum;
pp->fastreuse = 0;
INIT_HLIST_HEAD(&pp->owner);
+ pp->net = net;
hlist_add_head(&pp->node, &head->chain);
}
return pp;
@@ -6142,7 +6165,8 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
static inline void __sctp_put_port(struct sock *sk)
{
struct sctp_bind_hashbucket *head =
- &sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)];
+ &sctp_port_hashtable[sctp_phashfn(sock_net(sk),
+ inet_sk(sk)->inet_num)];
struct sctp_bind_bucket *pp;
sctp_spin_lock(&head->lock);
@@ -6809,7 +6833,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
newsp->hmac = NULL;
/* Hook this new socket in to the bind_hash list. */
- head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)];
+ head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk),
+ inet_sk(oldsk)->inet_num)];
sctp_local_bh_disable();
sctp_spin_lock(&head->lock);
pp = sctp_sk(oldsk)->bind_hash;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 2b2bfe933ff1..70e3ba5cb50b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -64,8 +64,34 @@ extern int sysctl_sctp_wmem[3];
static ctl_table sctp_table[] = {
{
+ .procname = "sctp_mem",
+ .data = &sysctl_sctp_mem,
+ .maxlen = sizeof(sysctl_sctp_mem),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax
+ },
+ {
+ .procname = "sctp_rmem",
+ .data = &sysctl_sctp_rmem,
+ .maxlen = sizeof(sysctl_sctp_rmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sctp_wmem",
+ .data = &sysctl_sctp_wmem,
+ .maxlen = sizeof(sysctl_sctp_wmem),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+
+ { /* sentinel */ }
+};
+
+static ctl_table sctp_net_table[] = {
+ {
.procname = "rto_initial",
- .data = &sctp_rto_initial,
+ .data = &init_net.sctp.rto_initial,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -74,7 +100,7 @@ static ctl_table sctp_table[] = {
},
{
.procname = "rto_min",
- .data = &sctp_rto_min,
+ .data = &init_net.sctp.rto_min,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -83,7 +109,7 @@ static ctl_table sctp_table[] = {
},
{
.procname = "rto_max",
- .data = &sctp_rto_max,
+ .data = &init_net.sctp.rto_max,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -91,17 +117,22 @@ static ctl_table sctp_table[] = {
.extra2 = &timer_max
},
{
- .procname = "valid_cookie_life",
- .data = &sctp_valid_cookie_life,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &timer_max
+ .procname = "rto_alpha_exp_divisor",
+ .data = &init_net.sctp.rto_alpha,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "rto_beta_exp_divisor",
+ .data = &init_net.sctp.rto_beta,
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
},
{
.procname = "max_burst",
- .data = &sctp_max_burst,
+ .data = &init_net.sctp.max_burst,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -109,31 +140,42 @@ static ctl_table sctp_table[] = {
.extra2 = &int_max
},
{
- .procname = "association_max_retrans",
- .data = &sctp_max_retrans_association,
+ .procname = "cookie_preserve_enable",
+ .data = &init_net.sctp.cookie_preserve_enable,
.maxlen = sizeof(int),
.mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "valid_cookie_life",
+ .data = &init_net.sctp.valid_cookie_life,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &int_max
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
- .procname = "sndbuf_policy",
- .data = &sctp_sndbuf_policy,
+ .procname = "sack_timeout",
+ .data = &init_net.sctp.sack_timeout,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &sack_timer_min,
+ .extra2 = &sack_timer_max,
},
{
- .procname = "rcvbuf_policy",
- .data = &sctp_rcvbuf_policy,
- .maxlen = sizeof(int),
+ .procname = "hb_interval",
+ .data = &init_net.sctp.hb_interval,
+ .maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &timer_max
},
{
- .procname = "path_max_retrans",
- .data = &sctp_max_retrans_path,
+ .procname = "association_max_retrans",
+ .data = &init_net.sctp.max_retrans_association,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -141,17 +183,17 @@ static ctl_table sctp_table[] = {
.extra2 = &int_max
},
{
- .procname = "pf_retrans",
- .data = &sctp_pf_retrans,
+ .procname = "path_max_retrans",
+ .data = &init_net.sctp.max_retrans_path,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = &one,
.extra2 = &int_max
},
{
.procname = "max_init_retransmits",
- .data = &sctp_max_retrans_init,
+ .data = &init_net.sctp.max_retrans_init,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -159,103 +201,66 @@ static ctl_table sctp_table[] = {
.extra2 = &int_max
},
{
- .procname = "hb_interval",
- .data = &sctp_hb_interval,
- .maxlen = sizeof(unsigned int),
+ .procname = "pf_retrans",
+ .data = &init_net.sctp.pf_retrans,
+ .maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &timer_max
+ .extra1 = &zero,
+ .extra2 = &int_max
},
{
- .procname = "cookie_preserve_enable",
- .data = &sctp_cookie_preserve_enable,
+ .procname = "sndbuf_policy",
+ .data = &init_net.sctp.sndbuf_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "rto_alpha_exp_divisor",
- .data = &sctp_rto_alpha,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "rto_beta_exp_divisor",
- .data = &sctp_rto_beta,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "addip_enable",
- .data = &sctp_addip_enable,
+ .procname = "rcvbuf_policy",
+ .data = &init_net.sctp.rcvbuf_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "default_auto_asconf",
- .data = &sctp_default_auto_asconf,
+ .data = &init_net.sctp.default_auto_asconf,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "prsctp_enable",
- .data = &sctp_prsctp_enable,
+ .procname = "addip_enable",
+ .data = &init_net.sctp.addip_enable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "sack_timeout",
- .data = &sctp_sack_timeout,
+ .procname = "addip_noauth_enable",
+ .data = &init_net.sctp.addip_noauth,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &sack_timer_min,
- .extra2 = &sack_timer_max,
- },
- {
- .procname = "sctp_mem",
- .data = &sysctl_sctp_mem,
- .maxlen = sizeof(sysctl_sctp_mem),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax
- },
- {
- .procname = "sctp_rmem",
- .data = &sysctl_sctp_rmem,
- .maxlen = sizeof(sysctl_sctp_rmem),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "sctp_wmem",
- .data = &sysctl_sctp_wmem,
- .maxlen = sizeof(sysctl_sctp_wmem),
- .mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "auth_enable",
- .data = &sctp_auth_enable,
+ .procname = "prsctp_enable",
+ .data = &init_net.sctp.prsctp_enable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .procname = "addip_noauth_enable",
- .data = &sctp_addip_noauth,
+ .procname = "auth_enable",
+ .data = &init_net.sctp.auth_enable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "addr_scope_policy",
- .data = &sctp_scope_policy,
+ .data = &init_net.sctp.scope_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -264,7 +269,7 @@ static ctl_table sctp_table[] = {
},
{
.procname = "rwnd_update_shift",
- .data = &sctp_rwnd_upd_shift,
+ .data = &init_net.sctp.rwnd_upd_shift,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
@@ -273,7 +278,7 @@ static ctl_table sctp_table[] = {
},
{
.procname = "max_autoclose",
- .data = &sctp_max_autoclose,
+ .data = &init_net.sctp.max_autoclose,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &proc_doulongvec_minmax,
@@ -284,6 +289,27 @@ static ctl_table sctp_table[] = {
{ /* sentinel */ }
};
+int sctp_sysctl_net_register(struct net *net)
+{
+ struct ctl_table *table;
+ int i;
+
+ table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ for (i = 0; table[i].data; i++)
+ table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+
+ net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
+ return 0;
+}
+
+void sctp_sysctl_net_unregister(struct net *net)
+{
+ unregister_net_sysctl_table(net->sctp.sysctl_header);
+}
+
static struct ctl_table_header * sctp_sysctl_header;
/* Sysctl registration. */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index c97472b248a2..953c21e4af97 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -59,7 +59,8 @@
/* 1st Level Abstractions. */
/* Initialize a new transport from provided memory. */
-static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
+static struct sctp_transport *sctp_transport_init(struct net *net,
+ struct sctp_transport *peer,
const union sctp_addr *addr,
gfp_t gfp)
{
@@ -76,7 +77,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
* given destination transport address, set RTO to the protocol
* parameter 'RTO.Initial'.
*/
- peer->rto = msecs_to_jiffies(sctp_rto_initial);
+ peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
peer->last_time_heard = jiffies;
peer->last_time_ecne_reduced = jiffies;
@@ -86,8 +87,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
SPP_SACKDELAY_ENABLE;
/* Initialize the default path max_retrans. */
- peer->pathmaxrxt = sctp_max_retrans_path;
- peer->pf_retrans = sctp_pf_retrans;
+ peer->pathmaxrxt = net->sctp.max_retrans_path;
+ peer->pf_retrans = net->sctp.pf_retrans;
INIT_LIST_HEAD(&peer->transmitted);
INIT_LIST_HEAD(&peer->send_ready);
@@ -109,7 +110,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
}
/* Allocate and initialize a new transport. */
-struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
+struct sctp_transport *sctp_transport_new(struct net *net,
+ const union sctp_addr *addr,
gfp_t gfp)
{
struct sctp_transport *transport;
@@ -118,7 +120,7 @@ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
if (!transport)
goto fail;
- if (!sctp_transport_init(transport, addr, gfp))
+ if (!sctp_transport_init(net, transport, addr, gfp))
goto fail_init;
transport->malloced = 1;
@@ -316,6 +318,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
if (tp->rttvar || tp->srtt) {
+ struct net *net = sock_net(tp->asoc->base.sk);
/* 6.3.1 C3) When a new RTT measurement R' is made, set
* RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
* SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
@@ -327,10 +330,10 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
* For example, assuming the default value of RTO.Alpha of
* 1/8, rto_alpha would be expressed as 3.
*/
- tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta)
- + ((abs(tp->srtt - rtt)) >> sctp_rto_beta);
- tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha)
- + (rtt >> sctp_rto_alpha);
+ tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
+ + ((abs(tp->srtt - rtt)) >> net->sctp.rto_beta);
+ tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
+ + (rtt >> net->sctp.rto_alpha);
} else {
/* 6.3.1 C2) When the first RTT measurement R is made, set
* SRTT <- R, RTTVAR <- R/2.
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 33d894776192..10c018a5b9fe 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -702,7 +702,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
if (rx_count >= asoc->base.sk->sk_rcvbuf) {
if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
- (!sk_rmem_schedule(asoc->base.sk, chunk->skb->truesize)))
+ (!sk_rmem_schedule(asoc->base.sk, chunk->skb,
+ chunk->skb->truesize)))
goto fail;
}
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index f5a6a4f4faf7..360d8697b95c 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -326,7 +326,9 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
* payload was fragmented on the way and ip had to reassemble them.
* We add the rest of skb's to the first skb's fraglist.
*/
-static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag)
+static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
+ struct sk_buff_head *queue, struct sk_buff *f_frag,
+ struct sk_buff *l_frag)
{
struct sk_buff *pos;
struct sk_buff *new = NULL;
@@ -394,7 +396,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
}
event = sctp_skb2event(f_frag);
- SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
+ SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS);
return event;
}
@@ -493,7 +495,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
cevent = sctp_skb2event(pd_first);
pd_point = sctp_sk(asoc->base.sk)->pd_point;
if (pd_point && pd_point <= pd_len) {
- retval = sctp_make_reassembled_event(&ulpq->reasm,
+ retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ &ulpq->reasm,
pd_first,
pd_last);
if (retval)
@@ -503,7 +506,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
done:
return retval;
found:
- retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos);
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag, pos);
if (retval)
retval->msg_flags |= MSG_EOR;
goto done;
@@ -563,7 +567,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq)
* further.
*/
done:
- retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag, last_frag);
if (retval && is_last)
retval->msg_flags |= MSG_EOR;
@@ -655,7 +660,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq)
* further.
*/
done:
- retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
+ retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ &ulpq->reasm, first_frag, last_frag);
return retval;
}
diff --git a/net/socket.c b/net/socket.c
index dfe5b66c97e0..80dc7e84b046 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -88,6 +88,7 @@
#include <linux/nsproxy.h>
#include <linux/magic.h>
#include <linux/slab.h>
+#include <linux/xattr.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -346,7 +347,8 @@ static struct file_system_type sock_fs_type = {
* but we take care of internal coherence yet.
*/
-static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
+static int sock_alloc_file(struct socket *sock, struct file **f, int flags,
+ const char *dname)
{
struct qstr name = { .name = "" };
struct path path;
@@ -357,6 +359,13 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
if (unlikely(fd < 0))
return fd;
+ if (dname) {
+ name.name = dname;
+ name.len = strlen(name.name);
+ } else if (sock->sk) {
+ name.name = sock->sk->sk_prot_creator->name;
+ name.len = strlen(name.name);
+ }
path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
if (unlikely(!path.dentry)) {
put_unused_fd(fd);
@@ -389,7 +398,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
- int fd = sock_alloc_file(sock, &newfile, flags);
+ int fd = sock_alloc_file(sock, &newfile, flags, NULL);
if (likely(fd >= 0))
fd_install(fd, newfile);
@@ -455,6 +464,68 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
return NULL;
}
+#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
+#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
+#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
+static ssize_t sockfs_getxattr(struct dentry *dentry,
+ const char *name, void *value, size_t size)
+{
+ const char *proto_name;
+ size_t proto_size;
+ int error;
+
+ error = -ENODATA;
+ if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
+ proto_name = dentry->d_name.name;
+ proto_size = strlen(proto_name);
+
+ if (value) {
+ error = -ERANGE;
+ if (proto_size + 1 > size)
+ goto out;
+
+ strncpy(value, proto_name, proto_size + 1);
+ }
+ error = proto_size + 1;
+ }
+
+out:
+ return error;
+}
+
+static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
+ size_t size)
+{
+ ssize_t len;
+ ssize_t used = 0;
+
+ len = security_inode_listsecurity(dentry->d_inode, buffer, size);
+ if (len < 0)
+ return len;
+ used += len;
+ if (buffer) {
+ if (size < used)
+ return -ERANGE;
+ buffer += len;
+ }
+
+ len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
+ used += len;
+ if (buffer) {
+ if (size < used)
+ return -ERANGE;
+ memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
+ buffer += len;
+ }
+
+ return used;
+}
+
+static const struct inode_operations sockfs_inode_ops = {
+ .getxattr = sockfs_getxattr,
+ .listxattr = sockfs_listxattr,
+};
+
/**
* sock_alloc - allocate a socket
*
@@ -479,6 +550,7 @@ static struct socket *sock_alloc(void)
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
+ inode->i_op = &sockfs_inode_ops;
this_cpu_add(sockets_in_use, 1);
return sock;
@@ -1394,13 +1466,13 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
if (err < 0)
goto out_release_both;
- fd1 = sock_alloc_file(sock1, &newfile1, flags);
+ fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL);
if (unlikely(fd1 < 0)) {
err = fd1;
goto out_release_both;
}
- fd2 = sock_alloc_file(sock2, &newfile2, flags);
+ fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL);
if (unlikely(fd2 < 0)) {
err = fd2;
fput(newfile1);
@@ -1536,7 +1608,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
*/
__module_get(newsock->ops->owner);
- newfd = sock_alloc_file(newsock, &newfile, flags);
+ newfd = sock_alloc_file(newsock, &newfile, flags,
+ sock->sk->sk_prot_creator->name);
if (unlikely(newfd < 0)) {
err = newfd;
sock_release(newsock);
@@ -2528,12 +2601,6 @@ static int __init sock_init(void)
goto out;
/*
- * Initialize sock SLAB cache.
- */
-
- sk_init();
-
- /*
* Initialize skbuff SLAB cache
*/
skb_init();
@@ -2604,7 +2671,7 @@ static int do_siocgstamp(struct net *net, struct socket *sock,
err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
set_fs(old_fs);
if (!err)
- err = compat_put_timeval(up, &ktv);
+ err = compat_put_timeval(&ktv, up);
return err;
}
@@ -2620,7 +2687,7 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
set_fs(old_fs);
if (!err)
- err = compat_put_timespec(up, &kts);
+ err = compat_put_timespec(&kts, up);
return err;
}
@@ -2657,6 +2724,7 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
+ memset(&ifc, 0, sizeof(ifc));
if (ifc32.ifcbuf == 0) {
ifc32.ifc_len = 0;
ifc.ifc_len = 0;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 9fe8857d8d59..03d03e37a7d5 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -21,6 +21,11 @@ config SUNRPC_XPRT_RDMA
If unsure, say N.
+config SUNRPC_SWAP
+ bool
+ depends on SUNRPC
+ select NETVM
+
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
depends on SUNRPC && CRYPTO
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 727e506cacda..b5c067bccc45 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -13,6 +13,7 @@
#include <linux/errno.h>
#include <linux/hash.h>
#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/gss_api.h>
#include <linux/spinlock.h>
#ifdef RPC_DEBUG
@@ -122,6 +123,59 @@ rpcauth_unregister(const struct rpc_authops *ops)
}
EXPORT_SYMBOL_GPL(rpcauth_unregister);
+/**
+ * rpcauth_list_flavors - discover registered flavors and pseudoflavors
+ * @array: array to fill in
+ * @size: size of "array"
+ *
+ * Returns the number of array items filled in, or a negative errno.
+ *
+ * The returned array is not sorted by any policy. Callers should not
+ * rely on the order of the items in the returned array.
+ */
+int
+rpcauth_list_flavors(rpc_authflavor_t *array, int size)
+{
+ rpc_authflavor_t flavor;
+ int result = 0;
+
+ spin_lock(&rpc_authflavor_lock);
+ for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
+ const struct rpc_authops *ops = auth_flavors[flavor];
+ rpc_authflavor_t pseudos[4];
+ int i, len;
+
+ if (result >= size) {
+ result = -ENOMEM;
+ break;
+ }
+
+ if (ops == NULL)
+ continue;
+ if (ops->list_pseudoflavors == NULL) {
+ array[result++] = ops->au_flavor;
+ continue;
+ }
+ len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos));
+ if (len < 0) {
+ result = len;
+ break;
+ }
+ for (i = 0; i < len; i++) {
+ if (result >= size) {
+ result = -ENOMEM;
+ break;
+ }
+ array[result++] = pseudos[i];
+ }
+ }
+ spin_unlock(&rpc_authflavor_lock);
+
+ dprintk("RPC: %s returns %d\n", __func__, result);
+ return result;
+}
+EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
+
struct rpc_auth *
rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
{
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index d3ad81f8da5b..34c522021004 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1619,6 +1619,7 @@ static const struct rpc_authops authgss_ops = {
.crcreate = gss_create_cred,
.pipes_create = gss_pipes_dentries_create,
.pipes_destroy = gss_pipes_dentries_destroy,
+ .list_pseudoflavors = gss_mech_list_pseudoflavors,
};
static const struct rpc_credops gss_credops = {
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 782bfe1b6465..b174fcd9ff4c 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -239,14 +239,28 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
-int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
+/**
+ * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
+ * @array: array to fill in
+ * @size: size of "array"
+ *
+ * Returns the number of array items filled in, or a negative errno.
+ *
+ * The returned array is not sorted by any policy. Callers should not
+ * rely on the order of the items in the returned array.
+ */
+int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
{
struct gss_api_mech *pos = NULL;
int j, i = 0;
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
- for (j=0; j < pos->gm_pf_num; j++) {
+ for (j = 0; j < pos->gm_pf_num; j++) {
+ if (i >= size) {
+ spin_unlock(&registered_mechs_lock);
+ return -ENOMEM;
+ }
array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
}
}
@@ -254,8 +268,6 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr)
return i;
}
-EXPORT_SYMBOL_GPL(gss_mech_list_pseudoflavors);
-
u32
gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
{
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 47ad2666fdf6..2a68bb3db772 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1349,8 +1349,11 @@ static int c_show(struct seq_file *m, void *p)
if (cache_check(cd, cp, NULL))
/* cache_check does a cache_put on failure */
seq_printf(m, "# ");
- else
+ else {
+ if (cache_is_expired(cd, cp))
+ seq_printf(m, "# ");
cache_put(cp, cd);
+ }
return cd->cache_show(m, cd, cp);
}
@@ -1632,7 +1635,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
void __init cache_initialize(void)
{
- INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
+ INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean);
}
int cache_register_net(struct cache_detail *cd, struct net *net)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 00eb859b7de5..fa48c60aef23 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -717,6 +717,15 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
atomic_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
+ if (sk_memalloc_socks()) {
+ struct rpc_xprt *xprt;
+
+ rcu_read_lock();
+ xprt = rcu_dereference(clnt->cl_xprt);
+ if (xprt->swapper)
+ task->tk_flags |= RPC_TASK_SWAPPER;
+ rcu_read_unlock();
+ }
/* Add to the client's list of all tasks */
spin_lock(&clnt->cl_lock);
list_add_tail(&task->tk_task, &clnt->cl_tasks);
@@ -1844,12 +1853,13 @@ call_timeout(struct rpc_task *task)
return;
}
if (RPC_IS_SOFT(task)) {
- if (clnt->cl_chatty)
+ if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
clnt->cl_protname,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
+ }
if (task->tk_flags & RPC_TASK_TIMEOUT)
rpc_exit(task, -ETIMEDOUT);
else
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 92509ffe15fc..a70acae496e4 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -251,7 +251,7 @@ static int rpcb_create_local_unix(struct net *net)
if (IS_ERR(clnt)) {
dprintk("RPC: failed to create AF_LOCAL rpcbind "
"client (errno %ld).\n", PTR_ERR(clnt));
- result = -PTR_ERR(clnt);
+ result = PTR_ERR(clnt);
goto out;
}
@@ -298,7 +298,7 @@ static int rpcb_create_local_net(struct net *net)
if (IS_ERR(clnt)) {
dprintk("RPC: failed to create local rpcbind "
"client (errno %ld).\n", PTR_ERR(clnt));
- result = -PTR_ERR(clnt);
+ result = PTR_ERR(clnt);
goto out;
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 994cfea2bad6..128494ec9a64 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -300,8 +300,9 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
/*
* Make an RPC task runnable.
*
- * Note: If the task is ASYNC, this must be called with
- * the spinlock held to protect the wait queue operation.
+ * Note: If the task is ASYNC, and is being made runnable after sitting on an
+ * rpc_wait_queue, this must be called with the queue spinlock held to protect
+ * the wait queue operation.
*/
static void rpc_make_runnable(struct rpc_task *task)
{
@@ -790,7 +791,9 @@ void rpc_execute(struct rpc_task *task)
static void rpc_async_schedule(struct work_struct *work)
{
+ current->flags |= PF_FSTRANS;
__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
+ current->flags &= ~PF_FSTRANS;
}
/**
@@ -812,7 +815,10 @@ static void rpc_async_schedule(struct work_struct *work)
void *rpc_malloc(struct rpc_task *task, size_t size)
{
struct rpc_buffer *buf;
- gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
+ gfp_t gfp = GFP_NOWAIT;
+
+ if (RPC_IS_SWAPPER(task))
+ gfp |= __GFP_MEMALLOC;
size += sizeof(struct rpc_buffer);
if (size <= RPC_BUFFER_MAXSIZE)
@@ -886,7 +892,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
static struct rpc_task *
rpc_alloc_task(void)
{
- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
+ return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
}
/*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 88f2bf671960..bac973a31367 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -316,7 +316,6 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
*/
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
- struct svc_serv *serv = xprt->xpt_server;
struct svc_pool *pool;
struct svc_rqst *rqstp;
int cpu;
@@ -362,8 +361,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
rqstp, rqstp->rq_xprt);
rqstp->rq_xprt = xprt;
svc_xprt_get(xprt);
- rqstp->rq_reserved = serv->sv_max_mesg;
- atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
pool->sp_stats.threads_woken++;
wake_up(&rqstp->rq_wait);
} else {
@@ -640,8 +637,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (xprt) {
rqstp->rq_xprt = xprt;
svc_xprt_get(xprt);
- rqstp->rq_reserved = serv->sv_max_mesg;
- atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
/* As there is a shortage of threads and this request
* had to be queued, don't allow the thread to wait so
@@ -738,6 +733,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
dprintk("svc: got len=%d\n", len);
+ rqstp->rq_reserved = serv->sv_max_mesg;
+ atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
}
svc_xprt_received(xprt);
@@ -794,7 +791,8 @@ int svc_send(struct svc_rqst *rqstp)
/* Grab mutex to serialize outgoing data. */
mutex_lock(&xprt->xpt_mutex);
- if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+ if (test_bit(XPT_DEAD, &xprt->xpt_flags)
+ || test_bit(XPT_CLOSE, &xprt->xpt_flags))
len = -ENOTCONN;
else
len = xprt->xpt_ops->xpo_sendto(rqstp);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 18bc130255a7..998aa8c1807c 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1129,9 +1129,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (len >= 0)
svsk->sk_tcplen += len;
if (len != want) {
+ svc_tcp_save_pages(svsk, rqstp);
if (len < 0 && len != -EAGAIN)
goto err_other;
- svc_tcp_save_pages(svsk, rqstp);
dprintk("svc: incomplete TCP record (%d of %d)\n",
svsk->sk_tcplen, svsk->sk_reclen);
goto err_noclose;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 0cf165580d8d..0afba1b4b656 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -129,34 +129,6 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
EXPORT_SYMBOL_GPL(xdr_terminate_string);
void
-xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
- unsigned int len)
-{
- struct kvec *tail = xdr->tail;
- u32 *p;
-
- xdr->pages = pages;
- xdr->page_base = base;
- xdr->page_len = len;
-
- p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len);
- tail->iov_base = p;
- tail->iov_len = 0;
-
- if (len & 3) {
- unsigned int pad = 4 - (len & 3);
-
- *p = 0;
- tail->iov_base = (char *)p + (len & 3);
- tail->iov_len = pad;
- len += pad;
- }
- xdr->buflen += len;
- xdr->len += len;
-}
-EXPORT_SYMBOL_GPL(xdr_encode_pages);
-
-void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
struct page **pages, unsigned int base, unsigned int len)
{
@@ -457,6 +429,16 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
EXPORT_SYMBOL_GPL(xdr_shift_buf);
/**
+ * xdr_stream_pos - Return the current offset from the start of the xdr_stream
+ * @xdr: pointer to struct xdr_stream
+ */
+unsigned int xdr_stream_pos(const struct xdr_stream *xdr)
+{
+ return (unsigned int)(XDR_QUADLEN(xdr->buf->len) - xdr->nwords) << 2;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_pos);
+
+/**
* xdr_init_encode - Initialize a struct xdr_stream for sending data.
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer in which to encode data
@@ -556,13 +538,11 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
EXPORT_SYMBOL_GPL(xdr_write_pages);
static void xdr_set_iov(struct xdr_stream *xdr, struct kvec *iov,
- __be32 *p, unsigned int len)
+ unsigned int len)
{
if (len > iov->iov_len)
len = iov->iov_len;
- if (p == NULL)
- p = (__be32*)iov->iov_base;
- xdr->p = p;
+ xdr->p = (__be32*)iov->iov_base;
xdr->end = (__be32*)(iov->iov_base + len);
xdr->iov = iov;
xdr->page_ptr = NULL;
@@ -609,7 +589,7 @@ static void xdr_set_next_page(struct xdr_stream *xdr)
newbase -= xdr->buf->page_base;
if (xdr_set_page_base(xdr, newbase, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
}
static bool xdr_set_next_buffer(struct xdr_stream *xdr)
@@ -618,7 +598,7 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
xdr_set_next_page(xdr);
else if (xdr->iov == xdr->buf->head) {
if (xdr_set_page_base(xdr, 0, PAGE_SIZE) < 0)
- xdr_set_iov(xdr, xdr->buf->tail, NULL, xdr->buf->len);
+ xdr_set_iov(xdr, xdr->buf->tail, xdr->buf->len);
}
return xdr->p != xdr->end;
}
@@ -634,10 +614,15 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
xdr->buf = buf;
xdr->scratch.iov_base = NULL;
xdr->scratch.iov_len = 0;
+ xdr->nwords = XDR_QUADLEN(buf->len);
if (buf->head[0].iov_len != 0)
- xdr_set_iov(xdr, buf->head, p, buf->len);
+ xdr_set_iov(xdr, buf->head, buf->len);
else if (buf->page_len != 0)
xdr_set_page_base(xdr, 0, buf->len);
+ if (p != NULL && p > xdr->p && xdr->end >= p) {
+ xdr->nwords -= p - xdr->p;
+ xdr->p = p;
+ }
}
EXPORT_SYMBOL_GPL(xdr_init_decode);
@@ -662,12 +647,14 @@ EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
+ unsigned int nwords = XDR_QUADLEN(nbytes);
__be32 *p = xdr->p;
- __be32 *q = p + XDR_QUADLEN(nbytes);
+ __be32 *q = p + nwords;
- if (unlikely(q > xdr->end || q < p))
+ if (unlikely(nwords > xdr->nwords || q > xdr->end || q < p))
return NULL;
xdr->p = q;
+ xdr->nwords -= nwords;
return p;
}
@@ -734,6 +721,31 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
}
EXPORT_SYMBOL_GPL(xdr_inline_decode);
+static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
+{
+ struct xdr_buf *buf = xdr->buf;
+ struct kvec *iov;
+ unsigned int nwords = XDR_QUADLEN(len);
+ unsigned int cur = xdr_stream_pos(xdr);
+
+ if (xdr->nwords == 0)
+ return 0;
+ if (nwords > xdr->nwords) {
+ nwords = xdr->nwords;
+ len = nwords << 2;
+ }
+ /* Realign pages to current pointer position */
+ iov = buf->head;
+ if (iov->iov_len > cur)
+ xdr_shrink_bufhead(buf, iov->iov_len - cur);
+
+ /* Truncate page data and move it into the tail */
+ if (buf->page_len > len)
+ xdr_shrink_pagelen(buf, buf->page_len - len);
+ xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ return len;
+}
+
/**
* xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position
* @xdr: pointer to xdr_stream struct
@@ -742,39 +754,37 @@ EXPORT_SYMBOL_GPL(xdr_inline_decode);
* Moves data beyond the current pointer position from the XDR head[] buffer
* into the page list. Any data that lies beyond current position + "len"
* bytes is moved into the XDR tail[].
+ *
+ * Returns the number of XDR encoded bytes now contained in the pages
*/
-void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
+unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
{
struct xdr_buf *buf = xdr->buf;
struct kvec *iov;
- ssize_t shift;
+ unsigned int nwords;
unsigned int end;
- int padding;
+ unsigned int padding;
- /* Realign pages to current pointer position */
- iov = buf->head;
- shift = iov->iov_len + (char *)iov->iov_base - (char *)xdr->p;
- if (shift > 0)
- xdr_shrink_bufhead(buf, shift);
-
- /* Truncate page data and move it into the tail */
- if (buf->page_len > len)
- xdr_shrink_pagelen(buf, buf->page_len - len);
- padding = (XDR_QUADLEN(len) << 2) - len;
+ len = xdr_align_pages(xdr, len);
+ if (len == 0)
+ return 0;
+ nwords = XDR_QUADLEN(len);
+ padding = (nwords << 2) - len;
xdr->iov = iov = buf->tail;
/* Compute remaining message length. */
- end = iov->iov_len;
- shift = buf->buflen - buf->len;
- if (shift < end)
- end -= shift;
- else if (shift > 0)
- end = 0;
+ end = ((xdr->nwords - nwords) << 2) + padding;
+ if (end > iov->iov_len)
+ end = iov->iov_len;
+
/*
* Position current pointer at beginning of tail, and
* set remaining message length.
*/
xdr->p = (__be32 *)((char *)iov->iov_base + padding);
xdr->end = (__be32 *)((char *)iov->iov_base + end);
+ xdr->page_ptr = NULL;
+ xdr->nwords = XDR_QUADLEN(end - padding);
+ return len;
}
EXPORT_SYMBOL_GPL(xdr_read_pages);
@@ -790,12 +800,13 @@ EXPORT_SYMBOL_GPL(xdr_read_pages);
*/
void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
{
- xdr_read_pages(xdr, len);
+ len = xdr_align_pages(xdr, len);
/*
* Position current pointer at beginning of tail, and
* set remaining message length.
*/
- xdr_set_page_base(xdr, 0, len);
+ if (len != 0)
+ xdr_set_page_base(xdr, 0, len);
}
EXPORT_SYMBOL_GPL(xdr_enter_page);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a5a402a7d21f..5d7f61d7559c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -969,11 +969,11 @@ static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
return false;
}
-static void xprt_alloc_slot(struct rpc_task *task)
+void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
{
- struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req;
+ spin_lock(&xprt->reserve_lock);
if (!list_empty(&xprt->free)) {
req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
list_del(&req->rq_list);
@@ -994,12 +994,29 @@ static void xprt_alloc_slot(struct rpc_task *task)
default:
task->tk_status = -EAGAIN;
}
+ spin_unlock(&xprt->reserve_lock);
return;
out_init_req:
task->tk_status = 0;
task->tk_rqstp = req;
xprt_request_init(task, xprt);
+ spin_unlock(&xprt->reserve_lock);
+}
+EXPORT_SYMBOL_GPL(xprt_alloc_slot);
+
+void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ /* Note: grabbing the xprt_lock_write() ensures that we throttle
+ * new slot allocation if the transport is congested (i.e. when
+ * reconnecting a stream transport or when out of socket write
+ * buffer space).
+ */
+ if (xprt_lock_write(xprt, task)) {
+ xprt_alloc_slot(xprt, task);
+ xprt_release_write(xprt, task);
+ }
}
+EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
@@ -1083,20 +1100,9 @@ void xprt_reserve(struct rpc_task *task)
if (task->tk_rqstp != NULL)
return;
- /* Note: grabbing the xprt_lock_write() here is not strictly needed,
- * but ensures that we throttle new slot allocation if the transport
- * is congested (e.g. if reconnecting or if we're out of socket
- * write buffer space).
- */
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
- if (!xprt_lock_write(xprt, task))
- return;
-
- spin_lock(&xprt->reserve_lock);
- xprt_alloc_slot(task);
- spin_unlock(&xprt->reserve_lock);
- xprt_release_write(xprt, task);
+ xprt->ops->alloc_slot(xprt, task);
}
static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index b446e100286f..5d9202dc7cb1 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -200,6 +200,7 @@ xprt_rdma_connect_worker(struct work_struct *work)
int rc = 0;
if (!xprt->shutdown) {
+ current->flags |= PF_FSTRANS;
xprt_clear_connected(xprt);
dprintk("RPC: %s: %sconnect\n", __func__,
@@ -212,10 +213,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
out:
xprt_wake_pending_tasks(xprt, rc);
-
out_clear:
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
+ current->flags &= ~PF_FSTRANS;
}
/*
@@ -712,6 +713,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
static struct rpc_xprt_ops xprt_rdma_procs = {
.reserve_xprt = xprt_rdma_reserve_xprt,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
+ .alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 62d0dac8f780..a35b8e52e551 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1892,6 +1892,8 @@ static void xs_local_setup_socket(struct work_struct *work)
if (xprt->shutdown)
goto out;
+ current->flags |= PF_FSTRANS;
+
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
status = __sock_create(xprt->xprt_net, AF_LOCAL,
SOCK_STREAM, 0, &sock, 1);
@@ -1925,7 +1927,47 @@ static void xs_local_setup_socket(struct work_struct *work)
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
+ current->flags &= ~PF_FSTRANS;
+}
+
+#ifdef CONFIG_SUNRPC_SWAP
+static void xs_set_memalloc(struct rpc_xprt *xprt)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+ xprt);
+
+ if (xprt->swapper)
+ sk_set_memalloc(transport->inet);
+}
+
+/**
+ * xs_swapper - Tag this transport as being used for swap.
+ * @xprt: transport to tag
+ * @enable: enable/disable
+ *
+ */
+int xs_swapper(struct rpc_xprt *xprt, int enable)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+ xprt);
+ int err = 0;
+
+ if (enable) {
+ xprt->swapper++;
+ xs_set_memalloc(xprt);
+ } else if (xprt->swapper) {
+ xprt->swapper--;
+ sk_clear_memalloc(transport->inet);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xs_swapper);
+#else
+static void xs_set_memalloc(struct rpc_xprt *xprt)
+{
}
+#endif
static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
@@ -1951,6 +1993,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
transport->sock = sock;
transport->inet = sk;
+ xs_set_memalloc(xprt);
+
write_unlock_bh(&sk->sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
@@ -1967,6 +2011,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
if (xprt->shutdown)
goto out;
+ current->flags |= PF_FSTRANS;
+
/* Start by resetting any existing state */
xs_reset_transport(transport);
sock = xs_create_sock(xprt, transport,
@@ -1985,6 +2031,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
+ current->flags &= ~PF_FSTRANS;
}
/*
@@ -2075,6 +2122,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
if (!xprt_bound(xprt))
goto out;
+ xs_set_memalloc(xprt);
+
/* Tell the socket layer to start connecting... */
xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies;
@@ -2110,6 +2159,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
if (xprt->shutdown)
goto out;
+ current->flags |= PF_FSTRANS;
+
if (!sock) {
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
sock = xs_create_sock(xprt, transport,
@@ -2159,6 +2210,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EINPROGRESS:
case -EALREADY:
xprt_clear_connecting(xprt);
+ current->flags &= ~PF_FSTRANS;
return;
case -EINVAL:
/* Happens, for instance, if the user specified a link
@@ -2171,6 +2223,7 @@ out_eagain:
out:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
+ current->flags &= ~PF_FSTRANS;
}
/**
@@ -2420,6 +2473,7 @@ static void bc_destroy(struct rpc_xprt *xprt)
static struct rpc_xprt_ops xs_local_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
+ .alloc_slot = xprt_alloc_slot,
.rpcbind = xs_local_rpcbind,
.set_port = xs_local_set_port,
.connect = xs_connect,
@@ -2436,6 +2490,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong,
+ .alloc_slot = xprt_alloc_slot,
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
@@ -2453,6 +2508,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
static struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xs_tcp_release_xprt,
+ .alloc_slot = xprt_lock_and_alloc_slot,
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 09e71241265d..4ec5c80e8a7c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -49,21 +49,6 @@ struct tipc_bearer tipc_bearers[MAX_BEARERS];
static void bearer_disable(struct tipc_bearer *b_ptr);
/**
- * media_name_valid - validate media name
- *
- * Returns 1 if media name is valid, otherwise 0.
- */
-static int media_name_valid(const char *name)
-{
- u32 len;
-
- len = strlen(name);
- if ((len + 1) > TIPC_MAX_MEDIA_NAME)
- return 0;
- return strspn(name, tipc_alphabet) == len;
-}
-
-/**
* tipc_media_find - locates specified media object by name
*/
struct tipc_media *tipc_media_find(const char *name)
@@ -102,7 +87,7 @@ int tipc_register_media(struct tipc_media *m_ptr)
write_lock_bh(&tipc_net_lock);
- if (!media_name_valid(m_ptr->name))
+ if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME)
goto exit;
if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) ||
!m_ptr->bcast_addr.broadcast)
@@ -206,9 +191,7 @@ static int bearer_name_validate(const char *name,
/* validate component parts of bearer name */
if ((media_len <= 1) || (media_len > TIPC_MAX_MEDIA_NAME) ||
- (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME) ||
- (strspn(media_name, tipc_alphabet) != (media_len - 1)) ||
- (strspn(if_name, tipc_alphabet) != (if_len - 1)))
+ (if_len <= 1) || (if_len > TIPC_MAX_IF_NAME))
return 0;
/* return bearer name components, if necessary */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index a056a3852f71..f67866c765dd 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -2,7 +2,7 @@
* net/tipc/config.c: TIPC configuration management code
*
* Copyright (c) 2002-2006, Ericsson AB
- * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
+ * Copyright (c) 2004-2007, 2010-2012, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -208,36 +208,6 @@ static struct sk_buff *cfg_set_remote_mng(void)
return tipc_cfg_reply_none();
}
-static struct sk_buff *cfg_set_max_publications(void)
-{
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (value < 1 || value > 65535)
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (max publications must be 1-65535)");
- tipc_max_publications = value;
- return tipc_cfg_reply_none();
-}
-
-static struct sk_buff *cfg_set_max_subscriptions(void)
-{
- u32 value;
-
- if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
- return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
- value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
- if (value < 1 || value > 65535)
- return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
- " (max subscriptions must be 1-65535");
- tipc_max_subscriptions = value;
- return tipc_cfg_reply_none();
-}
-
static struct sk_buff *cfg_set_max_ports(void)
{
u32 value;
@@ -357,12 +327,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_SET_MAX_PORTS:
rep_tlv_buf = cfg_set_max_ports();
break;
- case TIPC_CMD_SET_MAX_PUBL:
- rep_tlv_buf = cfg_set_max_publications();
- break;
- case TIPC_CMD_SET_MAX_SUBSCR:
- rep_tlv_buf = cfg_set_max_subscriptions();
- break;
case TIPC_CMD_SET_NETID:
rep_tlv_buf = cfg_set_netid();
break;
@@ -372,12 +336,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_GET_MAX_PORTS:
rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports);
break;
- case TIPC_CMD_GET_MAX_PUBL:
- rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications);
- break;
- case TIPC_CMD_GET_MAX_SUBSCR:
- rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
- break;
case TIPC_CMD_GET_NETID:
rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
break;
@@ -393,6 +351,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
case TIPC_CMD_GET_MAX_CLUSTERS:
case TIPC_CMD_SET_MAX_NODES:
case TIPC_CMD_GET_MAX_NODES:
+ case TIPC_CMD_SET_MAX_SUBSCR:
+ case TIPC_CMD_GET_MAX_SUBSCR:
+ case TIPC_CMD_SET_MAX_PUBL:
+ case TIPC_CMD_GET_MAX_PUBL:
case TIPC_CMD_SET_LOG_SIZE:
case TIPC_CMD_DUMP_LOG:
rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 6586eac6a50e..bfe8af88469a 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -48,18 +48,13 @@
/* global variables used by multiple sub-systems within TIPC */
-int tipc_random;
-
-const char tipc_alphabet[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.";
+int tipc_random __read_mostly;
/* configurable TIPC parameters */
-u32 tipc_own_addr;
-int tipc_max_ports;
-int tipc_max_subscriptions;
-int tipc_max_publications;
-int tipc_net_id;
-int tipc_remote_management;
+u32 tipc_own_addr __read_mostly;
+int tipc_max_ports __read_mostly;
+int tipc_net_id __read_mostly;
+int tipc_remote_management __read_mostly;
/**
@@ -101,9 +96,8 @@ int tipc_core_start_net(unsigned long addr)
{
int res;
- res = tipc_net_start(addr);
- if (!res)
- res = tipc_eth_media_start();
+ tipc_net_start(addr);
+ res = tipc_eth_media_start();
if (res)
tipc_core_stop_net();
return res;
@@ -160,8 +154,6 @@ static int __init tipc_init(void)
tipc_own_addr = 0;
tipc_remote_management = 1;
- tipc_max_publications = 10000;
- tipc_max_subscriptions = 2000;
tipc_max_ports = CONFIG_TIPC_PORTS;
tipc_net_id = 4711;
diff --git a/net/tipc/core.h b/net/tipc/core.h
index fd42e106c185..0207db04179a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -60,7 +60,9 @@
#define TIPC_MOD_VER "2.0.0"
-#define ULTRA_STRING_MAX_LEN 32768
+#define ULTRA_STRING_MAX_LEN 32768
+#define TIPC_MAX_SUBSCRIPTIONS 65535
+#define TIPC_MAX_PUBLICATIONS 65535
struct tipc_msg; /* msg.h */
@@ -74,19 +76,15 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...);
/*
* Global configuration variables
*/
-extern u32 tipc_own_addr;
-extern int tipc_max_ports;
-extern int tipc_max_subscriptions;
-extern int tipc_max_publications;
-extern int tipc_net_id;
-extern int tipc_remote_management;
+extern u32 tipc_own_addr __read_mostly;
+extern int tipc_max_ports __read_mostly;
+extern int tipc_net_id __read_mostly;
+extern int tipc_remote_management __read_mostly;
/*
* Other global variables
*/
-extern int tipc_random;
-extern const char tipc_alphabet[];
-
+extern int tipc_random __read_mostly;
/*
* Routines available to privileged subsystems
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 90ac9bfa7abb..2132c1ef2951 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -46,19 +46,30 @@
* @bearer: ptr to associated "generic" bearer structure
* @dev: ptr to associated Ethernet network device
* @tipc_packet_type: used in binding TIPC to Ethernet driver
+ * @setup: work item used when enabling bearer
* @cleanup: work item used when disabling bearer
*/
struct eth_bearer {
struct tipc_bearer *bearer;
struct net_device *dev;
struct packet_type tipc_packet_type;
+ struct work_struct setup;
struct work_struct cleanup;
};
static struct tipc_media eth_media_info;
static struct eth_bearer eth_bearers[MAX_ETH_BEARERS];
static int eth_started;
-static struct notifier_block notifier;
+
+static int recv_notification(struct notifier_block *nb, unsigned long evt,
+ void *dv);
+/*
+ * Network device notifier info
+ */
+static struct notifier_block notifier = {
+ .notifier_call = recv_notification,
+ .priority = 0
+};
/**
* eth_media_addr_set - initialize Ethernet media address structure
@@ -134,6 +145,17 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
}
/**
+ * setup_bearer - setup association between Ethernet bearer and interface
+ */
+static void setup_bearer(struct work_struct *work)
+{
+ struct eth_bearer *eb_ptr =
+ container_of(work, struct eth_bearer, setup);
+
+ dev_add_pack(&eb_ptr->tipc_packet_type);
+}
+
+/**
* enable_bearer - attach TIPC bearer to an Ethernet interface
*/
static int enable_bearer(struct tipc_bearer *tb_ptr)
@@ -173,7 +195,8 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
eb_ptr->tipc_packet_type.func = recv_msg;
eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
INIT_LIST_HEAD(&(eb_ptr->tipc_packet_type.list));
- dev_add_pack(&eb_ptr->tipc_packet_type);
+ INIT_WORK(&eb_ptr->setup, setup_bearer);
+ schedule_work(&eb_ptr->setup);
/* Associate TIPC bearer with Ethernet bearer */
eb_ptr->bearer = tb_ptr;
@@ -357,8 +380,6 @@ int tipc_eth_media_start(void)
if (res)
return res;
- notifier.notifier_call = &recv_notification;
- notifier.priority = 0;
res = register_netdevice_notifier(&notifier);
if (!res)
eth_started = 1;
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index 7a52d3922f3c..111ff8300ae5 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -45,7 +45,7 @@ struct queue_item {
static struct kmem_cache *tipc_queue_item_cache;
static struct list_head signal_queue_head;
static DEFINE_SPINLOCK(qitem_lock);
-static int handler_enabled;
+static int handler_enabled __read_mostly;
static void process_signal_queue(unsigned long dummy);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1c1e6151875e..a79c755cb417 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -210,9 +210,7 @@ static int link_name_validate(const char *name,
(z_local > 255) || (c_local > 4095) || (n_local > 4095) ||
(z_peer > 255) || (c_peer > 4095) || (n_peer > 4095) ||
(if_local_len <= 1) || (if_local_len > TIPC_MAX_IF_NAME) ||
- (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME) ||
- (strspn(if_local, tipc_alphabet) != (if_local_len - 1)) ||
- (strspn(if_peer, tipc_alphabet) != (if_peer_len - 1)))
+ (if_peer_len <= 1) || (if_peer_len > TIPC_MAX_IF_NAME))
return 0;
/* return link name components, if necessary */
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 360c478b0b53..46754779fd3d 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -41,7 +41,7 @@
#include "subscr.h"
#include "port.h"
-static int tipc_nametbl_size = 1024; /* must be a power of 2 */
+#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
/**
* struct name_info - name sequence publication info
@@ -114,7 +114,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock);
static int hash(int x)
{
- return x & (tipc_nametbl_size - 1);
+ return x & (TIPC_NAMETBL_SIZE - 1);
}
/**
@@ -667,9 +667,9 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
{
struct publication *publ;
- if (table.local_publ_count >= tipc_max_publications) {
+ if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
pr_warn("Publication failed, local publication limit reached (%u)\n",
- tipc_max_publications);
+ TIPC_MAX_PUBLICATIONS);
return NULL;
}
@@ -783,7 +783,7 @@ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth,
if (!list_is_last(&publ->zone_list, &info->zone_list))
ret += tipc_snprintf(buf + ret, len - ret,
"\n%33s", " ");
- };
+ }
ret += tipc_snprintf(buf + ret, len - ret, "\n");
return ret;
@@ -871,7 +871,7 @@ static int nametbl_list(char *buf, int len, u32 depth_info,
ret += nametbl_header(buf, len, depth);
lowbound = 0;
upbound = ~0;
- for (i = 0; i < tipc_nametbl_size; i++) {
+ for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
seq_head = &table.types[i];
hlist_for_each_entry(seq, seq_node, seq_head, ns_list) {
ret += nameseq_list(seq, buf + ret, len - ret,
@@ -935,7 +935,7 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
int tipc_nametbl_init(void)
{
- table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head),
+ table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head),
GFP_ATOMIC);
if (!table.types)
return -ENOMEM;
@@ -953,7 +953,7 @@ void tipc_nametbl_stop(void)
/* Verify name table is empty, then release it */
write_lock_bh(&tipc_nametbl_lock);
- for (i = 0; i < tipc_nametbl_size; i++) {
+ for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
if (hlist_empty(&table.types[i]))
continue;
pr_err("nametbl_stop(): orphaned hash chain detected\n");
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 5b5cea259caf..7d305ecc09c2 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -171,7 +171,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
tipc_link_send(buf, dnode, msg_link_selector(msg));
}
-int tipc_net_start(u32 addr)
+void tipc_net_start(u32 addr)
{
char addr_string[16];
@@ -187,7 +187,6 @@ int tipc_net_start(u32 addr)
pr_info("Started in network mode\n");
pr_info("Own node address %s, network identity %u\n",
tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
- return 0;
}
void tipc_net_stop(void)
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 9eb4b9e220eb..079daadb3f72 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -41,7 +41,7 @@ extern rwlock_t tipc_net_lock;
void tipc_net_route_msg(struct sk_buff *buf);
-int tipc_net_start(u32 addr);
+void tipc_net_start(u32 addr);
void tipc_net_stop(void);
#endif
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 47a839df27dc..6675914dc592 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -62,7 +62,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
rep_nlh = nlmsg_hdr(rep_buf);
memcpy(rep_nlh, req_nlh, hdr_space);
rep_nlh->nlmsg_len = rep_buf->len;
- genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).pid);
+ genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid);
}
return 0;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 09dc5b97e079..fd5f042dbff4 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -220,6 +220,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
sock_init_data(sock, sk);
sk->sk_backlog_rcv = backlog_rcv;
+ sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2;
tipc_sk(sk)->p = tp_ptr;
tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT;
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 5ed5965eb0be..0f7d0d007e22 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -304,9 +304,9 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
}
/* Refuse subscription if global limit exceeded */
- if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) {
+ if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
pr_warn("Subscription rejected, limit reached (%u)\n",
- tipc_max_subscriptions);
+ TIPC_MAX_SUBSCRIPTIONS);
subscr_terminate(subscriber);
return NULL;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 79981d97bc9c..5b5c876c80e9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -441,7 +441,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
/* ---- Socket is dead now and most probably destroyed ---- */
/*
- * Fixme: BSD difference: In BSD all sockets connected to use get
+ * Fixme: BSD difference: In BSD all sockets connected to us get
* ECONNRESET and we die on the spot. In Linux we behave
* like files and pipes do and wait for the last
* dereference.
@@ -481,7 +481,6 @@ static int unix_listen(struct socket *sock, int backlog)
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
struct pid *old_pid = NULL;
- const struct cred *old_cred = NULL;
err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -503,8 +502,6 @@ static int unix_listen(struct socket *sock, int backlog)
out_unlock:
unix_state_unlock(sk);
put_pid(old_pid);
- if (old_cred)
- put_cred(old_cred);
out:
return err;
}
@@ -823,6 +820,34 @@ fail:
return NULL;
}
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+{
+ struct dentry *dentry;
+ struct path path;
+ int err = 0;
+ /*
+ * Get the parent directory, calculate the hash for last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ return err;
+
+ /*
+ * All right, let's create it.
+ */
+ err = security_path_mknod(&path, dentry, mode, 0);
+ if (!err) {
+ err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
+ if (!err) {
+ res->mnt = mntget(path.mnt);
+ res->dentry = dget(dentry);
+ }
+ }
+ done_path_create(&path, dentry);
+ return err;
+}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
@@ -831,8 +856,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- struct dentry *dentry = NULL;
- struct path path;
int err;
unsigned int hash;
struct unix_address *addr;
@@ -869,43 +892,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
atomic_set(&addr->refcnt, 1);
if (sun_path[0]) {
- umode_t mode;
- err = 0;
- /*
- * Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out_mknod_parent;
-
- /*
- * All right, let's create it.
- */
- mode = S_IFSOCK |
+ struct path path;
+ umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = mnt_want_write(path.mnt);
- if (err)
- goto out_mknod_dput;
- err = security_path_mknod(&path, dentry, mode, 0);
- if (err)
- goto out_mknod_drop_write;
- err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
-out_mknod_drop_write:
- mnt_drop_write(path.mnt);
- if (err)
- goto out_mknod_dput;
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- dput(path.dentry);
- path.dentry = dentry;
-
+ err = unix_mknod(sun_path, mode, &path);
+ if (err) {
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
+ unix_release_addr(addr);
+ goto out_up;
+ }
addr->hash = UNIX_HASH_SIZE;
- }
-
- spin_lock(&unix_table_lock);
-
- if (!sun_path[0]) {
+ hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
+ spin_lock(&unix_table_lock);
+ u->path = path;
+ list = &unix_socket_table[hash];
+ } else {
+ spin_lock(&unix_table_lock);
err = -EADDRINUSE;
if (__unix_find_socket_byname(net, sunaddr, addr_len,
sk->sk_type, hash)) {
@@ -914,9 +917,6 @@ out_mknod_drop_write:
}
list = &unix_socket_table[addr->hash];
- } else {
- list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
- u->path = path;
}
err = 0;
@@ -930,16 +930,6 @@ out_up:
mutex_unlock(&u->readlock);
out:
return err;
-
-out_mknod_dput:
- dput(dentry);
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- path_put(&path);
-out_mknod_parent:
- if (err == -EEXIST)
- err = -EADDRINUSE;
- unix_release_addr(addr);
- goto out_up;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
@@ -1457,7 +1447,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
wait_for_unix_gc();
- err = scm_send(sock, msg, siocb->scm);
+ err = scm_send(sock, msg, siocb->scm, false);
if (err < 0)
return err;
@@ -1626,7 +1616,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
wait_for_unix_gc();
- err = scm_send(sock, msg, siocb->scm);
+ err = scm_send(sock, msg, siocb->scm, false);
if (err < 0)
return err;
@@ -2067,10 +2057,14 @@ static int unix_shutdown(struct socket *sock, int mode)
struct sock *sk = sock->sk;
struct sock *other;
- mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
-
- if (!mode)
- return 0;
+ if (mode < SHUT_RD || mode > SHUT_RDWR)
+ return -EINVAL;
+ /* This maps:
+ * SHUT_RD (0) -> RCV_SHUTDOWN (1)
+ * SHUT_WR (1) -> SEND_SHUTDOWN (2)
+ * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
+ */
+ ++mode;
unix_state_lock(sk);
sk->sk_shutdown |= mode;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 750b13408449..06748f108a57 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -110,12 +110,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- u32 pid, u32 seq, u32 flags, int sk_ino)
+ u32 portid, u32 seq, u32 flags, int sk_ino)
{
struct nlmsghdr *nlh;
struct unix_diag_msg *rep;
- nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+ nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
flags);
if (!nlh)
return -EMSGSIZE;
@@ -159,7 +159,7 @@ out_nlmsg_trim:
}
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
- u32 pid, u32 seq, u32 flags)
+ u32 portid, u32 seq, u32 flags)
{
int sk_ino;
@@ -170,7 +170,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
if (!sk_ino)
return 0;
- return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino);
+ return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino);
}
static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -200,7 +200,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (!(req->udiag_states & (1 << sk->sk_state)))
goto next;
if (sk_diag_dump(sk, skb, req,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI) < 0)
goto done;
@@ -267,7 +267,7 @@ again:
if (!rep)
goto out;
- err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid,
+ err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, 0, req->udiag_ino);
if (err < 0) {
nlmsg_free(rep);
@@ -277,7 +277,7 @@ again:
goto again;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+ err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
MSG_DONTWAIT);
if (err > 0)
err = 0;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index d355f67d0cdd..2f876b9ee344 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -105,7 +105,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
ASSERT_WDEV_LOCK(wdev);
- if (!netif_running(wdev->netdev))
+ if (wdev->netdev && !netif_running(wdev->netdev))
return;
switch (wdev->iftype) {
@@ -143,6 +143,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
case NL80211_IFTYPE_WDS:
/* these interface types don't really have a channel */
return;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ if (wdev->wiphy->features &
+ NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL)
+ *chanmode = CHAN_MODE_EXCLUSIVE;
+ return;
case NL80211_IFTYPE_UNSPECIFIED:
case NUM_NL80211_IFTYPES:
WARN_ON(1);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 31b40cc4a9c3..443d4d7deea2 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -230,9 +230,24 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
rtnl_lock();
mutex_lock(&rdev->devlist_mtx);
- list_for_each_entry(wdev, &rdev->wdev_list, list)
- if (wdev->netdev)
+ list_for_each_entry(wdev, &rdev->wdev_list, list) {
+ if (wdev->netdev) {
dev_close(wdev->netdev);
+ continue;
+ }
+ /* otherwise, check iftype */
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_P2P_DEVICE:
+ if (!wdev->p2p_started)
+ break;
+ rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
+ wdev->p2p_started = false;
+ rdev->opencount--;
+ break;
+ default:
+ break;
+ }
+ }
mutex_unlock(&rdev->devlist_mtx);
rtnl_unlock();
@@ -407,6 +422,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
if (WARN_ON(wiphy->software_iftypes & types))
return -EINVAL;
+ /* Only a single P2P_DEVICE can be allowed */
+ if (WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) &&
+ c->limits[j].max > 1))
+ return -EINVAL;
+
cnt += c->limits[j].max;
/*
* Don't advertise an unsupported type
@@ -734,6 +754,35 @@ static void wdev_cleanup_work(struct work_struct *work)
dev_put(wdev->netdev);
}
+void cfg80211_unregister_wdev(struct wireless_dev *wdev)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+
+ ASSERT_RTNL();
+
+ if (WARN_ON(wdev->netdev))
+ return;
+
+ mutex_lock(&rdev->devlist_mtx);
+ list_del_rcu(&wdev->list);
+ rdev->devlist_generation++;
+
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_P2P_DEVICE:
+ if (!wdev->p2p_started)
+ break;
+ rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
+ wdev->p2p_started = false;
+ rdev->opencount--;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+ mutex_unlock(&rdev->devlist_mtx);
+}
+EXPORT_SYMBOL(cfg80211_unregister_wdev);
+
static struct device_type wiphy_type = {
.name = "wlan",
};
@@ -952,6 +1001,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
*/
synchronize_rcu();
INIT_LIST_HEAD(&wdev->list);
+ /*
+ * Ensure that all events have been processed and
+ * freed.
+ */
+ cfg80211_process_wdev_events(wdev);
break;
case NETDEV_PRE_UP:
if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5206c6844fd7..a343be4a52bd 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -55,7 +55,7 @@ struct cfg80211_registered_device {
int opencount; /* also protected by devlist_mtx */
wait_queue_head_t dev_wait;
- u32 ap_beacons_nlpid;
+ u32 ap_beacons_nlportid;
/* protected by RTNL only */
int num_running_ifaces;
@@ -426,6 +426,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
u32 *flags, struct vif_params *params);
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
+void cfg80211_process_wdev_events(struct wireless_dev *wdev);
int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 1cdb1d5e6b0f..8016fee0752b 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -612,10 +612,21 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
}
EXPORT_SYMBOL(cfg80211_del_sta);
+void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
+ enum nl80211_connect_failed_reason reason,
+ gfp_t gfp)
+{
+ struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+ nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp);
+}
+EXPORT_SYMBOL(cfg80211_conn_failed);
+
struct cfg80211_mgmt_registration {
struct list_head list;
- u32 nlpid;
+ u32 nlportid;
int match_len;
@@ -624,7 +635,7 @@ struct cfg80211_mgmt_registration {
u8 match[];
};
-int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
+int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
u16 frame_type, const u8 *match_data,
int match_len)
{
@@ -672,7 +683,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
memcpy(nreg->match, match_data, match_len);
nreg->match_len = match_len;
- nreg->nlpid = snd_pid;
+ nreg->nlportid = snd_portid;
nreg->frame_type = cpu_to_le16(frame_type);
list_add(&nreg->list, &wdev->mgmt_registrations);
@@ -685,7 +696,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
return err;
}
-void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
+void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
{
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
@@ -694,7 +705,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
spin_lock_bh(&wdev->mgmt_registrations_lock);
list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
- if (reg->nlpid != nlpid)
+ if (reg->nlportid != nlportid)
continue;
if (rdev->ops->mgmt_frame_register) {
@@ -710,8 +721,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
spin_unlock_bh(&wdev->mgmt_registrations_lock);
- if (nlpid == wdev->ap_unexpected_nlpid)
- wdev->ap_unexpected_nlpid = 0;
+ if (nlportid == wdev->ap_unexpected_nlportid)
+ wdev->ap_unexpected_nlportid = 0;
}
void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
@@ -736,7 +747,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
const u8 *buf, size_t len, bool no_cck,
bool dont_wait_for_ack, u64 *cookie)
{
- struct net_device *dev = wdev->netdev;
const struct ieee80211_mgmt *mgmt;
u16 stype;
@@ -796,7 +806,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_AP_VLAN:
- if (!ether_addr_equal(mgmt->bssid, dev->dev_addr))
+ if (!ether_addr_equal(mgmt->bssid, wdev_address(wdev)))
err = -EINVAL;
break;
case NL80211_IFTYPE_MESH_POINT:
@@ -809,6 +819,11 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
* cfg80211 doesn't track the stations
*/
break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ /*
+ * fall through, P2P device only supports
+ * public action frames
+ */
default:
err = -EOPNOTSUPP;
break;
@@ -819,7 +834,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
return err;
}
- if (!ether_addr_equal(mgmt->sa, dev->dev_addr))
+ if (!ether_addr_equal(mgmt->sa, wdev_address(wdev)))
return -EINVAL;
/* Transmit the Action frame as requested by user space */
@@ -868,7 +883,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
/* found match! */
/* Indicate the received Action frame to user space */
- if (nl80211_send_mgmt(rdev, wdev, reg->nlpid,
+ if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
freq, sig_mbm,
buf, len, gfp))
continue;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1e37dbf00cb3..0418a6d5c1a6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -496,11 +496,11 @@ static bool is_valid_ie_attr(const struct nlattr *attr)
}
/* message building helper */
-static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq,
+static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
int flags, u8 cmd)
{
/* since there is no private header just add the generic one */
- return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd);
+ return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd);
}
static int nl80211_msg_put_channel(struct sk_buff *msg,
@@ -851,7 +851,7 @@ nla_put_failure:
return -ENOBUFS;
}
-static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
+static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags,
struct cfg80211_registered_device *dev)
{
void *hdr;
@@ -866,7 +866,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
const struct ieee80211_txrx_stypes *mgmt_stypes =
dev->wiphy.mgmt_stypes;
- hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY);
+ hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
if (!hdr)
return -1;
@@ -1100,6 +1100,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
goto nla_put_failure;
}
+ CMD(start_p2p_device, START_P2P_DEVICE);
#ifdef CONFIG_NL80211_TESTMODE
CMD(testmode_cmd, TESTMODE);
@@ -1266,7 +1267,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
continue;
if (++idx <= start)
continue;
- if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid,
+ if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
dev) < 0) {
idx--;
@@ -1289,7 +1290,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) {
+ if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
}
@@ -1735,26 +1736,26 @@ static inline u64 wdev_id(struct wireless_dev *wdev)
((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32);
}
-static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags,
+static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev)
{
struct net_device *dev = wdev->netdev;
void *hdr;
- hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE);
+ hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_INTERFACE);
if (!hdr)
return -1;
if (dev &&
(nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
- nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name) ||
- nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, dev->dev_addr)))
+ nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name)))
goto nla_put_failure;
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) ||
nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) ||
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) ||
nla_put_u32(msg, NL80211_ATTR_GENERATION,
rdev->devlist_generation ^
(cfg80211_rdev_list_generation << 2)))
@@ -1806,7 +1807,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
if_idx++;
continue;
}
- if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid,
+ if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
rdev, wdev) < 0) {
mutex_unlock(&rdev->devlist_mtx);
@@ -1837,7 +1838,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0,
+ if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
dev, wdev) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
@@ -2021,8 +2022,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
return PTR_ERR(wdev);
}
- if (type == NL80211_IFTYPE_MESH_POINT &&
- info->attrs[NL80211_ATTR_MESH_ID]) {
+ switch (type) {
+ case NL80211_IFTYPE_MESH_POINT:
+ if (!info->attrs[NL80211_ATTR_MESH_ID])
+ break;
wdev_lock(wdev);
BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
IEEE80211_MAX_MESH_ID_LEN);
@@ -2031,9 +2034,29 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
wdev->mesh_id_up_len);
wdev_unlock(wdev);
+ break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ /*
+ * P2P Device doesn't have a netdev, so doesn't go
+ * through the netdev notifier and must be added here
+ */
+ mutex_init(&wdev->mtx);
+ INIT_LIST_HEAD(&wdev->event_list);
+ spin_lock_init(&wdev->event_lock);
+ INIT_LIST_HEAD(&wdev->mgmt_registrations);
+ spin_lock_init(&wdev->mgmt_registrations_lock);
+
+ mutex_lock(&rdev->devlist_mtx);
+ wdev->identifier = ++rdev->wdev_id;
+ list_add_rcu(&wdev->list, &rdev->wdev_list);
+ rdev->devlist_generation++;
+ mutex_unlock(&rdev->devlist_mtx);
+ break;
+ default:
+ break;
}
- if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0,
+ if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
rdev, wdev) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
@@ -2168,7 +2191,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_NEW_KEY);
if (IS_ERR(hdr))
return PTR_ERR(hdr);
@@ -2746,7 +2769,7 @@ nla_put_failure:
return false;
}
-static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
+static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq,
int flags,
struct cfg80211_registered_device *rdev,
struct net_device *dev,
@@ -2755,7 +2778,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
void *hdr;
struct nlattr *sinfoattr, *bss_param;
- hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
+ hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION);
if (!hdr)
return -1;
@@ -2908,7 +2931,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
goto out_err;
if (nl80211_send_station(skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
dev, netdev, mac_addr,
&sinfo) < 0)
@@ -2954,7 +2977,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0,
+ if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0,
rdev, dev, mac_addr, &sinfo) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
@@ -3280,7 +3303,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr);
}
-static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
+static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq,
int flags, struct net_device *dev,
u8 *dst, u8 *next_hop,
struct mpath_info *pinfo)
@@ -3288,7 +3311,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
void *hdr;
struct nlattr *pinfoattr;
- hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
+ hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION);
if (!hdr)
return -1;
@@ -3366,7 +3389,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
if (err)
goto out_err;
- if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid,
+ if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
netdev, dst, next_hop,
&pinfo) < 0)
@@ -3415,7 +3438,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0,
+ if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0,
dev, dst, next_hop, &pinfo) < 0) {
nlmsg_free(msg);
return -ENOBUFS;
@@ -3656,7 +3679,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
- hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_GET_MESH_CONFIG);
if (!hdr)
goto out;
@@ -3975,7 +3998,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
goto out;
}
- hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_GET_REG);
if (!hdr)
goto put_failure;
@@ -4593,7 +4616,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
ASSERT_WDEV_LOCK(wdev);
- hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).pid, seq, flags,
+ hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags,
NL80211_CMD_NEW_SCAN_RESULTS);
if (!hdr)
return -1;
@@ -4712,14 +4735,14 @@ static int nl80211_dump_scan(struct sk_buff *skb,
return skb->len;
}
-static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq,
+static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
int flags, struct net_device *dev,
struct survey_info *survey)
{
void *hdr;
struct nlattr *infoattr;
- hdr = nl80211hdr_put(msg, pid, seq, flags,
+ hdr = nl80211hdr_put(msg, portid, seq, flags,
NL80211_CMD_NEW_SURVEY_RESULTS);
if (!hdr)
return -ENOMEM;
@@ -4813,7 +4836,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
}
if (nl80211_send_survey(skb,
- NETLINK_CB(cb->skb).pid,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
netdev,
&survey) < 0)
@@ -5428,7 +5451,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
}
while (1) {
- void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).pid,
+ void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
NL80211_CMD_TESTMODE);
struct nlattr *tmdata;
@@ -5468,7 +5491,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
static struct sk_buff *
__cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev,
- int approxlen, u32 pid, u32 seq, gfp_t gfp)
+ int approxlen, u32 portid, u32 seq, gfp_t gfp)
{
struct sk_buff *skb;
void *hdr;
@@ -5478,7 +5501,7 @@ __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev,
if (!skb)
return NULL;
- hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE);
+ hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE);
if (!hdr) {
kfree_skb(skb);
return NULL;
@@ -5508,7 +5531,7 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy,
return NULL;
return __cfg80211_testmode_alloc_skb(rdev, approxlen,
- rdev->testmode_info->snd_pid,
+ rdev->testmode_info->snd_portid,
rdev->testmode_info->snd_seq,
GFP_KERNEL);
}
@@ -5846,7 +5869,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
if (!msg)
return -ENOMEM;
- hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_REMAIN_ON_CHANNEL);
if (IS_ERR(hdr)) {
@@ -6055,6 +6078,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_P2P_DEVICE:
break;
default:
return -EOPNOTSUPP;
@@ -6064,7 +6088,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
if (!rdev->ops->mgmt_tx)
return -EOPNOTSUPP;
- return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type,
+ return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type,
nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]));
}
@@ -6101,6 +6125,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_MESH_POINT:
case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_P2P_DEVICE:
break;
default:
return -EOPNOTSUPP;
@@ -6144,7 +6169,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_FRAME);
if (IS_ERR(hdr)) {
@@ -6197,6 +6222,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_P2P_GO:
+ case NL80211_IFTYPE_P2P_DEVICE:
break;
default:
return -EOPNOTSUPP;
@@ -6260,7 +6286,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_GET_POWER_SAVE);
if (!hdr) {
err = -ENOBUFS;
@@ -6462,7 +6488,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info)
if (!msg)
return -ENOMEM;
- hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_GET_WOWLAN);
if (!hdr)
goto nla_put_failure;
@@ -6736,10 +6762,10 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb,
wdev->iftype != NL80211_IFTYPE_P2P_GO)
return -EINVAL;
- if (wdev->ap_unexpected_nlpid)
+ if (wdev->ap_unexpected_nlportid)
return -EBUSY;
- wdev->ap_unexpected_nlpid = info->snd_pid;
+ wdev->ap_unexpected_nlportid = info->snd_portid;
return 0;
}
@@ -6769,7 +6795,7 @@ static int nl80211_probe_client(struct sk_buff *skb,
if (!msg)
return -ENOMEM;
- hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_PROBE_CLIENT);
if (IS_ERR(hdr)) {
@@ -6804,10 +6830,72 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info)
if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS))
return -EOPNOTSUPP;
- if (rdev->ap_beacons_nlpid)
+ if (rdev->ap_beacons_nlportid)
return -EBUSY;
- rdev->ap_beacons_nlpid = info->snd_pid;
+ rdev->ap_beacons_nlportid = info->snd_portid;
+
+ return 0;
+}
+
+static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+ int err;
+
+ if (!rdev->ops->start_p2p_device)
+ return -EOPNOTSUPP;
+
+ if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
+ return -EOPNOTSUPP;
+
+ if (wdev->p2p_started)
+ return 0;
+
+ mutex_lock(&rdev->devlist_mtx);
+ err = cfg80211_can_add_interface(rdev, wdev->iftype);
+ mutex_unlock(&rdev->devlist_mtx);
+ if (err)
+ return err;
+
+ err = rdev->ops->start_p2p_device(&rdev->wiphy, wdev);
+ if (err)
+ return err;
+
+ wdev->p2p_started = true;
+ mutex_lock(&rdev->devlist_mtx);
+ rdev->opencount++;
+ mutex_unlock(&rdev->devlist_mtx);
+
+ return 0;
+}
+
+static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev = info->user_ptr[1];
+
+ if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
+ return -EOPNOTSUPP;
+
+ if (!rdev->ops->stop_p2p_device)
+ return -EOPNOTSUPP;
+
+ if (!wdev->p2p_started)
+ return 0;
+
+ rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
+ wdev->p2p_started = false;
+
+ mutex_lock(&rdev->devlist_mtx);
+ rdev->opencount--;
+ mutex_unlock(&rdev->devlist_mtx);
+
+ if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) {
+ rdev->scan_req->aborted = true;
+ ___cfg80211_scan_done(rdev, true);
+ }
return 0;
}
@@ -6819,7 +6907,7 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info)
#define NL80211_FLAG_NEED_NETDEV_UP (NL80211_FLAG_NEED_NETDEV |\
NL80211_FLAG_CHECK_NETDEV_UP)
#define NL80211_FLAG_NEED_WDEV 0x10
-/* If a netdev is associated, it must be UP */
+/* If a netdev is associated, it must be UP, P2P must be started */
#define NL80211_FLAG_NEED_WDEV_UP (NL80211_FLAG_NEED_WDEV |\
NL80211_FLAG_CHECK_NETDEV_UP)
@@ -6880,6 +6968,13 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
}
dev_hold(dev);
+ } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
+ if (!wdev->p2p_started) {
+ mutex_unlock(&cfg80211_mutex);
+ if (rtnl)
+ rtnl_unlock();
+ return -ENETDOWN;
+ }
}
cfg80211_lock_rdev(rdev);
@@ -7441,7 +7536,22 @@ static struct genl_ops nl80211_ops[] = {
.internal_flags = NL80211_FLAG_NEED_NETDEV |
NL80211_FLAG_NEED_RTNL,
},
-
+ {
+ .cmd = NL80211_CMD_START_P2P_DEVICE,
+ .doit = nl80211_start_p2p_device,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_STOP_P2P_DEVICE,
+ .doit = nl80211_stop_p2p_device,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -7520,12 +7630,12 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
static int nl80211_send_scan_msg(struct sk_buff *msg,
struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev,
- u32 pid, u32 seq, int flags,
+ u32 portid, u32 seq, int flags,
u32 cmd)
{
void *hdr;
- hdr = nl80211hdr_put(msg, pid, seq, flags, cmd);
+ hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
return -1;
@@ -7549,11 +7659,11 @@ static int
nl80211_send_sched_scan_msg(struct sk_buff *msg,
struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- u32 pid, u32 seq, int flags, u32 cmd)
+ u32 portid, u32 seq, int flags, u32 cmd)
{
void *hdr;
- hdr = nl80211hdr_put(msg, pid, seq, flags, cmd);
+ hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
return -1;
@@ -8254,6 +8364,40 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
+void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, const u8 *mac_addr,
+ enum nl80211_connect_failed_reason reason,
+ gfp_t gfp)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONN_FAILED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+ nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) ||
+ nla_put_u32(msg, NL80211_ATTR_CONN_FAILED_REASON, reason))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+ nl80211_mlme_mcgrp.id, gfp);
+ return;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ nlmsg_free(msg);
+}
+
static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
const u8 *addr, gfp_t gfp)
{
@@ -8262,9 +8406,9 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
struct sk_buff *msg;
void *hdr;
int err;
- u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid);
+ u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
- if (!nlpid)
+ if (!nlportid)
return false;
msg = nlmsg_new(100, gfp);
@@ -8288,7 +8432,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
return true;
}
- genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
return true;
nla_put_failure:
@@ -8312,7 +8456,7 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev,
}
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
- struct wireless_dev *wdev, u32 nlpid,
+ struct wireless_dev *wdev, u32 nlportid,
int freq, int sig_dbm,
const u8 *buf, size_t len, gfp_t gfp)
{
@@ -8341,7 +8485,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
genlmsg_end(msg, hdr);
- return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+ return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
nla_put_failure:
genlmsg_cancel(msg, hdr);
@@ -8696,9 +8840,9 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
struct sk_buff *msg;
void *hdr;
- u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid);
+ u32 nlportid = ACCESS_ONCE(rdev->ap_beacons_nlportid);
- if (!nlpid)
+ if (!nlportid)
return;
msg = nlmsg_new(len + 100, gfp);
@@ -8721,7 +8865,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
genlmsg_end(msg, hdr);
- genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+ genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
return;
nla_put_failure:
@@ -8745,9 +8889,9 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
list_for_each_entry_rcu(wdev, &rdev->wdev_list, list)
- cfg80211_mlme_unregister_socket(wdev, notify->pid);
- if (rdev->ap_beacons_nlpid == notify->pid)
- rdev->ap_beacons_nlpid = 0;
+ cfg80211_mlme_unregister_socket(wdev, notify->portid);
+ if (rdev->ap_beacons_nlportid == notify->portid)
+ rdev->ap_beacons_nlportid = 0;
}
rcu_read_unlock();
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 9f2616fffb40..f6153516068c 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -91,6 +91,11 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
struct net_device *dev, const u8 *mac_addr,
gfp_t gfp);
+void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
+ struct net_device *dev, const u8 *mac_addr,
+ enum nl80211_connect_failed_reason reason,
+ gfp_t gfp);
+
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u32 nlpid,
int freq, int sig_dbm,
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index c4ad7958af52..7d604c06c3dc 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -41,6 +41,8 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = {
[IEEE80211_RADIOTAP_TX_FLAGS] = { .align = 2, .size = 2, },
[IEEE80211_RADIOTAP_RTS_RETRIES] = { .align = 1, .size = 1, },
[IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, },
+ [IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, },
+ [IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, },
/*
* add more here as they are defined in radiotap.h
*/
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2ded3c7fad06..3b8cbbc214db 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -350,6 +350,9 @@ static void reg_regdb_search(struct work_struct *work)
struct reg_regdb_search_request *request;
const struct ieee80211_regdomain *curdom, *regdom;
int i, r;
+ bool set_reg = false;
+
+ mutex_lock(&cfg80211_mutex);
mutex_lock(&reg_regdb_search_mutex);
while (!list_empty(&reg_regdb_search_list)) {
@@ -365,9 +368,7 @@ static void reg_regdb_search(struct work_struct *work)
r = reg_copy_regd(&regdom, curdom);
if (r)
break;
- mutex_lock(&cfg80211_mutex);
- set_regdom(regdom);
- mutex_unlock(&cfg80211_mutex);
+ set_reg = true;
break;
}
}
@@ -375,6 +376,11 @@ static void reg_regdb_search(struct work_struct *work)
kfree(request);
}
mutex_unlock(&reg_regdb_search_mutex);
+
+ if (set_reg)
+ set_regdom(regdom);
+
+ mutex_unlock(&cfg80211_mutex);
}
static DECLARE_WORK(reg_regdb_work, reg_regdb_search);
@@ -504,9 +510,11 @@ static bool reg_does_bw_fit(const struct ieee80211_freq_range *freq_range,
*
* This lets us know if a specific frequency rule is or is not relevant to
* a specific frequency's band. Bands are device specific and artificial
- * definitions (the "2.4 GHz band" and the "5 GHz band"), however it is
- * safe for now to assume that a frequency rule should not be part of a
- * frequency's band if the start freq or end freq are off by more than 2 GHz.
+ * definitions (the "2.4 GHz band", the "5 GHz band" and the "60GHz band"),
+ * however it is safe for now to assume that a frequency rule should not be
+ * part of a frequency's band if the start freq or end freq are off by more
+ * than 2 GHz for the 2.4 and 5 GHz bands, and by more than 10 GHz for the
+ * 60 GHz band.
* This resolution can be lowered and should be considered as we add
* regulatory rule support for other "bands".
**/
@@ -514,9 +522,16 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
u32 freq_khz)
{
#define ONE_GHZ_IN_KHZ 1000000
- if (abs(freq_khz - freq_range->start_freq_khz) <= (2 * ONE_GHZ_IN_KHZ))
+ /*
+ * From 802.11ad: directional multi-gigabit (DMG):
+ * Pertaining to operation in a frequency band containing a channel
+ * with the Channel starting frequency above 45 GHz.
+ */
+ u32 limit = freq_khz > 45 * ONE_GHZ_IN_KHZ ?
+ 10 * ONE_GHZ_IN_KHZ : 2 * ONE_GHZ_IN_KHZ;
+ if (abs(freq_khz - freq_range->start_freq_khz) <= limit)
return true;
- if (abs(freq_khz - freq_range->end_freq_khz) <= (2 * ONE_GHZ_IN_KHZ))
+ if (abs(freq_khz - freq_range->end_freq_khz) <= limit)
return true;
return false;
#undef ONE_GHZ_IN_KHZ
@@ -1949,8 +1964,7 @@ static void restore_regulatory_settings(bool reset_user)
if (reg_request->initiator !=
NL80211_REGDOM_SET_BY_USER)
continue;
- list_del(&reg_request->list);
- list_add_tail(&reg_request->list, &tmp_reg_req_list);
+ list_move_tail(&reg_request->list, &tmp_reg_req_list);
}
}
spin_unlock(&reg_requests_lock);
@@ -2009,8 +2023,7 @@ static void restore_regulatory_settings(bool reset_user)
"into the queue\n",
reg_request->alpha2[0],
reg_request->alpha2[1]);
- list_del(&reg_request->list);
- list_add_tail(&reg_request->list, &reg_requests_list);
+ list_move_tail(&reg_request->list, &reg_requests_list);
}
spin_unlock(&reg_requests_lock);
@@ -2195,7 +2208,6 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd)
static int __set_regdom(const struct ieee80211_regdomain *rd)
{
const struct ieee80211_regdomain *intersected_rd = NULL;
- struct cfg80211_registered_device *rdev = NULL;
struct wiphy *request_wiphy;
/* Some basic sanity checks first */
@@ -2307,24 +2319,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
return 0;
}
- if (!intersected_rd)
- return -EINVAL;
-
- rdev = wiphy_to_dev(request_wiphy);
-
- rdev->country_ie_alpha2[0] = rd->alpha2[0];
- rdev->country_ie_alpha2[1] = rd->alpha2[1];
- rdev->env = last_request->country_ie_env;
-
- BUG_ON(intersected_rd == rd);
-
- kfree(rd);
- rd = NULL;
-
- reset_regdomains(false);
- cfg80211_regdomain = intersected_rd;
-
- return 0;
+ return -EINVAL;
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 848523a2b22f..9730c9862bdc 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -815,7 +815,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
return NULL;
if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
- (signal < 0 || signal > 100)))
+ (signal < 0 || signal > 100)))
return NULL;
if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 26f8cd30f712..ef35f4ef2aa6 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -684,22 +684,10 @@ EXPORT_SYMBOL(cfg80211_classify8021d);
const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie)
{
- u8 *end, *pos;
-
- pos = bss->information_elements;
- if (pos == NULL)
+ if (bss->information_elements == NULL)
return NULL;
- end = pos + bss->len_information_elements;
-
- while (pos + 1 < end) {
- if (pos + 2 + pos[1] > end)
- break;
- if (pos[0] == ie)
- return pos;
- pos += 2 + pos[1];
- }
-
- return NULL;
+ return cfg80211_find_ie(ie, bss->information_elements,
+ bss->len_information_elements);
}
EXPORT_SYMBOL(ieee80211_bss_get_ie);
@@ -735,7 +723,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
wdev->connect_keys = NULL;
}
-static void cfg80211_process_wdev_events(struct wireless_dev *wdev)
+void cfg80211_process_wdev_events(struct wireless_dev *wdev)
{
struct cfg80211_event *ev;
unsigned long flags;
@@ -812,6 +800,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
if (otype == NL80211_IFTYPE_AP_VLAN)
return -EOPNOTSUPP;
+ /* cannot change into P2P device type */
+ if (ntype == NL80211_IFTYPE_P2P_DEVICE)
+ return -EOPNOTSUPP;
+
if (!rdev->ops->change_virtual_intf ||
!(rdev->wiphy.interface_modes & (1 << ntype)))
return -EOPNOTSUPP;
@@ -889,6 +881,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
case NUM_NL80211_IFTYPES:
/* not happening */
break;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ WARN_ON(1);
+ break;
}
}
@@ -1053,8 +1048,15 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
if (wdev_iter == wdev)
continue;
- if (!netif_running(wdev_iter->netdev))
- continue;
+ if (wdev_iter->netdev) {
+ if (!netif_running(wdev_iter->netdev))
+ continue;
+ } else if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
+ if (!wdev_iter->p2p_started)
+ continue;
+ } else {
+ WARN_ON(1);
+ }
if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype))
continue;
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index b0eb7aa49b60..c8717c1d082e 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -478,13 +478,13 @@ void wireless_send_event(struct net_device * dev,
if (descr->header_type == IW_HEADER_TYPE_POINT) {
/* Check if number of token fits within bounds */
if (wrqu->data.length > descr->max_tokens) {
- netdev_err(dev, "(WE) : Wireless Event too big (%d)\n",
- wrqu->data.length);
+ netdev_err(dev, "(WE) : Wireless Event (cmd=0x%04X) too big (%d)\n",
+ cmd, wrqu->data.length);
return;
}
if (wrqu->data.length < descr->min_tokens) {
- netdev_err(dev, "(WE) : Wireless Event too small (%d)\n",
- wrqu->data.length);
+ netdev_err(dev, "(WE) : Wireless Event (cmd=0x%04X) too small (%d)\n",
+ cmd, wrqu->data.length);
return;
}
/* Calculate extra_len - extra is NULL for restricted events */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 54a0dc2e2f8d..ab2bb42fe094 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -212,7 +212,7 @@ resume:
/* only the first xfrm gets the encap type */
encap_type = 0;
- if (async && x->repl->check(x, skb, seq)) {
+ if (async && x->repl->recheck(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c5a5165a5927..41eabc46f110 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -42,13 +42,12 @@ static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
static struct dst_entry *xfrm_policy_sk_bundles;
static DEFINE_RWLOCK(xfrm_policy_lock);
-static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
-static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
+static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
+ __read_mostly;
static struct kmem_cache *xfrm_dst_cache __read_mostly;
-static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
-static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
@@ -95,6 +94,24 @@ bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl
return false;
}
+static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
+{
+ struct xfrm_policy_afinfo *afinfo;
+
+ if (unlikely(family >= NPROTO))
+ return NULL;
+ rcu_read_lock();
+ afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
+ if (unlikely(!afinfo))
+ rcu_read_unlock();
+ return afinfo;
+}
+
+static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
+{
+ rcu_read_unlock();
+}
+
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
@@ -585,6 +602,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_pol_hold(policy);
net->xfrm.policy_count[dir]++;
atomic_inc(&flow_cache_genid);
+ rt_genid_bump(net);
if (delpol)
__xfrm_policy_unlink(delpol, dir);
policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
@@ -1357,6 +1375,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
xdst->flo.ops = &xfrm_bundle_fc_ops;
+ if (afinfo->init_dst)
+ afinfo->init_dst(net, xdst);
} else
xdst = ERR_PTR(-ENOBUFS);
@@ -1761,7 +1781,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family,
if (!afinfo) {
dst_release(dst_orig);
- ret = ERR_PTR(-EINVAL);
+ return ERR_PTR(-EINVAL);
} else {
ret = afinfo->blackhole_route(net, dst_orig);
}
@@ -2418,7 +2438,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
return -EINVAL;
if (unlikely(afinfo->family >= NPROTO))
return -EAFNOSUPPORT;
- write_lock_bh(&xfrm_policy_afinfo_lock);
+ spin_lock(&xfrm_policy_afinfo_lock);
if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
err = -ENOBUFS;
else {
@@ -2439,9 +2459,9 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
dst_ops->neigh_lookup = xfrm_neigh_lookup;
if (likely(afinfo->garbage_collect == NULL))
afinfo->garbage_collect = xfrm_garbage_collect_deferred;
- xfrm_policy_afinfo[afinfo->family] = afinfo;
+ rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
}
- write_unlock_bh(&xfrm_policy_afinfo_lock);
+ spin_unlock(&xfrm_policy_afinfo_lock);
rtnl_lock();
for_each_net(net) {
@@ -2474,21 +2494,26 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
return -EINVAL;
if (unlikely(afinfo->family >= NPROTO))
return -EAFNOSUPPORT;
- write_lock_bh(&xfrm_policy_afinfo_lock);
+ spin_lock(&xfrm_policy_afinfo_lock);
if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
err = -EINVAL;
- else {
- struct dst_ops *dst_ops = afinfo->dst_ops;
- xfrm_policy_afinfo[afinfo->family] = NULL;
- dst_ops->kmem_cachep = NULL;
- dst_ops->check = NULL;
- dst_ops->negative_advice = NULL;
- dst_ops->link_failure = NULL;
- afinfo->garbage_collect = NULL;
- }
+ else
+ RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
+ NULL);
+ }
+ spin_unlock(&xfrm_policy_afinfo_lock);
+ if (!err) {
+ struct dst_ops *dst_ops = afinfo->dst_ops;
+
+ synchronize_rcu();
+
+ dst_ops->kmem_cachep = NULL;
+ dst_ops->check = NULL;
+ dst_ops->negative_advice = NULL;
+ dst_ops->link_failure = NULL;
+ afinfo->garbage_collect = NULL;
}
- write_unlock_bh(&xfrm_policy_afinfo_lock);
return err;
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
@@ -2497,33 +2522,16 @@ static void __net_init xfrm_dst_ops_init(struct net *net)
{
struct xfrm_policy_afinfo *afinfo;
- read_lock_bh(&xfrm_policy_afinfo_lock);
- afinfo = xfrm_policy_afinfo[AF_INET];
+ rcu_read_lock();
+ afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
if (afinfo)
net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
#if IS_ENABLED(CONFIG_IPV6)
- afinfo = xfrm_policy_afinfo[AF_INET6];
+ afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
if (afinfo)
net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
#endif
- read_unlock_bh(&xfrm_policy_afinfo_lock);
-}
-
-static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
-{
- struct xfrm_policy_afinfo *afinfo;
- if (unlikely(family >= NPROTO))
- return NULL;
- read_lock(&xfrm_policy_afinfo_lock);
- afinfo = xfrm_policy_afinfo[family];
- if (unlikely(!afinfo))
- read_unlock(&xfrm_policy_afinfo_lock);
- return afinfo;
-}
-
-static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
-{
- read_unlock(&xfrm_policy_afinfo_lock);
+ rcu_read_unlock();
}
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
@@ -2630,12 +2638,12 @@ static void xfrm_policy_fini(struct net *net)
flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
- audit_info.loginuid = -1;
+ audit_info.loginuid = INVALID_UID;
audit_info.sessionid = -1;
audit_info.secid = 0;
xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
#endif
- audit_info.loginuid = -1;
+ audit_info.loginuid = INVALID_UID;
audit_info.sessionid = -1;
audit_info.secid = 0;
xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
@@ -2742,7 +2750,7 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
}
void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
- uid_t auid, u32 sessionid, u32 secid)
+ kuid_t auid, u32 sessionid, u32 secid)
{
struct audit_buffer *audit_buf;
@@ -2757,7 +2765,7 @@ void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
- uid_t auid, u32 sessionid, u32 secid)
+ kuid_t auid, u32 sessionid, u32 secid)
{
struct audit_buffer *audit_buf;
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 2f6d11d04a2b..3efb07d3eb27 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -420,6 +420,18 @@ err:
return -EINVAL;
}
+static int xfrm_replay_recheck_esn(struct xfrm_state *x,
+ struct sk_buff *skb, __be32 net_seq)
+{
+ if (unlikely(XFRM_SKB_CB(skb)->seq.input.hi !=
+ htonl(xfrm_replay_seqhi(x, net_seq)))) {
+ x->stats.replay_window++;
+ return -EINVAL;
+ }
+
+ return xfrm_replay_check_esn(x, skb, net_seq);
+}
+
static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
{
unsigned int bitnr, nr, i;
@@ -479,6 +491,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
static struct xfrm_replay xfrm_replay_legacy = {
.advance = xfrm_replay_advance,
.check = xfrm_replay_check,
+ .recheck = xfrm_replay_check,
.notify = xfrm_replay_notify,
.overflow = xfrm_replay_overflow,
};
@@ -486,6 +499,7 @@ static struct xfrm_replay xfrm_replay_legacy = {
static struct xfrm_replay xfrm_replay_bmp = {
.advance = xfrm_replay_advance_bmp,
.check = xfrm_replay_check_bmp,
+ .recheck = xfrm_replay_check_bmp,
.notify = xfrm_replay_notify_bmp,
.overflow = xfrm_replay_overflow_bmp,
};
@@ -493,6 +507,7 @@ static struct xfrm_replay xfrm_replay_bmp = {
static struct xfrm_replay xfrm_replay_esn = {
.advance = xfrm_replay_advance_esn,
.check = xfrm_replay_check_esn,
+ .recheck = xfrm_replay_recheck_esn,
.notify = xfrm_replay_notify_bmp,
.overflow = xfrm_replay_overflow_esn,
};
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5b228f97d4b3..3459692092ec 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -166,7 +166,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
-void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
+void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
{
@@ -415,8 +415,17 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
if (x->lft.hard_add_expires_seconds) {
long tmo = x->lft.hard_add_expires_seconds +
x->curlft.add_time - now;
- if (tmo <= 0)
- goto expired;
+ if (tmo <= 0) {
+ if (x->xflags & XFRM_SOFT_EXPIRE) {
+ /* enter hard expire without soft expire first?!
+ * setting a new date could trigger this.
+ * workarbound: fix x->curflt.add_time by below:
+ */
+ x->curlft.add_time = now - x->saved_tmo - 1;
+ tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
+ } else
+ goto expired;
+ }
if (tmo < next)
next = tmo;
}
@@ -433,10 +442,14 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
if (x->lft.soft_add_expires_seconds) {
long tmo = x->lft.soft_add_expires_seconds +
x->curlft.add_time - now;
- if (tmo <= 0)
+ if (tmo <= 0) {
warn = 1;
- else if (tmo < next)
+ x->xflags &= ~XFRM_SOFT_EXPIRE;
+ } else if (tmo < next) {
next = tmo;
+ x->xflags |= XFRM_SOFT_EXPIRE;
+ x->saved_tmo = tmo;
+ }
}
if (x->lft.soft_use_expires_seconds) {
long tmo = x->lft.soft_use_expires_seconds +
@@ -1661,13 +1674,13 @@ void km_state_notify(struct xfrm_state *x, const struct km_event *c)
EXPORT_SYMBOL(km_policy_notify);
EXPORT_SYMBOL(km_state_notify);
-void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
+void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
{
struct net *net = xs_net(x);
struct km_event c;
c.data.hard = hard;
- c.pid = pid;
+ c.portid = portid;
c.event = XFRM_MSG_EXPIRE;
km_state_notify(x, &c);
@@ -1687,7 +1700,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) {
- acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
+ acqret = km->acquire(x, t, pol);
if (!acqret)
err = acqret;
}
@@ -1713,13 +1726,13 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
}
EXPORT_SYMBOL(km_new_mapping);
-void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
+void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
{
struct net *net = xp_net(pol);
struct km_event c;
c.data.hard = hard;
- c.pid = pid;
+ c.portid = portid;
c.event = XFRM_MSG_POLEXPIRE;
km_policy_notify(pol, dir, &c);
@@ -1981,8 +1994,10 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
goto error;
x->outer_mode = xfrm_get_mode(x->props.mode, family);
- if (x->outer_mode == NULL)
+ if (x->outer_mode == NULL) {
+ err = -EPROTONOSUPPORT;
goto error;
+ }
if (init_replay) {
err = xfrm_init_replay(x);
@@ -2045,7 +2060,7 @@ void xfrm_state_fini(struct net *net)
unsigned int sz;
flush_work(&net->xfrm.state_hash_work);
- audit_info.loginuid = -1;
+ audit_info.loginuid = INVALID_UID;
audit_info.sessionid = -1;
audit_info.secid = 0;
xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info);
@@ -2112,7 +2127,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
}
void xfrm_audit_state_add(struct xfrm_state *x, int result,
- uid_t auid, u32 sessionid, u32 secid)
+ kuid_t auid, u32 sessionid, u32 secid)
{
struct audit_buffer *audit_buf;
@@ -2127,7 +2142,7 @@ void xfrm_audit_state_add(struct xfrm_state *x, int result,
EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
void xfrm_audit_state_delete(struct xfrm_state *x, int result,
- uid_t auid, u32 sessionid, u32 secid)
+ kuid_t auid, u32 sessionid, u32 secid)
{
struct audit_buffer *audit_buf;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e75d8e47f35c..421f98444335 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -123,9 +123,21 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
struct nlattr **attrs)
{
struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL];
+ struct xfrm_replay_state_esn *rs;
- if ((p->flags & XFRM_STATE_ESN) && !rt)
- return -EINVAL;
+ if (p->flags & XFRM_STATE_ESN) {
+ if (!rt)
+ return -EINVAL;
+
+ rs = nla_data(rt);
+
+ if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8)
+ return -EINVAL;
+
+ if (nla_len(rt) < xfrm_replay_state_esn_len(rs) &&
+ nla_len(rt) != sizeof(*rs))
+ return -EINVAL;
+ }
if (!rt)
return 0;
@@ -370,14 +382,15 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
struct nlattr *rp)
{
struct xfrm_replay_state_esn *up;
+ int ulen;
if (!replay_esn || !rp)
return 0;
up = nla_data(rp);
+ ulen = xfrm_replay_state_esn_len(up);
- if (xfrm_replay_state_esn_len(replay_esn) !=
- xfrm_replay_state_esn_len(up))
+ if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen)
return -EINVAL;
return 0;
@@ -388,22 +401,28 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn
struct nlattr *rta)
{
struct xfrm_replay_state_esn *p, *pp, *up;
+ int klen, ulen;
if (!rta)
return 0;
up = nla_data(rta);
+ klen = xfrm_replay_state_esn_len(up);
+ ulen = nla_len(rta) >= klen ? klen : sizeof(*up);
- p = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL);
+ p = kzalloc(klen, GFP_KERNEL);
if (!p)
return -ENOMEM;
- pp = kmemdup(up, xfrm_replay_state_esn_len(up), GFP_KERNEL);
+ pp = kzalloc(klen, GFP_KERNEL);
if (!pp) {
kfree(p);
return -ENOMEM;
}
+ memcpy(p, up, ulen);
+ memcpy(pp, up, ulen);
+
*replay_esn = p;
*preplay_esn = pp;
@@ -442,10 +461,11 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
* somehow made shareable and move it to xfrm_state.c - JHS
*
*/
-static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs)
+static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
+ int update_esn)
{
struct nlattr *rp = attrs[XFRMA_REPLAY_VAL];
- struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL];
+ struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL;
struct nlattr *lt = attrs[XFRMA_LTIME_VAL];
struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
@@ -555,7 +575,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
/* override default values from above */
- xfrm_update_ae_params(x, attrs);
+ xfrm_update_ae_params(x, attrs, 0);
return x;
@@ -575,7 +595,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
struct xfrm_state *x;
int err;
struct km_event c;
- uid_t loginuid = audit_get_loginuid(current);
+ kuid_t loginuid = audit_get_loginuid(current);
u32 sessionid = audit_get_sessionid(current);
u32 sid;
@@ -603,7 +623,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
}
c.seq = nlh->nlmsg_seq;
- c.pid = nlh->nlmsg_pid;
+ c.portid = nlh->nlmsg_pid;
c.event = nlh->nlmsg_type;
km_state_notify(x, &c);
@@ -654,7 +674,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
int err = -ESRCH;
struct km_event c;
struct xfrm_usersa_id *p = nlmsg_data(nlh);
- uid_t loginuid = audit_get_loginuid(current);
+ kuid_t loginuid = audit_get_loginuid(current);
u32 sessionid = audit_get_sessionid(current);
u32 sid;
@@ -676,7 +696,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
c.seq = nlh->nlmsg_seq;
- c.pid = nlh->nlmsg_pid;
+ c.portid = nlh->nlmsg_pid;
c.event = nlh->nlmsg_type;
km_state_notify(x, &c);
@@ -689,6 +709,7 @@ out:
static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
{
+ memset(p, 0, sizeof(*p));
memcpy(&p->id, &x->id, sizeof(p->id));
memcpy(&p->sel, &x->sel, sizeof(p->sel));
memcpy(&p->lft, &x->lft, sizeof(p->lft));
@@ -742,7 +763,7 @@ static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb)
return -EMSGSIZE;
algo = nla_data(nla);
- strcpy(algo->alg_name, auth->alg_name);
+ strncpy(algo->alg_name, auth->alg_name, sizeof(algo->alg_name));
memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8);
algo->alg_key_len = auth->alg_key_len;
@@ -826,7 +847,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
struct nlmsghdr *nlh;
int err;
- nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq,
+ nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq,
XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -878,6 +899,7 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
{
struct xfrm_dump_info info;
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
@@ -888,9 +910,10 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
info.nlmsg_seq = seq;
info.nlmsg_flags = 0;
- if (dump_one_state(x, 0, &info)) {
+ err = dump_one_state(x, 0, &info);
+ if (err) {
kfree_skb(skb);
- return NULL;
+ return ERR_PTR(err);
}
return skb;
@@ -904,7 +927,7 @@ static inline size_t xfrm_spdinfo_msgsize(void)
}
static int build_spdinfo(struct sk_buff *skb, struct net *net,
- u32 pid, u32 seq, u32 flags)
+ u32 portid, u32 seq, u32 flags)
{
struct xfrmk_spdinfo si;
struct xfrmu_spdinfo spc;
@@ -913,7 +936,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
int err;
u32 *f;
- nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
+ nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
if (nlh == NULL) /* shouldn't really happen ... */
return -EMSGSIZE;
@@ -946,17 +969,17 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct sk_buff *r_skb;
u32 *flags = nlmsg_data(nlh);
- u32 spid = NETLINK_CB(skb).pid;
+ u32 sportid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC);
if (r_skb == NULL)
return -ENOMEM;
- if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0)
+ if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0)
BUG();
- return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid);
+ return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
}
static inline size_t xfrm_sadinfo_msgsize(void)
@@ -967,7 +990,7 @@ static inline size_t xfrm_sadinfo_msgsize(void)
}
static int build_sadinfo(struct sk_buff *skb, struct net *net,
- u32 pid, u32 seq, u32 flags)
+ u32 portid, u32 seq, u32 flags)
{
struct xfrmk_sadinfo si;
struct xfrmu_sadhinfo sh;
@@ -975,7 +998,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net,
int err;
u32 *f;
- nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
+ nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
if (nlh == NULL) /* shouldn't really happen ... */
return -EMSGSIZE;
@@ -1003,17 +1026,17 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct sk_buff *r_skb;
u32 *flags = nlmsg_data(nlh);
- u32 spid = NETLINK_CB(skb).pid;
+ u32 sportid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC);
if (r_skb == NULL)
return -ENOMEM;
- if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0)
+ if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0)
BUG();
- return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid);
+ return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
}
static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1033,7 +1056,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
if (IS_ERR(resp_skb)) {
err = PTR_ERR(resp_skb);
} else {
- err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid);
+ err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
}
xfrm_state_put(x);
out_noput:
@@ -1114,7 +1137,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
- err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid);
+ err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
out:
xfrm_state_put(x);
@@ -1317,6 +1340,7 @@ static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy
static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)
{
+ memset(p, 0, sizeof(*p));
memcpy(&p->sel, &xp->selector, sizeof(p->sel));
memcpy(&p->lft, &xp->lft, sizeof(p->lft));
memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft));
@@ -1369,7 +1393,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct km_event c;
int err;
int excl;
- uid_t loginuid = audit_get_loginuid(current);
+ kuid_t loginuid = audit_get_loginuid(current);
u32 sessionid = audit_get_sessionid(current);
u32 sid;
@@ -1401,7 +1425,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
- c.pid = nlh->nlmsg_pid;
+ c.portid = nlh->nlmsg_pid;
km_policy_notify(xp, p->dir, &c);
xfrm_pol_put(xp);
@@ -1421,6 +1445,7 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
struct xfrm_user_tmpl *up = &vec[i];
struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
+ memset(up, 0, sizeof(*up));
memcpy(&up->id, &kp->id, sizeof(up->id));
up->family = kp->encap_family;
memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
@@ -1486,7 +1511,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
struct nlmsghdr *nlh;
int err;
- nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq,
+ nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq,
XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -1546,6 +1571,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
{
struct xfrm_dump_info info;
struct sk_buff *skb;
+ int err;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb)
@@ -1556,9 +1582,10 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
info.nlmsg_seq = seq;
info.nlmsg_flags = 0;
- if (dump_one_policy(xp, dir, 0, &info) < 0) {
+ err = dump_one_policy(xp, dir, 0, &info);
+ if (err) {
kfree_skb(skb);
- return NULL;
+ return ERR_PTR(err);
}
return skb;
@@ -1621,10 +1648,10 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
err = PTR_ERR(resp_skb);
} else {
err = nlmsg_unicast(net->xfrm.nlsk, resp_skb,
- NETLINK_CB(skb).pid);
+ NETLINK_CB(skb).portid);
}
} else {
- uid_t loginuid = audit_get_loginuid(current);
+ kuid_t loginuid = audit_get_loginuid(current);
u32 sessionid = audit_get_sessionid(current);
u32 sid;
@@ -1638,7 +1665,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
c.data.byid = p->index;
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
- c.pid = nlh->nlmsg_pid;
+ c.portid = nlh->nlmsg_pid;
km_policy_notify(xp, p->dir, &c);
}
@@ -1668,7 +1695,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
c.data.proto = p->proto;
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
- c.pid = nlh->nlmsg_pid;
+ c.portid = nlh->nlmsg_pid;
c.net = net;
km_state_notify(NULL, &c);
@@ -1695,7 +1722,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
struct nlmsghdr *nlh;
int err;
- nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0);
+ nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0);
if (nlh == NULL)
return -EMSGSIZE;
@@ -1777,11 +1804,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
spin_lock_bh(&x->lock);
c.data.aevent = p->flags;
c.seq = nlh->nlmsg_seq;
- c.pid = nlh->nlmsg_pid;
+ c.portid = nlh->nlmsg_pid;
if (build_aevent(r_skb, x, &c) < 0)
BUG();
- err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid);
+ err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
spin_unlock_bh(&x->lock);
xfrm_state_put(x);
return err;
@@ -1822,12 +1849,12 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
spin_lock_bh(&x->lock);
- xfrm_update_ae_params(x, attrs);
+ xfrm_update_ae_params(x, attrs, 1);
spin_unlock_bh(&x->lock);
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
- c.pid = nlh->nlmsg_pid;
+ c.portid = nlh->nlmsg_pid;
c.data.aevent = XFRM_AE_CU;
km_state_notify(x, &c);
err = 0;
@@ -1862,7 +1889,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
c.data.type = type;
c.event = nlh->nlmsg_type;
c.seq = nlh->nlmsg_seq;
- c.pid = nlh->nlmsg_pid;
+ c.portid = nlh->nlmsg_pid;
c.net = net;
km_policy_notify(NULL, 0, &c);
return 0;
@@ -1918,7 +1945,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
err = 0;
if (up->hard) {
- uid_t loginuid = audit_get_loginuid(current);
+ kuid_t loginuid = audit_get_loginuid(current);
u32 sessionid = audit_get_sessionid(current);
u32 sid;
@@ -1930,7 +1957,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
// reset the timers here?
WARN(1, "Dont know what to do with soft policy expire\n");
}
- km_policy_expired(xp, p->dir, up->hard, current->pid);
+ km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid);
out:
xfrm_pol_put(xp);
@@ -1958,10 +1985,10 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
err = -EINVAL;
if (x->km.state != XFRM_STATE_VALID)
goto out;
- km_state_expired(x, ue->hard, current->pid);
+ km_state_expired(x, ue->hard, nlh->nlmsg_pid);
if (ue->hard) {
- uid_t loginuid = audit_get_loginuid(current);
+ kuid_t loginuid = audit_get_loginuid(current);
u32 sessionid = audit_get_sessionid(current);
u32 sid;
@@ -2370,7 +2397,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
struct nlmsghdr *nlh;
int err;
- nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0);
+ nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2429,7 +2456,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
if (skb == NULL)
return -ENOMEM;
- nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0);
+ nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0);
if (nlh == NULL) {
kfree_skb(skb);
return -EMSGSIZE;
@@ -2497,7 +2524,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
if (skb == NULL)
return -ENOMEM;
- nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0);
+ nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0);
err = -EMSGSIZE;
if (nlh == NULL)
goto out_free_skb;
@@ -2567,8 +2594,7 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,
}
static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
- struct xfrm_tmpl *xt, struct xfrm_policy *xp,
- int dir)
+ struct xfrm_tmpl *xt, struct xfrm_policy *xp)
{
__u32 seq = xfrm_get_acqseq();
struct xfrm_user_acquire *ua;
@@ -2583,7 +2609,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
memcpy(&ua->id, &x->id, sizeof(ua->id));
memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
memcpy(&ua->sel, &x->sel, sizeof(ua->sel));
- copy_to_user_policy(xp, &ua->policy, dir);
+ copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT);
ua->aalgos = xt->aalgos;
ua->ealgos = xt->ealgos;
ua->calgos = xt->calgos;
@@ -2605,7 +2631,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
}
static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
- struct xfrm_policy *xp, int dir)
+ struct xfrm_policy *xp)
{
struct net *net = xs_net(x);
struct sk_buff *skb;
@@ -2614,7 +2640,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
if (skb == NULL)
return -ENOMEM;
- if (build_acquire(skb, x, xt, xp, dir) < 0)
+ if (build_acquire(skb, x, xt, xp) < 0)
BUG();
return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
@@ -2697,7 +2723,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
struct nlmsghdr *nlh;
int err;
- nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0);
+ nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2757,7 +2783,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
if (skb == NULL)
return -ENOMEM;
- nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0);
+ nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0);
err = -EMSGSIZE;
if (nlh == NULL)
goto out_free_skb;
@@ -2811,7 +2837,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c)
if (skb == NULL)
return -ENOMEM;
- nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0);
+ nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0);
err = -EMSGSIZE;
if (nlh == NULL)
goto out_free_skb;
@@ -2964,7 +2990,7 @@ static int __net_init xfrm_user_net_init(struct net *net)
.input = xfrm_netlink_rcv,
};
- nlsk = netlink_kernel_create(net, NETLINK_XFRM, THIS_MODULE, &cfg);
+ nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg);
if (nlsk == NULL)
return -ENOMEM;
net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */