summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorEric Paris <eparis@redhat.com>2013-11-22 18:57:08 -0500
committerEric Paris <eparis@redhat.com>2013-11-22 18:57:54 -0500
commitfc582aef7dcc27a7120cf232c1e76c569c7b6eab (patch)
tree7d275dd4ceab6067b91e9a25a5f6338b425fbccd /net
parent9175c9d2aed528800175ef81c90569d00d23f9be (diff)
parent5e01dc7b26d9f24f39abace5da98ccbd6a5ceb52 (diff)
Merge tag 'v3.12'
Linux 3.12 Conflicts: fs/exec.c
Diffstat (limited to 'net')
-rw-r--r--net/802/mrp.c27
-rw-r--r--net/8021q/vlan.c13
-rw-r--r--net/8021q/vlan_dev.c8
-rw-r--r--net/8021q/vlan_netlink.c2
-rw-r--r--net/9p/client.c14
-rw-r--r--net/9p/trans_rdma.c11
-rw-r--r--net/9p/trans_virtio.c5
-rw-r--r--net/Kconfig4
-rw-r--r--net/appletalk/atalk_proc.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c32
-rw-r--r--net/batman-adv/gateway_client.c27
-rw-r--r--net/batman-adv/gateway_client.h1
-rw-r--r--net/batman-adv/icmp_socket.c1
-rw-r--r--net/batman-adv/main.c63
-rw-r--r--net/batman-adv/main.h5
-rw-r--r--net/batman-adv/network-coding.c28
-rw-r--r--net/batman-adv/network-coding.h14
-rw-r--r--net/batman-adv/routing.c20
-rw-r--r--net/batman-adv/send.c1
-rw-r--r--net/batman-adv/soft-interface.c4
-rw-r--r--net/batman-adv/sysfs.c4
-rw-r--r--net/batman-adv/translation-table.c5
-rw-r--r--net/batman-adv/unicast.c2
-rw-r--r--net/batman-adv/vis.c2
-rw-r--r--net/bluetooth/hci_conn.c62
-rw-r--r--net/bluetooth/hci_core.c40
-rw-r--r--net/bluetooth/hci_event.c35
-rw-r--r--net/bluetooth/hci_sysfs.c2
-rw-r--r--net/bluetooth/hidp/core.c55
-rw-r--r--net/bluetooth/l2cap_core.c10
-rw-r--r--net/bluetooth/rfcomm/tty.c278
-rw-r--r--net/bluetooth/sco.c85
-rw-r--r--net/bridge/br_device.c12
-rw-r--r--net/bridge/br_fdb.c4
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_mdb.c8
-rw-r--r--net/bridge/br_multicast.c55
-rw-r--r--net/bridge/br_netlink.c6
-rw-r--r--net/bridge/br_notify.c5
-rw-r--r--net/bridge/br_private.h35
-rw-r--r--net/bridge/br_stp.c23
-rw-r--r--net/bridge/br_stp_if.c12
-rw-r--r--net/bridge/br_vlan.c125
-rw-r--r--net/bridge/netfilter/ebtable_broute.c2
-rw-r--r--net/bridge/netfilter/ebtable_filter.c2
-rw-r--r--net/bridge/netfilter/ebtable_nat.c2
-rw-r--r--net/caif/cfctrl.c3
-rw-r--r--net/can/gw.c35
-rw-r--r--net/ceph/messenger.c4
-rw-r--r--net/ceph/osd_client.c38
-rw-r--r--net/ceph/osdmap.c2
-rw-r--r--net/compat.c2
-rw-r--r--net/core/datagram.c72
-rw-r--r--net/core/dev.c423
-rw-r--r--net/core/fib_rules.c25
-rw-r--r--net/core/filter.c8
-rw-r--r--net/core/flow_dissector.c12
-rw-r--r--net/core/iovec.c24
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/net-sysfs.c165
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netpoll.c11
-rw-r--r--net/core/netprio_cgroup.c72
-rw-r--r--net/core/pktgen.c61
-rw-r--r--net/core/rtnetlink.c29
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/secure_seq.c29
-rw-r--r--net/core/skbuff.c19
-rw-r--r--net/core/sock.c167
-rw-r--r--net/core/stream.c2
-rw-r--r--net/core/sysctl_net_core.c30
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/dsa/slave.c2
-rw-r--r--net/ieee802154/6lowpan.c291
-rw-r--r--net/ieee802154/6lowpan.h20
-rw-r--r--net/ieee802154/wpan-class.c23
-rw-r--r--net/ipv4/Kconfig16
-rw-r--r--net/ipv4/af_inet.c16
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c17
-rw-r--r--net/ipv4/fib_rules.c25
-rw-r--r--net/ipv4/igmp.c88
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/inetpeer.c4
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_input.c8
-rw-r--r--net/ipv4/ip_output.c21
-rw-r--r--net/ipv4/ip_tunnel.c87
-rw-r--r--net/ipv4/ip_tunnel_core.c12
-rw-r--r--net/ipv4/ip_vti.c542
-rw-r--r--net/ipv4/ipip.c3
-rw-r--r--net/ipv4/ipmr.c20
-rw-r--r--net/ipv4/netfilter/Kconfig13
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/arptable_filter.c2
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c2
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c21
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c480
-rw-r--r--net/ipv4/netfilter/iptable_filter.c2
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c2
-rw-r--r--net/ipv4/netfilter/iptable_nat.c2
-rw-r--r--net/ipv4/netfilter/iptable_raw.c2
-rw-r--r--net/ipv4/netfilter/iptable_security.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c7
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c7
-rw-r--r--net/ipv4/raw.c8
-rw-r--r--net/ipv4/route.c26
-rw-r--r--net/ipv4/syncookies.c29
-rw-r--r--net/ipv4/sysctl_net_ipv4.c17
-rw-r--r--net/ipv4/tcp.c46
-rw-r--r--net/ipv4/tcp_fastopen.c13
-rw-r--r--net/ipv4/tcp_input.c213
-rw-r--r--net/ipv4/tcp_ipv4.c32
-rw-r--r--net/ipv4/tcp_memcontrol.c22
-rw-r--r--net/ipv4/tcp_metrics.c44
-rw-r--r--net/ipv4/tcp_minisocks.c8
-rw-r--r--net/ipv4/tcp_output.c36
-rw-r--r--net/ipv4/tcp_probe.c87
-rw-r--r--net/ipv4/udp.c20
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/addrconf.c244
-rw-r--r--net/ipv6/addrconf_core.c50
-rw-r--r--net/ipv6/addrlabel.c48
-rw-r--r--net/ipv6/af_inet6.c21
-rw-r--r--net/ipv6/ah6.c5
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/esp6.c5
-rw-r--r--net/ipv6/exthdrs.c6
-rw-r--r--net/ipv6/fib6_rules.c37
-rw-r--r--net/ipv6/icmp.c12
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/ip6_gre.c24
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_offload.c4
-rw-r--r--net/ipv6/ip6_output.c103
-rw-r--r--net/ipv6/ip6_tunnel.c65
-rw-r--r--net/ipv6/ip6mr.c14
-rw-r--r--net/ipv6/ipcomp6.c5
-rw-r--r--net/ipv6/mcast.c295
-rw-r--r--net/ipv6/ndisc.c63
-rw-r--r--net/ipv6/netfilter/Kconfig13
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c2
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c20
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c503
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c2
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c2
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c2
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c2
-rw-r--r--net/ipv6/netfilter/ip6table_security.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c7
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c4
-rw-r--r--net/ipv6/output_core.c48
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/raw.c13
-rw-r--r--net/ipv6/route.c151
-rw-r--r--net/ipv6/sit.c101
-rw-r--r--net/ipv6/syncookies.c25
-rw-r--r--net/ipv6/tcp_ipv6.c15
-rw-r--r--net/ipv6/udp.c9
-rw-r--r--net/ipv6/udp_offload.c105
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/ipx/ipx_proc.c2
-rw-r--r--net/irda/irttp.c50
-rw-r--r--net/key/af_key.c17
-rw-r--r--net/l2tp/l2tp_core.c36
-rw-r--r--net/l2tp/l2tp_core.h3
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/lapb/lapb_timer.c1
-rw-r--r--net/llc/af_llc.c6
-rw-r--r--net/llc/llc_conn.c6
-rw-r--r--net/llc/llc_proc.c2
-rw-r--r--net/llc/llc_sap.c4
-rw-r--r--net/mac80211/cfg.c249
-rw-r--r--net/mac80211/chan.c58
-rw-r--r--net/mac80211/debugfs_sta.c9
-rw-r--r--net/mac80211/driver-ops.h13
-rw-r--r--net/mac80211/ht.c53
-rw-r--r--net/mac80211/ibss.c356
-rw-r--r--net/mac80211/ieee80211_i.h73
-rw-r--r--net/mac80211/iface.c30
-rw-r--r--net/mac80211/key.c154
-rw-r--r--net/mac80211/led.c19
-rw-r--r--net/mac80211/led.h4
-rw-r--r--net/mac80211/main.c18
-rw-r--r--net/mac80211/mesh.c10
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/mlme.c118
-rw-r--r--net/mac80211/offchannel.c2
-rw-r--r--net/mac80211/rate.c69
-rw-r--r--net/mac80211/rate.h22
-rw-r--r--net/mac80211/rc80211_minstrel.c33
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c17
-rw-r--r--net/mac80211/rc80211_pid_algo.c1
-rw-r--r--net/mac80211/rx.c507
-rw-r--r--net/mac80211/scan.c91
-rw-r--r--net/mac80211/status.c93
-rw-r--r--net/mac80211/trace.h26
-rw-r--r--net/mac80211/tx.c125
-rw-r--r--net/mac80211/util.c219
-rw-r--r--net/netfilter/Kconfig26
-rw-r--r--net/netfilter/Makefile6
-rw-r--r--net/netfilter/core.c7
-rw-r--r--net/netfilter/ipset/ip_set_core.c5
-rw-r--r--net/netfilter/ipset/ip_set_getport.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h28
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c86
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c72
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c70
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c23
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c89
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c4
-rw-r--r--net/netfilter/nf_conntrack_labels.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c384
-rw-r--r--net/netfilter/nf_conntrack_proto.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c36
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c238
-rw-r--r--net/netfilter/nf_nat_core.c22
-rw-r--r--net/netfilter/nf_nat_helper.c230
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c8
-rw-r--r--net/netfilter/nf_nat_sip.c3
-rw-r--r--net/netfilter/nf_synproxy_core.c434
-rw-r--r--net/netfilter/nf_tproxy_core.c62
-rw-r--r--net/netfilter/nfnetlink_queue_core.c13
-rw-r--r--net/netfilter/nfnetlink_queue_ct.c23
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TPROXY.c169
-rw-r--r--net/netfilter/xt_addrtype.c2
-rw-r--r--net/netfilter/xt_socket.c66
-rw-r--r--net/netlink/af_netlink.c131
-rw-r--r--net/netlink/af_netlink.h3
-rw-r--r--net/nfc/core.c22
-rw-r--r--net/nfc/hci/core.c2
-rw-r--r--net/nfc/netlink.c95
-rw-r--r--net/nfc/nfc.h5
-rw-r--r--net/openvswitch/Kconfig14
-rw-r--r--net/openvswitch/Makefile9
-rw-r--r--net/openvswitch/actions.c45
-rw-r--r--net/openvswitch/datapath.c176
-rw-r--r--net/openvswitch/datapath.h6
-rw-r--r--net/openvswitch/flow.c1487
-rw-r--r--net/openvswitch/flow.h89
-rw-r--r--net/openvswitch/vport-gre.c7
-rw-r--r--net/openvswitch/vport-netdev.c20
-rw-r--r--net/openvswitch/vport-vxlan.c204
-rw-r--r--net/openvswitch/vport.c6
-rw-r--r--net/openvswitch/vport.h1
-rw-r--r--net/packet/af_packet.c65
-rw-r--r--net/phonet/socket.c2
-rw-r--r--net/rfkill/core.c90
-rw-r--r--net/rfkill/rfkill-regulator.c8
-rw-r--r--net/sched/Kconfig14
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/cls_cgroup.c39
-rw-r--r--net/sched/sch_api.c53
-rw-r--r--net/sched/sch_choke.c3
-rw-r--r--net/sched/sch_fq.c815
-rw-r--r--net/sched/sch_generic.c20
-rw-r--r--net/sched/sch_htb.c2
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c2
-rw-r--r--net/sched/sch_netem.c22
-rw-r--r--net/sctp/associola.c8
-rw-r--r--net/sctp/auth.c8
-rw-r--r--net/sctp/bind_addr.c8
-rw-r--r--net/sctp/chunk.c12
-rw-r--r--net/sctp/command.c8
-rw-r--r--net/sctp/debug.c8
-rw-r--r--net/sctp/endpointola.c8
-rw-r--r--net/sctp/input.c21
-rw-r--r--net/sctp/inqueue.c8
-rw-r--r--net/sctp/ipv6.c54
-rw-r--r--net/sctp/objcnt.c8
-rw-r--r--net/sctp/output.c11
-rw-r--r--net/sctp/outqueue.c8
-rw-r--r--net/sctp/primitive.c8
-rw-r--r--net/sctp/probe.c27
-rw-r--r--net/sctp/proc.c12
-rw-r--r--net/sctp/protocol.c10
-rw-r--r--net/sctp/sm_make_chunk.c133
-rw-r--r--net/sctp/sm_sideeffect.c8
-rw-r--r--net/sctp/sm_statefuns.c8
-rw-r--r--net/sctp/sm_statetable.c8
-rw-r--r--net/sctp/socket.c13
-rw-r--r--net/sctp/ssnmap.c8
-rw-r--r--net/sctp/sysctl.c8
-rw-r--r--net/sctp/transport.c8
-rw-r--r--net/sctp/tsnmap.c8
-rw-r--r--net/sctp/ulpevent.c8
-rw-r--r--net/sctp/ulpqueue.c8
-rw-r--r--net/socket.c89
-rw-r--r--net/sunrpc/auth.c68
-rw-r--r--net/sunrpc/auth_generic.c82
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c453
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c26
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c41
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.h5
-rw-r--r--net/sunrpc/auth_null.c6
-rw-r--r--net/sunrpc/auth_unix.c6
-rw-r--r--net/sunrpc/clnt.c157
-rw-r--r--net/sunrpc/rpc_pipe.c193
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/svcsock.c2
-rw-r--r--net/sunrpc/xprtsock.c15
-rw-r--r--net/sysctl_net.c4
-rw-r--r--net/unix/af_unix.c80
-rw-r--r--net/unix/diag.c1
-rw-r--r--net/vmw_vsock/af_vsock.c3
-rw-r--r--net/vmw_vsock/af_vsock.h175
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/vmw_vsock/vmci_transport.h4
-rw-r--r--net/vmw_vsock/vsock_addr.c3
-rw-r--r--net/vmw_vsock/vsock_addr.h30
-rw-r--r--net/wireless/core.c32
-rw-r--r--net/wireless/core.h5
-rw-r--r--net/wireless/ibss.c3
-rw-r--r--net/wireless/mesh.c5
-rw-r--r--net/wireless/mlme.c4
-rw-r--r--net/wireless/nl80211.c564
-rw-r--r--net/wireless/nl80211.h4
-rw-r--r--net/wireless/radiotap.c7
-rw-r--r--net/wireless/rdev-ops.h17
-rw-r--r--net/wireless/scan.c35
-rw-r--r--net/wireless/sysfs.c25
-rw-r--r--net/wireless/trace.h53
-rw-r--r--net/wireless/util.c14
-rw-r--r--net/x25/x25_facilities.c4
-rw-r--r--net/xfrm/xfrm_policy.c40
-rw-r--r--net/xfrm/xfrm_replay.c54
-rw-r--r--net/xfrm/xfrm_state.c15
-rw-r--r--net/xfrm/xfrm_user.c5
347 files changed, 12331 insertions, 5413 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 1eb05d80b07b..3ed616215870 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -24,6 +24,11 @@
static unsigned int mrp_join_time __read_mostly = 200;
module_param(mrp_join_time, uint, 0644);
MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)");
+
+static unsigned int mrp_periodic_time __read_mostly = 1000;
+module_param(mrp_periodic_time, uint, 0644);
+MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
+
MODULE_LICENSE("GPL");
static const u8
@@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data)
mrp_join_timer_arm(app);
}
+static void mrp_periodic_timer_arm(struct mrp_applicant *app)
+{
+ mod_timer(&app->periodic_timer,
+ jiffies + msecs_to_jiffies(mrp_periodic_time));
+}
+
+static void mrp_periodic_timer(unsigned long data)
+{
+ struct mrp_applicant *app = (struct mrp_applicant *)data;
+
+ spin_lock(&app->lock);
+ mrp_mad_event(app, MRP_EVENT_PERIODIC);
+ mrp_pdu_queue(app);
+ spin_unlock(&app->lock);
+
+ mrp_periodic_timer_arm(app);
+}
+
static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
{
__be16 endmark;
@@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app);
mrp_join_timer_arm(app);
+ setup_timer(&app->periodic_timer, mrp_periodic_timer,
+ (unsigned long)app);
+ mrp_periodic_timer_arm(app);
return 0;
err3:
@@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
* all pending messages before the applicant is gone.
*/
del_timer_sync(&app->join_timer);
+ del_timer_sync(&app->periodic_timer);
spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2fb2d88e8c2e..61fc573f1142 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -210,6 +210,7 @@ out_vid_del:
static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
{
struct net_device *new_dev;
+ struct vlan_dev_priv *vlan;
struct net *net = dev_net(real_dev);
struct vlan_net *vn = net_generic(net, vlan_net_id);
char name[IFNAMSIZ];
@@ -260,11 +261,12 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
new_dev->mtu = real_dev->mtu;
new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
- vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q);
- vlan_dev_priv(new_dev)->vlan_id = vlan_id;
- vlan_dev_priv(new_dev)->real_dev = real_dev;
- vlan_dev_priv(new_dev)->dent = NULL;
- vlan_dev_priv(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+ vlan = vlan_dev_priv(new_dev);
+ vlan->vlan_proto = htons(ETH_P_8021Q);
+ vlan->vlan_id = vlan_id;
+ vlan->real_dev = real_dev;
+ vlan->dent = NULL;
+ vlan->flags = VLAN_FLAG_REORDER_HDR;
new_dev->rtnl_link_ops = &vlan_link_ops;
err = register_vlan_dev(new_dev);
@@ -459,6 +461,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_NOTIFY_PEERS:
case NETDEV_BONDING_FAILOVER:
+ case NETDEV_RESEND_IGMP:
/* Propagate to vlan devices */
vlan_group_for_each_dev(grp, i, vlandev)
call_netdevice_notifiers(event, vlandev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 1cd3d2a406f5..09bf1c38805b 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -107,10 +107,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
u16 vlan_tci = 0;
int rc;
- if (!(vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+ if (!(vlan->flags & VLAN_FLAG_REORDER_HDR)) {
vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
- vlan_tci = vlan_dev_priv(dev)->vlan_id;
+ vlan_tci = vlan->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
vhdr->h_vlan_TCI = htons(vlan_tci);
@@ -133,7 +133,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
saddr = dev->dev_addr;
/* Now make the underlying real hard header */
- dev = vlan_dev_priv(dev)->real_dev;
+ dev = vlan->real_dev;
rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
if (rc > 0)
rc += vhdrlen;
@@ -582,7 +582,7 @@ static int vlan_dev_init(struct net_device *dev)
dev->dev_id = real_dev->dev_id;
if (is_zero_ether_addr(dev->dev_addr))
- memcpy(dev->dev_addr, real_dev->dev_addr, dev->addr_len);
+ eth_hw_addr_inherit(dev, real_dev);
if (is_zero_ether_addr(dev->broadcast))
memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 309129732285..c7e634af8516 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev)
return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */
nla_total_size(2) + /* IFLA_VLAN_ID */
- sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
+ nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */
vlan_qos_map_size(vlan->nr_ingress_mappings) +
vlan_qos_map_size(vlan->nr_egress_mappings);
}
diff --git a/net/9p/client.c b/net/9p/client.c
index 8b93cae2d11d..ee8fd6bd4035 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -658,17 +658,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
/*
* if we haven't received a response for oldreq,
- * remove it from the list, and notify the transport
- * layer that the reply will never arrive.
+ * remove it from the list
*/
- spin_lock(&c->lock);
if (oldreq->status == REQ_STATUS_FLSH) {
+ spin_lock(&c->lock);
list_del(&oldreq->req_list);
spin_unlock(&c->lock);
- if (c->trans_mod->cancelled)
- c->trans_mod->cancelled(c, req);
- } else {
- spin_unlock(&c->lock);
}
p9_free_req(c, req);
@@ -992,6 +987,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
{
int err;
struct p9_client *clnt;
+ char *client_id;
err = 0;
clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
@@ -1000,6 +996,10 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
clnt->trans_mod = NULL;
clnt->trans = NULL;
+
+ client_id = utsname()->nodename;
+ memcpy(clnt->name, client_id, strlen(client_id) + 1);
+
spin_lock_init(&clnt->lock);
INIT_LIST_HEAD(&clnt->fidlist);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 928f2bb9bf8d..8f68df5d2973 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -588,17 +588,6 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
return 1;
}
-/* A request has been fully flushed without a reply.
- * That means we have posted one buffer in excess.
- */
-static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
-{
- struct p9_trans_rdma *rdma = client->trans;
-
- atomic_inc(&rdma->excess_rc);
- return 0;
-}
-
/**
* trans_create_rdma - Transport method for creating a transport instance
* @client: client instance
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e1c26b101830..990afab2be1b 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -577,6 +577,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
mutex_lock(&virtio_9p_lock);
list_add_tail(&chan->chan_list, &virtio_chan_list);
mutex_unlock(&virtio_9p_lock);
+
+ /* Let udev rules use the new mount_tag attribute. */
+ kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
+
return 0;
out_free_tag:
@@ -654,6 +658,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
list_del(&chan->chan_list);
mutex_unlock(&virtio_9p_lock);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+ kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
kfree(chan->tag);
kfree(chan->vc_wq);
kfree(chan);
diff --git a/net/Kconfig b/net/Kconfig
index 2b406608a1a4..b50dacc072f0 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -228,7 +228,7 @@ config RPS
config RFS_ACCEL
boolean
- depends on RPS && GENERIC_HARDIRQS
+ depends on RPS
select CPU_RMAP
default y
@@ -281,7 +281,7 @@ menu "Network testing"
config NET_PKTGEN
tristate "Packet Generator (USE WITH CAUTION)"
- depends on PROC_FS
+ depends on INET && PROC_FS
---help---
This module will inject preconfigured packets, at a configurable
rate, out of a given interface. It is used for network interface
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index c30f3a0717fb..af46bc49e1e9 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -178,7 +178,7 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v)
at = at_sk(s);
seq_printf(seq, "%02X %04X:%02X:%02X %04X:%02X:%02X %08X:%08X "
- "%02X %d\n",
+ "%02X %u\n",
s->sk_type, ntohs(at->src_net), at->src_node, at->src_port,
ntohs(at->dest_net), at->dest_node, at->dest_port,
sk_wmem_alloc_get(s),
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 62da5278014a..0a8a80cd4bf1 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -28,6 +28,22 @@
#include "bat_algo.h"
#include "network-coding.h"
+
+/**
+ * batadv_dup_status - duplicate status
+ * @BATADV_NO_DUP: the packet is no duplicate
+ * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
+ * neighbor)
+ * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
+ * @BATADV_PROTECTED: originator is currently protected (after reboot)
+ */
+enum batadv_dup_status {
+ BATADV_NO_DUP = 0,
+ BATADV_ORIG_DUP,
+ BATADV_NEIGH_DUP,
+ BATADV_PROTECTED,
+};
+
/**
* batadv_ring_buffer_set - update the ring buffer with the given value
* @lq_recv: pointer to the ring buffer
@@ -71,21 +87,6 @@ static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[])
return (uint8_t)(sum / count);
}
-/*
- * batadv_dup_status - duplicate status
- * @BATADV_NO_DUP: the packet is a duplicate
- * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the
- * neighbor)
- * @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor
- * @BATADV_PROTECTED: originator is currently protected (after reboot)
- */
-enum batadv_dup_status {
- BATADV_NO_DUP = 0,
- BATADV_ORIG_DUP,
- BATADV_NEIGH_DUP,
- BATADV_PROTECTED,
-};
-
static struct batadv_neigh_node *
batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
const uint8_t *neigh_addr,
@@ -478,6 +479,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
kfree(forw_packet_aggr);
goto out;
}
+ forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
skb_buff = skb_put(forw_packet_aggr->skb, packet_len);
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 7614af31daff..1ce4b8763ef2 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -190,6 +190,33 @@ next:
return curr_gw;
}
+/**
+ * batadv_gw_check_client_stop - check if client mode has been switched off
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * This function assumes the caller has checked that the gw state *is actually
+ * changing*. This function is not supposed to be called when there is no state
+ * change.
+ */
+void batadv_gw_check_client_stop(struct batadv_priv *bat_priv)
+{
+ struct batadv_gw_node *curr_gw;
+
+ if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT)
+ return;
+
+ curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+ if (!curr_gw)
+ return;
+
+ /* if batman-adv is switching the gw client mode off and a gateway was
+ * already selected, send a DEL uevent
+ */
+ batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL);
+
+ batadv_gw_node_free_ref(curr_gw);
+}
+
void batadv_gw_election(struct batadv_priv *bat_priv)
{
struct batadv_gw_node *curr_gw = NULL, *next_gw = NULL;
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 1037d75da51f..ceef4ebe8bcd 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -20,6 +20,7 @@
#ifndef _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
#define _NET_BATMAN_ADV_GATEWAY_CLIENT_H_
+void batadv_gw_check_client_stop(struct batadv_priv *bat_priv);
void batadv_gw_deselect(struct batadv_priv *bat_priv);
void batadv_gw_election(struct batadv_priv *bat_priv);
struct batadv_orig_node *
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index b27508b8085c..5a99bb4b6b82 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -183,6 +183,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
goto out;
}
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 08125f3f6064..1356af660b5b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -19,6 +19,10 @@
#include <linux/crc32c.h>
#include <linux/highmem.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/dsfield.h>
#include "main.h"
#include "sysfs.h"
#include "debugfs.h"
@@ -61,6 +65,7 @@ static int __init batadv_init(void)
batadv_recv_handler_init();
batadv_iv_init();
+ batadv_nc_init();
batadv_event_workqueue = create_singlethread_workqueue("bat_events");
@@ -138,7 +143,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
if (ret < 0)
goto err;
- ret = batadv_nc_init(bat_priv);
+ ret = batadv_nc_mesh_init(bat_priv);
if (ret < 0)
goto err;
@@ -163,7 +168,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_vis_quit(bat_priv);
batadv_gw_node_purge(bat_priv);
- batadv_nc_free(bat_priv);
+ batadv_nc_mesh_free(bat_priv);
batadv_dat_free(bat_priv);
batadv_bla_free(bat_priv);
@@ -249,6 +254,60 @@ out:
return primary_if;
}
+/**
+ * batadv_skb_set_priority - sets skb priority according to packet content
+ * @skb: the packet to be sent
+ * @offset: offset to the packet content
+ *
+ * This function sets a value between 256 and 263 (802.1d priority), which
+ * can be interpreted by the cfg80211 or other drivers.
+ */
+void batadv_skb_set_priority(struct sk_buff *skb, int offset)
+{
+ struct iphdr ip_hdr_tmp, *ip_hdr;
+ struct ipv6hdr ip6_hdr_tmp, *ip6_hdr;
+ struct ethhdr ethhdr_tmp, *ethhdr;
+ struct vlan_ethhdr *vhdr, vhdr_tmp;
+ u32 prio;
+
+ /* already set, do nothing */
+ if (skb->priority >= 256 && skb->priority <= 263)
+ return;
+
+ ethhdr = skb_header_pointer(skb, offset, sizeof(*ethhdr), &ethhdr_tmp);
+ if (!ethhdr)
+ return;
+
+ switch (ethhdr->h_proto) {
+ case htons(ETH_P_8021Q):
+ vhdr = skb_header_pointer(skb, offset + sizeof(*vhdr),
+ sizeof(*vhdr), &vhdr_tmp);
+ if (!vhdr)
+ return;
+ prio = ntohs(vhdr->h_vlan_TCI) & VLAN_PRIO_MASK;
+ prio = prio >> VLAN_PRIO_SHIFT;
+ break;
+ case htons(ETH_P_IP):
+ ip_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr),
+ sizeof(*ip_hdr), &ip_hdr_tmp);
+ if (!ip_hdr)
+ return;
+ prio = (ipv4_get_dsfield(ip_hdr) & 0xfc) >> 5;
+ break;
+ case htons(ETH_P_IPV6):
+ ip6_hdr = skb_header_pointer(skb, offset + sizeof(*ethhdr),
+ sizeof(*ip6_hdr), &ip6_hdr_tmp);
+ if (!ip6_hdr)
+ return;
+ prio = (ipv6_get_dsfield(ip6_hdr) & 0xfc) >> 5;
+ break;
+ default:
+ return;
+ }
+
+ skb->priority = prio + 256;
+}
+
static int batadv_recv_unhandled_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if)
{
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 5e9aebb7d56b..24675523930f 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.3.0"
+#define BATADV_SOURCE_VERSION "2013.4.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -184,6 +184,7 @@ void batadv_mesh_free(struct net_device *soft_iface);
int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
struct batadv_hard_iface *
batadv_seq_print_text_primary_if_get(struct seq_file *seq);
+void batadv_skb_set_priority(struct sk_buff *skb, int offset);
int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype,
struct net_device *orig_dev);
@@ -253,7 +254,7 @@ static inline void batadv_dbg(int type __always_unused,
/* returns 1 if they are the same ethernet addr
*
- * note: can't use compare_ether_addr() as it requires aligned memory
+ * note: can't use ether_addr_equal() as it requires aligned memory
*/
static inline int batadv_compare_eth(const void *data1, const void *data2)
{
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index a487d46e0aec..4ecc0b6bf8ab 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
/**
+ * batadv_nc_init - one-time initialization for network coding
+ */
+int __init batadv_nc_init(void)
+{
+ int ret;
+
+ /* Register our packet type */
+ ret = batadv_recv_handler_register(BATADV_CODED,
+ batadv_nc_recv_coded_packet);
+
+ return ret;
+}
+
+/**
* batadv_nc_start_timer - initialise the nc periodic worker
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_nc_init - initialise coding hash table and start house keeping
+ * batadv_nc_mesh_init - initialise coding hash table and start house keeping
* @bat_priv: the bat priv with all the soft interface information
*/
-int batadv_nc_init(struct batadv_priv *bat_priv)
+int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
{
bat_priv->nc.timestamp_fwd_flush = jiffies;
bat_priv->nc.timestamp_sniffed_purge = jiffies;
@@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv)
batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
&batadv_nc_decoding_hash_lock_class_key);
- /* Register our packet type */
- if (batadv_recv_handler_register(BATADV_CODED,
- batadv_nc_recv_coded_packet) < 0)
- goto err;
-
INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
batadv_nc_start_timer(bat_priv);
@@ -1721,12 +1730,11 @@ free_nc_packet:
}
/**
- * batadv_nc_free - clean up network coding memory
+ * batadv_nc_mesh_free - clean up network coding memory
* @bat_priv: the bat priv with all the soft interface information
*/
-void batadv_nc_free(struct batadv_priv *bat_priv)
+void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
{
- batadv_recv_handler_unregister(BATADV_CODED);
cancel_delayed_work_sync(&bat_priv->nc.work);
batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL);
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 85a4ec81ad50..ddfa618e80bf 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -22,8 +22,9 @@
#ifdef CONFIG_BATMAN_ADV_NC
-int batadv_nc_init(struct batadv_priv *bat_priv);
-void batadv_nc_free(struct batadv_priv *bat_priv);
+int batadv_nc_init(void);
+int batadv_nc_mesh_init(struct batadv_priv *bat_priv);
+void batadv_nc_mesh_free(struct batadv_priv *bat_priv);
void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_orig_node *orig_neigh_node,
@@ -46,12 +47,17 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
#else /* ifdef CONFIG_BATMAN_ADV_NC */
-static inline int batadv_nc_init(struct batadv_priv *bat_priv)
+static inline int batadv_nc_init(void)
{
return 0;
}
-static inline void batadv_nc_free(struct batadv_priv *bat_priv)
+static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
{
return;
}
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 2f0bd3ffe6e8..0439395d7ba5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -775,7 +775,7 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_unicast_packet *unicast_packet;
struct ethhdr *ethhdr = eth_hdr(skb);
- int res, ret = NET_RX_DROP;
+ int res, hdr_len, ret = NET_RX_DROP;
struct sk_buff *new_skb;
unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -835,6 +835,22 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
/* decrement ttl */
unicast_packet->header.ttl--;
+ switch (unicast_packet->header.packet_type) {
+ case BATADV_UNICAST_4ADDR:
+ hdr_len = sizeof(struct batadv_unicast_4addr_packet);
+ break;
+ case BATADV_UNICAST:
+ hdr_len = sizeof(struct batadv_unicast_packet);
+ break;
+ default:
+ /* other packet types not supported - yet */
+ hdr_len = -1;
+ break;
+ }
+
+ if (hdr_len > 0)
+ batadv_skb_set_priority(skb, hdr_len);
+
res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
/* translate transmit result into receive result */
@@ -1193,6 +1209,8 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
if (batadv_bla_check_bcast_duplist(bat_priv, skb))
goto out;
+ batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
+
/* rebroadcast packet */
batadv_add_bcast_packet_to_list(bat_priv, skb, 1);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index e9ff8d801201..0266edd0fa7f 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -67,7 +67,6 @@ int batadv_send_skb_packet(struct sk_buff *skb,
ethhdr->h_proto = __constant_htons(ETH_P_BATMAN);
skb_set_network_header(skb, ETH_HLEN);
- skb->priority = TC_PRIO_CONTROL;
skb->protocol = __constant_htons(ETH_P_BATMAN);
skb->dev = hard_iface->net_dev;
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0f04e1c302b4..813db4e64602 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -168,6 +168,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
case ETH_P_8021Q:
vhdr = (struct vlan_ethhdr *)skb->data;
vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+ vid |= BATADV_VLAN_HAS_TAG;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
break;
@@ -229,6 +230,8 @@ static int batadv_interface_tx(struct sk_buff *skb,
*/
}
+ batadv_skb_set_priority(skb, 0);
+
/* ethernet packet should be broadcasted */
if (do_bcast) {
primary_if = batadv_primary_if_get_selected(bat_priv);
@@ -329,6 +332,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
case ETH_P_8021Q:
vhdr = (struct vlan_ethhdr *)skb->data;
vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
+ vid |= BATADV_VLAN_HAS_TAG;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
break;
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 929e304dacb2..4114b961bc2c 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -385,6 +385,10 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj,
curr_gw_mode_str, buff);
batadv_gw_deselect(bat_priv);
+ /* always call batadv_gw_check_client_stop() before changing the gateway
+ * state
+ */
+ batadv_gw_check_client_stop(bat_priv);
atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp);
return count;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 429aeef3d8b2..34510f38708f 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1626,6 +1626,7 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
if (!skb)
goto out;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len);
tt_response->ttvn = ttvn;
@@ -1691,6 +1692,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv,
if (!skb)
goto out;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
tt_req_len = sizeof(*tt_request);
@@ -1788,6 +1790,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
if (!skb)
goto unlock;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
packet_pos = skb_put(skb, len);
tt_response = (struct batadv_tt_query_packet *)packet_pos;
@@ -1906,6 +1909,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
if (!skb)
goto unlock;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
packet_pos = skb_put(skb, len);
tt_response = (struct batadv_tt_query_packet *)packet_pos;
@@ -2240,6 +2244,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client,
if (!skb)
goto out;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, ETH_HLEN);
roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len);
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 857e1b8349ee..48b31d33ce6b 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -242,6 +242,8 @@ int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
if (!frag_skb)
goto dropped;
+
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(frag_skb, ucf_hdr_len);
unicast_packet = (struct batadv_unicast_packet *)skb->data;
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index 4983340f1943..d8ea31a58457 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -397,6 +397,7 @@ batadv_add_packet(struct batadv_priv *bat_priv,
kfree(info);
return NULL;
}
+ info->skb_packet->priority = TC_PRIO_CONTROL;
skb_reserve(info->skb_packet, ETH_HLEN);
packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len);
@@ -861,6 +862,7 @@ int batadv_vis_init(struct batadv_priv *bat_priv)
if (!bat_priv->vis.my_info->skb_packet)
goto free_info;
+ bat_priv->vis.my_info->skb_packet->priority = TC_PRIO_CONTROL;
skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN);
tmp_skb = bat_priv->vis.my_info->skb_packet;
packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet));
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 6c7f36379722..f0817121ec5e 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -31,6 +31,24 @@
#include <net/bluetooth/a2mp.h>
#include <net/bluetooth/smp.h>
+struct sco_param {
+ u16 pkt_type;
+ u16 max_latency;
+};
+
+static const struct sco_param sco_param_cvsd[] = {
+ { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a }, /* S3 */
+ { EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007 }, /* S2 */
+ { EDR_ESCO_MASK | ESCO_EV3, 0x0007 }, /* S1 */
+ { EDR_ESCO_MASK | ESCO_HV3, 0xffff }, /* D1 */
+ { EDR_ESCO_MASK | ESCO_HV1, 0xffff }, /* D0 */
+};
+
+static const struct sco_param sco_param_wideband[] = {
+ { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000d }, /* T2 */
+ { EDR_ESCO_MASK | ESCO_EV3, 0x0008 }, /* T1 */
+};
+
static void hci_le_create_connection(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -172,10 +190,11 @@ static void hci_add_sco(struct hci_conn *conn, __u16 handle)
hci_send_cmd(hdev, HCI_OP_ADD_SCO, sizeof(cp), &cp);
}
-void hci_setup_sync(struct hci_conn *conn, __u16 handle)
+bool hci_setup_sync(struct hci_conn *conn, __u16 handle)
{
struct hci_dev *hdev = conn->hdev;
struct hci_cp_setup_sync_conn cp;
+ const struct sco_param *param;
BT_DBG("hcon %p", conn);
@@ -185,15 +204,35 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
conn->attempt++;
cp.handle = cpu_to_le16(handle);
- cp.pkt_type = cpu_to_le16(conn->pkt_type);
cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
- cp.max_latency = __constant_cpu_to_le16(0xffff);
- cp.voice_setting = cpu_to_le16(hdev->voice_setting);
- cp.retrans_effort = 0xff;
+ cp.voice_setting = cpu_to_le16(conn->setting);
+
+ switch (conn->setting & SCO_AIRMODE_MASK) {
+ case SCO_AIRMODE_TRANSP:
+ if (conn->attempt > ARRAY_SIZE(sco_param_wideband))
+ return false;
+ cp.retrans_effort = 0x02;
+ param = &sco_param_wideband[conn->attempt - 1];
+ break;
+ case SCO_AIRMODE_CVSD:
+ if (conn->attempt > ARRAY_SIZE(sco_param_cvsd))
+ return false;
+ cp.retrans_effort = 0x01;
+ param = &sco_param_cvsd[conn->attempt - 1];
+ break;
+ default:
+ return false;
+ }
- hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp);
+ cp.pkt_type = __cpu_to_le16(param->pkt_type);
+ cp.max_latency = __cpu_to_le16(param->max_latency);
+
+ if (hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0)
+ return false;
+
+ return true;
}
void hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max,
@@ -560,13 +599,13 @@ static struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
return acl;
}
-static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
- bdaddr_t *dst, u8 sec_level, u8 auth_type)
+struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
+ __u16 setting)
{
struct hci_conn *acl;
struct hci_conn *sco;
- acl = hci_connect_acl(hdev, dst, sec_level, auth_type);
+ acl = hci_connect_acl(hdev, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING);
if (IS_ERR(acl))
return acl;
@@ -584,6 +623,8 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
hci_conn_hold(sco);
+ sco->setting = setting;
+
if (acl->state == BT_CONNECTED &&
(sco->state == BT_OPEN || sco->state == BT_CLOSED)) {
set_bit(HCI_CONN_POWER_SAVE, &acl->flags);
@@ -612,9 +653,6 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
return hci_connect_le(hdev, dst, dst_type, sec_level, auth_type);
case ACL_LINK:
return hci_connect_acl(hdev, dst, sec_level, auth_type);
- case SCO_LINK:
- case ESCO_LINK:
- return hci_connect_sco(hdev, type, dst, sec_level, auth_type);
}
return ERR_PTR(-EINVAL);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index cc27297da5a9..fb7356fcfe51 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -454,6 +454,18 @@ static void hci_setup_event_mask(struct hci_request *req)
events[4] |= 0x04; /* Read Remote Extended Features Complete */
events[5] |= 0x08; /* Synchronous Connection Complete */
events[5] |= 0x10; /* Synchronous Connection Changed */
+ } else {
+ /* Use a different default for LE-only devices */
+ memset(events, 0, sizeof(events));
+ events[0] |= 0x10; /* Disconnection Complete */
+ events[0] |= 0x80; /* Encryption Change */
+ events[1] |= 0x08; /* Read Remote Version Information Complete */
+ events[1] |= 0x20; /* Command Complete */
+ events[1] |= 0x40; /* Command Status */
+ events[1] |= 0x80; /* Hardware Error */
+ events[2] |= 0x04; /* Number of Completed Packets */
+ events[3] |= 0x02; /* Data Buffer Overflow */
+ events[5] |= 0x80; /* Encryption Key Refresh Complete */
}
if (lmp_inq_rssi_capable(hdev))
@@ -608,7 +620,7 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
* as supported send it. If not supported assume that the controller
* does not have actual support for stored link keys which makes this
* command redundant anyway.
- */
+ */
if (hdev->commands[6] & 0x80) {
struct hci_cp_delete_stored_link_key cp;
@@ -1134,7 +1146,11 @@ int hci_dev_open(__u16 dev)
goto done;
}
- if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) {
+ /* Check for rfkill but allow the HCI setup stage to proceed
+ * (which in itself doesn't cause any RF activity).
+ */
+ if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
+ !test_bit(HCI_SETUP, &hdev->dev_flags)) {
ret = -ERFKILL;
goto done;
}
@@ -1554,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
- if (!blocked)
- return 0;
-
- hci_dev_do_close(hdev);
+ if (blocked) {
+ set_bit(HCI_RFKILLED, &hdev->dev_flags);
+ if (!test_bit(HCI_SETUP, &hdev->dev_flags))
+ hci_dev_do_close(hdev);
+ } else {
+ clear_bit(HCI_RFKILLED, &hdev->dev_flags);
+ }
return 0;
}
@@ -1579,9 +1598,13 @@ static void hci_power_on(struct work_struct *work)
return;
}
- if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+ if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
+ clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+ hci_dev_do_close(hdev);
+ } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
HCI_AUTO_OFF_TIMEOUT);
+ }
if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
mgmt_index_added(hdev);
@@ -2197,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev)
}
}
+ if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
+ set_bit(HCI_RFKILLED, &hdev->dev_flags);
+
set_bit(HCI_SETUP, &hdev->dev_flags);
if (hdev->dev_type != HCI_AMP)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0437200d92f4..8db3e89fae35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2904,15 +2904,16 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
hci_conn_add_sysfs(conn);
break;
+ case 0x0d: /* Connection Rejected due to Limited Resources */
case 0x11: /* Unsupported Feature or Parameter Value */
case 0x1c: /* SCO interval rejected */
case 0x1a: /* Unsupported Remote Feature */
case 0x1f: /* Unspecified error */
- if (conn->out && conn->attempt < 2) {
+ if (conn->out) {
conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
(hdev->esco_type & EDR_ESCO_MASK);
- hci_setup_sync(conn, conn->link->handle);
- goto unlock;
+ if (hci_setup_sync(conn, conn->link->handle))
+ goto unlock;
}
/* fall through */
@@ -3024,17 +3025,20 @@ unlock:
static u8 hci_get_auth_req(struct hci_conn *conn)
{
/* If remote requests dedicated bonding follow that lead */
- if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) {
+ if (conn->remote_auth == HCI_AT_DEDICATED_BONDING ||
+ conn->remote_auth == HCI_AT_DEDICATED_BONDING_MITM) {
/* If both remote and local IO capabilities allow MITM
* protection then require it, otherwise don't */
- if (conn->remote_cap == 0x03 || conn->io_capability == 0x03)
- return 0x02;
+ if (conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT ||
+ conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)
+ return HCI_AT_DEDICATED_BONDING;
else
- return 0x03;
+ return HCI_AT_DEDICATED_BONDING_MITM;
}
/* If remote requests no-bonding follow that lead */
- if (conn->remote_auth == 0x00 || conn->remote_auth == 0x01)
+ if (conn->remote_auth == HCI_AT_NO_BONDING ||
+ conn->remote_auth == HCI_AT_NO_BONDING_MITM)
return conn->remote_auth | (conn->auth_type & 0x01);
return conn->auth_type;
@@ -3066,7 +3070,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
/* Change the IO capability from KeyboardDisplay
* to DisplayYesNo as it is not supported by BT spec. */
cp.capability = (conn->io_capability == 0x04) ?
- 0x01 : conn->io_capability;
+ HCI_IO_DISPLAY_YESNO : conn->io_capability;
conn->auth_type = hci_get_auth_req(conn);
cp.authentication = conn->auth_type;
@@ -3140,7 +3144,8 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
* request. The only exception is when we're dedicated bonding
* initiators (connect_cfm_cb set) since then we always have the MITM
* bit set. */
- if (!conn->connect_cfm_cb && loc_mitm && conn->remote_cap == 0x03) {
+ if (!conn->connect_cfm_cb && loc_mitm &&
+ conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) {
BT_DBG("Rejecting request: remote device can't provide MITM");
hci_send_cmd(hdev, HCI_OP_USER_CONFIRM_NEG_REPLY,
sizeof(ev->bdaddr), &ev->bdaddr);
@@ -3148,8 +3153,8 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev,
}
/* If no side requires MITM protection; auto-accept */
- if ((!loc_mitm || conn->remote_cap == 0x03) &&
- (!rem_mitm || conn->io_capability == 0x03)) {
+ if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) &&
+ (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) {
/* If we're not the initiators request authorization to
* proceed from user space (mgmt_user_confirm with
@@ -3552,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
cp.handle = cpu_to_le16(conn->handle);
if (ltk->authenticated)
- conn->sec_level = BT_SECURITY_HIGH;
+ conn->pending_sec_level = BT_SECURITY_HIGH;
+ else
+ conn->pending_sec_level = BT_SECURITY_MEDIUM;
+
+ conn->enc_key_size = ltk->enc_size;
hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 7ad6ecf36f20..edf623a29043 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -590,7 +590,7 @@ int __init bt_sysfs_init(void)
bt_class = class_create(THIS_MODULE, "bluetooth");
- return PTR_RET(bt_class);
+ return PTR_ERR_OR_ZERO(bt_class);
}
void bt_sysfs_cleanup(void)
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0c699cdc3696..bdc35a7a7fee 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -225,17 +225,47 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
static int hidp_send_report(struct hidp_session *session, struct hid_report *report)
{
- unsigned char buf[32], hdr;
- int rsize;
+ unsigned char hdr;
+ u8 *buf;
+ int rsize, ret;
- rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
- if (rsize > sizeof(buf))
+ buf = hid_alloc_report_buf(report, GFP_ATOMIC);
+ if (!buf)
return -EIO;
hid_output_report(report, buf);
hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
- return hidp_send_intr_message(session, hdr, buf, rsize);
+ rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
+ ret = hidp_send_intr_message(session, hdr, buf, rsize);
+
+ kfree(buf);
+ return ret;
+}
+
+static int hidp_hidinput_event(struct input_dev *dev, unsigned int type,
+ unsigned int code, int value)
+{
+ struct hid_device *hid = input_get_drvdata(dev);
+ struct hidp_session *session = hid->driver_data;
+ struct hid_field *field;
+ int offset;
+
+ BT_DBG("session %p type %d code %d value %d",
+ session, type, code, value);
+
+ if (type != EV_LED)
+ return -1;
+
+ offset = hidinput_find_field(hid, type, code, &field);
+ if (offset == -1) {
+ hid_warn(dev, "event field not found\n");
+ return -1;
+ }
+
+ hid_set_field(field, offset, value);
+
+ return hidp_send_report(session, field->report);
}
static int hidp_get_raw_report(struct hid_device *hid,
@@ -678,20 +708,6 @@ static int hidp_parse(struct hid_device *hid)
static int hidp_start(struct hid_device *hid)
{
- struct hidp_session *session = hid->driver_data;
- struct hid_report *report;
-
- if (hid->quirks & HID_QUIRK_NO_INIT_REPORTS)
- return 0;
-
- list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].
- report_list, list)
- hidp_send_report(session, report);
-
- list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].
- report_list, list)
- hidp_send_report(session, report);
-
return 0;
}
@@ -711,6 +727,7 @@ static struct hid_ll_driver hidp_hid_driver = {
.stop = hidp_stop,
.open = hidp_open,
.close = hidp_close,
+ .hidinput_input_event = hidp_hidinput_event,
};
/* This function sets up the hid device. It does not add it
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 8c3499bec893..63fa11109a1c 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1415,8 +1415,9 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
sk->sk_state_change(sk);
release_sock(sk);
- } else if (chan->state == BT_CONNECT)
+ } else if (chan->state == BT_CONNECT) {
l2cap_do_start(chan);
+ }
l2cap_chan_unlock(chan);
}
@@ -3754,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn,
sk = chan->sk;
+ /* For certain devices (ex: HID mouse), support for authentication,
+ * pairing and bonding is optional. For such devices, in order to avoid
+ * keeping the ACL alive for too long after L2CAP disconnection, reset
+ * the ACL disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect.
+ */
+ conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
+
bacpy(&bt_sk(sk)->src, conn->src);
bacpy(&bt_sk(sk)->dst, conn->dst);
chan->psm = psm;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index b6e44ad6cca6..84fcf9fff3ea 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -58,7 +58,6 @@ struct rfcomm_dev {
uint modem_status;
struct rfcomm_dlc *dlc;
- wait_queue_head_t wait;
struct device *tty_dev;
@@ -76,13 +75,6 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig);
/* ---- Device functions ---- */
-/*
- * The reason this isn't actually a race, as you no doubt have a little voice
- * screaming at you in your head, is that the refcount should never actually
- * reach zero unless the device has already been taken off the list, in
- * rfcomm_dev_del(). And if that's not true, we'll hit the BUG() in
- * rfcomm_dev_destruct() anyway.
- */
static void rfcomm_dev_destruct(struct tty_port *port)
{
struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
@@ -90,10 +82,9 @@ static void rfcomm_dev_destruct(struct tty_port *port)
BT_DBG("dev %p dlc %p", dev, dlc);
- /* Refcount should only hit zero when called from rfcomm_dev_del()
- which will have taken us off the list. Everything else are
- refcounting bugs. */
- BUG_ON(!list_empty(&dev->list));
+ spin_lock(&rfcomm_dev_lock);
+ list_del(&dev->list);
+ spin_unlock(&rfcomm_dev_lock);
rfcomm_dlc_lock(dlc);
/* Detach DLC if it's owned by this dev */
@@ -112,8 +103,39 @@ static void rfcomm_dev_destruct(struct tty_port *port)
module_put(THIS_MODULE);
}
+/* device-specific initialization: open the dlc */
+static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty)
+{
+ struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
+
+ return rfcomm_dlc_open(dev->dlc, &dev->src, &dev->dst, dev->channel);
+}
+
+/* we block the open until the dlc->state becomes BT_CONNECTED */
+static int rfcomm_dev_carrier_raised(struct tty_port *port)
+{
+ struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
+
+ return (dev->dlc->state == BT_CONNECTED);
+}
+
+/* device-specific cleanup: close the dlc */
+static void rfcomm_dev_shutdown(struct tty_port *port)
+{
+ struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
+
+ if (dev->tty_dev->parent)
+ device_move(dev->tty_dev, NULL, DPM_ORDER_DEV_LAST);
+
+ /* close the dlc */
+ rfcomm_dlc_close(dev->dlc, 0);
+}
+
static const struct tty_port_operations rfcomm_port_ops = {
.destruct = rfcomm_dev_destruct,
+ .activate = rfcomm_dev_activate,
+ .shutdown = rfcomm_dev_shutdown,
+ .carrier_raised = rfcomm_dev_carrier_raised,
};
static struct rfcomm_dev *__rfcomm_dev_get(int id)
@@ -236,7 +258,6 @@ static int rfcomm_dev_add(struct rfcomm_dev_req *req, struct rfcomm_dlc *dlc)
tty_port_init(&dev->port);
dev->port.ops = &rfcomm_port_ops;
- init_waitqueue_head(&dev->wait);
skb_queue_head_init(&dev->pending);
@@ -282,7 +303,9 @@ out:
dev->id, NULL);
if (IS_ERR(dev->tty_dev)) {
err = PTR_ERR(dev->tty_dev);
+ spin_lock(&rfcomm_dev_lock);
list_del(&dev->list);
+ spin_unlock(&rfcomm_dev_lock);
goto free;
}
@@ -301,27 +324,6 @@ free:
return err;
}
-static void rfcomm_dev_del(struct rfcomm_dev *dev)
-{
- unsigned long flags;
- BT_DBG("dev %p", dev);
-
- BUG_ON(test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags));
-
- spin_lock_irqsave(&dev->port.lock, flags);
- if (dev->port.count > 0) {
- spin_unlock_irqrestore(&dev->port.lock, flags);
- return;
- }
- spin_unlock_irqrestore(&dev->port.lock, flags);
-
- spin_lock(&rfcomm_dev_lock);
- list_del_init(&dev->list);
- spin_unlock(&rfcomm_dev_lock);
-
- tty_port_put(&dev->port);
-}
-
/* ---- Send buffer ---- */
static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc)
{
@@ -333,10 +335,9 @@ static inline unsigned int rfcomm_room(struct rfcomm_dlc *dlc)
static void rfcomm_wfree(struct sk_buff *skb)
{
struct rfcomm_dev *dev = (void *) skb->sk;
- struct tty_struct *tty = dev->port.tty;
atomic_sub(skb->truesize, &dev->wmem_alloc);
- if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags) && tty)
- tty_wakeup(tty);
+ if (test_bit(RFCOMM_TTY_ATTACHED, &dev->flags))
+ tty_port_tty_wakeup(&dev->port);
tty_port_put(&dev->port);
}
@@ -410,6 +411,7 @@ static int rfcomm_release_dev(void __user *arg)
{
struct rfcomm_dev_req req;
struct rfcomm_dev *dev;
+ struct tty_struct *tty;
if (copy_from_user(&req, arg, sizeof(req)))
return -EFAULT;
@@ -429,11 +431,15 @@ static int rfcomm_release_dev(void __user *arg)
rfcomm_dlc_close(dev->dlc, 0);
/* Shut down TTY synchronously before freeing rfcomm_dev */
- if (dev->port.tty)
- tty_vhangup(dev->port.tty);
+ tty = tty_port_tty_get(&dev->port);
+ if (tty) {
+ tty_vhangup(tty);
+ tty_kref_put(tty);
+ }
+
+ if (!test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags))
+ tty_port_put(&dev->port);
- if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags))
- rfcomm_dev_del(dev);
tty_port_put(&dev->port);
return 0;
}
@@ -569,31 +575,13 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err)
BT_DBG("dlc %p dev %p err %d", dlc, dev, err);
dev->err = err;
- wake_up_interruptible(&dev->wait);
-
- if (dlc->state == BT_CLOSED) {
- if (!dev->port.tty) {
- if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
- /* Drop DLC lock here to avoid deadlock
- * 1. rfcomm_dev_get will take rfcomm_dev_lock
- * but in rfcomm_dev_add there's lock order:
- * rfcomm_dev_lock -> dlc lock
- * 2. tty_port_put will deadlock if it's
- * the last reference
- */
- rfcomm_dlc_unlock(dlc);
- if (rfcomm_dev_get(dev->id) == NULL) {
- rfcomm_dlc_lock(dlc);
- return;
- }
-
- rfcomm_dev_del(dev);
- tty_port_put(&dev->port);
- rfcomm_dlc_lock(dlc);
- }
- } else
- tty_hangup(dev->port.tty);
- }
+ if (dlc->state == BT_CONNECTED) {
+ device_move(dev->tty_dev, rfcomm_get_device(dev),
+ DPM_ORDER_DEV_AFTER_PARENT);
+
+ wake_up_interruptible(&dev->port.open_wait);
+ } else if (dlc->state == BT_CLOSED)
+ tty_port_tty_hangup(&dev->port, false);
}
static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
@@ -604,10 +592,8 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig)
BT_DBG("dlc %p dev %p v24_sig 0x%02x", dlc, dev, v24_sig);
- if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV)) {
- if (dev->port.tty && !C_CLOCAL(dev->port.tty))
- tty_hangup(dev->port.tty);
- }
+ if ((dev->modem_status & TIOCM_CD) && !(v24_sig & RFCOMM_V24_DV))
+ tty_port_tty_hangup(&dev->port, true);
dev->modem_status =
((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) |
@@ -638,124 +624,92 @@ static void rfcomm_tty_copy_pending(struct rfcomm_dev *dev)
tty_flip_buffer_push(&dev->port);
}
-static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
+/* do the reverse of install, clearing the tty fields and releasing the
+ * reference to tty_port
+ */
+static void rfcomm_tty_cleanup(struct tty_struct *tty)
{
- DECLARE_WAITQUEUE(wait, current);
- struct rfcomm_dev *dev;
- struct rfcomm_dlc *dlc;
- unsigned long flags;
- int err, id;
+ struct rfcomm_dev *dev = tty->driver_data;
- id = tty->index;
+ clear_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
- BT_DBG("tty %p id %d", tty, id);
+ rfcomm_dlc_lock(dev->dlc);
+ tty->driver_data = NULL;
+ rfcomm_dlc_unlock(dev->dlc);
- /* We don't leak this refcount. For reasons which are not entirely
- clear, the TTY layer will call our ->close() method even if the
- open fails. We decrease the refcount there, and decreasing it
- here too would cause breakage. */
- dev = rfcomm_dev_get(id);
- if (!dev)
- return -ENODEV;
+ /*
+ * purge the dlc->tx_queue to avoid circular dependencies
+ * between dev and dlc
+ */
+ skb_queue_purge(&dev->dlc->tx_queue);
- BT_DBG("dev %p dst %pMR channel %d opened %d", dev, &dev->dst,
- dev->channel, dev->port.count);
+ tty_port_put(&dev->port);
+}
- spin_lock_irqsave(&dev->port.lock, flags);
- if (++dev->port.count > 1) {
- spin_unlock_irqrestore(&dev->port.lock, flags);
- return 0;
- }
- spin_unlock_irqrestore(&dev->port.lock, flags);
+/* we acquire the tty_port reference here, since this is where the tty is
+ * first used (its termios are set). We also populate the driver_data field
+ * and install the tty port
+ */
+static int rfcomm_tty_install(struct tty_driver *driver, struct tty_struct *tty)
+{
+ struct rfcomm_dev *dev;
+ struct rfcomm_dlc *dlc;
+ int err;
+
+ dev = rfcomm_dev_get(tty->index);
+ if (!dev)
+ return -ENODEV;
dlc = dev->dlc;
/* Attach TTY and open DLC */
-
rfcomm_dlc_lock(dlc);
tty->driver_data = dev;
- dev->port.tty = tty;
rfcomm_dlc_unlock(dlc);
set_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
- err = rfcomm_dlc_open(dlc, &dev->src, &dev->dst, dev->channel);
- if (err < 0)
- return err;
-
- /* Wait for DLC to connect */
- add_wait_queue(&dev->wait, &wait);
- while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ /* install the tty_port */
+ err = tty_port_install(&dev->port, driver, tty);
+ if (err)
+ rfcomm_tty_cleanup(tty);
- if (dlc->state == BT_CLOSED) {
- err = -dev->err;
- break;
- }
+ return err;
+}
- if (dlc->state == BT_CONNECTED)
- break;
+static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
+{
+ struct rfcomm_dev *dev = tty->driver_data;
+ int err;
- if (signal_pending(current)) {
- err = -EINTR;
- break;
- }
+ BT_DBG("tty %p id %d", tty, tty->index);
- tty_unlock(tty);
- schedule();
- tty_lock(tty);
- }
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&dev->wait, &wait);
+ BT_DBG("dev %p dst %pMR channel %d opened %d", dev, &dev->dst,
+ dev->channel, dev->port.count);
- if (err == 0)
- device_move(dev->tty_dev, rfcomm_get_device(dev),
- DPM_ORDER_DEV_AFTER_PARENT);
+ err = tty_port_open(&dev->port, tty, filp);
+ if (err)
+ return err;
+ /*
+ * FIXME: rfcomm should use proper flow control for
+ * received data. This hack will be unnecessary and can
+ * be removed when that's implemented
+ */
rfcomm_tty_copy_pending(dev);
rfcomm_dlc_unthrottle(dev->dlc);
- return err;
+ return 0;
}
static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp)
{
struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
- unsigned long flags;
-
- if (!dev)
- return;
BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc,
dev->port.count);
- spin_lock_irqsave(&dev->port.lock, flags);
- if (!--dev->port.count) {
- spin_unlock_irqrestore(&dev->port.lock, flags);
- if (dev->tty_dev->parent)
- device_move(dev->tty_dev, NULL, DPM_ORDER_DEV_LAST);
-
- /* Close DLC and dettach TTY */
- rfcomm_dlc_close(dev->dlc, 0);
-
- clear_bit(RFCOMM_TTY_ATTACHED, &dev->flags);
-
- rfcomm_dlc_lock(dev->dlc);
- tty->driver_data = NULL;
- dev->port.tty = NULL;
- rfcomm_dlc_unlock(dev->dlc);
-
- if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) {
- spin_lock(&rfcomm_dev_lock);
- list_del_init(&dev->list);
- spin_unlock(&rfcomm_dev_lock);
-
- tty_port_put(&dev->port);
- }
- } else
- spin_unlock_irqrestore(&dev->port.lock, flags);
-
- tty_port_put(&dev->port);
+ tty_port_close(&dev->port, tty, filp);
}
static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
@@ -1055,17 +1009,11 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
BT_DBG("tty %p dev %p", tty, dev);
- if (!dev)
- return;
-
- rfcomm_tty_flush_buffer(tty);
+ tty_port_hangup(&dev->port);
- if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) {
- if (rfcomm_dev_get(dev->id) == NULL)
- return;
- rfcomm_dev_del(dev);
+ if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags) &&
+ !test_and_set_bit(RFCOMM_TTY_RELEASED, &dev->flags))
tty_port_put(&dev->port);
- }
}
static int rfcomm_tty_tiocmget(struct tty_struct *tty)
@@ -1128,6 +1076,8 @@ static const struct tty_operations rfcomm_ops = {
.wait_until_sent = rfcomm_tty_wait_until_sent,
.tiocmget = rfcomm_tty_tiocmget,
.tiocmset = rfcomm_tty_tiocmset,
+ .install = rfcomm_tty_install,
+ .cleanup = rfcomm_tty_cleanup,
};
int __init rfcomm_init_ttys(void)
@@ -1146,7 +1096,7 @@ int __init rfcomm_init_ttys(void)
rfcomm_tty_driver->subtype = SERIAL_TYPE_NORMAL;
rfcomm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
rfcomm_tty_driver->init_termios = tty_std_termios;
- rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+ rfcomm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL;
rfcomm_tty_driver->init_termios.c_lflag &= ~ICANON;
tty_set_operations(rfcomm_tty_driver, &rfcomm_ops);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index e7bd4eea575c..96bd388d93a4 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -176,8 +176,13 @@ static int sco_connect(struct sock *sk)
else
type = SCO_LINK;
- hcon = hci_connect(hdev, type, dst, BDADDR_BREDR, BT_SECURITY_LOW,
- HCI_AT_NO_BONDING);
+ if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
+ (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
+ err = -EOPNOTSUPP;
+ goto done;
+ }
+
+ hcon = hci_connect_sco(hdev, type, dst, sco_pi(sk)->setting);
if (IS_ERR(hcon)) {
err = PTR_ERR(hcon);
goto done;
@@ -417,6 +422,8 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int pro
sk->sk_protocol = proto;
sk->sk_state = BT_OPEN;
+ sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
+
setup_timer(&sk->sk_timer, sco_sock_timeout, (unsigned long)sk);
bt_sock_link(&sco_sk_list, sk);
@@ -652,7 +659,7 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
return err;
}
-static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
+static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
{
struct hci_dev *hdev = conn->hdev;
@@ -664,11 +671,7 @@ static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
struct hci_cp_accept_conn_req cp;
bacpy(&cp.bdaddr, &conn->dst);
-
- if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
- cp.role = 0x00; /* Become master */
- else
- cp.role = 0x01; /* Remain slave */
+ cp.role = 0x00; /* Ignored */
hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
} else {
@@ -679,9 +682,21 @@ static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
- cp.max_latency = __constant_cpu_to_le16(0xffff);
- cp.content_format = cpu_to_le16(hdev->voice_setting);
- cp.retrans_effort = 0xff;
+ cp.content_format = cpu_to_le16(setting);
+
+ switch (setting & SCO_AIRMODE_MASK) {
+ case SCO_AIRMODE_TRANSP:
+ if (conn->pkt_type & ESCO_2EV3)
+ cp.max_latency = __constant_cpu_to_le16(0x0008);
+ else
+ cp.max_latency = __constant_cpu_to_le16(0x000D);
+ cp.retrans_effort = 0x02;
+ break;
+ case SCO_AIRMODE_CVSD:
+ cp.max_latency = __constant_cpu_to_le16(0xffff);
+ cp.retrans_effort = 0xff;
+ break;
+ }
hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
sizeof(cp), &cp);
@@ -698,7 +713,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sk->sk_state == BT_CONNECT2 &&
test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
- sco_conn_defer_accept(pi->conn->hcon, 0);
+ sco_conn_defer_accept(pi->conn->hcon, pi->setting);
sk->sk_state = BT_CONFIG;
msg->msg_namelen = 0;
@@ -714,7 +729,8 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int err = 0;
+ int len, err = 0;
+ struct bt_voice voice;
u32 opt;
BT_DBG("sk %p", sk);
@@ -740,6 +756,31 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
break;
+ case BT_VOICE:
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND &&
+ sk->sk_state != BT_CONNECT2) {
+ err = -EINVAL;
+ break;
+ }
+
+ voice.setting = sco_pi(sk)->setting;
+
+ len = min_t(unsigned int, sizeof(voice), optlen);
+ if (copy_from_user((char *) &voice, optval, len)) {
+ err = -EFAULT;
+ break;
+ }
+
+ /* Explicitly check for these values */
+ if (voice.setting != BT_VOICE_TRANSPARENT &&
+ voice.setting != BT_VOICE_CVSD_16BIT) {
+ err = -EINVAL;
+ break;
+ }
+
+ sco_pi(sk)->setting = voice.setting;
+ break;
+
default:
err = -ENOPROTOOPT;
break;
@@ -765,7 +806,9 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
switch (optname) {
case SCO_OPTIONS:
- if (sk->sk_state != BT_CONNECTED) {
+ if (sk->sk_state != BT_CONNECTED &&
+ !(sk->sk_state == BT_CONNECT2 &&
+ test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))) {
err = -ENOTCONN;
break;
}
@@ -781,7 +824,9 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user
break;
case SCO_CONNINFO:
- if (sk->sk_state != BT_CONNECTED) {
+ if (sk->sk_state != BT_CONNECTED &&
+ !(sk->sk_state == BT_CONNECT2 &&
+ test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))) {
err = -ENOTCONN;
break;
}
@@ -809,6 +854,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
{
struct sock *sk = sock->sk;
int len, err = 0;
+ struct bt_voice voice;
BT_DBG("sk %p", sk);
@@ -834,6 +880,15 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
break;
+ case BT_VOICE:
+ voice.setting = sco_pi(sk)->setting;
+
+ len = min_t(unsigned int, len, sizeof(voice));
+ if (copy_to_user(optval, (char *)&voice, len))
+ err = -EFAULT;
+
+ break;
+
default:
err = -ENOPROTOOPT;
break;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 89659d4ed1f9..ca04163635da 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -245,22 +245,22 @@ fail:
int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
{
struct netpoll *np;
- int err = 0;
+ int err;
+
+ if (!p->br->dev->npinfo)
+ return 0;
np = kzalloc(sizeof(*p->np), gfp);
- err = -ENOMEM;
if (!np)
- goto out;
+ return -ENOMEM;
err = __netpoll_setup(np, p->dev, gfp);
if (err) {
kfree(np);
- goto out;
+ return err;
}
p->np = np;
-
-out:
return err;
}
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ffd5874f2592..33e8f23acddd 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -700,7 +700,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
vid = nla_get_u16(tb[NDA_VLAN]);
- if (vid >= VLAN_N_VID) {
+ if (!vid || vid >= VLAN_VID_MASK) {
pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
vid);
return -EINVAL;
@@ -794,7 +794,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
vid = nla_get_u16(tb[NDA_VLAN]);
- if (vid >= VLAN_N_VID) {
+ if (!vid || vid >= VLAN_VID_MASK) {
pr_info("bridge: RTM_NEWNEIGH with invalid vlan id %d\n",
vid);
return -EINVAL;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 5623be6b9ecd..c41d5fbb91d0 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -363,7 +363,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (err)
goto err2;
- if (br_netpoll_info(br) && ((err = br_netpoll_enable(p, GFP_KERNEL))))
+ err = br_netpoll_enable(p, GFP_KERNEL);
+ if (err)
goto err3;
err = netdev_master_upper_dev_link(dev, br->dev);
@@ -382,6 +383,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
netdev_update_features(br->dev);
+ if (br->dev->needed_headroom < dev->needed_headroom)
+ br->dev->needed_headroom = dev->needed_headroom;
+
spin_lock_bh(&br->lock);
changed_addr = br_stp_recalculate_bridge_id(br);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 6319c4333c39..b7b1914dfa25 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -9,6 +9,7 @@
#include <net/netlink.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
+#include <net/addrconf.h>
#endif
#include "br_private.h"
@@ -61,7 +62,8 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
for (i = 0; i < mdb->max; i++) {
struct net_bridge_mdb_entry *mp;
- struct net_bridge_port_group *p, **pp;
+ struct net_bridge_port_group *p;
+ struct net_bridge_port_group __rcu **pp;
struct net_bridge_port *port;
hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
@@ -253,7 +255,7 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
return false;
#if IS_ENABLED(CONFIG_IPV6)
} else if (entry->addr.proto == htons(ETH_P_IPV6)) {
- if (!ipv6_is_transient_multicast(&entry->addr.u.ip6))
+ if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6))
return false;
#endif
} else
@@ -451,7 +453,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
call_rcu_bh(&p->rcu, br_multicast_free_pg);
err = 0;
- if (!mp->ports && !mp->mglist && mp->timer_armed &&
+ if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
break;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index bbcb43582496..8b0b610ca2c9 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -29,6 +29,7 @@
#include <net/ipv6.h>
#include <net/mld.h>
#include <net/ip6_checksum.h>
+#include <net/addrconf.h>
#endif
#include "br_private.h"
@@ -271,7 +272,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
del_timer(&p->timer);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- if (!mp->ports && !mp->mglist && mp->timer_armed &&
+ if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
@@ -619,7 +620,6 @@ rehash:
mp->br = br;
mp->addr = *group;
-
setup_timer(&mp->timer, br_multicast_group_expired,
(unsigned long)mp);
@@ -659,6 +659,7 @@ static int br_multicast_add_group(struct net_bridge *br,
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
+ unsigned long now = jiffies;
int err;
spin_lock(&br->multicast_lock);
@@ -673,6 +674,7 @@ static int br_multicast_add_group(struct net_bridge *br,
if (!port) {
mp->mglist = true;
+ mod_timer(&mp->timer, now + br->multicast_membership_interval);
goto out;
}
@@ -680,7 +682,7 @@ static int br_multicast_add_group(struct net_bridge *br,
(p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->port == port)
- goto out;
+ goto found;
if ((unsigned long)p->port < (unsigned long)port)
break;
}
@@ -691,6 +693,8 @@ static int br_multicast_add_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p);
br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
+found:
+ mod_timer(&p->timer, now + br->multicast_membership_interval);
out:
err = 0;
@@ -724,7 +728,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
{
struct br_ip br_group;
- if (!ipv6_is_transient_multicast(group))
+ if (ipv6_addr_is_ll_all_nodes(group))
return 0;
br_group.u.ip6 = *group;
@@ -1190,9 +1194,6 @@ static int br_ip4_multicast_query(struct net_bridge *br,
if (!mp)
goto out;
- mod_timer(&mp->timer, now + br->multicast_membership_interval);
- mp->timer_armed = true;
-
max_delay *= br->multicast_last_member_count;
if (mp->mglist &&
@@ -1255,7 +1256,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (!mld2q->mld2q_nsrcs)
group = &mld2q->mld2q_mca;
- max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL);
+ max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL);
}
br_multicast_query_received(br, port, &br->ip6_querier,
@@ -1269,9 +1270,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
if (!mp)
goto out;
- mod_timer(&mp->timer, now + br->multicast_membership_interval);
- mp->timer_armed = true;
-
max_delay *= br->multicast_last_member_count;
if (mp->mglist &&
(timer_pending(&mp->timer) ?
@@ -1357,7 +1355,7 @@ static void br_multicast_leave_group(struct net_bridge *br,
call_rcu_bh(&p->rcu, br_multicast_free_pg);
br_mdb_notify(br->dev, port, group, RTM_DELMDB);
- if (!mp->ports && !mp->mglist && mp->timer_armed &&
+ if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
}
@@ -1369,12 +1367,30 @@ static void br_multicast_leave_group(struct net_bridge *br,
br->multicast_last_member_interval;
if (!port) {
- if (mp->mglist && mp->timer_armed &&
+ if (mp->mglist &&
(timer_pending(&mp->timer) ?
time_after(mp->timer.expires, time) :
try_to_del_timer_sync(&mp->timer) >= 0)) {
mod_timer(&mp->timer, time);
}
+
+ goto out;
+ }
+
+ for (p = mlock_dereference(mp->ports, br);
+ p != NULL;
+ p = mlock_dereference(p->next, br)) {
+ if (p->port != port)
+ continue;
+
+ if (!hlist_unhashed(&p->mglist) &&
+ (timer_pending(&p->timer) ?
+ time_after(p->timer.expires, time) :
+ try_to_del_timer_sync(&p->timer) >= 0)) {
+ mod_timer(&p->timer, time);
+ }
+
+ break;
}
out:
spin_unlock(&br->multicast_lock);
@@ -1410,7 +1426,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
&br->ip6_query;
- if (!ipv6_is_transient_multicast(group))
+ if (ipv6_addr_is_ll_all_nodes(group))
return;
br_group.u.ip6 = *group;
@@ -1547,8 +1563,14 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
* - MLD has always Router Alert hop-by-hop option
* - But we do not support jumbrograms.
*/
- if (ip6h->version != 6 ||
- ip6h->nexthdr != IPPROTO_HOPOPTS ||
+ if (ip6h->version != 6)
+ return 0;
+
+ /* Prevent flooding this packet if there is no listener present */
+ if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr))
+ BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
+
+ if (ip6h->nexthdr != IPPROTO_HOPOPTS ||
ip6h->payload_len == 0)
return 0;
@@ -1791,7 +1813,6 @@ void br_multicast_stop(struct net_bridge *br)
hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
hlist[ver]) {
del_timer(&mp->timer);
- mp->timer_armed = false;
call_rcu_bh(&mp->rcu, br_multicast_free_group);
}
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index b9259efa636e..f75d92e4f96b 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -207,7 +207,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u32 filter_mask)
{
int err = 0;
- struct net_bridge_port *port = br_port_get_rcu(dev);
+ struct net_bridge_port *port = br_port_get_rtnl(dev);
/* not a bridge port and */
if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
@@ -243,7 +243,7 @@ static int br_afspec(struct net_bridge *br,
vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]);
- if (vinfo->vid >= VLAN_N_VID)
+ if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
return -EINVAL;
switch (cmd) {
@@ -451,7 +451,7 @@ static size_t br_get_link_af_size(const struct net_device *dev)
struct net_port_vlans *pv;
if (br_port_exists(dev))
- pv = nbp_get_vlan_info(br_port_get_rcu(dev));
+ pv = nbp_get_vlan_info(br_port_get_rtnl(dev));
else if (dev->priv_flags & IFF_EBRIDGE)
pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
else
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 3a3f371b2841..2998dd1769a0 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -102,6 +102,11 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
case NETDEV_PRE_TYPE_CHANGE:
/* Forbid underlaying device to change its type. */
return NOTIFY_BAD;
+
+ case NETDEV_RESEND_IGMP:
+ /* Propagate to master device */
+ call_netdevice_notifiers(event, br->dev);
+ break;
}
/* Events that may cause spanning tree to refresh */
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 263ba9034468..e14c33b42f75 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -126,7 +126,6 @@ struct net_bridge_mdb_entry
struct timer_list timer;
struct br_ip addr;
bool mglist;
- bool timer_armed;
};
struct net_bridge_mdb_htable
@@ -202,13 +201,10 @@ struct net_bridge_port
static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
{
- struct net_bridge_port *port =
- rcu_dereference_rtnl(dev->rx_handler_data);
-
- return br_port_exists(dev) ? port : NULL;
+ return rcu_dereference(dev->rx_handler_data);
}
-static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
+static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev)
{
return br_port_exists(dev) ?
rtnl_dereference(dev->rx_handler_data) : NULL;
@@ -352,11 +348,6 @@ extern void br_dev_delete(struct net_device *dev, struct list_head *list);
extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
struct net_device *dev);
#ifdef CONFIG_NET_POLL_CONTROLLER
-static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
-{
- return br->dev->npinfo;
-}
-
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
struct sk_buff *skb)
{
@@ -369,11 +360,6 @@ static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
extern int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp);
extern void br_netpoll_disable(struct net_bridge_port *p);
#else
-static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
-{
- return NULL;
-}
-
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
struct sk_buff *skb)
{
@@ -494,7 +480,7 @@ extern void br_multicast_free_pg(struct rcu_head *head);
extern struct net_bridge_port_group *br_multicast_new_port_group(
struct net_bridge_port *port,
struct br_ip *group,
- struct net_bridge_port_group *next,
+ struct net_bridge_port_group __rcu *next,
unsigned char state);
extern void br_mdb_init(void);
extern void br_mdb_uninit(void);
@@ -504,16 +490,6 @@ extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/addrconf.h>
-static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
-{
- if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
- return 1;
- return 0;
-}
-#endif
-
static inline bool br_multicast_is_router(struct net_bridge *br)
{
return br->multicast_router == 2 ||
@@ -666,9 +642,7 @@ static inline u16 br_get_pvid(const struct net_port_vlans *v)
* vid wasn't set
*/
smp_rmb();
- return (v->pvid & VLAN_TAG_PRESENT) ?
- (v->pvid & ~VLAN_TAG_PRESENT) :
- VLAN_N_VID;
+ return v->pvid ?: VLAN_N_VID;
}
#else
@@ -766,6 +740,7 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br,
extern void br_init_port(struct net_bridge_port *p);
extern void br_become_designated_port(struct net_bridge_port *p);
+extern void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
extern int br_set_forward_delay(struct net_bridge *br, unsigned long x);
extern int br_set_hello_time(struct net_bridge *br, unsigned long x);
extern int br_set_max_age(struct net_bridge *br, unsigned long x);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1c0a50f13229..3c86f0538cbb 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -209,7 +209,7 @@ static void br_record_config_information(struct net_bridge_port *p,
p->designated_age = jiffies - bpdu->message_age;
mod_timer(&p->message_age_timer, jiffies
- + (p->br->max_age - bpdu->message_age));
+ + (bpdu->max_age - bpdu->message_age));
}
/* called under bridge lock */
@@ -544,18 +544,27 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
}
+void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
+{
+ br->bridge_forward_delay = t;
+ if (br_is_root_bridge(br))
+ br->forward_delay = br->bridge_forward_delay;
+}
+
int br_set_forward_delay(struct net_bridge *br, unsigned long val)
{
unsigned long t = clock_t_to_jiffies(val);
+ int err = -ERANGE;
+ spin_lock_bh(&br->lock);
if (br->stp_enabled != BR_NO_STP &&
(t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
- return -ERANGE;
+ goto unlock;
- spin_lock_bh(&br->lock);
- br->bridge_forward_delay = t;
- if (br_is_root_bridge(br))
- br->forward_delay = br->bridge_forward_delay;
+ __br_set_forward_delay(br, t);
+ err = 0;
+
+unlock:
spin_unlock_bh(&br->lock);
- return 0;
+ return err;
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d45e760141bb..656a6f3e40de 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -129,6 +129,14 @@ static void br_stp_start(struct net_bridge *br)
char *envp[] = { NULL };
r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+
+ spin_lock_bh(&br->lock);
+
+ if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY)
+ __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY);
+ else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY)
+ __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
+
if (r == 0) {
br->stp_enabled = BR_USER_STP;
br_debug(br, "userspace STP started\n");
@@ -137,10 +145,10 @@ static void br_stp_start(struct net_bridge *br)
br_debug(br, "using kernel STP\n");
/* To start timers on any ports left in blocking */
- spin_lock_bh(&br->lock);
br_port_state_selection(br);
- spin_unlock_bh(&br->lock);
}
+
+ spin_unlock_bh(&br->lock);
}
static void br_stp_stop(struct net_bridge *br)
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 9a9ffe7e4019..53f0990eab58 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -45,37 +45,34 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
return 0;
}
- if (vid) {
- if (v->port_idx) {
- p = v->parent.port;
- br = p->br;
- dev = p->dev;
- } else {
- br = v->parent.br;
- dev = br->dev;
- }
- ops = dev->netdev_ops;
-
- if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
- /* Add VLAN to the device filter if it is supported.
- * Stricly speaking, this is not necessary now, since
- * devices are made promiscuous by the bridge, but if
- * that ever changes this code will allow tagged
- * traffic to enter the bridge.
- */
- err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
- vid);
- if (err)
- return err;
- }
-
- err = br_fdb_insert(br, p, dev->dev_addr, vid);
- if (err) {
- br_err(br, "failed insert local address into bridge "
- "forwarding table\n");
- goto out_filt;
- }
+ if (v->port_idx) {
+ p = v->parent.port;
+ br = p->br;
+ dev = p->dev;
+ } else {
+ br = v->parent.br;
+ dev = br->dev;
+ }
+ ops = dev->netdev_ops;
+
+ if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
+ /* Add VLAN to the device filter if it is supported.
+ * Stricly speaking, this is not necessary now, since
+ * devices are made promiscuous by the bridge, but if
+ * that ever changes this code will allow tagged
+ * traffic to enter the bridge.
+ */
+ err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
+ vid);
+ if (err)
+ return err;
+ }
+ err = br_fdb_insert(br, p, dev->dev_addr, vid);
+ if (err) {
+ br_err(br, "failed insert local address into bridge "
+ "forwarding table\n");
+ goto out_filt;
}
set_bit(vid, v->vlan_bitmap);
@@ -98,7 +95,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
__vlan_delete_pvid(v, vid);
clear_bit(vid, v->untagged_bitmap);
- if (v->port_idx && vid) {
+ if (v->port_idx) {
struct net_device *dev = v->parent.port->dev;
const struct net_device_ops *ops = dev->netdev_ops;
@@ -192,6 +189,8 @@ out:
bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
struct sk_buff *skb, u16 *vid)
{
+ int err;
+
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
@@ -204,20 +203,32 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
if (!v)
return false;
- if (br_vlan_get_tag(skb, vid)) {
+ err = br_vlan_get_tag(skb, vid);
+ if (!*vid) {
u16 pvid = br_get_pvid(v);
- /* Frame did not have a tag. See if pvid is set
- * on this port. That tells us which vlan untagged
- * traffic belongs to.
+ /* Frame had a tag with VID 0 or did not have a tag.
+ * See if pvid is set on this port. That tells us which
+ * vlan untagged or priority-tagged traffic belongs to.
*/
if (pvid == VLAN_N_VID)
return false;
- /* PVID is set on this port. Any untagged ingress
- * frame is considered to belong to this vlan.
+ /* PVID is set on this port. Any untagged or priority-tagged
+ * ingress frame is considered to belong to this vlan.
*/
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+ *vid = pvid;
+ if (likely(err))
+ /* Untagged Frame. */
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
+ else
+ /* Priority-tagged Frame.
+ * At this point, We know that skb->vlan_tci had
+ * VLAN_TAG_PRESENT bit and its VID field was 0x000.
+ * We update only VID field and preserve PCP field.
+ */
+ skb->vlan_tci |= pvid;
+
return true;
}
@@ -248,7 +259,9 @@ bool br_allowed_egress(struct net_bridge *br,
return false;
}
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
{
struct net_port_vlans *pv = NULL;
@@ -278,7 +291,9 @@ out:
return err;
}
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
int br_vlan_delete(struct net_bridge *br, u16 vid)
{
struct net_port_vlans *pv;
@@ -289,14 +304,9 @@ int br_vlan_delete(struct net_bridge *br, u16 vid)
if (!pv)
return -EINVAL;
- if (vid) {
- /* If the VID !=0 remove fdb for this vid. VID 0 is special
- * in that it's the default and is always there in the fdb.
- */
- spin_lock_bh(&br->hash_lock);
- fdb_delete_by_addr(br, br->dev->dev_addr, vid);
- spin_unlock_bh(&br->hash_lock);
- }
+ spin_lock_bh(&br->hash_lock);
+ fdb_delete_by_addr(br, br->dev->dev_addr, vid);
+ spin_unlock_bh(&br->hash_lock);
__vlan_del(pv, vid);
return 0;
@@ -329,7 +339,9 @@ unlock:
return 0;
}
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
struct net_port_vlans *pv = NULL;
@@ -363,7 +375,9 @@ clean_up:
return err;
}
-/* Must be protected by RTNL */
+/* Must be protected by RTNL.
+ * Must be called with vid in range from 1 to 4094 inclusive.
+ */
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
{
struct net_port_vlans *pv;
@@ -374,14 +388,9 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
if (!pv)
return -EINVAL;
- if (vid) {
- /* If the VID !=0 remove fdb for this vid. VID 0 is special
- * in that it's the default and is always there in the fdb.
- */
- spin_lock_bh(&port->br->hash_lock);
- fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
- spin_unlock_bh(&port->br->hash_lock);
- }
+ spin_lock_bh(&port->br->hash_lock);
+ fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
+ spin_unlock_bh(&port->br->hash_lock);
return __vlan_del(pv, vid);
}
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 70f656ce0f4a..dbd1c783431b 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -64,7 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
static int __net_init broute_net_init(struct net *net)
{
net->xt.broute_table = ebt_register_table(net, &broute_table);
- return PTR_RET(net->xt.broute_table);
+ return PTR_ERR_OR_ZERO(net->xt.broute_table);
}
static void __net_exit broute_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 3c2e9dced9e0..94b2b700cff8 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -100,7 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
static int __net_init frame_filter_net_init(struct net *net)
{
net->xt.frame_filter = ebt_register_table(net, &frame_filter);
- return PTR_RET(net->xt.frame_filter);
+ return PTR_ERR_OR_ZERO(net->xt.frame_filter);
}
static void __net_exit frame_filter_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 10871bc77908..322555acdd40 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -100,7 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
static int __net_init frame_nat_net_init(struct net *net)
{
net->xt.frame_nat = ebt_register_table(net, &frame_nat);
- return PTR_RET(net->xt.frame_nat);
+ return PTR_ERR_OR_ZERO(net->xt.frame_nat);
}
static void __net_exit frame_nat_net_exit(struct net *net)
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 2bd4b58f4372..0f455227da83 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -293,9 +293,10 @@ int cfctrl_linkup_request(struct cflayer *layer,
count = cfctrl_cancel_req(&cfctrl->serv.layer,
user_layer);
- if (count != 1)
+ if (count != 1) {
pr_err("Could not remove request (%d)", count);
return -ENODEV;
+ }
}
return 0;
}
diff --git a/net/can/gw.c b/net/can/gw.c
index 2f291f961a17..3f9b0f3a2818 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -146,6 +146,7 @@ struct cgw_job {
/* tbc */
};
u8 gwtype;
+ u8 limit_hops;
u16 flags;
};
@@ -402,6 +403,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
/* put the incremented hop counter in the cloned skb */
cgw_hops(nskb) = cgw_hops(skb) + 1;
+
+ /* first processing of this CAN frame -> adjust to private hop limit */
+ if (gwj->limit_hops && cgw_hops(nskb) == 1)
+ cgw_hops(nskb) = max_hops - gwj->limit_hops + 1;
+
nskb->dev = gwj->dst.dev;
/* pointer to modifiable CAN frame */
@@ -509,6 +515,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
/* check non default settings of attributes */
+ if (gwj->limit_hops) {
+ if (nla_put_u8(skb, CGW_LIM_HOPS, gwj->limit_hops) < 0)
+ goto cancel;
+ }
+
if (gwj->mod.modtype.and) {
memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
mb.modtype = gwj->mod.modtype.and;
@@ -606,11 +617,12 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = {
[CGW_SRC_IF] = { .type = NLA_U32 },
[CGW_DST_IF] = { .type = NLA_U32 },
[CGW_FILTER] = { .len = sizeof(struct can_filter) },
+ [CGW_LIM_HOPS] = { .type = NLA_U8 },
};
/* check for common and gwtype specific attributes */
static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
- u8 gwtype, void *gwtypeattr)
+ u8 gwtype, void *gwtypeattr, u8 *limhops)
{
struct nlattr *tb[CGW_MAX+1];
struct cgw_frame_mod mb;
@@ -625,6 +637,13 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
if (err < 0)
return err;
+ if (tb[CGW_LIM_HOPS]) {
+ *limhops = nla_get_u8(tb[CGW_LIM_HOPS]);
+
+ if (*limhops < 1 || *limhops > max_hops)
+ return -EINVAL;
+ }
+
/* check for AND/OR/XOR/SET modifications */
if (tb[CGW_MOD_AND]) {
@@ -782,6 +801,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct rtcanmsg *r;
struct cgw_job *gwj;
+ u8 limhops = 0;
int err = 0;
if (!capable(CAP_NET_ADMIN))
@@ -808,7 +828,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
gwj->flags = r->flags;
gwj->gwtype = r->gwtype;
- err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw);
+ err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw,
+ &limhops);
if (err < 0)
goto out;
@@ -836,6 +857,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
if (gwj->dst.dev->type != ARPHRD_CAN || gwj->dst.dev->header_ops)
goto put_src_dst_out;
+ gwj->limit_hops = limhops;
+
ASSERT_RTNL();
err = cgw_register_filter(gwj);
@@ -867,13 +890,14 @@ static void cgw_remove_all_jobs(void)
}
}
-static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct cgw_job *gwj = NULL;
struct hlist_node *nx;
struct rtcanmsg *r;
struct cf_mod mod;
struct can_can_gw ccgw;
+ u8 limhops = 0;
int err = 0;
if (!capable(CAP_NET_ADMIN))
@@ -890,7 +914,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
if (r->gwtype != CGW_TYPE_CAN_CAN)
return -EINVAL;
- err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw);
+ err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
if (err < 0)
return err;
@@ -910,6 +934,9 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
if (gwj->flags != r->flags)
continue;
+ if (gwj->limit_hops != limhops)
+ continue;
+
if (memcmp(&gwj->mod, &mod, sizeof(mod)))
continue;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index eb0a46a49bd4..4a5df7b1cc9f 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -290,7 +290,7 @@ int ceph_msgr_init(void)
if (ceph_msgr_slab_init())
return -ENOMEM;
- ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
+ ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0);
if (ceph_msgr_wq)
return 0;
@@ -409,7 +409,7 @@ static void ceph_sock_write_space(struct sock *sk)
* and net/core/stream.c:sk_stream_write_space().
*/
if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) {
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ if (sk_stream_is_writeable(sk)) {
dout("%s %p queueing write work\n", __func__, con);
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
queue_con(con);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index dd47889adc4a..2b4b32aaa893 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -503,7 +503,9 @@ void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
size_t payload_len = 0;
- BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+ opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+ opcode != CEPH_OSD_OP_TRUNCATE);
op->extent.offset = offset;
op->extent.length = length;
@@ -631,6 +633,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
break;
case CEPH_OSD_OP_READ:
case CEPH_OSD_OP_WRITE:
+ case CEPH_OSD_OP_ZERO:
+ case CEPH_OSD_OP_DELETE:
+ case CEPH_OSD_OP_TRUNCATE:
if (src->op == CEPH_OSD_OP_WRITE)
request_data_len = src->extent.length;
dst->extent.offset = cpu_to_le64(src->extent.offset);
@@ -715,7 +720,9 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
u64 object_base;
int r;
- BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
+ BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+ opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+ opcode != CEPH_OSD_OP_TRUNCATE);
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
@@ -1488,14 +1495,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
req, result);
- ceph_decode_need(&p, end, 4, bad);
+ ceph_decode_need(&p, end, 4, bad_put);
numops = ceph_decode_32(&p);
if (numops > CEPH_OSD_MAX_OP)
goto bad_put;
if (numops != req->r_num_ops)
goto bad_put;
payload_len = 0;
- ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad);
+ ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
for (i = 0; i < numops; i++) {
struct ceph_osd_op *op = p;
int len;
@@ -1513,7 +1520,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
goto bad_put;
}
- ceph_decode_need(&p, end, 4 + numops * 4, bad);
+ ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
retry_attempt = ceph_decode_32(&p);
for (i = 0; i < numops; i++)
req->r_reply_op_result[i] = ceph_decode_32(&p);
@@ -1786,6 +1793,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
nr_maps--;
}
+ if (!osdc->osdmap)
+ goto bad;
done:
downgrade_write(&osdc->map_sem);
ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
@@ -2129,6 +2138,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
dout("osdc_start_request failed map, "
" will retry %lld\n", req->r_tid);
rc = 0;
+ } else {
+ __unregister_request(osdc, req);
}
goto out_unlock;
}
@@ -2205,6 +2216,17 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
EXPORT_SYMBOL(ceph_osdc_sync);
/*
+ * Flush osdc->notify_wq so all pending notify callbacks have run - for use
+ * after a watch is unregistered, so no more callbacks for it are invoked.
+ */
+extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+{
+ flush_workqueue(osdc->notify_wq);
+}
+EXPORT_SYMBOL(ceph_osdc_flush_notifies);
+
+
+/*
* init, shutdown
*/
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
@@ -2253,12 +2275,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
if (err < 0)
goto out_msgpool;
+ err = -ENOMEM;
osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
- if (IS_ERR(osdc->notify_wq)) {
- err = PTR_ERR(osdc->notify_wq);
- osdc->notify_wq = NULL;
+ if (!osdc->notify_wq)
goto out_msgpool;
- }
return 0;
out_msgpool:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 603ddd92db19..dbd9a4792427 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1129,7 +1129,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
/* pg_temp? */
pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
- pool->pgp_num_mask);
+ pool->pg_num_mask);
pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
if (pg) {
*num = pg->len;
diff --git a/net/compat.c b/net/compat.c
index f0a1ba6c8086..89032580bd1d 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg)
__get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
__get_user(kmsg->msg_flags, &umsg->msg_flags))
return -EFAULT;
+ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
+ return -EINVAL;
kmsg->msg_name = compat_ptr(tmp1);
kmsg->msg_iov = compat_ptr(tmp2);
kmsg->msg_control = compat_ptr(tmp3);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 8ab48cd89559..af814e764206 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -48,6 +48,7 @@
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
+#include <linux/pagemap.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
@@ -573,6 +574,77 @@ fault:
}
EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
+/**
+ * zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
+ * @skb: buffer to copy into
+ * @from: io vector to copy from
+ * @offset: offset in the io vector to start copying from
+ * @count: number of iovec entries in @from
+ *
+ * The function will first copy up to headlen, and then pin the userspace
+ * pages and build frags through them.
+ *
+ * Returns 0, -EFAULT or -EMSGSIZE.
+ * Note: the iovec is not modified during the copy
+ */
+int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
+ int offset, size_t count)
+{
+ int len = iov_length(from, count) - offset;
+ int copy = min_t(int, skb_headlen(skb), len);
+ int size;
+ int i = 0;
+
+ /* copy up to skb headlen */
+ if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy))
+ return -EFAULT;
+
+ if (len == copy)
+ return 0;
+
+ offset += copy;
+ while (count--) {
+ struct page *page[MAX_SKB_FRAGS];
+ int num_pages;
+ unsigned long base;
+ unsigned long truesize;
+
+ /* Skip entries already consumed by offset and the header copy */
+ if (offset >= from->iov_len) {
+ offset -= from->iov_len;
+ ++from;
+ continue;
+ }
+ len = from->iov_len - offset;
+ base = (unsigned long)from->iov_base + offset;
+ size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+ if (i + size > MAX_SKB_FRAGS)
+ return -EMSGSIZE;
+ num_pages = get_user_pages_fast(base, size, 0, &page[i]);
+ if (num_pages != size) {
+ release_pages(&page[i], num_pages, 0);
+ return -EFAULT;
+ }
+ truesize = size * PAGE_SIZE;
+ skb->data_len += len;
+ skb->len += len;
+ skb->truesize += truesize;
+ atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+ while (len) {
+ int off = base & ~PAGE_MASK;
+ int size = min_t(int, len, PAGE_SIZE - off);
+ skb_fill_page_desc(skb, i, page[i], off, size);
+ base += size;
+ len -= size;
+ i++;
+ }
+ offset = 0;
+ ++from;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(zerocopy_sg_from_iovec);
+
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
u8 __user *to, int len,
__wsum *csump)
diff --git a/net/core/dev.c b/net/core/dev.c
index 26755dd40daa..3430b1ed12e5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -174,7 +174,7 @@ static DEFINE_SPINLOCK(napi_hash_lock);
static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);
-seqcount_t devnet_rename_seq;
+static seqcount_t devnet_rename_seq;
static inline void dev_base_seq_inc(struct net *net)
{
@@ -1691,13 +1691,13 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
kfree_skb(skb);
return NET_RX_DROP;
}
- skb_scrub_packet(skb);
skb->protocol = eth_type_trans(skb, dev);
/* eth_type_trans() can set pkt_type.
- * clear pkt_type _after_ calling eth_type_trans()
+ * Call skb_scrub_packet() only _after_ eth_type_trans() so that the
+ * pkt_type it set is cleared again.
*/
- skb->pkt_type = PACKET_HOST;
+ skb_scrub_packet(skb, true);
return netif_rx(skb);
}
@@ -1917,7 +1917,8 @@ static struct xps_map *expand_xps_map(struct xps_map *map,
return new_map;
}
-int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
+int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
+ u16 index)
{
struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
struct xps_map *map, *new_map;
@@ -4367,57 +4368,48 @@ softnet_break:
goto out;
}
-struct netdev_upper {
+struct netdev_adjacent {
struct net_device *dev;
+
+ /* upper master flag, there can only be one master device per list */
bool master;
+
+ /* indicates that this dev is our first-level lower/upper device */
+ bool neighbour;
+
+ /* counter for the number of times this device was added to us */
+ u16 ref_nr;
+
struct list_head list;
struct rcu_head rcu;
- struct list_head search_list;
};
-static void __append_search_uppers(struct list_head *search_list,
- struct net_device *dev)
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
+ struct net_device *adj_dev,
+ bool upper)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *adj;
+ struct list_head *dev_list;
- list_for_each_entry(upper, &dev->upper_dev_list, list) {
- /* check if this upper is not already in search list */
- if (list_empty(&upper->search_list))
- list_add_tail(&upper->search_list, search_list);
+ dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
+
+ list_for_each_entry(adj, dev_list, list) {
+ if (adj->dev == adj_dev)
+ return adj;
}
+ return NULL;
}
-static bool __netdev_search_upper_dev(struct net_device *dev,
- struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
+ struct net_device *udev)
{
- LIST_HEAD(search_list);
- struct netdev_upper *upper;
- struct netdev_upper *tmp;
- bool ret = false;
-
- __append_search_uppers(&search_list, dev);
- list_for_each_entry(upper, &search_list, search_list) {
- if (upper->dev == upper_dev) {
- ret = true;
- break;
- }
- __append_search_uppers(&search_list, upper->dev);
- }
- list_for_each_entry_safe(upper, tmp, &search_list, search_list)
- INIT_LIST_HEAD(&upper->search_list);
- return ret;
+ return __netdev_find_adj(dev, udev, true);
}
-static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
- struct net_device *upper_dev)
+static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
+ struct net_device *ldev)
{
- struct netdev_upper *upper;
-
- list_for_each_entry(upper, &dev->upper_dev_list, list) {
- if (upper->dev == upper_dev)
- return upper;
- }
- return NULL;
+ return __netdev_find_adj(dev, ldev, false);
}
/**
@@ -4462,7 +4454,7 @@ EXPORT_SYMBOL(netdev_has_any_upper_dev);
*/
struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *upper;
ASSERT_RTNL();
@@ -4470,13 +4462,38 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
return NULL;
upper = list_first_entry(&dev->upper_dev_list,
- struct netdev_upper, list);
+ struct netdev_adjacent, list);
if (likely(upper->master))
return upper->dev;
return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
+/* netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device whose upper_dev_list is walked
+ * @iter: list_head ** of the current position; advanced on success
+ *
+ * Returns the device following *iter in dev's upper list, or NULL (with
+ * *iter unchanged) at the list head. Caller must hold the RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *upper;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->upper_dev_list)
+ return NULL;
+
+ *iter = &upper->list;
+
+ return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
/**
* netdev_master_upper_dev_get_rcu - Get master upper device
* @dev: device
@@ -4486,20 +4503,158 @@ EXPORT_SYMBOL(netdev_master_upper_dev_get);
*/
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *upper;
upper = list_first_or_null_rcu(&dev->upper_dev_list,
- struct netdev_upper, list);
+ struct netdev_adjacent, list);
if (upper && likely(upper->master))
return upper->dev;
return NULL;
}
EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
+static int __netdev_adjacent_dev_insert(struct net_device *dev,
+ struct net_device *adj_dev,
+ bool neighbour, bool master,
+ bool upper)
+{
+ struct netdev_adjacent *adj;
+
+ adj = __netdev_find_adj(dev, adj_dev, upper);
+
+ if (adj) {
+ BUG_ON(neighbour);
+ adj->ref_nr++;
+ return 0;
+ }
+
+ adj = kmalloc(sizeof(*adj), GFP_KERNEL);
+ if (!adj)
+ return -ENOMEM;
+
+ adj->dev = adj_dev;
+ adj->master = master;
+ adj->neighbour = neighbour;
+ adj->ref_nr = 1;
+
+ dev_hold(adj_dev);
+ pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
+ adj_dev->name, upper ? "upper" : "lower", dev->name,
+ adj_dev->name);
+
+ if (!upper) {
+ list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
+ return 0;
+ }
+
+ /* Ensure that master upper link is always the first item in list. */
+ if (master)
+ list_add_rcu(&adj->list, &dev->upper_dev_list);
+ else
+ list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
+
+ return 0;
+}
+
+static inline int __netdev_upper_dev_insert(struct net_device *dev,
+ struct net_device *udev,
+ bool master, bool neighbour)
+{
+ return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
+ true);
+}
+
+static inline int __netdev_lower_dev_insert(struct net_device *dev,
+ struct net_device *ldev,
+ bool neighbour)
+{
+ return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
+ false);
+}
+
+void __netdev_adjacent_dev_remove(struct net_device *dev,
+ struct net_device *adj_dev, bool upper)
+{
+ struct netdev_adjacent *adj;
+
+ if (upper)
+ adj = __netdev_find_upper(dev, adj_dev);
+ else
+ adj = __netdev_find_lower(dev, adj_dev);
+
+ if (!adj)
+ BUG();
+
+ if (adj->ref_nr > 1) {
+ adj->ref_nr--;
+ return;
+ }
+
+ list_del_rcu(&adj->list);
+ pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
+ adj_dev->name, upper ? "upper" : "lower", dev->name,
+ adj_dev->name);
+ dev_put(adj_dev);
+ kfree_rcu(adj, rcu);
+}
+
+static inline void __netdev_upper_dev_remove(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_adjacent_dev_remove(dev, udev, true);
+}
+
+static inline void __netdev_lower_dev_remove(struct net_device *dev,
+ struct net_device *ldev)
+{
+ return __netdev_adjacent_dev_remove(dev, ldev, false);
+}
+
+int __netdev_adjacent_dev_insert_link(struct net_device *dev,
+ struct net_device *upper_dev,
+ bool master, bool neighbour)
+{
+ int ret;
+
+ ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
+ if (ret)
+ return ret;
+
+ ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
+ if (ret) {
+ __netdev_upper_dev_remove(dev, upper_dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static inline int __netdev_adjacent_dev_link(struct net_device *dev,
+ struct net_device *udev)
+{
+ return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
+}
+
+static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
+ struct net_device *udev,
+ bool master)
+{
+ return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
+}
+
+void __netdev_adjacent_dev_unlink(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ __netdev_upper_dev_remove(dev, upper_dev);
+ __netdev_lower_dev_remove(upper_dev, dev);
+}
+
+
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master)
{
- struct netdev_upper *upper;
+ struct netdev_adjacent *i, *j, *to_i, *to_j;
+ int ret = 0;
ASSERT_RTNL();
@@ -4507,7 +4662,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_search_upper_dev(upper_dev, dev))
+ if (__netdev_find_upper(upper_dev, dev))
return -EBUSY;
if (__netdev_find_upper(dev, upper_dev))
@@ -4516,22 +4671,76 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- upper = kmalloc(sizeof(*upper), GFP_KERNEL);
- if (!upper)
- return -ENOMEM;
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
+ if (ret)
+ return ret;
- upper->dev = upper_dev;
- upper->master = master;
- INIT_LIST_HEAD(&upper->search_list);
+ /* Now that we linked these devs, make all the upper_dev's
+ * upper_dev_list visible to every dev's lower_dev_list and vice
+ * versa, and don't forget the devices themselves. All of these
+ * links are non-neighbours.
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+ if (ret)
+ goto rollback_mesh;
+ }
+ }
+
+ /* add dev to every upper_dev's upper device */
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(dev, i->dev);
+ if (ret)
+ goto rollback_upper_mesh;
+ }
+
+ /* add upper_dev to every dev's lower device */
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+ if (ret)
+ goto rollback_lower_mesh;
+ }
- /* Ensure that master upper link is always the first item in list. */
- if (master)
- list_add_rcu(&upper->list, &dev->upper_dev_list);
- else
- list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
- dev_hold(upper_dev);
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
return 0;
+
+rollback_lower_mesh:
+ to_i = i;
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ if (i == to_i)
+ break;
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+ }
+
+ i = NULL;
+
+rollback_upper_mesh:
+ to_i = i;
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
+ if (i == to_i)
+ break;
+ __netdev_adjacent_dev_unlink(dev, i->dev);
+ }
+
+ i = j = NULL;
+
+rollback_mesh:
+ to_i = i;
+ to_j = j;
+ list_for_each_entry(i, &dev->lower_dev_list, list) {
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
+ if (i == to_i && j == to_j)
+ break;
+ __netdev_adjacent_dev_unlink(i->dev, j->dev);
+ }
+ if (i == to_i)
+ break;
+ }
+
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+
+ return ret;
}
/**
@@ -4580,16 +4789,28 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_upper *upper;
-
+ struct netdev_adjacent *i, *j;
ASSERT_RTNL();
- upper = __netdev_find_upper(dev, upper_dev);
- if (!upper)
- return;
- list_del_rcu(&upper->list);
- dev_put(upper_dev);
- kfree_rcu(upper, rcu);
+ __netdev_adjacent_dev_unlink(dev, upper_dev);
+
+ /* Here is the tricky part. We must remove all dev's lower
+ * devices from all upper_dev's upper devices and vice
+ * versa, to maintain the graph relationship.
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list)
+ list_for_each_entry(j, &upper_dev->upper_dev_list, list)
+ __netdev_adjacent_dev_unlink(i->dev, j->dev);
+
+ /* also remove the devices themselves from the lower/upper device
+ * lists
+ */
+ list_for_each_entry(i, &dev->lower_dev_list, list)
+ __netdev_adjacent_dev_unlink(i->dev, upper_dev);
+
+ list_for_each_entry(i, &upper_dev->upper_dev_list, list)
+ __netdev_adjacent_dev_unlink(dev, i->dev);
+
call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -4989,6 +5210,24 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier)
EXPORT_SYMBOL(dev_change_carrier);
/**
+ * dev_get_phys_port_id - Get device physical port ID
+ * @dev: device
+ * @ppid: port ID, handed to the driver's ndo_get_phys_port_id callback
+ *
+ * Returns the callback's result, or -EOPNOTSUPP if the driver has none.
+ */
+int dev_get_phys_port_id(struct net_device *dev,
+ struct netdev_phys_port_id *ppid)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_get_phys_port_id)
+ return -EOPNOTSUPP;
+ return ops->ndo_get_phys_port_id(dev, ppid);
+}
+EXPORT_SYMBOL(dev_get_phys_port_id);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
@@ -5009,10 +5248,12 @@ static int dev_new_index(struct net *net)
/* Delayed registration/unregisteration */
static LIST_HEAD(net_todo_list);
+static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
static void net_set_todo(struct net_device *dev)
{
list_add_tail(&dev->todo_list, &net_todo_list);
+ dev_net(dev)->dev_unreg_count++;
}
static void rollback_registered_many(struct list_head *head)
@@ -5680,6 +5921,12 @@ void netdev_run_todo(void)
if (dev->destructor)
dev->destructor(dev);
+ /* Report a network device has been unregistered */
+ rtnl_lock();
+ dev_net(dev)->dev_unreg_count--;
+ __rtnl_unlock();
+ wake_up(&netdev_unregistering_wq);
+
/* Free network device */
kobject_put(&dev->dev.kobj);
}
@@ -5832,6 +6079,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
INIT_LIST_HEAD(&dev->upper_dev_list);
+ INIT_LIST_HEAD(&dev->lower_dev_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
@@ -6364,6 +6612,34 @@ static void __net_exit default_device_exit(struct net *net)
rtnl_unlock();
}
+static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
+{
+ /* Sleep on netdev_unregistering_wq until no namespace in net_list has
+ * a non-zero dev_unreg_count, then return with the rtnl_lock held.
+ */
+ struct net *net;
+ bool unregistering;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ prepare_to_wait(&netdev_unregistering_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ unregistering = false;
+ rtnl_lock();
+ list_for_each_entry(net, net_list, exit_list) {
+ if (net->dev_unreg_count > 0) {
+ unregistering = true;
+ break;
+ }
+ }
+ if (!unregistering)
+ break;
+ __rtnl_unlock();
+ schedule();
+ }
+ finish_wait(&netdev_unregistering_wq, &wait);
+}
+
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
/* At exit all network devices most be removed from a network
@@ -6375,7 +6651,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);
- rtnl_lock();
+ /* To prevent network device cleanup code from dereferencing
+ * loopback devices or network devices that have been freed,
+ * wait here for all pending unregistrations to complete
+ * before unregistering the loopback device and allowing the
+ * network namespace to be freed.
+ *
+ * The netdev todo list containing all network device
+ * unregistrations that happen in default_device_exit_batch
+ * will run in the rtnl_unlock() at the end of
+ * default_device_exit_batch.
+ */
+ rtnl_lock_unregistering(net_list);
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 21735440c44a..2e654138433c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -33,6 +33,9 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->flags = flags;
r->fr_net = hold_net(ops->fro_net);
+ r->suppress_prefixlen = -1;
+ r->suppress_ifgroup = -1;
+
/* The lock is not required here, the list in unreacheable
* at the moment this function is called */
list_add_tail(&r->list, &ops->rules_list);
@@ -226,6 +229,9 @@ jumped:
else
err = ops->action(rule, fl, flags, arg);
+ if (!err && ops->suppress && ops->suppress(rule, arg))
+ continue;
+
if (err != -EAGAIN) {
if ((arg->flags & FIB_LOOKUP_NOREF) ||
likely(atomic_inc_not_zero(&rule->refcnt))) {
@@ -337,6 +343,15 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
+ if (tb[FRA_SUPPRESS_PREFIXLEN])
+ rule->suppress_prefixlen = nla_get_u32(tb[FRA_SUPPRESS_PREFIXLEN]);
+ else
+ rule->suppress_prefixlen = -1;
+
+ if (tb[FRA_SUPPRESS_IFGROUP])
+ rule->suppress_ifgroup = nla_get_u32(tb[FRA_SUPPRESS_IFGROUP]);
+ else
+ rule->suppress_ifgroup = -1;
if (!tb[FRA_PRIORITY] && ops->default_pref)
rule->pref = ops->default_pref(ops);
@@ -523,6 +538,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(IFNAMSIZ) /* FRA_OIFNAME */
+ nla_total_size(4) /* FRA_PRIORITY */
+ nla_total_size(4) /* FRA_TABLE */
+ + nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
+ + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
+ nla_total_size(4) /* FRA_FWMARK */
+ nla_total_size(4); /* FRA_FWMASK */
@@ -548,6 +565,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
frh->table = rule->table;
if (nla_put_u32(skb, FRA_TABLE, rule->table))
goto nla_put_failure;
+ if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen))
+ goto nla_put_failure;
frh->res1 = 0;
frh->res2 = 0;
frh->action = rule->action;
@@ -580,6 +599,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
(rule->target &&
nla_put_u32(skb, FRA_GOTO, rule->target)))
goto nla_put_failure;
+
+ if (rule->suppress_ifgroup != -1) {
+ if (nla_put_u32(skb, FRA_SUPPRESS_IFGROUP, rule->suppress_ifgroup))
+ goto nla_put_failure;
+ }
+
if (ops->fill(rule, skb, frh) < 0)
goto nla_put_failure;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6438f29ff266..01b780856db2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -644,7 +644,6 @@ void sk_filter_release_rcu(struct rcu_head *rcu)
struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
bpf_jit_free(fp);
- kfree(fp);
}
EXPORT_SYMBOL(sk_filter_release_rcu);
@@ -683,7 +682,7 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
if (fprog->filter == NULL)
return -EINVAL;
- fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL);
+ fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
if (!fp)
return -ENOMEM;
memcpy(fp->insns, fprog->filter, fsize);
@@ -723,6 +722,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
+ unsigned int sk_fsize = sk_filter_size(fprog->len);
int err;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
@@ -732,11 +732,11 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (fprog->filter == NULL)
return -EINVAL;
- fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
+ fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
if (!fp)
return -ENOMEM;
if (copy_from_user(fp->insns, fprog->filter, fsize)) {
- sock_kfree_s(sk, fp, fsize+sizeof(*fp));
+ sock_kfree_s(sk, fp, sk_fsize);
return -EFAULT;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d12e3a9a5356..8d7d0dd72db2 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -140,7 +140,11 @@ ipv6:
break;
}
case IPPROTO_IPIP:
- goto again;
+ proto = htons(ETH_P_IP);
+ goto ip;
+ case IPPROTO_IPV6:
+ proto = htons(ETH_P_IPV6);
+ goto ipv6;
default:
break;
}
@@ -150,8 +154,8 @@ ipv6:
if (poff >= 0) {
__be32 *ports, _ports;
- nhoff += poff;
- ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+ ports = skb_header_pointer(skb, nhoff + poff,
+ sizeof(_ports), &_ports);
if (ports)
flow->ports = *ports;
}
@@ -348,7 +352,7 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
if (queue_index != new_index && sk &&
rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, queue_index);
+ sk_tx_queue_set(sk, new_index);
queue_index = new_index;
}
diff --git a/net/core/iovec.c b/net/core/iovec.c
index de178e462682..b77eeecc0011 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -212,3 +212,27 @@ out_fault:
goto out;
}
EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
+
+unsigned long iov_pages(const struct iovec *iov, int offset,
+ unsigned long nr_segs)
+{
+ unsigned long seg, base;
+ int pages = 0, len, size;
+
+ while (nr_segs && (offset >= iov->iov_len)) {
+ offset -= iov->iov_len;
+ ++iov;
+ --nr_segs;
+ }
+
+ for (seg = 0; seg < nr_segs; seg++) {
+ base = (unsigned long)iov[seg].iov_base + offset;
+ len = iov[seg].iov_len - offset;
+ size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+ pages += size;
+ offset = 0;
+ }
+
+ return pages;
+}
+EXPORT_SYMBOL(iov_pages);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 60533db8b72d..6072610a8672 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2759,13 +2759,11 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
-#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
-#endif /* CONFIG_ARPD */
#ifdef CONFIG_SYSCTL
static int zero;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 981fed397d1d..d954b56b4e47 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -60,12 +60,19 @@ static ssize_t format_##field(const struct net_device *net, char *buf) \
{ \
return sprintf(buf, format_string, net->field); \
} \
-static ssize_t show_##field(struct device *dev, \
+static ssize_t field##_show(struct device *dev, \
struct device_attribute *attr, char *buf) \
{ \
return netdev_show(dev, attr, buf, format_##field); \
-}
+} \
+
+#define NETDEVICE_SHOW_RO(field, format_string) \
+NETDEVICE_SHOW(field, format_string); \
+static DEVICE_ATTR_RO(field)
+#define NETDEVICE_SHOW_RW(field, format_string) \
+NETDEVICE_SHOW(field, format_string); \
+static DEVICE_ATTR_RW(field)
/* use same locking and permission rules as SIF* ioctl's */
static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
@@ -96,16 +103,16 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
return ret;
}
-NETDEVICE_SHOW(dev_id, fmt_hex);
-NETDEVICE_SHOW(addr_assign_type, fmt_dec);
-NETDEVICE_SHOW(addr_len, fmt_dec);
-NETDEVICE_SHOW(iflink, fmt_dec);
-NETDEVICE_SHOW(ifindex, fmt_dec);
-NETDEVICE_SHOW(type, fmt_dec);
-NETDEVICE_SHOW(link_mode, fmt_dec);
+NETDEVICE_SHOW_RO(dev_id, fmt_hex);
+NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
+NETDEVICE_SHOW_RO(addr_len, fmt_dec);
+NETDEVICE_SHOW_RO(iflink, fmt_dec);
+NETDEVICE_SHOW_RO(ifindex, fmt_dec);
+NETDEVICE_SHOW_RO(type, fmt_dec);
+NETDEVICE_SHOW_RO(link_mode, fmt_dec);
/* use same locking rules as GIFHWADDR ioctl's */
-static ssize_t show_address(struct device *dev, struct device_attribute *attr,
+static ssize_t address_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct net_device *net = to_net_dev(dev);
@@ -117,15 +124,17 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
read_unlock(&dev_base_lock);
return ret;
}
+static DEVICE_ATTR_RO(address);
-static ssize_t show_broadcast(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t broadcast_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct net_device *net = to_net_dev(dev);
if (dev_isalive(net))
return sysfs_format_mac(buf, net->broadcast, net->addr_len);
return -EINVAL;
}
+static DEVICE_ATTR_RO(broadcast);
static int change_carrier(struct net_device *net, unsigned long new_carrier)
{
@@ -134,13 +143,13 @@ static int change_carrier(struct net_device *net, unsigned long new_carrier)
return dev_change_carrier(net, (bool) new_carrier);
}
-static ssize_t store_carrier(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
+static ssize_t carrier_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_carrier);
}
-static ssize_t show_carrier(struct device *dev,
+static ssize_t carrier_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
@@ -149,8 +158,9 @@ static ssize_t show_carrier(struct device *dev,
}
return -EINVAL;
}
+static DEVICE_ATTR_RW(carrier);
-static ssize_t show_speed(struct device *dev,
+static ssize_t speed_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
@@ -167,8 +177,9 @@ static ssize_t show_speed(struct device *dev,
rtnl_unlock();
return ret;
}
+static DEVICE_ATTR_RO(speed);
-static ssize_t show_duplex(struct device *dev,
+static ssize_t duplex_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
@@ -198,8 +209,9 @@ static ssize_t show_duplex(struct device *dev,
rtnl_unlock();
return ret;
}
+static DEVICE_ATTR_RO(duplex);
-static ssize_t show_dormant(struct device *dev,
+static ssize_t dormant_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
@@ -209,6 +221,7 @@ static ssize_t show_dormant(struct device *dev,
return -EINVAL;
}
+static DEVICE_ATTR_RO(dormant);
static const char *const operstates[] = {
"unknown",
@@ -220,7 +233,7 @@ static const char *const operstates[] = {
"up"
};
-static ssize_t show_operstate(struct device *dev,
+static ssize_t operstate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
@@ -237,35 +250,33 @@ static ssize_t show_operstate(struct device *dev,
return sprintf(buf, "%s\n", operstates[operstate]);
}
+static DEVICE_ATTR_RO(operstate);
/* read-write attributes */
-NETDEVICE_SHOW(mtu, fmt_dec);
static int change_mtu(struct net_device *net, unsigned long new_mtu)
{
return dev_set_mtu(net, (int) new_mtu);
}
-static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
+static ssize_t mtu_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_mtu);
}
-
-NETDEVICE_SHOW(flags, fmt_hex);
+NETDEVICE_SHOW_RW(mtu, fmt_dec);
static int change_flags(struct net_device *net, unsigned long new_flags)
{
return dev_change_flags(net, (unsigned int) new_flags);
}
-static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
+static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_flags);
}
-
-NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+NETDEVICE_SHOW_RW(flags, fmt_hex);
static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
{
@@ -273,7 +284,7 @@ static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
return 0;
}
-static ssize_t store_tx_queue_len(struct device *dev,
+static ssize_t tx_queue_len_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
@@ -282,8 +293,9 @@ static ssize_t store_tx_queue_len(struct device *dev,
return netdev_store(dev, attr, buf, len, change_tx_queue_len);
}
+NETDEVICE_SHOW_RW(tx_queue_len, fmt_ulong);
-static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
+static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
{
struct net_device *netdev = to_net_dev(dev);
@@ -306,7 +318,7 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr,
return ret < 0 ? ret : len;
}
-static ssize_t show_ifalias(struct device *dev,
+static ssize_t ifalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
@@ -319,8 +331,7 @@ static ssize_t show_ifalias(struct device *dev,
rtnl_unlock();
return ret;
}
-
-NETDEVICE_SHOW(group, fmt_dec);
+static DEVICE_ATTR_RW(ifalias);
static int change_group(struct net_device *net, unsigned long new_group)
{
@@ -328,35 +339,60 @@ static int change_group(struct net_device *net, unsigned long new_group)
return 0;
}
-static ssize_t store_group(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t len)
+static ssize_t group_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
{
return netdev_store(dev, attr, buf, len, change_group);
}
+NETDEVICE_SHOW(group, fmt_dec);
+static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+
+static ssize_t phys_port_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ ssize_t ret = -EINVAL;
-static struct device_attribute net_class_attributes[] = {
- __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
- __ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
- __ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
- __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
- __ATTR(iflink, S_IRUGO, show_iflink, NULL),
- __ATTR(ifindex, S_IRUGO, show_ifindex, NULL),
- __ATTR(type, S_IRUGO, show_type, NULL),
- __ATTR(link_mode, S_IRUGO, show_link_mode, NULL),
- __ATTR(address, S_IRUGO, show_address, NULL),
- __ATTR(broadcast, S_IRUGO, show_broadcast, NULL),
- __ATTR(carrier, S_IRUGO | S_IWUSR, show_carrier, store_carrier),
- __ATTR(speed, S_IRUGO, show_speed, NULL),
- __ATTR(duplex, S_IRUGO, show_duplex, NULL),
- __ATTR(dormant, S_IRUGO, show_dormant, NULL),
- __ATTR(operstate, S_IRUGO, show_operstate, NULL),
- __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu),
- __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
- __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
- store_tx_queue_len),
- __ATTR(netdev_group, S_IRUGO | S_IWUSR, show_group, store_group),
- {}
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ if (dev_isalive(netdev)) {
+ struct netdev_phys_port_id ppid;
+
+ ret = dev_get_phys_port_id(netdev, &ppid);
+ if (!ret)
+ ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+ }
+ rtnl_unlock();
+
+ return ret;
+}
+static DEVICE_ATTR_RO(phys_port_id);
+
+static struct attribute *net_class_attrs[] = {
+ &dev_attr_netdev_group.attr,
+ &dev_attr_type.attr,
+ &dev_attr_dev_id.attr,
+ &dev_attr_iflink.attr,
+ &dev_attr_ifindex.attr,
+ &dev_attr_addr_assign_type.attr,
+ &dev_attr_addr_len.attr,
+ &dev_attr_link_mode.attr,
+ &dev_attr_address.attr,
+ &dev_attr_broadcast.attr,
+ &dev_attr_speed.attr,
+ &dev_attr_duplex.attr,
+ &dev_attr_dormant.attr,
+ &dev_attr_operstate.attr,
+ &dev_attr_ifalias.attr,
+ &dev_attr_carrier.attr,
+ &dev_attr_mtu.attr,
+ &dev_attr_flags.attr,
+ &dev_attr_tx_queue_len.attr,
+ &dev_attr_phys_port_id.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(net_class);
/* Show a given an attribute in the statistics group */
static ssize_t netstat_show(const struct device *d,
@@ -382,13 +418,13 @@ static ssize_t netstat_show(const struct device *d,
/* generate a read-only statistics attribute */
#define NETSTAT_ENTRY(name) \
-static ssize_t show_##name(struct device *d, \
+static ssize_t name##_show(struct device *d, \
struct device_attribute *attr, char *buf) \
{ \
return netstat_show(d, attr, buf, \
offsetof(struct rtnl_link_stats64, name)); \
} \
-static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+static DEVICE_ATTR_RO(name)
NETSTAT_ENTRY(rx_packets);
NETSTAT_ENTRY(tx_packets);
@@ -457,6 +493,9 @@ static struct attribute_group wireless_group = {
.attrs = wireless_attrs,
};
#endif
+
+#else /* CONFIG_SYSFS */
+#define net_class_groups NULL
#endif /* CONFIG_SYSFS */
#ifdef CONFIG_RPS
@@ -1157,6 +1196,13 @@ static void remove_queue_kobjects(struct net_device *net)
#endif
}
+static bool net_current_may_mount(void)
+{
+ struct net *net = current->nsproxy->net_ns;
+
+ return ns_capable(net->user_ns, CAP_SYS_ADMIN);
+}
+
static void *net_grab_current_ns(void)
{
struct net *ns = current->nsproxy->net_ns;
@@ -1179,6 +1225,7 @@ static const void *net_netlink_ns(struct sock *sk)
struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET,
+ .current_may_mount = net_current_may_mount,
.grab_current_ns = net_grab_current_ns,
.netlink_ns = net_netlink_ns,
.initial_ns = net_initial_ns,
@@ -1229,9 +1276,7 @@ static const void *net_namespace(struct device *d)
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
-#ifdef CONFIG_SYSFS
- .dev_attrs = net_class_attributes,
-#endif /* CONFIG_SYSFS */
+ .dev_groups = net_class_groups,
.dev_uevent = netdev_uevent,
.ns_type = &net_ns_type_operations,
.namespace = net_namespace,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f97652036754..81d3a9a08453 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -651,7 +651,7 @@ static int netns_install(struct nsproxy *nsproxy, void *ns)
struct net *net = ns;
if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
- !nsown_capable(CAP_SYS_ADMIN))
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
put_net(nsproxy->net_ns);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2c637e9a0b27..fc75c9e461b8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -550,7 +550,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
return;
proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto == ETH_P_IP) {
+ if (proto == ETH_P_ARP) {
struct arphdr *arp;
unsigned char *arp_ptr;
/* No arp on this interface */
@@ -1284,15 +1284,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async);
void netpoll_cleanup(struct netpoll *np)
{
- if (!np->dev)
- return;
-
rtnl_lock();
+ if (!np->dev)
+ goto out;
__netpoll_cleanup(np);
- rtnl_unlock();
-
dev_put(np->dev);
np->dev = NULL;
+out:
+ rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index e533259dce3c..d9cd627e6a16 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -29,12 +29,6 @@
#define PRIOMAP_MIN_SZ 128
-static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
-{
- return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
- struct cgroup_netprio_state, css);
-}
-
/*
* Extend @dev->priomap so that it's large enough to accomodate
* @target_idx. @dev->priomap.priomap_len > @target_idx after successful
@@ -87,67 +81,70 @@ static int extend_netdev_table(struct net_device *dev, u32 target_idx)
/**
* netprio_prio - return the effective netprio of a cgroup-net_device pair
- * @cgrp: cgroup part of the target pair
+ * @css: css part of the target pair
* @dev: net_device part of the target pair
*
* Should be called under RCU read or rtnl lock.
*/
-static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
+static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev)
{
struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
+ int id = css->cgroup->id;
- if (map && cgrp->id < map->priomap_len)
- return map->priomap[cgrp->id];
+ if (map && id < map->priomap_len)
+ return map->priomap[id];
return 0;
}
/**
* netprio_set_prio - set netprio on a cgroup-net_device pair
- * @cgrp: cgroup part of the target pair
+ * @css: css part of the target pair
* @dev: net_device part of the target pair
* @prio: prio to set
*
- * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl
+ * Set netprio to @prio on @css-@dev pair. Should be called under rtnl
* lock and may fail under memory pressure for non-zero @prio.
*/
-static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
- u32 prio)
+static int netprio_set_prio(struct cgroup_subsys_state *css,
+ struct net_device *dev, u32 prio)
{
struct netprio_map *map;
+ int id = css->cgroup->id;
int ret;
/* avoid extending priomap for zero writes */
map = rtnl_dereference(dev->priomap);
- if (!prio && (!map || map->priomap_len <= cgrp->id))
+ if (!prio && (!map || map->priomap_len <= id))
return 0;
- ret = extend_netdev_table(dev, cgrp->id);
+ ret = extend_netdev_table(dev, id);
if (ret)
return ret;
map = rtnl_dereference(dev->priomap);
- map->priomap[cgrp->id] = prio;
+ map->priomap[id] = prio;
return 0;
}
-static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
{
- struct cgroup_netprio_state *cs;
+ struct cgroup_subsys_state *css;
- cs = kzalloc(sizeof(*cs), GFP_KERNEL);
- if (!cs)
+ css = kzalloc(sizeof(*css), GFP_KERNEL);
+ if (!css)
return ERR_PTR(-ENOMEM);
- return &cs->css;
+ return css;
}
-static int cgrp_css_online(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
{
- struct cgroup *parent = cgrp->parent;
+ struct cgroup_subsys_state *parent_css = css_parent(css);
struct net_device *dev;
int ret = 0;
- if (!parent)
+ if (!parent_css)
return 0;
rtnl_lock();
@@ -156,9 +153,9 @@ static int cgrp_css_online(struct cgroup *cgrp)
* onlining, there is no need to clear them on offline.
*/
for_each_netdev(&init_net, dev) {
- u32 prio = netprio_prio(parent, dev);
+ u32 prio = netprio_prio(parent_css, dev);
- ret = netprio_set_prio(cgrp, dev, prio);
+ ret = netprio_set_prio(css, dev, prio);
if (ret)
break;
}
@@ -166,29 +163,29 @@ static int cgrp_css_online(struct cgroup *cgrp)
return ret;
}
-static void cgrp_css_free(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
{
- kfree(cgrp_netprio_state(cgrp));
+ kfree(css);
}
-static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
{
- return cgrp->id;
+ return css->cgroup->id;
}
-static int read_priomap(struct cgroup *cont, struct cftype *cft,
+static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb)
{
struct net_device *dev;
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev)
- cb->fill(cb, dev->name, netprio_prio(cont, dev));
+ cb->fill(cb, dev->name, netprio_prio(css, dev));
rcu_read_unlock();
return 0;
}
-static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer)
{
char devname[IFNAMSIZ + 1];
@@ -205,7 +202,7 @@ static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
rtnl_lock();
- ret = netprio_set_prio(cgrp, dev, prio);
+ ret = netprio_set_prio(css, dev, prio);
rtnl_unlock();
dev_put(dev);
@@ -221,12 +218,13 @@ static int update_netprio(const void *v, struct file *file, unsigned n)
return 0;
}
-static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void net_prio_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
struct task_struct *p;
void *v;
- cgroup_taskset_for_each(p, cgrp, tset) {
+ cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
v = (void *)(unsigned long)task_netprioidx(p);
iterate_fd(p->files, 0, update_netprio, v);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9640972ec50e..261357a66300 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -160,6 +160,8 @@
#include <net/net_namespace.h>
#include <net/checksum.h>
#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#ifdef CONFIG_XFRM
#include <net/xfrm.h>
@@ -198,6 +200,7 @@
#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
#define F_NODE (1<<15) /* Node memory alloc*/
+#define F_UDPCSUM (1<<16) /* Include UDP checksum */
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -631,6 +634,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->flags & F_UDPDST_RND)
seq_printf(seq, "UDPDST_RND ");
+ if (pkt_dev->flags & F_UDPCSUM)
+ seq_printf(seq, "UDPCSUM ");
+
if (pkt_dev->flags & F_MPLS_RND)
seq_printf(seq, "MPLS_RND ");
@@ -1228,6 +1234,12 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "!NODE_ALLOC") == 0)
pkt_dev->flags &= ~F_NODE;
+ else if (strcmp(f, "UDPCSUM") == 0)
+ pkt_dev->flags |= F_UDPCSUM;
+
+ else if (strcmp(f, "!UDPCSUM") == 0)
+ pkt_dev->flags &= ~F_UDPCSUM;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -2733,7 +2745,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
udph->source = htons(pkt_dev->cur_udp_src);
udph->dest = htons(pkt_dev->cur_udp_dst);
udph->len = htons(datalen + 8); /* DATA + udphdr */
- udph->check = 0; /* No checksum */
+ udph->check = 0;
iph->ihl = 5;
iph->version = 4;
@@ -2747,11 +2759,28 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
iph->frag_off = 0;
iplen = 20 + 8 + datalen;
iph->tot_len = htons(iplen);
- iph->check = 0;
- iph->check = ip_fast_csum((void *)iph, iph->ihl);
+ ip_send_check(iph);
skb->protocol = protocol;
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
+
+ if (!(pkt_dev->flags & F_UDPCSUM)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ } else if (odev->features & NETIF_F_V4_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum = 0;
+ udp4_hwcsum(skb, udph->source, udph->dest);
+ } else {
+ __wsum csum = udp_csum(skb);
+
+ /* add protocol-dependent pseudo-header */
+ udph->check = csum_tcpudp_magic(udph->source, udph->dest,
+ datalen + 8, IPPROTO_UDP, csum);
+
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+ }
+
pktgen_finalize_skb(pkt_dev, skb, datalen);
#ifdef CONFIG_XFRM
@@ -2768,7 +2797,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
struct sk_buff *skb = NULL;
__u8 *eth;
struct udphdr *udph;
- int datalen;
+ int datalen, udplen;
struct ipv6hdr *iph;
__be16 protocol = htons(ETH_P_IPV6);
__be32 *mpls;
@@ -2844,10 +2873,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
net_info_ratelimited("increased datalen to %d\n", datalen);
}
+ udplen = datalen + sizeof(struct udphdr);
udph->source = htons(pkt_dev->cur_udp_src);
udph->dest = htons(pkt_dev->cur_udp_dst);
- udph->len = htons(datalen + sizeof(struct udphdr));
- udph->check = 0; /* No checksum */
+ udph->len = htons(udplen);
+ udph->check = 0;
*(__be32 *) iph = htonl(0x60000000); /* Version + flow */
@@ -2858,7 +2888,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
iph->hop_limit = 32;
- iph->payload_len = htons(sizeof(struct udphdr) + datalen);
+ iph->payload_len = htons(udplen);
iph->nexthdr = IPPROTO_UDP;
iph->daddr = pkt_dev->cur_in6_daddr;
@@ -2868,6 +2898,23 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->dev = odev;
skb->pkt_type = PACKET_HOST;
+ if (!(pkt_dev->flags & F_UDPCSUM)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ } else if (odev->features & NETIF_F_V6_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ udph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, 0);
+ } else {
+ __wsum csum = udp_csum(skb);
+
+ /* add protocol-dependent pseudo-header */
+ udph->check = csum_ipv6_magic(&iph->saddr, &iph->daddr, udplen, IPPROTO_UDP, csum);
+
+ if (udph->check == 0)
+ udph->check = CSUM_MANGLED_0;
+ }
+
pktgen_finalize_skb(pkt_dev, skb, datalen);
return skb;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ca198c1d1d30..2a0e21de3060 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -767,7 +767,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
+ rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
- + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */
+ + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+ + nla_total_size(MAX_PHYS_PORT_ID_LEN); /* IFLA_PHYS_PORT_ID */
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -846,6 +847,24 @@ static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
return 0;
}
+static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
+{
+ int err;
+ struct netdev_phys_port_id ppid;
+
+ err = dev_get_phys_port_id(dev, &ppid);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ if (nla_put(skb, IFLA_PHYS_PORT_ID, ppid.id_len, ppid.id))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
@@ -913,6 +932,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
+ if (rtnl_phys_port_id_fill(skb, dev))
+ goto nla_put_failure;
+
attr = nla_reserve(skb, IFLA_STATS,
sizeof(struct rtnl_link_stats));
if (attr == NULL)
@@ -1113,6 +1135,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROMISCUITY] = { .type = NLA_U32 },
[IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 },
[IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 },
+ [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN },
};
EXPORT_SYMBOL(ifla_policy);
@@ -1844,10 +1867,10 @@ replay:
else
err = register_netdevice(dev);
- if (err < 0 && !IS_ERR(dev))
+ if (err < 0) {
free_netdev(dev);
- if (err < 0)
goto out;
+ }
err = rtnl_configure_link(dev, ifm);
if (err < 0)
diff --git a/net/core/scm.c b/net/core/scm.c
index b4da80b1cc07..b442e7e25e60 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -56,9 +56,9 @@ static __inline__ int scm_check_creds(struct ucred *creds)
if ((creds->pid == task_tgid_vnr(current) ||
ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
- uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
+ uid_eq(uid, cred->suid)) || ns_capable(cred->user_ns, CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
- gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) {
+ gid_eq(gid, cred->sgid)) || ns_capable(cred->user_ns, CAP_SETGID))) {
return 0;
}
return -EPERM;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86a..8d9d05edd2eb 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,12 +10,27 @@
#include <net/secure_seq.h>
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
{
- get_random_bytes(net_secret, sizeof(net_secret));
+ u32 tmp;
+ int i;
+
+ if (likely(net_secret[0]))
+ return;
+
+ for (i = NET_SECRET_SIZE; i > 0;) {
+ do {
+ get_random_bytes(&tmp, sizeof(tmp));
+ } while (!tmp);
+ cmpxchg(&net_secret[--i], 0, tmp);
+ }
}
+#endif
#ifdef CONFIG_INET
static u32 seq_scale(u32 seq)
@@ -42,6 +57,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +79,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +99,7 @@ __u32 secure_ip_id(__be32 daddr)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force __u32) daddr;
hash[1] = net_secret[13];
hash[2] = net_secret[14];
@@ -96,6 +114,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
{
__u32 hash[4];
+ net_secret_init();
memcpy(hash, daddr, 16);
md5_transform(hash, net_secret);
@@ -107,6 +126,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +141,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +161,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
u32 hash[MD5_DIGEST_WORDS];
u64 seq;
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +186,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
u64 seq;
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + daddr[i];
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2c3d0f53d198..d81cff119f73 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3500,17 +3500,22 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
EXPORT_SYMBOL(skb_try_coalesce);
/**
- * skb_scrub_packet - scrub an skb before sending it to another netns
+ * skb_scrub_packet - scrub an skb
*
* @skb: buffer to clean
- *
- * skb_scrub_packet can be used to clean an skb before injecting it in
- * another namespace. We have to clear all information in the skb that
- * could impact namespace isolation.
+ * @xnet: packet is crossing netns
+ *
+ * skb_scrub_packet can be used after encapsulating or decapsulating a packet
+ * into/from a tunnel. Some information has to be cleared during these
+ * operations.
+ * skb_scrub_packet can also be used to clean a skb before injecting it in
+ * another namespace (@xnet == true). We have to clear all information in the
+ * skb that could impact namespace isolation.
*/
-void skb_scrub_packet(struct sk_buff *skb)
+void skb_scrub_packet(struct sk_buff *skb, bool xnet)
{
- skb_orphan(skb);
+ if (xnet)
+ skb_orphan(skb);
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->skb_iif = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 2c097c5a35dd..0b39e7ae4383 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -93,6 +93,7 @@
#include <linux/capability.h>
#include <linux/errno.h>
+#include <linux/errqueue.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
@@ -1575,6 +1576,25 @@ void sock_wfree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_wfree);
+void skb_orphan_partial(struct sk_buff *skb)
+{
+ /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
+ * so we do not completely orphan skb, but transfer all
+ * accounted bytes but one, to avoid unexpected reorders.
+ */
+ if (skb->destructor == sock_wfree
+#ifdef CONFIG_INET
+ || skb->destructor == tcp_wfree
+#endif
+ ) {
+ atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
+ skb->truesize = 1;
+ } else {
+ skb_orphan(skb);
+ }
+}
+EXPORT_SYMBOL(skb_orphan_partial);
+
/*
* Read buffer destructor automatically called from kfree_skb.
*/
@@ -1721,24 +1741,23 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
unsigned long data_len, int noblock,
- int *errcode)
+ int *errcode, int max_page_order)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
+ unsigned long chunk;
gfp_t gfp_mask;
long timeo;
int err;
int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ struct page *page;
+ int i;
err = -EMSGSIZE;
if (npages > MAX_SKB_FRAGS)
goto failure;
- gfp_mask = sk->sk_allocation;
- if (gfp_mask & __GFP_WAIT)
- gfp_mask |= __GFP_REPEAT;
-
timeo = sock_sndtimeo(sk, noblock);
- while (1) {
+ while (!skb) {
err = sock_error(sk);
if (err != 0)
goto failure;
@@ -1747,50 +1766,52 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure;
- if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
- skb = alloc_skb(header_len, gfp_mask);
- if (skb) {
- int i;
-
- /* No pages, we're done... */
- if (!data_len)
- break;
-
- skb->truesize += data_len;
- skb_shinfo(skb)->nr_frags = npages;
- for (i = 0; i < npages; i++) {
- struct page *page;
-
- page = alloc_pages(sk->sk_allocation, 0);
- if (!page) {
- err = -ENOBUFS;
- skb_shinfo(skb)->nr_frags = i;
- kfree_skb(skb);
- goto failure;
- }
-
- __skb_fill_page_desc(skb, i,
- page, 0,
- (data_len >= PAGE_SIZE ?
- PAGE_SIZE :
- data_len));
- data_len -= PAGE_SIZE;
- }
+ if (atomic_read(&sk->sk_wmem_alloc) >= sk->sk_sndbuf) {
+ set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = -EAGAIN;
+ if (!timeo)
+ goto failure;
+ if (signal_pending(current))
+ goto interrupted;
+ timeo = sock_wait_for_wmem(sk, timeo);
+ continue;
+ }
- /* Full success... */
- break;
- }
- err = -ENOBUFS;
+ err = -ENOBUFS;
+ gfp_mask = sk->sk_allocation;
+ if (gfp_mask & __GFP_WAIT)
+ gfp_mask |= __GFP_REPEAT;
+
+ skb = alloc_skb(header_len, gfp_mask);
+ if (!skb)
goto failure;
+
+ skb->truesize += data_len;
+
+ for (i = 0; npages > 0; i++) {
+ int order = max_page_order;
+
+ while (order) {
+ if (npages >= 1 << order) {
+ page = alloc_pages(sk->sk_allocation |
+ __GFP_COMP | __GFP_NOWARN,
+ order);
+ if (page)
+ goto fill_page;
+ }
+ order--;
+ }
+ page = alloc_page(sk->sk_allocation);
+ if (!page)
+ goto failure;
+fill_page:
+ chunk = min_t(unsigned long, data_len,
+ PAGE_SIZE << order);
+ skb_fill_page_desc(skb, i, page, 0, chunk);
+ data_len -= chunk;
+ npages -= 1 << order;
}
- set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- err = -EAGAIN;
- if (!timeo)
- goto failure;
- if (signal_pending(current))
- goto interrupted;
- timeo = sock_wait_for_wmem(sk, timeo);
}
skb_set_owner_w(skb, sk);
@@ -1799,6 +1820,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
interrupted:
err = sock_intr_errno(timeo);
failure:
+ kfree_skb(skb);
*errcode = err;
return NULL;
}
@@ -1807,7 +1829,7 @@ EXPORT_SYMBOL(sock_alloc_send_pskb);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode)
{
- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
+ return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
@@ -2297,6 +2319,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_ll_usec = sysctl_net_busy_read;
#endif
+ sk->sk_pacing_rate = ~0U;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2425,6 +2448,52 @@ void sock_enable_timestamp(struct sock *sk, int flag)
}
}
+int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
+ int level, int type)
+{
+ struct sock_exterr_skb *serr;
+ struct sk_buff *skb, *skb2;
+ int copied, err;
+
+ err = -EAGAIN;
+ skb = skb_dequeue(&sk->sk_error_queue);
+ if (skb == NULL)
+ goto out;
+
+ copied = skb->len;
+ if (copied > len) {
+ msg->msg_flags |= MSG_TRUNC;
+ copied = len;
+ }
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ if (err)
+ goto out_free_skb;
+
+ sock_recv_timestamp(msg, sk, skb);
+
+ serr = SKB_EXT_ERR(skb);
+ put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
+
+ msg->msg_flags |= MSG_ERRQUEUE;
+ err = copied;
+
+ /* Reset and regenerate socket error */
+ spin_lock_bh(&sk->sk_error_queue.lock);
+ sk->sk_err = 0;
+ if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+ sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+ sk->sk_error_report(sk);
+ } else
+ spin_unlock_bh(&sk->sk_error_queue.lock);
+
+out_free_skb:
+ kfree_skb(skb);
+out:
+ return err;
+}
+EXPORT_SYMBOL(sock_recv_errqueue);
+
/*
* Get a socket option on an socket.
*
diff --git a/net/core/stream.c b/net/core/stream.c
index f5df85dcd20b..512f0a24269b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -30,7 +30,7 @@ void sk_stream_write_space(struct sock *sk)
struct socket *sock = sk->sk_socket;
struct socket_wq *wq;
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
+ if (sk_stream_is_writeable(sk) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
rcu_read_lock();
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 31107abd2783..cca444190907 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -20,6 +20,7 @@
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
+#include <net/pkt_sched.h>
static int zero = 0;
static int one = 1;
@@ -193,6 +194,26 @@ static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
}
#endif /* CONFIG_NET_FLOW_LIMIT */
+#ifdef CONFIG_NET_SCHED
+static int set_default_qdisc(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ char id[IFNAMSIZ];
+ struct ctl_table tbl = {
+ .data = id,
+ .maxlen = IFNAMSIZ,
+ };
+ int ret;
+
+ qdisc_get_default(id, IFNAMSIZ);
+
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ ret = qdisc_set_default(id);
+ return ret;
+}
+#endif
+
static struct ctl_table net_core_table[] = {
#ifdef CONFIG_NET
{
@@ -315,7 +336,14 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
-#
+#endif
+#ifdef CONFIG_NET_SCHED
+ {
+ .procname = "default_qdisc",
+ .mode = 0644,
+ .maxlen = IFNAMSIZ,
+ .proc_handler = set_default_qdisc
+ },
#endif
#endif /* CONFIG_NET */
{
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c61f9c02fdb..6cf9f7782ad4 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -135,6 +135,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (dst)
dst->ops->redirect(dst, sk, skb);
+ goto out;
}
if (type == ICMPV6_PKT_TOOBIG) {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 6c7c78b83940..ba64750f0387 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -336,7 +336,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
mask |= POLLIN | POLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
set_bit(SOCK_ASYNC_NOSPACE,
@@ -347,7 +347,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
* wspace test but before the flags are set,
* IO signal will be lost.
*/
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ if (sk_stream_is_writeable(sk))
mask |= POLLOUT | POLLWRNORM;
}
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6ebd8fbd9285..29d684ebca6a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -347,7 +347,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->features = master->vlan_features;
SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops);
- memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN);
+ eth_hw_addr_inherit(slave_dev, master);
slave_dev->tx_queue_len = 0;
switch (ds->dst->tag_protocol) {
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 3b9d5f20bd1c..ff41b4d60d30 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -67,39 +67,6 @@ static const u8 lowpan_ttl_values[] = {0, 1, 64, 255};
static LIST_HEAD(lowpan_devices);
-/*
- * Uncompression of linklocal:
- * 0 -> 16 bytes from packet
- * 1 -> 2 bytes from prefix - bunch of zeroes and 8 from packet
- * 2 -> 2 bytes from prefix - zeroes + 2 from packet
- * 3 -> 2 bytes from prefix - infer 8 bytes from lladdr
- *
- * NOTE: => the uncompress function does change 0xf to 0x10
- * NOTE: 0x00 => no-autoconfig => unspecified
- */
-static const u8 lowpan_unc_llconf[] = {0x0f, 0x28, 0x22, 0x20};
-
-/*
- * Uncompression of ctx-based:
- * 0 -> 0 bits from packet [unspecified / reserved]
- * 1 -> 8 bytes from prefix - bunch of zeroes and 8 from packet
- * 2 -> 8 bytes from prefix - zeroes + 2 from packet
- * 3 -> 8 bytes from prefix - infer 8 bytes from lladdr
- */
-static const u8 lowpan_unc_ctxconf[] = {0x00, 0x88, 0x82, 0x80};
-
-/*
- * Uncompression of ctx-base
- * 0 -> 0 bits from packet
- * 1 -> 2 bytes from prefix - bunch of zeroes 5 from packet
- * 2 -> 2 bytes from prefix - zeroes + 3 from packet
- * 3 -> 2 bytes from prefix - infer 1 bytes from lladdr
- */
-static const u8 lowpan_unc_mxconf[] = {0x0f, 0x25, 0x23, 0x21};
-
-/* Link local prefix */
-static const u8 lowpan_llprefix[] = {0xfe, 0x80};
-
/* private device info */
struct lowpan_dev_info {
struct net_device *real_dev; /* real WPAN device ptr */
@@ -191,55 +158,177 @@ lowpan_compress_addr_64(u8 **hc06_ptr, u8 shift, const struct in6_addr *ipaddr,
return rol8(val, shift);
}
-static void
-lowpan_uip_ds6_set_addr_iid(struct in6_addr *ipaddr, unsigned char *lladdr)
+/*
+ * Uncompress address function for source and
+ * destination address(non-multicast).
+ *
+ * address_mode is sam value or dam value.
+ */
+static int
+lowpan_uncompress_addr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 address_mode,
+ const struct ieee802154_addr *lladdr)
{
- memcpy(&ipaddr->s6_addr[8], lladdr, IEEE802154_ADDR_LEN);
- /* second bit-flip (Universe/Local) is done according RFC2464 */
- ipaddr->s6_addr[8] ^= 0x02;
+ bool fail;
+
+ switch (address_mode) {
+ case LOWPAN_IPHC_ADDR_00:
+ /* for global link addresses */
+ fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
+ break;
+ case LOWPAN_IPHC_ADDR_01:
+ /* fe:80::XXXX:XXXX:XXXX:XXXX */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8);
+ break;
+ case LOWPAN_IPHC_ADDR_02:
+ /* fe:80::ff:fe00:XXXX */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ ipaddr->s6_addr[11] = 0xFF;
+ ipaddr->s6_addr[12] = 0xFE;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2);
+ break;
+ case LOWPAN_IPHC_ADDR_03:
+ fail = false;
+ switch (lladdr->addr_type) {
+ case IEEE802154_ADDR_LONG:
+ /* fe:80::XXXX:XXXX:XXXX:XXXX
+ * \_________________/
+ * hwaddr
+ */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ memcpy(&ipaddr->s6_addr[8], lladdr->hwaddr,
+ IEEE802154_ADDR_LEN);
+ /* second bit-flip (Universe/Local)
+ * is done according RFC2464
+ */
+ ipaddr->s6_addr[8] ^= 0x02;
+ break;
+ case IEEE802154_ADDR_SHORT:
+ /* fe:80::ff:fe00:XXXX
+ * \__/
+ * short_addr
+ *
+ * Universe/Local bit is zero.
+ */
+ ipaddr->s6_addr[0] = 0xFE;
+ ipaddr->s6_addr[1] = 0x80;
+ ipaddr->s6_addr[11] = 0xFF;
+ ipaddr->s6_addr[12] = 0xFE;
+ ipaddr->s6_addr16[7] = htons(lladdr->short_addr);
+ break;
+ default:
+ pr_debug("Invalid addr_type set\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ pr_debug("Invalid address mode value: 0x%x\n", address_mode);
+ return -EINVAL;
+ }
+
+ if (fail) {
+ pr_debug("Failed to fetch skb data\n");
+ return -EIO;
+ }
+
+ lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 addr is:\n",
+ ipaddr->s6_addr, 16);
+
+ return 0;
}
-/*
- * Uncompress addresses based on a prefix and a postfix with zeroes in
- * between. If the postfix is zero in length it will use the link address
- * to configure the IP address (autoconf style).
- * pref_post_count takes a byte where the first nibble specify prefix count
- * and the second postfix count (NOTE: 15/0xf => 16 bytes copy).
+/* Uncompress address function for source context
+ * based address(non-multicast).
*/
static int
-lowpan_uncompress_addr(struct sk_buff *skb, struct in6_addr *ipaddr,
- u8 const *prefix, u8 pref_post_count, unsigned char *lladdr)
+lowpan_uncompress_context_based_src_addr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 sam)
{
- u8 prefcount = pref_post_count >> 4;
- u8 postcount = pref_post_count & 0x0f;
-
- /* full nibble 15 => 16 */
- prefcount = (prefcount == 15 ? 16 : prefcount);
- postcount = (postcount == 15 ? 16 : postcount);
-
- if (lladdr)
- lowpan_raw_dump_inline(__func__, "linklocal address",
- lladdr, IEEE802154_ADDR_LEN);
- if (prefcount > 0)
- memcpy(ipaddr, prefix, prefcount);
-
- if (prefcount + postcount < 16)
- memset(&ipaddr->s6_addr[prefcount], 0,
- 16 - (prefcount + postcount));
-
- if (postcount > 0) {
- memcpy(&ipaddr->s6_addr[16 - postcount], skb->data, postcount);
- skb_pull(skb, postcount);
- } else if (prefcount > 0) {
- if (lladdr == NULL)
- return -EINVAL;
+ switch (sam) {
+ case LOWPAN_IPHC_ADDR_00:
+ /* unspec address ::
+ * Do nothing, address is already ::
+ */
+ break;
+ case LOWPAN_IPHC_ADDR_01:
+ /* TODO */
+ case LOWPAN_IPHC_ADDR_02:
+ /* TODO */
+ case LOWPAN_IPHC_ADDR_03:
+ /* TODO */
+ netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam);
+ return -EINVAL;
+ default:
+ pr_debug("Invalid sam value: 0x%x\n", sam);
+ return -EINVAL;
+ }
+
+ lowpan_raw_dump_inline(NULL,
+ "Reconstructed context based ipv6 src addr is:\n",
+ ipaddr->s6_addr, 16);
+
+ return 0;
+}
+
+/* Uncompress function for multicast destination address,
+ * when M bit is set.
+ */
+static int
+lowpan_uncompress_multicast_daddr(struct sk_buff *skb,
+ struct in6_addr *ipaddr,
+ const u8 dam)
+{
+ bool fail;
+
+ switch (dam) {
+ case LOWPAN_IPHC_DAM_00:
+ /* 00: 128 bits. The full address
+ * is carried in-line.
+ */
+ fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16);
+ break;
+ case LOWPAN_IPHC_DAM_01:
+ /* 01: 48 bits. The address takes
+ * the form ffXX::00XX:XXXX:XXXX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
+ fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[11], 5);
+ break;
+ case LOWPAN_IPHC_DAM_10:
+ /* 10: 32 bits. The address takes
+ * the form ffXX::00XX:XXXX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[1], 1);
+ fail |= lowpan_fetch_skb(skb, &ipaddr->s6_addr[13], 3);
+ break;
+ case LOWPAN_IPHC_DAM_11:
+ /* 11: 8 bits. The address takes
+ * the form ff02::00XX.
+ */
+ ipaddr->s6_addr[0] = 0xFF;
+ ipaddr->s6_addr[1] = 0x02;
+ fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1);
+ break;
+ default:
+ pr_debug("DAM value has a wrong value: 0x%x\n", dam);
+ return -EINVAL;
+ }
- /* no IID based configuration if no prefix and no data */
- lowpan_uip_ds6_set_addr_iid(ipaddr, lladdr);
+ if (fail) {
+ pr_debug("Failed to fetch skb data\n");
+ return -EIO;
}
- pr_debug("uncompressing %d + %d => ", prefcount, postcount);
- lowpan_raw_dump_inline(NULL, NULL, ipaddr->s6_addr, 16);
+ lowpan_raw_dump_inline(NULL, "Reconstructed ipv6 multicast addr is:\n",
+ ipaddr->s6_addr, 16);
return 0;
}
@@ -702,6 +791,12 @@ lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
skb_reserve(frame->skb, sizeof(struct ipv6hdr));
skb_put(frame->skb, frame->length);
+ /* copy the first control block to keep a
+ * trace of the link-layer addresses in case
+ * of a link-local compressed address
+ */
+ memcpy(frame->skb->cb, skb->cb, sizeof(skb->cb));
+
init_timer(&frame->timer);
/* time out is the same as for ipv6 - 60 sec */
frame->timer.expires = jiffies + LOWPAN_FRAG_TIMEOUT;
@@ -723,9 +818,9 @@ frame_err:
static int
lowpan_process_data(struct sk_buff *skb)
{
- struct ipv6hdr hdr;
+ struct ipv6hdr hdr = {};
u8 tmp, iphc0, iphc1, num_context = 0;
- u8 *_saddr, *_daddr;
+ const struct ieee802154_addr *_saddr, *_daddr;
int err;
lowpan_raw_dump_table(__func__, "raw skb data dump", skb->data,
@@ -828,8 +923,8 @@ lowpan_process_data(struct sk_buff *skb)
if (lowpan_fetch_skb_u8(skb, &iphc1))
goto drop;
- _saddr = mac_cb(skb)->sa.hwaddr;
- _daddr = mac_cb(skb)->da.hwaddr;
+ _saddr = &mac_cb(skb)->sa;
+ _daddr = &mac_cb(skb)->da;
pr_debug("iphc0 = %02x, iphc1 = %02x\n", iphc0, iphc1);
@@ -868,8 +963,6 @@ lowpan_process_data(struct sk_buff *skb)
hdr.priority = ((tmp >> 2) & 0x0f);
hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30);
- hdr.flow_lbl[1] = 0;
- hdr.flow_lbl[2] = 0;
break;
/*
* Flow Label carried in-line
@@ -885,10 +978,6 @@ lowpan_process_data(struct sk_buff *skb)
break;
/* Traffic Class and Flow Label are elided */
case 3: /* 11b */
- hdr.priority = 0;
- hdr.flow_lbl[0] = 0;
- hdr.flow_lbl[1] = 0;
- hdr.flow_lbl[2] = 0;
break;
default:
break;
@@ -915,10 +1004,18 @@ lowpan_process_data(struct sk_buff *skb)
/* Extract SAM to the tmp variable */
tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03;
- /* Source address uncompression */
- pr_debug("source address stateless compression\n");
- err = lowpan_uncompress_addr(skb, &hdr.saddr, lowpan_llprefix,
- lowpan_unc_llconf[tmp], skb->data);
+ if (iphc1 & LOWPAN_IPHC_SAC) {
+ /* Source address context based uncompression */
+ pr_debug("SAC bit is set. Handle context based source address.\n");
+ err = lowpan_uncompress_context_based_src_addr(
+ skb, &hdr.saddr, tmp);
+ } else {
+ /* Source address uncompression */
+ pr_debug("source address stateless compression\n");
+ err = lowpan_uncompress_addr(skb, &hdr.saddr, tmp, _saddr);
+ }
+
+ /* Check on error of previous branch */
if (err)
goto drop;
@@ -931,23 +1028,14 @@ lowpan_process_data(struct sk_buff *skb)
pr_debug("dest: context-based mcast compression\n");
/* TODO: implement this */
} else {
- u8 prefix[] = {0xff, 0x02};
-
- pr_debug("dest: non context-based mcast compression\n");
- if (0 < tmp && tmp < 3) {
- if (lowpan_fetch_skb_u8(skb, &prefix[1]))
- goto drop;
- }
-
- err = lowpan_uncompress_addr(skb, &hdr.daddr, prefix,
- lowpan_unc_mxconf[tmp], NULL);
+ err = lowpan_uncompress_multicast_daddr(
+ skb, &hdr.daddr, tmp);
if (err)
goto drop;
}
} else {
pr_debug("dest: stateless compression\n");
- err = lowpan_uncompress_addr(skb, &hdr.daddr, lowpan_llprefix,
- lowpan_unc_llconf[tmp], skb->data);
+ err = lowpan_uncompress_addr(skb, &hdr.daddr, tmp, _daddr);
if (err)
goto drop;
}
@@ -1284,6 +1372,8 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
real_dev = dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
if (!real_dev)
return -ENODEV;
+ if (real_dev->type != ARPHRD_IEEE802154)
+ return -EINVAL;
lowpan_dev_info(dev)->real_dev = real_dev;
lowpan_dev_info(dev)->fragment_tag = 0;
@@ -1298,6 +1388,9 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
entry->ldev = dev;
+ /* Set the lowpan harware address to the wpan hardware address. */
+ memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
+
mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
INIT_LIST_HEAD(&entry->list);
list_add_tail(&entry->list, &lowpan_devices);
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index 4b8f917658b5..2869c0526dad 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -193,10 +193,12 @@
/* Values of fields within the IPHC encoding second byte */
#define LOWPAN_IPHC_CID 0x80
+#define LOWPAN_IPHC_ADDR_00 0x00
+#define LOWPAN_IPHC_ADDR_01 0x01
+#define LOWPAN_IPHC_ADDR_02 0x02
+#define LOWPAN_IPHC_ADDR_03 0x03
+
#define LOWPAN_IPHC_SAC 0x40
-#define LOWPAN_IPHC_SAM_00 0x00
-#define LOWPAN_IPHC_SAM_01 0x10
-#define LOWPAN_IPHC_SAM_10 0x20
#define LOWPAN_IPHC_SAM 0x30
#define LOWPAN_IPHC_SAM_BIT 4
@@ -230,4 +232,16 @@
dest = 16 bit inline */
#define LOWPAN_NHC_UDP_CS_P_11 0xF3 /* source & dest = 0xF0B + 4bit inline */
+static inline bool lowpan_fetch_skb(struct sk_buff *skb,
+ void *data, const unsigned int len)
+{
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return true;
+
+ skb_copy_from_linear_data(skb, data, len);
+ skb_pull(skb, len);
+
+ return false;
+}
+
#endif /* __6LOWPAN_H__ */
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 13571eae6bae..ef56ab5b35fe 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -36,7 +36,8 @@ static ssize_t name ## _show(struct device *dev, \
ret = snprintf(buf, PAGE_SIZE, format_string "\n", args); \
mutex_unlock(&phy->pib_lock); \
return ret; \
-}
+} \
+static DEVICE_ATTR_RO(name);
#define MASTER_SHOW(field, format_string) \
MASTER_SHOW_COMPLEX(field, format_string, phy->field)
@@ -66,15 +67,17 @@ static ssize_t channels_supported_show(struct device *dev,
mutex_unlock(&phy->pib_lock);
return len;
}
-
-static struct device_attribute pmib_attrs[] = {
- __ATTR_RO(current_channel),
- __ATTR_RO(current_page),
- __ATTR_RO(channels_supported),
- __ATTR_RO(transmit_power),
- __ATTR_RO(cca_mode),
- {},
+static DEVICE_ATTR_RO(channels_supported);
+
+static struct attribute *pmib_attrs[] = {
+ &dev_attr_current_channel.attr,
+ &dev_attr_current_page.attr,
+ &dev_attr_channels_supported.attr,
+ &dev_attr_transmit_power.attr,
+ &dev_attr_cca_mode.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(pmib);
static void wpan_phy_release(struct device *d)
{
@@ -85,7 +88,7 @@ static void wpan_phy_release(struct device *d)
static struct class wpan_phy_class = {
.name = "ieee802154",
.dev_release = wpan_phy_release,
- .dev_attrs = pmib_attrs,
+ .dev_groups = pmib_groups,
};
static DEFINE_MUTEX(wpan_phy_mutex);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 37cf1a6ea3ad..05c57f0fcabe 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -259,22 +259,6 @@ config IP_PIMSM_V2
gated-5). This routing protocol is not used widely, so say N unless
you want to play with it.
-config ARPD
- bool "IP: ARP daemon support"
- ---help---
- The kernel maintains an internal cache which maps IP addresses to
- hardware addresses on the local network, so that Ethernet
- frames are sent to the proper address on the physical networking
- layer. Normally, kernel uses the ARP protocol to resolve these
- mappings.
-
- Saying Y here adds support to have an user space daemon to do this
- resolution instead. This is useful for implementing an alternate
- address resolution protocol (e.g. NHRP on mGRE tunnels) and also for
- testing purposes.
-
- If unsure, say N.
-
config SYN_COOKIES
bool "IP: TCP syncookie support"
---help---
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b4d0be2b7ce9..cfeb85cff4f0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
- if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+ if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
- net_secret_init();
- }
}
EXPORT_SYMBOL(build_ehash_secret);
@@ -1532,18 +1530,6 @@ int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
}
EXPORT_SYMBOL_GPL(snmp_mib_init);
-void snmp_mib_free(void __percpu *ptr[SNMP_ARRAY_SZ])
-{
- int i;
-
- BUG_ON(ptr == NULL);
- for (i = 0; i < SNMP_ARRAY_SZ; i++) {
- free_percpu(ptr[i]);
- ptr[i] = NULL;
- }
-}
-EXPORT_SYMBOL_GPL(snmp_mib_free);
-
#ifdef CONFIG_IP_MULTICAST
static const struct net_protocol igmp_protocol = {
.handler = igmp_rcv,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 4429b013f269..7808093cede6 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -368,9 +368,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
} else {
probes -= neigh->parms->app_probes;
if (probes < 0) {
-#ifdef CONFIG_ARPD
neigh_app_ns(neigh);
-#endif
return;
}
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 34ca6d5a3a4b..a1b5bcbd04ae 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -73,6 +73,8 @@ static struct ipv4_devconf ipv4_devconf = {
[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
+ [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
+ [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
},
};
@@ -83,6 +85,8 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
+ [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
+ [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
},
};
@@ -1126,10 +1130,7 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
if (len < (int) sizeof(ifr))
break;
memset(&ifr, 0, sizeof(struct ifreq));
- if (ifa->ifa_label)
- strcpy(ifr.ifr_name, ifa->ifa_label);
- else
- strcpy(ifr.ifr_name, dev->name);
+ strcpy(ifr.ifr_name, ifa->ifa_label);
(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
@@ -2097,11 +2098,15 @@ static struct devinet_sysctl_table {
DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
+ DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
+ "force_igmp_version"),
+ DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
+ "igmpv2_unsolicited_report_interval"),
+ DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
+ "igmpv3_unsolicited_report_interval"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
- DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
- "force_igmp_version"),
DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
"promote_secondaries"),
DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 26aa65d1fce4..523be38e37de 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -101,6 +101,30 @@ errout:
return err;
}
+static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
+{
+ struct fib_result *result = (struct fib_result *) arg->result;
+ struct net_device *dev = result->fi->fib_dev;
+
+ /* do not accept result if the route does
+ * not meet the required prefix length
+ */
+ if (result->prefixlen <= rule->suppress_prefixlen)
+ goto suppress_route;
+
+ /* do not accept result if the route uses a device
+ * belonging to a forbidden interface group
+ */
+ if (rule->suppress_ifgroup != -1 && dev && dev->group == rule->suppress_ifgroup)
+ goto suppress_route;
+
+ return false;
+
+suppress_route:
+ if (!(arg->flags & FIB_LOOKUP_NOREF))
+ fib_info_put(result->fi);
+ return true;
+}
static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
@@ -267,6 +291,7 @@ static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
.rule_size = sizeof(struct fib4_rule),
.addr_size = sizeof(u32),
.action = fib4_rule_action,
+ .suppress = fib4_rule_suppress,
.match = fib4_rule_match,
.configure = fib4_rule_configure,
.delete = fib4_rule_delete,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index cd71190d2962..7defdc9ba167 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -88,6 +88,7 @@
#include <linux/if_arp.h>
#include <linux/rtnetlink.h>
#include <linux/times.h>
+#include <linux/pkt_sched.h>
#include <net/net_namespace.h>
#include <net/arp.h>
@@ -113,7 +114,8 @@
#define IGMP_V1_Router_Present_Timeout (400*HZ)
#define IGMP_V2_Router_Present_Timeout (400*HZ)
-#define IGMP_Unsolicited_Report_Interval (10*HZ)
+#define IGMP_V2_Unsolicited_Report_Interval (10*HZ)
+#define IGMP_V3_Unsolicited_Report_Interval (1*HZ)
#define IGMP_Query_Response_Interval (10*HZ)
#define IGMP_Unsolicited_Report_Count 2
@@ -138,6 +140,29 @@
((in_dev)->mr_v2_seen && \
time_before(jiffies, (in_dev)->mr_v2_seen)))
+static int unsolicited_report_interval(struct in_device *in_dev)
+{
+ int interval_ms, interval_jiffies;
+
+ if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
+ interval_ms = IN_DEV_CONF_GET(
+ in_dev,
+ IGMPV2_UNSOLICITED_REPORT_INTERVAL);
+ else /* v3 */
+ interval_ms = IN_DEV_CONF_GET(
+ in_dev,
+ IGMPV3_UNSOLICITED_REPORT_INTERVAL);
+
+ interval_jiffies = msecs_to_jiffies(interval_ms);
+
+ /* _timer functions can't handle a delay of 0 jiffies so ensure
+ * we always return a positive value.
+ */
+ if (interval_jiffies <= 0)
+ interval_jiffies = 1;
+ return interval_jiffies;
+}
+
static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
static void igmpv3_clear_delrec(struct in_device *in_dev);
@@ -315,6 +340,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
if (size < 256)
return NULL;
}
+ skb->priority = TC_PRIO_CONTROL;
igmp_skb_size(skb) = size;
rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
@@ -343,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->saddr = fl4.saddr;
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
- ip_select_ident(pip, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
((u8 *)&pip[1])[0] = IPOPT_RA;
((u8 *)&pip[1])[1] = 4;
((u8 *)&pip[1])[2] = 0;
@@ -670,6 +696,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
ip_rt_put(rt);
return -1;
}
+ skb->priority = TC_PRIO_CONTROL;
skb_dst_set(skb, &rt->dst);
@@ -687,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->daddr = dst;
iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -709,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data)
in_dev->mr_gq_running = 0;
igmpv3_send_report(in_dev, NULL);
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_timer_expire(unsigned long data)
@@ -719,9 +746,10 @@ static void igmp_ifc_timer_expire(unsigned long data)
igmpv3_send_cr(in_dev);
if (in_dev->mr_ifc_count) {
in_dev->mr_ifc_count--;
- igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval);
+ igmp_ifc_start_timer(in_dev,
+ unsolicited_report_interval(in_dev));
}
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_event(struct in_device *in_dev)
@@ -744,7 +772,7 @@ static void igmp_timer_expire(unsigned long data)
if (im->unsolicit_count) {
im->unsolicit_count--;
- igmp_start_timer(im, IGMP_Unsolicited_Report_Interval);
+ igmp_start_timer(im, unsolicited_report_interval(in_dev));
}
im->reporter = 1;
spin_unlock(&im->lock);
@@ -1323,16 +1351,17 @@ out:
EXPORT_SYMBOL(ip_mc_inc_group);
/*
- * Resend IGMP JOIN report; used for bonding.
- * Called with rcu_read_lock()
+ * Resend IGMP JOIN report; used by netdev notifier.
*/
-void ip_mc_rejoin_groups(struct in_device *in_dev)
+static void ip_mc_rejoin_groups(struct in_device *in_dev)
{
#ifdef CONFIG_IP_MULTICAST
struct ip_mc_list *im;
int type;
- for_each_pmc_rcu(in_dev, im) {
+ ASSERT_RTNL();
+
+ for_each_pmc_rtnl(in_dev, im) {
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
@@ -1349,7 +1378,6 @@ void ip_mc_rejoin_groups(struct in_device *in_dev)
}
#endif
}
-EXPORT_SYMBOL(ip_mc_rejoin_groups);
/*
* A socket has left a multicast group on device dev
@@ -2735,8 +2763,42 @@ static struct pernet_operations igmp_net_ops = {
.exit = igmp_net_exit,
};
+static int igmp_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct in_device *in_dev;
+
+ switch (event) {
+ case NETDEV_RESEND_IGMP:
+ in_dev = __in_dev_get_rtnl(dev);
+ if (in_dev)
+ ip_mc_rejoin_groups(in_dev);
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block igmp_notifier = {
+ .notifier_call = igmp_netdev_event,
+};
+
int __init igmp_mc_proc_init(void)
{
- return register_pernet_subsys(&igmp_net_ops);
+ int err;
+
+ err = register_pernet_subsys(&igmp_net_ops);
+ if (err)
+ return err;
+ err = register_netdevice_notifier(&igmp_notifier);
+ if (err)
+ goto reg_notif_fail;
+ return 0;
+
+reg_notif_fail:
+ unregister_pernet_subsys(&igmp_net_ops);
+ return err;
}
#endif
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 7bd8983dbfcf..96da9c77deca 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -287,7 +287,7 @@ begintw:
if (unlikely(!INET_TW_MATCH(sk, net, acookie,
saddr, daddr, ports,
dif))) {
- sock_put(sk);
+ inet_twsk_put(inet_twsk(sk));
goto begintw;
}
goto out;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 000e3d239d64..33d5537881ed 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -32,8 +32,8 @@
* At the moment of writing this notes identifier of IP packets is generated
* to be unpredictable using this code only for packets subjected
* (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size uses a constant ID and do not use this code (see
- * ip_select_ident() in include/net/ip.h).
+ * PMTU in size when local fragmentation is disabled use a constant ID and do
+ * not use this code (see ip_select_ident() in include/net/ip.h).
*
* Route cache entries hold references to our nodes.
* New cache entries get references via lookup by destination IP address in
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 8d6939eeb492..d7aea4c5b940 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -534,7 +534,7 @@ static int __net_init ipgre_init_net(struct net *net)
static void __net_exit ipgre_exit_net(struct net *net)
{
struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
- ip_tunnel_delete_net(itn);
+ ip_tunnel_delete_net(itn, &ipgre_link_ops);
}
static struct pernet_operations ipgre_net_ops = {
@@ -767,7 +767,7 @@ static int __net_init ipgre_tap_init_net(struct net *net)
static void __net_exit ipgre_tap_exit_net(struct net *net)
{
struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
- ip_tunnel_delete_net(itn);
+ ip_tunnel_delete_net(itn, &ipgre_tap_ops);
}
static struct pernet_operations ipgre_tap_net_ops = {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 15e3e683adec..054a3e97d822 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -141,6 +141,7 @@
#include <net/icmp.h>
#include <net/raw.h>
#include <net/checksum.h>
+#include <net/inet_ecn.h>
#include <linux/netfilter_ipv4.h>
#include <net/xfrm.h>
#include <linux/mroute.h>
@@ -410,6 +411,13 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
if (iph->ihl < 5 || iph->version != 4)
goto inhdr_error;
+ BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
+ BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
+ BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
+ IP_ADD_STATS_BH(dev_net(dev),
+ IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
+ max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
+
if (!pskb_may_pull(skb, iph->ihl*4))
goto inhdr_error;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9ee17e3d11c3..3982eabf61e1 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(iph, &rt->dst, sk);
+ ip_select_ident(skb, &rt->dst, sk);
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -386,7 +386,7 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_more(iph, &rt->dst, sk,
+ ip_select_ident_more(skb, &rt->dst, sk,
(skb_shinfo(skb)->gso_segs ?: 1) - 1);
skb->priority = sk->sk_priority;
@@ -772,15 +772,20 @@ static inline int ip_ufo_append_data(struct sock *sk,
/* initialize protocol header pointer */
skb->transport_header = skb->network_header + fragheaderlen;
- skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- /* specify the length of each IP datagram fragment */
- skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+
__skb_queue_tail(queue, skb);
+ } else if (skb_is_gso(skb)) {
+ goto append;
}
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ /* specify the length of each IP datagram fragment */
+ skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+
+append:
return skb_append_datato_frags(sk, skb, getfrag, from,
(length - transhdrlen));
}
@@ -1316,7 +1321,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
else
ttl = ip_select_ttl(inet, &rt->dst);
- iph = (struct iphdr *)skb->data;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
iph->tos = inet->tos;
@@ -1324,7 +1329,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
- ip_select_ident(iph, &rt->dst, sk);
+ ip_select_ident(skb, &rt->dst, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index ca1cb2d5f6e2..63a6d6d6b875 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -350,7 +350,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
struct rtable *rt;
- rt = ip_route_output_tunnel(dev_net(dev), &fl4,
+ rt = ip_route_output_tunnel(tunnel->net, &fl4,
tunnel->parms.iph.protocol,
iph->daddr, iph->saddr,
tunnel->parms.o_key,
@@ -365,7 +365,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
}
if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
+ tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
if (tdev) {
hlen = tdev->hard_header_len + tdev->needed_headroom;
@@ -454,15 +454,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
- if (tunnel->net != dev_net(tunnel->dev))
- skb_scrub_packet(skb);
-
if (tunnel->dev->type == ARPHRD_ETHER) {
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
skb->dev = tunnel->dev;
}
+
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
+
gro_cells_receive(&tunnel->gro_cells, skb);
return 0;
@@ -613,9 +613,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tunnel->net != dev_net(dev))
- skb_scrub_packet(skb);
-
if (tunnel->err_count > 0) {
if (time_before(jiffies,
tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
@@ -626,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_count = 0;
}
+ tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = tnl_params->ttl;
if (ttl == 0) {
if (skb->protocol == htons(ETH_P_IP))
@@ -644,18 +642,17 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len;
- if (max_headroom > dev->needed_headroom) {
+ if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
- if (skb_cow_head(skb, dev->needed_headroom)) {
- dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return;
- }
+
+ if (skb_cow_head(skb, dev->needed_headroom)) {
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return;
}
- err = iptunnel_xmit(dev_net(dev), rt, skb,
- fl4.saddr, fl4.daddr, protocol,
- ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df);
+ err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
+ tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return;
@@ -820,11 +817,10 @@ static void ip_tunnel_dev_free(struct net_device *dev)
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
- struct net *net = dev_net(dev);
struct ip_tunnel *tunnel = netdev_priv(dev);
struct ip_tunnel_net *itn;
- itn = net_generic(net, tunnel->ip_tnl_net_id);
+ itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
if (itn->fb_tunnel_dev != dev) {
ip_tunnel_del(netdev_priv(dev));
@@ -838,56 +834,68 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
{
struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
struct ip_tunnel_parm parms;
+ unsigned int i;
- itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
- if (!itn->tunnels)
- return -ENOMEM;
+ for (i = 0; i < IP_TNL_HASH_SIZE; i++)
+ INIT_HLIST_HEAD(&itn->tunnels[i]);
if (!ops) {
itn->fb_tunnel_dev = NULL;
return 0;
}
+
memset(&parms, 0, sizeof(parms));
if (devname)
strlcpy(parms.name, devname, IFNAMSIZ);
rtnl_lock();
itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
- rtnl_unlock();
- if (IS_ERR(itn->fb_tunnel_dev)) {
- kfree(itn->tunnels);
- return PTR_ERR(itn->fb_tunnel_dev);
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ if (!IS_ERR(itn->fb_tunnel_dev)) {
+ itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
}
+ rtnl_unlock();
- return 0;
+ return PTR_RET(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
+static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
+ struct rtnl_link_ops *ops)
{
+ struct net *net = dev_net(itn->fb_tunnel_dev);
+ struct net_device *dev, *aux;
int h;
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == ops)
+ unregister_netdevice_queue(dev, head);
+
for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
struct ip_tunnel *t;
struct hlist_node *n;
struct hlist_head *thead = &itn->tunnels[h];
hlist_for_each_entry_safe(t, n, thead, hash_node)
- unregister_netdevice_queue(t->dev, head);
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, head);
}
- if (itn->fb_tunnel_dev)
- unregister_netdevice_queue(itn->fb_tunnel_dev, head);
}
-void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
+void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
LIST_HEAD(list);
rtnl_lock();
- ip_tunnel_destroy(itn, &list);
+ ip_tunnel_destroy(itn, &list, ops);
unregister_netdevice_many(&list);
rtnl_unlock();
- kfree(itn->tunnels);
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
@@ -929,23 +937,21 @@ EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p)
{
- struct ip_tunnel *t, *nt;
- struct net *net = dev_net(dev);
+ struct ip_tunnel *t;
struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct net *net = tunnel->net;
struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
if (dev == itn->fb_tunnel_dev)
return -EINVAL;
- nt = netdev_priv(dev);
-
t = ip_tunnel_find(itn, p, dev->type);
if (t) {
if (t->dev != dev)
return -EEXIST;
} else {
- t = nt;
+ t = tunnel;
if (dev->type != ARPHRD_ETHER) {
unsigned int nflags = 0;
@@ -984,6 +990,7 @@ int ip_tunnel_init(struct net_device *dev)
}
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
iph->version = 4;
iph->ihl = 5;
@@ -994,8 +1001,8 @@ EXPORT_SYMBOL_GPL(ip_tunnel_init);
void ip_tunnel_uninit(struct net_device *dev)
{
- struct net *net = dev_net(dev);
struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct net *net = tunnel->net;
struct ip_tunnel_net *itn;
itn = net_generic(net, tunnel->ip_tnl_net_id);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 850525b34899..c31e3ad98ef2 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -46,24 +46,22 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
-int iptunnel_xmit(struct net *net, struct rtable *rt,
- struct sk_buff *skb,
+int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
- __u8 tos, __u8 ttl, __be16 df)
+ __u8 tos, __u8 ttl, __be16 df, bool xnet)
{
int pkt_len = skb->len;
struct iphdr *iph;
int err;
- nf_reset(skb);
- secpath_reset(skb);
+ skb_scrub_packet(skb, xnet);
+
skb->rxhash = 0;
- skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
/* Push down and install the IP header. */
- __skb_push(skb, sizeof(struct iphdr));
+ skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 17cc0ffa8c0d..6e87f853d033 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -44,176 +44,10 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
-#define HASH_SIZE 16
-#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))
-
static struct rtnl_link_ops vti_link_ops __read_mostly;
static int vti_net_id __read_mostly;
-struct vti_net {
- struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
- struct ip_tunnel __rcu *tunnels_wc[1];
- struct ip_tunnel __rcu **tunnels[4];
-
- struct net_device *fb_tunnel_dev;
-};
-
-static int vti_fb_tunnel_init(struct net_device *dev);
static int vti_tunnel_init(struct net_device *dev);
-static void vti_tunnel_setup(struct net_device *dev);
-static void vti_dev_free(struct net_device *dev);
-static int vti_tunnel_bind_dev(struct net_device *dev);
-
-#define VTI_XMIT(stats1, stats2) do { \
- int err; \
- int pkt_len = skb->len; \
- err = dst_output(skb); \
- if (net_xmit_eval(err) == 0) { \
- u64_stats_update_begin(&(stats1)->syncp); \
- (stats1)->tx_bytes += pkt_len; \
- (stats1)->tx_packets++; \
- u64_stats_update_end(&(stats1)->syncp); \
- } else { \
- (stats2)->tx_errors++; \
- (stats2)->tx_aborted_errors++; \
- } \
-} while (0)
-
-
-static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
- __be32 remote, __be32 local)
-{
- unsigned h0 = HASH(remote);
- unsigned h1 = HASH(local);
- struct ip_tunnel *t;
- struct vti_net *ipn = net_generic(net, vti_net_id);
-
- for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
- if (local == t->parms.iph.saddr &&
- remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
- return t;
- for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
- if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
- return t;
-
- for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
- if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
- return t;
-
- for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
- if (t && (t->dev->flags&IFF_UP))
- return t;
- return NULL;
-}
-
-static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
- struct ip_tunnel_parm *parms)
-{
- __be32 remote = parms->iph.daddr;
- __be32 local = parms->iph.saddr;
- unsigned h = 0;
- int prio = 0;
-
- if (remote) {
- prio |= 2;
- h ^= HASH(remote);
- }
- if (local) {
- prio |= 1;
- h ^= HASH(local);
- }
- return &ipn->tunnels[prio][h];
-}
-
-static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
- struct ip_tunnel *t)
-{
- return __vti_bucket(ipn, &t->parms);
-}
-
-static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
-{
- struct ip_tunnel __rcu **tp;
- struct ip_tunnel *iter;
-
- for (tp = vti_bucket(ipn, t);
- (iter = rtnl_dereference(*tp)) != NULL;
- tp = &iter->next) {
- if (t == iter) {
- rcu_assign_pointer(*tp, t->next);
- break;
- }
- }
-}
-
-static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
-{
- struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);
-
- rcu_assign_pointer(t->next, rtnl_dereference(*tp));
- rcu_assign_pointer(*tp, t);
-}
-
-static struct ip_tunnel *vti_tunnel_locate(struct net *net,
- struct ip_tunnel_parm *parms,
- int create)
-{
- __be32 remote = parms->iph.daddr;
- __be32 local = parms->iph.saddr;
- struct ip_tunnel *t, *nt;
- struct ip_tunnel __rcu **tp;
- struct net_device *dev;
- char name[IFNAMSIZ];
- struct vti_net *ipn = net_generic(net, vti_net_id);
-
- for (tp = __vti_bucket(ipn, parms);
- (t = rtnl_dereference(*tp)) != NULL;
- tp = &t->next) {
- if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
- return t;
- }
- if (!create)
- return NULL;
-
- if (parms->name[0])
- strlcpy(name, parms->name, IFNAMSIZ);
- else
- strcpy(name, "vti%d");
-
- dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
- if (dev == NULL)
- return NULL;
-
- dev_net_set(dev, net);
-
- nt = netdev_priv(dev);
- nt->parms = *parms;
- dev->rtnl_link_ops = &vti_link_ops;
-
- vti_tunnel_bind_dev(dev);
-
- if (register_netdevice(dev) < 0)
- goto failed_free;
-
- dev_hold(dev);
- vti_tunnel_link(ipn, nt);
- return nt;
-
-failed_free:
- free_netdev(dev);
- return NULL;
-}
-
-static void vti_tunnel_uninit(struct net_device *dev)
-{
- struct net *net = dev_net(dev);
- struct vti_net *ipn = net_generic(net, vti_net_id);
-
- vti_tunnel_unlink(ipn, netdev_priv(dev));
- dev_put(dev);
-}
static int vti_err(struct sk_buff *skb, u32 info)
{
@@ -222,6 +56,8 @@ static int vti_err(struct sk_buff *skb, u32 info)
* 8 bytes of packet payload. It means, that precise relaying of
* ICMP in the real Internet is absolutely infeasible.
*/
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
struct iphdr *iph = (struct iphdr *)skb->data;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
@@ -252,7 +88,8 @@ static int vti_err(struct sk_buff *skb, u32 info)
err = -ENOENT;
- t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->daddr, iph->saddr, 0);
if (t == NULL)
goto out;
@@ -281,12 +118,24 @@ static int vti_rcv(struct sk_buff *skb)
{
struct ip_tunnel *tunnel;
const struct iphdr *iph = ip_hdr(skb);
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
- tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->saddr, iph->daddr, 0);
if (tunnel != NULL) {
struct pcpu_tstats *tstats;
+ u32 oldmark = skb->mark;
+ int ret;
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+
+ /* temporarily mark the skb with the tunnel o_key, to
+ * only match policies with this mark.
+ */
+ skb->mark = be32_to_cpu(tunnel->parms.o_key);
+ ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
+ skb->mark = oldmark;
+ if (!ret)
return -1;
tstats = this_cpu_ptr(tunnel->dev->tstats);
@@ -295,7 +144,6 @@ static int vti_rcv(struct sk_buff *skb)
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
- skb->mark = 0;
secpath_reset(skb);
skb->dev = tunnel->dev;
return 1;
@@ -311,7 +159,6 @@ static int vti_rcv(struct sk_buff *skb)
static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct pcpu_tstats *tstats;
struct iphdr *tiph = &tunnel->parms.iph;
u8 tos;
struct rtable *rt; /* Route to the other host */
@@ -319,6 +166,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
struct iphdr *old_iph = ip_hdr(skb);
__be32 dst = tiph->daddr;
struct flowi4 fl4;
+ int err;
if (skb->protocol != htons(ETH_P_IP))
goto tx_error;
@@ -327,7 +175,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&fl4, 0, sizeof(fl4));
flowi4_init_output(&fl4, tunnel->parms.link,
- be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
+ be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
RT_SCOPE_UNIVERSE,
IPPROTO_IPIP, 0,
dst, tiph->saddr, 0, 0);
@@ -367,8 +215,10 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
nf_reset(skb);
skb->dev = skb_dst(skb)->dev;
- tstats = this_cpu_ptr(dev->tstats);
- VTI_XMIT(tstats, &dev->stats);
+ err = dst_output(skb);
+ if (net_xmit_eval(err) == 0)
+ err = skb->len;
+ iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
tx_error_icmp:
@@ -379,198 +229,57 @@ tx_error:
return NETDEV_TX_OK;
}
-static int vti_tunnel_bind_dev(struct net_device *dev)
-{
- struct net_device *tdev = NULL;
- struct ip_tunnel *tunnel;
- struct iphdr *iph;
-
- tunnel = netdev_priv(dev);
- iph = &tunnel->parms.iph;
-
- if (iph->daddr) {
- struct rtable *rt;
- struct flowi4 fl4;
- memset(&fl4, 0, sizeof(fl4));
- flowi4_init_output(&fl4, tunnel->parms.link,
- be32_to_cpu(tunnel->parms.i_key),
- RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
- IPPROTO_IPIP, 0,
- iph->daddr, iph->saddr, 0, 0);
- rt = ip_route_output_key(dev_net(dev), &fl4);
- if (!IS_ERR(rt)) {
- tdev = rt->dst.dev;
- ip_rt_put(rt);
- }
- dev->flags |= IFF_POINTOPOINT;
- }
-
- if (!tdev && tunnel->parms.link)
- tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
-
- if (tdev) {
- dev->hard_header_len = tdev->hard_header_len +
- sizeof(struct iphdr);
- dev->mtu = tdev->mtu;
- }
- dev->iflink = tunnel->parms.link;
- return dev->mtu;
-}
-
static int
vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
int err = 0;
struct ip_tunnel_parm p;
- struct ip_tunnel *t;
- struct net *net = dev_net(dev);
- struct vti_net *ipn = net_generic(net, vti_net_id);
-
- switch (cmd) {
- case SIOCGETTUNNEL:
- t = NULL;
- if (dev == ipn->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
- sizeof(p))) {
- err = -EFAULT;
- break;
- }
- t = vti_tunnel_locate(net, &p, 0);
- }
- if (t == NULL)
- t = netdev_priv(dev);
- memcpy(&p, &t->parms, sizeof(p));
- p.i_flags |= GRE_KEY | VTI_ISVTI;
- p.o_flags |= GRE_KEY;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- err = -EFAULT;
- break;
- case SIOCADDTUNNEL:
- case SIOCCHGTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ return -EFAULT;
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- goto done;
-
- err = -EINVAL;
+ if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
p.iph.ihl != 5)
- goto done;
-
- t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
-
- if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
- if (t != NULL) {
- if (t->dev != dev) {
- err = -EEXIST;
- break;
- }
- } else {
- if (((dev->flags&IFF_POINTOPOINT) &&
- !p.iph.daddr) ||
- (!(dev->flags&IFF_POINTOPOINT) &&
- p.iph.daddr)) {
- err = -EINVAL;
- break;
- }
- t = netdev_priv(dev);
- vti_tunnel_unlink(ipn, t);
- synchronize_net();
- t->parms.iph.saddr = p.iph.saddr;
- t->parms.iph.daddr = p.iph.daddr;
- t->parms.i_key = p.i_key;
- t->parms.o_key = p.o_key;
- t->parms.iph.protocol = IPPROTO_IPIP;
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
- vti_tunnel_link(ipn, t);
- netdev_state_change(dev);
- }
- }
-
- if (t) {
- err = 0;
- if (cmd == SIOCCHGTUNNEL) {
- t->parms.i_key = p.i_key;
- t->parms.o_key = p.o_key;
- if (t->parms.link != p.link) {
- t->parms.link = p.link;
- vti_tunnel_bind_dev(dev);
- netdev_state_change(dev);
- }
- }
- p.i_flags |= GRE_KEY | VTI_ISVTI;
- p.o_flags |= GRE_KEY;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
- sizeof(p)))
- err = -EFAULT;
- } else
- err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
- break;
+ return -EINVAL;
+ }
- case SIOCDELTUNNEL:
- err = -EPERM;
- if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
- goto done;
-
- if (dev == ipn->fb_tunnel_dev) {
- err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
- sizeof(p)))
- goto done;
- err = -ENOENT;
-
- t = vti_tunnel_locate(net, &p, 0);
- if (t == NULL)
- goto done;
- err = -EPERM;
- if (t->dev == ipn->fb_tunnel_dev)
- goto done;
- dev = t->dev;
- }
- unregister_netdevice(dev);
- err = 0;
- break;
+ err = ip_tunnel_ioctl(dev, &p, cmd);
+ if (err)
+ return err;
- default:
- err = -EINVAL;
+ if (cmd != SIOCDELTUNNEL) {
+ p.i_flags |= GRE_KEY | VTI_ISVTI;
+ p.o_flags |= GRE_KEY;
}
-done:
- return err;
-}
-
-static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
-{
- if (new_mtu < 68 || new_mtu > 0xFFF8)
- return -EINVAL;
- dev->mtu = new_mtu;
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ return -EFAULT;
return 0;
}
static const struct net_device_ops vti_netdev_ops = {
.ndo_init = vti_tunnel_init,
- .ndo_uninit = vti_tunnel_uninit,
+ .ndo_uninit = ip_tunnel_uninit,
.ndo_start_xmit = vti_tunnel_xmit,
.ndo_do_ioctl = vti_tunnel_ioctl,
- .ndo_change_mtu = vti_tunnel_change_mtu,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
};
-static void vti_dev_free(struct net_device *dev)
+static void vti_tunnel_setup(struct net_device *dev)
{
- free_percpu(dev->tstats);
- free_netdev(dev);
+ dev->netdev_ops = &vti_netdev_ops;
+ ip_tunnel_setup(dev, vti_net_id);
}
-static void vti_tunnel_setup(struct net_device *dev)
+static int vti_tunnel_init(struct net_device *dev)
{
- dev->netdev_ops = &vti_netdev_ops;
- dev->destructor = vti_dev_free;
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct iphdr *iph = &tunnel->parms.iph;
+
+ memcpy(dev->dev_addr, &iph->saddr, 4);
+ memcpy(dev->broadcast, &iph->daddr, 4);
dev->type = ARPHRD_TUNNEL;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -581,38 +290,18 @@ static void vti_tunnel_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_LLTX;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
-}
-static int vti_tunnel_init(struct net_device *dev)
-{
- struct ip_tunnel *tunnel = netdev_priv(dev);
-
- tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
-
- memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
- memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
-
- dev->tstats = alloc_percpu(struct pcpu_tstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- return 0;
+ return ip_tunnel_init(dev);
}
-static int __net_init vti_fb_tunnel_init(struct net_device *dev)
+static void __net_init vti_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
- struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);
iph->version = 4;
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
-
- dev_hold(dev);
- rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
- return 0;
}
static struct xfrm_tunnel vti_handler __read_mostly = {
@@ -621,76 +310,30 @@ static struct xfrm_tunnel vti_handler __read_mostly = {
.priority = 1,
};
-static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
-{
- int prio;
-
- for (prio = 1; prio < 4; prio++) {
- int h;
- for (h = 0; h < HASH_SIZE; h++) {
- struct ip_tunnel *t;
-
- t = rtnl_dereference(ipn->tunnels[prio][h]);
- while (t != NULL) {
- unregister_netdevice_queue(t->dev, head);
- t = rtnl_dereference(t->next);
- }
- }
- }
-}
-
static int __net_init vti_init_net(struct net *net)
{
int err;
- struct vti_net *ipn = net_generic(net, vti_net_id);
-
- ipn->tunnels[0] = ipn->tunnels_wc;
- ipn->tunnels[1] = ipn->tunnels_l;
- ipn->tunnels[2] = ipn->tunnels_r;
- ipn->tunnels[3] = ipn->tunnels_r_l;
-
- ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
- "ip_vti0",
- vti_tunnel_setup);
- if (!ipn->fb_tunnel_dev) {
- err = -ENOMEM;
- goto err_alloc_dev;
- }
- dev_net_set(ipn->fb_tunnel_dev, net);
-
- err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
- if (err)
- goto err_reg_dev;
- ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;
+ struct ip_tunnel_net *itn;
- err = register_netdev(ipn->fb_tunnel_dev);
+ err = ip_tunnel_init_net(net, vti_net_id, &vti_link_ops, "ip_vti0");
if (err)
- goto err_reg_dev;
+ return err;
+ itn = net_generic(net, vti_net_id);
+ vti_fb_tunnel_init(itn->fb_tunnel_dev);
return 0;
-
-err_reg_dev:
- vti_dev_free(ipn->fb_tunnel_dev);
-err_alloc_dev:
- /* nothing */
- return err;
}
static void __net_exit vti_exit_net(struct net *net)
{
- struct vti_net *ipn = net_generic(net, vti_net_id);
- LIST_HEAD(list);
-
- rtnl_lock();
- vti_destroy_tunnels(ipn, &list);
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+ ip_tunnel_delete_net(itn, &vti_link_ops);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
.exit = vti_exit_net,
.id = &vti_net_id,
- .size = sizeof(struct vti_net),
+ .size = sizeof(struct ip_tunnel_net),
};
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -728,78 +371,19 @@ static void vti_netlink_parms(struct nlattr *data[],
static int vti_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
- struct ip_tunnel *nt;
- struct net *net = dev_net(dev);
- struct vti_net *ipn = net_generic(net, vti_net_id);
- int mtu;
- int err;
-
- nt = netdev_priv(dev);
- vti_netlink_parms(data, &nt->parms);
-
- if (vti_tunnel_locate(net, &nt->parms, 0))
- return -EEXIST;
+ struct ip_tunnel_parm parms;
- mtu = vti_tunnel_bind_dev(dev);
- if (!tb[IFLA_MTU])
- dev->mtu = mtu;
-
- err = register_netdevice(dev);
- if (err)
- goto out;
-
- dev_hold(dev);
- vti_tunnel_link(ipn, nt);
-
-out:
- return err;
+ vti_netlink_parms(data, &parms);
+ return ip_tunnel_newlink(dev, tb, &parms);
}
static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip_tunnel *t, *nt;
- struct net *net = dev_net(dev);
- struct vti_net *ipn = net_generic(net, vti_net_id);
struct ip_tunnel_parm p;
- int mtu;
-
- if (dev == ipn->fb_tunnel_dev)
- return -EINVAL;
- nt = netdev_priv(dev);
vti_netlink_parms(data, &p);
-
- t = vti_tunnel_locate(net, &p, 0);
-
- if (t) {
- if (t->dev != dev)
- return -EEXIST;
- } else {
- t = nt;
-
- vti_tunnel_unlink(ipn, t);
- t->parms.iph.saddr = p.iph.saddr;
- t->parms.iph.daddr = p.iph.daddr;
- t->parms.i_key = p.i_key;
- t->parms.o_key = p.o_key;
- if (dev->type != ARPHRD_ETHER) {
- memcpy(dev->dev_addr, &p.iph.saddr, 4);
- memcpy(dev->broadcast, &p.iph.daddr, 4);
- }
- vti_tunnel_link(ipn, t);
- netdev_state_change(dev);
- }
-
- if (t->parms.link != p.link) {
- t->parms.link = p.link;
- mtu = vti_tunnel_bind_dev(dev);
- if (!tb[IFLA_MTU])
- dev->mtu = mtu;
- netdev_state_change(dev);
- }
-
- return 0;
+ return ip_tunnel_changelink(dev, tb, &p);
}
static size_t vti_get_size(const struct net_device *dev)
@@ -865,7 +449,7 @@ static int __init vti_init(void)
err = xfrm4_mode_tunnel_input_register(&vti_handler);
if (err < 0) {
unregister_pernet_device(&vti_net_ops);
- pr_info(KERN_INFO "vti init: can't register tunnel\n");
+ pr_info("vti init: can't register tunnel\n");
}
err = rtnl_link_register(&vti_link_ops);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index b3ac3c3f6219..7f80fb4b82d3 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -285,7 +285,6 @@ static void ipip_tunnel_setup(struct net_device *dev)
dev->flags = IFF_NOARP;
dev->iflink = 0;
dev->addr_len = 4;
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->features |= NETIF_F_LLTX;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
@@ -436,7 +435,7 @@ static int __net_init ipip_init_net(struct net *net)
static void __net_exit ipip_exit_net(struct net *net)
{
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
- ip_tunnel_delete_net(itn);
+ ip_tunnel_delete_net(itn, &ipip_link_ops);
}
static struct pernet_operations ipip_net_ops = {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 132a09664704..62212c772a4b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -127,9 +127,9 @@ static struct kmem_cache *mrt_cachep __read_mostly;
static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static void ipmr_free_table(struct mr_table *mrt);
-static int ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local);
+static void ip_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc_cache *cache,
+ int local);
static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
@@ -1658,7 +1658,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
iph->tot_len = htons(skb->len);
- ip_select_ident(iph, skb_dst(skb), NULL);
+ ip_select_ident(skb, skb_dst(skb), NULL);
ip_send_check(iph);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -1795,9 +1795,9 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
/* "local" means that we should preserve one skb (for local delivery) */
-static int ip_mr_forward(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *cache,
- int local)
+static void ip_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc_cache *cache,
+ int local)
{
int psend = -1;
int vif, ct;
@@ -1903,14 +1903,13 @@ last_forward:
ipmr_queue_xmit(net, mrt, skb2, cache, psend);
} else {
ipmr_queue_xmit(net, mrt, skb, cache, psend);
- return 0;
+ return;
}
}
dont_forward:
if (!local)
kfree_skb(skb);
- return 0;
}
static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
@@ -2068,9 +2067,8 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
skb_reset_network_header(skb);
skb->protocol = htons(ETH_P_IP);
skb->ip_summed = CHECKSUM_NONE;
- skb->pkt_type = PACKET_HOST;
- skb_tunnel_rx(skb, reg_dev);
+ skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
netif_rx(skb);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 4e9028017428..1657e39b291f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -110,6 +110,19 @@ config IP_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This allows to avoid conntrack and server resource usage
+ during SYN-flood attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_NF_TARGET_ULOG
tristate "ULOG target support (obsolete)"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 007b128eecc9..3622b248b6dd 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
# generic ARP tables
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index eadab1ed6500..a865f6f94013 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,7 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
net->ipv4.arptable_filter =
arpt_register_table(net, &packet_filter, repl);
kfree(repl);
- return PTR_RET(net->ipv4.arptable_filter);
+ return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter);
}
static void __net_exit arptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 30e4de940567..00352ce0f0de 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this,
NF_CT_ASSERT(dev->ifindex != 0);
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
}
return NOTIFY_DONE;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 04b18c1ac345..b969131ad1c1 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -119,7 +119,26 @@ static void send_reset(struct sk_buff *oldskb, int hook)
nf_ct_attach(nskb, oldskb);
- ip_local_out(nskb);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ /* If we use ip_local_out for bridged traffic, the MAC source on
+ * the RST will be ours, instead of the destination's. This confuses
+ * some routers/firewalls, and they drop the packet. So we need to
+ * build the eth header using the original destination's MAC as the
+ * source, and send the RST packet directly.
+ */
+ if (oldskb->nf_bridge) {
+ struct ethhdr *oeth = eth_hdr(oldskb);
+ nskb->dev = oldskb->nf_bridge->physindev;
+ niph->tot_len = htons(nskb->len);
+ ip_send_check(niph);
+ if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+ oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ goto free_nskb;
+ dev_queue_xmit(nskb);
+ } else
+#endif
+ ip_local_out(nskb);
+
return;
free_nskb:
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
new file mode 100644
index 000000000000..b6346bf2fde3
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+/*
+ * synproxy_build_ip - append a minimal IPv4 header to @skb
+ * @skb: packet under construction; the header is added with skb_put()
+ * @saddr: source address, stored into the header unchanged
+ * @daddr: destination address, stored into the header unchanged
+ *
+ * Resets the network header offset of @skb, grows the packet by
+ * sizeof(struct iphdr) and fills in an option-less header
+ * (ihl = sizeof(*iph) / 4): DF set, id and tos zero, TTL taken from
+ * sysctl_ip_default_ttl, protocol TCP.  tot_len is not written here and
+ * check is left at zero; presumably both are completed later on the
+ * output path (TODO confirm).
+ *
+ * Returns a pointer to the header just added.
+ */
+static struct iphdr *
+synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
+{
+ struct iphdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) / 4;
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+ iph->ttl = sysctl_ip_default_ttl;
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+
+ return iph;
+}
+/*
+ * synproxy_send_tcp - finish and transmit a locally built TCP packet
+ * @skb: packet that triggered the reply; only its dst entry is borrowed
+ * @nskb: packet to send; freed here if re-routing fails
+ * @nfct: conntrack to attach to @nskb, or NULL to attach none
+ * @ctinfo: conntrack info stored alongside @nfct (ignored when @nfct is NULL)
+ * @niph: IPv4 header of @nskb
+ * @nth: TCP header of @nskb
+ * @tcp_hdr_size: TCP header length including options, in bytes
+ *
+ * Prepares @nskb for CHECKSUM_PARTIAL: nth->check is seeded with the
+ * complemented tcp_v4_check() result and csum_start/csum_offset point at
+ * the TCP check field.  The dst of @skb is attached without taking a
+ * reference, then ip_route_me_harder() is asked to route the packet.
+ * When @nfct is given a reference is taken with nf_conntrack_get().
+ * The result of ip_local_out() is not checked.
+ */
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct iphdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ skb_dst_set_noref(nskb, skb_dst(skb));
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+/*
+ * synproxy_send_client_synack - answer a SYN with a cookie SYN/ACK
+ * @skb: the received SYN
+ * @th: TCP header of @skb
+ * @opts: options to place in the reply; also selects the ECE flag
+ *
+ * Builds a reply with addresses and ports swapped relative to @skb.  The
+ * sequence number comes from __cookie_v4_init_sequence(), the
+ * acknowledgement number is the received sequence number plus one, and the
+ * advertised window is zero.  The reply carries the conntrack reference of
+ * @skb with IP_CT_ESTABLISHED_REPLY.  Nothing is sent if the allocation
+ * fails.
+ */
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss; /* writable copy: @opts is const, the cookie helper takes a pointer */
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+/*
+ * synproxy_send_server_syn - emit a SYN on behalf of the sender of @skb
+ * @snet: per-namespace synproxy state; its template conntrack is attached
+ * @skb: the received packet the SYN is derived from
+ * @th: TCP header of @skb
+ * @opts: options to place in the SYN; also selects the ECE and CWR flags
+ * @recv_seq: host-order sequence number; the SYN is sent with @recv_seq - 1
+ *
+ * Addresses and ports are copied from @skb unchanged, so the SYN travels
+ * in the same direction as @skb.  The window is copied from @th.  The
+ * packet is tagged with snet->tmpl and IP_CT_NEW.  Nothing is sent if the
+ * allocation fails.
+ */
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+/*
+ * synproxy_send_server_ack - acknowledge a segment back to its sender
+ * @snet: per-namespace synproxy state (not referenced in this function)
+ * @state: conntrack TCP state; supplies the advertised window
+ * @skb: the received packet being acknowledged
+ * @th: TCP header of @skb
+ * @opts: options to place in the ACK
+ *
+ * Builds an ACK with addresses and ports swapped relative to @skb.  The
+ * sequence number is the received acknowledgement number and the
+ * acknowledgement number is the received sequence number plus one.  The
+ * window is taken from state->seen[IP_CT_DIR_ORIGINAL].td_maxwin.  No
+ * conntrack entry is attached.  The only caller in this file invokes it
+ * for a segment with both SYN and ACK set while the connection is in
+ * TCP_CONNTRACK_SYN_RECV.
+ */
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+/*
+ * synproxy_send_client_ack - forward an ACK in the direction of @skb
+ * @snet: per-namespace synproxy state (not referenced in this function)
+ * @skb: the received packet the ACK is derived from
+ * @th: TCP header of @skb
+ * @opts: options to place in the ACK; opts->wscale scales the window
+ *
+ * Addresses and ports are copied from @skb unchanged.  The sequence number
+ * is the received sequence number plus one and the acknowledgement number
+ * is copied verbatim.  The advertised window is the received window
+ * shifted right by opts->wscale.  No conntrack entry is attached.  Nothing
+ * is sent if the allocation fails.
+ */
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ /* th->window is in network order: convert to host order before
+ * shifting, then back to network order for the header field.
+ */
+ nth->window = htons(ntohs(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+/*
+ * synproxy_recv_client_ack - validate a cookie ACK and start the server leg
+ * @snet: per-namespace synproxy state; statistics are updated here
+ * @skb: the received ACK
+ * @th: TCP header of @skb
+ * @opts: parsed options; opts->mss is overwritten with the decoded MSS
+ * @recv_seq: host-order sequence number handed on to the server SYN
+ *
+ * The cookie checked is the acknowledgement number minus one.  A zero
+ * result from __cookie_v4_check() counts as cookie_invalid and nothing is
+ * sent.  Otherwise cookie_valid is counted, the timestamp cookie is
+ * checked when the timestamp option is set, and a SYN is sent with
+ * synproxy_send_server_syn().
+ *
+ * Returns true if the cookie was accepted, false otherwise.
+ */
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int mss;
+
+ mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss = mss;
+
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ return true;
+}
+/*
+ * synproxy_tg4 - SYNPROXY target handler for IPv4
+ * @skb: packet that matched the rule
+ * @par: target parameters (rule info, input device, hook, TCP offset)
+ *
+ * Drops the packet when its checksum is bad, its TCP header cannot be
+ * read, or its options do not parse.
+ *
+ * SYN without ACK/FIN/RST: counts syn_received, restricts the parsed
+ * options to those enabled in the rule, and replies with a SYN/ACK.
+ * Without the timestamp option, window scaling, SACK-permitted and ECN
+ * are cleared as well.  The original packet is dropped.
+ *
+ * ACK without FIN/RST/SYN: handed to synproxy_recv_client_ack().  The
+ * packet is dropped whether or not the cookie was accepted.
+ *
+ * Any other flag combination returns XT_CONTINUE.
+ */
+static unsigned int
+synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
+
+ if (th->syn && !(th->ack || th->fin || th->rst)) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ return NF_DROP;
+
+ } else if (th->ack && !(th->fin || th->rst || th->syn)) {
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+ return NF_DROP;
+ }
+
+ return XT_CONTINUE;
+}
+/*
+ * ipv4_synproxy_hook - netfilter hook driving the proxied handshake
+ * @hooknum: hook the packet is traversing (not referenced in this function)
+ * @skb: packet being processed
+ * @in: input device, may be NULL
+ * @out: output device, used for the namespace lookup when @in is NULL
+ * @okfn: continuation callback (not referenced in this function)
+ *
+ * Packets without a conntrack entry, without a synproxy extension on that
+ * entry, or identified as loopback are accepted untouched.  A packet
+ * whose TCP header cannot be read is dropped.
+ *
+ * The remaining handling depends on the conntrack TCP state:
+ *
+ * TCP_CONNTRACK_CLOSE: an RST on a connection that has not seen a reply
+ * initialises sequence adjustment from the stored ISN.  A SYN without ACK
+ * in the original direction resets sequence and timestamp adjustment,
+ * counts conn_reopened and continues as for SYN_SENT.
+ *
+ * TCP_CONNTRACK_SYN_SENT: an ACK without SYN in the original direction is
+ * re-validated as a cookie ACK (counted as cookie_retrans on success) and
+ * dropped.  Otherwise the acknowledgement number is stored as the ISN
+ * and, with timestamps, tsecr is stored as well.
+ *
+ * TCP_CONNTRACK_SYN_RECV: for a segment with SYN and ACK set, the
+ * timestamp offset is computed, MSS/window-scale/SACK-permitted are
+ * removed from the options, and two ACKs are generated - one back to the
+ * sender and one onward in the direction of @skb - with sequence
+ * adjustment initialised in between.  The original packet is consumed
+ * and NF_STOLEN is returned.
+ *
+ * Every path that leaves the switch via break applies
+ * synproxy_tstamp_adjust() and accepts the packet.
+ */
+static unsigned int ipv4_synproxy_hook(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ unsigned int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ thoff = ip_hdrlen(skb);
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+/*
+ * synproxy_tg4_check - validate a rule that uses the SYNPROXY target
+ * @par: check parameters; entryinfo is the ipt_entry the target belongs to
+ *
+ * The rule must match protocol TCP and the protocol match must not be
+ * inverted; otherwise -EINVAL is returned.  On success the result of
+ * nf_ct_l3proto_try_module_get() for the rule's family is returned.
+ */
+static int synproxy_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (e->ip.proto != IPPROTO_TCP ||
+ e->ip.invflags & XT_INV_PROTO)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+/*
+ * synproxy_tg4_destroy - rule teardown for the SYNPROXY target
+ * @par: destructor parameters; only the family is used
+ *
+ * Calls nf_ct_l3proto_module_put() for the rule's family, the counterpart
+ * of the nf_ct_l3proto_try_module_get() call made in synproxy_tg4_check().
+ */
+static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+/*
+ * x_tables registration record for the IPv4 "SYNPROXY" target: wires up
+ * the packet handler, the rule check and the rule destructor, and declares
+ * the size of the per-rule data as sizeof(struct xt_synproxy_info).
+ *
+ * NOTE(review): no .hooks or .table restriction is set in this
+ * initializer - confirm that allowing the target in any chain is intended.
+ */
+static struct xt_target synproxy_tg4_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV4,
+ .target = synproxy_tg4,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg4_check,
+ .destroy = synproxy_tg4_destroy,
+ .me = THIS_MODULE,
+};
+/*
+ * Netfilter hook registrations for ipv4_synproxy_hook().  The same handler
+ * is installed at NF_INET_LOCAL_IN and NF_INET_POST_ROUTING, both with
+ * priority NF_IP_PRI_CONNTRACK_CONFIRM - 1 (presumably so that it runs
+ * immediately before conntrack confirmation - TODO confirm ordering).
+ */
+static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+/*
+ * synproxy_tg4_init - module entry point
+ *
+ * Registers the netfilter hooks first and the x_tables target second.  If
+ * target registration fails the hooks are unregistered again.
+ *
+ * Returns 0 on success or the negative error from the failing
+ * registration call.
+ */
+static int __init synproxy_tg4_init(void)
+{
+ int err;
+
+ err = nf_register_hooks(ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ if (err < 0)
+ goto err1;
+
+ err = xt_register_target(&synproxy_tg4_reg);
+ if (err < 0)
+ goto err2;
+
+ return 0;
+
+err2:
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+err1:
+ return err;
+}
+/*
+ * synproxy_tg4_exit - module exit point
+ *
+ * Undoes synproxy_tg4_init() in reverse order: the x_tables target is
+ * unregistered first, then the netfilter hooks.
+ */
+static void __exit synproxy_tg4_exit(void)
+{
+ xt_unregister_target(&synproxy_tg4_reg);
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+}
+
+module_init(synproxy_tg4_init);
+module_exit(synproxy_tg4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 6b3da5cf54e9..50af5b45c050 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -69,7 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
net->ipv4.iptable_filter =
ipt_register_table(net, &packet_filter, repl);
kfree(repl);
- return PTR_RET(net->ipv4.iptable_filter);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
}
static void __net_exit iptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index cba5658ec82c..0d8cd82e0fad 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -107,7 +107,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
net->ipv4.iptable_mangle =
ipt_register_table(net, &packet_mangler, repl);
kfree(repl);
- return PTR_RET(net->ipv4.iptable_mangle);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle);
}
static void __net_exit iptable_mangle_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 6383273d54e1..683bfaffed65 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -292,7 +292,7 @@ static int __net_init iptable_nat_net_init(struct net *net)
return -ENOMEM;
net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
kfree(repl);
- return PTR_RET(net->ipv4.nat_table);
+ return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
}
static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 03d9696d3c6e..1f82aea11df6 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -48,7 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
net->ipv4.iptable_raw =
ipt_register_table(net, &packet_raw, repl);
kfree(repl);
- return PTR_RET(net->ipv4.iptable_raw);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw);
}
static void __net_exit iptable_raw_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index b283d8e2601a..f867a8d38bf7 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -66,7 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net)
net->ipv4.iptable_security =
ipt_register_table(net, &security_table, repl);
kfree(repl);
- return PTR_RET(net->ipv4.iptable_security);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_security);
}
static void __net_exit iptable_security_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 0a2e0e3e95ba..86f5b34a4ed1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -25,6 +25,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
@@ -136,11 +137,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
- typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
- seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust ||
- !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 746427c9e719..d7d9882d4cae 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1082,7 +1082,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
__u16 srcp = ntohs(inet->inet_sport);
seq_printf(f, "%5d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
+ " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n",
bucket, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 463bd1273346..4a0335854b89 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -111,7 +111,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_SENTINEL
};
-/* Following RFC4293 items are displayed in /proc/net/netstat */
+/* Following items are displayed in /proc/net/netstat */
static const struct snmp_mib snmp4_ipextstats_list[] = {
SNMP_MIB_ITEM("InNoRoutes", IPSTATS_MIB_INNOROUTES),
SNMP_MIB_ITEM("InTruncatedPkts", IPSTATS_MIB_INTRUNCATEDPKTS),
@@ -125,7 +125,12 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+ /* Non RFC4293 fields */
SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
+ SNMP_MIB_ITEM("InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
+ SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+ SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+ SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 61e60d67adca..193db03540ad 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_sk_update_pmtu(skb, sk, info);
- else if (type == ICMP_REDIRECT)
+ else if (type == ICMP_REDIRECT) {
ipv4_sk_redirect(skb, sk);
+ return;
+ }
/* Report error on raw socket, if:
1. User requested ip_recverr.
@@ -387,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
iph->check = 0;
iph->tot_len = htons(length);
if (!iph->id)
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
@@ -988,7 +990,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
srcp = inet->inet_num;
seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
+ " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
i, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a9a54a236832..6011615e810d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,8 @@
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-#define IP_MAX_MTU 0xFFF0
+/* IPv4 datagram length is stored into 16bit field (tot_len) */
+#define IP_MAX_MTU 0xFFFF
#define RT_GC_TIMEOUT (300*HZ)
@@ -435,12 +436,12 @@ static inline int ip_rt_proc_init(void)
static inline bool rt_is_expired(const struct rtable *rth)
{
- return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
+ return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
}
void rt_cache_flush(struct net *net)
{
- rt_genid_bump(net);
+ rt_genid_bump_ipv4(net);
}
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
@@ -1227,10 +1228,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = 576;
}
- if (mtu > IP_MAX_MTU)
- mtu = IP_MAX_MTU;
-
- return mtu;
+ return min_t(unsigned int, mtu, IP_MAX_MTU);
}
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
@@ -1458,7 +1456,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
#endif
rth->dst.output = ip_rt_bug;
- rth->rt_genid = rt_genid(dev_net(dev));
+ rth->rt_genid = rt_genid_ipv4(dev_net(dev));
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
rth->rt_is_input= 1;
@@ -1589,7 +1587,7 @@ static int __mkroute_input(struct sk_buff *skb,
goto cleanup;
}
- rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
+ rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
rth->rt_flags = flags;
rth->rt_type = res->type;
rth->rt_is_input = 1;
@@ -1760,7 +1758,7 @@ local_input:
rth->dst.tclassid = itag;
#endif
- rth->rt_genid = rt_genid(net);
+ rth->rt_genid = rt_genid_ipv4(net);
rth->rt_flags = flags|RTCF_LOCAL;
rth->rt_type = res.type;
rth->rt_is_input = 1;
@@ -1945,7 +1943,7 @@ add:
rth->dst.output = ip_output;
- rth->rt_genid = rt_genid(dev_net(dev_out));
+ rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
rth->rt_flags = flags;
rth->rt_type = type;
rth->rt_is_input = 0;
@@ -2074,7 +2072,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
RT_SCOPE_LINK);
goto make_route;
}
- if (fl4->saddr) {
+ if (!fl4->saddr) {
if (ipv4_is_multicast(fl4->daddr))
fl4->saddr = inet_select_addr(dev_out, 0,
fl4->flowi4_scope);
@@ -2227,7 +2225,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_iif = ort->rt_iif;
rt->rt_pmtu = ort->rt_pmtu;
- rt->rt_genid = rt_genid(net);
+ rt->rt_genid = rt_genid_ipv4(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
@@ -2665,7 +2663,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
static __net_init int rt_genid_init(struct net *net)
{
- atomic_set(&net->rt_genid, 0);
+ atomic_set(&net->ipv4.rt_genid, 0);
atomic_set(&net->fnhe_genid, 0);
get_random_bytes(&net->ipv4.dev_addr_genid,
sizeof(net->ipv4.dev_addr_genid));
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b05c96e7af8b..14a15c49129d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -160,26 +160,33 @@ static __u16 const msstab[] = {
* Generate a syncookie. mssp points to the mss, which is returned
* rounded down to the value encoded in the cookie.
*/
-__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
+ u16 *mssp)
{
- const struct iphdr *iph = ip_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
- tcp_synq_overflow(sk);
-
for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
if (mss >= msstab[mssind])
break;
*mssp = msstab[mssind];
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
th->source, th->dest, ntohl(th->seq),
jiffies / (HZ * 60), mssind);
}
+EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
+/*
+ * Socket-aware wrapper around __cookie_v4_init_sequence(): calls
+ * tcp_synq_overflow() on @sk, increments LINUX_MIB_SYNCOOKIESSENT for the
+ * socket's namespace, then generates the cookie from the IP and TCP
+ * headers of @skb.  @mssp is passed through to the helper unchanged.
+ */
+__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct tcphdr *th = tcp_hdr(skb);
+
+ tcp_synq_overflow(sk);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+
+ return __cookie_v4_init_sequence(iph, th, mssp);
+}
/*
* This (misnamed) value is the age of syncookie which is permitted.
@@ -192,10 +199,9 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
* Check if a ack sequence number is a valid syncookie.
* Return the decoded mss if it is, or 0 if not.
*/
-static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
+ u32 cookie)
{
- const struct iphdr *iph = ip_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
th->source, th->dest, seq,
@@ -204,6 +210,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
+EXPORT_SYMBOL_GPL(__cookie_v4_check);
static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
@@ -284,7 +291,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
goto out;
if (tcp_synq_no_recent_overflow(sk) ||
- (mss = cookie_check(skb, cookie)) == 0) {
+ (mss = __cookie_v4_check(ip_hdr(skb), th, cookie)) == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 610e324348d1..540279f4c531 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -29,6 +29,7 @@
static int zero;
static int one = 1;
static int four = 4;
+static int gso_max_segs = GSO_MAX_SEGS;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -559,6 +560,13 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &one,
},
{
+ .procname = "tcp_notsent_lowat",
+ .data = &sysctl_tcp_notsent_lowat,
+ .maxlen = sizeof(sysctl_tcp_notsent_lowat),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "tcp_rmem",
.data = &sysctl_tcp_rmem,
.maxlen = sizeof(sysctl_tcp_rmem),
@@ -754,6 +762,15 @@ static struct ctl_table ipv4_table[] = {
.extra2 = &four,
},
{
+ .procname = "tcp_min_tso_segs",
+ .data = &sysctl_tcp_min_tso_segs,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &gso_max_segs,
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b2f6c74861af..6e5617b9f9db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,6 +283,8 @@
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
+int sysctl_tcp_min_tso_segs __read_mostly = 2;
+
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -410,10 +412,6 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- /* Presumed zeroed, in order of appearance:
- * cookie_in_always, cookie_out_never,
- * s_data_constant, s_data_in, s_data_out
- */
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -499,7 +497,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
mask |= POLLIN | POLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
set_bit(SOCK_ASYNC_NOSPACE,
@@ -510,7 +508,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* wspace test but before the flags are set,
* IO signal will be lost.
*/
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ if (sk_stream_is_writeable(sk))
mask |= POLLOUT | POLLWRNORM;
}
} else
@@ -789,12 +787,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
xmit_size_goal = mss_now;
if (large_allowed && sk_can_gso(sk)) {
- xmit_size_goal = ((sk->sk_gso_max_size - 1) -
- inet_csk(sk)->icsk_af_ops->net_header_len -
- inet_csk(sk)->icsk_ext_hdr_len -
- tp->tcp_header_len);
+ u32 gso_size, hlen;
+
+ /* Maybe we should/could use sk->sk_prot->max_header here ? */
+ hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+ inet_csk(sk)->icsk_ext_hdr_len +
+ tp->tcp_header_len;
+
+ /* Goal is to send at least one packet per ms,
+ * not one big TSO packet every 100 ms.
+ * This preserves ACK clocking and is consistent
+ * with tcp_tso_should_defer() heuristic.
+ */
+ gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
+ gso_size = max_t(u32, gso_size,
+ sysctl_tcp_min_tso_segs * mss_now);
+
+ xmit_size_goal = min_t(u32, gso_size,
+ sk->sk_gso_max_size - 1 - hlen);
- /* TSQ : try to have two TSO segments in flight */
+ /* TSQ : try to have at least two segments in flight
+ * (one in NIC TX ring, another in Qdisc)
+ */
xmit_size_goal = min_t(u32, xmit_size_goal,
sysctl_tcp_limit_output_bytes >> 1);
@@ -2454,10 +2468,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_THIN_DUPACK:
if (val < 0 || val > 1)
err = -EINVAL;
- else
+ else {
tp->thin_dupack = val;
if (tp->thin_dupack)
tcp_disable_early_retrans(tp);
+ }
break;
case TCP_REPAIR:
@@ -2638,6 +2653,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
else
tp->tsoffset = val - tcp_time_stamp;
break;
+ case TCP_NOTSENT_LOWAT:
+ tp->notsent_lowat = val;
+ sk->sk_write_space(sk);
+ break;
default:
err = -ENOPROTOOPT;
break;
@@ -2854,6 +2873,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_TIMESTAMP:
val = tcp_time_stamp + tp->tsoffset;
break;
+ case TCP_NOTSENT_LOWAT:
+ val = tp->notsent_lowat;
+ break;
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 8f7ef0ad80e5..ab7bd35bb312 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -58,23 +58,22 @@ error: kfree(ctx);
return err;
}
-/* Computes the fastopen cookie for the peer.
- * The peer address is a 128 bits long (pad with zeros for IPv4).
+/* Computes the fastopen cookie for the IP path.
+ * The path is 128 bits long (padded with zeros for IPv4).
*
* The caller must check foc->len to determine if a valid cookie
* has been generated successfully.
*/
-void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc)
+void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
+ struct tcp_fastopen_cookie *foc)
{
- __be32 peer_addr[4] = { addr, 0, 0, 0 };
+ __be32 path[4] = { src, dst, 0, 0 };
struct tcp_fastopen_context *ctx;
rcu_read_lock();
ctx = rcu_dereference(tcp_fastopen_ctx);
if (ctx) {
- crypto_cipher_encrypt_one(ctx->tfm,
- foc->val,
- (__u8 *)peer_addr);
+ crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
}
rcu_read_unlock();
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ca2139a130b..a16b01b537ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -688,6 +688,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
}
}
+/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
+ * Note: TCP stack does not yet implement pacing.
+ * FQ packet scheduler can be used to implement cheap but effective
+ * TCP pacing, to smooth the burst on large writes when packets
+ * in flight is significantly lower than cwnd (or rwin)
+ *
+ * The value stored is
+ *   mss_cache * 2 * (HZ << 3) * max(snd_cwnd, packets_out)
+ * divided by srtt only when srtt is greater than 10, and clamped to ~0U.
+ * The intermediate product is computed in 64 bits.
+ */
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ u64 rate;
+
+ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+ rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+
+ rate *= max(tp->snd_cwnd, tp->packets_out);
+
+ /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
+ * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+ * We probably need usec resolution in the future.
+ * Note: This also takes care of possible srtt=0 case,
+ * when tcp_rtt_estimator() was not yet called.
+ */
+ if (tp->srtt > 8 + 2)
+ do_div(rate, tp->srtt);
+
+ sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+}
+
/* Calculate rto without backoff. This is the second half of Van Jacobson's
* routine referred to above.
*/
@@ -1048,6 +1076,7 @@ struct tcp_sacktag_state {
int reord;
int fack_count;
int flag;
+ s32 rtt; /* RTT measured by SACKing never-retransmitted data */
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1108,7 +1137,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
static u8 tcp_sacktag_one(struct sock *sk,
struct tcp_sacktag_state *state, u8 sacked,
u32 start_seq, u32 end_seq,
- bool dup_sack, int pcount)
+ int dup_sack, int pcount, u32 xmit_time)
{
struct tcp_sock *tp = tcp_sk(sk);
int fack_count = state->fack_count;
@@ -1148,6 +1177,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
state->reord);
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
+ /* Pick the earliest sequence sacked for RTT */
+ if (state->rtt < 0)
+ state->rtt = tcp_time_stamp - xmit_time;
}
if (sacked & TCPCB_LOST) {
@@ -1205,7 +1237,8 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
* tcp_highest_sack_seq() when skb is highest_sack.
*/
tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
- start_seq, end_seq, dup_sack, pcount);
+ start_seq, end_seq, dup_sack, pcount,
+ TCP_SKB_CB(skb)->when);
if (skb == tp->lost_skb_hint)
tp->lost_cnt_hint += pcount;
@@ -1251,7 +1284,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
tp->lost_cnt_hint -= tcp_skb_pcount(prev);
}
- TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
+ TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ TCP_SKB_CB(prev)->end_seq++;
+
if (skb == tcp_highest_sack(sk))
tcp_advance_highest_sack(sk, skb);
@@ -1479,7 +1515,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(skb)->end_seq,
dup_sack,
- tcp_skb_pcount(skb));
+ tcp_skb_pcount(skb),
+ TCP_SKB_CB(skb)->when);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
@@ -1536,7 +1573,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
- u32 prior_snd_una)
+ u32 prior_snd_una, s32 *sack_rtt)
{
struct tcp_sock *tp = tcp_sk(sk);
const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1554,6 +1591,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
state.flag = 0;
state.reord = tp->packets_out;
+ state.rtt = -1;
if (!tp->sacked_out) {
if (WARN_ON(tp->fackets_out))
@@ -1737,6 +1775,7 @@ out:
WARN_ON((int)tp->retrans_out < 0);
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
+ *sack_rtt = state.rtt;
return state.flag;
}
@@ -1869,8 +1908,13 @@ void tcp_enter_loss(struct sock *sk, int how)
}
tcp_verify_left_out(tp);
- tp->reordering = min_t(unsigned int, tp->reordering,
- sysctl_tcp_reordering);
+ /* Timeout in disordered state after receiving substantial DUPACKs
+ * suggests that the degree of reordering is over-estimated.
+ */
+ if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
+ tp->sacked_out >= sysctl_tcp_reordering)
+ tp->reordering = min_t(unsigned int, tp->reordering,
+ sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
@@ -2472,8 +2516,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
- if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
- tcp_moderate_cwnd(tp);
} else {
tcp_cwnd_reduction(sk, prior_unsacked, 0);
}
@@ -2792,65 +2834,51 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
tcp_xmit_retransmit_queue(sk);
}
-void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
+static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
+ s32 seq_rtt, s32 sack_rtt)
{
- tcp_rtt_estimator(sk, seq_rtt);
- tcp_set_rto(sk);
- inet_csk(sk)->icsk_backoff = 0;
-}
-EXPORT_SYMBOL(tcp_valid_rtt_meas);
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ /* Prefer RTT measured from ACK's timing to TS-ECR. This is because
+ * broken middle-boxes or peers may corrupt TS-ECR fields. But
+ * Karn's algorithm forbids taking RTT if some retransmitted data
+ * is acked (RFC6298).
+ */
+ if (flag & FLAG_RETRANS_DATA_ACKED)
+ seq_rtt = -1;
+
+ if (seq_rtt < 0)
+ seq_rtt = sack_rtt;
-/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Supersedes RFC1323)
- */
-static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
-{
/* RTTM Rule: A TSecr value received in a segment is used to
* update the averaged RTT measurement only if the segment
* acknowledges some new data, i.e., only if it advances the
* left edge of the send window.
- *
* See draft-ietf-tcplw-high-performance-00, section 3.3.
- * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
- *
- * Changed: reset backoff as soon as we see the first valid sample.
- * If we do not, we get strongly overestimated rto. With timestamps
- * samples are accepted even from very old segments: f.e., when rtt=1
- * increases to 8, we retransmit 5 times and after 8 seconds delayed
- * answer arrives rto becomes 120 seconds! If at least one of segments
- * in window is lost... Voila. --ANK (010210)
*/
- struct tcp_sock *tp = tcp_sk(sk);
+ if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
+ seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
- tcp_valid_rtt_meas(sk, tcp_time_stamp - tp->rx_opt.rcv_tsecr);
-}
-
-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
-{
- /* We don't have a timestamp. Can only use
- * packets that are not retransmitted to determine
- * rtt estimates. Also, we must not reset the
- * backoff for rto until we get a non-retransmitted
- * packet. This allows us to deal with a situation
- * where the network delay has increased suddenly.
- * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
- */
+ if (seq_rtt < 0)
+ return false;
- if (flag & FLAG_RETRANS_DATA_ACKED)
- return;
+ tcp_rtt_estimator(sk, seq_rtt);
+ tcp_set_rto(sk);
- tcp_valid_rtt_meas(sk, seq_rtt);
+ /* RFC6298: only reset backoff on valid RTT measurement. */
+ inet_csk(sk)->icsk_backoff = 0;
+ return true;
}
-static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
- const s32 seq_rtt)
+/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
+static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
- if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
- tcp_ack_saw_tstamp(sk, flag);
- else if (seq_rtt >= 0)
- tcp_ack_no_tstamp(sk, seq_rtt, flag);
+ struct tcp_sock *tp = tcp_sk(sk);
+ s32 seq_rtt = -1;
+
+ if (tp->lsndtime && !tp->total_retrans)
+ seq_rtt = tcp_time_stamp - tp->lsndtime;
+ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1);
}
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
@@ -2939,7 +2967,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
* arrived at the other end.
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una)
+ u32 prior_snd_una, s32 sack_rtt)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2978,8 +3006,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
- ca_seq_rtt = -1;
- seq_rtt = -1;
} else {
ca_seq_rtt = now - scb->when;
last_ackt = skb->tstamp;
@@ -3031,6 +3057,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
flag |= FLAG_SACK_RENEGING;
+ if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) ||
+ (flag & FLAG_ACKED))
+ tcp_rearm_rto(sk);
+
if (flag & FLAG_ACKED) {
const struct tcp_congestion_ops *ca_ops
= inet_csk(sk)->icsk_ca_ops;
@@ -3040,9 +3070,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tcp_mtup_probe_success(sk);
}
- tcp_ack_update_rtt(sk, flag, seq_rtt);
- tcp_rearm_rto(sk);
-
if (tcp_is_reno(tp)) {
tcp_remove_reno_sacks(sk, pkts_acked);
} else {
@@ -3130,11 +3157,22 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
}
+/* Decide whether to run the increase function of congestion control. */
static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
- const struct tcp_sock *tp = tcp_sk(sk);
- return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
- !tcp_in_cwnd_reduction(sk);
+ if (tcp_in_cwnd_reduction(sk))
+ return false;
+
+ /* If reordering is high then always grow cwnd whenever data is
+ * delivered regardless of its ordering. Otherwise stay conservative
+ * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
+ * new SACK or ECE mark may first advance cwnd here and later reduce
+ * cwnd in tcp_fastretrans_alert() based on more states.
+ */
+ if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+ return flag & FLAG_FORWARD_PROGRESS;
+
+ return flag & FLAG_DATA_ACKED;
}
/* Check that window update is acceptable.
@@ -3253,7 +3291,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
tcp_init_cwnd_reduction(sk, true);
tcp_set_ca_state(sk, TCP_CA_CWR);
tcp_end_cwnd_reduction(sk);
- tcp_set_ca_state(sk, TCP_CA_Open);
+ tcp_try_keep_open(sk);
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPLOSSPROBERECOVERY);
}
@@ -3269,11 +3307,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_in_flight;
+ u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
u32 prior_fackets;
int prior_packets = tp->packets_out;
const int prior_unsacked = tp->packets_out - tp->sacked_out;
int acked = 0; /* Number of packets newly acked */
+ s32 sack_rtt = -1;
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3330,7 +3369,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
if (TCP_SKB_CB(skb)->sacked)
- flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+ flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+ &sack_rtt);
if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
flag |= FLAG_ECE;
@@ -3349,21 +3389,18 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
acked = tp->packets_out;
- flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, sack_rtt);
acked -= tp->packets_out;
+ /* Advance cwnd if state allows */
+ if (tcp_may_raise_cwnd(sk, flag))
+ tcp_cong_avoid(sk, ack, prior_in_flight);
+
if (tcp_ack_is_dubious(sk, flag)) {
- /* Advance CWND, if state allows this. */
- if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, prior_in_flight);
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
- } else {
- if (flag & FLAG_DATA_ACKED)
- tcp_cong_avoid(sk, ack, prior_in_flight);
}
-
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -3375,6 +3412,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
+ if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
+ tcp_update_pacing_rate(sk);
return 1;
no_queue:
@@ -3402,7 +3441,8 @@ old_ack:
* If data was DSACKed, see if we can undo a cwnd reduction.
*/
if (TCP_SKB_CB(skb)->sacked) {
- flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+ flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
+ &sack_rtt);
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
}
@@ -4102,6 +4142,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
} else {
+ tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
}
@@ -4177,8 +4218,10 @@ add_sack:
if (tcp_is_sack(tp))
tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
- if (skb)
+ if (skb) {
+ tcp_grow_window(sk, skb);
skb_set_owner_r(skb, sk);
+ }
}
static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
@@ -5013,8 +5056,8 @@ discard:
* the rest is checked inline. Fast processing is turned on in
* tcp_data_queue when everything is OK.
*/
-int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th, unsigned int len)
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ const struct tcphdr *th, unsigned int len)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -5091,7 +5134,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_ack(sk, skb, 0);
__kfree_skb(skb);
tcp_data_snd_check(sk);
- return 0;
+ return;
} else { /* Header too small */
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -5184,7 +5227,7 @@ no_ack:
if (eaten)
kfree_skb_partial(skb, fragstolen);
sk->sk_data_ready(sk, 0);
- return 0;
+ return;
}
}
@@ -5200,7 +5243,7 @@ slow_path:
*/
if (!tcp_validate_incoming(sk, skb, th, 1))
- return 0;
+ return;
step5:
if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
@@ -5216,7 +5259,7 @@ step5:
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
- return 0;
+ return;
csum_error:
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
@@ -5224,7 +5267,6 @@ csum_error:
discard:
__kfree_skb(skb);
- return 0;
}
EXPORT_SYMBOL(tcp_rcv_established);
@@ -5627,9 +5669,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* so release it.
*/
if (req) {
- tcp_synack_rtt_meas(sk, req);
tp->total_retrans = req->num_retrans;
-
reqsk_fastopen_remove(sk, req, false);
} else {
/* Make sure socket is routed, for correct metrics. */
@@ -5654,6 +5694,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+ tcp_synack_rtt_meas(sk, req);
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -5671,6 +5712,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
} else
tcp_init_metrics(sk);
+ tcp_update_pacing_rate(sk);
+
/* Prevent spurious tcp_cwnd_restart() on first data packet */
tp->lsndtime = tcp_time_stamp;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b299da5ff499..b14266bb91eb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -821,8 +821,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
*/
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- u16 queue_mapping,
- bool nocache)
+ u16 queue_mapping)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
@@ -852,7 +851,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
{
- int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
+ int res = tcp_v4_send_synack(sk, NULL, req, 0);
if (!res)
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -890,7 +889,7 @@ bool tcp_syn_flood_action(struct sock *sk,
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
- if (!lopt->synflood_warned) {
+ if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
lopt->synflood_warned = 1;
pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, ntohs(tcp_hdr(skb)->dest), msg);
@@ -1316,9 +1315,11 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
return true;
}
+
if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+ tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, valid_foc);
if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
memcmp(&foc->val[0], &valid_foc->val[0],
TCP_FASTOPEN_COOKIE_SIZE) != 0)
@@ -1329,14 +1330,16 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
return true;
} else if (foc->len == 0) { /* Client requesting a cookie */
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+ tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, valid_foc);
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENCOOKIEREQD);
} else {
/* Client sent a cookie with wrong size. Treat it
* the same as invalid and return a valid one.
*/
- tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
+ tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, valid_foc);
}
return false;
}
@@ -1462,7 +1465,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+ if ((sysctl_tcp_syncookies == 2 ||
+ inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
if (!want_cookie)
goto drop;
@@ -1671,8 +1675,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
tcp_initialize_rcv_mss(newsk);
- tcp_synack_rtt_meas(newsk, req);
- newtp->total_retrans = req->num_retrans;
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
@@ -1797,10 +1799,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_rx_dst = NULL;
}
}
- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
- rsk = sk;
- goto reset;
- }
+ tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
return 0;
}
@@ -2605,7 +2604,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
long delta = req->expires - jiffies;
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
+ " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n",
i,
ireq->loc_addr,
ntohs(inet_sk(sk)->inet_sport),
@@ -2663,7 +2662,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
- "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
+ "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d%n",
i, src, srcp, dest, destp, sk->sk_state,
tp->write_seq - tp->snd_una,
rx_queue,
@@ -2802,6 +2801,7 @@ struct proto tcp_prot = {
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
+ .stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.orphan_count = &tcp_orphan_count,
.memory_allocated = &tcp_memory_allocated,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index da14436c1735..559d4ae6ebf4 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -87,8 +87,8 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
if (!cg_proto)
return -EINVAL;
- if (val > RESOURCE_MAX)
- val = RESOURCE_MAX;
+ if (val > RES_COUNTER_MAX)
+ val = RES_COUNTER_MAX;
tcp = tcp_from_cgproto(cg_proto);
@@ -101,9 +101,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
net->ipv4.sysctl_tcp_mem[i]);
- if (val == RESOURCE_MAX)
+ if (val == RES_COUNTER_MAX)
clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
- else if (val != RESOURCE_MAX) {
+ else if (val != RES_COUNTER_MAX) {
/*
* The active bit needs to be written after the static_key
* update. This is what guarantees that the socket activation
@@ -132,10 +132,10 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
return 0;
}
-static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
+static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
const char *buffer)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
unsigned long long val;
int ret = 0;
@@ -180,14 +180,14 @@ static u64 tcp_read_usage(struct mem_cgroup *memcg)
return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
}
-static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
+static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
- struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
u64 val;
switch (cft->private) {
case RES_LIMIT:
- val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
+ val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
break;
case RES_USAGE:
val = tcp_read_usage(memcg);
@@ -202,13 +202,13 @@ static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft)
return val;
}
-static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event)
+static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
struct mem_cgroup *memcg;
struct tcp_memcontrol *tcp;
struct cg_proto *cg_proto;
- memcg = mem_cgroup_from_cont(cont);
+ memcg = mem_cgroup_from_css(css);
cg_proto = tcp_prot.proto_cgroup(memcg);
if (!cg_proto)
return 0;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f6a005c485a9..52f3c6b971d2 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -443,7 +443,7 @@ void tcp_init_metrics(struct sock *sk)
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_metrics_block *tm;
- u32 val;
+ u32 val, crtt = 0; /* cached RTT scaled by 8 */
if (dst == NULL)
goto reset;
@@ -478,15 +478,19 @@ void tcp_init_metrics(struct sock *sk)
tp->reordering = val;
}
- val = tcp_metric_get(tm, TCP_METRIC_RTT);
- if (val == 0 || tp->srtt == 0) {
- rcu_read_unlock();
- goto reset;
- }
- /* Initial rtt is determined from SYN,SYN-ACK.
- * The segment is small and rtt may appear much
- * less than real one. Use per-dst memory
- * to make it more realistic.
+ crtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
+ rcu_read_unlock();
+reset:
+ /* The initial RTT measurement from the SYN/SYN-ACK is not ideal
+ * to seed the RTO for later data packets because SYN packets are
+ * small. Use the per-dst cached values to seed the RTO but keep
+ * the RTT estimator variables intact (e.g., srtt, mdev, rttvar).
+ * Later the RTO will be updated immediately upon obtaining the first
+ * data RTT sample (tcp_rtt_estimator()). Hence the cached RTT only
+ * influences the first RTO but not later RTT estimation.
+ *
+ * But if RTT is not available from the SYN (due to retransmits or
+ * syn cookies) or the cache, force a conservative 3secs timeout.
*
* A bit of theory. RTT is time passed after "normal" sized packet
* is sent until it is ACKed. In normal circumstances sending small
@@ -497,21 +501,11 @@ void tcp_init_metrics(struct sock *sk)
* to low value, and then abruptly stops to do it and starts to delay
* ACKs, wait for troubles.
*/
- val = msecs_to_jiffies(val);
- if (val > tp->srtt) {
- tp->srtt = val;
- tp->rtt_seq = tp->snd_nxt;
- }
- val = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
- if (val > tp->mdev) {
- tp->mdev = val;
- tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
- }
- rcu_read_unlock();
-
- tcp_set_rto(sk);
-reset:
- if (tp->srtt == 0) {
+ if (crtt > tp->srtt) {
+ /* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
+ crtt >>= 3;
+ inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
+ } else if (tp->srtt == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
* 3WHS. This is most likely due to retransmission,
* including spurious one. Reset the RTO back to 3secs
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ab1c08658528..58a3e69aef64 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -411,6 +411,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tcp_enable_early_retrans(newtp);
newtp->tlp_high_seq = 0;
+ newtp->lsndtime = treq->snt_synack;
+ newtp->total_retrans = req->num_retrans;
/* So many TCP implementations out there (incorrectly) count the
* initial SYN frame in their delayed-ACK and congestion control
@@ -666,12 +668,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!(flg & TCP_FLAG_ACK))
return NULL;
- /* Got ACK for our SYNACK, so update baseline for SYNACK RTT sample. */
- if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
- tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
- else if (req->num_retrans) /* don't take RTT sample if retrans && ~TS */
- tcp_rsk(req)->snt_synack = 0;
-
/* For Fast Open no more processing is needed (sk is the
* child socket).
*/
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 170737a9d56d..d46f2143305c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,6 +65,9 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
+EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
+
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
@@ -634,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
unsigned int size = 0;
unsigned int eff_sacks;
+ opts->options = 0;
+
#ifdef CONFIG_TCP_MD5SIG
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (unlikely(*md5)) {
@@ -892,8 +897,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
- tcp_wfree : sock_wfree;
+ skb->destructor = tcp_wfree;
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */
@@ -982,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
- if (skb->len <= mss_now || !sk_can_gso(sk) ||
- skb->ip_summed == CHECKSUM_NONE) {
+ /* Make sure we own this skb before messing gso_size/gso_segs */
+ WARN_ON_ONCE(skb_cloned(skb));
+
+ if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
/* Avoid the costly divide in the normal
* non-TSO case.
*/
@@ -1063,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
if (nsize < 0)
nsize = 0;
- if (skb_cloned(skb) &&
- skb_is_nonlinear(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(skb, GFP_ATOMIC))
return -ENOMEM;
/* Get a new skb... force flag on. */
@@ -1628,7 +1632,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
/* If a full-sized TSO skb can be sent, do it. */
if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
- sk->sk_gso_max_segs * tp->mss_cache))
+ tp->xmit_size_goal_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1837,7 +1841,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
-
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
@@ -1866,13 +1869,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
- /* TSQ : sk_wmem_alloc accounts skb truesize,
- * including skb overhead. But thats OK.
+ /* TCP Small Queues :
+ * Limit the number of packets in qdisc/devices to two packets or ~1 ms.
+ * This allows for :
+ * - better RTT estimation and ACK scheduling
+ * - faster recovery
+ * - high rates
*/
- if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+ limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
break;
}
+
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
@@ -2334,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
int oldpcount = tcp_skb_pcount(skb);
if (unlikely(oldpcount > 1)) {
+ if (skb_unclone(skb, GFP_ATOMIC))
+ return -ENOMEM;
tcp_init_tso_segs(sk, skb, cur_mss);
tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
}
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index d4943f67aff2..611beab38a00 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -46,6 +46,10 @@ static unsigned int bufsize __read_mostly = 4096;
MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
module_param(bufsize, uint, 0);
+static unsigned int fwmark __read_mostly = 0;
+MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
+module_param(fwmark, uint, 0);
+
static int full __read_mostly;
MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
module_param(full, int, 0);
@@ -54,12 +58,16 @@ static const char procname[] = "tcpprobe";
struct tcp_log {
ktime_t tstamp;
- __be32 saddr, daddr;
- __be16 sport, dport;
+ union {
+ struct sockaddr raw;
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } src, dst;
u16 length;
u32 snd_nxt;
u32 snd_una;
u32 snd_wnd;
+ u32 rcv_wnd;
u32 snd_cwnd;
u32 ssthresh;
u32 srtt;
@@ -86,19 +94,45 @@ static inline int tcp_probe_avail(void)
return bufsize - tcp_probe_used() - 1;
}
+#define tcp_probe_copy_fl_to_si4(inet, si4, mem) \
+ do { \
+ si4.sin_family = AF_INET; \
+ si4.sin_port = inet->inet_##mem##port; \
+ si4.sin_addr.s_addr = inet->inet_##mem##addr; \
+ } while (0) \
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define tcp_probe_copy_fl_to_si6(inet, si6, mem) \
+ do { \
+ struct ipv6_pinfo *pi6 = inet->pinet6; \
+ si6.sin6_family = AF_INET6; \
+ si6.sin6_port = inet->inet_##mem##port; \
+ si6.sin6_addr = pi6->mem##addr; \
+ si6.sin6_flowinfo = 0; /* No need here. */ \
+ si6.sin6_scope_id = 0; /* No need here. */ \
+ } while (0)
+#else
+#define tcp_probe_copy_fl_to_si6(fl, si6, mem) \
+ do { \
+ memset(&si6, 0, sizeof(si6)); \
+ } while (0)
+#endif
+
/*
* Hook inserted to be called before each receive packet.
* Note: arguments must match tcp_rcv_established()!
*/
-static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- struct tcphdr *th, unsigned int len)
+static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+ const struct tcphdr *th, unsigned int len)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_sock *inet = inet_sk(sk);
- /* Only update if port matches */
- if ((port == 0 || ntohs(inet->inet_dport) == port ||
- ntohs(inet->inet_sport) == port) &&
+ /* Only update if port or skb mark matches */
+ if (((port == 0 && fwmark == 0) ||
+ ntohs(inet->inet_dport) == port ||
+ ntohs(inet->inet_sport) == port ||
+ (fwmark > 0 && skb->mark == fwmark)) &&
(full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
spin_lock(&tcp_probe.lock);
@@ -107,15 +141,25 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
struct tcp_log *p = tcp_probe.log + tcp_probe.head;
p->tstamp = ktime_get();
- p->saddr = inet->inet_saddr;
- p->sport = inet->inet_sport;
- p->daddr = inet->inet_daddr;
- p->dport = inet->inet_dport;
+ switch (sk->sk_family) {
+ case AF_INET:
+ tcp_probe_copy_fl_to_si4(inet, p->src.v4, s);
+ tcp_probe_copy_fl_to_si4(inet, p->dst.v4, d);
+ break;
+ case AF_INET6:
+ tcp_probe_copy_fl_to_si6(inet, p->src.v6, s);
+ tcp_probe_copy_fl_to_si6(inet, p->dst.v6, d);
+ break;
+ default:
+ BUG();
+ }
+
p->length = skb->len;
p->snd_nxt = tp->snd_nxt;
p->snd_una = tp->snd_una;
p->snd_cwnd = tp->snd_cwnd;
p->snd_wnd = tp->snd_wnd;
+ p->rcv_wnd = tp->rcv_wnd;
p->ssthresh = tcp_current_ssthresh(sk);
p->srtt = tp->srtt >> 3;
@@ -128,7 +172,6 @@ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
}
jprobe_return();
- return 0;
}
static struct jprobe tcp_jprobe = {
@@ -157,13 +200,11 @@ static int tcpprobe_sprint(char *tbuf, int n)
= ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
return scnprintf(tbuf, n,
- "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n",
+ "%lu.%09lu %pISpc %pISpc %d %#x %#x %u %u %u %u %u\n",
(unsigned long) tv.tv_sec,
(unsigned long) tv.tv_nsec,
- &p->saddr, ntohs(p->sport),
- &p->daddr, ntohs(p->dport),
- p->length, p->snd_nxt, p->snd_una,
- p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt);
+ &p->src, &p->dst, p->length, p->snd_nxt, p->snd_una,
+ p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt, p->rcv_wnd);
}
static ssize_t tcpprobe_read(struct file *file, char __user *buf,
@@ -176,7 +217,7 @@ static ssize_t tcpprobe_read(struct file *file, char __user *buf,
return -EINVAL;
while (cnt < len) {
- char tbuf[164];
+ char tbuf[256];
int width;
/* Wait for data in buffer */
@@ -223,6 +264,13 @@ static __init int tcpprobe_init(void)
{
int ret = -ENOMEM;
+ /* Warning: if the function signature of tcp_rcv_established,
+ * has been changed, you also have to change the signature of
+ * jtcp_rcv_established, otherwise you end up right here!
+ */
+ BUILD_BUG_ON(__same_type(tcp_rcv_established,
+ jtcp_rcv_established) == 0);
+
init_waitqueue_head(&tcp_probe.wait);
spin_lock_init(&tcp_probe.lock);
@@ -241,7 +289,8 @@ static __init int tcpprobe_init(void)
if (ret)
goto err1;
- pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize);
+ pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
+ port, fwmark, bufsize);
return 0;
err1:
remove_proc_entry(procname, init_net.proc_net);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 766e6bab9113..0ca44df51ee9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
break;
case ICMP_REDIRECT:
ipv4_sk_redirect(skb, sk);
- break;
+ goto out;
}
/*
@@ -704,7 +704,7 @@ EXPORT_SYMBOL(udp_flush_pending_frames);
* @src: source IP address
* @dst: destination IP address
*/
-static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
+void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
{
struct udphdr *uh = udp_hdr(skb);
struct sk_buff *frags = skb_shinfo(skb)->frag_list;
@@ -740,6 +740,7 @@ static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
uh->check = CSUM_MANGLED_0;
}
}
+EXPORT_SYMBOL_GPL(udp4_hwcsum);
static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
{
@@ -2158,7 +2159,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
__u16 srcp = ntohs(inet->inet_sport);
seq_printf(f, "%5d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
+ " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n",
bucket, src, srcp, dest, destp, sp->sk_state,
sk_wmem_alloc_get(sp),
sk_rmem_alloc_get(sp),
@@ -2336,7 +2337,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
uh->len = htons(skb->len - udp_offset);
/* csum segment if tunnel sets skb with csum. */
- if (unlikely(uh->check)) {
+ if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
struct iphdr *iph = ip_hdr(skb);
uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
@@ -2347,7 +2348,18 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
+ } else if (protocol == htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ u32 len = skb->len - udp_offset;
+
+ uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ len, IPPROTO_UDP, 0);
+ uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ skb->ip_summed = CHECKSUM_NONE;
}
+
skb->protocol = protocol;
} while ((skb = skb->next));
out:
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index eb1dd4d643f2..b5663c37f089 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -117,7 +117,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- ip_select_ident(top_iph, dst->child, NULL);
+ ip_select_ident(skb, dst->child, NULL);
top_iph->ttl = ip4_dst_hoplimit(dst->child);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 9a459be24af7..ccde54248c8c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -107,6 +107,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
+ fl4->flowi4_oif = skb_dst(skb)->dev->ifindex;
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 498ea99194af..cd3fb301da38 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -99,9 +99,9 @@
#define ACONF_DEBUG 2
#if ACONF_DEBUG >= 3
-#define ADBG(x) printk x
+#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
-#define ADBG(x)
+#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
#endif
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -177,6 +177,8 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_redirects = 1,
.autoconf = 1,
.force_mld_version = 0,
+ .mldv1_unsolicited_report_interval = 10 * HZ,
+ .mldv2_unsolicited_report_interval = HZ,
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
@@ -202,6 +204,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
.accept_dad = 1,
+ .suppress_frag_ndisc = 1,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -211,6 +214,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_ra = 1,
.accept_redirects = 1,
.autoconf = 1,
+ .force_mld_version = 0,
+ .mldv1_unsolicited_report_interval = 10 * HZ,
+ .mldv2_unsolicited_report_interval = HZ,
.dad_transmits = 1,
.rtr_solicits = MAX_RTR_SOLICITATIONS,
.rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
@@ -236,17 +242,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.accept_source_route = 0, /* we do not accept RH0 by default. */
.disable_ipv6 = 0,
.accept_dad = 1,
+ .suppress_frag_ndisc = 1,
};
-/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
-const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
-const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
-const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
-
/* Check if a valid qdisc is available */
static inline bool addrconf_qdisc_ok(const struct net_device *dev)
{
@@ -306,36 +304,6 @@ err_ip:
return -ENOMEM;
}
-static void snmp6_free_dev(struct inet6_dev *idev)
-{
- kfree(idev->stats.icmpv6msgdev);
- kfree(idev->stats.icmpv6dev);
- snmp_mib_free((void __percpu **)idev->stats.ipv6);
-}
-
-/* Nobody refers to this device, we may destroy it. */
-
-void in6_dev_finish_destroy(struct inet6_dev *idev)
-{
- struct net_device *dev = idev->dev;
-
- WARN_ON(!list_empty(&idev->addr_list));
- WARN_ON(idev->mc_list != NULL);
- WARN_ON(timer_pending(&idev->rs_timer));
-
-#ifdef NET_REFCNT_DEBUG
- pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL");
-#endif
- dev_put(dev);
- if (!idev->dead) {
- pr_warn("Freeing alive inet6 device %p\n", idev);
- return;
- }
- snmp6_free_dev(idev);
- kfree_rcu(idev, rcu);
-}
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-
static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
{
struct inet6_dev *ndev;
@@ -369,9 +337,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG((KERN_WARNING
+ ADBG(KERN_WARNING
"%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name));
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -379,9 +347,9 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG((KERN_WARNING
+ ADBG(KERN_WARNING
"%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name));
+ __func__, dev->name);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
ndev->dead = 1;
in6_dev_finish_destroy(ndev);
@@ -844,7 +812,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
- ADBG(("ipv6_add_addr: already assigned\n"));
+ ADBG("ipv6_add_addr: already assigned\n");
err = -EEXIST;
goto out;
}
@@ -852,7 +820,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
if (ifa == NULL) {
- ADBG(("ipv6_add_addr: malloc failed\n"));
+ ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -1054,7 +1022,6 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
unsigned long regen_advance;
int tmp_plen;
int ret = 0;
- int max_addresses;
u32 addr_flags;
unsigned long now = jiffies;
@@ -1100,7 +1067,6 @@ retry:
idev->cnf.temp_prefered_lft + age -
idev->cnf.max_desync_factor);
tmp_plen = ifp->prefix_len;
- max_addresses = idev->cnf.max_addresses;
tmp_tstamp = ifp->tstamp;
spin_unlock_bh(&ifp->lock);
@@ -1533,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
return false;
}
+/* Check whether any address configured on @dev shares its first
+ * @prefix_len bits with @addr.
+ *
+ * Returns true as soon as one matching address is found, false when no
+ * address matches or when __in6_dev_get() yields no inet6_dev for @dev.
+ * The address list is walked under rcu_read_lock() and idev->lock.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+	const unsigned int prefix_len, struct net_device *dev)
+{
+	struct inet6_ifaddr *cur;
+	struct inet6_dev *in6dev;
+	bool found = false;
+
+	rcu_read_lock();
+	in6dev = __in6_dev_get(dev);
+	if (!in6dev)
+		goto unlock;
+
+	read_lock_bh(&in6dev->lock);
+	list_for_each_entry(cur, &in6dev->addr_list, if_list) {
+		if (ipv6_prefix_equal(addr, &cur->addr, prefix_len)) {
+			found = true;
+			break;
+		}
+	}
+	read_unlock_bh(&in6dev->lock);
+unlock:
+	rcu_read_unlock();
+
+	return found;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
struct inet6_dev *idev;
@@ -1807,6 +1800,16 @@ static int addrconf_ifid_gre(u8 *eui, struct net_device *dev)
return __ipv6_isatap_ifid(eui, *(__be32 *)dev->dev_addr);
}
+/* Build an 8-byte interface identifier for an ip6 tunnel device from the
+ * first six bytes of dev->perm_addr: bytes 0-2, then 0xFF 0xFE, then
+ * bytes 3-5, with bit 0x02 of the first byte flipped. Always returns 0.
+ */
+static int addrconf_ifid_ip6tnl(u8 *eui, struct net_device *dev)
+{
+	/* fixed filler in the middle of the identifier */
+	eui[3] = 0xFF;
+	eui[4] = 0xFE;
+
+	/* upper and lower halves of the permanent address */
+	memcpy(&eui[0], &dev->perm_addr[0], 3);
+	memcpy(&eui[5], &dev->perm_addr[3], 3);
+
+	/* toggle bit 0x02 of the leading byte (must follow the copy above) */
+	eui[0] ^= 2;
+
+	return 0;
+}
+
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
{
switch (dev->type) {
@@ -1825,6 +1828,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
return addrconf_ifid_eui64(eui, dev);
case ARPHRD_IEEE1394:
return addrconf_ifid_ieee1394(eui, dev);
+ case ARPHRD_TUNNEL6:
+ return addrconf_ifid_ip6tnl(eui, dev);
}
return -1;
}
@@ -2050,7 +2055,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG(("addrconf: prefix option too short\n"));
+ ADBG("addrconf: prefix option too short\n");
return;
}
@@ -2215,43 +2220,21 @@ ok:
else
stored_lft = 0;
if (!update_lft && !create && stored_lft) {
- if (valid_lft > MIN_VALID_LIFETIME ||
- valid_lft > stored_lft)
- update_lft = 1;
- else if (stored_lft <= MIN_VALID_LIFETIME) {
- /* valid_lft <= stored_lft is always true */
- /*
- * RFC 4862 Section 5.5.3e:
- * "Note that the preferred lifetime of
- * the corresponding address is always
- * reset to the Preferred Lifetime in
- * the received Prefix Information
- * option, regardless of whether the
- * valid lifetime is also reset or
- * ignored."
- *
- * So if the preferred lifetime in
- * this advertisement is different
- * than what we have stored, but the
- * valid lifetime is invalid, just
- * reset prefered_lft.
- *
- * We must set the valid lifetime
- * to the stored lifetime since we'll
- * be updating the timestamp below,
- * else we'll set it back to the
- * minimum.
- */
- if (prefered_lft != ifp->prefered_lft) {
- valid_lft = stored_lft;
- update_lft = 1;
- }
- } else {
- valid_lft = MIN_VALID_LIFETIME;
- if (valid_lft < prefered_lft)
- prefered_lft = valid_lft;
- update_lft = 1;
- }
+ const u32 minimum_lft = min(
+ stored_lft, (u32)MIN_VALID_LIFETIME);
+ valid_lft = max(valid_lft, minimum_lft);
+
+ /* RFC4862 Section 5.5.3e:
+ * "Note that the preferred lifetime of the
+ * corresponding address is always reset to
+ * the Preferred Lifetime in the received
+ * Prefix Information option, regardless of
+ * whether the valid lifetime is also reset or
+ * ignored."
+ *
+ * So we should always update prefered_lft here.
+ */
+ update_lft = 1;
}
if (update_lft) {
@@ -2702,7 +2685,8 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_ARCNET) &&
(dev->type != ARPHRD_INFINIBAND) &&
(dev->type != ARPHRD_IEEE802154) &&
- (dev->type != ARPHRD_IEEE1394)) {
+ (dev->type != ARPHRD_IEEE1394) &&
+ (dev->type != ARPHRD_TUNNEL6)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -2788,44 +2772,6 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
return -1;
}
-static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
-{
- struct net_device *link_dev;
- struct net *net = dev_net(idev->dev);
-
- /* first try to inherit the link-local address from the link device */
- if (idev->dev->iflink &&
- (link_dev = __dev_get_by_index(net, idev->dev->iflink))) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- /* then try to inherit it from any device */
- for_each_netdev(net, link_dev) {
- if (!ipv6_inherit_linklocal(idev, link_dev))
- return;
- }
- pr_debug("init ip6-ip6: add_linklocal failed\n");
-}
-
-/*
- * Autoconfigure tunnel with a link-local address so routing protocols,
- * DHCPv6, MLD etc. can be run over the virtual link
- */
-
-static void addrconf_ip6_tnl_config(struct net_device *dev)
-{
- struct inet6_dev *idev;
-
- ASSERT_RTNL();
-
- idev = addrconf_add_dev(dev);
- if (IS_ERR(idev)) {
- pr_debug("init ip6-ip6: add_dev failed\n");
- return;
- }
- ip6_tnl_add_linklocal(idev);
-}
-
static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
@@ -2893,9 +2839,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
addrconf_gre_config(dev);
break;
#endif
- case ARPHRD_TUNNEL6:
- addrconf_ip6_tnl_config(dev);
- break;
case ARPHRD_LOOPBACK:
init_loopback(dev);
break;
@@ -3120,6 +3063,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
static void addrconf_rs_timer(unsigned long data)
{
struct inet6_dev *idev = (struct inet6_dev *)data;
+ struct net_device *dev = idev->dev;
struct in6_addr lladdr;
write_lock(&idev->lock);
@@ -3134,12 +3078,14 @@ static void addrconf_rs_timer(unsigned long data)
goto out;
if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
- if (!__ipv6_get_lladdr(idev, &lladdr, IFA_F_TENTATIVE))
- ndisc_send_rs(idev->dev, &lladdr,
+ write_unlock(&idev->lock);
+ if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
+ ndisc_send_rs(dev, &lladdr,
&in6addr_linklocal_allrouters);
else
- goto out;
+ goto put;
+ write_lock(&idev->lock);
/* The wait after the last probe can be shorter */
addrconf_mod_rs_timer(idev, (idev->rs_probes ==
idev->cnf.rtr_solicits) ?
@@ -3155,6 +3101,7 @@ static void addrconf_rs_timer(unsigned long data)
out:
write_unlock(&idev->lock);
+put:
in6_dev_put(idev);
}
@@ -3630,8 +3577,8 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched));
+ ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
addr_chk_timer.expires = next_sched;
add_timer(&addr_chk_timer);
@@ -4177,6 +4124,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_RTR_SOLICIT_DELAY] =
jiffies_to_msecs(cnf->rtr_solicit_delay);
array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
+ array[DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL] =
+ jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);
+ array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =
+ jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval);
#ifdef CONFIG_IPV6_PRIVACY
array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;
array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;
@@ -4207,6 +4158,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
+ array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
}
static inline size_t inet6_ifla6_size(void)
@@ -4652,6 +4604,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
break;
}
atomic_inc(&net->ipv6.dev_addr_genid);
+ rt_genid_bump_ipv6(net);
}
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -4859,6 +4812,22 @@ static struct addrconf_sysctl_table
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "mldv1_unsolicited_report_interval",
+ .data =
+ &ipv6_devconf.mldv1_unsolicited_report_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
+ {
+ .procname = "mldv2_unsolicited_report_interval",
+ .data =
+ &ipv6_devconf.mldv2_unsolicited_report_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ },
#ifdef CONFIG_IPV6_PRIVACY
{
.procname = "use_tempaddr",
@@ -5004,6 +4973,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec
},
{
+ .procname = "suppress_frag_ndisc",
+ .data = &ipv6_devconf.suppress_frag_ndisc,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
/* sentinel */
}
},
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index d2f87427244b..4c11cbcf8308 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -6,6 +6,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
+#include <net/ip.h>
#define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16)
@@ -98,3 +99,52 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
return atomic_notifier_call_chain(&inet6addr_chain, val, v);
}
EXPORT_SYMBOL(inet6addr_notifier_call_chain);
+
+/* Pointer to the ipv6_stub operations table. It has no initializer here,
+ * so it starts out NULL; elsewhere in this change inet6_init() points it
+ * at ipv6_stub_impl and inet6_exit() resets it to NULL. Exported GPL-only.
+ */
+const struct ipv6_stub *ipv6_stub __read_mostly;
+EXPORT_SYMBOL_GPL(ipv6_stub);
+
+/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+/* The remaining constants are the link-local, interface-local and
+ * site-local all-nodes/all-routers addresses. Every constant below is
+ * defined once here and exported with EXPORT_SYMBOL.
+ */
+const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+EXPORT_SYMBOL(in6addr_loopback);
+const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+EXPORT_SYMBOL(in6addr_any);
+const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+EXPORT_SYMBOL(in6addr_linklocal_allnodes);
+const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_linklocal_allrouters);
+const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
+const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
+const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
+
+/* Release the per-device statistics storage hanging off idev->stats:
+ * the ipv6 MIB via snmp_mib_free(), and the icmpv6dev / icmpv6msgdev
+ * blocks via kfree(). The pointers themselves are not cleared.
+ */
+static void snmp6_free_dev(struct inet6_dev *idev)
+{
+	snmp_mib_free((void __percpu **)idev->stats.ipv6);
+	kfree(idev->stats.icmpv6dev);
+	kfree(idev->stats.icmpv6msgdev);
+}
+
+/* Final teardown of an inet6_dev once nobody refers to it any more.
+ *
+ * Warns if the address list is non-empty, a multicast list is still
+ * attached, or the rs_timer is still pending. The reference on the
+ * underlying net_device is always dropped. The statistics and the
+ * structure itself are only freed when idev->dead is set; otherwise a
+ * warning is logged and the inet6_dev is left allocated.
+ */
+void in6_dev_finish_destroy(struct inet6_dev *idev)
+{
+	struct net_device *netdev = idev->dev;
+
+	WARN_ON(!list_empty(&idev->addr_list));
+	WARN_ON(idev->mc_list != NULL);
+	WARN_ON(timer_pending(&idev->rs_timer));
+
+#ifdef NET_REFCNT_DEBUG
+	pr_debug("%s: %s\n", __func__, netdev ? netdev->name : "NIL");
+#endif
+	dev_put(netdev);
+
+	if (idev->dead) {
+		snmp6_free_dev(idev);
+		/* defer the free until after an RCU grace period */
+		kfree_rcu(idev, rcu);
+	} else {
+		pr_warn("Freeing alive inet6 device %p\n", idev);
+	}
+}
+EXPORT_SYMBOL(in6_dev_finish_destroy);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index f083a583a05c..b30ad3741b46 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
/* add a label */
static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
{
+ struct hlist_node *n;
+ struct ip6addrlbl_entry *last = NULL, *p = NULL;
int ret = 0;
- ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
- __func__,
- newp, replace);
+ ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
+ replace);
- if (hlist_empty(&ip6addrlbl_table.head)) {
- hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
- } else {
- struct hlist_node *n;
- struct ip6addrlbl_entry *p = NULL;
- hlist_for_each_entry_safe(p, n,
- &ip6addrlbl_table.head, list) {
- if (p->prefixlen == newp->prefixlen &&
- net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
- p->ifindex == newp->ifindex &&
- ipv6_addr_equal(&p->prefix, &newp->prefix)) {
- if (!replace) {
- ret = -EEXIST;
- goto out;
- }
- hlist_replace_rcu(&p->list, &newp->list);
- ip6addrlbl_put(p);
- goto out;
- } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
- (p->prefixlen < newp->prefixlen)) {
- hlist_add_before_rcu(&newp->list, &p->list);
+ hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ if (p->prefixlen == newp->prefixlen &&
+ net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
+ p->ifindex == newp->ifindex &&
+ ipv6_addr_equal(&p->prefix, &newp->prefix)) {
+ if (!replace) {
+ ret = -EEXIST;
goto out;
}
+ hlist_replace_rcu(&p->list, &newp->list);
+ ip6addrlbl_put(p);
+ goto out;
+ } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
+ (p->prefixlen < newp->prefixlen)) {
+ hlist_add_before_rcu(&newp->list, &p->list);
+ goto out;
}
- hlist_add_after_rcu(&p->list, &newp->list);
+ last = p;
}
+ if (last)
+ hlist_add_after_rcu(&last->list, &newp->list);
+ else
+ hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
if (!ret)
ip6addrlbl_table.seq++;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a5ac969aeefe..7c96100b021e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -56,6 +56,7 @@
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
+#include <net/ndisc.h>
#ifdef CONFIG_IPV6_TUNNEL
#include <net/ip6_tunnel.h>
#endif
@@ -766,6 +767,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
+ atomic_set(&net->ipv6.rt_genid, 0);
err = ipv6_init_mibs(net);
if (err)
@@ -809,6 +811,15 @@ static struct pernet_operations inet6_net_ops = {
.exit = inet6_net_exit,
};
+/* Concrete ipv6_stub table; inet6_init() publishes it through the
+ * global ipv6_stub pointer and inet6_exit() withdraws it again.
+ */
+static const struct ipv6_stub ipv6_stub_impl = {
+	/* neighbour discovery */
+	.nd_tbl			= &nd_tbl,
+	.ndisc_send_na		= ndisc_send_na,
+	/* multicast group membership */
+	.ipv6_sock_mc_join	= ipv6_sock_mc_join,
+	.ipv6_sock_mc_drop	= ipv6_sock_mc_drop,
+	/* routing and UDP encapsulation */
+	.ipv6_dst_lookup	= ip6_dst_lookup,
+	.udpv6_encap_enable	= udpv6_encap_enable,
+};
+
static int __init inet6_init(void)
{
struct list_head *r;
@@ -883,6 +894,9 @@ static int __init inet6_init(void)
err = igmp6_init();
if (err)
goto igmp_fail;
+
+ ipv6_stub = &ipv6_stub_impl;
+
err = ipv6_netfilter_init();
if (err)
goto netfilter_fail;
@@ -901,6 +915,9 @@ static int __init inet6_init(void)
err = ip6_route_init();
if (err)
goto ip6_route_fail;
+ err = ndisc_late_init();
+ if (err)
+ goto ndisc_late_fail;
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
@@ -967,6 +984,8 @@ ipv6_exthdrs_fail:
addrconf_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
+ ndisc_late_cleanup();
+ndisc_late_fail:
ip6_route_cleanup();
ip6_route_fail:
#ifdef CONFIG_PROC_FS
@@ -1029,6 +1048,7 @@ static void __exit inet6_exit(void)
ipv6_exthdrs_exit();
addrconf_cleanup();
ip6_flowlabel_cleanup();
+ ndisc_late_cleanup();
ip6_route_cleanup();
#ifdef CONFIG_PROC_FS
@@ -1039,6 +1059,7 @@ static void __exit inet6_exit(void)
raw6_proc_exit();
#endif
ipv6_netfilter_fini();
+ ipv6_stub = NULL;
igmp6_cleanup();
ndisc_cleanup();
ip6_mr_cleanup();
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index bb02e176cb70..82e1da3a40b9 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -618,8 +618,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
return;
@@ -628,7 +627,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, 0, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 197e6f4a2b74..48b6bd2a9a14 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -890,7 +890,7 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
src = &np->rcv_saddr;
seq_printf(seq,
"%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n",
bucket,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index aeac0dc3635d..e67e63f9858d 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -436,8 +436,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
return;
@@ -447,7 +446,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, 0, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 07a7d65a7cb6..8d67900aa003 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -162,12 +162,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
off += optlen;
len -= optlen;
}
- /* This case will not be caught by above check since its padding
- * length is smaller than 7:
- * 1 byte NH + 1 byte Length + 6 bytes Padding
- */
- if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8))
- goto bad;
if (len == 0)
return true;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 2e1a432867c0..e27591635f92 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -55,26 +55,33 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
struct fib6_table *table;
struct net *net = rule->fr_net;
pol_lookup_t lookup = arg->lookup_ptr;
+ int err = 0;
switch (rule->action) {
case FR_ACT_TO_TBL:
break;
case FR_ACT_UNREACHABLE:
+ err = -ENETUNREACH;
rt = net->ipv6.ip6_null_entry;
goto discard_pkt;
default:
case FR_ACT_BLACKHOLE:
+ err = -EINVAL;
rt = net->ipv6.ip6_blk_hole_entry;
goto discard_pkt;
case FR_ACT_PROHIBIT:
+ err = -EACCES;
rt = net->ipv6.ip6_prohibit_entry;
goto discard_pkt;
}
table = fib6_get_table(net, rule->table);
- if (table)
- rt = lookup(net, table, flp6, flags);
+ if (!table) {
+ err = -EAGAIN;
+ goto out;
+ }
+ rt = lookup(net, table, flp6, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -101,6 +108,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
}
again:
ip6_rt_put(rt);
+ err = -EAGAIN;
rt = NULL;
goto out;
@@ -108,9 +116,31 @@ discard_pkt:
dst_hold(&rt->dst);
out:
arg->result = rt;
- return rt == NULL ? -EAGAIN : 0;
+ return err;
}
+/*
+ * fib6_rule_suppress - decide whether a rule's lookup result is discarded
+ * @rule: rule whose suppress_prefixlen / suppress_ifgroup settings apply
+ * @arg:  lookup argument; arg->result is the rt6_info produced by the rule
+ *
+ * Returns true when the route is suppressed, in which case the route is
+ * released with ip6_rt_put(). Returns false otherwise and leaves the
+ * route untouched.
+ */
+static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
+{
+	struct rt6_info *rt = (struct rt6_info *) arg->result;
+	struct net_device *dev = NULL;
+
+	/* Do not assume the route carries an inet6_dev: the ifgroup test
+	 * below already tolerates a missing device, so only follow
+	 * rt6i_idev when it is set instead of dereferencing it blindly.
+	 */
+	if (rt->rt6i_idev)
+		dev = rt->rt6i_idev->dev;
+
+	/* do not accept result if the route does
+	 * not meet the required prefix length
+	 */
+	if (rt->rt6i_dst.plen <= rule->suppress_prefixlen)
+		goto suppress_route;
+
+	/* do not accept result if the route uses a device
+	 * belonging to a forbidden interface group
+	 */
+	if (rule->suppress_ifgroup != -1 && dev &&
+	    dev->group == rule->suppress_ifgroup)
+		goto suppress_route;
+
+	return false;
+
+suppress_route:
+	ip6_rt_put(rt);
+	return true;
+}
static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
@@ -244,6 +274,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.addr_size = sizeof(struct in6_addr),
.action = fib6_rule_action,
.match = fib6_rule_match,
+ .suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 7cfc8d284870..eef8d945b362 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -92,7 +92,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == ICMPV6_PKT_TOOBIG)
ip6_update_pmtu(skb, net, info, 0, 0);
else if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, 0, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
if (!(type & ICMPV6_INFOMSG_MASK))
if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
@@ -940,6 +940,14 @@ static const struct icmp6_err {
.err = ECONNREFUSED,
.fatal = 1,
},
+ { /* POLICY_FAIL */
+ .err = EACCES,
+ .fatal = 1,
+ },
+ { /* REJECT_ROUTE */
+ .err = EACCES,
+ .fatal = 1,
+ },
};
int icmpv6_err_convert(u8 type, u8 code, int *err)
@@ -951,7 +959,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
switch (type) {
case ICMPV6_DEST_UNREACH:
fatal = 1;
- if (code <= ICMPV6_PORT_UNREACH) {
+ if (code < ARRAY_SIZE(tab_unreach)) {
*err = tab_unreach[code].err;
fatal = tab_unreach[code].fatal;
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 32b4a1675d82..066640e0ba8e 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -116,7 +116,7 @@ begintw:
}
if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
ports, dif))) {
- sock_put(sk);
+ inet_twsk_put(inet_twsk(sk));
goto begintw;
}
goto out;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index c4ff5bbb45c4..5bec666aba61 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -425,8 +425,8 @@ out:
* node.
*/
-static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
- int addrlen, int plen,
+static struct fib6_node *fib6_add_1(struct fib6_node *root,
+ struct in6_addr *addr, int plen,
int offset, int allow_create,
int replace_required)
{
@@ -543,7 +543,7 @@ insert_above:
but if it is >= plen, the value is ignored in any case.
*/
- bit = __ipv6_addr_diff(addr, &key->addr, addrlen);
+ bit = __ipv6_addr_diff(addr, &key->addr, sizeof(*addr));
/*
* (intermediate)[in]
@@ -822,12 +822,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
if (!allow_create && !replace_required)
pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
- fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
- rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst),
- allow_create, replace_required);
-
+ fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+ offsetof(struct rt6_info, rt6i_dst), allow_create,
+ replace_required);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
+ fn = NULL;
goto out;
}
@@ -863,7 +863,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
/* Now add the first leaf node to new subtree */
sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
- sizeof(struct in6_addr), rt->rt6i_src.plen,
+ rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required);
@@ -882,7 +882,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
fn->subtree = sfn;
} else {
sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
- sizeof(struct in6_addr), rt->rt6i_src.plen,
+ rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
allow_create, replace_required);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 90747f1973fe..bf4a9a084de5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -335,6 +335,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
dev->rtnl_link_ops = &ip6gre_link_ops;
nt->dev = dev;
+ nt->net = dev_net(dev);
ip6gre_tnl_link_config(nt, 1);
if (register_netdevice(dev) < 0)
@@ -508,8 +509,6 @@ static int ip6gre_rcv(struct sk_buff *skb)
goto drop;
}
- secpath_reset(skb);
-
skb->protocol = gre_proto;
/* WCCP version 1 and 2 protocol decoding.
* - Change protocol to IP
@@ -524,7 +523,6 @@ static int ip6gre_rcv(struct sk_buff *skb)
skb->mac_header = skb->network_header;
__pskb_pull(skb, offset);
skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
- skb->pkt_type = PACKET_HOST;
if (((flags&GRE_CSUM) && csum) ||
(!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
@@ -556,7 +554,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
}
- __skb_tunnel_rx(skb, tunnel->dev);
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
skb_reset_network_header(skb);
@@ -620,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct ip6_tnl *tunnel = netdev_priv(dev);
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
- unsigned int max_headroom; /* The extra header space needed */
+ unsigned int max_headroom = 0; /* The extra header space needed */
int gre_hlen;
struct ipv6_tel_txoption opt;
int mtu;
@@ -693,7 +691,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
tunnel->err_count = 0;
}
- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+ skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
+
+ max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -709,8 +709,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb = new_skb;
}
- skb_dst_drop(skb);
-
if (fl6->flowi6_mark) {
skb_dst_set(skb, dst);
ndst = NULL;
@@ -978,6 +976,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
if (t->parms.o_flags&GRE_SEQ)
addend += 4;
}
+ t->hlen = addend;
if (p->flags & IP6_TNL_F_CAP_XMIT) {
int strict = (ipv6_addr_type(&p->raddr) &
@@ -1004,8 +1003,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
}
ip6_rt_put(rt);
}
-
- t->hlen = addend;
}
static int ip6gre_tnl_change(struct ip6_tnl *t,
@@ -1175,9 +1172,8 @@ done:
static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
- struct ip6_tnl *tunnel = netdev_priv(dev);
if (new_mtu < 68 ||
- new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
+ new_mtu > 0xFFF8 - dev->hard_header_len)
return -EINVAL;
dev->mtu = new_mtu;
return 0;
@@ -1260,6 +1256,7 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
@@ -1280,6 +1277,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
struct ip6_tnl *tunnel = netdev_priv(dev);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
tunnel->hlen = sizeof(struct ipv6hdr) + 4;
@@ -1455,6 +1453,7 @@ static int ip6gre_tap_init(struct net_device *dev)
tunnel = netdev_priv(dev);
tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
strcpy(tunnel->parms.name, dev->name);
ip6gre_tnl_link_config(tunnel, 1);
@@ -1506,6 +1505,7 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
eth_hw_addr_random(dev);
nt->dev = dev;
+ nt->net = dev_net(dev);
ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
/* Can use a lockless transmit, unless we generate output sequences */
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 2bab2aa59745..302d6fb1ff2b 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -44,7 +44,7 @@
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
-
+#include <net/inet_ecn.h>
int ip6_rcv_finish(struct sk_buff *skb)
@@ -109,6 +109,10 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->version != 6)
goto err;
+ IP6_ADD_STATS_BH(dev_net(dev), idev,
+ IPSTATS_MIB_NOECTPKTS +
+ (ipv6_get_dsfield(hdr) & INET_ECN_MASK),
+ max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/*
* RFC4291 2.5.3
* A packet received on an interface with a destination address
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index a263b990ee11..d82de7228100 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -91,6 +91,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
unsigned int unfrag_ip6hlen;
u8 *prevhdr;
int offset = 0;
+ bool tunnel;
if (unlikely(skb_shinfo(skb)->gso_type &
~(SKB_GSO_UDP |
@@ -106,6 +107,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
+ tunnel = skb->encapsulation;
ipv6h = ipv6_hdr(skb);
__skb_pull(skb, sizeof(*ipv6h));
segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -126,7 +128,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
ipv6h = ipv6_hdr(skb);
ipv6h->payload_len = htons(skb->len - skb->mac_len -
sizeof(*ipv6h));
- if (proto == IPPROTO_UDP) {
+ if (!tunnel && proto == IPPROTO_UDP) {
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
fptr = (struct frag_hdr *)(skb_network_header(skb) +
unfrag_ip6hlen);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e7ceb6c871d1..91fb4e8212f5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,31 +56,6 @@
#include <net/checksum.h>
#include <linux/mroute6.h>
-int __ip6_local_out(struct sk_buff *skb)
-{
- int len;
-
- len = skb->len - sizeof(struct ipv6hdr);
- if (len > IPV6_MAXPLEN)
- len = 0;
- ipv6_hdr(skb)->payload_len = htons(len);
-
- return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
- skb_dst(skb)->dev, dst_output);
-}
-
-int ip6_local_out(struct sk_buff *skb)
-{
- int err;
-
- err = __ip6_local_out(skb);
- if (likely(err == 1))
- err = dst_output(skb);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(ip6_local_out);
-
static int ip6_finish_output2(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -130,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+ nexthop = rt6_nexthop((struct rt6_info *)dst);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -899,7 +874,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/
rt = (struct rt6_info *) *dst;
rcu_read_lock_bh();
- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh();
@@ -1033,6 +1008,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
{
struct sk_buff *skb;
+ struct frag_hdr fhdr;
int err;
/* There is support for UDP large send offload by network
@@ -1059,33 +1035,26 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb->transport_header = skb->network_header + fragheaderlen;
skb->protocol = htons(ETH_P_IPV6);
- skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- }
- err = skb_append_datato_frags(sk,skb, getfrag, from,
- (length - transhdrlen));
- if (!err) {
- struct frag_hdr fhdr;
-
- /* Specify the length of each IPv6 datagram fragment.
- * It has to be a multiple of 8.
- */
- skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
- sizeof(struct frag_hdr)) & ~7;
- skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
- ipv6_select_ident(&fhdr, rt);
- skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
-
- return 0;
+ } else if (skb_is_gso(skb)) {
+ goto append;
}
- /* There is not enough support do UPD LSO,
- * so follow normal path
- */
- kfree_skb(skb);
- return err;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ /* Specify the length of each IPv6 datagram fragment.
+ * It has to be a multiple of 8.
+ */
+ skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
+ sizeof(struct frag_hdr)) & ~7;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+ ipv6_select_ident(&fhdr, rt);
+ skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
+
+append:
+ return skb_append_datato_frags(sk, skb, getfrag, from,
+ (length - transhdrlen));
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1252,27 +1221,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* --yoshfuji
*/
- cork->length += length;
- if (length > mtu) {
- int proto = sk->sk_protocol;
- if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
- ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
- return -EMSGSIZE;
- }
-
- if (proto == IPPROTO_UDP &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
+ if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_RAW)) {
+ ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+ return -EMSGSIZE;
+ }
- err = ip6_ufo_append_data(sk, getfrag, from, length,
- hh_len, fragheaderlen,
- transhdrlen, mtu, flags, rt);
- if (err)
- goto error;
- return 0;
- }
+ skb = skb_peek_tail(&sk->sk_write_queue);
+ cork->length += length;
+ if (((length > mtu) ||
+ (skb && skb_is_gso(skb))) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->dst.dev->features & NETIF_F_UFO)) {
+ err = ip6_ufo_append_data(sk, getfrag, from, length,
+ hh_len, fragheaderlen,
+ transhdrlen, mtu, flags, rt);
+ if (err)
+ goto error;
+ return 0;
}
- if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ if (!skb)
goto alloc_new_skb;
while (length > 0) {
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 46ba243605a3..583b77e2f69b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -41,6 +41,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <linux/hash.h>
+#include <linux/etherdevice.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
@@ -315,6 +316,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
t = netdev_priv(dev);
t->parms = *p;
+ t->net = dev_net(dev);
err = ip6_tnl_create2(dev);
if (err < 0)
goto failed_free;
@@ -374,7 +376,7 @@ static void
ip6_tnl_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (dev == ip6n->fb_tnl_dev)
@@ -741,7 +743,7 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
{
struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
if ((p->flags & IP6_TNL_F_CAP_RCV) ||
((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
@@ -800,14 +802,12 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
rcu_read_unlock();
goto discard;
}
- secpath_reset(skb);
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
skb->protocol = htons(protocol);
- skb->pkt_type = PACKET_HOST;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
- __skb_tunnel_rx(skb, t->dev);
+ __skb_tunnel_rx(skb, t->dev, t->net);
err = dscp_ecn_decapsulate(t, ipv6h, skb);
if (unlikely(err)) {
@@ -895,7 +895,7 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
{
struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
if (p->flags & IP6_TNL_F_CAP_XMIT) {
struct net_device *ldev = NULL;
@@ -945,8 +945,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
int encap_limit,
__u32 *pmtu)
{
- struct net *net = dev_net(dev);
struct ip6_tnl *t = netdev_priv(dev);
+ struct net *net = t->net;
struct net_device_stats *stats = &t->dev->stats;
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ipv6_tel_txoption opt;
@@ -996,6 +996,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
goto tx_err_dst_release;
}
+ skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
+
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
@@ -1013,7 +1015,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
consume_skb(skb);
skb = new_skb;
}
- skb_dst_drop(skb);
if (fl6->flowi6_mark) {
skb_dst_set(skb, dst);
ndst = NULL;
@@ -1208,7 +1209,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
int strict = (ipv6_addr_type(&p->raddr) &
(IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
- struct rt6_info *rt = rt6_lookup(dev_net(dev),
+ struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
p->link, strict);
@@ -1257,7 +1258,7 @@ ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
{
- struct net *net = dev_net(t->dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
int err;
@@ -1429,9 +1430,17 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
static int
ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
{
- if (new_mtu < IPV6_MIN_MTU) {
- return -EINVAL;
+ struct ip6_tnl *tnl = netdev_priv(dev);
+
+ if (tnl->parms.proto == IPPROTO_IPIP) {
+ if (new_mtu < 68)
+ return -EINVAL;
+ } else {
+ if (new_mtu < IPV6_MIN_MTU)
+ return -EINVAL;
}
+ if (new_mtu > 0xFFF8 - dev->hard_header_len)
+ return -EINVAL;
dev->mtu = new_mtu;
return 0;
}
@@ -1469,8 +1478,10 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->mtu-=8;
dev->flags |= IFF_NOARP;
dev->addr_len = sizeof(struct in6_addr);
- dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ /* This perm addr will be used as interface identifier by IPv6 */
+ dev->addr_assign_type = NET_ADDR_RANDOM;
+ eth_random_addr(dev->perm_addr);
}
@@ -1485,6 +1496,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
t->dev = dev;
+ t->net = dev_net(dev);
dev->tstats = alloc_percpu(struct pcpu_tstats);
if (!dev->tstats)
return -ENOMEM;
@@ -1602,9 +1614,9 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[])
{
- struct ip6_tnl *t;
+ struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm p;
- struct net *net = dev_net(dev);
+ struct net *net = t->net;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
if (dev == ip6n->fb_tnl_dev)
@@ -1652,9 +1664,9 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
- &parm->raddr) ||
- nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
&parm->laddr) ||
+ nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
@@ -1705,20 +1717,28 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
{
+ struct net *net = dev_net(ip6n->fb_tnl_dev);
+ struct net_device *dev, *aux;
int h;
struct ip6_tnl *t;
LIST_HEAD(list);
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &ip6_link_ops)
+ unregister_netdevice_queue(dev, &list);
+
for (h = 0; h < HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t != NULL) {
- unregister_netdevice_queue(t->dev, &list);
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, &list);
t = rtnl_dereference(t->next);
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, &list);
unregister_netdevice_many(&list);
}
@@ -1738,6 +1758,11 @@ static int __net_init ip6_tnl_init_net(struct net *net)
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
dev_net_set(ip6n->fb_tnl_dev, net);
+ ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
+ /* FB netdevice is special: we have one, and only one per netns.
+ * Allowing to move it to another netns is clearly unsafe.
+ */
+ ip6n->fb_tnl_dev->features |= NETIF_F_NETNS_LOCAL;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 03986d31fa41..f365310bfcca 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -110,8 +110,8 @@ static struct kmem_cache *mrt_cachep __read_mostly;
static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
static void ip6mr_free_table(struct mr6_table *mrt);
-static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache);
+static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *cache);
static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
@@ -672,9 +672,8 @@ static int pim6_rcv(struct sk_buff *skb)
skb_reset_network_header(skb);
skb->protocol = htons(ETH_P_IPV6);
skb->ip_summed = CHECKSUM_NONE;
- skb->pkt_type = PACKET_HOST;
- skb_tunnel_rx(skb, reg_dev);
+ skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
netif_rx(skb);
@@ -2074,8 +2073,8 @@ static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
return ct;
}
-static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *cache)
{
int psend = -1;
int vif, ct;
@@ -2156,12 +2155,11 @@ forward:
last_forward:
if (psend != -1) {
ip6mr_forward2(net, mrt, skb, cache, psend);
- return 0;
+ return;
}
dont_forward:
kfree_skb(skb);
- return 0;
}
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7af5aee75d98..ce507d9e1c90 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -64,8 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
(struct ip_comp_hdr *)(skb->data + offset);
struct xfrm_state *x;
- if (type != ICMPV6_DEST_UNREACH &&
- type != ICMPV6_PKT_TOOBIG &&
+ if (type != ICMPV6_PKT_TOOBIG &&
type != NDISC_REDIRECT)
return;
@@ -76,7 +75,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return;
if (type == NDISC_REDIRECT)
- ip6_redirect(skb, net, 0, 0);
+ ip6_redirect(skb, net, skb->dev->ifindex, 0);
else
ip6_update_pmtu(skb, net, info, 0, 0);
xfrm_state_put(x);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 99cd65c715cd..d18f9f903db6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -44,6 +44,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/pkt_sched.h>
#include <net/mld.h>
#include <linux/netfilter.h>
@@ -94,6 +95,7 @@ static void mld_ifc_event(struct inet6_dev *idev);
static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
static void mld_del_delrec(struct inet6_dev *idev, const struct in6_addr *addr);
static void mld_clear_delrec(struct inet6_dev *idev);
+static bool mld_in_v1_mode(const struct inet6_dev *idev);
static int sf_setstate(struct ifmcaddr6 *pmc);
static void sf_markstate(struct ifmcaddr6 *pmc);
static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
@@ -106,14 +108,15 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
struct inet6_dev *idev);
-
-#define IGMP6_UNSOLICITED_IVAL (10*HZ)
#define MLD_QRV_DEFAULT 2
+/* RFC3810, 9.2. Query Interval */
+#define MLD_QI_DEFAULT (125 * HZ)
+/* RFC3810, 9.3. Query Response Interval */
+#define MLD_QRI_DEFAULT (10 * HZ)
-#define MLD_V1_SEEN(idev) (dev_net((idev)->dev)->ipv6.devconf_all->force_mld_version == 1 || \
- (idev)->cnf.force_mld_version == 1 || \
- ((idev)->mc_v1_seen && \
- time_before(jiffies, (idev)->mc_v1_seen)))
+/* RFC3810, 8.1 Query Version Distinctions */
+#define MLD_V1_QUERY_LEN 24
+#define MLD_V2_QUERY_LEN_MIN 28
#define IPV6_MLD_MAX_MSF 64
@@ -128,6 +131,18 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
pmc != NULL; \
pmc = rcu_dereference(pmc->next))
+static int unsolicited_report_interval(struct inet6_dev *idev)
+{
+ int iv;
+
+ if (mld_in_v1_mode(idev))
+ iv = idev->cnf.mldv1_unsolicited_report_interval;
+ else
+ iv = idev->cnf.mldv2_unsolicited_report_interval;
+
+ return iv > 0 ? iv : 1;
+}
+
int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
{
struct net_device *dev = NULL;
@@ -676,7 +691,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
return;
- if (MLD_V1_SEEN(mc->idev)) {
+ if (mld_in_v1_mode(mc->idev)) {
igmp6_join_group(mc);
return;
}
@@ -984,29 +999,49 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
static void mld_gq_start_timer(struct inet6_dev *idev)
{
- int tv = net_random() % idev->mc_maxdelay;
+ unsigned long tv = net_random() % idev->mc_maxdelay;
idev->mc_gq_running = 1;
if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
-static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+static void mld_gq_stop_timer(struct inet6_dev *idev)
{
- int tv = net_random() % delay;
+ idev->mc_gq_running = 0;
+ if (del_timer(&idev->mc_gq_timer))
+ __in6_dev_put(idev);
+}
+
+static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)
+{
+ unsigned long tv = net_random() % delay;
if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
-static void mld_dad_start_timer(struct inet6_dev *idev, int delay)
+static void mld_ifc_stop_timer(struct inet6_dev *idev)
{
- int tv = net_random() % delay;
+ idev->mc_ifc_count = 0;
+ if (del_timer(&idev->mc_ifc_timer))
+ __in6_dev_put(idev);
+}
+
+static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)
+{
+ unsigned long tv = net_random() % delay;
if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))
in6_dev_hold(idev);
}
+static void mld_dad_stop_timer(struct inet6_dev *idev)
+{
+ if (del_timer(&idev->mc_dad_timer))
+ __in6_dev_put(idev);
+}
+
/*
* IGMP handling (alias multicast ICMPv6 messages)
*/
@@ -1025,12 +1060,9 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
delay = ma->mca_timer.expires - jiffies;
}
- if (delay >= resptime) {
- if (resptime)
- delay = net_random() % resptime;
- else
- delay = 1;
- }
+ if (delay >= resptime)
+ delay = net_random() % resptime;
+
ma->mca_timer.expires = jiffies + delay;
if (!mod_timer(&ma->mca_timer, jiffies + delay))
atomic_inc(&ma->mca_refcnt);
@@ -1097,6 +1129,158 @@ static bool mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
return true;
}
+static int mld_force_mld_version(const struct inet6_dev *idev)
+{
+ /* Normally, both are 0 here. If enforcement to a particular is
+ * being used, individual device enforcement will have a lower
+ * precedence over 'all' device (.../conf/all/force_mld_version).
+ */
+
+ if (dev_net(idev->dev)->ipv6.devconf_all->force_mld_version != 0)
+ return dev_net(idev->dev)->ipv6.devconf_all->force_mld_version;
+ else
+ return idev->cnf.force_mld_version;
+}
+
+static bool mld_in_v2_mode_only(const struct inet6_dev *idev)
+{
+ return mld_force_mld_version(idev) == 2;
+}
+
+static bool mld_in_v1_mode_only(const struct inet6_dev *idev)
+{
+ return mld_force_mld_version(idev) == 1;
+}
+
+static bool mld_in_v1_mode(const struct inet6_dev *idev)
+{
+ if (mld_in_v2_mode_only(idev))
+ return false;
+ if (mld_in_v1_mode_only(idev))
+ return true;
+ if (idev->mc_v1_seen && time_before(jiffies, idev->mc_v1_seen))
+ return true;
+
+ return false;
+}
+
+static void mld_set_v1_mode(struct inet6_dev *idev)
+{
+ /* RFC3810, relevant sections:
+ * - 9.1. Robustness Variable
+ * - 9.2. Query Interval
+ * - 9.3. Query Response Interval
+ * - 9.12. Older Version Querier Present Timeout
+ */
+ unsigned long switchback;
+
+ switchback = (idev->mc_qrv * idev->mc_qi) + idev->mc_qri;
+
+ idev->mc_v1_seen = jiffies + switchback;
+}
+
+static void mld_update_qrv(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.8. QRV (Querier's Robustness Variable)
+ * - 9.1. Robustness Variable
+ */
+
+ /* The value of the Robustness Variable MUST NOT be zero,
+ * and SHOULD NOT be one. Catch this here if we ever run
+ * into such a case in future.
+ */
+ WARN_ON(idev->mc_qrv == 0);
+
+ if (mlh2->mld2q_qrv > 0)
+ idev->mc_qrv = mlh2->mld2q_qrv;
+
+ if (unlikely(idev->mc_qrv < 2)) {
+ net_warn_ratelimited("IPv6: MLD: clamping QRV from %u to %u!\n",
+ idev->mc_qrv, MLD_QRV_DEFAULT);
+ idev->mc_qrv = MLD_QRV_DEFAULT;
+ }
+}
+
+static void mld_update_qi(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.9. QQIC (Querier's Query Interval Code)
+ * - 9.2. Query Interval
+ * - 9.12. Older Version Querier Present Timeout
+ * (the [Query Interval] in the last Query received)
+ */
+ unsigned long mc_qqi;
+
+ if (mlh2->mld2q_qqic < 128) {
+ mc_qqi = mlh2->mld2q_qqic;
+ } else {
+ unsigned long mc_man, mc_exp;
+
+ mc_exp = MLDV2_QQIC_EXP(mlh2->mld2q_qqic);
+ mc_man = MLDV2_QQIC_MAN(mlh2->mld2q_qqic);
+
+ mc_qqi = (mc_man | 0x10) << (mc_exp + 3);
+ }
+
+ idev->mc_qi = mc_qqi * HZ;
+}
+
+static void mld_update_qri(struct inet6_dev *idev,
+ const struct mld2_query *mlh2)
+{
+ /* RFC3810, relevant sections:
+ * - 5.1.3. Maximum Response Code
+ * - 9.3. Query Response Interval
+ */
+ idev->mc_qri = msecs_to_jiffies(mldv2_mrc(mlh2));
+}
+
+static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
+ unsigned long *max_delay)
+{
+ unsigned long mldv1_md;
+
+ /* Ignore v1 queries */
+ if (mld_in_v2_mode_only(idev))
+ return -EINVAL;
+
+ /* MLDv1 router present */
+ mldv1_md = ntohs(mld->mld_maxdelay);
+ *max_delay = max(msecs_to_jiffies(mldv1_md), 1UL);
+
+ mld_set_v1_mode(idev);
+
+ /* cancel MLDv2 report timer */
+ mld_gq_stop_timer(idev);
+ /* cancel the interface change timer */
+ mld_ifc_stop_timer(idev);
+ /* clear deleted report items */
+ mld_clear_delrec(idev);
+
+ return 0;
+}
+
+static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+ unsigned long *max_delay)
+{
+ /* hosts need to stay in MLDv1 mode, discard MLDv2 queries */
+ if (mld_in_v1_mode(idev))
+ return -EINVAL;
+
+ *max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
+
+ mld_update_qrv(idev, mld);
+ mld_update_qi(idev, mld);
+ mld_update_qri(idev, mld);
+
+ idev->mc_maxdelay = *max_delay;
+
+ return 0;
+}
+
/* called with rcu_read_lock() */
int igmp6_event_query(struct sk_buff *skb)
{
@@ -1108,7 +1292,7 @@ int igmp6_event_query(struct sk_buff *skb)
struct mld_msg *mld;
int group_type;
int mark = 0;
- int len;
+ int len, err;
if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
return -EINVAL;
@@ -1122,7 +1306,6 @@ int igmp6_event_query(struct sk_buff *skb)
return -EINVAL;
idev = __in6_dev_get(skb->dev);
-
if (idev == NULL)
return 0;
@@ -1134,35 +1317,23 @@ int igmp6_event_query(struct sk_buff *skb)
!(group_type&IPV6_ADDR_MULTICAST))
return -EINVAL;
- if (len == 24) {
- int switchback;
- /* MLDv1 router present */
-
- /* Translate milliseconds to jiffies */
- max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
-
- switchback = (idev->mc_qrv + 1) * max_delay;
- idev->mc_v1_seen = jiffies + switchback;
-
- /* cancel the interface change timer */
- idev->mc_ifc_count = 0;
- if (del_timer(&idev->mc_ifc_timer))
- __in6_dev_put(idev);
- /* clear deleted report items */
- mld_clear_delrec(idev);
- } else if (len >= 28) {
+ if (len == MLD_V1_QUERY_LEN) {
+ err = mld_process_v1(idev, mld, &max_delay);
+ if (err < 0)
+ return err;
+ } else if (len >= MLD_V2_QUERY_LEN_MIN) {
int srcs_offset = sizeof(struct mld2_query) -
sizeof(struct icmp6hdr);
+
if (!pskb_may_pull(skb, srcs_offset))
return -EINVAL;
mlh2 = (struct mld2_query *)skb_transport_header(skb);
- max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
- if (!max_delay)
- max_delay = 1;
- idev->mc_maxdelay = max_delay;
- if (mlh2->mld2q_qrv)
- idev->mc_qrv = mlh2->mld2q_qrv;
+
+ err = mld_process_v2(idev, mlh2, &max_delay);
+ if (err < 0)
+ return err;
+
if (group_type == IPV6_ADDR_ANY) { /* general query */
if (mlh2->mld2q_nsrcs)
return -EINVAL; /* no sources allowed */
@@ -1376,6 +1547,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
if (!skb)
return NULL;
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
@@ -1769,7 +1941,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
rcu_read_unlock();
return;
}
-
+ skb->priority = TC_PRIO_CONTROL;
skb_reserve(skb, hlen);
if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
@@ -1827,7 +1999,7 @@ err_out:
static void mld_resend_report(struct inet6_dev *idev)
{
- if (MLD_V1_SEEN(idev)) {
+ if (mld_in_v1_mode(idev)) {
struct ifmcaddr6 *mcaddr;
read_lock_bh(&idev->lock);
for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) {
@@ -1862,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data)
if (idev->mc_dad_count)
mld_dad_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
@@ -1891,7 +2063,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
else
pmc->mca_sources = psf->sf_next;
if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
- !MLD_V1_SEEN(idev)) {
+ !mld_in_v1_mode(idev)) {
psf->sf_crcount = idev->mc_qrv;
psf->sf_next = pmc->mca_tomb;
pmc->mca_tomb = psf;
@@ -2156,7 +2328,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)
igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
- delay = net_random() % IGMP6_UNSOLICITED_IVAL;
+ delay = net_random() % unsolicited_report_interval(ma->idev);
spin_lock_bh(&ma->mca_lock);
if (del_timer(&ma->mca_timer)) {
@@ -2191,7 +2363,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
static void igmp6_leave_group(struct ifmcaddr6 *ma)
{
- if (MLD_V1_SEEN(ma->idev)) {
+ if (mld_in_v1_mode(ma->idev)) {
if (ma->mca_flags & MAF_LAST_REPORTER)
igmp6_send(&ma->mca_addr, ma->idev->dev,
ICMPV6_MGM_REDUCTION);
@@ -2207,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data)
idev->mc_gq_running = 0;
mld_send_report(idev, NULL);
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_timer_expire(unsigned long data)
@@ -2220,12 +2392,12 @@ static void mld_ifc_timer_expire(unsigned long data)
if (idev->mc_ifc_count)
mld_ifc_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_event(struct inet6_dev *idev)
{
- if (MLD_V1_SEEN(idev))
+ if (mld_in_v1_mode(idev))
return;
idev->mc_ifc_count = idev->mc_qrv;
mld_ifc_start_timer(idev, 1);
@@ -2236,7 +2408,7 @@ static void igmp6_timer_handler(unsigned long data)
{
struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
- if (MLD_V1_SEEN(ma->idev))
+ if (mld_in_v1_mode(ma->idev))
igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
else
mld_send_report(ma->idev, ma);
@@ -2276,14 +2448,9 @@ void ipv6_mc_down(struct inet6_dev *idev)
/* Withdraw multicast list */
read_lock_bh(&idev->lock);
- idev->mc_ifc_count = 0;
- if (del_timer(&idev->mc_ifc_timer))
- __in6_dev_put(idev);
- idev->mc_gq_running = 0;
- if (del_timer(&idev->mc_gq_timer))
- __in6_dev_put(idev);
- if (del_timer(&idev->mc_dad_timer))
- __in6_dev_put(idev);
+ mld_ifc_stop_timer(idev);
+ mld_gq_stop_timer(idev);
+ mld_dad_stop_timer(idev);
for (i = idev->mc_list; i; i=i->next)
igmp6_group_dropped(i);
@@ -2322,8 +2489,12 @@ void ipv6_mc_init_dev(struct inet6_dev *idev)
(unsigned long)idev);
setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire,
(unsigned long)idev);
+
idev->mc_qrv = MLD_QRV_DEFAULT;
- idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
+ idev->mc_qi = MLD_QI_DEFAULT;
+ idev->mc_qri = MLD_QRI_DEFAULT;
+
+ idev->mc_maxdelay = unsolicited_report_interval(idev);
idev->mc_v1_seen = 0;
write_unlock_bh(&idev->lock);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 04d31c2fbef1..f8a55ff1971b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
int tlen = dev->needed_tailroom;
struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
- int err;
- skb = sock_alloc_send_skb(sk,
- hlen + sizeof(struct ipv6hdr) + len + tlen,
- 1, &err);
+ skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
if (!skb) {
- ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n",
- __func__, err);
+ ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
+ __func__);
return NULL;
}
@@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
skb_reset_transport_header(skb);
+ /* Manually assign socket ownership as we avoid calling
+ * sock_alloc_send_pskb() to bypass wmem buffer limits
+ */
+ skb_set_owner_w(skb, sk);
+
return skb;
}
@@ -428,7 +430,6 @@ static void ndisc_send_skb(struct sk_buff *skb,
type = icmp6h->icmp6_type;
if (!dst) {
- struct sock *sk = net->ipv6.ndisc_sk;
struct flowi6 fl6;
icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
@@ -462,10 +463,10 @@ static void ndisc_send_skb(struct sk_buff *skb,
rcu_read_unlock();
}
-static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
- const struct in6_addr *daddr,
- const struct in6_addr *solicited_addr,
- bool router, bool solicited, bool override, bool inc_opt)
+void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+ const struct in6_addr *daddr,
+ const struct in6_addr *solicited_addr,
+ bool router, bool solicited, bool override, bool inc_opt)
{
struct sk_buff *skb;
struct in6_addr tmpaddr;
@@ -663,9 +664,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
}
ndisc_send_ns(dev, neigh, target, target, saddr);
} else if ((probes -= neigh->parms->app_probes) < 0) {
-#ifdef CONFIG_ARPD
neigh_app_ns(neigh);
-#endif
} else {
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
@@ -1370,7 +1369,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
if (!ndopts.nd_opts_rh) {
- ip6_redirect_no_header(skb, dev_net(skb->dev), 0, 0);
+ ip6_redirect_no_header(skb, dev_net(skb->dev),
+ skb->dev->ifindex, 0);
return;
}
@@ -1519,10 +1519,27 @@ static void pndisc_redo(struct sk_buff *skb)
kfree_skb(skb);
}
+/*
+ * Decide whether a received neighbour discovery packet must be ignored.
+ *
+ * Returns true when the receiving device has no inet6_dev, or when the
+ * packet is flagged IP6SKB_FRAGMENTED and the device's suppress_frag_ndisc
+ * setting is non-zero (a rate-limited warning is logged in that case).
+ * Returns false otherwise.
+ */
+static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
+{
+	struct inet6_dev *in6 = __in6_dev_get(skb->dev);
+
+	if (!in6)
+		return true;
+
+	/* Unfragmented packets are never suppressed. */
+	if (!(IP6CB(skb)->flags & IP6SKB_FRAGMENTED))
+		return false;
+
+	/* Fragmented, but suppression is switched off for this device. */
+	if (!in6->cnf.suppress_frag_ndisc)
+		return false;
+
+	net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n");
+	return true;
+}
+
int ndisc_rcv(struct sk_buff *skb)
{
struct nd_msg *msg;
+ if (ndisc_suppress_frag_ndisc(skb))
+ return 0;
+
if (skb_linearize(skb))
return 0;
@@ -1710,24 +1727,28 @@ int __init ndisc_init(void)
if (err)
goto out_unregister_pernet;
#endif
- err = register_netdevice_notifier(&ndisc_netdev_notifier);
- if (err)
- goto out_unregister_sysctl;
out:
return err;
-out_unregister_sysctl:
#ifdef CONFIG_SYSCTL
- neigh_sysctl_unregister(&nd_tbl.parms);
out_unregister_pernet:
-#endif
unregister_pernet_subsys(&ndisc_net_ops);
goto out;
+#endif
}
-void ndisc_cleanup(void)
+/*
+ * Register the ndisc netdevice notifier.
+ * Returns the result of register_netdevice_notifier().
+ */
+int __init ndisc_late_init(void)
+{
+	int err = register_netdevice_notifier(&ndisc_netdev_notifier);
+
+	return err;
+}
+
+void ndisc_late_cleanup(void)
{
unregister_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_cleanup(void)
+{
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(&nd_tbl.parms);
#endif
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 4433ab40e7de..a7f842b29b67 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -153,6 +153,19 @@ config IP6_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This avoids conntrack and server resource usage
+ during SYN-flood attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP6_NF_MANGLE
tristate "Packet mangling"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2d11fcc2cf3c..2b53738f798c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
# l3 independent conntrack
-obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
+obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
@@ -37,3 +37,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
+obj-$(CONFIG_IP6_NF_TARGET_SYNPROXY) += ip6t_SYNPROXY.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 47bff6107519..3e4e92d5e157 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -76,7 +76,7 @@ static int masq_device_event(struct notifier_block *this,
if (event == NETDEV_DOWN)
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
return NOTIFY_DONE;
}
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 70f9abc0efe9..56eef30ee5f6 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -169,7 +169,25 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
nf_ct_attach(nskb, oldskb);
- ip6_local_out(nskb);
+#ifdef CONFIG_BRIDGE_NETFILTER
+ /* If we use ip6_local_out for bridged traffic, the MAC source on
+ * the RST will be ours, instead of the destination's. This confuses
+ * some routers/firewalls, and they drop the packet. So we need to
+ * build the eth header using the original destination's MAC as the
+ * source, and send the RST packet directly.
+ */
+ if (oldskb->nf_bridge) {
+ struct ethhdr *oeth = eth_hdr(oldskb);
+ nskb->dev = oldskb->nf_bridge->physindev;
+ nskb->protocol = htons(ETH_P_IPV6);
+ ip6h->payload_len = htons(sizeof(struct tcphdr));
+ if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
+ oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ return;
+ dev_queue_xmit(nskb);
+ } else
+#endif
+ ip6_local_out(nskb);
}
static inline void
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
new file mode 100644
index 000000000000..2748b042da72
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -0,0 +1,503 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+/*
+ * Append an IPv6 header to @skb and fill in the fields that do not depend
+ * on the payload: flow word, next header (TCP), hop limit and addresses.
+ * payload_len is not written here.
+ *
+ * Returns a pointer to the new header inside @skb.
+ */
+static struct ipv6hdr *
+synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr,
+		  const struct in6_addr *daddr)
+{
+	struct ipv6hdr *hdr;
+
+	skb_reset_network_header(skb);
+	hdr = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
+
+	ip6_flow_hdr(hdr, 0, 0);
+	hdr->saddr = *saddr;
+	hdr->daddr = *daddr;
+	hdr->nexthdr = IPPROTO_TCP;
+	hdr->hop_limit = 64; //XXX
+
+	return hdr;
+}
+
+/*
+ * Finish and transmit a TCP segment built by one of the synproxy_send_*()
+ * helpers: set up partial checksumming, route the packet (including the
+ * xfrm lookup), optionally attach a conntrack and hand it to
+ * ip6_local_out().
+ *
+ * @skb:          packet that triggered the reply; used for flow
+ *                classification and, when @nfct is NULL, to find the
+ *                network namespace
+ * @nskb:         the packet to send; consumed on every path (sent or freed)
+ * @nfct:         conntrack to attach to @nskb, or NULL for none
+ * @ctinfo:       conntrack info stored in @nskb when @nfct is set
+ * @niph:         IPv6 header inside @nskb
+ * @nth:          TCP header inside @nskb
+ * @tcp_hdr_size: TCP header length in bytes, options included
+ */
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+		  struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+		  struct ipv6hdr *niph, struct tcphdr *nth,
+		  unsigned int tcp_hdr_size)
+{
+	struct net *net;
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	/* synproxy_send_server_ack() and synproxy_send_client_ack() pass
+	 * nfct == NULL, so nf_ct_net() must not be applied unconditionally.
+	 * Fall back to the namespace of the triggering packet's device.
+	 */
+	net = nfct ? nf_ct_net((struct nf_conn *)nfct) : dev_net(skb->dev);
+
+	nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0);
+	nskb->ip_summed = CHECKSUM_PARTIAL;
+	nskb->csum_start = (unsigned char *)nth - nskb->head;
+	nskb->csum_offset = offsetof(struct tcphdr, check);
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_TCP;
+	fl6.saddr = niph->saddr;
+	fl6.daddr = niph->daddr;
+	fl6.fl6_sport = nth->source;
+	fl6.fl6_dport = nth->dest;
+	security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi(&fl6));
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (dst == NULL || dst->error) {
+		dst_release(dst);
+		goto free_nskb;
+	}
+	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
+	if (IS_ERR(dst))
+		goto free_nskb;
+
+	skb_dst_set(nskb, dst);
+
+	if (nfct) {
+		nskb->nfct = nfct;
+		nskb->nfctinfo = ctinfo;
+		nf_conntrack_get(nfct);
+	}
+
+	ip6_local_out(nskb);
+	return;
+
+free_nskb:
+	kfree_skb(nskb);
+}
+
+/*
+ * Reply to a client SYN with a SYN/ACK. The sequence number comes from
+ * __cookie_v6_init_sequence(), the acknowledged sequence is the client's
+ * seq + 1, and the advertised window is 0. ECE is set when the ECN option
+ * bit is present in @opts.
+ */
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+			    const struct synproxy_options *opts)
+{
+	const unsigned int tcp_hdr_size = sizeof(struct tcphdr) +
+					  synproxy_options_size(opts);
+	struct ipv6hdr *oiph = ipv6_hdr(skb);
+	struct ipv6hdr *niph;
+	struct sk_buff *nskb;
+	struct tcphdr *nth;
+	u16 mss = opts->mss;
+
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	/* Reply direction: addresses and ports are swapped. */
+	niph = synproxy_build_ip(nskb, &oiph->daddr, &oiph->saddr);
+
+	skb_reset_transport_header(nskb);
+	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth->source = th->dest;
+	nth->dest = th->source;
+	nth->seq = htonl(__cookie_v6_init_sequence(oiph, th, &mss));
+	nth->ack_seq = htonl(ntohl(th->seq) + 1);
+	tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+	if (opts->options & XT_SYNPROXY_OPT_ECN)
+		tcp_flag_word(nth) |= TCP_FLAG_ECE;
+	nth->doff = tcp_hdr_size / 4;
+	nth->window = 0;
+	nth->check = 0;
+	nth->urg_ptr = 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+			  niph, nth, tcp_hdr_size);
+}
+
+/*
+ * Send a SYN towards the server, keeping the addresses and ports of the
+ * client packet @skb. The SYN uses @recv_seq - 1 as its sequence number
+ * and is attached to the per-namespace template conntrack (snet->tmpl).
+ */
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts, u32 recv_seq)
+{
+	const unsigned int tcp_hdr_size = sizeof(struct tcphdr) +
+					  synproxy_options_size(opts);
+	struct ipv6hdr *oiph = ipv6_hdr(skb);
+	struct ipv6hdr *niph;
+	struct sk_buff *nskb;
+	struct tcphdr *nth;
+
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	/* Same direction as the client packet: nothing is swapped. */
+	niph = synproxy_build_ip(nskb, &oiph->saddr, &oiph->daddr);
+
+	skb_reset_transport_header(nskb);
+	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth->source = th->source;
+	nth->dest = th->dest;
+	nth->seq = htonl(recv_seq - 1);
+	/* ack_seq is used to relay our ISN to the synproxy hook to initialize
+	 * sequence number translation once a connection tracking entry exists.
+	 */
+	nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+	tcp_flag_word(nth) = TCP_FLAG_SYN;
+	if (opts->options & XT_SYNPROXY_OPT_ECN)
+		tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+	nth->doff = tcp_hdr_size / 4;
+	nth->window = th->window;
+	nth->check = 0;
+	nth->urg_ptr = 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+			  niph, nth, tcp_hdr_size);
+}
+
+/*
+ * Acknowledge the server's SYN/ACK (@skb). Addresses and ports are swapped
+ * relative to @skb, seq is the server's ack_seq, ack_seq is the server's
+ * seq + 1, and the window is taken from the conntrack state's td_maxwin
+ * for the original direction. The packet is sent without a conntrack.
+ */
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+			 const struct ip_ct_tcp *state,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts)
+{
+	const unsigned int tcp_hdr_size = sizeof(struct tcphdr) +
+					  synproxy_options_size(opts);
+	struct ipv6hdr *oiph = ipv6_hdr(skb);
+	struct ipv6hdr *niph;
+	struct sk_buff *nskb;
+	struct tcphdr *nth;
+
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (!nskb)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(nskb, &oiph->daddr, &oiph->saddr);
+
+	skb_reset_transport_header(nskb);
+	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth->source = th->dest;
+	nth->dest = th->source;
+	/* Already in network byte order: copy it straight across. */
+	nth->seq = th->ack_seq;
+	nth->ack_seq = htonl(ntohl(th->seq) + 1);
+	tcp_flag_word(nth) = TCP_FLAG_ACK;
+	nth->doff = tcp_hdr_size / 4;
+	nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+	nth->check = 0;
+	nth->urg_ptr = 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+/*
+ * Forward an ACK built from the server's SYN/ACK (@skb) in the same
+ * direction as @skb: same addresses and ports, seq = th->seq + 1, ack_seq
+ * copied unchanged. The advertised window is the server's window shifted
+ * right by opts->wscale. The packet is sent without a conntrack.
+ */
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 const struct synproxy_options *opts)
+{
+	struct sk_buff *nskb;
+	struct ipv6hdr *iph, *niph;
+	struct tcphdr *nth;
+	unsigned int tcp_hdr_size;
+
+	iph = ipv6_hdr(skb);
+
+	tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+	nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+			 GFP_ATOMIC);
+	if (nskb == NULL)
+		return;
+	skb_reserve(nskb, MAX_TCP_HEADER);
+
+	niph = synproxy_build_ip(nskb, &iph->saddr, &iph->daddr);
+
+	skb_reset_transport_header(nskb);
+	nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+	nth->source = th->source;
+	nth->dest = th->dest;
+	nth->seq = htonl(ntohl(th->seq) + 1);
+	nth->ack_seq = th->ack_seq;
+	tcp_flag_word(nth) = TCP_FLAG_ACK;
+	nth->doff = tcp_hdr_size / 4;
+	/* th->window is in network byte order: convert to host order before
+	 * shifting, then back to network order for the wire.
+	 */
+	nth->window = htons(ntohs(th->window) >> opts->wscale);
+	nth->check = 0;
+	nth->urg_ptr = 0;
+
+	synproxy_build_options(nth, opts);
+
+	synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+/*
+ * Validate the cookie carried in a client's ACK and, when it is valid,
+ * open the connection to the server.
+ *
+ * The cookie is checked with __cookie_v6_check() against ack_seq - 1. On
+ * failure the cookie_invalid counter is bumped and false is returned. On
+ * success cookie_valid is bumped, opts->mss is set to the returned MSS,
+ * the timestamp cookie is evaluated when the timestamp option is set, a
+ * SYN is sent to the server and true is returned.
+ */
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+			 const struct sk_buff *skb, const struct tcphdr *th,
+			 struct synproxy_options *opts, u32 recv_seq)
+{
+	int mss = __cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+
+	if (!mss) {
+		this_cpu_inc(snet->stats->cookie_invalid);
+		return false;
+	}
+
+	this_cpu_inc(snet->stats->cookie_valid);
+	opts->mss = mss;
+
+	if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+		synproxy_check_timestamp_cookie(opts);
+
+	synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+	return true;
+}
+
+/*
+ * SYNPROXY target for ip6tables.
+ *
+ * Packets with a bad checksum, an unreadable TCP header or unparsable
+ * options are dropped. A pure SYN is answered with a cookie SYN/ACK and
+ * dropped; a pure ACK is checked as a cookie reply and dropped. Any other
+ * flag combination continues rule traversal (XT_CONTINUE).
+ */
+static unsigned int
+synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_synproxy_info *info = par->targinfo;
+	struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+	struct synproxy_options opts = {};
+	struct tcphdr *th, _th;
+
+	if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+		return NF_DROP;
+
+	th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+	if (!th)
+		return NF_DROP;
+
+	if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+		return NF_DROP;
+
+	if (th->syn && !th->ack && !th->fin && !th->rst) {
+		/* Initial SYN from client */
+		this_cpu_inc(snet->stats->syn_received);
+
+		if (th->ece && th->cwr)
+			opts.options |= XT_SYNPROXY_OPT_ECN;
+
+		/* Only offer what the rule allows. */
+		opts.options &= info->options;
+		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+			synproxy_init_timestamp_cookie(info, &opts);
+		else
+			opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+					  XT_SYNPROXY_OPT_SACK_PERM |
+					  XT_SYNPROXY_OPT_ECN);
+
+		synproxy_send_client_synack(skb, th, &opts);
+		return NF_DROP;
+	}
+
+	if (th->ack && !th->fin && !th->rst && !th->syn) {
+		/* ACK from client */
+		synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+		return NF_DROP;
+	}
+
+	return XT_CONTINUE;
+}
+
+/*
+ * Netfilter hook that drives the synproxy state machine for connections
+ * whose conntrack carries a synproxy extension.
+ *
+ * Packets without a conntrack, without the synproxy extension, loopback
+ * packets, and packets whose upper-layer header cannot be located or is
+ * not TCP are accepted untouched. Otherwise the conntrack TCP state
+ * selects the action:
+ *   CLOSE     - RST without a seen reply: initialise the sequence
+ *               adjustment; SYN in original direction: treat the
+ *               connection as reopened and continue as SYN_SENT.
+ *   SYN_SENT  - ACK in original direction: re-validate the cookie and
+ *               drop; otherwise record the ISN (and timestamp).
+ *   SYN_RECV  - SYN/ACK: acknowledge it towards the server, set up the
+ *               sequence adjustment, send an ACK in the packet's own
+ *               direction, and steal the packet.
+ * All remaining packets get their timestamps adjusted and are accepted.
+ */
+static unsigned int ipv6_synproxy_hook(unsigned int hooknum,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	struct nf_conn_synproxy *synproxy;
+	struct synproxy_options opts = {};
+	const struct ip_ct_tcp *state;
+	struct tcphdr *th, _th;
+	__be16 frag_off;
+	u8 nexthdr;
+	int thoff;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return NF_ACCEPT;
+
+	synproxy = nfct_synproxy(ct);
+	if (synproxy == NULL)
+		return NF_ACCEPT;
+
+	if (nf_is_loopback_packet(skb))
+		return NF_ACCEPT;
+
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+				 &frag_off);
+	/* Only TCP may be parsed below; anything else (or a header chain
+	 * that could not be walked) is passed through unmodified.
+	 */
+	if (thoff < 0 || nexthdr != IPPROTO_TCP)
+		return NF_ACCEPT;
+
+	th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+	if (th == NULL)
+		return NF_DROP;
+
+	state = &ct->proto.tcp;
+	switch (state->state) {
+	case TCP_CONNTRACK_CLOSE:
+		if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+			nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+						      ntohl(th->seq) + 1);
+			break;
+		}
+
+		if (!th->syn || th->ack ||
+		    CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+			break;
+
+		/* Reopened connection - reset the sequence number and timestamp
+		 * adjustments, they will get initialized once the connection is
+		 * reestablished.
+		 */
+		nf_ct_seqadj_init(ct, ctinfo, 0);
+		synproxy->tsoff = 0;
+		this_cpu_inc(snet->stats->conn_reopened);
+
+		/* fall through */
+	case TCP_CONNTRACK_SYN_SENT:
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
+		if (!th->syn && th->ack &&
+		    CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+			/* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+			 * therefore we need to add 1 to make the SYN sequence
+			 * number match the one of first SYN.
+			 */
+			if (synproxy_recv_client_ack(snet, skb, th, &opts,
+						     ntohl(th->seq) + 1))
+				this_cpu_inc(snet->stats->cookie_retrans);
+
+			return NF_DROP;
+		}
+
+		synproxy->isn = ntohl(th->ack_seq);
+		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+			synproxy->its = opts.tsecr;
+		break;
+	case TCP_CONNTRACK_SYN_RECV:
+		if (!th->syn || !th->ack)
+			break;
+
+		if (!synproxy_parse_options(skb, thoff, th, &opts))
+			return NF_DROP;
+
+		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+			synproxy->tsoff = opts.tsval - synproxy->its;
+
+		opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+				  XT_SYNPROXY_OPT_WSCALE |
+				  XT_SYNPROXY_OPT_SACK_PERM);
+
+		/* tsval/tsecr are swapped for the ACK to the server and
+		 * swapped back for the ACK sent in the other direction.
+		 */
+		swap(opts.tsval, opts.tsecr);
+		synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+		nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+		swap(opts.tsval, opts.tsecr);
+		synproxy_send_client_ack(snet, skb, th, &opts);
+
+		consume_skb(skb);
+		return NF_STOLEN;
+	default:
+		break;
+	}
+
+	synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+	return NF_ACCEPT;
+}
+
+/*
+ * Rule validation: the rule must match on protocol TCP, without inversion.
+ * Returns -EINVAL otherwise, else the result of taking a reference on the
+ * conntrack l3proto module for the rule's family.
+ */
+static int synproxy_tg6_check(const struct xt_tgchk_param *par)
+{
+	const struct ip6t_entry *entry = par->entryinfo;
+
+	if (!(entry->ipv6.flags & IP6T_F_PROTO))
+		return -EINVAL;
+	if (entry->ipv6.proto != IPPROTO_TCP)
+		return -EINVAL;
+	if (entry->ipv6.invflags & XT_INV_PROTO)
+		return -EINVAL;
+
+	return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family); /* pairs with nf_ct_l3proto_try_module_get() in synproxy_tg6_check() */
+}
+
+static struct xt_target synproxy_tg6_reg __read_mostly = { /* xtables registration record for the IPv6 SYNPROXY target */
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV6,
+ .target = synproxy_tg6, /* per-packet handler */
+ .targetsize = sizeof(struct xt_synproxy_info), /* synproxy_tg6() reads par->targinfo as this type */
+ .checkentry = synproxy_tg6_check, /* requires a non-inverted TCP protocol match */
+ .destroy = synproxy_tg6_destroy, /* releases the module reference taken by checkentry */
+ .me = THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { /* ipv6_synproxy_hook is installed at two hook points */
+ { /* locally delivered packets */
+ .hook = ipv6_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, /* one below the conntrack confirm priority value */
+ },
+ { /* packets about to leave the host */
+ .hook = ipv6_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, /* same priority as the LOCAL_IN entry */
+ },
+};
+
+/*
+ * Module init: register the netfilter hooks first, then the xtables
+ * target. If the target cannot be registered the hooks are removed again.
+ * Returns 0 on success or the negative error from the failing step.
+ */
+static int __init synproxy_tg6_init(void)
+{
+	int err;
+
+	err = nf_register_hooks(ipv6_synproxy_ops,
+				ARRAY_SIZE(ipv6_synproxy_ops));
+	if (err < 0)
+		return err;
+
+	err = xt_register_target(&synproxy_tg6_reg);
+	if (err < 0) {
+		nf_unregister_hooks(ipv6_synproxy_ops,
+				    ARRAY_SIZE(ipv6_synproxy_ops));
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit synproxy_tg6_exit(void)
+{
+ xt_unregister_target(&synproxy_tg6_reg); /* teardown runs in the reverse order of synproxy_tg6_init() */
+ nf_unregister_hooks(ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops));
+}
+
+module_init(synproxy_tg6_init);
+module_exit(synproxy_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index beb5777d2043..29b44b14c5ea 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,7 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net)
net->ipv6.ip6table_filter =
ip6t_register_table(net, &packet_filter, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_filter);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter);
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index e075399d8b72..c705907ae6ab 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -101,7 +101,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net)
net->ipv6.ip6table_mangle =
ip6t_register_table(net, &packet_mangler, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_mangle);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle);
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 6383f90efda8..9b076d2d3a7b 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -293,7 +293,7 @@ static int __net_init ip6table_nat_net_init(struct net *net)
return -ENOMEM;
net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_nat);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat);
}
static void __net_exit ip6table_nat_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 60d1bddff7a0..9a626d86720f 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -40,7 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net)
net->ipv6.ip6table_raw =
ip6t_register_table(net, &packet_raw, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_raw);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw);
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index db155351339c..ce88d1d7e525 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -58,7 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net)
net->ipv6.ip6table_security =
ip6t_register_table(net, &security_table, repl);
kfree(repl);
- return PTR_RET(net->ipv6.ip6table_security);
+ return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security);
}
static void __net_exit ip6table_security_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c9b6a6e6a1e8..d6e4dd8b58df 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
@@ -158,11 +159,7 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
- typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
- seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust ||
- !seq_adjust(skb, ct, ctinfo, protoff)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 61aaf70f376e..2205e8eeeacf 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -69,8 +69,8 @@ icmpv6_manip_pkt(struct sk_buff *skb,
hdr = (struct icmp6hdr *)(skb->data + hdroff);
l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
tuple, maniptype);
- if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
- hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
+ if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+ hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
tuple->src.u.icmp.id, 0);
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index ab92a3673fbb..827f795209cf 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
+#include <net/addrconf.h>
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
@@ -75,3 +76,50 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
return offset;
}
EXPORT_SYMBOL(ip6_find_1stfragopt);
+
+#if IS_ENABLED(CONFIG_IPV6)
+/*
+ * Hop limit to use for packets sent over @dst.
+ *
+ * The route's RTAX_HOPLIMIT metric wins when it is non-zero. Otherwise
+ * the value comes from the output device's IPv6 configuration, or from
+ * the namespace-wide devconf_all when the device has no inet6_dev.
+ */
+int ip6_dst_hoplimit(struct dst_entry *dst)
+{
+	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
+	struct net_device *dev;
+	struct inet6_dev *idev;
+
+	if (hoplimit != 0)
+		return hoplimit;
+
+	dev = dst->dev;
+
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
+	hoplimit = idev ? idev->cnf.hop_limit
+			: dev_net(dev)->ipv6.devconf_all->hop_limit;
+	rcu_read_unlock();
+
+	return hoplimit;
+}
+EXPORT_SYMBOL(ip6_dst_hoplimit);
+#endif
+
+/*
+ * Fill in the IPv6 payload length of @skb and run the NF_INET_LOCAL_OUT
+ * hook. A payload larger than IPV6_MAXPLEN is written as 0.
+ * Returns the result of nf_hook().
+ */
+int __ip6_local_out(struct sk_buff *skb)
+{
+	int payload = skb->len - sizeof(struct ipv6hdr);
+
+	ipv6_hdr(skb)->payload_len =
+		htons(payload > IPV6_MAXPLEN ? 0 : payload);
+
+	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+		       skb_dst(skb)->dev, dst_output);
+}
+EXPORT_SYMBOL_GPL(__ip6_local_out);
+
+/*
+ * Send a locally generated IPv6 packet. dst_output() is called only when
+ * __ip6_local_out() returns 1; any other value is returned unchanged.
+ */
+int ip6_local_out(struct sk_buff *skb)
+{
+	int err = __ip6_local_out(skb);
+
+	if (unlikely(err != 1))
+		return err;
+
+	return dst_output(skb);
+}
+EXPORT_SYMBOL_GPL(ip6_local_out);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 51c3285b5d9b..091d066a57b3 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -91,6 +91,10 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
/* IPSTATS_MIB_CSUMERRORS is not relevant in IPv6 (no checksum) */
+ SNMP_MIB_ITEM("Ip6InNoECTPkts", IPSTATS_MIB_NOECTPKTS),
+ SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
+ SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
+ SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index cdaed47ba932..a4ed2416399e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -63,6 +63,8 @@
#include <linux/seq_file.h>
#include <linux/export.h>
+#define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */
+
static struct raw_hashinfo raw_v6_hashinfo = {
.lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock),
};
@@ -108,11 +110,14 @@ found:
*/
static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb)
{
- struct icmp6hdr *_hdr;
+ struct icmp6hdr _hdr;
const struct icmp6hdr *hdr;
+ /* We require only the four bytes of the ICMPv6 header, not any
+ * additional bytes of message body in "struct icmp6hdr".
+ */
hdr = skb_header_pointer(skb, skb_transport_offset(skb),
- sizeof(_hdr), &_hdr);
+ ICMPV6_HDRLEN, &_hdr);
if (hdr) {
const __u32 *data = &raw6_sk(sk)->filter.data[0];
unsigned int type = hdr->icmp6_type;
@@ -330,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb,
ip6_sk_update_pmtu(skb, sk, info);
harderr = (np->pmtudisc == IPV6_PMTUDISC_DO);
}
- if (type == NDISC_REDIRECT)
+ if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
+ return;
+ }
if (np->recverr) {
u8 *payload = skb->data;
if (!inet->hdrincl)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8d9a93ed9c59..f54e3a101098 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -283,9 +283,8 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
- rt->rt6i_genid = rt_genid(net);
+ rt->rt6i_genid = rt_genid_ipv6(net);
INIT_LIST_HEAD(&rt->rt6i_siblings);
- rt->rt6i_nsiblings = 0;
}
return rt;
}
@@ -477,6 +476,24 @@ out:
}
#ifdef CONFIG_IPV6_ROUTER_PREF
+struct __rt6_probe_work { /* one deferred router probe, allocated in rt6_probe() */
+ struct work_struct work; /* queued with schedule_work(); handler is rt6_probe_deferred() */
+ struct in6_addr target; /* copy of the route's rt6i_gateway */
+ struct net_device *dev; /* held with dev_hold() when queued, released by the handler */
+};
+
+/*
+ * Work handler for a deferred router probe: send a neighbour solicitation
+ * for the stored target to its solicited-node multicast address, then
+ * drop the device reference and free the work item.
+ */
+static void rt6_probe_deferred(struct work_struct *w)
+{
+	struct in6_addr mcaddr;
+	struct __rt6_probe_work *work =
+		container_of(w, struct __rt6_probe_work, work);
+
+	addrconf_addr_solict_mult(&work->target, &mcaddr);
+	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+	dev_put(work->dev);
+	/* Free the container that was allocated, not the embedded member,
+	 * so this stays correct if the struct layout ever changes.
+	 */
+	kfree(work);
+}
+
static void rt6_probe(struct rt6_info *rt)
{
struct neighbour *neigh;
@@ -500,17 +517,23 @@ static void rt6_probe(struct rt6_info *rt)
if (!neigh ||
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
- struct in6_addr mcaddr;
- struct in6_addr *target;
+ struct __rt6_probe_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (neigh) {
+ if (neigh && work)
neigh->updated = jiffies;
+
+ if (neigh)
write_unlock(&neigh->lock);
- }
- target = (struct in6_addr *)&rt->rt6i_gateway;
- addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
+ if (work) {
+ INIT_WORK(&work->work, rt6_probe_deferred);
+ work->target = rt->rt6i_gateway;
+ dev_hold(rt->dst.dev);
+ work->dev = rt->dst.dev;
+ schedule_work(&work->work);
+ }
} else {
out:
write_unlock(&neigh->lock);
@@ -852,7 +875,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
if (ort->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
- rt->rt6i_gateway = *daddr;
}
rt->rt6i_flags |= RTF_CACHE;
@@ -1062,7 +1084,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
* DST_OBSOLETE_FORCE_CHK which forces validation calls down
* into this function always.
*/
- if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
+ if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
return NULL;
if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
@@ -1157,6 +1179,77 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
+/* Handle redirects */
+struct ip6rd_flowi {
+ struct flowi6 fl6;
+ struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ int flags)
+{
+ struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
+ struct rt6_info *rt;
+ struct fib6_node *fn;
+
+ /* Get the "current" route for this destination and
+ * check if the redirect has come from the appropriate router.
+ *
+ * RFC 4861 specifies that redirects should only be
+ * accepted if they come from the nexthop to the target.
+ * Due to the way the routes are chosen, this notion
+ * is a bit fuzzy and one might need to check all possible
+ * routes.
+ */
+
+ read_lock_bh(&table->tb6_lock);
+ fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+restart:
+ for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if (rt6_check_expired(rt))
+ continue;
+ if (rt->dst.error)
+ break;
+ if (!(rt->rt6i_flags & RTF_GATEWAY))
+ continue;
+ if (fl6->flowi6_oif != rt->dst.dev->ifindex)
+ continue;
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+ continue;
+ break;
+ }
+
+ if (!rt)
+ rt = net->ipv6.ip6_null_entry;
+ else if (rt->dst.error) {
+ rt = net->ipv6.ip6_null_entry;
+ goto out;
+ }
+ BACKTRACK(net, &fl6->saddr);
+out:
+ dst_hold(&rt->dst);
+
+ read_unlock_bh(&table->tb6_lock);
+
+ return rt;
+};
+
+static struct dst_entry *ip6_route_redirect(struct net *net,
+ const struct flowi6 *fl6,
+ const struct in6_addr *gateway)
+{
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip6rd_flowi rdfl;
+
+ rdfl.fl6 = *fl6;
+ rdfl.gateway = *gateway;
+
+ return fib6_rule_lookup(net, &rdfl.fl6,
+ flags, __ip6_route_redirect);
+}
+
void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
{
const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
@@ -1171,9 +1264,8 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
fl6.saddr = iph->saddr;
fl6.flowlabel = ip6_flowinfo(iph);
- dst = ip6_route_output(net, NULL, &fl6);
- if (!dst->error)
- rt6_do_redirect(dst, NULL, skb);
+ dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_redirect);
@@ -1193,9 +1285,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
fl6.daddr = msg->dest;
fl6.saddr = iph->daddr;
- dst = ip6_route_output(net, NULL, &fl6);
- if (!dst->error)
- rt6_do_redirect(dst, NULL, skb);
+ dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -1270,6 +1361,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
atomic_set(&rt->dst.__refcnt, 1);
+ rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
rt->rt6i_idev = idev;
@@ -1355,25 +1447,6 @@ out:
return entries > rt_max_size;
}
-int ip6_dst_hoplimit(struct dst_entry *dst)
-{
- int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
- if (hoplimit == 0) {
- struct net_device *dev = dst->dev;
- struct inet6_dev *idev;
-
- rcu_read_lock();
- idev = __in6_dev_get(dev);
- if (idev)
- hoplimit = idev->cnf.hop_limit;
- else
- hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
- rcu_read_unlock();
- }
- return hoplimit;
-}
-EXPORT_SYMBOL(ip6_dst_hoplimit);
-
/*
*
*/
@@ -1824,7 +1897,10 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
in6_dev_hold(rt->rt6i_idev);
rt->dst.lastuse = jiffies;
- rt->rt6i_gateway = ort->rt6i_gateway;
+ if (ort->rt6i_flags & RTF_GATEWAY)
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ else
+ rt->rt6i_gateway = *dest;
rt->rt6i_flags = ort->rt6i_flags;
if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
(RTF_DEFAULT | RTF_ADDRCONF))
@@ -2111,6 +2187,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
else
rt->rt6i_flags |= RTF_LOCAL;
+ rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 21b25dd8466b..19269453a8ea 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
return false;
}
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But we can still check whether it is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
+{
+ int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ prefix_len = tunnel->ip6rd.prefixlen + 32
+ - tunnel->ip6rd.relay_prefixlen;
+#else
+ prefix_len = 48;
+#endif
+ return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+ const struct iphdr *iph,
+ struct ip_tunnel *tunnel)
+{
+ const struct ipv6hdr *ipv6h;
+
+ if (tunnel->dev->priv_flags & IFF_ISATAP) {
+ if (!isatap_chksrc(skb, iph, tunnel))
+ return true;
+
+ return false;
+ }
+
+ if (tunnel->dev->flags & IFF_POINTOPOINT)
+ return false;
+
+ ipv6h = ipv6_hdr(skb);
+
+ if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+ net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+ }
+
+ if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+ return false;
+
+ if (only_dnatted(tunnel, &ipv6h->daddr))
+ return false;
+
+ net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+}
+
static int ipip6_rcv(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -581,29 +645,17 @@ static int ipip6_rcv(struct sk_buff *skb)
tunnel->parms.iph.protocol != 0)
goto out;
- secpath_reset(skb);
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
IPCB(skb)->flags = 0;
skb->protocol = htons(ETH_P_IPV6);
- skb->pkt_type = PACKET_HOST;
- if (tunnel->dev->priv_flags & IFF_ISATAP) {
- if (!isatap_chksrc(skb, iph, tunnel)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
- } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) {
- if (is_spoofed_6rd(tunnel, iph->saddr,
- &ipv6_hdr(skb)->saddr) ||
- is_spoofed_6rd(tunnel, iph->daddr,
- &ipv6_hdr(skb)->daddr)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
+ if (packet_is_spoofed(skb, iph, tunnel)) {
+ tunnel->dev->stats.rx_errors++;
+ goto out;
}
- __skb_tunnel_rx(skb, tunnel->dev);
+ __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
@@ -621,8 +673,6 @@ static int ipip6_rcv(struct sk_buff *skb)
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
- if (tunnel->net != dev_net(tunnel->dev))
- skb_scrub_packet(skb);
netif_rx(skb);
return 0;
@@ -752,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -781,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -858,9 +908,6 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
tunnel->err_count = 0;
}
- if (tunnel->net != dev_net(dev))
- skb_scrub_packet(skb);
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
@@ -891,8 +938,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
skb->encapsulation = 1;
}
- err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr,
- IPPROTO_IPV6, tos, ttl, df);
+ err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
+ ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;
@@ -1592,7 +1639,7 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
- if (dev_net(t->dev) != net)
+ if (!net_eq(dev_net(t->dev), net))
unregister_netdevice_queue(t->dev,
head);
t = rtnl_dereference(t->next);
@@ -1619,6 +1666,7 @@ static int __net_init sit_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(sitn->fb_tunnel_dev, net);
+ sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
/* FB netdevice is special: we have one, and only one per netns.
* Allowing to move it to another netns is clearly unsafe.
*/
@@ -1653,7 +1701,6 @@ static void __net_exit sit_exit_net(struct net *net)
rtnl_lock();
sit_destroy_tunnels(sitn, &list);
- unregister_netdevice_queue(sitn->fb_tunnel_dev, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index d5dda20bd717..bf63ac8a49b9 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -112,32 +112,38 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
& COOKIEMASK;
}
-__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
+u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
+ const struct tcphdr *th, __u16 *mssp)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
int mssind;
const __u16 mss = *mssp;
- tcp_synq_overflow(sk);
-
for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
if (mss >= msstab[mssind])
break;
*mssp = msstab[mssind];
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
th->dest, ntohl(th->seq),
jiffies / (HZ * 60), mssind);
}
+EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
-static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
+__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
+
+ tcp_synq_overflow(sk);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+
+ return __cookie_v6_init_sequence(iph, th, mssp);
+}
+
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
+ __u32 cookie)
+{
__u32 seq = ntohl(th->seq) - 1;
__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
th->source, th->dest, seq,
@@ -145,6 +151,7 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
+EXPORT_SYMBOL_GPL(__cookie_v6_check);
struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
{
@@ -167,7 +174,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
if (tcp_synq_no_recent_overflow(sk) ||
- (mss = cookie_check(skb, cookie)) == 0) {
+ (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6e1649d58533..5c71501fc917 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -963,7 +963,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
- if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+ if ((sysctl_tcp_syncookies == 2 ||
+ inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, skb, "TCPv6");
if (!want_cookie)
goto drop;
@@ -1237,8 +1238,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
tcp_initialize_rcv_mss(newsk);
- tcp_synack_rtt_meas(newsk, req);
- newtp->total_retrans = req->num_retrans;
newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
@@ -1361,8 +1360,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
}
}
- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
- goto reset;
+ tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
if (opt_skb)
goto ipv6_pktoptions;
return 0;
@@ -1427,7 +1425,7 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (np->rxopt.bits.rxtclass)
- np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1732,7 +1730,7 @@ static void get_openreq6(struct seq_file *seq,
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3],
@@ -1783,7 +1781,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
+ "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
i,
src->s6_addr32[0], src->s6_addr32[1],
src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1926,6 +1924,7 @@ struct proto tcpv6_prot = {
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
.enter_memory_pressure = tcp_enter_memory_pressure,
+ .stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f4058150262b..18786098fd41 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == ICMPV6_PKT_TOOBIG)
ip6_sk_update_pmtu(skb, sk, info);
- if (type == NDISC_REDIRECT)
+ if (type == NDISC_REDIRECT) {
ip6_sk_redirect(skb, sk);
+ goto out;
+ }
np = inet6_sk(sk);
@@ -1223,9 +1225,6 @@ do_udp_sendmsg:
if (tclass < 0)
tclass = np->tclass;
- if (dontfrag < 0)
- dontfrag = np->dontfrag;
-
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
@@ -1244,6 +1243,8 @@ back_from_confirm:
up->pending = AF_INET6;
do_append_data:
+ if (dontfrag < 0)
+ dontfrag = np->dontfrag;
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 5d1b8d7ac993..60559511bd9c 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -21,26 +21,25 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
- /* UDP Tunnel offload on ipv6 is not yet supported. */
- if (skb->encapsulation)
- return -EINVAL;
-
if (!pskb_may_pull(skb, sizeof(*uh)))
return -EINVAL;
- ipv6h = ipv6_hdr(skb);
- uh = udp_hdr(skb);
+ if (likely(!skb->encapsulation)) {
+ ipv6h = ipv6_hdr(skb);
+ uh = udp_hdr(skb);
+
+ uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+ IPPROTO_UDP, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ }
- uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
- IPPROTO_UDP, 0);
- skb->csum_start = skb_transport_header(skb) - skb->head;
- skb->csum_offset = offsetof(struct udphdr, check);
- skb->ip_summed = CHECKSUM_PARTIAL;
return 0;
}
static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
- netdev_features_t features)
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
@@ -75,47 +74,51 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
goto out;
}
- /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
- * do checksum of UDP packets sent as multiple IP fragments.
- */
- offset = skb_checksum_start_offset(skb);
- csum = skb_checksum(skb, offset, skb->len - offset, 0);
- offset += skb->csum_offset;
- *(__sum16 *)(skb->data + offset) = csum_fold(csum);
- skb->ip_summed = CHECKSUM_NONE;
-
- /* Check if there is enough headroom to insert fragment header. */
- tnl_hlen = skb_tnl_header_len(skb);
- if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
- if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
- goto out;
+ if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+ segs = skb_udp_tunnel_segment(skb, features);
+ else {
+ /* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+ * do checksum of UDP packets sent as multiple IP fragments.
+ */
+ offset = skb_checksum_start_offset(skb);
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ offset += skb->csum_offset;
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Check if there is enough headroom to insert fragment header. */
+ tnl_hlen = skb_tnl_header_len(skb);
+ if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
+ if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
+ goto out;
+ }
+
+ /* Find the unfragmentable header and shift it left by frag_hdr_sz
+ * bytes to insert fragment header.
+ */
+ unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ nexthdr = *prevhdr;
+ *prevhdr = NEXTHDR_FRAGMENT;
+ unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
+ unfrag_ip6hlen + tnl_hlen;
+ packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
+ memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
+
+ SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
+ skb->mac_header -= frag_hdr_sz;
+ skb->network_header -= frag_hdr_sz;
+
+ fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+ fptr->nexthdr = nexthdr;
+ fptr->reserved = 0;
+ ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+
+ /* Fragment the skb. ipv6 header and the remaining fields of the
+ * fragment header are updated in ipv6_gso_segment()
+ */
+ segs = skb_segment(skb, features);
}
- /* Find the unfragmentable header and shift it left by frag_hdr_sz
- * bytes to insert fragment header.
- */
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- nexthdr = *prevhdr;
- *prevhdr = NEXTHDR_FRAGMENT;
- unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
- unfrag_ip6hlen + tnl_hlen;
- packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset;
- memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len);
-
- SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz;
- skb->mac_header -= frag_hdr_sz;
- skb->network_header -= frag_hdr_sz;
-
- fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
- fptr->nexthdr = nexthdr;
- fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
-
- /* Fragment the skb. ipv6 header and the remaining fields of the
- * fragment header are updated in ipv6_gso_segment()
- */
- segs = skb_segment(skb, features);
-
out:
return segs;
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 23ed03d786c8..08ed2772b7aa 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -138,6 +138,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
+ fl6->flowi6_oif = skb_dst(skb)->dev->ifindex;
fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 65e8833a2510..e15c16a517e7 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -213,7 +213,7 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v)
ntohs(ipxs->dest_addr.sock));
}
- seq_printf(seq, "%08X %08X %02X %03d\n",
+ seq_printf(seq, "%08X %08X %02X %03u\n",
sk_wmem_alloc_get(s),
sk_rmem_alloc_get(s),
s->sk_state,
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index ae43c62f9045..85372cfa7b9f 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -75,7 +75,7 @@ static pi_minor_info_t pi_minor_call_table[] = {
{ NULL, 0 }, /* 0x00 */
{ irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */
};
-static pi_major_info_t pi_major_call_table[] = {{ pi_minor_call_table, 2 }};
+static pi_major_info_t pi_major_call_table[] = { { pi_minor_call_table, 2 } };
static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 };
/************************ GLOBAL PROCEDURES ************************/
@@ -205,7 +205,7 @@ static void irttp_todo_expired(unsigned long data)
*/
static void irttp_flush_queues(struct tsap_cb *self)
{
- struct sk_buff* skb;
+ struct sk_buff *skb;
IRDA_DEBUG(4, "%s()\n", __func__);
@@ -400,7 +400,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
/* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to
* use only 0x01-0x6F. Of course, we can use LSAP_ANY as well.
* JeanII */
- if((stsap_sel != LSAP_ANY) &&
+ if ((stsap_sel != LSAP_ANY) &&
((stsap_sel < 0x01) || (stsap_sel >= 0x70))) {
IRDA_DEBUG(0, "%s(), invalid tsap!\n", __func__);
return NULL;
@@ -427,7 +427,7 @@ struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify)
ttp_notify.data_indication = irttp_data_indication;
ttp_notify.udata_indication = irttp_udata_indication;
ttp_notify.flow_indication = irttp_flow_indication;
- if(notify->status_indication != NULL)
+ if (notify->status_indication != NULL)
ttp_notify.status_indication = irttp_status_indication;
ttp_notify.instance = self;
strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME);
@@ -639,8 +639,7 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
*/
if ((self->tx_max_sdu_size != 0) &&
(self->tx_max_sdu_size != TTP_SAR_UNBOUND) &&
- (skb->len > self->tx_max_sdu_size))
- {
+ (skb->len > self->tx_max_sdu_size)) {
IRDA_ERROR("%s: SAR enabled, but data is larger than TxMaxSduSize!\n",
__func__);
ret = -EMSGSIZE;
@@ -733,8 +732,7 @@ static void irttp_run_tx_queue(struct tsap_cb *self)
* poll us through irttp_flow_indication() - Jean II */
while ((self->send_credit > 0) &&
(!irlmp_lap_tx_queue_full(self->lsap)) &&
- (skb = skb_dequeue(&self->tx_queue)))
- {
+ (skb = skb_dequeue(&self->tx_queue))) {
/*
* Since we can transmit and receive frames concurrently,
* the code below is a critical region and we must assure that
@@ -798,8 +796,7 @@ static void irttp_run_tx_queue(struct tsap_cb *self)
* where we can spend a bit of time doing stuff. - Jean II */
if ((self->tx_sdu_busy) &&
(skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) &&
- (!self->close_pend))
- {
+ (!self->close_pend)) {
if (self->notify.flow_indication)
self->notify.flow_indication(self->notify.instance,
self, FLOW_START);
@@ -892,7 +889,7 @@ static int irttp_udata_indication(void *instance, void *sap,
/* Just pass data to layer above */
if (self->notify.udata_indication) {
err = self->notify.udata_indication(self->notify.instance,
- self,skb);
+ self, skb);
/* Same comment as in irttp_do_data_indication() */
if (!err)
return 0;
@@ -1057,7 +1054,7 @@ static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
* to do that. Jean II */
/* If we need to send disconnect. try to do it now */
- if(self->disconnect_pend)
+ if (self->disconnect_pend)
irttp_start_todo_timer(self, 0);
}
@@ -1116,7 +1113,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;);
if (self->connected) {
- if(userdata)
+ if (userdata)
dev_kfree_skb(userdata);
return -EISCONN;
}
@@ -1137,7 +1134,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
* headers
*/
IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
- { dev_kfree_skb(userdata); return -1; } );
+ { dev_kfree_skb(userdata); return -1; });
}
/* Initialize connection parameters */
@@ -1157,7 +1154,7 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
* Give away max 127 credits for now
*/
if (n > 127) {
- self->avail_credit=n-127;
+ self->avail_credit = n - 127;
n = 127;
}
@@ -1166,10 +1163,10 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel,
/* SAR enabled? */
if (max_sdu_size > 0) {
IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
- { dev_kfree_skb(tx_skb); return -1; } );
+ { dev_kfree_skb(tx_skb); return -1; });
/* Insert SAR parameters */
- frame = skb_push(tx_skb, TTP_HEADER+TTP_SAR_HEADER);
+ frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
frame[0] = TTP_PARAMETERS | n;
frame[1] = 0x04; /* Length */
@@ -1386,7 +1383,7 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
* headers
*/
IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER,
- { dev_kfree_skb(userdata); return -1; } );
+ { dev_kfree_skb(userdata); return -1; });
}
self->avail_credit = 0;
@@ -1409,10 +1406,10 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size,
/* SAR enabled? */
if (max_sdu_size > 0) {
IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER),
- { dev_kfree_skb(tx_skb); return -1; } );
+ { dev_kfree_skb(tx_skb); return -1; });
/* Insert TTP header with SAR parameters */
- frame = skb_push(tx_skb, TTP_HEADER+TTP_SAR_HEADER);
+ frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER);
frame[0] = TTP_PARAMETERS | n;
frame[1] = 0x04; /* Length */
@@ -1522,7 +1519,7 @@ int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata,
* function may be called from various context, like user, timer
* for following a disconnect_indication() (i.e. net_bh).
* Jean II */
- if(test_and_set_bit(0, &self->disconnect_pend)) {
+ if (test_and_set_bit(0, &self->disconnect_pend)) {
IRDA_DEBUG(0, "%s(), disconnect already pending\n",
__func__);
if (userdata)
@@ -1627,7 +1624,7 @@ static void irttp_disconnect_indication(void *instance, void *sap,
* Jean II */
/* No need to notify the client if has already tried to disconnect */
- if(self->notify.disconnect_indication)
+ if (self->notify.disconnect_indication)
self->notify.disconnect_indication(self->notify.instance, self,
reason, skb);
else
@@ -1738,8 +1735,7 @@ static void irttp_run_rx_queue(struct tsap_cb *self)
* This is the last fragment, so time to reassemble!
*/
if ((self->rx_sdu_size <= self->rx_max_sdu_size) ||
- (self->rx_max_sdu_size == TTP_SAR_UNBOUND))
- {
+ (self->rx_max_sdu_size == TTP_SAR_UNBOUND)) {
/*
* A little optimizing. Only queue the fragment if
* there are other fragments. Since if this is the
@@ -1860,7 +1856,7 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "dtsap_sel: %02x\n",
self->dtsap_sel);
seq_printf(seq, " connected: %s, ",
- self->connected? "TRUE":"FALSE");
+ self->connected ? "TRUE" : "FALSE");
seq_printf(seq, "avail credit: %d, ",
self->avail_credit);
seq_printf(seq, "remote credit: %d, ",
@@ -1876,9 +1872,9 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "rx_queue len: %u\n",
skb_queue_len(&self->rx_queue));
seq_printf(seq, " tx_sdu_busy: %s, ",
- self->tx_sdu_busy? "TRUE":"FALSE");
+ self->tx_sdu_busy ? "TRUE" : "FALSE");
seq_printf(seq, "rx_sdu_busy: %s\n",
- self->rx_sdu_busy? "TRUE":"FALSE");
+ self->rx_sdu_busy ? "TRUE" : "FALSE");
seq_printf(seq, " max_seg_size: %u, ",
self->max_seg_size);
seq_printf(seq, "tx_max_sdu_size: %u, ",
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ab8bd2cabfa0..911ef03bf8fb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -45,7 +45,7 @@ struct netns_pfkey {
static DEFINE_MUTEX(pfkey_mutex);
#define DUMMY_MARK 0
-static struct xfrm_mark dummy_mark = {0, 0};
+static const struct xfrm_mark dummy_mark = {0, 0};
struct pfkey_sock {
/* struct sock must be the first member of struct pfkey_sock */
struct sock sk;
@@ -338,7 +338,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
return 0;
}
-static u8 sadb_ext_min_len[] = {
+static const u8 sadb_ext_min_len[] = {
[SADB_EXT_RESERVED] = (u8) 0,
[SADB_EXT_SA] = (u8) sizeof(struct sadb_sa),
[SADB_EXT_LIFETIME_CURRENT] = (u8) sizeof(struct sadb_lifetime),
@@ -1098,7 +1098,8 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
x->id.proto = proto;
x->id.spi = sa->sadb_sa_spi;
- x->props.replay_window = sa->sadb_sa_replay;
+ x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay,
+ (sizeof(x->replay.bitmap) * 8));
if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN)
x->props.flags |= XFRM_STATE_NOECN;
if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP)
@@ -1196,10 +1197,6 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
&x->props.saddr);
- if (!x->props.family) {
- err = -EAFNOSUPPORT;
- goto out;
- }
pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1],
&x->id.daddr);
@@ -2205,10 +2202,6 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr);
- if (!xp->family) {
- err = -EINVAL;
- goto out;
- }
xp->selector.family = xp->family;
xp->selector.prefixlen_s = sa->sadb_address_prefixlen;
xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
@@ -2737,7 +2730,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
const struct sadb_msg *hdr, void * const *ext_hdrs);
-static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
+static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
[SADB_RESERVED] = pfkey_reserved,
[SADB_GETSPI] = pfkey_getspi,
[SADB_UPDATE] = pfkey_add,
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index feae495a0a30..b076e8309bc2 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -115,6 +115,11 @@ struct l2tp_net {
static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
+static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
+{
+ return sk->sk_user_data;
+}
+
static inline struct l2tp_net *l2tp_pernet(struct net *net)
{
BUG_ON(!net);
@@ -504,7 +509,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
return 0;
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6) {
+ if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) {
if (!uh->check) {
LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n");
return 1;
@@ -1128,7 +1133,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->local_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (skb->sk->sk_family == PF_INET6)
+ if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
error = inet6_csk_xmit(skb, NULL);
else
#endif
@@ -1255,7 +1260,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Calculate UDP checksum if configured to do so */
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6)
+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
l2tp_xmit_ipv6_csum(sk, skb, udp_len);
else
#endif
@@ -1304,10 +1309,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
*/
static void l2tp_tunnel_destruct(struct sock *sk)
{
- struct l2tp_tunnel *tunnel;
+ struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
struct l2tp_net *pn;
- tunnel = sk->sk_user_data;
if (tunnel == NULL)
goto end;
@@ -1675,7 +1679,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
}
/* Check if this socket has already been prepped */
- tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
+ tunnel = l2tp_tunnel(sk);
if (tunnel != NULL) {
/* This socket has already been prepped */
err = -EBUSY;
@@ -1704,6 +1708,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
if (cfg != NULL)
tunnel->debug = cfg->debug;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (ipv6_addr_v4mapped(&np->saddr) &&
+ ipv6_addr_v4mapped(&np->daddr)) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ tunnel->v4mapped = true;
+ inet->inet_saddr = np->saddr.s6_addr32[3];
+ inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3];
+ inet->inet_daddr = np->daddr.s6_addr32[3];
+ } else {
+ tunnel->v4mapped = false;
+ }
+ }
+#endif
+
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
if (encap == L2TP_ENCAPTYPE_UDP) {
@@ -1712,7 +1734,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6)
+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
udpv6_encap_enable();
else
#endif
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 66a559b104b6..6f251cbc2ed7 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -194,6 +194,9 @@ struct l2tp_tunnel {
struct sock *sock; /* Parent socket */
int fd; /* Parent fd, if tunnel socket
* was created by userspace */
+#if IS_ENABLED(CONFIG_IPV6)
+ bool v4mapped;
+#endif
struct work_struct del_work;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5ebee2ded9e9..8c46b271064a 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -353,7 +353,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
goto error_put_sess_tun;
}
+ local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
+ local_bh_enable();
sock_put(ps->tunnel_sock);
sock_put(sk);
@@ -422,7 +424,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
skb->data[0] = ppph[0];
skb->data[1] = ppph[1];
+ local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
+ local_bh_enable();
sock_put(sk_tun);
sock_put(sk);
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 54563ad8aeb1..355cc3b6fa4d 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param)
} else {
lapb->n2count++;
lapb_requeue_frames(lapb);
+ lapb_kick(lapb);
}
break;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 48aaa89253e0..6cba486353e8 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -321,12 +321,12 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
if (llc->dev) {
if (!addr->sllc_arphrd)
addr->sllc_arphrd = llc->dev->type;
- if (llc_mac_null(addr->sllc_mac))
+ if (is_zero_ether_addr(addr->sllc_mac))
memcpy(addr->sllc_mac, llc->dev->dev_addr,
IFHWADDRLEN);
if (addr->sllc_arphrd != llc->dev->type ||
- !llc_mac_match(addr->sllc_mac,
- llc->dev->dev_addr)) {
+ !ether_addr_equal(addr->sllc_mac,
+ llc->dev->dev_addr)) {
rc = -EINVAL;
llc->dev = NULL;
}
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 0d0d416dfab6..cd8724177965 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -478,8 +478,8 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
return llc->laddr.lsap == laddr->lsap &&
llc->daddr.lsap == daddr->lsap &&
- llc_mac_match(llc->laddr.mac, laddr->mac) &&
- llc_mac_match(llc->daddr.mac, daddr->mac);
+ ether_addr_equal(llc->laddr.mac, laddr->mac) &&
+ ether_addr_equal(llc->daddr.mac, daddr->mac);
}
/**
@@ -550,7 +550,7 @@ static inline bool llc_listener_match(const struct llc_sap *sap,
return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
llc->laddr.lsap == laddr->lsap &&
- llc_mac_match(llc->laddr.mac, laddr->mac);
+ ether_addr_equal(llc->laddr.mac, laddr->mac);
}
static struct sock *__llc_lookup_listener(struct llc_sap *sap,
diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c
index 7b4799cfbf8d..1a3c7e0f5d0d 100644
--- a/net/llc/llc_proc.c
+++ b/net/llc/llc_proc.c
@@ -147,7 +147,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v)
}
seq_printf(seq, "@%02X ", llc->sap->laddr.lsap);
llc_ui_format_mac(seq, llc->daddr.mac);
- seq_printf(seq, "@%02X %8d %8d %2d %3d %4d\n", llc->daddr.lsap,
+ seq_printf(seq, "@%02X %8d %8d %2d %3u %4d\n", llc->daddr.lsap,
sk_wmem_alloc_get(sk),
sk_rmem_alloc_get(sk) - llc->copied_seq,
sk->sk_state,
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 78be45cda5c1..e5850699098e 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -302,7 +302,7 @@ static inline bool llc_dgram_match(const struct llc_sap *sap,
return sk->sk_type == SOCK_DGRAM &&
llc->laddr.lsap == laddr->lsap &&
- llc_mac_match(llc->laddr.mac, laddr->mac);
+ ether_addr_equal(llc->laddr.mac, laddr->mac);
}
/**
@@ -425,7 +425,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
llc_pdu_decode_da(skb, laddr.mac);
llc_pdu_decode_dsap(skb, &laddr.lsap);
- if (llc_mac_multicast(laddr.mac)) {
+ if (is_multicast_ether_addr(laddr.mac)) {
llc_sap_mcast(sap, &laddr, skb);
kfree_skb(skb);
} else {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 43dd7525bfcb..629dee7ec9bf 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -395,9 +395,13 @@ void sta_set_rate_info_tx(struct sta_info *sta,
rinfo->nss = ieee80211_rate_get_vht_nss(rate);
} else {
struct ieee80211_supported_band *sband;
+ int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
+ u16 brate;
+
sband = sta->local->hw.wiphy->bands[
ieee80211_get_sdata_band(sta->sdata)];
- rinfo->legacy = sband->bitrates[rate->idx].bitrate;
+ brate = sband->bitrates[rate->idx].bitrate;
+ rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
}
if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH;
@@ -422,11 +426,13 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
rinfo->mcs = sta->last_rx_rate_idx;
} else {
struct ieee80211_supported_band *sband;
+ int shift = ieee80211_vif_get_shift(&sta->sdata->vif);
+ u16 brate;
sband = sta->local->hw.wiphy->bands[
ieee80211_get_sdata_band(sta->sdata)];
- rinfo->legacy =
- sband->bitrates[sta->last_rx_rate_idx].bitrate;
+ brate = sband->bitrates[sta->last_rx_rate_idx].bitrate;
+ rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
}
if (sta->last_rx_rate_flag & RX_FLAG_40MHZ)
@@ -856,8 +862,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata,
return 0;
}
-static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_beacon_data *params)
+int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_beacon_data *params)
{
struct beacon_data *new, *old;
int new_head_len, new_tail_len;
@@ -1020,6 +1026,12 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+ /* don't allow changing the beacon while CSA is in place - offset
+ * of channel switch counter may change
+ */
+ if (sdata->vif.csa_active)
+ return -EBUSY;
+
old = rtnl_dereference(sdata->u.ap.beacon);
if (!old)
return -ENOENT;
@@ -1044,6 +1056,10 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
return -ENOENT;
old_probe_resp = rtnl_dereference(sdata->u.ap.probe_resp);
+ /* abort any running channel switch */
+ sdata->vif.csa_active = false;
+ cancel_work_sync(&sdata->csa_finalize_work);
+
/* turn off carrier for this interface and dependent VLANs */
list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
netif_carrier_off(vlan->dev);
@@ -1192,8 +1208,6 @@ static int sta_apply_parameters(struct ieee80211_local *local,
struct station_parameters *params)
{
int ret = 0;
- u32 rates;
- int i, j;
struct ieee80211_supported_band *sband;
struct ieee80211_sub_if_data *sdata = sta->sdata;
enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
@@ -1286,16 +1300,10 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta->listen_interval = params->listen_interval;
if (params->supported_rates) {
- rates = 0;
-
- for (i = 0; i < params->supported_rates_len; i++) {
- int rate = (params->supported_rates[i] & 0x7f) * 5;
- for (j = 0; j < sband->n_bitrates; j++) {
- if (sband->bitrates[j].bitrate == rate)
- rates |= BIT(j);
- }
- }
- sta->sta.supp_rates[band] = rates;
+ ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
+ sband, params->supported_rates,
+ params->supported_rates_len,
+ &sta->sta.supp_rates[band]);
}
if (params->ht_capa)
@@ -1958,18 +1966,11 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
}
if (params->basic_rates) {
- int i, j;
- u32 rates = 0;
- struct ieee80211_supported_band *sband = wiphy->bands[band];
-
- for (i = 0; i < params->basic_rates_len; i++) {
- int rate = (params->basic_rates[i] & 0x7f) * 5;
- for (j = 0; j < sband->n_bitrates; j++) {
- if (sband->bitrates[j].bitrate == rate)
- rates |= BIT(j);
- }
- }
- sdata->vif.bss_conf.basic_rates = rates;
+ ieee80211_parse_bitrates(&sdata->vif.bss_conf.chandef,
+ wiphy->bands[band],
+ params->basic_rates,
+ params->basic_rates_len,
+ &sdata->vif.bss_conf.basic_rates);
changed |= BSS_CHANGED_BASIC_RATES;
}
@@ -2301,14 +2302,25 @@ static void ieee80211_rfkill_poll(struct wiphy *wiphy)
}
#ifdef CONFIG_NL80211_TESTMODE
-static int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len)
+static int ieee80211_testmode_cmd(struct wiphy *wiphy,
+ struct wireless_dev *wdev,
+ void *data, int len)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
+ struct ieee80211_vif *vif = NULL;
if (!local->ops->testmode_cmd)
return -EOPNOTSUPP;
- return local->ops->testmode_cmd(&local->hw, data, len);
+ if (wdev) {
+ struct ieee80211_sub_if_data *sdata;
+
+ sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+ if (sdata->flags & IEEE80211_SDATA_IN_DRIVER)
+ vif = &sdata->vif;
+ }
+
+ return local->ops->testmode_cmd(&local->hw, vif, data, len);
}
static int ieee80211_testmode_dump(struct wiphy *wiphy,
@@ -2786,6 +2798,178 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy,
return 0;
}
+/*
+ * Duplicate a beacon description into one self-contained allocation.
+ *
+ * The returned struct is followed, in the same kzalloc() block, by copies of
+ * every non-empty variable-length buffer of @beacon (head, tail, beacon IEs,
+ * probe response IEs, association response IEs, probe response).  The
+ * pointers in the copy refer to that trailing storage, so the whole thing is
+ * released with a single kfree().  Buffers with a zero length are left as
+ * NULL/0 by the zeroing allocation.
+ *
+ * @beacon is only read; it is never modified.
+ *
+ * Returns the new copy, or NULL if the allocation fails.
+ */
+static struct cfg80211_beacon_data *
+cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
+{
+	struct cfg80211_beacon_data *new_beacon;
+	u8 *pos;
+	int len;
+
+	len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len +
+	      beacon->proberesp_ies_len + beacon->assocresp_ies_len +
+	      beacon->probe_resp_len;
+
+	new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL);
+	if (!new_beacon)
+		return NULL;
+
+	/* variable-length data lives directly behind the struct */
+	pos = (u8 *)(new_beacon + 1);
+	if (beacon->head_len) {
+		new_beacon->head_len = beacon->head_len;
+		new_beacon->head = pos;
+		memcpy(pos, beacon->head, beacon->head_len);
+		pos += beacon->head_len;
+	}
+	if (beacon->tail_len) {
+		new_beacon->tail_len = beacon->tail_len;
+		new_beacon->tail = pos;
+		memcpy(pos, beacon->tail, beacon->tail_len);
+		pos += beacon->tail_len;
+	}
+	if (beacon->beacon_ies_len) {
+		new_beacon->beacon_ies_len = beacon->beacon_ies_len;
+		new_beacon->beacon_ies = pos;
+		memcpy(pos, beacon->beacon_ies, beacon->beacon_ies_len);
+		pos += beacon->beacon_ies_len;
+	}
+	if (beacon->proberesp_ies_len) {
+		new_beacon->proberesp_ies_len = beacon->proberesp_ies_len;
+		new_beacon->proberesp_ies = pos;
+		memcpy(pos, beacon->proberesp_ies, beacon->proberesp_ies_len);
+		pos += beacon->proberesp_ies_len;
+	}
+	if (beacon->assocresp_ies_len) {
+		new_beacon->assocresp_ies_len = beacon->assocresp_ies_len;
+		new_beacon->assocresp_ies = pos;
+		memcpy(pos, beacon->assocresp_ies, beacon->assocresp_ies_len);
+		pos += beacon->assocresp_ies_len;
+	}
+	if (beacon->probe_resp_len) {
+		new_beacon->probe_resp_len = beacon->probe_resp_len;
+		/*
+		 * Point the *copy* at its own storage.  Assigning to
+		 * beacon->probe_resp here would clobber the source pointer,
+		 * turn the memcpy() below into a self-copy and leave
+		 * new_beacon->probe_resp NULL with a non-zero length.
+		 */
+		new_beacon->probe_resp = pos;
+		memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
+		pos += beacon->probe_resp_len;
+	}
+
+	return new_beacon;
+}
+
+/*
+ * Work item that completes a channel switch for an AP interface.
+ *
+ * Moves the interface to local->csa_chandef, installs the beacon saved in
+ * sdata->u.ap.next_beacon, frees that saved beacon, clears csa_active, wakes
+ * the queues that were stopped for the CSA reason and reports the result to
+ * the driver (BSS change notification) and to cfg80211.
+ *
+ * Bails out without doing anything if the interface is not running or is not
+ * an AP, and stops early if changing the channel or assigning the beacon
+ * fails.
+ */
+void ieee80211_csa_finalize_work(struct work_struct *work)
+{
+	struct ieee80211_sub_if_data *sdata =
+		container_of(work, struct ieee80211_sub_if_data,
+			     csa_finalize_work);
+	struct ieee80211_local *local = sdata->local;
+	/*
+	 * Must start at 0: ieee80211_vif_change_channel() only ORs flags into
+	 * *changed, so an uninitialized value would leak garbage bits into
+	 * the BSS change notification below.  u32 matches its parameter type.
+	 */
+	u32 changed = 0;
+	int err;
+
+	if (!ieee80211_sdata_running(sdata))
+		return;
+
+	if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP))
+		return;
+
+	sdata->radar_required = sdata->csa_radar_required;
+	err = ieee80211_vif_change_channel(sdata, &local->csa_chandef,
+					   &changed);
+	if (WARN_ON(err < 0))
+		return;
+
+	err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon);
+	if (err < 0)
+		return;
+
+	/* a non-negative return carries the BSS_CHANGED_* flags to report */
+	changed |= err;
+	kfree(sdata->u.ap.next_beacon);
+	sdata->u.ap.next_beacon = NULL;
+	sdata->vif.csa_active = false;
+
+	ieee80211_wake_queues_by_reason(&sdata->local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+
+	ieee80211_bss_info_change_notify(sdata, changed);
+
+	cfg80211_ch_switch_notify(sdata->dev, &local->csa_chandef);
+}
+
+/*
+ * cfg80211 .channel_switch handler: start a channel switch announcement.
+ *
+ * Only AP interfaces that are the sole user of the only channel context are
+ * handled.  The beacon to use after the switch is duplicated and kept in
+ * sdata->u.ap.next_beacon; the CSA beacon from @params is installed right
+ * away and the driver is told about the pending switch.
+ *
+ * Returns 0 on success, or
+ *   -EBUSY      a remain-on-channel or scan is in progress, CAC has started,
+ *               there is no channel context, the context is shared, more than
+ *               one context exists, or a switch is already active;
+ *   -EINVAL     the requested chandef equals the current one;
+ *   -EOPNOTSUPP the interface is not an AP;
+ *   -ENOMEM     the post-switch beacon could not be duplicated;
+ *   or the negative error from ieee80211_assign_beacon().
+ */
+static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
+				    struct cfg80211_csa_settings *params)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx_conf *chanctx_conf;
+	struct ieee80211_chanctx *chanctx;
+	int err, num_chanctx;
+
+	if (!list_empty(&local->roc_list) || local->scanning)
+		return -EBUSY;
+
+	if (sdata->wdev.cac_started)
+		return -EBUSY;
+
+	if (cfg80211_chandef_identical(&params->chandef,
+				       &sdata->vif.bss_conf.chandef))
+		return -EINVAL;
+
+	rcu_read_lock();
+	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+	if (!chanctx_conf) {
+		rcu_read_unlock();
+		return -EBUSY;
+	}
+
+	/* don't handle for multi-VIF cases */
+	chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf);
+	if (chanctx->refcount > 1) {
+		rcu_read_unlock();
+		return -EBUSY;
+	}
+	num_chanctx = 0;
+	list_for_each_entry_rcu(chanctx, &local->chanctx_list, list)
+		num_chanctx++;
+	rcu_read_unlock();
+
+	if (num_chanctx > 1)
+		return -EBUSY;
+
+	/* don't allow another channel switch if one is already active. */
+	if (sdata->vif.csa_active)
+		return -EBUSY;
+
+	/* only handle AP for now. */
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	sdata->u.ap.next_beacon = cfg80211_beacon_dup(&params->beacon_after);
+	if (!sdata->u.ap.next_beacon)
+		return -ENOMEM;
+
+	sdata->csa_counter_offset_beacon = params->counter_offset_beacon;
+	sdata->csa_counter_offset_presp = params->counter_offset_presp;
+	sdata->csa_radar_required = params->radar_required;
+
+	err = ieee80211_assign_beacon(sdata, &params->beacon_csa);
+	if (err < 0) {
+		/*
+		 * The switch never started: drop the duplicated post-switch
+		 * beacon instead of leaking it and leaving a stale pointer.
+		 */
+		kfree(sdata->u.ap.next_beacon);
+		sdata->u.ap.next_beacon = NULL;
+		return err;
+	}
+
+	/*
+	 * Stop the queues only once the CSA beacon is in place, so that the
+	 * failure path above cannot leave them stopped with nothing to wake
+	 * them up again.
+	 */
+	if (params->block_tx)
+		ieee80211_stop_queues_by_reason(&local->hw,
+						IEEE80211_MAX_QUEUE_MAP,
+						IEEE80211_QUEUE_STOP_REASON_CSA);
+
+	local->csa_chandef = params->chandef;
+	sdata->vif.csa_active = true;
+
+	/* err holds the BSS_CHANGED_* flags returned by the beacon assignment */
+	ieee80211_bss_info_change_notify(sdata, err);
+	drv_channel_switch_beacon(sdata, &params->chandef);
+
+	return 0;
+}
+
static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
struct ieee80211_channel *chan, bool offchan,
unsigned int wait, const u8 *buf, size_t len,
@@ -3334,7 +3518,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
return -EINVAL;
}
band = chanctx_conf->def.chan->band;
- sta = sta_info_get(sdata, peer);
+ sta = sta_info_get_bss(sdata, peer);
if (sta) {
qos = test_sta_flag(sta, WLAN_STA_WME);
} else {
@@ -3503,4 +3687,5 @@ struct cfg80211_ops mac80211_config_ops = {
.get_et_strings = ieee80211_get_et_strings,
.get_channel = ieee80211_cfg_get_channel,
.start_radar_detection = ieee80211_start_radar_detection,
+ .channel_switch = ieee80211_channel_switch,
};
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 03e8d2e3270e..3a4764b2869e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -410,6 +410,64 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata,
return ret;
}
+/*
+ * Switch the channel context used by @sdata over to @chandef as part of an
+ * active channel switch.
+ *
+ * Fails with -EINVAL when no channel switch is active (this also triggers a
+ * warning), when @chandef is not usable, when the interface has no channel
+ * context, or when the context's refcount is not exactly 1.
+ *
+ * BSS_CHANGED_BANDWIDTH is ORed into *@changed if the width changes; the
+ * value is otherwise left alone, so the caller has to initialize it.
+ *
+ * Returns 0 on success.
+ */
+int ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
+				 const struct cfg80211_chan_def *chandef,
+				 u32 *changed)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_chanctx_conf *cur_conf;
+	struct ieee80211_chanctx *cur_ctx;
+	u32 ctx_flags = 0;
+	int ret = -EINVAL;
+
+	/* should never be called if not performing a channel switch. */
+	if (WARN_ON(!sdata->vif.csa_active))
+		return -EINVAL;
+
+	if (!cfg80211_chandef_usable(local->hw.wiphy, chandef,
+				     IEEE80211_CHAN_DISABLED))
+		return -EINVAL;
+
+	mutex_lock(&local->chanctx_mtx);
+
+	cur_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					lockdep_is_held(&local->chanctx_mtx));
+	if (!cur_conf)
+		goto out;
+
+	cur_ctx = container_of(cur_conf, struct ieee80211_chanctx, conf);
+	if (cur_ctx->refcount != 1)
+		goto out;
+
+	/* compare against the old width before it gets overwritten below */
+	if (sdata->vif.bss_conf.chandef.width != chandef->width) {
+		ctx_flags |= IEEE80211_CHANCTX_CHANGE_WIDTH;
+		*changed |= BSS_CHANGED_BANDWIDTH;
+	}
+	ctx_flags |= IEEE80211_CHANCTX_CHANGE_CHANNEL;
+
+	sdata->vif.bss_conf.chandef = *chandef;
+	cur_ctx->conf.def = *chandef;
+
+	drv_change_chanctx(local, cur_ctx, ctx_flags);
+
+	if (!local->use_chanctx) {
+		local->_oper_chandef = *chandef;
+		ieee80211_hw_config(local, 0);
+	}
+
+	ieee80211_recalc_chanctx_chantype(local, cur_ctx);
+	ieee80211_recalc_smps_chanctx(local, cur_ctx);
+	ieee80211_recalc_radar_chanctx(local, cur_ctx);
+
+	ret = 0;
+ out:
+	mutex_unlock(&local->chanctx_mtx);
+	return ret;
+}
+
int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
u32 *changed)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 44e201d60a13..19c54a44ed47 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -455,6 +455,15 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count);
DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count);
+ if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
+ debugfs_create_x32("driver_buffered_tids", 0400,
+ sta->debugfs.dir,
+ (u32 *)&sta->driver_buffered_tids);
+ else
+ debugfs_create_x64("driver_buffered_tids", 0400,
+ sta->debugfs.dir,
+ (u64 *)&sta->driver_buffered_tids);
+
drv_sta_add_debugfs(local, sdata, &sta->sta, sta->debugfs.dir);
}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index b931c96a596f..b3ea11f3d526 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1072,4 +1072,17 @@ static inline void drv_ipv6_addr_change(struct ieee80211_local *local,
}
#endif
+/*
+ * Invoke the driver's optional channel_switch_beacon callback for @sdata,
+ * emitting the matching trace event first.  Does nothing when the driver
+ * does not provide the callback.
+ */
+static inline void
+drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata,
+			  struct cfg80211_chan_def *chandef)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	if (!local->ops->channel_switch_beacon)
+		return;
+
+	trace_drv_channel_switch_beacon(local, sdata, chandef);
+	local->ops->channel_switch_beacon(&local->hw, &sdata->vif, chandef);
+}
+
#endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index f83534f6a2ee..529bf58bc145 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -19,13 +19,14 @@
#include "ieee80211_i.h"
#include "rate.h"
-static void __check_htcap_disable(struct ieee80211_sub_if_data *sdata,
+static void __check_htcap_disable(struct ieee80211_ht_cap *ht_capa,
+ struct ieee80211_ht_cap *ht_capa_mask,
struct ieee80211_sta_ht_cap *ht_cap,
u16 flag)
{
__le16 le_flag = cpu_to_le16(flag);
- if (sdata->u.mgd.ht_capa_mask.cap_info & le_flag) {
- if (!(sdata->u.mgd.ht_capa.cap_info & le_flag))
+ if (ht_capa_mask->cap_info & le_flag) {
+ if (!(ht_capa->cap_info & le_flag))
ht_cap->cap &= ~flag;
}
}
@@ -33,13 +34,30 @@ static void __check_htcap_disable(struct ieee80211_sub_if_data *sdata,
void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_ht_cap *ht_cap)
{
- u8 *scaps = (u8 *)(&sdata->u.mgd.ht_capa.mcs.rx_mask);
- u8 *smask = (u8 *)(&sdata->u.mgd.ht_capa_mask.mcs.rx_mask);
+ struct ieee80211_ht_cap *ht_capa, *ht_capa_mask;
+ u8 *scaps, *smask;
int i;
if (!ht_cap->ht_supported)
return;
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_STATION:
+ ht_capa = &sdata->u.mgd.ht_capa;
+ ht_capa_mask = &sdata->u.mgd.ht_capa_mask;
+ break;
+ case NL80211_IFTYPE_ADHOC:
+ ht_capa = &sdata->u.ibss.ht_capa;
+ ht_capa_mask = &sdata->u.ibss.ht_capa_mask;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ scaps = (u8 *)(&ht_capa->mcs.rx_mask);
+ smask = (u8 *)(&ht_capa_mask->mcs.rx_mask);
+
/* NOTE: If you add more over-rides here, update register_hw
* ht_capa_mod_msk logic in main.c as well.
* And, if this method can ever change ht_cap.ht_supported, fix
@@ -55,28 +73,32 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
}
/* Force removal of HT-40 capabilities? */
- __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SUP_WIDTH_20_40);
- __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_40);
+ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
+ IEEE80211_HT_CAP_SUP_WIDTH_20_40);
+ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
+ IEEE80211_HT_CAP_SGI_40);
/* Allow user to disable SGI-20 (SGI-40 is handled above) */
- __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_SGI_20);
+ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
+ IEEE80211_HT_CAP_SGI_20);
/* Allow user to disable the max-AMSDU bit. */
- __check_htcap_disable(sdata, ht_cap, IEEE80211_HT_CAP_MAX_AMSDU);
+ __check_htcap_disable(ht_capa, ht_capa_mask, ht_cap,
+ IEEE80211_HT_CAP_MAX_AMSDU);
/* Allow user to decrease AMPDU factor */
- if (sdata->u.mgd.ht_capa_mask.ampdu_params_info &
+ if (ht_capa_mask->ampdu_params_info &
IEEE80211_HT_AMPDU_PARM_FACTOR) {
- u8 n = sdata->u.mgd.ht_capa.ampdu_params_info
- & IEEE80211_HT_AMPDU_PARM_FACTOR;
+ u8 n = ht_capa->ampdu_params_info &
+ IEEE80211_HT_AMPDU_PARM_FACTOR;
if (n < ht_cap->ampdu_factor)
ht_cap->ampdu_factor = n;
}
/* Allow the user to increase AMPDU density. */
- if (sdata->u.mgd.ht_capa_mask.ampdu_params_info &
+ if (ht_capa_mask->ampdu_params_info &
IEEE80211_HT_AMPDU_PARM_DENSITY) {
- u8 n = (sdata->u.mgd.ht_capa.ampdu_params_info &
+ u8 n = (ht_capa->ampdu_params_info &
IEEE80211_HT_AMPDU_PARM_DENSITY)
>> IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT;
if (n > ht_cap->ampdu_density)
@@ -112,7 +134,8 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
* we advertised a restricted capability set to. Override
* our own capabilities and then use those below.
*/
- if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ if ((sdata->vif.type == NL80211_IFTYPE_STATION ||
+ sdata->vif.type == NL80211_IFTYPE_ADHOC) &&
!test_sta_flag(sta, WLAN_STA_TDLS_PEER))
ieee80211_apply_htcap_overrides(sdata, &own_cap);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 2d45643c964e..a12afe77bb26 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -30,78 +30,27 @@
#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
+#define IEEE80211_IBSS_RSN_INACTIVITY_LIMIT (10 * HZ)
#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
-
-static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
- const u8 *bssid, const int beacon_int,
- struct cfg80211_chan_def *req_chandef,
- const u32 basic_rates,
- const u16 capability, u64 tsf,
- bool creator)
+static struct beacon_data *
+ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
+ const int beacon_int, const u32 basic_rates,
+ const u16 capability, u64 tsf,
+ struct cfg80211_chan_def *chandef,
+ bool *have_higher_than_11mbit)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct ieee80211_local *local = sdata->local;
- int rates, i;
+ int rates_n = 0, i, ri;
struct ieee80211_mgmt *mgmt;
u8 *pos;
struct ieee80211_supported_band *sband;
- struct cfg80211_bss *bss;
- u32 bss_change;
- u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
- struct cfg80211_chan_def chandef;
- struct ieee80211_channel *chan;
+ u32 rate_flags, rates = 0, rates_added = 0;
struct beacon_data *presp;
int frame_len;
-
- sdata_assert_lock(sdata);
-
- /* Reset own TSF to allow time synchronization work. */
- drv_reset_tsf(local, sdata);
-
- if (!ether_addr_equal(ifibss->bssid, bssid))
- sta_info_flush(sdata);
-
- /* if merging, indicate to driver that we leave the old IBSS */
- if (sdata->vif.bss_conf.ibss_joined) {
- sdata->vif.bss_conf.ibss_joined = false;
- sdata->vif.bss_conf.ibss_creator = false;
- sdata->vif.bss_conf.enable_beacon = false;
- netif_carrier_off(sdata->dev);
- ieee80211_bss_info_change_notify(sdata,
- BSS_CHANGED_IBSS |
- BSS_CHANGED_BEACON_ENABLED);
- }
-
- presp = rcu_dereference_protected(ifibss->presp,
- lockdep_is_held(&sdata->wdev.mtx));
- rcu_assign_pointer(ifibss->presp, NULL);
- if (presp)
- kfree_rcu(presp, rcu_head);
-
- sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
-
- /* make a copy of the chandef, it could be modified below. */
- chandef = *req_chandef;
- chan = chandef.chan;
- if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
- chandef.width = NL80211_CHAN_WIDTH_20;
- chandef.center_freq1 = chan->center_freq;
- }
-
- ieee80211_vif_release_channel(sdata);
- if (ieee80211_vif_use_channel(sdata, &chandef,
- ifibss->fixed_channel ?
- IEEE80211_CHANCTX_SHARED :
- IEEE80211_CHANCTX_EXCLUSIVE)) {
- sdata_info(sdata, "Failed to join IBSS, no channel context\n");
- return;
- }
-
- memcpy(ifibss->bssid, bssid, ETH_ALEN);
-
- sband = local->hw.wiphy->bands[chan->band];
+ int shift;
/* Build IBSS probe response */
frame_len = sizeof(struct ieee80211_hdr_3addr) +
@@ -116,7 +65,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
ifibss->ie_len;
presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL);
if (!presp)
- return;
+ return NULL;
presp->head = (void *)(presp + 1);
@@ -137,21 +86,47 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
memcpy(pos, ifibss->ssid, ifibss->ssid_len);
pos += ifibss->ssid_len;
- rates = min_t(int, 8, sband->n_bitrates);
+ sband = local->hw.wiphy->bands[chandef->chan->band];
+ rate_flags = ieee80211_chandef_rate_flags(chandef);
+ shift = ieee80211_chandef_get_shift(chandef);
+ rates_n = 0;
+ if (have_higher_than_11mbit)
+ *have_higher_than_11mbit = false;
+
+ for (i = 0; i < sband->n_bitrates; i++) {
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ continue;
+ if (sband->bitrates[i].bitrate > 110 &&
+ have_higher_than_11mbit)
+ *have_higher_than_11mbit = true;
+
+ rates |= BIT(i);
+ rates_n++;
+ }
+
*pos++ = WLAN_EID_SUPP_RATES;
- *pos++ = rates;
- for (i = 0; i < rates; i++) {
- int rate = sband->bitrates[i].bitrate;
+ *pos++ = min_t(int, 8, rates_n);
+ for (ri = 0; ri < sband->n_bitrates; ri++) {
+ int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate,
+ 5 * (1 << shift));
u8 basic = 0;
- if (basic_rates & BIT(i))
+ if (!(rates & BIT(ri)))
+ continue;
+
+ if (basic_rates & BIT(ri))
basic = 0x80;
- *pos++ = basic | (u8) (rate / 5);
+ *pos++ = basic | (u8) rate;
+ if (++rates_added == 8) {
+ ri++; /* continue at next rate for EXT_SUPP_RATES */
+ break;
+ }
}
if (sband->band == IEEE80211_BAND_2GHZ) {
*pos++ = WLAN_EID_DS_PARAMS;
*pos++ = 1;
- *pos++ = ieee80211_frequency_to_channel(chan->center_freq);
+ *pos++ = ieee80211_frequency_to_channel(
+ chandef->chan->center_freq);
}
*pos++ = WLAN_EID_IBSS_PARAMS;
@@ -160,15 +135,20 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
*pos++ = 0;
*pos++ = 0;
- if (sband->n_bitrates > 8) {
+ /* put the remaining rates in WLAN_EID_EXT_SUPP_RATES */
+ if (rates_n > 8) {
*pos++ = WLAN_EID_EXT_SUPP_RATES;
- *pos++ = sband->n_bitrates - 8;
- for (i = 8; i < sband->n_bitrates; i++) {
- int rate = sband->bitrates[i].bitrate;
+ *pos++ = rates_n - 8;
+ for (; ri < sband->n_bitrates; ri++) {
+ int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate,
+ 5 * (1 << shift));
u8 basic = 0;
- if (basic_rates & BIT(i))
+ if (!(rates & BIT(ri)))
+ continue;
+
+ if (basic_rates & BIT(ri))
basic = 0x80;
- *pos++ = basic | (u8) (rate / 5);
+ *pos++ = basic | (u8) rate;
}
}
@@ -178,19 +158,23 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
}
/* add HT capability and information IEs */
- if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
- chandef.width != NL80211_CHAN_WIDTH_5 &&
- chandef.width != NL80211_CHAN_WIDTH_10 &&
+ if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT &&
+ chandef->width != NL80211_CHAN_WIDTH_5 &&
+ chandef->width != NL80211_CHAN_WIDTH_10 &&
sband->ht_cap.ht_supported) {
- pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap,
- sband->ht_cap.cap);
+ struct ieee80211_sta_ht_cap ht_cap;
+
+ memcpy(&ht_cap, &sband->ht_cap, sizeof(ht_cap));
+ ieee80211_apply_htcap_overrides(sdata, &ht_cap);
+
+ pos = ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
/*
* Note: According to 802.11n-2009 9.13.3.1, HT Protection
* field and RIFS Mode are reserved in IBSS mode, therefore
* keep them at 0
*/
pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap,
- &chandef, 0);
+ chandef, 0);
}
if (local->hw.queues >= IEEE80211_NUM_ACS) {
@@ -207,9 +191,97 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
presp->head_len = pos - presp->head;
if (WARN_ON(presp->head_len > frame_len))
+ goto error;
+
+ return presp;
+error:
+ kfree(presp);
+ return NULL;
+}
+
+static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
+ const u8 *bssid, const int beacon_int,
+ struct cfg80211_chan_def *req_chandef,
+ const u32 basic_rates,
+ const u16 capability, u64 tsf,
+ bool creator)
+{
+ struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_mgmt *mgmt;
+ struct cfg80211_bss *bss;
+ u32 bss_change;
+ struct cfg80211_chan_def chandef;
+ struct ieee80211_channel *chan;
+ struct beacon_data *presp;
+ enum nl80211_bss_scan_width scan_width;
+ bool have_higher_than_11mbit;
+
+ sdata_assert_lock(sdata);
+
+ /* Reset own TSF to allow time synchronization work. */
+ drv_reset_tsf(local, sdata);
+
+ if (!ether_addr_equal(ifibss->bssid, bssid))
+ sta_info_flush(sdata);
+
+ /* if merging, indicate to driver that we leave the old IBSS */
+ if (sdata->vif.bss_conf.ibss_joined) {
+ sdata->vif.bss_conf.ibss_joined = false;
+ sdata->vif.bss_conf.ibss_creator = false;
+ sdata->vif.bss_conf.enable_beacon = false;
+ netif_carrier_off(sdata->dev);
+ ieee80211_bss_info_change_notify(sdata,
+ BSS_CHANGED_IBSS |
+ BSS_CHANGED_BEACON_ENABLED);
+ }
+
+ presp = rcu_dereference_protected(ifibss->presp,
+ lockdep_is_held(&sdata->wdev.mtx));
+ rcu_assign_pointer(ifibss->presp, NULL);
+ if (presp)
+ kfree_rcu(presp, rcu_head);
+
+ sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
+
+ /* make a copy of the chandef, it could be modified below. */
+ chandef = *req_chandef;
+ chan = chandef.chan;
+ if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) {
+ if (chandef.width == NL80211_CHAN_WIDTH_5 ||
+ chandef.width == NL80211_CHAN_WIDTH_10 ||
+ chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ chandef.width == NL80211_CHAN_WIDTH_20) {
+ sdata_info(sdata,
+ "Failed to join IBSS, beacons forbidden\n");
+ return;
+ }
+ chandef.width = NL80211_CHAN_WIDTH_20;
+ chandef.center_freq1 = chan->center_freq;
+ }
+
+ ieee80211_vif_release_channel(sdata);
+ if (ieee80211_vif_use_channel(sdata, &chandef,
+ ifibss->fixed_channel ?
+ IEEE80211_CHANCTX_SHARED :
+ IEEE80211_CHANCTX_EXCLUSIVE)) {
+ sdata_info(sdata, "Failed to join IBSS, no channel context\n");
+ return;
+ }
+
+ memcpy(ifibss->bssid, bssid, ETH_ALEN);
+
+ sband = local->hw.wiphy->bands[chan->band];
+
+ presp = ieee80211_ibss_build_presp(sdata, beacon_int, basic_rates,
+ capability, tsf, &chandef,
+ &have_higher_than_11mbit);
+ if (!presp)
return;
rcu_assign_pointer(ifibss->presp, presp);
+ mgmt = (void *)presp->head;
sdata->vif.bss_conf.enable_beacon = true;
sdata->vif.bss_conf.beacon_int = beacon_int;
@@ -239,18 +311,26 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.use_short_slot = chan->band == IEEE80211_BAND_5GHZ;
bss_change |= BSS_CHANGED_ERP_SLOT;
+ /* cf. IEEE 802.11 9.2.12 */
+ if (chan->band == IEEE80211_BAND_2GHZ && have_higher_than_11mbit)
+ sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
+ else
+ sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
+
sdata->vif.bss_conf.ibss_joined = true;
sdata->vif.bss_conf.ibss_creator = creator;
ieee80211_bss_info_change_notify(sdata, bss_change);
- ieee80211_sta_def_wmm_params(sdata, sband->n_bitrates, supp_rates);
+ ieee80211_set_wmm_default(sdata, true);
ifibss->state = IEEE80211_IBSS_MLME_JOINED;
mod_timer(&ifibss->timer,
round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
- bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan,
- mgmt, presp->head_len, 0, GFP_KERNEL);
+ scan_width = cfg80211_chandef_to_scan_width(&chandef);
+ bss = cfg80211_inform_bss_width_frame(local->hw.wiphy, chan,
+ scan_width, mgmt,
+ presp->head_len, 0, GFP_KERNEL);
cfg80211_put_bss(local->hw.wiphy, bss);
netif_carrier_on(sdata->dev);
cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL);
@@ -269,6 +349,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_bss_ies *ies;
enum nl80211_channel_type chan_type;
u64 tsf;
+ u32 rate_flags;
+ int shift;
sdata_assert_lock(sdata);
@@ -296,15 +378,24 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
}
sband = sdata->local->hw.wiphy->bands[cbss->channel->band];
+ rate_flags = ieee80211_chandef_rate_flags(&sdata->u.ibss.chandef);
+ shift = ieee80211_vif_get_shift(&sdata->vif);
basic_rates = 0;
for (i = 0; i < bss->supp_rates_len; i++) {
- int rate = (bss->supp_rates[i] & 0x7f) * 5;
+ int rate = bss->supp_rates[i] & 0x7f;
bool is_basic = !!(bss->supp_rates[i] & 0x80);
for (j = 0; j < sband->n_bitrates; j++) {
- if (sband->bitrates[j].bitrate == rate) {
+ int brate;
+ if ((rate_flags & sband->bitrates[j].flags)
+ != rate_flags)
+ continue;
+
+ brate = DIV_ROUND_UP(sband->bitrates[j].bitrate,
+ 5 * (1 << shift));
+ if (brate == rate) {
if (is_basic)
basic_rates |= BIT(j);
break;
@@ -360,6 +451,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
struct sta_info *sta;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_supported_band *sband;
+ enum nl80211_bss_scan_width scan_width;
int band;
/*
@@ -388,6 +480,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
if (WARN_ON_ONCE(!chanctx_conf))
return NULL;
band = chanctx_conf->def.chan->band;
+ scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
rcu_read_unlock();
sta = sta_info_alloc(sdata, addr, GFP_KERNEL);
@@ -401,7 +494,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid,
/* make sure mandatory rates are always added */
sband = local->hw.wiphy->bands[band];
sta->sta.supp_rates[band] = supp_rates |
- ieee80211_mandatory_rates(sband);
+ ieee80211_mandatory_rates(sband, scan_width);
return ieee80211_ibss_finish_sta(sta);
}
@@ -465,6 +558,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
u64 beacon_timestamp, rx_timestamp;
u32 supp_rates = 0;
enum ieee80211_band band = rx_status->band;
+ enum nl80211_bss_scan_width scan_width;
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
bool rates_updated = false;
@@ -486,16 +580,22 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
sta = sta_info_get(sdata, mgmt->sa);
if (elems->supp_rates) {
- supp_rates = ieee80211_sta_get_rates(local, elems,
+ supp_rates = ieee80211_sta_get_rates(sdata, elems,
band, NULL);
if (sta) {
u32 prev_rates;
prev_rates = sta->sta.supp_rates[band];
/* make sure mandatory rates are always added */
- sta->sta.supp_rates[band] = supp_rates |
- ieee80211_mandatory_rates(sband);
+ scan_width = NL80211_BSS_CHAN_WIDTH_20;
+ if (rx_status->flag & RX_FLAG_5MHZ)
+ scan_width = NL80211_BSS_CHAN_WIDTH_5;
+ if (rx_status->flag & RX_FLAG_10MHZ)
+ scan_width = NL80211_BSS_CHAN_WIDTH_10;
+ sta->sta.supp_rates[band] = supp_rates |
+ ieee80211_mandatory_rates(sband,
+ scan_width);
if (sta->sta.supp_rates[band] != prev_rates) {
ibss_dbg(sdata,
"updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n",
@@ -610,7 +710,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
"beacon TSF higher than local TSF - IBSS merge with BSSID %pM\n",
mgmt->bssid);
ieee80211_sta_join_ibss(sdata, bss);
- supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL);
+ supp_rates = ieee80211_sta_get_rates(sdata, elems, band, NULL);
ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
supp_rates);
rcu_read_unlock();
@@ -629,6 +729,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_supported_band *sband;
+ enum nl80211_bss_scan_width scan_width;
int band;
/*
@@ -654,6 +755,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
return;
}
band = chanctx_conf->def.chan->band;
+ scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def);
rcu_read_unlock();
sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
@@ -665,7 +767,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
/* make sure mandatory rates are always added */
sband = local->hw.wiphy->bands[band];
sta->sta.supp_rates[band] = supp_rates |
- ieee80211_mandatory_rates(sband);
+ ieee80211_mandatory_rates(sband, scan_width);
spin_lock(&ifibss->incomplete_lock);
list_add(&sta->list, &ifibss->incomplete_stations);
@@ -697,6 +799,33 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
return active;
}
+static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
+{ /* Destroy stations of @sdata whose last_rx is older than the IBSS inactivity limits. */
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta, *tmp;
+ unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT;
+ unsigned long exp_rsn_time = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT;
+
+ mutex_lock(&local->sta_mtx); /* sta_mtx is held for the entire walk of local->sta_list */
+
+ list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { /* _safe variant: the body destroys the current entry */
+ if (sdata != sta->sdata) /* the list is per-local; skip stations of other interfaces */
+ continue;
+
+ if (time_after(jiffies, sta->last_rx + exp_time) || /* idle past the general limit, or ... */
+ (time_after(jiffies, sta->last_rx + exp_rsn_time) && /* ... idle past the RSN limit ... */
+ sta->sta_state != IEEE80211_STA_AUTHORIZED)) { /* ... while not in the AUTHORIZED state */
+ sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n",
+ sta->sta_state != IEEE80211_STA_AUTHORIZED ?
+ "not authorized " : "", sta->sta.addr);
+
+ WARN_ON(__sta_info_destroy(sta)); /* a non-zero result from the destroy call only triggers a warning */
+ }
+ }
+
+ mutex_unlock(&local->sta_mtx);
+}
+
/*
* This function is called with state == IEEE80211_IBSS_MLME_JOINED
*/
@@ -704,13 +833,14 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
+ enum nl80211_bss_scan_width scan_width;
sdata_assert_lock(sdata);
mod_timer(&ifibss->timer,
round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
- ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT);
+ ieee80211_ibss_sta_expire(sdata);
if (time_before(jiffies, ifibss->last_scan_completed +
IEEE80211_IBSS_MERGE_INTERVAL))
@@ -725,8 +855,9 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
sdata_info(sdata,
"No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n");
+ scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len,
- NULL);
+ NULL, scan_width);
}
static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
@@ -776,6 +907,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
struct cfg80211_bss *cbss;
struct ieee80211_channel *chan = NULL;
const u8 *bssid = NULL;
+ enum nl80211_bss_scan_width scan_width;
int active_ibss;
u16 capability;
@@ -817,6 +949,17 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
return;
}
+ /* if a fixed bssid and a fixed freq have been provided create the IBSS
+ * directly and do not waste time scanning
+ */
+ if (ifibss->fixed_bssid && ifibss->fixed_channel) {
+ sdata_info(sdata, "Created IBSS using preconfigured BSSID %pM\n",
+ bssid);
+ ieee80211_sta_create_ibss(sdata);
+ return;
+ }
+
+
ibss_dbg(sdata, "sta_find_ibss: did not try to join ibss\n");
/* Selected IBSS not found in current scan results - try to scan */
@@ -824,8 +967,10 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
IEEE80211_SCAN_INTERVAL)) {
sdata_info(sdata, "Trigger new scan to find an IBSS to join\n");
+ scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef);
ieee80211_request_ibss_scan(sdata, ifibss->ssid,
- ifibss->ssid_len, chan);
+ ifibss->ssid_len, chan,
+ scan_width);
} else {
int interval = IEEE80211_SCAN_INTERVAL;
@@ -1045,6 +1190,9 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
struct cfg80211_ibss_params *params)
{
u32 changed = 0;
+ u32 rate_flags;
+ struct ieee80211_supported_band *sband;
+ int i;
if (params->bssid) {
memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN);
@@ -1055,6 +1203,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
sdata->u.ibss.privacy = params->privacy;
sdata->u.ibss.control_port = params->control_port;
sdata->u.ibss.basic_rates = params->basic_rates;
+
+ /* fix basic_rates if channel does not support these rates */
+ rate_flags = ieee80211_chandef_rate_flags(&params->chandef);
+ sband = sdata->local->hw.wiphy->bands[params->chandef.chan->band];
+ for (i = 0; i < sband->n_bitrates; i++) {
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ sdata->u.ibss.basic_rates &= ~BIT(i);
+ }
memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate,
sizeof(params->mcast_rate));
@@ -1076,6 +1232,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len);
sdata->u.ibss.ssid_len = params->ssid_len;
+ memcpy(&sdata->u.ibss.ht_capa, &params->ht_capa,
+ sizeof(sdata->u.ibss.ht_capa));
+ memcpy(&sdata->u.ibss.ht_capa_mask, &params->ht_capa_mask,
+ sizeof(sdata->u.ibss.ht_capa_mask));
+
/*
* 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is
* reserved, but an HT STA shall protect HT transmissions as though
@@ -1156,6 +1317,11 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
presp = rcu_dereference_protected(ifibss->presp,
lockdep_is_held(&sdata->wdev.mtx));
RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
+
+ /* on the next join, re-program HT parameters */
+ memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa));
+ memset(&ifibss->ht_capa_mask, 0, sizeof(ifibss->ht_capa_mask));
+
sdata->vif.bss_conf.ibss_joined = false;
sdata->vif.bss_conf.ibss_creator = false;
sdata->vif.bss_conf.enable_beacon = false;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8412a303993a..611abfcfb5eb 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -53,9 +53,6 @@ struct ieee80211_local;
* increased memory use (about 2 kB of RAM per entry). */
#define IEEE80211_FRAGMENT_MAX 4
-#define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024))
-#define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x))
-
/* power level hasn't been configured (or set to automatic) */
#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
@@ -259,6 +256,8 @@ struct ieee80211_if_ap {
struct beacon_data __rcu *beacon;
struct probe_resp __rcu *probe_resp;
+ /* to be used after channel switch. */
+ struct cfg80211_beacon_data *next_beacon;
struct list_head vlans;
struct ps_data ps;
@@ -509,6 +508,9 @@ struct ieee80211_if_ibss {
/* probe response/beacon for IBSS */
struct beacon_data __rcu *presp;
+ struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */
+ struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */
+
spinlock_t incomplete_lock;
struct list_head incomplete_stations;
@@ -713,6 +715,11 @@ struct ieee80211_sub_if_data {
struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
+ struct work_struct csa_finalize_work;
+ int csa_counter_offset_beacon;
+ int csa_counter_offset_presp;
+ bool csa_radar_required;
+
/* used to reconfigure hardware SM PS */
struct work_struct recalc_smps;
@@ -809,6 +816,34 @@ ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata)
return band;
}
+static inline int
+ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef) /* Map the chandef's width to a shift count (0, 1 or 2). */
+{
+ switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_5:
+ return 2; /* 5 MHz width -> shift of 2 */
+ case NL80211_CHAN_WIDTH_10:
+ return 1; /* 10 MHz width -> shift of 1 */
+ default:
+ return 0; /* every other width -> no shift */
+ }
+}
+
+static inline int
+ieee80211_vif_get_shift(struct ieee80211_vif *vif) /* Shift count for the vif's current channel context; 0 when none is assigned. */
+{
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ int shift = 0; /* returned unchanged if vif->chanctx_conf is NULL */
+
+ rcu_read_lock(); /* chanctx_conf is fetched with rcu_dereference(), so read it inside an RCU read section */
+ chanctx_conf = rcu_dereference(vif->chanctx_conf);
+ if (chanctx_conf)
+ shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
+ rcu_read_unlock(); /* only the plain int copy is used after this point */
+
+ return shift;
+}
+
enum sdata_queue_type {
IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0,
IEEE80211_SDATA_QUEUE_AGG_START = 1,
@@ -858,6 +893,8 @@ struct tpt_led_trigger {
* that the scan completed.
* @SCAN_ABORTED: Set for our scan work function when the driver reported
* a scan complete for an aborted scan.
+ * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
+ * cancelled.
*/
enum {
SCAN_SW_SCANNING,
@@ -865,6 +902,7 @@ enum {
SCAN_ONCHANNEL_SCANNING,
SCAN_COMPLETED,
SCAN_ABORTED,
+ SCAN_HW_CANCELLED,
};
/**
@@ -1026,7 +1064,7 @@ struct ieee80211_local {
struct cfg80211_ssid scan_ssid;
struct cfg80211_scan_request *int_scan_req;
struct cfg80211_scan_request *scan_req, *hw_scan_req;
- struct ieee80211_channel *scan_channel;
+ struct cfg80211_chan_def scan_chandef;
enum ieee80211_band hw_scan_band;
int scan_channel_idx;
int scan_ies_len;
@@ -1063,7 +1101,6 @@ struct ieee80211_local {
u32 dot11TransmittedFrameCount;
#ifdef CONFIG_MAC80211_LEDS
- int tx_led_counter, rx_led_counter;
struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
struct tpt_led_trigger *tpt_led_trigger;
char tx_led_name[32], rx_led_name[32],
@@ -1306,7 +1343,8 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
void ieee80211_scan_work(struct work_struct *work);
int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
const u8 *ssid, u8 ssid_len,
- struct ieee80211_channel *chan);
+ struct ieee80211_channel *chan,
+ enum nl80211_bss_scan_width scan_width);
int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
struct cfg80211_scan_request *req);
void ieee80211_scan_cancel(struct ieee80211_local *local);
@@ -1341,6 +1379,9 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free);
void ieee80211_sw_roc_work(struct work_struct *work);
void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
+/* channel switch handling */
+void ieee80211_csa_finalize_work(struct work_struct *work);
+
/* interface handling */
int ieee80211_iface_init(void);
void ieee80211_iface_exit(void);
@@ -1362,6 +1403,8 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local);
bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
+int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_beacon_data *params);
static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
{
@@ -1465,7 +1508,8 @@ extern void *mac80211_wiphy_privid; /* for wiphy privid */
u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
enum nl80211_iftype type);
int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
- int rate, int erp, int short_preamble);
+ int rate, int erp, int short_preamble,
+ int shift);
void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx,
struct ieee80211_hdr *hdr, const u8 *tsc,
gfp_t gfp);
@@ -1569,7 +1613,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
size_t buffer_len, const u8 *ie, size_t ie_len,
enum ieee80211_band band, u32 rate_mask,
- u8 channel);
+ struct cfg80211_chan_def *chandef);
struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
u8 *dst, u32 ratemask,
struct ieee80211_channel *chan,
@@ -1582,10 +1626,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
u32 ratemask, bool directed, u32 tx_flags,
struct ieee80211_channel *channel, bool scan);
-void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
- const size_t supp_rates_len,
- const u8 *supp_rates);
-u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
+u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum ieee80211_band band, u32 *basic_rates);
int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
@@ -1602,6 +1643,9 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
u16 prot_mode);
u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
u32 cap);
+int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
+ const struct ieee80211_supported_band *sband,
+ const u8 *srates, int srates_len, u32 *rates);
int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, bool need_basic,
enum ieee80211_band band);
@@ -1622,6 +1666,11 @@ int __must_check
ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata,
const struct cfg80211_chan_def *chandef,
u32 *changed);
+/* NOTE: only use ieee80211_vif_change_channel() for channel switch */
+int __must_check
+ieee80211_vif_change_channel(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_chan_def *chandef,
+ u32 *changed);
void ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata);
void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata);
void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index cc117591f678..fcecd633514e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -54,7 +54,7 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
return false;
}
- power = chanctx_conf->def.chan->max_power;
+ power = ieee80211_chandef_max_power(&chanctx_conf->def);
rcu_read_unlock();
if (sdata->user_power_level != IEEE80211_UNSET_POWER_LEVEL)
@@ -274,6 +274,12 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
if (iftype == NL80211_IFTYPE_ADHOC &&
nsdata->vif.type == NL80211_IFTYPE_ADHOC)
return -EBUSY;
+ /*
+ * will not add another interface while any channel
+ * switch is active.
+ */
+ if (nsdata->vif.csa_active)
+ return -EBUSY;
/*
* The remaining checks are only performed for interfaces
@@ -302,12 +308,13 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
return 0;
}
-static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata,
+ enum nl80211_iftype iftype)
{
int n_queues = sdata->local->hw.queues;
int i;
- if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) {
+ if (iftype != NL80211_IFTYPE_P2P_DEVICE) {
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
if (WARN_ON_ONCE(sdata->vif.hw_queue[i] ==
IEEE80211_INVAL_HW_QUEUE))
@@ -318,8 +325,9 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata)
}
}
- if ((sdata->vif.type != NL80211_IFTYPE_AP &&
- sdata->vif.type != NL80211_IFTYPE_MESH_POINT) ||
+ if ((iftype != NL80211_IFTYPE_AP &&
+ iftype != NL80211_IFTYPE_P2P_GO &&
+ iftype != NL80211_IFTYPE_MESH_POINT) ||
!(sdata->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL)) {
sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
return 0;
@@ -402,7 +410,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
return ret;
}
- ret = ieee80211_check_queues(sdata);
+ ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR);
if (ret) {
kfree(sdata);
return ret;
@@ -586,7 +594,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
res = drv_add_interface(local, sdata);
if (res)
goto err_stop;
- res = ieee80211_check_queues(sdata);
+ res = ieee80211_check_queues(sdata,
+ ieee80211_vif_type_p2p(&sdata->vif));
if (res)
goto err_del_interface;
}
@@ -804,6 +813,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
cancel_work_sync(&local->dynamic_ps_enable_work);
cancel_work_sync(&sdata->recalc_smps);
+ sdata->vif.csa_active = false;
+ cancel_work_sync(&sdata->csa_finalize_work);
cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
@@ -1267,6 +1278,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
skb_queue_head_init(&sdata->skb_queue);
INIT_WORK(&sdata->work, ieee80211_iface_work);
INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
+ INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work);
switch (type) {
case NL80211_IFTYPE_P2P_GO:
@@ -1380,14 +1392,14 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
ret = drv_change_interface(local, sdata, internal_type, p2p);
if (ret)
- type = sdata->vif.type;
+ type = ieee80211_vif_type_p2p(&sdata->vif);
/*
* Ignore return value here, there's not much we can do since
* the driver changed the interface type internally already.
* The warnings will hopefully make driver authors fix it :-)
*/
- ieee80211_check_queues(sdata);
+ ieee80211_check_queues(sdata, type);
ieee80211_setup_sdata(sdata, type);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index e39cc91d0cf1..620677e897bd 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -93,6 +93,9 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
might_sleep();
+ if (key->flags & KEY_FLAG_TAINTED)
+ return -EINVAL;
+
if (!key->local->ops->set_key)
goto out_unsupported;
@@ -455,6 +458,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
+ struct ieee80211_local *local = sdata->local;
struct ieee80211_key *old_key;
int idx, ret;
bool pairwise;
@@ -484,10 +488,13 @@ int ieee80211_key_link(struct ieee80211_key *key,
ieee80211_debugfs_key_add(key);
- ret = ieee80211_key_enable_hw_accel(key);
-
- if (ret)
- ieee80211_key_free(key, true);
+ if (!local->wowlan) {
+ ret = ieee80211_key_enable_hw_accel(key);
+ if (ret)
+ ieee80211_key_free(key, true);
+ } else {
+ ret = 0;
+ }
mutex_unlock(&sdata->local->key_mtx);
@@ -540,7 +547,7 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
void *iter_data)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_key *key;
+ struct ieee80211_key *key, *tmp;
struct ieee80211_sub_if_data *sdata;
ASSERT_RTNL();
@@ -548,13 +555,14 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
mutex_lock(&local->key_mtx);
if (vif) {
sdata = vif_to_sdata(vif);
- list_for_each_entry(key, &sdata->key_list, list)
+ list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
iter(hw, &sdata->vif,
key->sta ? &key->sta->sta : NULL,
&key->conf, iter_data);
} else {
list_for_each_entry(sdata, &local->interfaces, list)
- list_for_each_entry(key, &sdata->key_list, list)
+ list_for_each_entry_safe(key, tmp,
+ &sdata->key_list, list)
iter(hw, &sdata->vif,
key->sta ? &key->sta->sta : NULL,
&key->conf, iter_data);
@@ -751,3 +759,135 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf,
}
}
EXPORT_SYMBOL(ieee80211_get_key_rx_seq);
+
+void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf,
+ struct ieee80211_key_seq *seq) /* Load the key's TX sequence counter from @seq, according to the key's cipher. */
+{
+ struct ieee80211_key *key;
+ u64 pn64;
+
+ key = container_of(keyconf, struct ieee80211_key, conf); /* @keyconf is the ->conf member embedded in struct ieee80211_key */
+
+ switch (key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_TKIP:
+ key->u.tkip.tx.iv32 = seq->tkip.iv32; /* TKIP: copy both IV halves as-is */
+ key->u.tkip.tx.iv16 = seq->tkip.iv16;
+ break;
+ case WLAN_CIPHER_SUITE_CCMP:
+ pn64 = (u64)seq->ccmp.pn[5] | /* assemble the 6-byte PN: pn[0] is the most significant byte, pn[5] the least */
+ ((u64)seq->ccmp.pn[4] << 8) |
+ ((u64)seq->ccmp.pn[3] << 16) |
+ ((u64)seq->ccmp.pn[2] << 24) |
+ ((u64)seq->ccmp.pn[1] << 32) |
+ ((u64)seq->ccmp.pn[0] << 40);
+ atomic64_set(&key->u.ccmp.tx_pn, pn64);
+ break;
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ pn64 = (u64)seq->aes_cmac.pn[5] | /* same byte order as the CCMP case above */
+ ((u64)seq->aes_cmac.pn[4] << 8) |
+ ((u64)seq->aes_cmac.pn[3] << 16) |
+ ((u64)seq->aes_cmac.pn[2] << 24) |
+ ((u64)seq->aes_cmac.pn[1] << 32) |
+ ((u64)seq->aes_cmac.pn[0] << 40);
+ atomic64_set(&key->u.aes_cmac.tx_pn, pn64);
+ break;
+ default:
+ WARN_ON(1); /* any other cipher is not handled here; nothing is written */
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(ieee80211_set_key_tx_seq);
+
+void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf,
+ int tid, struct ieee80211_key_seq *seq) /* Load the key's RX sequence counter for @tid from @seq, according to the key's cipher. */
+{
+ struct ieee80211_key *key;
+ u8 *pn;
+
+ key = container_of(keyconf, struct ieee80211_key, conf); /* @keyconf is the ->conf member embedded in struct ieee80211_key */
+
+ switch (key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_TKIP:
+ if (WARN_ON(tid < 0 || tid >= IEEE80211_NUM_TIDS)) /* TKIP: tid must be in [0, IEEE80211_NUM_TIDS) */
+ return;
+ key->u.tkip.rx[tid].iv32 = seq->tkip.iv32;
+ key->u.tkip.rx[tid].iv16 = seq->tkip.iv16;
+ break;
+ case WLAN_CIPHER_SUITE_CCMP:
+ if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) /* CCMP additionally accepts tid == -1 */
+ return;
+ if (tid < 0)
+ pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; /* tid == -1 selects the extra slot after the per-TID entries */
+ else
+ pn = key->u.ccmp.rx_pn[tid];
+ memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN);
+ break;
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ if (WARN_ON(tid != 0)) /* AES-CMAC: only tid 0 is accepted */
+ return;
+ pn = key->u.aes_cmac.rx_pn;
+ memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN);
+ break;
+ default:
+ WARN_ON(1); /* any other cipher is not handled here; nothing is written */
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(ieee80211_set_key_rx_seq);
+
+void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) /* Drop the uploaded-to-hardware bookkeeping for the key and free it. */
+{
+ struct ieee80211_key *key;
+
+ key = container_of(keyconf, struct ieee80211_key, conf); /* @keyconf is the ->conf member embedded in struct ieee80211_key */
+
+ assert_key_lock(key->local); /* NOTE(review): presumably asserts the caller holds local->key_mtx - confirm */
+
+ /*
+ * if key was uploaded, we assume the driver will/has remove(d)
+ * it, so adjust bookkeeping accordingly
+ */
+ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
+ key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
+
+ if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || /* count is bumped only when none of these three conf flags is set */
+ (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) ||
+ (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)))
+ increment_tailroom_need_count(key->sdata);
+ }
+
+ ieee80211_key_free(key, false); /* the key is freed whether or not it had been uploaded */
+}
+EXPORT_SYMBOL_GPL(ieee80211_remove_key);
+
+struct ieee80211_key_conf *
+ieee80211_gtk_rekey_add(struct ieee80211_vif *vif,
+ struct ieee80211_key_conf *keyconf) /* Allocate a key from @keyconf and link it to @vif; returns its conf or an ERR_PTR. */
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_key *key;
+ int err;
+
+ if (WARN_ON(!local->wowlan)) /* rejected with -EINVAL unless local->wowlan is set */
+ return ERR_PTR(-EINVAL);
+
+ if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) /* station interfaces only */
+ return ERR_PTR(-EINVAL);
+
+ key = ieee80211_key_alloc(keyconf->cipher, keyconf->keyidx,
+ keyconf->keylen, keyconf->key,
+ 0, NULL);
+ if (IS_ERR(key))
+ return ERR_PTR(PTR_ERR(key)); /* re-wrap the error code in this function's return pointer type */
+
+ if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED) /* MFP not disabled: mark the key for RX management frames */
+ key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
+
+ err = ieee80211_key_link(key, sdata, NULL); /* NULL sta argument: the key is not tied to a station entry */
+ if (err)
+ return ERR_PTR(err);
+
+ return &key->conf; /* the returned conf is embedded in the newly linked key */
+}
+EXPORT_SYMBOL_GPL(ieee80211_gtk_rekey_add);
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index bcffa6903129..e2b836446af3 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -12,27 +12,22 @@
#include <linux/export.h>
#include "led.h"
+#define MAC80211_BLINK_DELAY 50 /* ms */
+
void ieee80211_led_rx(struct ieee80211_local *local)
{
+ unsigned long led_delay = MAC80211_BLINK_DELAY;
if (unlikely(!local->rx_led))
return;
- if (local->rx_led_counter++ % 2 == 0)
- led_trigger_event(local->rx_led, LED_OFF);
- else
- led_trigger_event(local->rx_led, LED_FULL);
+ led_trigger_blink_oneshot(local->rx_led, &led_delay, &led_delay, 0);
}
-/* q is 1 if a packet was enqueued, 0 if it has been transmitted */
-void ieee80211_led_tx(struct ieee80211_local *local, int q)
+void ieee80211_led_tx(struct ieee80211_local *local)
{
+ unsigned long led_delay = MAC80211_BLINK_DELAY;
if (unlikely(!local->tx_led))
return;
- /* not sure how this is supposed to work ... */
- local->tx_led_counter += 2*q-1;
- if (local->tx_led_counter % 2 == 0)
- led_trigger_event(local->tx_led, LED_OFF);
- else
- led_trigger_event(local->tx_led, LED_FULL);
+ led_trigger_blink_oneshot(local->tx_led, &led_delay, &led_delay, 0);
}
void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index e0275d9befa8..89f4344f13b9 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -13,7 +13,7 @@
#ifdef CONFIG_MAC80211_LEDS
void ieee80211_led_rx(struct ieee80211_local *local);
-void ieee80211_led_tx(struct ieee80211_local *local, int q);
+void ieee80211_led_tx(struct ieee80211_local *local);
void ieee80211_led_assoc(struct ieee80211_local *local,
bool associated);
void ieee80211_led_radio(struct ieee80211_local *local,
@@ -27,7 +27,7 @@ void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
static inline void ieee80211_led_rx(struct ieee80211_local *local)
{
}
-static inline void ieee80211_led_tx(struct ieee80211_local *local, int q)
+static inline void ieee80211_led_tx(struct ieee80211_local *local)
{
}
static inline void ieee80211_led_assoc(struct ieee80211_local *local,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 091088ac7890..21d5d44444d0 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -102,17 +102,8 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
- if (local->scan_channel) {
- chandef.chan = local->scan_channel;
- /* If scanning on oper channel, use whatever channel-type
- * is currently in use.
- */
- if (chandef.chan == local->_oper_chandef.chan) {
- chandef = local->_oper_chandef;
- } else {
- chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
- chandef.center_freq1 = chandef.chan->center_freq;
- }
+ if (local->scan_chandef.chan) {
+ chandef = local->scan_chandef;
} else if (local->tmp_channel) {
chandef.chan = local->tmp_channel;
chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
@@ -151,7 +142,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
changed |= IEEE80211_CONF_CHANGE_SMPS;
}
- power = chandef.chan->max_power;
+ power = ieee80211_chandef_max_power(&chandef);
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -901,9 +892,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (!local->ops->remain_on_channel)
local->hw.wiphy->max_remain_on_channel_duration = 5000;
- if (local->ops->sched_scan_start)
- local->hw.wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
-
/* mac80211 based drivers don't support internal TDLS setup */
if (local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)
local->hw.wiphy->flags |= WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 447f41bbe744..707ac61d63e5 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -62,7 +62,6 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *ie)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- struct ieee80211_local *local = sdata->local;
u32 basic_rates = 0;
struct cfg80211_chan_def sta_chan_def;
@@ -85,7 +84,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
(ifmsh->mesh_auth_id == ie->mesh_config->meshconf_auth)))
return false;
- ieee80211_sta_get_rates(local, ie, ieee80211_get_sdata_band(sdata),
+ ieee80211_sta_get_rates(sdata, ie, ieee80211_get_sdata_band(sdata),
&basic_rates);
if (sdata->vif.bss_conf.basic_rates != basic_rates)
@@ -274,7 +273,9 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS);
*pos++ = neighbors << 1;
/* Mesh capability */
- *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING;
+ *pos = 0x00;
+ *pos |= ifmsh->mshcfg.dot11MeshForwarding ?
+ IEEE80211_MESHCONF_CAPAB_FORWARDING : 0x00;
*pos |= ifmsh->accepting_plinks ?
IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
@@ -831,6 +832,9 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
ieee802_11_parse_elems(pos, len - baselen, false, &elems);
+ if (!elems.mesh_id)
+ return;
+
/* 802.11-2012 10.1.4.3.2 */
if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) &&
!is_broadcast_ether_addr(mgmt->da)) ||
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 02c05fa15c20..6b65d5055f5b 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -379,7 +379,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
u32 rates, basic_rates = 0, changed = 0;
sband = local->hw.wiphy->bands[band];
- rates = ieee80211_sta_get_rates(local, elems, band, &basic_rates);
+ rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
spin_lock_bh(&sta->lock);
sta->last_rx = jiffies;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cc9e02d79b55..86e4ad56b573 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -489,27 +489,6 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
/* frame sending functions */
-static int ieee80211_compatible_rates(const u8 *supp_rates, int supp_rates_len,
- struct ieee80211_supported_band *sband,
- u32 *rates)
-{
- int i, j, count;
- *rates = 0;
- count = 0;
- for (i = 0; i < supp_rates_len; i++) {
- int rate = (supp_rates[i] & 0x7F) * 5;
-
- for (j = 0; j < sband->n_bitrates; j++)
- if (sband->bitrates[j].bitrate == rate) {
- *rates |= BIT(j);
- count++;
- break;
- }
- }
-
- return count;
-}
-
static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u8 ap_ht_param,
struct ieee80211_supported_band *sband,
@@ -628,12 +607,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
struct ieee80211_mgmt *mgmt;
u8 *pos, qos_info;
size_t offset = 0, noffset;
- int i, count, rates_len, supp_rates_len;
+ int i, count, rates_len, supp_rates_len, shift;
u16 capab;
struct ieee80211_supported_band *sband;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_channel *chan;
- u32 rates = 0;
+ u32 rate_flags, rates = 0;
sdata_assert_lock(sdata);
@@ -644,8 +623,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
return;
}
chan = chanctx_conf->def.chan;
+ rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
rcu_read_unlock();
sband = local->hw.wiphy->bands[chan->band];
+ shift = ieee80211_vif_get_shift(&sdata->vif);
if (assoc_data->supp_rates_len) {
/*
@@ -654,17 +635,24 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
* in the association request (e.g. D-Link DAP 1353 in
* b-only mode)...
*/
- rates_len = ieee80211_compatible_rates(assoc_data->supp_rates,
- assoc_data->supp_rates_len,
- sband, &rates);
+ rates_len = ieee80211_parse_bitrates(&chanctx_conf->def, sband,
+ assoc_data->supp_rates,
+ assoc_data->supp_rates_len,
+ &rates);
} else {
/*
* In case AP not provide any supported rates information
* before association, we send information element(s) with
* all rates that we support.
*/
- rates = ~0;
- rates_len = sband->n_bitrates;
+ rates_len = 0;
+ for (i = 0; i < sband->n_bitrates; i++) {
+ if ((rate_flags & sband->bitrates[i].flags)
+ != rate_flags)
+ continue;
+ rates |= BIT(i);
+ rates_len++;
+ }
}
skb = alloc_skb(local->hw.extra_tx_headroom +
@@ -741,8 +729,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
count = 0;
for (i = 0; i < sband->n_bitrates; i++) {
if (BIT(i) & rates) {
- int rate = sband->bitrates[i].bitrate;
- *pos++ = (u8) (rate / 5);
+ int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
+ 5 * (1 << shift));
+ *pos++ = (u8) rate;
if (++count == 8)
break;
}
@@ -755,8 +744,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
for (i++; i < sband->n_bitrates; i++) {
if (BIT(i) & rates) {
- int rate = sband->bitrates[i].bitrate;
- *pos++ = (u8) (rate / 5);
+ int rate;
+ rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
+ 5 * (1 << shift));
+ *pos++ = (u8) rate;
}
}
}
@@ -767,7 +758,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
*pos++ = WLAN_EID_PWR_CAPABILITY;
*pos++ = 2;
*pos++ = 0; /* min tx power */
- *pos++ = chan->max_power; /* max tx power */
+ /* max tx power */
+ *pos++ = ieee80211_chandef_max_power(&chanctx_conf->def);
/* 2. supported channels */
/* TODO: get this in reg domain format */
@@ -1121,6 +1113,15 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
case -1:
cfg80211_chandef_create(&new_chandef, new_chan,
NL80211_CHAN_NO_HT);
+ /* keep width for 5/10 MHz channels */
+ switch (sdata->vif.bss_conf.chandef.width) {
+ case NL80211_CHAN_WIDTH_5:
+ case NL80211_CHAN_WIDTH_10:
+ new_chandef.width = sdata->vif.bss_conf.chandef.width;
+ break;
+ default:
+ break;
+ }
break;
}
@@ -2443,15 +2444,16 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
u8 *supp_rates, unsigned int supp_rates_len,
u32 *rates, u32 *basic_rates,
bool *have_higher_than_11mbit,
- int *min_rate, int *min_rate_index)
+ int *min_rate, int *min_rate_index,
+ int shift, u32 rate_flags)
{
int i, j;
for (i = 0; i < supp_rates_len; i++) {
- int rate = (supp_rates[i] & 0x7f) * 5;
+ int rate = supp_rates[i] & 0x7f;
bool is_basic = !!(supp_rates[i] & 0x80);
- if (rate > 110)
+ if ((rate * 5 * (1 << shift)) > 110)
*have_higher_than_11mbit = true;
/*
@@ -2467,12 +2469,20 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
continue;
for (j = 0; j < sband->n_bitrates; j++) {
- if (sband->bitrates[j].bitrate == rate) {
+ struct ieee80211_rate *br;
+ int brate;
+
+ br = &sband->bitrates[j];
+ if ((rate_flags & br->flags) != rate_flags)
+ continue;
+
+ brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
+ if (brate == rate) {
*rates |= BIT(j);
if (is_basic)
*basic_rates |= BIT(j);
- if (rate < *min_rate) {
- *min_rate = rate;
+ if ((rate * 5) < *min_rate) {
+ *min_rate = rate * 5;
*min_rate_index = j;
}
break;
@@ -2851,14 +2861,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
ieee80211_rx_bss_put(local, bss);
sdata->vif.bss_conf.beacon_rate = bss->beacon_rate;
}
-
- if (!sdata->u.mgd.associated ||
- !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid))
- return;
-
- ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
- elems, true);
-
}
@@ -3147,6 +3149,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
+ ieee80211_sta_process_chanswitch(sdata, rx_status->mactime,
+ &elems, true);
+
if (ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
elems.wmm_param_len))
changed |= BSS_CHANGED_QOS;
@@ -3902,27 +3907,40 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
if (!new_sta)
return -ENOMEM;
}
-
if (new_sta) {
u32 rates = 0, basic_rates = 0;
bool have_higher_than_11mbit;
int min_rate = INT_MAX, min_rate_index = -1;
+ struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_supported_band *sband;
const struct cfg80211_bss_ies *ies;
+ int shift;
+ u32 rate_flags;
sband = local->hw.wiphy->bands[cbss->channel->band];
err = ieee80211_prep_channel(sdata, cbss);
if (err) {
sta_info_free(local, new_sta);
- return err;
+ return -EINVAL;
+ }
+ shift = ieee80211_vif_get_shift(&sdata->vif);
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (WARN_ON(!chanctx_conf)) {
+ rcu_read_unlock();
+ return -EINVAL;
}
+ rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
+ rcu_read_unlock();
ieee80211_get_rates(sband, bss->supp_rates,
bss->supp_rates_len,
&rates, &basic_rates,
&have_higher_than_11mbit,
- &min_rate, &min_rate_index);
+ &min_rate, &min_rate_index,
+ shift, rate_flags);
/*
* This used to be a workaround for basic rates missing
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index acd1f71adc03..0c2a29484c07 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -394,6 +394,8 @@ void ieee80211_sw_roc_work(struct work_struct *work)
if (started)
ieee80211_start_next_roc(local);
+ else if (list_empty(&local->roc_list))
+ ieee80211_run_deferred_scan(local);
}
out_unlock:
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 30d58d2d13e2..e126605cec66 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -210,7 +210,7 @@ static bool rc_no_data_or_no_ack_use_min(struct ieee80211_tx_rate_control *txrc)
!ieee80211_is_data(fc);
}
-static void rc_send_low_broadcast(s8 *idx, u32 basic_rates,
+static void rc_send_low_basicrate(s8 *idx, u32 basic_rates,
struct ieee80211_supported_band *sband)
{
u8 i;
@@ -232,37 +232,28 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates,
/* could not find a basic rate; use original selection */
}
-static inline s8
-rate_lowest_non_cck_index(struct ieee80211_supported_band *sband,
- struct ieee80211_sta *sta)
+static void __rate_control_send_low(struct ieee80211_hw *hw,
+ struct ieee80211_supported_band *sband,
+ struct ieee80211_sta *sta,
+ struct ieee80211_tx_info *info)
{
int i;
+ u32 rate_flags =
+ ieee80211_chandef_rate_flags(&hw->conf.chandef);
+
+ if ((sband->band == IEEE80211_BAND_2GHZ) &&
+ (info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
+ rate_flags |= IEEE80211_RATE_ERP_G;
+ info->control.rates[0].idx = 0;
for (i = 0; i < sband->n_bitrates; i++) {
- struct ieee80211_rate *srate = &sband->bitrates[i];
- if ((srate->bitrate == 10) || (srate->bitrate == 20) ||
- (srate->bitrate == 55) || (srate->bitrate == 110))
+ if (!rate_supported(sta, sband->band, i))
continue;
- if (rate_supported(sta, sband->band, i))
- return i;
+ info->control.rates[0].idx = i;
+ break;
}
-
- /* No matching rate found */
- return 0;
-}
-
-static void __rate_control_send_low(struct ieee80211_hw *hw,
- struct ieee80211_supported_band *sband,
- struct ieee80211_sta *sta,
- struct ieee80211_tx_info *info)
-{
- if ((sband->band != IEEE80211_BAND_2GHZ) ||
- !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
- info->control.rates[0].idx = rate_lowest_index(sband, sta);
- else
- info->control.rates[0].idx =
- rate_lowest_non_cck_index(sband, sta);
+ WARN_ON_ONCE(i == sband->n_bitrates);
info->control.rates[0].count =
(info->flags & IEEE80211_TX_CTL_NO_ACK) ?
@@ -272,28 +263,37 @@ static void __rate_control_send_low(struct ieee80211_hw *hw,
}
-bool rate_control_send_low(struct ieee80211_sta *sta,
+bool rate_control_send_low(struct ieee80211_sta *pubsta,
void *priv_sta,
struct ieee80211_tx_rate_control *txrc)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
struct ieee80211_supported_band *sband = txrc->sband;
+ struct sta_info *sta;
int mcast_rate;
+ bool use_basicrate = false;
- if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) {
- __rate_control_send_low(txrc->hw, sband, sta, info);
+ if (!pubsta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) {
+ __rate_control_send_low(txrc->hw, sband, pubsta, info);
- if (!sta && txrc->bss) {
+ if (!pubsta && txrc->bss) {
mcast_rate = txrc->bss_conf->mcast_rate[sband->band];
if (mcast_rate > 0) {
info->control.rates[0].idx = mcast_rate - 1;
return true;
}
+ use_basicrate = true;
+ } else if (pubsta) {
+ sta = container_of(pubsta, struct sta_info, sta);
+ if (ieee80211_vif_is_mesh(&sta->sdata->vif))
+ use_basicrate = true;
+ }
- rc_send_low_broadcast(&info->control.rates[0].idx,
+ if (use_basicrate)
+ rc_send_low_basicrate(&info->control.rates[0].idx,
txrc->bss_conf->basic_rates,
sband);
- }
+
return true;
}
return false;
@@ -585,6 +585,7 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];
bool has_mcs_mask;
u32 mask;
+ u32 rate_flags;
int i;
/*
@@ -594,6 +595,12 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
*/
mask = sdata->rc_rateidx_mask[info->band];
has_mcs_mask = sdata->rc_has_mcs_mask[info->band];
+ rate_flags =
+ ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ for (i = 0; i < sband->n_bitrates; i++)
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ mask &= ~BIT(i);
+
if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask)
return;
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index d35a5dd3fb13..5dedc56c94db 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -66,11 +66,12 @@ static inline void rate_control_rate_init(struct sta_info *sta)
}
sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band];
- rcu_read_unlock();
ieee80211_sta_set_rx_nss(sta);
- ref->ops->rate_init(ref->priv, sband, ista, priv_sta);
+ ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista,
+ priv_sta);
+ rcu_read_unlock();
set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
}
@@ -81,10 +82,21 @@ static inline void rate_control_rate_update(struct ieee80211_local *local,
struct rate_control_ref *ref = local->rate_ctrl;
struct ieee80211_sta *ista = &sta->sta;
void *priv_sta = sta->rate_ctrl_priv;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+
+ if (ref && ref->ops->rate_update) {
+ rcu_read_lock();
- if (ref && ref->ops->rate_update)
- ref->ops->rate_update(ref->priv, sband, ista,
- priv_sta, changed);
+ chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
+ if (WARN_ON(!chanctx_conf)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def,
+ ista, priv_sta, changed);
+ rcu_read_unlock();
+ }
drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
}
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index e6512e2ffd20..8b5f7ef7c0c9 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -383,14 +383,18 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
static void
calc_rate_durations(enum ieee80211_band band,
struct minstrel_rate *d,
- struct ieee80211_rate *rate)
+ struct ieee80211_rate *rate,
+ struct cfg80211_chan_def *chandef)
{
int erp = !!(rate->flags & IEEE80211_RATE_ERP_G);
+ int shift = ieee80211_chandef_get_shift(chandef);
d->perfect_tx_time = ieee80211_frame_duration(band, 1200,
- rate->bitrate, erp, 1);
+ DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1,
+ shift);
d->ack_time = ieee80211_frame_duration(band, 10,
- rate->bitrate, erp, 1);
+ DIV_ROUND_UP(rate->bitrate, 1 << shift), erp, 1,
+ shift);
}
static void
@@ -418,21 +422,25 @@ init_sample_table(struct minstrel_sta_info *mi)
static void
minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
- struct ieee80211_sta *sta, void *priv_sta)
+ struct cfg80211_chan_def *chandef,
+ struct ieee80211_sta *sta, void *priv_sta)
{
struct minstrel_sta_info *mi = priv_sta;
struct minstrel_priv *mp = priv;
struct ieee80211_rate *ctl_rate;
unsigned int i, n = 0;
unsigned int t_slot = 9; /* FIXME: get real slot time */
+ u32 rate_flags;
mi->sta = sta;
mi->lowest_rix = rate_lowest_index(sband, sta);
ctl_rate = &sband->bitrates[mi->lowest_rix];
mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10,
ctl_rate->bitrate,
- !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1);
+ !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1,
+ ieee80211_chandef_get_shift(chandef));
+ rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate));
mi->max_prob_rate = 0;
@@ -441,15 +449,22 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
unsigned int tx_time_single;
unsigned int cw = mp->cw_min;
+ int shift;
if (!rate_supported(sta, sband->band, i))
continue;
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ continue;
+
n++;
memset(mr, 0, sizeof(*mr));
mr->rix = i;
- mr->bitrate = sband->bitrates[i].bitrate / 5;
- calc_rate_durations(sband->band, mr, &sband->bitrates[i]);
+ shift = ieee80211_chandef_get_shift(chandef);
+ mr->bitrate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
+ (1 << shift) * 5);
+ calc_rate_durations(sband->band, mr, &sband->bitrates[i],
+ chandef);
/* calculate maximum number of retransmissions before
* fallback (based on maximum segment size) */
@@ -547,6 +562,7 @@ minstrel_init_cck_rates(struct minstrel_priv *mp)
{
static const int bitrates[4] = { 10, 20, 55, 110 };
struct ieee80211_supported_band *sband;
+ u32 rate_flags = ieee80211_chandef_rate_flags(&mp->hw->conf.chandef);
int i, j;
sband = mp->hw->wiphy->bands[IEEE80211_BAND_2GHZ];
@@ -559,6 +575,9 @@ minstrel_init_cck_rates(struct minstrel_priv *mp)
if (rate->flags & IEEE80211_RATE_ERP_G)
continue;
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ continue;
+
for (j = 0; j < ARRAY_SIZE(bitrates); j++) {
if (rate->bitrate != bitrates[j])
continue;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index f3bbea1eb9e7..7c323f27ba23 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -776,7 +776,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
/* Don't use EAPOL frames for sampling on non-mrr hw */
if (mp->hw->max_rates == 1 &&
- txrc->skb->protocol == cpu_to_be16(ETH_P_PAE))
+ (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
sample_idx = -1;
else
sample_idx = minstrel_get_sample_rate(mp, mi);
@@ -847,6 +847,7 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
static void
minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
+ struct cfg80211_chan_def *chandef,
struct ieee80211_sta *sta, void *priv_sta)
{
struct minstrel_priv *mp = priv;
@@ -872,8 +873,9 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
mi->sta = sta;
mi->stats_update = jiffies;
- ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1);
- mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1) + ack_dur;
+ ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0);
+ mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0);
+ mi->overhead += ack_dur;
mi->overhead_rtscts = mi->overhead + 2 * ack_dur;
mi->avg_ampdu_len = MINSTREL_FRAC(1, 1);
@@ -942,22 +944,25 @@ use_legacy:
memset(&msp->legacy, 0, sizeof(msp->legacy));
msp->legacy.r = msp->ratelist;
msp->legacy.sample_table = msp->sample_table;
- return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy);
+ return mac80211_minstrel.rate_init(priv, sband, chandef, sta,
+ &msp->legacy);
}
static void
minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband,
+ struct cfg80211_chan_def *chandef,
struct ieee80211_sta *sta, void *priv_sta)
{
- minstrel_ht_update_caps(priv, sband, sta, priv_sta);
+ minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta);
}
static void
minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband,
+ struct cfg80211_chan_def *chandef,
struct ieee80211_sta *sta, void *priv_sta,
u32 changed)
{
- minstrel_ht_update_caps(priv, sband, sta, priv_sta);
+ minstrel_ht_update_caps(priv, sband, chandef, sta, priv_sta);
}
static void *
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c
index 502d3ecc4a79..958fad07b54c 100644
--- a/net/mac80211/rc80211_pid_algo.c
+++ b/net/mac80211/rc80211_pid_algo.c
@@ -293,6 +293,7 @@ rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta,
static void
rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband,
+ struct cfg80211_chan_def *chandef,
struct ieee80211_sta *sta, void *priv_sta)
{
struct rc_pid_sta_info *spinfo = priv_sta;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2c5a79bd3777..674eac1f996c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -87,11 +87,13 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
int len;
/* always present fields */
- len = sizeof(struct ieee80211_radiotap_header) + 9;
+ len = sizeof(struct ieee80211_radiotap_header) + 8;
- /* allocate extra bitmap */
+ /* allocate extra bitmaps */
if (status->vendor_radiotap_len)
len += 4;
+ if (status->chains)
+ len += 4 * hweight8(status->chains);
if (ieee80211_have_rx_timestamp(status)) {
len = ALIGN(len, 8);
@@ -100,6 +102,10 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
len += 1;
+ /* antenna field, if we don't have per-chain info */
+ if (!status->chains)
+ len += 1;
+
/* padding for RX_FLAGS if necessary */
len = ALIGN(len, 2);
@@ -116,6 +122,11 @@ ieee80211_rx_radiotap_space(struct ieee80211_local *local,
len += 12;
}
+ if (status->chains) {
+ /* antenna and antenna signal fields */
+ len += 2 * hweight8(status->chains);
+ }
+
if (status->vendor_radiotap_len) {
if (WARN_ON_ONCE(status->vendor_radiotap_align == 0))
status->vendor_radiotap_align = 1;
@@ -145,8 +156,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_radiotap_header *rthdr;
unsigned char *pos;
+ __le32 *it_present;
+ u32 it_present_val;
u16 rx_flags = 0;
- int mpdulen;
+ u16 channel_flags = 0;
+ int mpdulen, chain;
+ unsigned long chains = status->chains;
mpdulen = skb->len;
if (!(has_fcs && (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)))
@@ -154,25 +169,39 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
rthdr = (struct ieee80211_radiotap_header *)skb_push(skb, rtap_len);
memset(rthdr, 0, rtap_len);
+ it_present = &rthdr->it_present;
/* radiotap header, set always present flags */
- rthdr->it_present =
- cpu_to_le32((1 << IEEE80211_RADIOTAP_FLAGS) |
- (1 << IEEE80211_RADIOTAP_CHANNEL) |
- (1 << IEEE80211_RADIOTAP_ANTENNA) |
- (1 << IEEE80211_RADIOTAP_RX_FLAGS));
rthdr->it_len = cpu_to_le16(rtap_len + status->vendor_radiotap_len);
+ it_present_val = BIT(IEEE80211_RADIOTAP_FLAGS) |
+ BIT(IEEE80211_RADIOTAP_CHANNEL) |
+ BIT(IEEE80211_RADIOTAP_RX_FLAGS);
+
+ if (!status->chains)
+ it_present_val |= BIT(IEEE80211_RADIOTAP_ANTENNA);
- pos = (unsigned char *)(rthdr + 1);
+ for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
+ it_present_val |=
+ BIT(IEEE80211_RADIOTAP_EXT) |
+ BIT(IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE);
+ put_unaligned_le32(it_present_val, it_present);
+ it_present++;
+ it_present_val = BIT(IEEE80211_RADIOTAP_ANTENNA) |
+ BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL);
+ }
if (status->vendor_radiotap_len) {
- rthdr->it_present |=
- cpu_to_le32(BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE)) |
- cpu_to_le32(BIT(IEEE80211_RADIOTAP_EXT));
- put_unaligned_le32(status->vendor_radiotap_bitmap, pos);
- pos += 4;
+ it_present_val |= BIT(IEEE80211_RADIOTAP_VENDOR_NAMESPACE) |
+ BIT(IEEE80211_RADIOTAP_EXT);
+ put_unaligned_le32(it_present_val, it_present);
+ it_present++;
+ it_present_val = status->vendor_radiotap_bitmap;
}
+ put_unaligned_le32(it_present_val, it_present);
+
+ pos = (void *)(it_present + 1);
+
/* the order of the following fields is important */
/* IEEE80211_RADIOTAP_TSFT */
@@ -207,28 +236,35 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
*/
*pos = 0;
} else {
+ int shift = 0;
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
- *pos = rate->bitrate / 5;
+ if (status->flag & RX_FLAG_10MHZ)
+ shift = 1;
+ else if (status->flag & RX_FLAG_5MHZ)
+ shift = 2;
+ *pos = DIV_ROUND_UP(rate->bitrate, 5 * (1 << shift));
}
pos++;
/* IEEE80211_RADIOTAP_CHANNEL */
put_unaligned_le16(status->freq, pos);
pos += 2;
+ if (status->flag & RX_FLAG_10MHZ)
+ channel_flags |= IEEE80211_CHAN_HALF;
+ else if (status->flag & RX_FLAG_5MHZ)
+ channel_flags |= IEEE80211_CHAN_QUARTER;
+
if (status->band == IEEE80211_BAND_5GHZ)
- put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ,
- pos);
+ channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ;
else if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
- put_unaligned_le16(IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ,
- pos);
+ channel_flags |= IEEE80211_CHAN_DYN | IEEE80211_CHAN_2GHZ;
else if (rate && rate->flags & IEEE80211_RATE_ERP_G)
- put_unaligned_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ,
- pos);
+ channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ;
else if (rate)
- put_unaligned_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ,
- pos);
+ channel_flags |= IEEE80211_CHAN_OFDM | IEEE80211_CHAN_2GHZ;
else
- put_unaligned_le16(IEEE80211_CHAN_2GHZ, pos);
+ channel_flags |= IEEE80211_CHAN_2GHZ;
+ put_unaligned_le16(channel_flags, pos);
pos += 2;
/* IEEE80211_RADIOTAP_DBM_ANTSIGNAL */
@@ -242,9 +278,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
/* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */
- /* IEEE80211_RADIOTAP_ANTENNA */
- *pos = status->antenna;
- pos++;
+ if (!status->chains) {
+ /* IEEE80211_RADIOTAP_ANTENNA */
+ *pos = status->antenna;
+ pos++;
+ }
/* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */
@@ -341,6 +379,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
pos += 2;
}
+ for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
+ *pos++ = status->chain_signal[chain];
+ *pos++ = chain;
+ }
+
if (status->vendor_radiotap_len) {
/* ensure 2 byte alignment for the vendor field as required */
if ((pos - (u8 *)rthdr) & 1)
@@ -1012,207 +1055,6 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
static ieee80211_rx_result debug_noinline
-ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
-{
- struct sk_buff *skb = rx->skb;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- int keyidx;
- int hdrlen;
- ieee80211_rx_result result = RX_DROP_UNUSABLE;
- struct ieee80211_key *sta_ptk = NULL;
- int mmie_keyidx = -1;
- __le16 fc;
-
- /*
- * Key selection 101
- *
- * There are four types of keys:
- * - GTK (group keys)
- * - IGTK (group keys for management frames)
- * - PTK (pairwise keys)
- * - STK (station-to-station pairwise keys)
- *
- * When selecting a key, we have to distinguish between multicast
- * (including broadcast) and unicast frames, the latter can only
- * use PTKs and STKs while the former always use GTKs and IGTKs.
- * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
- * unicast frames can also use key indices like GTKs. Hence, if we
- * don't have a PTK/STK we check the key index for a WEP key.
- *
- * Note that in a regular BSS, multicast frames are sent by the
- * AP only, associated stations unicast the frame to the AP first
- * which then multicasts it on their behalf.
- *
- * There is also a slight problem in IBSS mode: GTKs are negotiated
- * with each station, that is something we don't currently handle.
- * The spec seems to expect that one negotiates the same key with
- * every station but there's no such requirement; VLANs could be
- * possible.
- */
-
- /*
- * No point in finding a key and decrypting if the frame is neither
- * addressed to us nor a multicast frame.
- */
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_CONTINUE;
-
- /* start without a key */
- rx->key = NULL;
-
- if (rx->sta)
- sta_ptk = rcu_dereference(rx->sta->ptk);
-
- fc = hdr->frame_control;
-
- if (!ieee80211_has_protected(fc))
- mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
-
- if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
- rx->key = sta_ptk;
- if ((status->flag & RX_FLAG_DECRYPTED) &&
- (status->flag & RX_FLAG_IV_STRIPPED))
- return RX_CONTINUE;
- /* Skip decryption if the frame is not protected. */
- if (!ieee80211_has_protected(fc))
- return RX_CONTINUE;
- } else if (mmie_keyidx >= 0) {
- /* Broadcast/multicast robust management frame / BIP */
- if ((status->flag & RX_FLAG_DECRYPTED) &&
- (status->flag & RX_FLAG_IV_STRIPPED))
- return RX_CONTINUE;
-
- if (mmie_keyidx < NUM_DEFAULT_KEYS ||
- mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
- return RX_DROP_MONITOR; /* unexpected BIP keyidx */
- if (rx->sta)
- rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
- if (!rx->key)
- rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
- } else if (!ieee80211_has_protected(fc)) {
- /*
- * The frame was not protected, so skip decryption. However, we
- * need to set rx->key if there is a key that could have been
- * used so that the frame may be dropped if encryption would
- * have been expected.
- */
- struct ieee80211_key *key = NULL;
- struct ieee80211_sub_if_data *sdata = rx->sdata;
- int i;
-
- if (ieee80211_is_mgmt(fc) &&
- is_multicast_ether_addr(hdr->addr1) &&
- (key = rcu_dereference(rx->sdata->default_mgmt_key)))
- rx->key = key;
- else {
- if (rx->sta) {
- for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
- key = rcu_dereference(rx->sta->gtk[i]);
- if (key)
- break;
- }
- }
- if (!key) {
- for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
- key = rcu_dereference(sdata->keys[i]);
- if (key)
- break;
- }
- }
- if (key)
- rx->key = key;
- }
- return RX_CONTINUE;
- } else {
- u8 keyid;
- /*
- * The device doesn't give us the IV so we won't be
- * able to look up the key. That's ok though, we
- * don't need to decrypt the frame, we just won't
- * be able to keep statistics accurate.
- * Except for key threshold notifications, should
- * we somehow allow the driver to tell us which key
- * the hardware used if this flag is set?
- */
- if ((status->flag & RX_FLAG_DECRYPTED) &&
- (status->flag & RX_FLAG_IV_STRIPPED))
- return RX_CONTINUE;
-
- hdrlen = ieee80211_hdrlen(fc);
-
- if (rx->skb->len < 8 + hdrlen)
- return RX_DROP_UNUSABLE; /* TODO: count this? */
-
- /*
- * no need to call ieee80211_wep_get_keyidx,
- * it verifies a bunch of things we've done already
- */
- skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
- keyidx = keyid >> 6;
-
- /* check per-station GTK first, if multicast packet */
- if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
- rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
-
- /* if not found, try default key */
- if (!rx->key) {
- rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
-
- /*
- * RSNA-protected unicast frames should always be
- * sent with pairwise or station-to-station keys,
- * but for WEP we allow using a key index as well.
- */
- if (rx->key &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
- !is_multicast_ether_addr(hdr->addr1))
- rx->key = NULL;
- }
- }
-
- if (rx->key) {
- if (unlikely(rx->key->flags & KEY_FLAG_TAINTED))
- return RX_DROP_MONITOR;
-
- rx->key->tx_rx_count++;
- /* TODO: add threshold stuff again */
- } else {
- return RX_DROP_MONITOR;
- }
-
- switch (rx->key->conf.cipher) {
- case WLAN_CIPHER_SUITE_WEP40:
- case WLAN_CIPHER_SUITE_WEP104:
- result = ieee80211_crypto_wep_decrypt(rx);
- break;
- case WLAN_CIPHER_SUITE_TKIP:
- result = ieee80211_crypto_tkip_decrypt(rx);
- break;
- case WLAN_CIPHER_SUITE_CCMP:
- result = ieee80211_crypto_ccmp_decrypt(rx);
- break;
- case WLAN_CIPHER_SUITE_AES_CMAC:
- result = ieee80211_crypto_aes_cmac_decrypt(rx);
- break;
- default:
- /*
- * We can reach here only with HW-only algorithms
- * but why didn't it decrypt the frame?!
- */
- return RX_DROP_UNUSABLE;
- }
-
- /* the hdr variable is invalid after the decrypt handlers */
-
- /* either the frame has been decrypted or will be dropped */
- status->flag |= RX_FLAG_DECRYPTED;
-
- return result;
-}
-
-static ieee80211_rx_result debug_noinline
ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx)
{
struct ieee80211_local *local;
@@ -1513,6 +1355,207 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
} /* ieee80211_rx_h_sta_process */
+/*
+ * RX handler: pick the key for a received frame (stored in rx->key) and,
+ * when software decryption is needed, call the decrypt routine matching
+ * the key's cipher.
+ *
+ * Returns RX_CONTINUE on every early-exit path that skips decryption,
+ * RX_DROP_MONITOR when the BIP key index is out of range, no key was
+ * found or the key is tainted, RX_DROP_UNUSABLE when the frame is shorter
+ * than 8 + hdrlen or the cipher has no case in the switch below, and
+ * otherwise the result of the cipher's decrypt routine (after setting
+ * RX_FLAG_DECRYPTED in status->flag).
+ */
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
+{
+ struct sk_buff *skb = rx->skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+ int keyidx;
+ int hdrlen;
+ ieee80211_rx_result result = RX_DROP_UNUSABLE;
+ struct ieee80211_key *sta_ptk = NULL;
+ int mmie_keyidx = -1;
+ __le16 fc;
+
+ /*
+ * Key selection 101
+ *
+ * There are four types of keys:
+ * - GTK (group keys)
+ * - IGTK (group keys for management frames)
+ * - PTK (pairwise keys)
+ * - STK (station-to-station pairwise keys)
+ *
+ * When selecting a key, we have to distinguish between multicast
+ * (including broadcast) and unicast frames, the latter can only
+ * use PTKs and STKs while the former always use GTKs and IGTKs.
+ * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
+ * unicast frames can also use key indices like GTKs. Hence, if we
+ * don't have a PTK/STK we check the key index for a WEP key.
+ *
+ * Note that in a regular BSS, multicast frames are sent by the
+ * AP only, associated stations unicast the frame to the AP first
+ * which then multicasts it on their behalf.
+ *
+ * There is also a slight problem in IBSS mode: GTKs are negotiated
+ * with each station, that is something we don't currently handle.
+ * The spec seems to expect that one negotiates the same key with
+ * every station but there's no such requirement; VLANs could be
+ * possible.
+ */
+
+ /*
+ * No point in finding a key and decrypting if the frame is neither
+ * addressed to us nor a multicast frame.
+ */
+ if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
+ return RX_CONTINUE;
+
+ /* start without a key */
+ rx->key = NULL;
+
+ if (rx->sta)
+ sta_ptk = rcu_dereference(rx->sta->ptk);
+
+ fc = hdr->frame_control;
+
+ /* only frames without the Protected bit are probed for an MMIE key index */
+ if (!ieee80211_has_protected(fc))
+ mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
+
+ if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
+ rx->key = sta_ptk;
+ if ((status->flag & RX_FLAG_DECRYPTED) &&
+ (status->flag & RX_FLAG_IV_STRIPPED))
+ return RX_CONTINUE;
+ /* Skip decryption if the frame is not protected. */
+ if (!ieee80211_has_protected(fc))
+ return RX_CONTINUE;
+ } else if (mmie_keyidx >= 0) {
+ /* Broadcast/multicast robust management frame / BIP */
+ if ((status->flag & RX_FLAG_DECRYPTED) &&
+ (status->flag & RX_FLAG_IV_STRIPPED))
+ return RX_CONTINUE;
+
+ if (mmie_keyidx < NUM_DEFAULT_KEYS ||
+ mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
+ return RX_DROP_MONITOR; /* unexpected BIP keyidx */
+ if (rx->sta)
+ rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
+ if (!rx->key)
+ rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
+ } else if (!ieee80211_has_protected(fc)) {
+ /*
+ * The frame was not protected, so skip decryption. However, we
+ * need to set rx->key if there is a key that could have been
+ * used so that the frame may be dropped if encryption would
+ * have been expected.
+ */
+ struct ieee80211_key *key = NULL;
+ struct ieee80211_sub_if_data *sdata = rx->sdata;
+ int i;
+
+ if (ieee80211_is_mgmt(fc) &&
+ is_multicast_ether_addr(hdr->addr1) &&
+ (key = rcu_dereference(rx->sdata->default_mgmt_key)))
+ rx->key = key;
+ else {
+ if (rx->sta) {
+ for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+ key = rcu_dereference(rx->sta->gtk[i]);
+ if (key)
+ break;
+ }
+ }
+ if (!key) {
+ for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+ key = rcu_dereference(sdata->keys[i]);
+ if (key)
+ break;
+ }
+ }
+ if (key)
+ rx->key = key;
+ }
+ return RX_CONTINUE;
+ } else {
+ u8 keyid;
+ /*
+ * The device doesn't give us the IV so we won't be
+ * able to look up the key. That's ok though, we
+ * don't need to decrypt the frame, we just won't
+ * be able to keep statistics accurate.
+ * Except for key threshold notifications, should
+ * we somehow allow the driver to tell us which key
+ * the hardware used if this flag is set?
+ */
+ if ((status->flag & RX_FLAG_DECRYPTED) &&
+ (status->flag & RX_FLAG_IV_STRIPPED))
+ return RX_CONTINUE;
+
+ hdrlen = ieee80211_hdrlen(fc);
+
+ if (rx->skb->len < 8 + hdrlen)
+ return RX_DROP_UNUSABLE; /* TODO: count this? */
+
+ /*
+ * no need to call ieee80211_wep_get_keyidx,
+ * it verifies a bunch of things we've done already
+ */
+ skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
+ /* key index: top two bits of the byte at offset hdrlen + 3 */
+ keyidx = keyid >> 6;
+
+ /* check per-station GTK first, if multicast packet */
+ if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
+ rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
+
+ /* if not found, try default key */
+ if (!rx->key) {
+ rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
+
+ /*
+ * RSNA-protected unicast frames should always be
+ * sent with pairwise or station-to-station keys,
+ * but for WEP we allow using a key index as well.
+ */
+ if (rx->key &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
+ !is_multicast_ether_addr(hdr->addr1))
+ rx->key = NULL;
+ }
+ }
+
+ if (rx->key) {
+ if (unlikely(rx->key->flags & KEY_FLAG_TAINTED))
+ return RX_DROP_MONITOR;
+
+ rx->key->tx_rx_count++;
+ /* TODO: add threshold stuff again */
+ } else {
+ return RX_DROP_MONITOR;
+ }
+
+ /* dispatch to the software decrypt routine for the key's cipher */
+ switch (rx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ result = ieee80211_crypto_wep_decrypt(rx);
+ break;
+ case WLAN_CIPHER_SUITE_TKIP:
+ result = ieee80211_crypto_tkip_decrypt(rx);
+ break;
+ case WLAN_CIPHER_SUITE_CCMP:
+ result = ieee80211_crypto_ccmp_decrypt(rx);
+ break;
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ result = ieee80211_crypto_aes_cmac_decrypt(rx);
+ break;
+ default:
+ /*
+ * We can reach here only with HW-only algorithms
+ * but why didn't it decrypt the frame?!
+ */
+ return RX_DROP_UNUSABLE;
+ }
+
+ /* the hdr variable is invalid after the decrypt handlers */
+
+ /* either the frame has been decrypted or will be dropped */
+ status->flag |= RX_FLAG_DECRYPTED;
+
+ return result;
+}
+
static inline struct ieee80211_fragment_entry *
ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
unsigned int frag, unsigned int seq, int rx_queue,
@@ -2641,8 +2684,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
sig = status->signal;
if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
- rx->skb->data, rx->skb->len,
- GFP_ATOMIC)) {
+ rx->skb->data, rx->skb->len, 0, GFP_ATOMIC)) {
if (rx->sta)
rx->sta->rx_packets++;
dev_kfree_skb(rx->skb);
@@ -2896,10 +2938,10 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
*/
rx->skb = skb;
- CALL_RXH(ieee80211_rx_h_decrypt)
CALL_RXH(ieee80211_rx_h_check_more_data)
CALL_RXH(ieee80211_rx_h_uapsd_and_pspoll)
CALL_RXH(ieee80211_rx_h_sta_process)
+ CALL_RXH(ieee80211_rx_h_decrypt)
CALL_RXH(ieee80211_rx_h_defragment)
CALL_RXH(ieee80211_rx_h_michael_mic_verify)
/* must be after MMIC verify so header is counted in MPDU mic */
@@ -3014,6 +3056,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
case NL80211_IFTYPE_ADHOC:
if (!bssid)
return 0;
+ if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
+ ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+ return 0;
if (ieee80211_is_beacon(hdr->frame_control)) {
return 1;
} else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 1b122a79b0d8..d2d17a449224 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -66,6 +66,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
struct cfg80211_bss *cbss;
struct ieee80211_bss *bss;
int clen, srlen;
+ enum nl80211_bss_scan_width scan_width;
s32 signal = 0;
if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
@@ -73,8 +74,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
signal = (rx_status->signal * 100) / local->hw.max_signal;
- cbss = cfg80211_inform_bss_frame(local->hw.wiphy, channel,
- mgmt, len, signal, GFP_ATOMIC);
+ scan_width = NL80211_BSS_CHAN_WIDTH_20;
+ if (rx_status->flag & RX_FLAG_5MHZ)
+ scan_width = NL80211_BSS_CHAN_WIDTH_5;
+ if (rx_status->flag & RX_FLAG_10MHZ)
+ scan_width = NL80211_BSS_CHAN_WIDTH_10;
+
+ cbss = cfg80211_inform_bss_width_frame(local->hw.wiphy, channel,
+ scan_width, mgmt, len, signal,
+ GFP_ATOMIC);
if (!cbss)
return NULL;
@@ -204,13 +212,35 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
ieee80211_rx_bss_put(local, bss);
}
+/*
+ * Zero @chandef and set only its width from @scan_width: 5 MHz and 10 MHz
+ * scan widths map to the matching channel widths, every other value maps
+ * to NL80211_CHAN_WIDTH_20_NOHT. All other fields are left zeroed.
+ */
+static void
+ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef,
+			       enum nl80211_bss_scan_width scan_width)
+{
+	memset(chandef, 0, sizeof(*chandef));
+
+	if (scan_width == NL80211_BSS_CHAN_WIDTH_5)
+		chandef->width = NL80211_CHAN_WIDTH_5;
+	else if (scan_width == NL80211_BSS_CHAN_WIDTH_10)
+		chandef->width = NL80211_CHAN_WIDTH_10;
+	else
+		chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
+}
+
/* return false if no more work */
static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
{
struct cfg80211_scan_request *req = local->scan_req;
+ struct cfg80211_chan_def chandef;
enum ieee80211_band band;
int i, ielen, n_chans;
+ if (test_bit(SCAN_HW_CANCELLED, &local->scanning))
+ return false;
+
do {
if (local->hw_scan_band == IEEE80211_NUM_BANDS)
return false;
@@ -229,11 +259,12 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
} while (!n_chans);
local->hw_scan_req->n_channels = n_chans;
+ ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie,
local->hw_scan_ies_bufsize,
req->ie, req->ie_len, band,
- req->rates[band], 0);
+ req->rates[band], &chandef);
local->hw_scan_req->ie_len = ielen;
local->hw_scan_req->no_cck = req->no_cck;
@@ -280,7 +311,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
rcu_assign_pointer(local->scan_sdata, NULL);
local->scanning = 0;
- local->scan_channel = NULL;
+ local->scan_chandef.chan = NULL;
/* Set power back to normal operating levels. */
ieee80211_hw_config(local, 0);
@@ -615,11 +646,34 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
{
int skip;
struct ieee80211_channel *chan;
+ enum nl80211_bss_scan_width oper_scan_width;
skip = 0;
chan = local->scan_req->channels[local->scan_channel_idx];
- local->scan_channel = chan;
+ local->scan_chandef.chan = chan;
+ local->scan_chandef.center_freq1 = chan->center_freq;
+ local->scan_chandef.center_freq2 = 0;
+ switch (local->scan_req->scan_width) {
+ case NL80211_BSS_CHAN_WIDTH_5:
+ local->scan_chandef.width = NL80211_CHAN_WIDTH_5;
+ break;
+ case NL80211_BSS_CHAN_WIDTH_10:
+ local->scan_chandef.width = NL80211_CHAN_WIDTH_10;
+ break;
+ case NL80211_BSS_CHAN_WIDTH_20:
+ /* If scanning on oper channel, use whatever channel-type
+ * is currently in use.
+ */
+ oper_scan_width = cfg80211_chandef_to_scan_width(
+ &local->_oper_chandef);
+ if (chan == local->_oper_chandef.chan &&
+ oper_scan_width == local->scan_req->scan_width)
+ local->scan_chandef = local->_oper_chandef;
+ else
+ local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
+ break;
+ }
if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL))
skip = 1;
@@ -659,7 +713,7 @@ static void ieee80211_scan_state_suspend(struct ieee80211_local *local,
unsigned long *next_delay)
{
/* switch back to the operating channel */
- local->scan_channel = NULL;
+ local->scan_chandef.chan = NULL;
ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
/* disable PS */
@@ -801,7 +855,8 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
const u8 *ssid, u8 ssid_len,
- struct ieee80211_channel *chan)
+ struct ieee80211_channel *chan,
+ enum nl80211_bss_scan_width scan_width)
{
struct ieee80211_local *local = sdata->local;
int ret = -EBUSY;
@@ -851,6 +906,7 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata,
local->int_scan_req->ssids = &local->scan_ssid;
local->int_scan_req->n_ssids = 1;
+ local->int_scan_req->scan_width = scan_width;
memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN);
local->int_scan_req->ssids[0].ssid_len = ssid_len;
@@ -887,7 +943,23 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
if (!local->scan_req)
goto out;
+ /*
+ * We have a scan running and the driver already reported completion,
+ * but the worker hasn't run yet or is stuck on the mutex - mark it as
+ * cancelled.
+ */
+ if (test_bit(SCAN_HW_SCANNING, &local->scanning) &&
+ test_bit(SCAN_COMPLETED, &local->scanning)) {
+ set_bit(SCAN_HW_CANCELLED, &local->scanning);
+ goto out;
+ }
+
if (test_bit(SCAN_HW_SCANNING, &local->scanning)) {
+ /*
+ * Make sure that __ieee80211_scan_completed doesn't trigger a
+ * scan on another band.
+ */
+ set_bit(SCAN_HW_CANCELLED, &local->scanning);
if (local->ops->cancel_hw_scan)
drv_cancel_hw_scan(local,
rcu_dereference_protected(local->scan_sdata,
@@ -912,6 +984,7 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_sched_scan_ies sched_scan_ies = {};
+ struct cfg80211_chan_def chandef;
int ret, i, iebufsz;
iebufsz = 2 + IEEE80211_MAX_SSID_LEN +
@@ -939,10 +1012,12 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
goto out_free;
}
+ ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
+
sched_scan_ies.len[i] =
ieee80211_build_preq_ies(local, sched_scan_ies.ie[i],
iebufsz, req->ie, req->ie_len,
- i, (u32) -1, 0);
+ i, (u32) -1, &chandef);
}
ret = drv_sched_scan_start(local, sdata, req, &sched_scan_ies);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 43439203f4e4..78dc2e99027e 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -180,6 +180,9 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
+ if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ sta->last_rx = jiffies;
+
if (ieee80211_is_data_qos(mgmt->frame_control)) {
struct ieee80211_hdr *hdr = (void *) skb->data;
u8 *qc = ieee80211_get_qos_ctl(hdr);
@@ -235,7 +238,8 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
/* IEEE80211_RADIOTAP_RATE rate */
if (info->status.rates[0].idx >= 0 &&
- !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS))
+ !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
+ IEEE80211_TX_RC_VHT_MCS)))
len += 2;
/* IEEE80211_RADIOTAP_TX_FLAGS */
@@ -244,17 +248,23 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info)
/* IEEE80211_RADIOTAP_DATA_RETRIES */
len += 1;
- /* IEEE80211_TX_RC_MCS */
- if (info->status.rates[0].idx >= 0 &&
- info->status.rates[0].flags & IEEE80211_TX_RC_MCS)
- len += 3;
+ /* IEEE80211_RADIOTAP_MCS
+ * IEEE80211_RADIOTAP_VHT */
+ if (info->status.rates[0].idx >= 0) {
+ if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS)
+ len += 3;
+ else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS)
+ len = ALIGN(len, 2) + 12;
+ }
return len;
}
-static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
- *sband, struct sk_buff *skb,
- int retry_count, int rtap_len)
+static void
+ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
+ struct ieee80211_supported_band *sband,
+ struct sk_buff *skb, int retry_count,
+ int rtap_len, int shift)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
@@ -279,9 +289,13 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
/* IEEE80211_RADIOTAP_RATE */
if (info->status.rates[0].idx >= 0 &&
- !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) {
+ !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS |
+ IEEE80211_TX_RC_VHT_MCS))) {
+ u16 rate;
+
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
- *pos = sband->bitrates[info->status.rates[0].idx].bitrate / 5;
+ rate = sband->bitrates[info->status.rates[0].idx].bitrate;
+ *pos = DIV_ROUND_UP(rate, 5 * (1 << shift));
/* padding for tx flags */
pos += 2;
}
@@ -306,9 +320,12 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
*pos = retry_count;
pos++;
- /* IEEE80211_TX_RC_MCS */
- if (info->status.rates[0].idx >= 0 &&
- info->status.rates[0].flags & IEEE80211_TX_RC_MCS) {
+ if (info->status.rates[0].idx < 0)
+ return;
+
+ /* IEEE80211_RADIOTAP_MCS
+ * IEEE80211_RADIOTAP_VHT */
+ if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) {
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS);
pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS |
IEEE80211_RADIOTAP_MCS_HAVE_GI |
@@ -321,8 +338,48 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_supported_band
pos[1] |= IEEE80211_RADIOTAP_MCS_FMT_GF;
pos[2] = info->status.rates[0].idx;
pos += 3;
- }
+ } else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS) {
+ u16 known = local->hw.radiotap_vht_details &
+ (IEEE80211_RADIOTAP_VHT_KNOWN_GI |
+ IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH);
+
+ rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT);
+ /* required alignment from rthdr */
+ pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2);
+
+ /* u16 known - IEEE80211_RADIOTAP_VHT_KNOWN_* */
+ put_unaligned_le16(known, pos);
+ pos += 2;
+
+ /* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */
+ if (info->status.rates[0].flags & IEEE80211_TX_RC_SHORT_GI)
+ *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI;
+ pos++;
+
+ /* u8 bandwidth */
+ if (info->status.rates[0].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
+ *pos = 1;
+ else if (info->status.rates[0].flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
+ *pos = 4;
+ else if (info->status.rates[0].flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
+ *pos = 11;
+ else /* IEEE80211_TX_RC_{20_MHZ_WIDTH,FIXME:DUP_DATA} */
+ *pos = 0;
+ pos++;
+
+ /* u8 mcs_nss[4] */
+ *pos = (ieee80211_rate_get_vht_mcs(&info->status.rates[0]) << 4) |
+ ieee80211_rate_get_vht_nss(&info->status.rates[0]);
+ pos += 4;
+
+ /* u8 coding */
+ pos++;
+ /* u8 group_id */
+ pos++;
+ /* u16 partial_aid */
+ pos += 2;
+ }
}
static void ieee80211_report_used_skb(struct ieee80211_local *local,
@@ -424,6 +481,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
bool acked;
struct ieee80211_bar *bar;
int rtap_len;
+ int shift = 0;
for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
@@ -458,6 +516,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
continue;
+ shift = ieee80211_vif_get_shift(&sta->sdata->vif);
+
if (info->flags & IEEE80211_TX_STATUS_EOSP)
clear_sta_flag(sta, WLAN_STA_SP);
@@ -557,7 +617,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
rcu_read_unlock();
- ieee80211_led_tx(local, 0);
+ ieee80211_led_tx(local);
/* SNMP counters
* Fragments are passed to low-level drivers as separate skbs, so these
@@ -624,7 +684,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
dev_kfree_skb(skb);
return;
}
- ieee80211_add_tx_radiotap_header(sband, skb, retry_count, rtap_len);
+ ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
+ rtap_len, shift);
/* XXX: is this sufficient for BPF? */
skb_set_mac_header(skb, 0);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index c215fafd7a2f..1aba645882bd 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1906,6 +1906,32 @@ TRACE_EVENT(api_radar_detected,
)
);
+/*
+ * Tracepoint drv_channel_switch_beacon: takes the local, the sdata and a
+ * chandef, stores the LOCAL/VIF/CHANDEF entries and prints them as
+ * "<local><vif> channel switch to <chandef>".
+ */
+TRACE_EVENT(drv_channel_switch_beacon,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_chan_def *chandef),
+
+ TP_ARGS(local, sdata, chandef),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ CHANDEF_ENTRY
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ CHANDEF_ASSIGN(chandef);
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT " channel switch to " CHANDEF_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG
+ )
+);
+
+
#ifdef CONFIG_MAC80211_MESSAGE_TRACING
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mac80211_msg
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4105d0ca963e..70b5a05c0a4e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -40,12 +40,22 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
struct sk_buff *skb, int group_addr,
int next_frag_len)
{
- int rate, mrate, erp, dur, i;
+ int rate, mrate, erp, dur, i, shift = 0;
struct ieee80211_rate *txrate;
struct ieee80211_local *local = tx->local;
struct ieee80211_supported_band *sband;
struct ieee80211_hdr *hdr;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ u32 rate_flags = 0;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(tx->sdata->vif.chanctx_conf);
+ if (chanctx_conf) {
+ shift = ieee80211_chandef_get_shift(&chanctx_conf->def);
+ rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def);
+ }
+ rcu_read_unlock();
/* assume HW handles this */
if (tx->rate.flags & IEEE80211_TX_RC_MCS)
@@ -122,8 +132,11 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
if (r->bitrate > txrate->bitrate)
break;
+ if ((rate_flags & r->flags) != rate_flags)
+ continue;
+
if (tx->sdata->vif.bss_conf.basic_rates & BIT(i))
- rate = r->bitrate;
+ rate = DIV_ROUND_UP(r->bitrate, 1 << shift);
switch (sband->band) {
case IEEE80211_BAND_2GHZ: {
@@ -150,7 +163,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
if (rate == -1) {
/* No matching basic rate found; use highest suitable mandatory
* PHY rate */
- rate = mrate;
+ rate = DIV_ROUND_UP(mrate, 1 << shift);
}
/* Don't calculate ACKs for QoS Frames with NoAck Policy set */
@@ -162,7 +175,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
* (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up
* to closest integer */
dur = ieee80211_frame_duration(sband->band, 10, rate, erp,
- tx->sdata->vif.bss_conf.use_short_preamble);
+ tx->sdata->vif.bss_conf.use_short_preamble,
+ shift);
if (next_frag_len) {
/* Frame is fragmented: duration increases with time needed to
@@ -171,7 +185,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
/* next fragment */
dur += ieee80211_frame_duration(sband->band, next_frag_len,
txrate->bitrate, erp,
- tx->sdata->vif.bss_conf.use_short_preamble);
+ tx->sdata->vif.bss_conf.use_short_preamble,
+ shift);
}
return cpu_to_le16(dur);
@@ -524,9 +539,11 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
- if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol &&
- tx->sdata->control_port_no_encrypt))
- info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+ if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol)) {
+ if (tx->sdata->control_port_no_encrypt)
+ info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+ info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
+ }
return TX_CONTINUE;
}
@@ -764,9 +781,11 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
/*
* Anything but QoS data that has a sequence number field
* (is long enough) gets a sequence number from the global
- * counter.
+ * counter. QoS data frames with a multicast destination
+ * also use the global counter (802.11-2012 9.3.2.10).
*/
- if (!ieee80211_is_data_qos(hdr->frame_control)) {
+ if (!ieee80211_is_data_qos(hdr->frame_control) ||
+ is_multicast_ether_addr(hdr->addr1)) {
/* driver should assign sequence number */
info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
/* for pure STA mode without beacons, we can do it */
@@ -1101,7 +1120,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
tx->sta = rcu_dereference(sdata->u.vlan.sta);
if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr)
return TX_DROP;
- } else if (info->flags & IEEE80211_TX_CTL_INJECTED ||
+ } else if (info->flags & (IEEE80211_TX_CTL_INJECTED |
+ IEEE80211_TX_INTFL_NL80211_FRAME_TX) ||
tx->sdata->control_port_protocol == tx->skb->protocol) {
tx->sta = sta_info_get_bss(sdata, hdr->addr1);
}
@@ -1257,6 +1277,10 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
switch (sdata->vif.type) {
case NL80211_IFTYPE_MONITOR:
+ if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) {
+ vif = &sdata->vif;
+ break;
+ }
sdata = rcu_dereference(local->monitor_sdata);
if (sdata) {
vif = &sdata->vif;
@@ -1281,7 +1305,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
txpending);
ieee80211_tpt_led_trig_tx(local, fc, led_len);
- ieee80211_led_tx(local, 1);
WARN_ON_ONCE(!skb_queue_empty(skbs));
@@ -2320,6 +2343,81 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata,
return 0;
}
+/*
+ * Schedule the interface's csa_finalize_work through ieee80211_queue_work().
+ * @vif: interface whose channel-switch finalization should be queued.
+ */
+void ieee80211_csa_finish(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_local *local = sdata->local;
+
+	ieee80211_queue_work(&local->hw, &sdata->csa_finalize_work);
+}
+EXPORT_SYMBOL(ieee80211_csa_finish);
+
+/*
+ * Decrement the channel-switch counter byte in the beacon tail and, for AP
+ * interfaces with a non-zero probe-response offset, the matching byte in
+ * the probe response template.
+ */
+static void ieee80211_update_csa(struct ieee80211_sub_if_data *sdata,
+				 struct beacon_data *beacon)
+{
+	u8 *bcn_counter = (u8 *)beacon->tail + sdata->csa_counter_offset_beacon;
+	int presp_offset = sdata->csa_counter_offset_presp;
+	struct probe_resp *presp;
+
+	/* warn if the driver did not check for/react to csa completeness */
+	if (WARN_ON(*bcn_counter == 0))
+		return;
+
+	(*bcn_counter)--;
+
+	/* only AP interfaces with a probe response offset have more to do */
+	if (sdata->vif.type != NL80211_IFTYPE_AP || !presp_offset)
+		return;
+
+	rcu_read_lock();
+	presp = rcu_dereference(sdata->u.ap.probe_resp);
+
+	/* if nl80211 accepted the offset, a missing template should not happen. */
+	if (!WARN_ON(!presp))
+		presp->data[presp_offset]--;
+
+	rcu_read_unlock();
+}
+
+/*
+ * Report whether the channel-switch counter in the AP beacon tail has
+ * reached zero.
+ *
+ * @vif: interface to inspect; only NL80211_IFTYPE_AP is handled, any other
+ *	type triggers a WARN_ON and yields false.
+ *
+ * Returns true only when the interface is running, a beacon with a tail is
+ * installed, the counter offset lies inside the tail and the byte at that
+ * offset is 0. Returns false in every other case.
+ */
+bool ieee80211_csa_is_complete(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct beacon_data *beacon = NULL;
+	u8 *beacon_data;
+	size_t beacon_data_len;
+	int counter_beacon = sdata->csa_counter_offset_beacon;
+	bool ret = false;
+
+	if (!ieee80211_sdata_running(sdata))
+		return false;
+
+	rcu_read_lock();
+	if (vif->type == NL80211_IFTYPE_AP) {
+		struct ieee80211_if_ap *ap = &sdata->u.ap;
+
+		beacon = rcu_dereference(ap->beacon);
+		if (WARN_ON(!beacon || !beacon->tail))
+			goto out;
+		beacon_data = beacon->tail;
+		beacon_data_len = beacon->tail_len;
+	} else {
+		WARN_ON(1);
+		goto out;
+	}
+
+	/*
+	 * Valid indices are 0 .. beacon_data_len - 1, so an offset equal to
+	 * the length must be rejected too; otherwise the read below would
+	 * touch one byte past the end of the tail.
+	 */
+	if (WARN_ON(counter_beacon < 0 || counter_beacon >= beacon_data_len))
+		goto out;
+
+	if (beacon_data[counter_beacon] == 0)
+		ret = true;
+ out:
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(ieee80211_csa_is_complete);
+
struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
u16 *tim_offset, u16 *tim_length)
@@ -2350,6 +2448,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
struct beacon_data *beacon = rcu_dereference(ap->beacon);
if (beacon) {
+ if (sdata->vif.csa_active)
+ ieee80211_update_csa(sdata, beacon);
+
/*
* headroom, head length,
* tail length and maximum TIM length
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 22654452a561..69e4ef5348a0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -107,7 +107,8 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx)
}
int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
- int rate, int erp, int short_preamble)
+ int rate, int erp, int short_preamble,
+ int shift)
{
int dur;
@@ -118,6 +119,9 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
*
* rate is in 100 kbps, so dividend is multiplied by 10 in the
* DIV_ROUND_UP() operations.
+ *
+ * shift may be 2 for 5 MHz channels or 1 for 10 MHz channels, and
+ * is assumed to be 0 otherwise.
*/
if (band == IEEE80211_BAND_5GHZ || erp) {
@@ -130,13 +134,23 @@ int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
* TXTIME = T_PREAMBLE + T_SIGNAL + T_SYM x N_SYM + Signal Ext
*
* T_SYM = 4 usec
- * 802.11a - 17.5.2: aSIFSTime = 16 usec
+ * 802.11a - 18.5.2: aSIFSTime = 16 usec
* 802.11g - 19.8.4: aSIFSTime = 10 usec +
* signal ext = 6 usec
*/
dur = 16; /* SIFS + signal ext */
- dur += 16; /* 17.3.2.3: T_PREAMBLE = 16 usec */
- dur += 4; /* 17.3.2.3: T_SIGNAL = 4 usec */
+ dur += 16; /* IEEE 802.11-2012 18.3.2.4: T_PREAMBLE = 16 usec */
+ dur += 4; /* IEEE 802.11-2012 18.3.2.4: T_SIGNAL = 4 usec */
+
+ /* IEEE 802.11-2012 18.3.2.4: all values above are:
+ * * times 4 for 5 MHz
+ * * times 2 for 10 MHz
+ */
+ dur *= 1 << shift;
+
+ /* rates should already consider the channel bandwidth,
+ * don't apply divisor again.
+ */
dur += 4 * DIV_ROUND_UP((16 + 8 * (len + 4) + 6) * 10,
4 * rate); /* T_SYM x N_SYM */
} else {
@@ -168,7 +182,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
{
struct ieee80211_sub_if_data *sdata;
u16 dur;
- int erp;
+ int erp, shift = 0;
bool short_preamble = false;
erp = 0;
@@ -177,10 +191,11 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw,
short_preamble = sdata->vif.bss_conf.use_short_preamble;
if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
erp = rate->flags & IEEE80211_RATE_ERP_G;
+ shift = ieee80211_vif_get_shift(vif);
}
dur = ieee80211_frame_duration(band, frame_len, rate->bitrate, erp,
- short_preamble);
+ short_preamble, shift);
return cpu_to_le16(dur);
}
@@ -194,7 +209,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
struct ieee80211_rate *rate;
struct ieee80211_sub_if_data *sdata;
bool short_preamble;
- int erp;
+ int erp, shift = 0, bitrate;
u16 dur;
struct ieee80211_supported_band *sband;
@@ -210,17 +225,20 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw,
short_preamble = sdata->vif.bss_conf.use_short_preamble;
if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
erp = rate->flags & IEEE80211_RATE_ERP_G;
+ shift = ieee80211_vif_get_shift(vif);
}
+ bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift);
+
/* CTS duration */
- dur = ieee80211_frame_duration(sband->band, 10, rate->bitrate,
- erp, short_preamble);
+ dur = ieee80211_frame_duration(sband->band, 10, bitrate,
+ erp, short_preamble, shift);
/* Data frame duration */
- dur += ieee80211_frame_duration(sband->band, frame_len, rate->bitrate,
- erp, short_preamble);
+ dur += ieee80211_frame_duration(sband->band, frame_len, bitrate,
+ erp, short_preamble, shift);
/* ACK duration */
- dur += ieee80211_frame_duration(sband->band, 10, rate->bitrate,
- erp, short_preamble);
+ dur += ieee80211_frame_duration(sband->band, 10, bitrate,
+ erp, short_preamble, shift);
return cpu_to_le16(dur);
}
@@ -235,7 +253,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
struct ieee80211_rate *rate;
struct ieee80211_sub_if_data *sdata;
bool short_preamble;
- int erp;
+ int erp, shift = 0, bitrate;
u16 dur;
struct ieee80211_supported_band *sband;
@@ -250,15 +268,18 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw,
short_preamble = sdata->vif.bss_conf.use_short_preamble;
if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)
erp = rate->flags & IEEE80211_RATE_ERP_G;
+ shift = ieee80211_vif_get_shift(vif);
}
+ bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift);
+
/* Data frame duration */
- dur = ieee80211_frame_duration(sband->band, frame_len, rate->bitrate,
- erp, short_preamble);
+ dur = ieee80211_frame_duration(sband->band, frame_len, bitrate,
+ erp, short_preamble, shift);
if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) {
/* ACK duration */
- dur += ieee80211_frame_duration(sband->band, 10, rate->bitrate,
- erp, short_preamble);
+ dur += ieee80211_frame_duration(sband->band, 10, bitrate,
+ erp, short_preamble, shift);
}
return cpu_to_le16(dur);
@@ -1052,32 +1073,6 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
}
}
-void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
- const size_t supp_rates_len,
- const u8 *supp_rates)
-{
- struct ieee80211_chanctx_conf *chanctx_conf;
- int i, have_higher_than_11mbit = 0;
-
- /* cf. IEEE 802.11 9.2.12 */
- for (i = 0; i < supp_rates_len; i++)
- if ((supp_rates[i] & 0x7f) * 5 > 110)
- have_higher_than_11mbit = 1;
-
- rcu_read_lock();
- chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
-
- if (chanctx_conf &&
- chanctx_conf->def.chan->band == IEEE80211_BAND_2GHZ &&
- have_higher_than_11mbit)
- sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
- else
- sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
- rcu_read_unlock();
-
- ieee80211_set_wmm_default(sdata, true);
-}
-
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
const u8 *extra, size_t extra_len, const u8 *da,
@@ -1162,7 +1157,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
size_t buffer_len, const u8 *ie, size_t ie_len,
enum ieee80211_band band, u32 rate_mask,
- u8 channel)
+ struct cfg80211_chan_def *chandef)
{
struct ieee80211_supported_band *sband;
u8 *pos = buffer, *end = buffer + buffer_len;
@@ -1171,16 +1166,26 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
u8 rates[32];
int num_rates;
int ext_rates_len;
+ int shift;
+ u32 rate_flags;
sband = local->hw.wiphy->bands[band];
if (WARN_ON_ONCE(!sband))
return 0;
+ rate_flags = ieee80211_chandef_rate_flags(chandef);
+ shift = ieee80211_chandef_get_shift(chandef);
+
num_rates = 0;
for (i = 0; i < sband->n_bitrates; i++) {
if ((BIT(i) & rate_mask) == 0)
continue; /* skip rate */
- rates[num_rates++] = (u8) (sband->bitrates[i].bitrate / 5);
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ continue;
+
+ rates[num_rates++] =
+ (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate,
+ (1 << shift) * 5);
}
supp_rates_len = min_t(int, num_rates, 8);
@@ -1220,12 +1225,13 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
pos += ext_rates_len;
}
- if (channel && sband->band == IEEE80211_BAND_2GHZ) {
+ if (chandef->chan && sband->band == IEEE80211_BAND_2GHZ) {
if (end - pos < 3)
goto out_err;
*pos++ = WLAN_EID_DS_PARAMS;
*pos++ = 1;
- *pos++ = channel;
+ *pos++ = ieee80211_frequency_to_channel(
+ chandef->chan->center_freq);
}
/* insert custom IEs that go before HT */
@@ -1290,9 +1296,9 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
bool directed)
{
struct ieee80211_local *local = sdata->local;
+ struct cfg80211_chan_def chandef;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
- u8 chan_no;
int ies_len;
/*
@@ -1300,10 +1306,11 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
* in order to maximize the chance that we get a response. Some
* badly-behaved APs don't respond when this parameter is included.
*/
+ chandef.width = sdata->vif.bss_conf.chandef.width;
if (directed)
- chan_no = 0;
+ chandef.chan = NULL;
else
- chan_no = ieee80211_frequency_to_channel(chan->center_freq);
+ chandef.chan = chan;
skb = ieee80211_probereq_get(&local->hw, &sdata->vif,
ssid, ssid_len, 100 + ie_len);
@@ -1313,7 +1320,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb),
skb_tailroom(skb),
ie, ie_len, chan->band,
- ratemask, chan_no);
+ ratemask, &chandef);
skb_put(skb, ies_len);
if (dst) {
@@ -1347,16 +1354,19 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
}
}
-u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
+u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
struct ieee802_11_elems *elems,
enum ieee80211_band band, u32 *basic_rates)
{
struct ieee80211_supported_band *sband;
struct ieee80211_rate *bitrates;
size_t num_rates;
- u32 supp_rates;
- int i, j;
- sband = local->hw.wiphy->bands[band];
+ u32 supp_rates, rate_flags;
+ int i, j, shift;
+ sband = sdata->local->hw.wiphy->bands[band];
+
+ rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ shift = ieee80211_vif_get_shift(&sdata->vif);
if (WARN_ON(!sband))
return 1;
@@ -1381,7 +1391,15 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
continue;
for (j = 0; j < num_rates; j++) {
- if (bitrates[j].bitrate == own_rate) {
+ int brate;
+ if ((rate_flags & sband->bitrates[j].flags)
+ != rate_flags)
+ continue;
+
+ brate = DIV_ROUND_UP(sband->bitrates[j].bitrate,
+ 1 << shift);
+
+ if (brate == own_rate) {
supp_rates |= BIT(j);
if (basic_rates && is_basic)
*basic_rates |= BIT(j);
@@ -1435,8 +1453,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
local->resuming = true;
if (local->wowlan) {
- local->wowlan = false;
res = drv_resume(local);
+ local->wowlan = false;
if (res < 0) {
local->resuming = false;
return res;
@@ -2004,18 +2022,56 @@ void ieee80211_ht_oper_to_chandef(struct ieee80211_channel *control_chan,
cfg80211_chandef_create(chandef, control_chan, channel_type);
}
+int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
+ const struct ieee80211_supported_band *sband,
+ const u8 *srates, int srates_len, u32 *rates)
+{
+ u32 rate_flags = ieee80211_chandef_rate_flags(chandef);
+ int shift = ieee80211_chandef_get_shift(chandef);
+ struct ieee80211_rate *br;
+ int brate, rate, i, j, count = 0;
+
+ *rates = 0;
+
+ for (i = 0; i < srates_len; i++) {
+ rate = srates[i] & 0x7f;
+
+ for (j = 0; j < sband->n_bitrates; j++) {
+ br = &sband->bitrates[j];
+ if ((rate_flags & br->flags) != rate_flags)
+ continue;
+
+ brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5);
+ if (brate == rate) {
+ *rates |= BIT(j);
+ count++;
+ break;
+ }
+ }
+ }
+ return count;
+}
+
int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, bool need_basic,
enum ieee80211_band band)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
- int rate;
+ int rate, shift;
u8 i, rates, *pos;
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
+ u32 rate_flags;
+ shift = ieee80211_vif_get_shift(&sdata->vif);
+ rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
sband = local->hw.wiphy->bands[band];
- rates = sband->n_bitrates;
+ rates = 0;
+ for (i = 0; i < sband->n_bitrates; i++) {
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ continue;
+ rates++;
+ }
if (rates > 8)
rates = 8;
@@ -2027,10 +2083,15 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata,
*pos++ = rates;
for (i = 0; i < rates; i++) {
u8 basic = 0;
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ continue;
+
if (need_basic && basic_rates & BIT(i))
basic = 0x80;
rate = sband->bitrates[i].bitrate;
- *pos++ = basic | (u8) (rate / 5);
+ rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
+ 5 * (1 << shift));
+ *pos++ = basic | (u8) rate;
}
return 0;
@@ -2042,12 +2103,22 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
- int rate;
+ int rate, shift;
u8 i, exrates, *pos;
u32 basic_rates = sdata->vif.bss_conf.basic_rates;
+ u32 rate_flags;
+
+ rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+ shift = ieee80211_vif_get_shift(&sdata->vif);
sband = local->hw.wiphy->bands[band];
- exrates = sband->n_bitrates;
+ exrates = 0;
+ for (i = 0; i < sband->n_bitrates; i++) {
+ if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
+ continue;
+ exrates++;
+ }
+
if (exrates > 8)
exrates -= 8;
else
@@ -2062,10 +2133,14 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata,
*pos++ = exrates;
for (i = 8; i < sband->n_bitrates; i++) {
u8 basic = 0;
+ if ((rate_flags & sband->bitrates[i].flags)
+ != rate_flags)
+ continue;
if (need_basic && basic_rates & BIT(i))
basic = 0x80;
- rate = sband->bitrates[i].bitrate;
- *pos++ = basic | (u8) (rate / 5);
+ rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
+ 5 * (1 << shift));
+ *pos++ = basic | (u8) rate;
}
}
return 0;
@@ -2149,12 +2224,24 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
} else {
struct ieee80211_supported_band *sband;
+ int shift = 0;
+ int bitrate;
+
+ if (status->flag & RX_FLAG_10MHZ)
+ shift = 1;
+ if (status->flag & RX_FLAG_5MHZ)
+ shift = 2;
sband = local->hw.wiphy->bands[status->band];
- ri.legacy = sband->bitrates[status->rate_idx].bitrate;
+ bitrate = sband->bitrates[status->rate_idx].bitrate;
+ ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift));
}
rate = cfg80211_calculate_bitrate(&ri);
+ if (WARN_ONCE(!rate,
+ "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n",
+ status->flag, status->rate_idx, status->vht_nss))
+ return 0;
/* rewind from end of MPDU */
if (status->flag & RX_FLAG_MACTIME_END)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 56d22cae5906..6e839b6dff2b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -408,21 +408,10 @@ config NF_NAT_TFTP
depends on NF_CONNTRACK && NF_NAT
default NF_NAT && NF_CONNTRACK_TFTP
-endif # NF_CONNTRACK
-
-# transparent proxy support
-config NETFILTER_TPROXY
- tristate "Transparent proxying support"
- depends on IP_NF_MANGLE
- depends on NETFILTER_ADVANCED
- help
- This option enables transparent proxying support, that is,
- support for handling non-locally bound IPv4 TCP and UDP sockets.
- For it to work you will have to configure certain iptables rules
- and use policy routing. For more information on how to set it up
- see Documentation/networking/tproxy.txt.
+config NETFILTER_SYNPROXY
+ tristate
- To compile it as a module, choose M here. If unsure, say N.
+endif # NF_CONNTRACK
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
@@ -720,10 +709,10 @@ config NETFILTER_XT_TARGET_TEE
this clone be rerouted to another nexthop.
config NETFILTER_XT_TARGET_TPROXY
- tristate '"TPROXY" target support'
- depends on NETFILTER_TPROXY
+ tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
+ depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
@@ -731,6 +720,9 @@ config NETFILTER_XT_TARGET_TPROXY
REDIRECT. It can only be used in the mangle table and is useful
to redirect traffic to a transparent proxy. It does _not_ depend
on Netfilter connection tracking and NAT, unlike REDIRECT.
+ For it to work you will have to configure certain iptables rules
+ and use policy routing. For more information on how to set it up
+ see Documentation/networking/tproxy.txt.
To compile it as a module, choose M here. If unsure, say N.
@@ -1180,10 +1172,10 @@ config NETFILTER_XT_MATCH_SCTP
config NETFILTER_XT_MATCH_SOCKET
tristate '"socket" match support'
- depends on NETFILTER_TPROXY
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
+ depends on (IPV6 || IPV6=n)
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index a1abf87d43bf..c3a0a12907f6 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,6 @@
netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
-nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
@@ -61,8 +61,8 @@ obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
-# transparent proxy support
-obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
+# SYNPROXY
+obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 2217363ab422..593b16ea45e0 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -234,12 +234,13 @@ EXPORT_SYMBOL(skb_make_writable);
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
+void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
+ __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
-void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
+void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
- void (*attach)(struct sk_buff *, struct sk_buff *);
+ void (*attach)(struct sk_buff *, const struct sk_buff *);
if (skb->nfct) {
rcu_read_lock();
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f77139007983..f2e30fb31e78 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1052,7 +1052,7 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
* Not an artificial restriction anymore, as we must prevent
* possible loops created by swapping in setlist type of sets. */
if (!(from->type->features == to->type->features &&
- from->type->family == to->type->family))
+ from->family == to->family))
return -IPSET_ERR_TYPE_MISMATCH;
strncpy(from_name, from->name, IPSET_MAXNAMELEN);
@@ -1489,8 +1489,7 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
if (ret == -EAGAIN)
ret = 1;
- return (ret < 0 && ret != -ENOTEMPTY) ? ret :
- ret > 0 ? 0 : -IPSET_ERR_EXIST;
+ return ret > 0 ? 0 : -IPSET_ERR_EXIST;
}
/* Get headed data of a set */
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 6fdf88ae2353..dac156f819ac 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -116,12 +116,12 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
{
int protoff;
u8 nexthdr;
- __be16 frag_off;
+ __be16 frag_off = 0;
nexthdr = ipv6_hdr(skb)->nexthdr;
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
- if (protoff < 0)
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
return false;
return get_port(skb, nexthdr, protoff, src, port, proto);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 57beb1762b2d..707bc520d629 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -325,18 +325,22 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
static void
mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
{
- u8 i, j;
-
- for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++)
- ;
- h->nets[i].nets--;
-
- if (h->nets[i].nets != 0)
- return;
-
- for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) {
- h->nets[j].cidr = h->nets[j + 1].cidr;
- h->nets[j].nets = h->nets[j + 1].nets;
+ u8 i, j, net_end = nets_length - 1;
+
+ for (i = 0; i < nets_length; i++) {
+ if (h->nets[i].cidr != cidr)
+ continue;
+ if (h->nets[i].nets > 1 || i == net_end ||
+ h->nets[i + 1].nets == 0) {
+ h->nets[i].nets--;
+ return;
+ }
+ for (j = i; j < net_end && h->nets[j].nets; j++) {
+ h->nets[j].cidr = h->nets[j + 1].cidr;
+ h->nets[j].nets = h->nets[j + 1].nets;
+ }
+ h->nets[j].nets = 0;
+ return;
}
}
#endif
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index c6a525373be4..f15f3e28b9c3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -260,7 +260,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip = htonl(ip);
e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -544,7 +544,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index da740ceb56ae..223e9f546d0f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -199,7 +199,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret:
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -396,7 +396,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 84ae6f6ce624..7d798d5d5cd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -368,7 +368,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -634,7 +634,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 9a0869853be5..09d6690bee6f 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -244,7 +244,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1));
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
@@ -489,7 +489,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
ret = adtfn(set, &e, &ext, &ext, flags);
- return ip_set_enomatch(ret, flags, adt) ? 1 :
+ return ip_set_enomatch(ret, flags, adt, set) ? -ret :
ip_set_eexist(ret, flags) ? 0 : ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4f69e83ff836..74fd00c27210 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -116,6 +116,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
+ struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.inpkts++;
@@ -123,11 +124,14 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
- s = this_cpu_ptr(dest->svc->stats.cpustats);
+ rcu_read_lock();
+ svc = rcu_dereference(dest->svc);
+ s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
+ rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.inpkts++;
@@ -146,6 +150,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
+ struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
s->ustats.outpkts++;
@@ -153,11 +158,14 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
- s = this_cpu_ptr(dest->svc->stats.cpustats);
+ rcu_read_lock();
+ svc = rcu_dereference(dest->svc);
+ s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
+ rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.outpkts++;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c8148e487386..a3df9bddc4f7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -460,7 +460,7 @@ static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
atomic_inc(&svc->refcnt);
- dest->svc = svc;
+ rcu_assign_pointer(dest->svc, svc);
}
static void ip_vs_service_free(struct ip_vs_service *svc)
@@ -470,18 +470,25 @@ static void ip_vs_service_free(struct ip_vs_service *svc)
kfree(svc);
}
-static void
-__ip_vs_unbind_svc(struct ip_vs_dest *dest)
+static void ip_vs_service_rcu_free(struct rcu_head *head)
{
- struct ip_vs_service *svc = dest->svc;
+ struct ip_vs_service *svc;
+
+ svc = container_of(head, struct ip_vs_service, rcu_head);
+ ip_vs_service_free(svc);
+}
- dest->svc = NULL;
+static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay)
+{
if (atomic_dec_and_test(&svc->refcnt)) {
IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
svc->fwmark,
IP_VS_DBG_ADDR(svc->af, &svc->addr),
ntohs(svc->port));
- ip_vs_service_free(svc);
+ if (do_delay)
+ call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
+ else
+ ip_vs_service_free(svc);
}
}
@@ -667,11 +674,6 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
IP_VS_DBG_ADDR(svc->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- /* We can not reuse dest while in grace period
- * because conns still can use dest->svc
- */
- if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
- continue;
if (dest->af == svc->af &&
ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
dest->port == dport &&
@@ -697,8 +699,10 @@ out:
static void ip_vs_dest_free(struct ip_vs_dest *dest)
{
+ struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);
+
__ip_vs_dst_cache_reset(dest);
- __ip_vs_unbind_svc(dest);
+ __ip_vs_svc_put(svc, false);
free_percpu(dest->stats.cpustats);
kfree(dest);
}
@@ -771,6 +775,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct ip_vs_service *old_svc;
struct ip_vs_scheduler *sched;
int conn_flags;
@@ -792,13 +797,14 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
atomic_set(&dest->conn_flags, conn_flags);
/* bind the service */
- if (!dest->svc) {
+ old_svc = rcu_dereference_protected(dest->svc, 1);
+ if (!old_svc) {
__ip_vs_bind_svc(dest, svc);
} else {
- if (dest->svc != svc) {
- __ip_vs_unbind_svc(dest);
+ if (old_svc != svc) {
ip_vs_zero_stats(&dest->stats);
__ip_vs_bind_svc(dest, svc);
+ __ip_vs_svc_put(old_svc, true);
}
}
@@ -998,16 +1004,6 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return 0;
}
-static void ip_vs_dest_wait_readers(struct rcu_head *head)
-{
- struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
- rcu_head);
-
- /* End of grace period after unlinking */
- clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
-}
-
-
/*
* Delete a destination (must be already unlinked from the service)
*/
@@ -1023,20 +1019,16 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
*/
ip_vs_rs_unhash(dest);
- if (!cleanup) {
- set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
- call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
- }
-
spin_lock_bh(&ipvs->dest_trash_lock);
IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->refcnt));
if (list_empty(&ipvs->dest_trash) && !cleanup)
mod_timer(&ipvs->dest_trash_timer,
- jiffies + IP_VS_DEST_TRASH_PERIOD);
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
/* dest lives in trash without reference */
list_add(&dest->t_list, &ipvs->dest_trash);
+ dest->idle_start = 0;
spin_unlock_bh(&ipvs->dest_trash_lock);
ip_vs_dest_put(dest);
}
@@ -1108,24 +1100,30 @@ static void ip_vs_dest_trash_expire(unsigned long data)
struct net *net = (struct net *) data;
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_dest *dest, *next;
+ unsigned long now = jiffies;
spin_lock(&ipvs->dest_trash_lock);
list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
- /* Skip if dest is in grace period */
- if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
- continue;
if (atomic_read(&dest->refcnt) > 0)
continue;
+ if (dest->idle_start) {
+ if (time_before(now, dest->idle_start +
+ IP_VS_DEST_TRASH_PERIOD))
+ continue;
+ } else {
+ dest->idle_start = max(1UL, now);
+ continue;
+ }
IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
dest->vfwmark,
- IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
list_del(&dest->t_list);
ip_vs_dest_free(dest);
}
if (!list_empty(&ipvs->dest_trash))
mod_timer(&ipvs->dest_trash_timer,
- jiffies + IP_VS_DEST_TRASH_PERIOD);
+ jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
spin_unlock(&ipvs->dest_trash_lock);
}
@@ -1320,14 +1318,6 @@ out:
return ret;
}
-static void ip_vs_service_rcu_free(struct rcu_head *head)
-{
- struct ip_vs_service *svc;
-
- svc = container_of(head, struct ip_vs_service, rcu_head);
- ip_vs_service_free(svc);
-}
-
/*
* Delete a service from the service list
* - The service must be unlinked, unlocked and not referenced!
@@ -1376,13 +1366,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
/*
* Free the service if nobody refers to it
*/
- if (atomic_dec_and_test(&svc->refcnt)) {
- IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
- svc->fwmark,
- IP_VS_DBG_ADDR(svc->af, &svc->addr),
- ntohs(svc->port));
- call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
- }
+ __ip_vs_svc_put(svc, true);
/* decrease the module use count */
ip_vs_use_count_dec();
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 6bee6d0c73a5..1425e9a924c4 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -59,12 +59,13 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
+ bool add = false;
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
__u64 inbytes, outbytes;
- if (i) {
+ if (add) {
sum->conns += s->ustats.conns;
sum->inpkts += s->ustats.inpkts;
sum->outpkts += s->ustats.outpkts;
@@ -76,6 +77,7 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
+ add = true;
sum->conns = s->ustats.conns;
sum->inpkts = s->ustats.inpkts;
sum->outpkts = s->ustats.outpkts;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 1383b0eadc0e..eff13c94498e 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -93,7 +93,7 @@ struct ip_vs_lblc_entry {
struct hlist_node list;
int af; /* address family */
union nf_inet_addr addr; /* destination IP address */
- struct ip_vs_dest __rcu *dest; /* real server (cache) */
+ struct ip_vs_dest *dest; /* real server (cache) */
unsigned long lastuse; /* last used time */
struct rcu_head rcu_head;
};
@@ -130,20 +130,21 @@ static struct ctl_table vs_vars_table[] = {
};
#endif
-static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
+static void ip_vs_lblc_rcu_free(struct rcu_head *head)
{
- struct ip_vs_dest *dest;
+ struct ip_vs_lblc_entry *en = container_of(head,
+ struct ip_vs_lblc_entry,
+ rcu_head);
- hlist_del_rcu(&en->list);
- /*
- * We don't kfree dest because it is referred either by its service
- * or the trash dest list.
- */
- dest = rcu_dereference_protected(en->dest, 1);
- ip_vs_dest_put(dest);
- kfree_rcu(en, rcu_head);
+ ip_vs_dest_put(en->dest);
+ kfree(en);
}
+static inline void ip_vs_lblc_del(struct ip_vs_lblc_entry *en)
+{
+ hlist_del_rcu(&en->list);
+ call_rcu(&en->rcu_head, ip_vs_lblc_rcu_free);
+}
/*
* Returns hash value for IPVS LBLC entry
@@ -203,30 +204,23 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
struct ip_vs_lblc_entry *en;
en = ip_vs_lblc_get(dest->af, tbl, daddr);
- if (!en) {
- en = kmalloc(sizeof(*en), GFP_ATOMIC);
- if (!en)
- return NULL;
-
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
- en->lastuse = jiffies;
+ if (en) {
+ if (en->dest == dest)
+ return en;
+ ip_vs_lblc_del(en);
+ }
+ en = kmalloc(sizeof(*en), GFP_ATOMIC);
+ if (!en)
+ return NULL;
- ip_vs_dest_hold(dest);
- RCU_INIT_POINTER(en->dest, dest);
+ en->af = dest->af;
+ ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->lastuse = jiffies;
- ip_vs_lblc_hash(tbl, en);
- } else {
- struct ip_vs_dest *old_dest;
+ ip_vs_dest_hold(dest);
+ en->dest = dest;
- old_dest = rcu_dereference_protected(en->dest, 1);
- if (old_dest != dest) {
- ip_vs_dest_put(old_dest);
- ip_vs_dest_hold(dest);
- /* No ordering constraints for refcnt */
- RCU_INIT_POINTER(en->dest, dest);
- }
- }
+ ip_vs_lblc_hash(tbl, en);
return en;
}
@@ -246,7 +240,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
tbl->dead = 1;
for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
}
}
@@ -281,7 +275,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
sysctl_lblc_expiration(svc)))
continue;
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
}
spin_unlock(&svc->sched_lock);
@@ -335,7 +329,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
continue;
- ip_vs_lblc_free(en);
+ ip_vs_lblc_del(en);
atomic_dec(&tbl->entries);
goal--;
}
@@ -443,8 +437,8 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
@@ -511,7 +505,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
* free up entries from the trash at any time.
*/
- dest = rcu_dereference(en->dest);
+ dest = en->dest;
if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
goto out;
@@ -631,7 +625,7 @@ static void __exit ip_vs_lblc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
unregister_pernet_subsys(&ip_vs_lblc_ops);
- synchronize_rcu();
+ rcu_barrier();
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 3cd85b2fc67c..0b8550089a2e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
*/
struct ip_vs_dest_set_elem {
struct list_head list; /* list link */
- struct ip_vs_dest __rcu *dest; /* destination server */
+ struct ip_vs_dest *dest; /* destination server */
struct rcu_head rcu_head;
};
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
if (check) {
list_for_each_entry(e, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- if (d == dest)
- /* already existed */
+ if (e->dest == dest)
return;
}
}
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
return;
ip_vs_dest_hold(dest);
- RCU_INIT_POINTER(e->dest, dest);
+ e->dest = dest;
list_add_rcu(&e->list, &set->list);
atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
set->lastmod = jiffies;
}
+static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
+{
+ struct ip_vs_dest_set_elem *e;
+
+ e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
+ ip_vs_dest_put(e->dest);
+ kfree(e);
+}
+
static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
struct ip_vs_dest_set_elem *e;
list_for_each_entry(e, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- if (d == dest) {
+ if (e->dest == dest) {
/* HIT */
atomic_dec(&set->size);
set->lastmod = jiffies;
- ip_vs_dest_put(dest);
list_del_rcu(&e->list);
- kfree_rcu(e, rcu_head);
+ call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
break;
}
}
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
struct ip_vs_dest_set_elem *e, *ep;
list_for_each_entry_safe(e, ep, &set->list, list) {
- struct ip_vs_dest *d;
-
- d = rcu_dereference_protected(e->dest, 1);
- /*
- * We don't kfree dest because it is referred either
- * by its service or by the trash dest list.
- */
- ip_vs_dest_put(d);
list_del_rcu(&e->list);
- kfree_rcu(e, rcu_head);
+ call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
}
}
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
struct ip_vs_dest *dest, *least;
int loh, doh;
- if (set == NULL)
- return NULL;
-
/* select the first destination server, whose weight > 0 */
list_for_each_entry_rcu(e, &set->list, list) {
- least = rcu_dereference(e->dest);
+ least = e->dest;
if (least->flags & IP_VS_DEST_F_OVERLOAD)
continue;
@@ -195,13 +185,13 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
/* find the destination with the weighted least load */
nextstage:
list_for_each_entry_continue_rcu(e, &set->list, list) {
- dest = rcu_dereference(e->dest);
+ dest = e->dest;
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if ((loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight))
+ if (((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight))
&& (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
least = dest;
loh = doh;
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* select the first destination server, whose weight > 0 */
list_for_each_entry(e, &set->list, list) {
- most = rcu_dereference_protected(e->dest, 1);
+ most = e->dest;
if (atomic_read(&most->weight) > 0) {
moh = ip_vs_dest_conn_overhead(most);
goto nextstage;
@@ -243,11 +233,11 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
/* find the destination with the weighted most load */
nextstage:
list_for_each_entry_continue(e, &set->list, list) {
- dest = rcu_dereference_protected(e->dest, 1);
+ dest = e->dest;
doh = ip_vs_dest_conn_overhead(dest);
/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
- if ((moh * atomic_read(&dest->weight) <
- doh * atomic_read(&most->weight))
+ if (((__s64)moh * atomic_read(&dest->weight) <
+ (__s64)doh * atomic_read(&most->weight))
&& (atomic_read(&dest->weight) > 0)) {
most = dest;
moh = doh;
@@ -414,7 +404,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
spin_lock_bh(&svc->sched_lock);
tbl->dead = 1;
- for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
ip_vs_lblcr_free(en);
}
@@ -440,7 +430,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
struct ip_vs_lblcr_entry *en;
struct hlist_node *next;
- for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -495,7 +485,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
if (goal > tbl->max_size/2)
goal = tbl->max_size/2;
- for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0, j = tbl->rover; i < IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
spin_lock(&svc->sched_lock);
@@ -536,7 +526,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
/*
* Initialize the hash buckets
*/
- for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
+ for (i = 0; i < IP_VS_LBLCR_TAB_SIZE; i++) {
INIT_HLIST_HEAD(&tbl->bucket[i]);
}
tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
@@ -611,8 +601,8 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
unregister_pernet_subsys(&ip_vs_lblcr_ops);
- synchronize_rcu();
+ rcu_barrier();
}
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index d8d9860934fe..961a6de9bb29 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -40,7 +40,7 @@
#include <net/ip_vs.h>
-static inline unsigned int
+static inline int
ip_vs_nq_dest_overhead(struct ip_vs_dest *dest)
{
/*
@@ -59,7 +59,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least = NULL;
- unsigned int loh = 0, doh;
+ int loh = 0, doh;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
@@ -92,8 +92,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
}
if (!least ||
- (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight))) {
+ ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight))) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 3c0da8728036..23e596e438b3 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -66,15 +66,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
unsigned int sctphoff)
{
- __u32 crc32;
- struct sk_buff *iter;
-
- crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff);
- skb_walk_frags(skb, iter)
- crc32 = sctp_update_cksum((u8 *) iter->data,
- skb_headlen(iter), crc32);
- sctph->checksum = sctp_end_cksum(crc32);
-
+ sctph->checksum = sctp_compute_cksum(skb, sctphoff);
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -151,10 +143,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
unsigned int sctphoff;
struct sctphdr *sh, _sctph;
- struct sk_buff *iter;
- __le32 cmp;
- __le32 val;
- __u32 tmp;
+ __le32 cmp, val;
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
@@ -168,13 +157,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
return 0;
cmp = sh->checksum;
-
- tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb));
- skb_walk_frags(skb, iter)
- tmp = sctp_update_cksum((__u8 *) iter->data,
- skb_headlen(iter), tmp);
-
- val = sctp_end_cksum(tmp);
+ val = sctp_compute_cksum(skb, sctphoff);
if (val != cmp) {
/* CRC failure, dump it. */
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index a5284cc3d882..e446b9fa7424 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -44,7 +44,7 @@
#include <net/ip_vs.h>
-static inline unsigned int
+static inline int
ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
{
/*
@@ -63,7 +63,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least;
- unsigned int loh, doh;
+ int loh, doh;
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
@@ -99,8 +99,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_sed_dest_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index f16c027df15b..3588faebe529 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -269,14 +269,20 @@ ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
switch (iph->protocol) {
case IPPROTO_TCP:
th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (unlikely(th == NULL))
+ return 0;
port = th->source;
break;
case IPPROTO_UDP:
uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+ if (unlikely(uh == NULL))
+ return 0;
port = uh->source;
break;
case IPPROTO_SCTP:
sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+ if (unlikely(sh == NULL))
+ return 0;
port = sh->source;
break;
default:
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 6dc1fa128840..b5b4650d50a9 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -35,7 +35,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_iphdr *iph)
{
struct ip_vs_dest *dest, *least;
- unsigned int loh, doh;
+ int loh, doh;
IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n");
@@ -71,8 +71,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
if (dest->flags & IP_VS_DEST_F_OVERLOAD)
continue;
doh = ip_vs_dest_conn_overhead(dest);
- if (loh * atomic_read(&dest->weight) >
- doh * atomic_read(&least->weight)) {
+ if ((__s64)loh * atomic_read(&dest->weight) >
+ (__s64)doh * atomic_read(&least->weight)) {
least = dest;
loh = doh;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b75ff6429a04..c47444e4cf8c 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = old_iph->ttl;
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0283baedcdfb..5d892febd64c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
@@ -47,6 +48,7 @@
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
@@ -238,7 +240,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_conntrack_free(ct);
}
-void nf_ct_delete_from_lists(struct nf_conn *ct)
+static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
@@ -253,7 +255,6 @@ void nf_ct_delete_from_lists(struct nf_conn *ct)
&net->ct.dying);
spin_unlock_bh(&nf_conntrack_lock);
}
-EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
static void death_by_event(unsigned long ul_conntrack)
{
@@ -275,7 +276,7 @@ static void death_by_event(unsigned long ul_conntrack)
nf_ct_put(ct);
}
-void nf_ct_dying_timeout(struct nf_conn *ct)
+static void nf_ct_dying_timeout(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache = nf_ct_ecache_find(ct);
@@ -288,27 +289,33 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
(prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
}
-EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
-static void death_by_timeout(unsigned long ul_conntrack)
+bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
- struct nf_conn *ct = (void *)ul_conntrack;
struct nf_conn_tstamp *tstamp;
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
tstamp->stop = ktime_to_ns(ktime_get_real());
- if (!test_bit(IPS_DYING_BIT, &ct->status) &&
- unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+ if (!nf_ct_is_dying(ct) &&
+ unlikely(nf_conntrack_event_report(IPCT_DESTROY, ct,
+ portid, report) < 0)) {
/* destroy event was not delivered */
nf_ct_delete_from_lists(ct);
nf_ct_dying_timeout(ct);
- return;
+ return false;
}
set_bit(IPS_DYING_BIT, &ct->status);
nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
+ return true;
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete);
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+ nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
}
/*
@@ -643,10 +650,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
return dropped;
if (del_timer(&ct->timeout)) {
- death_by_timeout((unsigned long)ct);
- /* Check if we indeed killed this entry. Reliable event
- delivery may have inserted it into the dying list. */
- if (test_bit(IPS_DYING_BIT, &ct->status)) {
+ if (nf_ct_delete(ct, 0, 0)) {
dropped = 1;
NF_CT_STAT_INC_ATOMIC(net, early_drop);
}
@@ -796,6 +800,11 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
if (IS_ERR(ct))
return (struct nf_conntrack_tuple_hash *)ct;
+ if (tmpl && nfct_synproxy(tmpl)) {
+ nfct_seqadj_ext_add(ct);
+ nfct_synproxy_ext_add(ct);
+ }
+
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
if (timeout_ext)
timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
@@ -1192,7 +1201,7 @@ EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
#endif
/* Used by ipt_REJECT and ip6t_REJECT. */
-static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
+static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -1244,7 +1253,7 @@ found:
void nf_ct_iterate_cleanup(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
- void *data)
+ void *data, u32 portid, int report)
{
struct nf_conn *ct;
unsigned int bucket = 0;
@@ -1252,7 +1261,8 @@ void nf_ct_iterate_cleanup(struct net *net,
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */
if (del_timer(&ct->timeout))
- death_by_timeout((unsigned long)ct);
+ nf_ct_delete(ct, portid, report);
+
/* ... else the timer will get him soon. */
nf_ct_put(ct);
@@ -1260,30 +1270,6 @@ void nf_ct_iterate_cleanup(struct net *net,
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
-struct __nf_ct_flush_report {
- u32 portid;
- int report;
-};
-
-static int kill_report(struct nf_conn *i, void *data)
-{
- struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
- struct nf_conn_tstamp *tstamp;
-
- tstamp = nf_conn_tstamp_find(i);
- if (tstamp && tstamp->stop == 0)
- tstamp->stop = ktime_to_ns(ktime_get_real());
-
- /* If we fail to deliver the event, death_by_timeout() will retry */
- if (nf_conntrack_event_report(IPCT_DESTROY, i,
- fr->portid, fr->report) < 0)
- return 1;
-
- /* Avoid the delivery of the destroy event in death_by_timeout(). */
- set_bit(IPS_DYING_BIT, &i->status);
- return 1;
-}
-
static int kill_all(struct nf_conn *i, void *data)
{
return 1;
@@ -1301,11 +1287,7 @@ EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
{
- struct __nf_ct_flush_report fr = {
- .portid = portid,
- .report = report,
- };
- nf_ct_iterate_cleanup(net, kill_report, &fr);
+ nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
@@ -1351,6 +1333,7 @@ void nf_conntrack_cleanup_end(void)
nf_ct_extend_unregister(&nf_ct_zone_extend);
#endif
nf_conntrack_proto_fini();
+ nf_conntrack_seqadj_fini();
nf_conntrack_labels_fini();
nf_conntrack_helper_fini();
nf_conntrack_timeout_fini();
@@ -1386,7 +1369,7 @@ void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_ct_iterate_cleanup(net, kill_all, NULL);
+ nf_ct_iterate_cleanup(net, kill_all, NULL, 0, 0);
nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0)
busy = 1;
@@ -1556,6 +1539,10 @@ int nf_conntrack_init_start(void)
if (ret < 0)
goto err_labels;
+ ret = nf_conntrack_seqadj_init();
+ if (ret < 0)
+ goto err_seqadj;
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
ret = nf_ct_extend_register(&nf_ct_zone_extend);
if (ret < 0)
@@ -1580,6 +1567,8 @@ err_proto:
nf_ct_extend_unregister(&nf_ct_zone_extend);
err_extend:
#endif
+ nf_conntrack_seqadj_fini();
+err_seqadj:
nf_conntrack_labels_fini();
err_labels:
nf_conntrack_helper_fini();
@@ -1602,9 +1591,6 @@ void nf_conntrack_init_end(void)
/* For use by REJECT target */
RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
RCU_INIT_POINTER(nf_ct_destroy, destroy_conntrack);
-
- /* Howto get NAT offsets */
- RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
}
/*
@@ -1691,8 +1677,3 @@ err_slabname:
err_stat:
return ret;
}
-
-s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- u32 seq);
-EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index bdebd03bc8cd..70866d192efc 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src,
flowi6_to_flowi(&fl1), false)) {
if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
flowi6_to_flowi(&fl2), false)) {
- if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
- sizeof(rt1->rt6i_gateway)) &&
+ if (ipv6_addr_equal(rt6_nexthop(rt1),
+ rt6_nexthop(rt2)) &&
rt1->dst.dev == rt2->dst.dev)
ret = 1;
dst_release(&rt2->dst);
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 355d2ef08094..bb53f120e79c 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -8,12 +8,8 @@
* published by the Free Software Foundation.
*/
-#include <linux/ctype.h>
#include <linux/export.h>
-#include <linux/jhash.h>
-#include <linux/spinlock.h>
#include <linux/types.h>
-#include <linux/slab.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index edc410e778f7..eea936b70d15 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -37,6 +37,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_tuple.h>
@@ -381,9 +382,8 @@ nla_put_failure:
return -1;
}
-#ifdef CONFIG_NF_NAT_NEEDED
static int
-dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
+dump_ct_seq_adj(struct sk_buff *skb, const struct nf_ct_seqadj *seq, int type)
{
struct nlattr *nest_parms;
@@ -391,12 +391,12 @@ dump_nat_seq_adj(struct sk_buff *skb, const struct nf_nat_seq *natseq, int type)
if (!nest_parms)
goto nla_put_failure;
- if (nla_put_be32(skb, CTA_NAT_SEQ_CORRECTION_POS,
- htonl(natseq->correction_pos)) ||
- nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_BEFORE,
- htonl(natseq->offset_before)) ||
- nla_put_be32(skb, CTA_NAT_SEQ_OFFSET_AFTER,
- htonl(natseq->offset_after)))
+ if (nla_put_be32(skb, CTA_SEQADJ_CORRECTION_POS,
+ htonl(seq->correction_pos)) ||
+ nla_put_be32(skb, CTA_SEQADJ_OFFSET_BEFORE,
+ htonl(seq->offset_before)) ||
+ nla_put_be32(skb, CTA_SEQADJ_OFFSET_AFTER,
+ htonl(seq->offset_after)))
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
@@ -408,27 +408,24 @@ nla_put_failure:
}
static inline int
-ctnetlink_dump_nat_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
+ctnetlink_dump_ct_seq_adj(struct sk_buff *skb, const struct nf_conn *ct)
{
- struct nf_nat_seq *natseq;
- struct nf_conn_nat *nat = nfct_nat(ct);
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ struct nf_ct_seqadj *seq;
- if (!(ct->status & IPS_SEQ_ADJUST) || !nat)
+ if (!(ct->status & IPS_SEQ_ADJUST) || !seqadj)
return 0;
- natseq = &nat->seq[IP_CT_DIR_ORIGINAL];
- if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_ORIG) == -1)
+ seq = &seqadj->seq[IP_CT_DIR_ORIGINAL];
+ if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_ORIG) == -1)
return -1;
- natseq = &nat->seq[IP_CT_DIR_REPLY];
- if (dump_nat_seq_adj(skb, natseq, CTA_NAT_SEQ_ADJ_REPLY) == -1)
+ seq = &seqadj->seq[IP_CT_DIR_REPLY];
+ if (dump_ct_seq_adj(skb, seq, CTA_SEQ_ADJ_REPLY) == -1)
return -1;
return 0;
}
-#else
-#define ctnetlink_dump_nat_seq_adj(a, b) (0)
-#endif
static inline int
ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
@@ -502,7 +499,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
ctnetlink_dump_master(skb, ct) < 0 ||
- ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -707,8 +704,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
ctnetlink_dump_master(skb, ct) < 0)
goto nla_put_failure;
- if (events & (1 << IPCT_NATSEQADJ) &&
- ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+ if (events & (1 << IPCT_SEQADJ) &&
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
}
@@ -1038,21 +1035,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
}
- if (del_timer(&ct->timeout)) {
- if (nf_conntrack_event_report(IPCT_DESTROY, ct,
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh)) < 0) {
- nf_ct_delete_from_lists(ct);
- /* we failed to report the event, try later */
- nf_ct_dying_timeout(ct);
- nf_ct_put(ct);
- return 0;
- }
- /* death_by_timeout would report the event again */
- set_bit(IPS_DYING_BIT, &ct->status);
- nf_ct_delete_from_lists(ct);
- nf_ct_put(ct);
- }
+ if (del_timer(&ct->timeout))
+ nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
+
nf_ct_put(ct);
return 0;
@@ -1451,66 +1436,65 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]
return err;
}
-#ifdef CONFIG_NF_NAT_NEEDED
-static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = {
- [CTA_NAT_SEQ_CORRECTION_POS] = { .type = NLA_U32 },
- [CTA_NAT_SEQ_OFFSET_BEFORE] = { .type = NLA_U32 },
- [CTA_NAT_SEQ_OFFSET_AFTER] = { .type = NLA_U32 },
+static const struct nla_policy seqadj_policy[CTA_SEQADJ_MAX+1] = {
+ [CTA_SEQADJ_CORRECTION_POS] = { .type = NLA_U32 },
+ [CTA_SEQADJ_OFFSET_BEFORE] = { .type = NLA_U32 },
+ [CTA_SEQADJ_OFFSET_AFTER] = { .type = NLA_U32 },
};
static inline int
-change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr)
+change_seq_adj(struct nf_ct_seqadj *seq, const struct nlattr * const attr)
{
int err;
- struct nlattr *cda[CTA_NAT_SEQ_MAX+1];
+ struct nlattr *cda[CTA_SEQADJ_MAX+1];
- err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy);
+ err = nla_parse_nested(cda, CTA_SEQADJ_MAX, attr, seqadj_policy);
if (err < 0)
return err;
- if (!cda[CTA_NAT_SEQ_CORRECTION_POS])
+ if (!cda[CTA_SEQADJ_CORRECTION_POS])
return -EINVAL;
- natseq->correction_pos =
- ntohl(nla_get_be32(cda[CTA_NAT_SEQ_CORRECTION_POS]));
+ seq->correction_pos =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_CORRECTION_POS]));
- if (!cda[CTA_NAT_SEQ_OFFSET_BEFORE])
+ if (!cda[CTA_SEQADJ_OFFSET_BEFORE])
return -EINVAL;
- natseq->offset_before =
- ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_BEFORE]));
+ seq->offset_before =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_BEFORE]));
- if (!cda[CTA_NAT_SEQ_OFFSET_AFTER])
+ if (!cda[CTA_SEQADJ_OFFSET_AFTER])
return -EINVAL;
- natseq->offset_after =
- ntohl(nla_get_be32(cda[CTA_NAT_SEQ_OFFSET_AFTER]));
+ seq->offset_after =
+ ntohl(nla_get_be32(cda[CTA_SEQADJ_OFFSET_AFTER]));
return 0;
}
static int
-ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
- const struct nlattr * const cda[])
+ctnetlink_change_seq_adj(struct nf_conn *ct,
+ const struct nlattr * const cda[])
{
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
int ret = 0;
- struct nf_conn_nat *nat = nfct_nat(ct);
- if (!nat)
+ if (!seqadj)
return 0;
- if (cda[CTA_NAT_SEQ_ADJ_ORIG]) {
- ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_ORIGINAL],
- cda[CTA_NAT_SEQ_ADJ_ORIG]);
+ if (cda[CTA_SEQ_ADJ_ORIG]) {
+ ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_ORIGINAL],
+ cda[CTA_SEQ_ADJ_ORIG]);
if (ret < 0)
return ret;
ct->status |= IPS_SEQ_ADJUST;
}
- if (cda[CTA_NAT_SEQ_ADJ_REPLY]) {
- ret = change_nat_seq_adj(&nat->seq[IP_CT_DIR_REPLY],
- cda[CTA_NAT_SEQ_ADJ_REPLY]);
+ if (cda[CTA_SEQ_ADJ_REPLY]) {
+ ret = change_seq_adj(&seqadj->seq[IP_CT_DIR_REPLY],
+ cda[CTA_SEQ_ADJ_REPLY]);
if (ret < 0)
return ret;
@@ -1519,7 +1503,6 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct,
return 0;
}
-#endif
static int
ctnetlink_attach_labels(struct nf_conn *ct, const struct nlattr * const cda[])
@@ -1585,13 +1568,12 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
#endif
-#ifdef CONFIG_NF_NAT_NEEDED
- if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
- err = ctnetlink_change_nat_seq_adj(ct, cda);
+ if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+ err = ctnetlink_change_seq_adj(ct, cda);
if (err < 0)
return err;
}
-#endif
+
if (cda[CTA_LABELS]) {
err = ctnetlink_attach_labels(ct, cda);
if (err < 0)
@@ -1696,13 +1678,11 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
goto err2;
}
-#ifdef CONFIG_NF_NAT_NEEDED
- if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
- err = ctnetlink_change_nat_seq_adj(ct, cda);
+ if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
+ err = ctnetlink_change_seq_adj(ct, cda);
if (err < 0)
goto err2;
}
-#endif
memset(&ct->proto, 0, sizeof(ct->proto));
if (cda[CTA_PROTOINFO]) {
@@ -1816,7 +1796,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
(1 << IPCT_ASSURED) |
(1 << IPCT_HELPER) |
(1 << IPCT_PROTOINFO) |
- (1 << IPCT_NATSEQADJ) |
+ (1 << IPCT_SEQADJ) |
(1 << IPCT_MARK) | events,
ct, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
@@ -1839,7 +1819,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
(1 << IPCT_HELPER) |
(1 << IPCT_LABEL) |
(1 << IPCT_PROTOINFO) |
- (1 << IPCT_NATSEQADJ) |
+ (1 << IPCT_SEQADJ) |
(1 << IPCT_MARK),
ct, NETLINK_CB(skb).portid,
nlmsg_report(nlh));
@@ -1999,6 +1979,27 @@ out:
return err == -EAGAIN ? -ENOBUFS : err;
}
+static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
+ [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
+ [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
+ [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
+ [CTA_EXPECT_ID] = { .type = NLA_U32 },
+ [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
+ .len = NF_CT_HELPER_NAME_LEN - 1 },
+ [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
+ [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
+ [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
+ [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
+ [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
+};
+
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
+ struct nf_conntrack_helper *helper,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask);
+
#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
static size_t
ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
@@ -2073,7 +2074,7 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
goto nla_put_failure;
if ((ct->status & IPS_SEQ_ADJUST) &&
- ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
+ ctnetlink_dump_ct_seq_adj(skb, ct) < 0)
goto nla_put_failure;
#ifdef CONFIG_NF_CONNTRACK_MARK
@@ -2139,10 +2140,70 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
return ret;
}
+static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
+ const struct nf_conn *ct,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
+{
+ int err;
+
+ err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
+ nf_ct_l3num(ct));
+ if (err < 0)
+ return err;
+
+ return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
+ nf_ct_l3num(ct));
+}
+
+static int
+ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
+ u32 portid, u32 report)
+{
+ struct nlattr *cda[CTA_EXPECT_MAX+1];
+ struct nf_conntrack_tuple tuple, mask;
+ struct nf_conntrack_helper *helper = NULL;
+ struct nf_conntrack_expect *exp;
+ int err;
+
+ err = nla_parse_nested(cda, CTA_EXPECT_MAX, attr, exp_nla_policy);
+ if (err < 0)
+ return err;
+
+ err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
+ ct, &tuple, &mask);
+ if (err < 0)
+ return err;
+
+ if (cda[CTA_EXPECT_HELP_NAME]) {
+ const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+ helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
+ nf_ct_protonum(ct));
+ if (helper == NULL)
+ return -EOPNOTSUPP;
+ }
+
+ exp = ctnetlink_alloc_expect((const struct nlattr * const *)cda, ct,
+ helper, &tuple, &mask);
+ if (IS_ERR(exp))
+ return PTR_ERR(exp);
+
+ err = nf_ct_expect_related_report(exp, portid, report);
+ if (err < 0) {
+ nf_ct_expect_put(exp);
+ return err;
+ }
+
+ return 0;
+}
+
static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
.build_size = ctnetlink_nfqueue_build_size,
.build = ctnetlink_nfqueue_build,
.parse = ctnetlink_nfqueue_parse,
+ .attach_expect = ctnetlink_nfqueue_attach_expect,
+ .seq_adjust = nf_ct_tcp_seqadj_set,
};
#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
@@ -2510,21 +2571,6 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
return err;
}
-static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
- [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
- [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
- [CTA_EXPECT_MASK] = { .type = NLA_NESTED },
- [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
- [CTA_EXPECT_ID] = { .type = NLA_U32 },
- [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING,
- .len = NF_CT_HELPER_NAME_LEN - 1 },
- [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
- [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
- [CTA_EXPECT_CLASS] = { .type = NLA_U32 },
- [CTA_EXPECT_NAT] = { .type = NLA_NESTED },
- [CTA_EXPECT_FN] = { .type = NLA_NUL_STRING },
-};
-
static int
ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
@@ -2747,76 +2793,26 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
#endif
}
-static int
-ctnetlink_create_expect(struct net *net, u16 zone,
- const struct nlattr * const cda[],
- u_int8_t u3,
- u32 portid, int report)
+static struct nf_conntrack_expect *
+ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
+ struct nf_conntrack_helper *helper,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
{
- struct nf_conntrack_tuple tuple, mask, master_tuple;
- struct nf_conntrack_tuple_hash *h = NULL;
+ u_int32_t class = 0;
struct nf_conntrack_expect *exp;
- struct nf_conn *ct;
struct nf_conn_help *help;
- struct nf_conntrack_helper *helper = NULL;
- u_int32_t class = 0;
- int err = 0;
-
- /* caller guarantees that those three CTA_EXPECT_* exist */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
- if (err < 0)
- return err;
-
- /* Look for master conntrack of this expectation */
- h = nf_conntrack_find_get(net, zone, &master_tuple);
- if (!h)
- return -ENOENT;
- ct = nf_ct_tuplehash_to_ctrack(h);
-
- /* Look for helper of this expectation */
- if (cda[CTA_EXPECT_HELP_NAME]) {
- const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
-
- helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper == NULL) {
-#ifdef CONFIG_MODULES
- if (request_module("nfct-helper-%s", helpname) < 0) {
- err = -EOPNOTSUPP;
- goto out;
- }
-
- helper = __nf_conntrack_helper_find(helpname,
- nf_ct_l3num(ct),
- nf_ct_protonum(ct));
- if (helper) {
- err = -EAGAIN;
- goto out;
- }
-#endif
- err = -EOPNOTSUPP;
- goto out;
- }
- }
+ int err;
if (cda[CTA_EXPECT_CLASS] && helper) {
class = ntohl(nla_get_be32(cda[CTA_EXPECT_CLASS]));
- if (class > helper->expect_class_max) {
- err = -EINVAL;
- goto out;
- }
+ if (class > helper->expect_class_max)
+ return ERR_PTR(-EINVAL);
}
exp = nf_ct_expect_alloc(ct);
- if (!exp) {
- err = -ENOMEM;
- goto out;
- }
+ if (!exp)
+ return ERR_PTR(-ENOMEM);
+
help = nfct_help(ct);
if (!help) {
if (!cda[CTA_EXPECT_TIMEOUT]) {
@@ -2854,21 +2850,89 @@ ctnetlink_create_expect(struct net *net, u16 zone,
exp->class = class;
exp->master = ct;
exp->helper = helper;
- memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
- memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3));
- exp->mask.src.u.all = mask.src.u.all;
+ exp->tuple = *tuple;
+ exp->mask.src.u3 = mask->src.u3;
+ exp->mask.src.u.all = mask->src.u.all;
if (cda[CTA_EXPECT_NAT]) {
err = ctnetlink_parse_expect_nat(cda[CTA_EXPECT_NAT],
- exp, u3);
+ exp, nf_ct_l3num(ct));
if (err < 0)
goto err_out;
}
- err = nf_ct_expect_related_report(exp, portid, report);
+ return exp;
err_out:
nf_ct_expect_put(exp);
-out:
- nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
+ return ERR_PTR(err);
+}
+
+/*
+ * Create an expectation from a netlink request and register it.
+ *
+ * Parses the CTA_EXPECT_TUPLE, CTA_EXPECT_MASK and CTA_EXPECT_MASTER
+ * attributes (the caller guarantees they are present), looks up the master
+ * conntrack in @net / @zone, optionally resolves the helper named by
+ * CTA_EXPECT_HELP_NAME (requesting module "nfct-helper-<name>" when
+ * CONFIG_MODULES is enabled), builds the expectation with
+ * ctnetlink_alloc_expect() and inserts it with
+ * nf_ct_expect_related_report().
+ *
+ * Returns 0 on success or a negative errno:
+ *   -ENOENT      master conntrack not found
+ *   -EOPNOTSUPP  helper not available (or module request failed)
+ *   -EAGAIN      helper only became available after the module request
+ *   other        propagated from tuple parsing, allocation or insertion
+ */
+static int
+ctnetlink_create_expect(struct net *net, u16 zone,
+			const struct nlattr * const cda[],
+			u_int8_t u3, u32 portid, int report)
+{
+	struct nf_conntrack_tuple tuple, mask, master_tuple;
+	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nf_conntrack_helper *helper = NULL;
+	struct nf_conntrack_expect *exp;
+	struct nf_conn *ct;
+	int err;
+
+	/* caller guarantees that those three CTA_EXPECT_* exist */
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+	if (err < 0)
+		return err;
+
+	/* Look for master conntrack of this expectation */
+	h = nf_conntrack_find_get(net, zone, &master_tuple);
+	if (!h)
+		return -ENOENT;
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	if (cda[CTA_EXPECT_HELP_NAME]) {
+		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
+
+		helper = __nf_conntrack_helper_find(helpname, u3,
+						    nf_ct_protonum(ct));
+		if (helper == NULL) {
+#ifdef CONFIG_MODULES
+			if (request_module("nfct-helper-%s", helpname) < 0) {
+				err = -EOPNOTSUPP;
+				goto err_ct;
+			}
+			helper = __nf_conntrack_helper_find(helpname, u3,
+							    nf_ct_protonum(ct));
+			if (helper) {
+				/* helper is loaded now: ask caller to retry */
+				err = -EAGAIN;
+				goto err_ct;
+			}
+#endif
+			err = -EOPNOTSUPP;
+			goto err_ct;
+		}
+	}
+
+	exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
+	if (IS_ERR(exp)) {
+		err = PTR_ERR(exp);
+		goto err_ct;
+	}
+
+	err = nf_ct_expect_related_report(exp, portid, report);
+	/*
+	 * Drop our references on every path, success included.  Before this
+	 * function was split, the code fell through to both puts after a
+	 * successful insert; returning 0 early leaked the reference taken
+	 * by nf_conntrack_find_get() on the master conntrack and the
+	 * allocation reference on the expectation.
+	 */
+	nf_ct_expect_put(exp);
+err_ct:
+	nf_ct_put(ct);
 	return err;
 }
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0ab9636ac57e..ce3004156eeb 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -281,7 +281,7 @@ void nf_ct_l3proto_pernet_unregister(struct net *net,
nf_ct_l3proto_unregister_sysctl(net, proto);
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(net, kill_l3proto, proto);
+ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister);
@@ -476,7 +476,7 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
/* Remove all contrack entries for this protocol */
- nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
+ nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 2f8010707d01..44d1ea32570a 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -27,6 +27,8 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
@@ -495,21 +497,6 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
}
}
-#ifdef CONFIG_NF_NAT_NEEDED
-static inline s16 nat_offset(const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- u32 seq)
-{
- typeof(nf_ct_nat_offset) get_offset = rcu_dereference(nf_ct_nat_offset);
-
- return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
-}
-#define NAT_OFFSET(ct, dir, seq) \
- (nat_offset(ct, dir, seq))
-#else
-#define NAT_OFFSET(ct, dir, seq) 0
-#endif
-
static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp *state,
enum ip_conntrack_dir dir,
@@ -525,7 +512,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
- s16 receiver_offset;
+ s32 receiver_offset;
bool res, in_recv_win;
/*
@@ -540,7 +527,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
tcp_sack(skb, dataoff, tcph, &sack);
/* Take into account NAT sequence number mangling */
- receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
+ receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
ack -= receiver_offset;
sack -= receiver_offset;
@@ -960,6 +947,21 @@ static int tcp_packet(struct nf_conn *ct,
"state %s ", tcp_conntrack_names[old_state]);
return NF_ACCEPT;
case TCP_CONNTRACK_MAX:
+ /* Special case for SYN proxy: when the SYN to the server or
+ * the SYN/ACK from the server is lost, the client may transmit
+ * a keep-alive packet while in SYN_SENT state. This needs to
+ * be associated with the original conntrack entry in order to
+ * generate a new SYN with the correct sequence number.
+ */
+ if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
+ index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
+ ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
+ ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
+ pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
+ spin_unlock_bh(&ct->lock);
+ return NF_ACCEPT;
+ }
+
/* Invalid packet */
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
new file mode 100644
index 000000000000..5f9bfd060dea
--- /dev/null
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -0,0 +1,238 @@
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+
+/*
+ * Seed the sequence adjustment of one direction with a fixed offset.
+ *
+ * The direction is derived from @ctinfo.  A zero @off is a no-op.
+ * Otherwise IPS_SEQ_ADJUST_BIT is set in ct->status and both the "before"
+ * and "after" offsets of that direction are set to @off.  Always returns 0.
+ */
+int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		      s32 off)
+{
+	struct nf_ct_seqadj *adj;
+
+	if (!off)
+		return 0;
+
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+	adj = &nfct_seqadj(ct)->seq[CTINFO2DIR(ctinfo)];
+	adj->offset_before = off;
+	adj->offset_after = off;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_init);
+
+/*
+ * Record a new sequence number adjustment for one direction.
+ *
+ * @seq is the TCP sequence number, in network byte order, at which the
+ * payload size changed; @off is the size difference.  A zero @off is a
+ * no-op.  Otherwise IPS_SEQ_ADJUST_BIT is set and, under ct->lock, the
+ * adjustment is recorded if no adjustment is pending (before == after) or
+ * if @seq lies after the last correction position.  Always returns 0.
+ */
+int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		     __be32 seq, s32 off)
+{
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_ct_seqadj *this_way;
+	/*
+	 * correction_pos is compared against ntohl()-converted header values
+	 * everywhere it is read, so it must be kept in host byte order.
+	 * Storing the raw __be32 breaks the before()/after() comparisons on
+	 * little-endian machines.
+	 */
+	u32 pos = ntohl(seq);
+
+	if (off == 0)
+		return 0;
+
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+
+	spin_lock_bh(&ct->lock);
+	this_way = &seqadj->seq[dir];
+	if (this_way->offset_before == this_way->offset_after ||
+	    before(this_way->correction_pos, pos)) {
+		this_way->correction_pos = pos;
+		this_way->offset_before = this_way->offset_after;
+		this_way->offset_after += off;
+	}
+	spin_unlock_bh(&ct->lock);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seqadj_set);
+
+/*
+ * Convenience wrapper around nf_ct_seqadj_set() for callers that hold an
+ * skb.  It does nothing for non-TCP conntracks.  For TCP it takes the
+ * sequence number from the TCP header, located ip_hdrlen(skb) bytes past
+ * the network header.
+ */
+void nf_ct_tcp_seqadj_set(struct sk_buff *skb,
+			  struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			  s32 off)
+{
+	const struct tcphdr *th;
+
+	if (nf_ct_protonum(ct) == IPPROTO_TCP) {
+		th = (struct tcphdr *)(skb_network_header(skb) +
+				       ip_hdrlen(skb));
+		nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_ct_tcp_seqadj_set);
+
+/*
+ * Adjust the blocks of one SACK option, including checksum correction.
+ *
+ * @sackoff and @sackend are byte offsets from skb->data delimiting the
+ * SACK blocks.  @seq is the adjustment state whose offsets are subtracted
+ * from each block edge: offset_after when the edge, with offset_before
+ * removed, lies after correction_pos; offset_before otherwise.  The TCP
+ * checksum in @tcph is patched for every rewritten word.
+ */
+static void nf_ct_sack_block_adjust(struct sk_buff *skb,
+				    struct tcphdr *tcph,
+				    unsigned int sackoff,
+				    unsigned int sackend,
+				    struct nf_ct_seqadj *seq)
+{
+	while (sackoff < sackend) {
+		struct tcp_sack_block_wire *sack;
+		__be32 new_start_seq, new_end_seq;
+
+		sack = (void *)skb->data + sackoff;
+		if (after(ntohl(sack->start_seq) - seq->offset_before,
+			  seq->correction_pos))
+			new_start_seq = htonl(ntohl(sack->start_seq) -
+					      seq->offset_after);
+		else
+			new_start_seq = htonl(ntohl(sack->start_seq) -
+					      seq->offset_before);
+
+		if (after(ntohl(sack->end_seq) - seq->offset_before,
+			  seq->correction_pos))
+			new_end_seq = htonl(ntohl(sack->end_seq) -
+					    seq->offset_after);
+		else
+			new_end_seq = htonl(ntohl(sack->end_seq) -
+					    seq->offset_before);
+
+		/*
+		 * Print old and new values in the same (host) byte order and
+		 * as unsigned: sequence numbers are u32 and the new values
+		 * are still __be32 at this point.
+		 */
+		pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n",
+			 ntohl(sack->start_seq), ntohl(new_start_seq),
+			 ntohl(sack->end_seq), ntohl(new_end_seq));
+
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->start_seq, new_start_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->end_seq, new_end_seq, 0);
+		sack->start_seq = new_start_seq;
+		sack->end_seq = new_end_seq;
+		sackoff += sizeof(*sack);
+	}
+}
+
+/*
+ * TCP SACK sequence number adjustment.
+ *
+ * Walks the TCP options between the end of the fixed header and the end
+ * of the header as given by tcph->doff.  Every well-formed SACK option is
+ * adjusted with the state of the opposite direction.  Returns 1 on
+ * success (including an early EOL) and 0 when the skb cannot be made
+ * writable or an option is truncated or malformed.
+ */
+static unsigned int nf_ct_sack_adjust(struct sk_buff *skb,
+				      unsigned int protoff,
+				      struct tcphdr *tcph,
+				      struct nf_conn *ct,
+				      enum ip_conntrack_info ctinfo)
+{
+	unsigned int dir, optoff, optend;
+	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+
+	optoff = protoff + sizeof(struct tcphdr);
+	optend = protoff + tcph->doff * 4;
+
+	if (!skb_make_writable(skb, optend))
+		return 0;
+
+	/*
+	 * skb_make_writable() may reallocate the skb head, which would leave
+	 * the caller's header pointer dangling.  Re-derive it from skb->data
+	 * before it is handed on for checksum updates.
+	 */
+	tcph = (void *)skb->data + protoff;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	while (optoff < optend) {
+		/* Usually: option, length. */
+		unsigned char *op = skb->data + optoff;
+
+		switch (op[0]) {
+		case TCPOPT_EOL:
+			return 1;
+		case TCPOPT_NOP:
+			optoff++;
+			continue;
+		default:
+			/* no partial options */
+			if (optoff + 1 == optend ||
+			    optoff + op[1] > optend ||
+			    op[1] < 2)
+				return 0;
+			if (op[0] == TCPOPT_SACK &&
+			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
+			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+				nf_ct_sack_block_adjust(skb, tcph, optoff + 2,
+							optoff+op[1],
+							&seqadj->seq[!dir]);
+			optoff += op[1];
+		}
+	}
+	return 1;
+}
+
+/*
+ * TCP sequence number adjustment. Returns 1 on success, 0 on failure.
+ *
+ * Rewrites the seq and ack_seq fields of the TCP header found at
+ * skb->data + protoff and patches the TCP checksum accordingly, then
+ * hands the packet to nf_ct_sack_adjust() for the SACK options.
+ * The sequence number is shifted by the offset of the packet's own
+ * direction, the acknowledgement number by the offset of the opposite
+ * direction. Failure means the skb could not be made writable or
+ * nf_ct_sack_adjust() returned 0.
+ */
+int nf_ct_seq_adjust(struct sk_buff *skb,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ unsigned int protoff)
+{
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ struct tcphdr *tcph;
+ __be32 newseq, newack;
+ s32 seqoff, ackoff;
+ struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
+ struct nf_ct_seqadj *this_way, *other_way;
+ int res;
+
+ this_way = &seqadj->seq[dir];
+ other_way = &seqadj->seq[!dir];
+
+ if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
+ return 0;
+
+ /* Header pointer is taken only after the skb has been made writable. */
+ tcph = (void *)skb->data + protoff;
+ /* ct->lock is held while the offsets are read and the packet rewritten. */
+ spin_lock_bh(&ct->lock);
+ /* Pick the offset that applies at this sequence number. */
+ if (after(ntohl(tcph->seq), this_way->correction_pos))
+ seqoff = this_way->offset_after;
+ else
+ seqoff = this_way->offset_before;
+
+ /* The ack has offset_before removed first, then is compared. */
+ if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+ other_way->correction_pos))
+ ackoff = other_way->offset_after;
+ else
+ ackoff = other_way->offset_before;
+
+ newseq = htonl(ntohl(tcph->seq) + seqoff);
+ newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+
+ /* Checksum is patched while the header still holds the old values. */
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+
+ pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+ ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+ ntohl(newack));
+
+ tcph->seq = newseq;
+ tcph->ack_seq = newack;
+
+ /* SACK blocks are adjusted under the same lock. */
+ res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+ spin_unlock_bh(&ct->lock);
+
+ return res;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_adjust);
+
+/*
+ * Return the sequence offset that applies to @seq in direction @dir.
+ *
+ * Yields 0 when the conntrack carries no seqadj extension.  Otherwise the
+ * result is offset_after if @seq lies after the recorded correction
+ * position, and offset_before if it does not.
+ */
+s32 nf_ct_seq_offset(const struct nf_conn *ct,
+		     enum ip_conntrack_dir dir,
+		     u32 seq)
+{
+	struct nf_conn_seqadj *ext = nfct_seqadj(ct);
+	struct nf_ct_seqadj *adj;
+
+	if (ext == NULL)
+		return 0;
+
+	adj = &ext->seq[dir];
+	if (after(seq, adj->correction_pos))
+		return adj->offset_after;
+
+	return adj->offset_before;
+}
+EXPORT_SYMBOL_GPL(nf_ct_seq_offset);
+
+/*
+ * Conntrack extension descriptor for the sequence adjustment data:
+ * size and alignment of struct nf_conn_seqadj, under id NF_CT_EXT_SEQADJ.
+ * Registered and unregistered by nf_conntrack_seqadj_init()/_fini().
+ */
+static struct nf_ct_ext_type nf_ct_seqadj_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_seqadj),
+ .align = __alignof__(struct nf_conn_seqadj),
+ .id = NF_CT_EXT_SEQADJ,
+};
+
+/*
+ * Register the seqadj conntrack extension type.
+ * Returns the result of nf_ct_extend_register().
+ */
+int nf_conntrack_seqadj_init(void)
+{
+ return nf_ct_extend_register(&nf_ct_seqadj_extend);
+}
+
+/* Unregister the seqadj conntrack extension type. */
+void nf_conntrack_seqadj_fini(void)
+{
+ nf_ct_extend_unregister(&nf_ct_seqadj_extend);
+}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 038eee5c8f85..6f0f4f7f68a5 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -25,6 +25,7 @@
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
@@ -402,6 +403,9 @@ nf_nat_setup_info(struct nf_conn *ct,
ct->status |= IPS_SRC_NAT;
else
ct->status |= IPS_DST_NAT;
+
+ if (nfct_help(ct))
+ nfct_seqadj_ext_add(ct);
}
if (maniptype == NF_NAT_MANIP_SRC) {
@@ -497,7 +501,7 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
rtnl_lock();
for_each_net(net)
- nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock();
}
@@ -511,7 +515,7 @@ static void nf_nat_l3proto_clean(u8 l3proto)
rtnl_lock();
for_each_net(net)
- nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
rtnl_unlock();
}
@@ -749,7 +753,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
struct nf_nat_proto_clean clean = {};
- nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
+ nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean, 0, 0);
synchronize_rcu();
nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}
@@ -764,10 +768,6 @@ static struct nf_ct_helper_expectfn follow_master_nat = {
.expectfn = nf_nat_follow_master,
};
-static struct nfq_ct_nat_hook nfq_ct_nat = {
- .seq_adjust = nf_nat_tcp_seq_adjust,
-};
-
static int __init nf_nat_init(void)
{
int ret;
@@ -787,14 +787,9 @@ static int __init nf_nat_init(void)
/* Initialize fake conntrack so that NAT will skip it */
nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
- BUG_ON(nf_nat_seq_adjust_hook != NULL);
- RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
nfnetlink_parse_nat_setup);
- BUG_ON(nf_ct_nat_offset != NULL);
- RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
- RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
#ifdef CONFIG_XFRM
BUG_ON(nf_nat_decode_session_hook != NULL);
RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
@@ -813,10 +808,7 @@ static void __exit nf_nat_cleanup(void)
unregister_pernet_subsys(&nf_nat_net_ops);
nf_ct_extend_unregister(&nat_extend);
nf_ct_helper_expectfn_unregister(&follow_master_nat);
- RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
- RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
- RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
#ifdef CONFIG_XFRM
RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
#endif
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 85e20a919081..2840abb5bb99 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -20,74 +20,13 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
-#define DUMP_OFFSET(x) \
- pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
- x->offset_before, x->offset_after, x->correction_pos);
-
-static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
- int sizediff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- struct nf_conn_nat *nat = nfct_nat(ct);
- struct nf_nat_seq *this_way = &nat->seq[dir];
-
- pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
- seq, sizediff);
-
- pr_debug("adjust_tcp_sequence: Seq_offset before: ");
- DUMP_OFFSET(this_way);
-
- spin_lock_bh(&nf_nat_seqofs_lock);
-
- /* SYN adjust. If it's uninitialized, or this is after last
- * correction, record it: we don't handle more than one
- * adjustment in the window, but do deal with common case of a
- * retransmit */
- if (this_way->offset_before == this_way->offset_after ||
- before(this_way->correction_pos, seq)) {
- this_way->correction_pos = seq;
- this_way->offset_before = this_way->offset_after;
- this_way->offset_after += sizediff;
- }
- spin_unlock_bh(&nf_nat_seqofs_lock);
-
- pr_debug("adjust_tcp_sequence: Seq_offset after: ");
- DUMP_OFFSET(this_way);
-}
-
-/* Get the offset value, for conntrack */
-s16 nf_nat_get_offset(const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- u32 seq)
-{
- struct nf_conn_nat *nat = nfct_nat(ct);
- struct nf_nat_seq *this_way;
- s16 offset;
-
- if (!nat)
- return 0;
-
- this_way = &nat->seq[dir];
- spin_lock_bh(&nf_nat_seqofs_lock);
- offset = after(seq, this_way->correction_pos)
- ? this_way->offset_after : this_way->offset_before;
- spin_unlock_bh(&nf_nat_seqofs_lock);
-
- return offset;
-}
-
/* Frobs data inside this packet, which is linear. */
static void mangle_contents(struct sk_buff *skb,
unsigned int dataoff,
@@ -142,30 +81,6 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
return 1;
}
-void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
- __be32 seq, s16 off)
-{
- if (!off)
- return;
- set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
- adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
- nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
-}
-EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
-
-void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
- u32 ctinfo, int off)
-{
- const struct tcphdr *th;
-
- if (nf_ct_protonum(ct) != IPPROTO_TCP)
- return;
-
- th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
- nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
-}
-EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
-
/* Generic function for mangling variable-length address changes inside
* NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
* command in FTP).
@@ -210,8 +125,8 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
datalen, oldlen);
if (adjust && rep_len != match_len)
- nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
- (int)rep_len - (int)match_len);
+ nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
+ (int)rep_len - (int)match_len);
return 1;
}
@@ -271,145 +186,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
}
EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
- struct tcphdr *tcph,
- unsigned int sackoff,
- unsigned int sackend,
- struct nf_nat_seq *natseq)
-{
- while (sackoff < sackend) {
- struct tcp_sack_block_wire *sack;
- __be32 new_start_seq, new_end_seq;
-
- sack = (void *)skb->data + sackoff;
- if (after(ntohl(sack->start_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_after);
- else
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_before);
-
- if (after(ntohl(sack->end_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_after);
- else
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_before);
-
- pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
- ntohl(sack->start_seq), new_start_seq,
- ntohl(sack->end_seq), new_end_seq);
-
- inet_proto_csum_replace4(&tcph->check, skb,
- sack->start_seq, new_start_seq, 0);
- inet_proto_csum_replace4(&tcph->check, skb,
- sack->end_seq, new_end_seq, 0);
- sack->start_seq = new_start_seq;
- sack->end_seq = new_end_seq;
- sackoff += sizeof(*sack);
- }
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff *skb,
- unsigned int protoff,
- struct tcphdr *tcph,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dir, optoff, optend;
- struct nf_conn_nat *nat = nfct_nat(ct);
-
- optoff = protoff + sizeof(struct tcphdr);
- optend = protoff + tcph->doff * 4;
-
- if (!skb_make_writable(skb, optend))
- return 0;
-
- dir = CTINFO2DIR(ctinfo);
-
- while (optoff < optend) {
- /* Usually: option, length. */
- unsigned char *op = skb->data + optoff;
-
- switch (op[0]) {
- case TCPOPT_EOL:
- return 1;
- case TCPOPT_NOP:
- optoff++;
- continue;
- default:
- /* no partial options */
- if (optoff + 1 == optend ||
- optoff + op[1] > optend ||
- op[1] < 2)
- return 0;
- if (op[0] == TCPOPT_SACK &&
- op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
- ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
- sack_adjust(skb, tcph, optoff+2,
- optoff+op[1], &nat->seq[!dir]);
- optoff += op[1];
- }
- }
- return 1;
-}
-
-/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
-int
-nf_nat_seq_adjust(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int protoff)
-{
- struct tcphdr *tcph;
- int dir;
- __be32 newseq, newack;
- s16 seqoff, ackoff;
- struct nf_conn_nat *nat = nfct_nat(ct);
- struct nf_nat_seq *this_way, *other_way;
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &nat->seq[dir];
- other_way = &nat->seq[!dir];
-
- if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
- return 0;
-
- tcph = (void *)skb->data + protoff;
- if (after(ntohl(tcph->seq), this_way->correction_pos))
- seqoff = this_way->offset_after;
- else
- seqoff = this_way->offset_before;
-
- if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
- ackoff = other_way->offset_after;
- else
- ackoff = other_way->offset_before;
-
- newseq = htonl(ntohl(tcph->seq) + seqoff);
- newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
- inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
- inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
-
- pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
- ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
- ntohl(newack));
-
- tcph->seq = newseq;
- tcph->ack_seq = newack;
-
- return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
-}
-
/* Setup NAT on this expected conntrack so it follows master. */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void nf_nat_follow_master(struct nf_conn *ct,
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 396e55d46f90..754536f2c674 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -34,9 +34,7 @@ sctp_manip_pkt(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct sk_buff *frag;
sctp_sctphdr_t *hdr;
- __u32 crc32;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
@@ -51,11 +49,7 @@ sctp_manip_pkt(struct sk_buff *skb,
hdr->dest = tuple->dst.u.sctp.port;
}
- crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
- skb_walk_frags(skb, frag)
- crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
- crc32);
- hdr->checksum = sctp_end_cksum(crc32);
+ hdr->checksum = sctp_compute_cksum(skb, hdroff);
return true;
}
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index dac11f73868e..f9790405b7ff 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -20,6 +20,7 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <linux/netfilter/nf_conntrack_sip.h>
MODULE_LICENSE("GPL");
@@ -308,7 +309,7 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
return;
th = (struct tcphdr *)(skb->data + protoff);
- nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
+ nf_ct_seqadj_set(ct, ctinfo, th->seq, off);
}
/* Handles expected signalling connections and media streams */
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
new file mode 100644
index 000000000000..cdf4567ba9b3
--- /dev/null
+++ b/net/netfilter/nf_synproxy_core.c
@@ -0,0 +1,434 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <asm/unaligned.h>
+#include <net/tcp.h>
+#include <net/netns/generic.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_tcpudp.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+int synproxy_net_id;
+EXPORT_SYMBOL_GPL(synproxy_net_id);
+
+/*
+ * Parse the TCP options of the segment whose TCP header @th sits at
+ * offset @doff in @skb, and record the ones SYNPROXY cares about in @opts.
+ *
+ * opts->options is reset and then gets one XT_SYNPROXY_OPT_* flag per
+ * recognised, correctly sized option (MSS, window scale, timestamp,
+ * SACK-permitted).  The matching value fields (mss, wscale, tsval/tsecr)
+ * are only written when their option is present.  Window scale is capped
+ * at 14.
+ *
+ * Returns false only when the option bytes cannot be fetched from the
+ * skb.  Malformed or truncated options end parsing early and still return
+ * true with whatever was collected up to that point.
+ */
+bool
+synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+		       const struct tcphdr *th, struct synproxy_options *opts)
+{
+	int length = (th->doff * 4) - sizeof(*th);
+	u8 buf[40], *ptr;
+
+	ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
+	if (ptr == NULL)
+		return false;
+
+	opts->options = 0;
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return true;
+		case TCPOPT_NOP:
+			length--;
+			continue;
+		default:
+			/*
+			 * A kind byte as the very last option byte has no
+			 * length byte after it; reading one would run past
+			 * the end of the option area.
+			 */
+			if (length < 2)
+				return true;
+			opsize = *ptr++;
+			if (opsize < 2)
+				return true;
+			if (opsize > length)
+				return true;
+
+			switch (opcode) {
+			case TCPOPT_MSS:
+				if (opsize == TCPOLEN_MSS) {
+					opts->mss = get_unaligned_be16(ptr);
+					opts->options |= XT_SYNPROXY_OPT_MSS;
+				}
+				break;
+			case TCPOPT_WINDOW:
+				if (opsize == TCPOLEN_WINDOW) {
+					opts->wscale = *ptr;
+					if (opts->wscale > 14)
+						opts->wscale = 14;
+					opts->options |= XT_SYNPROXY_OPT_WSCALE;
+				}
+				break;
+			case TCPOPT_TIMESTAMP:
+				if (opsize == TCPOLEN_TIMESTAMP) {
+					opts->tsval = get_unaligned_be32(ptr);
+					opts->tsecr = get_unaligned_be32(ptr + 4);
+					opts->options |= XT_SYNPROXY_OPT_TIMESTAMP;
+				}
+				break;
+			case TCPOPT_SACK_PERM:
+				if (opsize == TCPOLEN_SACK_PERM)
+					opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
+				break;
+			}
+
+			/* kind and length bytes were already consumed */
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+	return true;
+}
+EXPORT_SYMBOL_GPL(synproxy_parse_options);
+
+/*
+ * Number of option bytes synproxy_build_options() will emit for @opts.
+ * SACK-permitted only costs extra space when no timestamp option is
+ * present; with a timestamp it shares that option's aligned slot.
+ */
+unsigned int synproxy_options_size(const struct synproxy_options *opts)
+{
+	const unsigned int flags = opts->options;
+	unsigned int total = 0;
+
+	if (flags & XT_SYNPROXY_OPT_MSS)
+		total += TCPOLEN_MSS_ALIGNED;
+
+	if (flags & XT_SYNPROXY_OPT_TIMESTAMP) {
+		total += TCPOLEN_TSTAMP_ALIGNED;
+	} else {
+		if (flags & XT_SYNPROXY_OPT_SACK_PERM)
+			total += TCPOLEN_SACKPERM_ALIGNED;
+	}
+
+	if (flags & XT_SYNPROXY_OPT_WSCALE)
+		total += TCPOLEN_WSCALE_ALIGNED;
+
+	return total;
+}
+EXPORT_SYMBOL_GPL(synproxy_options_size);
+
+/*
+ * Write the TCP options selected in @opts directly behind the fixed TCP
+ * header @th, one 32-bit word at a time, in the order MSS, timestamp
+ * (with SACK-permitted folded into its padding), stand-alone
+ * SACK-permitted, window scale.  The caller must have reserved the space
+ * reported by synproxy_options_size().
+ */
+void
+synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
+{
+	__be32 *out = (__be32 *)(th + 1);
+	const u8 flags = opts->options;
+
+	if (flags & XT_SYNPROXY_OPT_MSS) {
+		*out = htonl((TCPOPT_MSS << 24) |
+			     (TCPOLEN_MSS << 16) |
+			     opts->mss);
+		out++;
+	}
+
+	if (flags & XT_SYNPROXY_OPT_TIMESTAMP) {
+		/* Leading two bytes are SACK-permitted if set, else NOPs. */
+		if (flags & XT_SYNPROXY_OPT_SACK_PERM)
+			*out = htonl((TCPOPT_SACK_PERM << 24) |
+				     (TCPOLEN_SACK_PERM << 16) |
+				     (TCPOPT_TIMESTAMP << 8) |
+				     TCPOLEN_TIMESTAMP);
+		else
+			*out = htonl((TCPOPT_NOP << 24) |
+				     (TCPOPT_NOP << 16) |
+				     (TCPOPT_TIMESTAMP << 8) |
+				     TCPOLEN_TIMESTAMP);
+		out++;
+
+		*out = htonl(opts->tsval);
+		out++;
+		*out = htonl(opts->tsecr);
+		out++;
+	} else if (flags & XT_SYNPROXY_OPT_SACK_PERM) {
+		*out = htonl((TCPOPT_NOP << 24) |
+			     (TCPOPT_NOP << 16) |
+			     (TCPOPT_SACK_PERM << 8) |
+			     TCPOLEN_SACK_PERM);
+		out++;
+	}
+
+	if (flags & XT_SYNPROXY_OPT_WSCALE) {
+		*out = htonl((TCPOPT_NOP << 24) |
+			     (TCPOPT_WINDOW << 16) |
+			     (TCPOLEN_WINDOW << 8) |
+			     opts->wscale);
+		out++;
+	}
+}
+EXPORT_SYMBOL_GPL(synproxy_build_options);
+
+/*
+ * Turn the timestamp option in @opts into a reply that carries state in
+ * its low six bits.  The received tsval becomes tsecr.  The new tsval is
+ * tcp_time_stamp with the low six bits replaced as follows:
+ *   bits 0-3: info->wscale, or 0xf when window scaling is not in use
+ *   bit 4:    SACK-permitted flag
+ *   bit 5:    ECN flag
+ */
+void synproxy_init_timestamp_cookie(const struct xt_synproxy_info *info,
+				    struct synproxy_options *opts)
+{
+	const unsigned int flags = opts->options;
+
+	opts->tsecr = opts->tsval;
+	opts->tsval = tcp_time_stamp & ~0x3f;
+
+	if (!(flags & XT_SYNPROXY_OPT_WSCALE))
+		opts->tsval |= 0xf;
+	else
+		opts->tsval |= info->wscale;
+
+	if (flags & XT_SYNPROXY_OPT_SACK_PERM)
+		opts->tsval |= 1 << 4;
+
+	if (flags & XT_SYNPROXY_OPT_ECN)
+		opts->tsval |= 1 << 5;
+}
+EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie);
+
+/*
+ * Recover the state stored in the low bits of the echoed timestamp
+ * (opts->tsecr): bits 0-3 are the window scale (0xf means "none"),
+ * bit 4 is SACK-permitted and bit 5 is ECN.  The matching
+ * XT_SYNPROXY_OPT_* flags are OR-ed into opts->options.
+ */
+void synproxy_check_timestamp_cookie(struct synproxy_options *opts)
+{
+	opts->wscale = opts->tsecr & 0xf;
+	if (opts->wscale != 0xf)
+		opts->options |= XT_SYNPROXY_OPT_WSCALE;
+
+	if (opts->tsecr & (1 << 4))
+		opts->options |= XT_SYNPROXY_OPT_SACK_PERM;
+
+	if (opts->tsecr & (1 << 5))
+		opts->options |= XT_SYNPROXY_OPT_ECN;
+}
+EXPORT_SYMBOL_GPL(synproxy_check_timestamp_cookie);
+
+/*
+ * Shift the TCP timestamp option of a packet by synproxy->tsoff.
+ *
+ * Does nothing (returns 1) when tsoff is 0. Otherwise the options between
+ * the fixed TCP header and the end given by th->doff are walked. On the
+ * first timestamp option of the expected length, packets in the reply
+ * direction get tsoff subtracted from the 32-bit word at option bytes 2-5,
+ * and all other packets get tsoff added to the word at option bytes 6-9.
+ * The TCP checksum is patched and the function returns.
+ *
+ * Returns 1 on success (also when no timestamp option is found), 0 when
+ * the skb cannot be made writable or an option is truncated/malformed.
+ *
+ * NOTE(review): @th is used for the checksum update after
+ * skb_make_writable(); if that call can move the header, @th may be
+ * stale - confirm against the callers.
+ * NOTE(review): the option words are big-endian on the wire but accessed
+ * through u32 pointers that need not be 4-byte aligned - confirm this is
+ * acceptable on all supported architectures.
+ */
+unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
+ unsigned int protoff,
+ struct tcphdr *th,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_conn_synproxy *synproxy)
+{
+ unsigned int optoff, optend;
+ u32 *ptr, old;
+
+ if (synproxy->tsoff == 0)
+ return 1;
+
+ optoff = protoff + sizeof(struct tcphdr);
+ optend = protoff + th->doff * 4;
+
+ if (!skb_make_writable(skb, optend))
+ return 0;
+
+ while (optoff < optend) {
+ unsigned char *op = skb->data + optoff;
+
+ switch (op[0]) {
+ case TCPOPT_EOL:
+ return 1;
+ case TCPOPT_NOP:
+ optoff++;
+ continue;
+ default:
+ /* reject options that are cut off or claim a bogus length */
+ if (optoff + 1 == optend ||
+ optoff + op[1] > optend ||
+ op[1] < 2)
+ return 0;
+ if (op[0] == TCPOPT_TIMESTAMP &&
+ op[1] == TCPOLEN_TIMESTAMP) {
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+ /* first timestamp word of the option */
+ ptr = (u32 *)&op[2];
+ old = *ptr;
+ *ptr = htonl(ntohl(*ptr) -
+ synproxy->tsoff);
+ } else {
+ /* second timestamp word of the option */
+ ptr = (u32 *)&op[6];
+ old = *ptr;
+ *ptr = htonl(ntohl(*ptr) +
+ synproxy->tsoff);
+ }
+ inet_proto_csum_replace4(&th->check, skb,
+ old, *ptr, 0);
+ return 1;
+ }
+ optoff += op[1];
+ }
+ }
+ return 1;
+}
+EXPORT_SYMBOL_GPL(synproxy_tstamp_adjust);
+
+/* Conntrack extension descriptor for the per-connection synproxy state. */
+static struct nf_ct_ext_type nf_ct_synproxy_extend __read_mostly = {
+ .len = sizeof(struct nf_conn_synproxy),
+ .align = __alignof__(struct nf_conn_synproxy),
+ .id = NF_CT_EXT_SYNPROXY,
+};
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file start hook.  Position 0 yields the header token; position
+ * N > 0 maps to CPU N - 1.  Returns the per-cpu stats of the first
+ * possible CPU at or after that index and stores its position (cpu + 1)
+ * in *pos, or NULL once no possible CPU is left.
+ */
+static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
+	int cpu;
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	cpu = *pos - 1;
+	while (cpu < nr_cpu_ids) {
+		if (cpu_possible(cpu)) {
+			*pos = cpu + 1;
+			return per_cpu_ptr(snet->stats, cpu);
+		}
+		cpu++;
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file next hook.  *pos holds the index of the first CPU not yet
+ * shown; returns the per-cpu stats of the next possible CPU and stores
+ * its position (cpu + 1) in *pos.
+ *
+ * When the CPUs are exhausted the position is still advanced before
+ * returning NULL.  Otherwise *pos keeps the value of the last record and
+ * a later restart through synproxy_cpu_seq_start() maps it back to that
+ * same CPU (cpu = *pos - 1), so the last row would be shown again.
+ */
+static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq));
+	int cpu;
+
+	for (cpu = *pos; cpu < nr_cpu_ids; cpu++) {
+		if (!cpu_possible(cpu))
+			continue;
+		*pos = cpu + 1;
+		return per_cpu_ptr(snet->stats, cpu);
+	}
+
+	/* step past the end so start() does not revisit the last CPU */
+	(*pos)++;
+	return NULL;
+}
+
+/* seq_file stop hook: the iterator holds no resources, nothing to undo. */
+static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+/*
+ * seq_file show hook: emits the column header for the start token,
+ * otherwise one row of hexadecimal counters for the per-cpu stats record
+ * passed in v.  The first column ("entries") is always printed as 0.
+ */
+static int synproxy_cpu_seq_show(struct seq_file *seq, void *v)
+{
+	const struct synproxy_stats *st;
+
+	if (v != SEQ_START_TOKEN) {
+		st = v;
+		seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0,
+			   st->syn_received,
+			   st->cookie_invalid,
+			   st->cookie_valid,
+			   st->cookie_retrans,
+			   st->conn_reopened);
+	} else {
+		seq_printf(seq, "entries\t\tsyn_received\t"
+				"cookie_invalid\tcookie_valid\t"
+				"cookie_retrans\tconn_reopened\n");
+	}
+
+	return 0;
+}
+
+/* Iterator callbacks behind the per-cpu "synproxy" statistics file. */
+static const struct seq_operations synproxy_cpu_seq_ops = {
+ .start = synproxy_cpu_seq_start,
+ .next = synproxy_cpu_seq_next,
+ .stop = synproxy_cpu_seq_stop,
+ .show = synproxy_cpu_seq_show,
+};
+
+/* open() handler: set up a netns-aware seq_file using synproxy_cpu_seq_ops. */
+static int synproxy_cpu_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &synproxy_cpu_seq_ops,
+ sizeof(struct seq_net_private));
+}
+
+/* File operations of the "synproxy" proc entry (seq_file based, per netns). */
+static const struct file_operations synproxy_cpu_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = synproxy_cpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+/*
+ * Create the read-only "synproxy" entry in this namespace's proc_net_stat
+ * directory.  Returns 0 on success or -ENOMEM if the entry could not be
+ * created.
+ */
+static int __net_init synproxy_proc_init(struct net *net)
+{
+	struct proc_dir_entry *entry;
+
+	entry = proc_create("synproxy", S_IRUGO, net->proc_net_stat,
+			    &synproxy_cpu_seq_fops);
+
+	return entry ? 0 : -ENOMEM;
+}
+
+/* Remove the "synproxy" entry from this namespace's proc_net_stat directory. */
+static void __net_exit synproxy_proc_exit(struct net *net)
+{
+ remove_proc_entry("synproxy", net->proc_net_stat);
+}
+#else
+/* Without CONFIG_PROC_FS there is no proc entry to create; always succeeds. */
+static int __net_init synproxy_proc_init(struct net *net)
+{
+ return 0;
+}
+
+/* Without CONFIG_PROC_FS there is no proc entry to remove. */
+static void __net_exit synproxy_proc_exit(struct net *net)
+{
+ return;
+}
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Per-namespace setup.
+ *
+ * Allocates a conntrack entry from an all-zero tuple, attaches the seqadj
+ * and synproxy extensions, marks it as a confirmed template and stores it
+ * in snet->tmpl.  Then allocates the per-cpu statistics and creates the
+ * proc entry.
+ *
+ * Returns 0 on success.  On failure everything allocated so far is
+ * released and a negative errno is returned: the nf_conntrack_alloc() or
+ * synproxy_proc_init() error where applicable, -ENOMEM otherwise.
+ */
+static int __net_init synproxy_net_init(struct net *net)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+ struct nf_conntrack_tuple t;
+ struct nf_conn *ct;
+ int err = -ENOMEM;
+
+ memset(&t, 0, sizeof(t));
+ ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
+ if (IS_ERR(ct)) {
+ err = PTR_ERR(ct);
+ goto err1;
+ }
+
+ /* err is still -ENOMEM for the extension and per-cpu failures below */
+ if (!nfct_seqadj_ext_add(ct))
+ goto err2;
+ if (!nfct_synproxy_ext_add(ct))
+ goto err2;
+ __set_bit(IPS_TEMPLATE_BIT, &ct->status);
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
+ snet->tmpl = ct;
+
+ snet->stats = alloc_percpu(struct synproxy_stats);
+ if (snet->stats == NULL)
+ goto err2;
+
+ err = synproxy_proc_init(net);
+ if (err < 0)
+ goto err3;
+
+ return 0;
+
+ /* unwind in reverse order of acquisition */
+err3:
+ free_percpu(snet->stats);
+err2:
+ nf_conntrack_free(ct);
+err1:
+ return err;
+}
+
+/*
+ * Per-namespace teardown: frees the conntrack template, removes the proc
+ * entry and releases the per-cpu statistics.
+ */
+static void __net_exit synproxy_net_exit(struct net *net)
+{
+ struct synproxy_net *snet = synproxy_pernet(net);
+
+ nf_conntrack_free(snet->tmpl);
+ synproxy_proc_exit(net);
+ free_percpu(snet->stats);
+}
+
+/* Per-namespace hooks; .id and .size describe the struct synproxy_net area. */
+static struct pernet_operations synproxy_net_ops = {
+ .init = synproxy_net_init,
+ .exit = synproxy_net_exit,
+ .id = &synproxy_net_id,
+ .size = sizeof(struct synproxy_net),
+};
+
+/*
+ * Module init: register the synproxy conntrack extension, then the
+ * per-namespace operations.  If the second step fails the extension is
+ * unregistered again.  Returns 0 or the negative errno of the failing
+ * step.
+ */
+static int __init synproxy_core_init(void)
+{
+	int ret;
+
+	ret = nf_ct_extend_register(&nf_ct_synproxy_extend);
+	if (ret < 0)
+		return ret;
+
+	ret = register_pernet_subsys(&synproxy_net_ops);
+	if (ret < 0) {
+		nf_ct_extend_unregister(&nf_ct_synproxy_extend);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Module exit: undo synproxy_core_init() in reverse order. */
+static void __exit synproxy_core_exit(void)
+{
+ unregister_pernet_subsys(&synproxy_net_ops);
+ nf_ct_extend_unregister(&nf_ct_synproxy_extend);
+}
+
+module_init(synproxy_core_init);
+module_exit(synproxy_core_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
deleted file mode 100644
index 474d621cbc2e..000000000000
--- a/net/netfilter/nf_tproxy_core.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Transparent proxy support for Linux/iptables
- *
- * Copyright (c) 2006-2007 BalaBit IT Ltd.
- * Author: Balazs Scheidler, Krisztian Kovacs
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-
-#include <linux/net.h>
-#include <linux/if.h>
-#include <linux/netdevice.h>
-#include <net/udp.h>
-#include <net/netfilter/nf_tproxy_core.h>
-
-
-static void
-nf_tproxy_destructor(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- skb->sk = NULL;
- skb->destructor = NULL;
-
- if (sk)
- sock_put(sk);
-}
-
-/* consumes sk */
-void
-nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
-{
- /* assigning tw sockets complicates things; most
- * skb->sk->X checks would have to test sk->sk_state first */
- if (sk->sk_state == TCP_TIME_WAIT) {
- inet_twsk_put(inet_twsk(sk));
- return;
- }
-
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = nf_tproxy_destructor;
-}
-EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock);
-
-static int __init nf_tproxy_init(void)
-{
- pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n");
- pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n");
- return 0;
-}
-
-module_init(nf_tproxy_init);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Krisztian Kovacs");
-MODULE_DESCRIPTION("Transparent proxy support core routines");
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 8a703c3dd318..ae2e5c11d01a 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -862,6 +862,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
[NFQA_MARK] = { .type = NLA_U32 },
[NFQA_PAYLOAD] = { .type = NLA_UNSPEC },
[NFQA_CT] = { .type = NLA_UNSPEC },
+ [NFQA_EXP] = { .type = NLA_UNSPEC },
};
static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
@@ -990,9 +991,14 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
if (entry == NULL)
return -ENOENT;
- rcu_read_lock();
- if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK))
+ if (nfqa[NFQA_CT]) {
ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
+ if (ct && nfqa[NFQA_EXP]) {
+ nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
+ NETLINK_CB(skb).portid,
+ nlmsg_report(nlh));
+ }
+ }
if (nfqa[NFQA_PAYLOAD]) {
u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
@@ -1003,9 +1009,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
verdict = NF_DROP;
if (ct)
- nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff);
+ nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff);
}
- rcu_read_unlock();
if (nfqa[NFQA_MARK])
entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
index ab61d66bc0b9..96cac50e0d12 100644
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ b/net/netfilter/nfnetlink_queue_ct.c
@@ -87,12 +87,27 @@ nla_put_failure:
void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo, int diff)
{
- struct nfq_ct_nat_hook *nfq_nat_ct;
+ struct nfq_ct_hook *nfq_ct;
- nfq_nat_ct = rcu_dereference(nfq_ct_nat_hook);
- if (nfq_nat_ct == NULL)
+ nfq_ct = rcu_dereference(nfq_ct_hook);
+ if (nfq_ct == NULL)
return;
if ((ct->status & IPS_NAT_MASK) && diff)
- nfq_nat_ct->seq_adjust(skb, ct, ctinfo, diff);
+ nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
+}
+
+/*
+ * Forward an expectation attribute to the attach_expect callback of the
+ * registered nfq_ct_hook.
+ *
+ * Returns 0 without doing anything for untracked conntracks, -EOPNOTSUPP
+ * when no hook is registered, otherwise the callback's return value.
+ */
+int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
+ u32 portid, u32 report)
+{
+ struct nfq_ct_hook *nfq_ct;
+
+ if (nf_ct_is_untracked(ct))
+ return 0;
+
+ nfq_ct = rcu_dereference(nfq_ct_hook);
+ if (nfq_ct == NULL)
+ return -EOPNOTSUPP;
+
+ return nfq_ct->attach_expect(attr, ct, portid, report);
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 6113cc7efffc..cd24290f3b2f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -60,7 +60,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
/* This is a fragment, no TCP header is available */
if (par->fragoff != 0)
- return XT_CONTINUE;
+ return 0;
if (!skb_make_writable(skb, skb->len))
return -1;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d7f195388f66..5d8a3a3cd5a7 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -15,7 +15,9 @@
#include <linux/ip.h>
#include <net/checksum.h>
#include <net/udp.h>
+#include <net/tcp.h>
#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
#include <linux/inetdevice.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
@@ -26,13 +28,18 @@
#define XT_TPROXY_HAVE_IPV6 1
#include <net/if_inet6.h>
#include <net/addrconf.h>
+#include <net/inet6_hashtables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
-#include <net/netfilter/nf_tproxy_core.h>
#include <linux/netfilter/xt_TPROXY.h>
+enum nf_tproxy_lookup_t {
+ NFT_LOOKUP_LISTENER,
+ NFT_LOOKUP_ESTABLISHED,
+};
+
static bool tproxy_sk_is_transparent(struct sock *sk)
{
if (sk->sk_state != TCP_TIME_WAIT) {
@@ -68,6 +75,157 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
return laddr ? laddr : daddr;
}
+/*
+ * This is used when the user wants to intercept a connection matching
+ * an explicit iptables rule. In this case the sockets are assumed
+ * matching in preference order:
+ *
+ * - match: if there's a fully established connection matching the
+ * _packet_ tuple, it is returned, assuming the redirection
+ * already took place and we process a packet belonging to an
+ * established connection
+ *
+ * - match: if there's a listening socket matching the redirection
+ * (e.g. on-port & on-ip of the connection), it is returned,
+ * regardless if it was bound to 0.0.0.0 or an explicit
+ * address. The reasoning is that if there's an explicit rule, it
+ * does not really matter if the listener is bound to an interface
+ * or to 0. The user already stated that he wants redirection
+ * (since he added the rule).
+ *
+ * Please note that there's an overlap between what a TPROXY target
+ * and a socket match will match. Normally if you have both rules the
+ * "socket" match will be the first one, effectively all packets
+ * belonging to established connections going through that one.
+ */
+static inline struct sock *
+nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ /* TCP has a separate lookup for each requested socket kind */
+ switch (lookup_type) {
+ case NFT_LOOKUP_LISTENER:
+ sk = inet_lookup_listener(net, &tcp_hashinfo,
+ saddr, sport,
+ daddr, dport,
+ in->ifindex);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ break;
+ case NFT_LOOKUP_ESTABLISHED:
+ sk = inet_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ break;
+ default:
+ BUG();
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ /* The UDP lookup is the same for both types, so drop
+ * the reference and report no match when the socket
+ * found does not fit the request: ESTABLISHED needs a
+ * connected, non-wildcard socket, LISTENER needs an
+ * unconnected one.
+ */
+ if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+ (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
+ sock_put(sk);
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ /* only TCP and UDP are expected here */
+ WARN_ON(1);
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
+ protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+
+#ifdef XT_TPROXY_HAVE_IPV6
+/*
+ * IPv6 counterpart of nf_tproxy_get_sock_v4(): looks up a TCP or UDP
+ * socket for the given tuple and lookup type.  Returns the socket found,
+ * or NULL if there is none or it does not fit the requested type.
+ */
+static inline struct sock *
+nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ switch (lookup_type) {
+ case NFT_LOOKUP_LISTENER:
+ sk = inet6_lookup_listener(net, &tcp_hashinfo,
+ saddr, sport,
+ daddr, ntohs(dport),
+ in->ifindex);
+
+ /* NOTE: we return listeners even if bound to
+ * the unspecified address, those are filtered
+ * out in xt_socket, since xt_TPROXY needs
+ * wildcard-bound listeners too
+ */
+ break;
+ case NFT_LOOKUP_ESTABLISHED:
+ sk = __inet6_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, ntohs(dport),
+ in->ifindex);
+ break;
+ default:
+ BUG();
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr);
+
+ /* NOTE: we return listeners even if bound to
+ * the unspecified address, those are filtered
+ * out in xt_socket, since xt_TPROXY needs
+ * wildcard-bound listeners too
+ */
+ /* Drop the reference and report no match when the
+ * socket found does not fit the request: ESTABLISHED
+ * needs a connected, non-wildcard socket, LISTENER
+ * needs an unconnected one.
+ */
+ if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+ (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
+ sock_put(sk);
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ /* only TCP and UDP are expected here */
+ WARN_ON(1);
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
+ protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+#endif
+
/**
* tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
* @skb: The skb being processed.
@@ -117,6 +275,15 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
return sk;
}
+/*
+ * Assign a socket to the skb -- consumes sk.
+ *
+ * The skb is orphaned first, then sk is attached with sock_edemux as the
+ * skb destructor.
+ */
+static void
+nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = sock_edemux;
+}
+
static unsigned int
tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value)
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 68ff29f60867..fab6eea1bf38 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -202,7 +202,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
return -EINVAL;
}
if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
- pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
+ pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n");
return -EINVAL;
}
if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 20b15916f403..06df2b9110f5 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -19,12 +19,12 @@
#include <net/icmp.h>
#include <net/sock.h>
#include <net/inet_sock.h>
-#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
#define XT_SOCKET_HAVE_IPV6 1
#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <net/inet6_hashtables.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
@@ -101,6 +101,43 @@ extract_icmp4_fields(const struct sk_buff *skb,
return 0;
}
+/* "socket" match based redirection (no specific rule)
+ * ===================================================
+ *
+ * There are connections with dynamic endpoints (e.g. FTP data
+ * connection) that the user is unable to add explicit rules
+ * for. These are taken care of by a generic "socket" rule. It is
+ * assumed that the proxy application is trusted to open such
+ * connections without explicit iptables rule (except of course the
+ * generic 'socket' rule). In this case the following sockets are
+ * matched in preference order:
+ *
+ * - match: if there's a fully established connection matching the
+ * _packet_ tuple
+ *
+ * - match: if there's a non-zero bound listener (possibly with a
+ * non-local address) We don't accept zero-bound listeners, since
+ * then local services could intercept traffic going through the
+ * box.
+ */
+static struct sock *
+xt_socket_get_sock_v4(struct net *net, const u8 protocol,
+		      const __be32 saddr, const __be32 daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in)
+{
+	/* TCP and UDP each have their own lookup; anything else: no socket */
+	if (protocol == IPPROTO_TCP)
+		return __inet_lookup(net, &tcp_hashinfo,
+				     saddr, sport, daddr, dport,
+				     in->ifindex);
+
+	if (protocol == IPPROTO_UDP)
+		return udp4_lib_lookup(net, saddr, sport, daddr, dport,
+				       in->ifindex);
+
+	return NULL;
+}
+
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
const struct xt_socket_mtinfo1 *info)
@@ -156,9 +193,9 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
#endif
if (!sk)
- sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
+ sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport,
- par->in, NFT_LOOKUP_ANY);
+ par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -265,6 +302,25 @@ extract_icmp6_fields(const struct sk_buff *skb,
return 0;
}
+/*
+ * IPv6 socket lookup for the "socket" match: returns the TCP or UDP
+ * socket matching the packet tuple on the incoming interface, or NULL
+ * for any other protocol.
+ */
+static struct sock *
+xt_socket_get_sock_v6(struct net *net, const u8 protocol,
+		      const struct in6_addr *saddr, const struct in6_addr *daddr,
+		      const __be16 sport, const __be16 dport,
+		      const struct net_device *in)
+{
+	if (protocol == IPPROTO_TCP)
+		return inet6_lookup(net, &tcp_hashinfo,
+				    saddr, sport, daddr, dport,
+				    in->ifindex);
+
+	if (protocol == IPPROTO_UDP)
+		return udp6_lib_lookup(net, saddr, sport, daddr, dport,
+				       in->ifindex);
+
+	return NULL;
+}
+
static bool
socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
@@ -302,9 +358,9 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
}
if (!sk)
- sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
saddr, daddr, sport, dport,
- par->in, NFT_LOOKUP_ANY);
+ par->in);
if (sk) {
bool wildcard;
bool transparent = true;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0c61b59175dc..8df7f64c6db3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -168,16 +168,43 @@ int netlink_remove_tap(struct netlink_tap *nt)
}
EXPORT_SYMBOL_GPL(netlink_remove_tap);
+/*
+ * Decide whether an skb may be mirrored to the netlink taps, based on the
+ * protocol of the socket it is attached to.
+ */
+static bool netlink_filter_tap(const struct sk_buff *skb)
+{
+	/* Conservative approach: only whitelisted socket protocols
+	 * may pass, everything else is filtered.
+	 */
+	switch (skb->sk->sk_protocol) {
+	case NETLINK_ROUTE:
+	case NETLINK_USERSOCK:
+	case NETLINK_SOCK_DIAG:
+	case NETLINK_NFLOG:
+	case NETLINK_XFRM:
+	case NETLINK_FIB_LOOKUP:
+	case NETLINK_NETFILTER:
+	case NETLINK_GENERIC:
+		return true;
+	default:
+		return false;
+	}
+}
+
static int __netlink_deliver_tap_skb(struct sk_buff *skb,
struct net_device *dev)
{
struct sk_buff *nskb;
+ struct sock *sk = skb->sk;
int ret = -ENOMEM;
dev_hold(dev);
nskb = skb_clone(skb, GFP_ATOMIC);
if (nskb) {
nskb->dev = dev;
+ nskb->protocol = htons((u16) sk->sk_protocol);
+
ret = dev_queue_xmit(nskb);
if (unlikely(ret > 0))
ret = net_xmit_errno(ret);
@@ -192,6 +219,9 @@ static void __netlink_deliver_tap(struct sk_buff *skb)
int ret;
struct netlink_tap *tmp;
+ if (!netlink_filter_tap(skb))
+ return;
+
list_for_each_entry_rcu(tmp, &netlink_tap_all, list) {
ret = __netlink_deliver_tap_skb(skb, tmp->dev);
if (unlikely(ret))
@@ -294,14 +324,14 @@ static void **alloc_pg_vec(struct netlink_sock *nlk,
{
unsigned int block_nr = req->nm_block_nr;
unsigned int i;
- void **pg_vec, *ptr;
+ void **pg_vec;
pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
if (pg_vec == NULL)
return NULL;
for (i = 0; i < block_nr; i++) {
- pg_vec[i] = ptr = alloc_one_pg_vec_page(order);
+ pg_vec[i] = alloc_one_pg_vec_page(order);
if (pg_vec[i] == NULL)
goto err1;
}
@@ -595,7 +625,7 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
* for dumps is performed here. A dump is allowed to continue
* if at least half the ring is unused.
*/
- while (nlk->cb != NULL && netlink_dump_space(nlk)) {
+ while (nlk->cb_running && netlink_dump_space(nlk)) {
err = netlink_dump(sk);
if (err < 0) {
sk->sk_err = err;
@@ -802,18 +832,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
#endif /* CONFIG_NETLINK_MMAP */
-static void netlink_destroy_callback(struct netlink_callback *cb)
-{
- kfree_skb(cb->skb);
- kfree(cb);
-}
-
-static void netlink_consume_callback(struct netlink_callback *cb)
-{
- consume_skb(cb->skb);
- kfree(cb);
-}
-
static void netlink_skb_destructor(struct sk_buff *skb)
{
#ifdef CONFIG_NETLINK_MMAP
@@ -872,12 +890,12 @@ static void netlink_sock_destruct(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
- if (nlk->cb) {
- if (nlk->cb->done)
- nlk->cb->done(nlk->cb);
+ if (nlk->cb_running) {
+ if (nlk->cb.done)
+ nlk->cb.done(&nlk->cb);
- module_put(nlk->cb->module);
- netlink_destroy_callback(nlk->cb);
+ module_put(nlk->cb.module);
+ kfree_skb(nlk->cb.skb);
}
skb_queue_purge(&sk->sk_receive_queue);
@@ -2350,7 +2368,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
skb_free_datagram(sk, skb);
- if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
+ if (nlk->cb_running &&
+ atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
sk->sk_err = ret;
@@ -2566,13 +2585,12 @@ static int netlink_dump(struct sock *sk)
int alloc_size;
mutex_lock(nlk->cb_mutex);
-
- cb = nlk->cb;
- if (cb == NULL) {
+ if (!nlk->cb_running) {
err = -EINVAL;
goto errout_skb;
}
+ cb = &nlk->cb;
alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
if (!netlink_rx_is_mmaped(sk) &&
@@ -2610,11 +2628,11 @@ static int netlink_dump(struct sock *sk)
if (cb->done)
cb->done(cb);
- nlk->cb = NULL;
- mutex_unlock(nlk->cb_mutex);
+ nlk->cb_running = false;
+ mutex_unlock(nlk->cb_mutex);
module_put(cb->module);
- netlink_consume_callback(cb);
+ consume_skb(cb->skb);
return 0;
errout_skb:
@@ -2632,59 +2650,51 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
struct netlink_sock *nlk;
int ret;
- cb = kzalloc(sizeof(*cb), GFP_KERNEL);
- if (cb == NULL)
- return -ENOBUFS;
-
/* Memory mapped dump requests need to be copied to avoid looping
* on the pending state in netlink_mmap_sendmsg() while the CB hold
* a reference to the skb.
*/
if (netlink_skb_is_mmaped(skb)) {
skb = skb_copy(skb, GFP_KERNEL);
- if (skb == NULL) {
- kfree(cb);
+ if (skb == NULL)
return -ENOBUFS;
- }
} else
atomic_inc(&skb->users);
- cb->dump = control->dump;
- cb->done = control->done;
- cb->nlh = nlh;
- cb->data = control->data;
- cb->module = control->module;
- cb->min_dump_alloc = control->min_dump_alloc;
- cb->skb = skb;
-
sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
if (sk == NULL) {
- netlink_destroy_callback(cb);
- return -ECONNREFUSED;
+ ret = -ECONNREFUSED;
+ goto error_free;
}
- nlk = nlk_sk(sk);
+ nlk = nlk_sk(sk);
mutex_lock(nlk->cb_mutex);
/* A dump is in progress... */
- if (nlk->cb) {
- mutex_unlock(nlk->cb_mutex);
- netlink_destroy_callback(cb);
+ if (nlk->cb_running) {
ret = -EBUSY;
- goto out;
+ goto error_unlock;
}
/* add reference of module which cb->dump belongs to */
- if (!try_module_get(cb->module)) {
- mutex_unlock(nlk->cb_mutex);
- netlink_destroy_callback(cb);
+ if (!try_module_get(control->module)) {
ret = -EPROTONOSUPPORT;
- goto out;
+ goto error_unlock;
}
- nlk->cb = cb;
+ cb = &nlk->cb;
+ memset(cb, 0, sizeof(*cb));
+ cb->dump = control->dump;
+ cb->done = control->done;
+ cb->nlh = nlh;
+ cb->data = control->data;
+ cb->module = control->module;
+ cb->min_dump_alloc = control->min_dump_alloc;
+ cb->skb = skb;
+
+ nlk->cb_running = true;
+
mutex_unlock(nlk->cb_mutex);
ret = netlink_dump(sk);
-out:
sock_put(sk);
if (ret)
@@ -2694,6 +2704,13 @@ out:
* signal not to send ACK even if it was requested.
*/
return -EINTR;
+
+error_unlock:
+ sock_put(sk);
+ mutex_unlock(nlk->cb_mutex);
+error_free:
+ kfree_skb(skb);
+ return ret;
}
EXPORT_SYMBOL(__netlink_dump_start);
@@ -2916,14 +2933,14 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
struct sock *s = v;
struct netlink_sock *nlk = nlk_sk(s);
- seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
+ seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n",
s,
s->sk_protocol,
nlk->portid,
nlk->groups ? (u32)nlk->groups[0] : 0,
sk_rmem_alloc_get(s),
sk_wmem_alloc_get(s),
- nlk->cb,
+ nlk->cb_running,
atomic_read(&s->sk_refcnt),
atomic_read(&s->sk_drops),
sock_i_ino(s)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index eaa88d187cdc..acbd774eeb7c 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -32,7 +32,8 @@ struct netlink_sock {
unsigned long *groups;
unsigned long state;
wait_queue_head_t wait;
- struct netlink_callback *cb;
+ bool cb_running;
+ struct netlink_callback cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
void (*netlink_rcv)(struct sk_buff *skb);
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 1d074dd1650f..e92923cf3e03 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -77,11 +77,19 @@ error:
return rc;
}
-int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name)
+/**
+ * nfc_fw_download_done - inform that a firmware download was completed
+ *
+ * @dev: The nfc device to which firmware was downloaded
+ * @firmware_name: The firmware filename
+ * @result: The positive value of a standard errno value
+ */
+int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
+ u32 result)
{
dev->fw_download_in_progress = false;
- return nfc_genl_fw_download_done(dev, firmware_name);
+ return nfc_genl_fw_download_done(dev, firmware_name, result);
}
EXPORT_SYMBOL(nfc_fw_download_done);
@@ -129,7 +137,7 @@ int nfc_dev_up(struct nfc_dev *dev)
/* We have to enable the device before discovering SEs */
if (dev->ops->discover_se) {
rc = dev->ops->discover_se(dev);
- if (!rc)
+ if (rc)
pr_warn("SE discovery failed\n");
}
@@ -575,12 +583,14 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
goto error;
}
- if (se->type == NFC_SE_ENABLED) {
+ if (se->state == NFC_SE_ENABLED) {
rc = -EALREADY;
goto error;
}
rc = dev->ops->enable_se(dev, se_idx);
+ if (rc >= 0)
+ se->state = NFC_SE_ENABLED;
error:
device_unlock(&dev->dev);
@@ -618,12 +628,14 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
goto error;
}
- if (se->type == NFC_SE_DISABLED) {
+ if (se->state == NFC_SE_DISABLED) {
rc = -EALREADY;
goto error;
}
rc = dev->ops->disable_se(dev, se_idx);
+ if (rc >= 0)
+ se->state = NFC_SE_DISABLED;
error:
device_unlock(&dev->dev);
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index fe66908401f5..d07ca4c5cf8c 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -717,7 +717,7 @@ static int hci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx)
struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
if (hdev->ops->disable_se)
- return hdev->ops->enable_se(hdev, se_idx);
+ return hdev->ops->disable_se(hdev, se_idx);
return 0;
}
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f16fd59d4160..68063b2025da 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1114,7 +1114,8 @@ static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info)
return rc;
}
-int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name)
+int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
+ u32 result)
{
struct sk_buff *msg;
void *hdr;
@@ -1129,6 +1130,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name)
goto free_msg;
if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) ||
+ nla_put_u32(msg, NFC_ATTR_FIRMWARE_DOWNLOAD_STATUS, result) ||
nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
goto nla_put_failure;
@@ -1191,6 +1193,91 @@ static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info)
return rc;
}
+/*
+ * Append one NFC_CMD_GET_SE message per secure element of @dev to @msg.
+ * Each message carries the device index, the SE index and the SE type.
+ * When @cb is given, the dump consistency check is run on each header.
+ *
+ * Returns 0 on success, or -EMSGSIZE after cancelling the message that
+ * could not be completed.
+ */
+static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev,
+ u32 portid, u32 seq,
+ struct netlink_callback *cb,
+ int flags)
+{
+ void *hdr;
+ struct nfc_se *se, *n;
+
+ list_for_each_entry_safe(se, n, &dev->secure_elements, list) {
+ hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags,
+ NFC_CMD_GET_SE);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (cb)
+ genl_dump_check_consistent(cb, hdr, &nfc_genl_family);
+
+ if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) ||
+ nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) ||
+ nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type))
+ goto nla_put_failure;
+
+ if (genlmsg_end(msg, hdr) < 0)
+ goto nla_put_failure;
+ }
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+/*
+ * Netlink dump callback listing the secure elements of all NFC devices.
+ *
+ * Dump state lives in cb->args: args[0] holds the heap-allocated device
+ * iterator (created on the first call), args[1] the device to resume
+ * from on the next call.
+ *
+ * Returns skb->len, or -ENOMEM if the iterator cannot be allocated.
+ */
+static int nfc_genl_dump_ses(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
+ struct nfc_dev *dev = (struct nfc_dev *) cb->args[1];
+ bool first_call = false;
+
+ /* no iterator stored yet: this is the first invocation of the dump */
+ if (!iter) {
+ first_call = true;
+ iter = kmalloc(sizeof(struct class_dev_iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+ cb->args[0] = (long) iter;
+ }
+
+ mutex_lock(&nfc_devlist_mutex);
+
+ cb->seq = nfc_devlist_generation;
+
+ if (first_call) {
+ nfc_device_iter_init(iter);
+ dev = nfc_device_iter_next(iter);
+ }
+
+ while (dev) {
+ int rc;
+
+ rc = nfc_genl_send_se(skb, dev, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, cb, NLM_F_MULTI);
+ /* stop here; dev is saved below so the next call resumes with it */
+ if (rc < 0)
+ break;
+
+ dev = nfc_device_iter_next(iter);
+ }
+
+ mutex_unlock(&nfc_devlist_mutex);
+
+ cb->args[1] = (long) dev;
+
+ return skb->len;
+}
+
+/*
+ * Dump completion hook: tear down and free the device iterator that
+ * nfc_genl_dump_ses() stored in cb->args[0].
+ *
+ * The slot is still zero if the dump callback failed to allocate the
+ * iterator or never ran, so a NULL iterator must be tolerated here
+ * instead of being passed on to nfc_device_iter_exit().
+ *
+ * Always returns 0.
+ */
+static int nfc_genl_dump_ses_done(struct netlink_callback *cb)
+{
+	struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
+
+	if (iter) {
+		nfc_device_iter_exit(iter);
+		kfree(iter);
+	}
+
+	return 0;
+}
+
static struct genl_ops nfc_genl_ops[] = {
{
.cmd = NFC_CMD_GET_DEVICE,
@@ -1265,6 +1352,12 @@ static struct genl_ops nfc_genl_ops[] = {
.doit = nfc_genl_disable_se,
.policy = nfc_genl_policy,
},
+ {
+ .cmd = NFC_CMD_GET_SE,
+ .dumpit = nfc_genl_dump_ses,
+ .done = nfc_genl_dump_ses_done,
+ .policy = nfc_genl_policy,
+ },
};
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 820a7850c36a..aaf606fc1faa 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -124,9 +124,8 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter)
}
int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name);
-int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name);
-
-int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name);
+int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
+ u32 result);
int nfc_dev_up(struct nfc_dev *dev);
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 27ee56b688a3..6ecf491ad509 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -4,6 +4,7 @@
config OPENVSWITCH
tristate "Open vSwitch"
+ select LIBCRC32C
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
@@ -40,3 +41,16 @@ config OPENVSWITCH_GRE
Say N to exclude this support and reduce the binary size.
If unsure, say Y.
+
+config OPENVSWITCH_VXLAN
+ bool "Open vSwitch VXLAN tunneling support"
+ depends on INET
+ depends on OPENVSWITCH
+ depends on VXLAN && !(OPENVSWITCH=y && VXLAN=m)
+ default y
+ ---help---
+ If you say Y here, then Open vSwitch will be able to create VXLAN vports.
+
+ Say N to exclude this support and reduce the binary size.
+
+ If unsure, say Y.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 01bddb2991e3..ea36e99089af 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -10,6 +10,13 @@ openvswitch-y := \
dp_notify.o \
flow.o \
vport.o \
- vport-gre.o \
vport-internal_dev.o \
vport-netdev.o
+
+ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
+openvswitch-y += vport-vxlan.o
+endif
+
+ifneq ($(CONFIG_OPENVSWITCH_GRE),)
+openvswitch-y += vport-gre.o
+endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ab101f715447..65cfaa816075 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -22,6 +22,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
+#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
@@ -31,6 +32,7 @@
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
+#include <net/sctp/checksum.h>
#include "datapath.h"
#include "vport.h"
@@ -352,6 +354,39 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
return 0;
}
+static int set_sctp(struct sk_buff *skb,
+ const struct ovs_key_sctp *sctp_port_key)
+{
+ struct sctphdr *sh;
+ int err;
+ unsigned int sctphoff = skb_transport_offset(skb);
+
+ err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
+ if (unlikely(err))
+ return err;
+
+ sh = sctp_hdr(skb);
+ if (sctp_port_key->sctp_src != sh->source ||
+ sctp_port_key->sctp_dst != sh->dest) {
+ __le32 old_correct_csum, new_csum, old_csum;
+
+ old_csum = sh->checksum;
+ old_correct_csum = sctp_compute_cksum(skb, sctphoff);
+
+ sh->source = sctp_port_key->sctp_src;
+ sh->dest = sctp_port_key->sctp_dst;
+
+ new_csum = sctp_compute_cksum(skb, sctphoff);
+
+ /* Carry any checksum errors through. */
+ sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+
+ skb->rxhash = 0;
+ }
+
+ return 0;
+}
+
static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
struct vport *vport;
@@ -376,8 +411,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *a;
int rem;
+ BUG_ON(!OVS_CB(skb)->pkt_key);
+
upcall.cmd = OVS_PACKET_CMD_ACTION;
- upcall.key = &OVS_CB(skb)->flow->key;
+ upcall.key = OVS_CB(skb)->pkt_key;
upcall.userdata = NULL;
upcall.portid = 0;
@@ -459,6 +496,10 @@ static int execute_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_UDP:
err = set_udp(skb, nla_data(nested_attr));
break;
+
+ case OVS_KEY_ATTR_SCTP:
+ err = set_sctp(skb, nla_data(nested_attr));
+ break;
}
return err;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f2ed7600084e..2aa13bd7f2b2 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2012 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -165,7 +165,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
- ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
+ ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -226,19 +226,18 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
struct sw_flow_key key;
u64 *stats_counter;
int error;
- int key_len;
stats = this_cpu_ptr(dp->stats_percpu);
/* Extract flow from 'skb' into 'key'. */
- error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
+ error = ovs_flow_extract(skb, p->port_no, &key);
if (unlikely(error)) {
kfree_skb(skb);
return;
}
/* Look up flow. */
- flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
+ flow = ovs_flow_lookup(rcu_dereference(dp->table), &key);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@ -253,6 +252,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
}
OVS_CB(skb)->flow = flow;
+ OVS_CB(skb)->pkt_key = &key;
stats_counter = &stats->n_hit;
ovs_flow_used(OVS_CB(skb)->flow, skb);
@@ -435,7 +435,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
upcall->dp_ifindex = dp_ifindex;
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
- ovs_flow_to_nlattrs(upcall_info->key, user_skb);
+ ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb);
nla_nest_end(user_skb, nla);
if (upcall_info->userdata)
@@ -468,7 +468,7 @@ static int flush_flows(struct datapath *dp)
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(old_table);
+ ovs_flow_tbl_destroy(old_table, true);
return 0;
}
@@ -611,10 +611,12 @@ static int validate_tp_port(const struct sw_flow_key *flow_key)
static int validate_and_copy_set_tun(const struct nlattr *attr,
struct sw_flow_actions **sfa)
{
- struct ovs_key_ipv4_tunnel tun_key;
+ struct sw_flow_match match;
+ struct sw_flow_key key;
int err, start;
- err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key);
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false);
if (err)
return err;
@@ -622,7 +624,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (start < 0)
return start;
- err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key));
+ err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key,
+ sizeof(match.key->tun_key));
add_nested_action_end(*sfa, start);
return err;
@@ -709,6 +712,12 @@ static int validate_set(const struct nlattr *a,
return validate_tp_port(flow_key);
+ case OVS_KEY_ATTR_SCTP:
+ if (flow_key->ip.proto != IPPROTO_SCTP)
+ return -EINVAL;
+
+ return validate_tp_port(flow_key);
+
default:
return -EINVAL;
}
@@ -857,7 +866,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct ethhdr *eth;
int len;
int err;
- int key_len;
err = -EINVAL;
if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
@@ -890,11 +898,11 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(flow))
goto err_kfree_skb;
- err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
+ err = ovs_flow_extract(packet, -1, &flow->key);
if (err)
goto err_flow_free;
- err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]);
+ err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]);
if (err)
goto err_flow_free;
acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
@@ -908,6 +916,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_flow_free;
OVS_CB(packet)->flow = flow;
+ OVS_CB(packet)->pkt_key = &flow->key;
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
@@ -922,13 +931,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
local_bh_enable();
rcu_read_unlock();
- ovs_flow_free(flow);
+ ovs_flow_free(flow, false);
return err;
err_unlock:
rcu_read_unlock();
err_flow_free:
- ovs_flow_free(flow);
+ ovs_flow_free(flow, false);
err_kfree_skb:
kfree_skb(packet);
err:
@@ -951,9 +960,10 @@ static struct genl_ops dp_packet_genl_ops[] = {
static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
+ struct flow_table *table;
int i;
- struct flow_table *table = ovsl_dereference(dp->table);
+ table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held());
stats->n_flows = ovs_flow_tbl_count(table);
stats->n_hit = stats->n_missed = stats->n_lost = 0;
@@ -1044,7 +1054,8 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
if (!start)
return -EMSGSIZE;
- err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key));
+ err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key),
+ nla_data(ovs_key));
if (err)
return err;
nla_nest_end(skb, start);
@@ -1092,6 +1103,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -1104,7 +1116,6 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
u32 seq, u32 flags, u8 cmd)
{
const int skb_orig_len = skb->len;
- const struct sw_flow_actions *sf_acts;
struct nlattr *start;
struct ovs_flow_stats stats;
struct ovs_header *ovs_header;
@@ -1113,20 +1124,31 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
u8 tcp_flags;
int err;
- sf_acts = ovsl_dereference(flow->sf_acts);
-
ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
if (!ovs_header)
return -EMSGSIZE;
ovs_header->dp_ifindex = get_dpifindex(dp);
+ /* Fill flow key. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
if (!nla)
goto nla_put_failure;
- err = ovs_flow_to_nlattrs(&flow->key, skb);
+
+ err = ovs_flow_to_nlattrs(&flow->unmasked_key,
+ &flow->unmasked_key, skb);
+ if (err)
+ goto error;
+ nla_nest_end(skb, nla);
+
+ nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
+ if (!nla)
+ goto nla_put_failure;
+
+ err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb);
if (err)
goto error;
+
nla_nest_end(skb, nla);
spin_lock_bh(&flow->lock);
@@ -1161,6 +1183,11 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
*/
start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
if (start) {
+ const struct sw_flow_actions *sf_acts;
+
+ sf_acts = rcu_dereference_check(flow->sf_acts,
+ lockdep_ovsl_is_held());
+
err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb);
if (!err)
nla_nest_end(skb, start);
@@ -1211,20 +1238,24 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
- struct sw_flow_key key;
- struct sw_flow *flow;
+ struct sw_flow_key key, masked_key;
+ struct sw_flow *flow = NULL;
+ struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
struct flow_table *table;
struct sw_flow_actions *acts = NULL;
+ struct sw_flow_match match;
int error;
- int key_len;
/* Extract key. */
error = -EINVAL;
if (!a[OVS_FLOW_ATTR_KEY])
goto error;
- error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+
+ ovs_match_init(&match, &key, &mask);
+ error = ovs_match_from_nlattrs(&match,
+ a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
goto error;
@@ -1235,9 +1266,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(acts))
goto error;
- error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts);
- if (error)
+ ovs_flow_key_mask(&masked_key, &key, &mask);
+ error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS],
+ &masked_key, 0, &acts);
+ if (error) {
+ OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
goto err_kfree;
+ }
} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
error = -EINVAL;
goto error;
@@ -1250,8 +1285,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+
+ /* Check if this is a duplicate flow */
+ flow = ovs_flow_lookup(table, &key);
if (!flow) {
+ struct sw_flow_mask *mask_p;
/* Bail out if we're not allowed to create a new flow. */
error = -ENOENT;
if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
@@ -1264,7 +1302,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
new_table = ovs_flow_tbl_expand(table);
if (!IS_ERR(new_table)) {
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(table);
+ ovs_flow_tbl_destroy(table, true);
table = ovsl_dereference(dp->table);
}
}
@@ -1277,14 +1315,30 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
}
clear_stats(flow);
+ flow->key = masked_key;
+ flow->unmasked_key = key;
+
+ /* Make sure mask is unique in the system */
+ mask_p = ovs_sw_flow_mask_find(table, &mask);
+ if (!mask_p) {
+ /* Allocate a new mask if none exists. */
+ mask_p = ovs_sw_flow_mask_alloc();
+ if (!mask_p)
+ goto err_flow_free;
+ mask_p->key = mask.key;
+ mask_p->range = mask.range;
+ ovs_sw_flow_mask_insert(table, mask_p);
+ }
+
+ ovs_sw_flow_mask_add_ref(mask_p);
+ flow->mask = mask_p;
rcu_assign_pointer(flow->sf_acts, acts);
/* Put flow in bucket. */
- ovs_flow_tbl_insert(table, flow, &key, key_len);
+ ovs_flow_insert(table, flow);
reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
- info->snd_seq,
- OVS_FLOW_CMD_NEW);
+ info->snd_seq, OVS_FLOW_CMD_NEW);
} else {
/* We found a matching flow. */
struct sw_flow_actions *old_acts;
@@ -1300,6 +1354,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
goto err_unlock_ovs;
+ /* The unmasked key has to be the same for flow updates. */
+ error = -EINVAL;
+ if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) {
+ OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
+ goto err_unlock_ovs;
+ }
+
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
rcu_assign_pointer(flow->sf_acts, acts);
@@ -1324,6 +1385,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
return 0;
+err_flow_free:
+ ovs_flow_free(flow, false);
err_unlock_ovs:
ovs_unlock();
err_kfree:
@@ -1341,12 +1404,16 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
struct sw_flow *flow;
struct datapath *dp;
struct flow_table *table;
+ struct sw_flow_match match;
int err;
- int key_len;
- if (!a[OVS_FLOW_ATTR_KEY])
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ OVS_NLERR("Flow get message rejected, Key attribute missing.\n");
return -EINVAL;
- err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+ }
+
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
return err;
@@ -1358,7 +1425,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
}
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ flow = ovs_flow_lookup_unmasked_key(table, &match);
if (!flow) {
err = -ENOENT;
goto unlock;
@@ -1387,8 +1454,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
struct sw_flow *flow;
struct datapath *dp;
struct flow_table *table;
+ struct sw_flow_match match;
int err;
- int key_len;
ovs_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
@@ -1401,12 +1468,14 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = flush_flows(dp);
goto unlock;
}
- err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+
+ ovs_match_init(&match, &key, NULL);
+ err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
goto unlock;
table = ovsl_dereference(dp->table);
- flow = ovs_flow_tbl_lookup(table, &key, key_len);
+ flow = ovs_flow_lookup_unmasked_key(table, &match);
if (!flow) {
err = -ENOENT;
goto unlock;
@@ -1418,13 +1487,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
goto unlock;
}
- ovs_flow_tbl_remove(table, flow);
+ ovs_flow_remove(table, flow);
err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_FLOW_CMD_DEL);
BUG_ON(err < 0);
- ovs_flow_deferred_free(flow);
+ ovs_flow_free(flow, true);
ovs_unlock();
ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
@@ -1440,22 +1509,21 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct datapath *dp;
struct flow_table *table;
- ovs_lock();
+ rcu_read_lock();
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
- ovs_unlock();
+ rcu_read_unlock();
return -ENODEV;
}
- table = ovsl_dereference(dp->table);
-
+ table = rcu_dereference(dp->table);
for (;;) {
struct sw_flow *flow;
u32 bucket, obj;
bucket = cb->args[0];
obj = cb->args[1];
- flow = ovs_flow_tbl_next(table, &bucket, &obj);
+ flow = ovs_flow_dump_next(table, &bucket, &obj);
if (!flow)
break;
@@ -1468,7 +1536,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[0] = bucket;
cb->args[1] = obj;
}
- ovs_unlock();
+ rcu_read_unlock();
return skb->len;
}
@@ -1664,7 +1732,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_destroy_local_port;
ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
- list_add_tail(&dp->list_node, &ovs_net->dps);
+ list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
ovs_unlock();
@@ -1678,7 +1746,7 @@ err_destroy_ports_array:
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
+ ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@@ -1702,7 +1770,7 @@ static void __dp_destroy(struct datapath *dp)
ovs_dp_detach_port(vport);
}
- list_del(&dp->list_node);
+ list_del_rcu(&dp->list_node);
/* OVSP_LOCAL is datapath internal port. We need to make sure that
* all port in datapath are destroyed first before freeing datapath.
@@ -1807,8 +1875,8 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
int skip = cb->args[0];
int i = 0;
- ovs_lock();
- list_for_each_entry(dp, &ovs_net->dps, list_node) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
if (i >= skip &&
ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
@@ -1816,7 +1884,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
break;
i++;
}
- ovs_unlock();
+ rcu_read_unlock();
cb->args[0] = i;
@@ -2285,7 +2353,7 @@ static void rehash_flow_table(struct work_struct *work)
new_table = ovs_flow_tbl_rehash(old_table);
if (!IS_ERR(new_table)) {
rcu_assign_pointer(dp->table, new_table);
- ovs_flow_tbl_deferred_destroy(old_table);
+ ovs_flow_tbl_destroy(old_table, true);
}
}
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index a91486484916..4d109c176ef3 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -88,11 +88,13 @@ struct datapath {
/**
* struct ovs_skb_cb - OVS data in skb CB
* @flow: The flow associated with this packet. May be %NULL if no flow.
+ * @pkt_key: The flow information extracted from the packet. Must be nonnull.
* @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
* packet is not being tunneled.
*/
struct ovs_skb_cb {
struct sw_flow *flow;
+ struct sw_flow_key *pkt_key;
struct ovs_key_ipv4_tunnel *tun_key;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -183,4 +185,8 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
void ovs_dp_notify_wq(struct work_struct *work);
+
+#define OVS_NLERR(fmt, ...) \
+ pr_info_once("netlink: " fmt, ##__VA_ARGS__)
+
#endif /* datapath.h */
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 1aa84dc58777..410db90db73d 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2011 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -34,6 +34,7 @@
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
@@ -46,6 +47,202 @@
static struct kmem_cache *flow_cache;
+static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
+ struct sw_flow_key_range *range, u8 val);
+
+static void update_range__(struct sw_flow_match *match,
+ size_t offset, size_t size, bool is_mask)
+{
+ struct sw_flow_key_range *range = NULL;
+ size_t start = rounddown(offset, sizeof(long));
+ size_t end = roundup(offset + size, sizeof(long));
+
+ if (!is_mask)
+ range = &match->range;
+ else if (match->mask)
+ range = &match->mask->range;
+
+ if (!range)
+ return;
+
+ if (range->start == range->end) {
+ range->start = start;
+ range->end = end;
+ return;
+ }
+
+ if (range->start > start)
+ range->start = start;
+
+ if (range->end < end)
+ range->end = end;
+}
+
+#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
+ do { \
+ update_range__(match, offsetof(struct sw_flow_key, field), \
+ sizeof((match)->key->field), is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ (match)->mask->key.field = value; \
+ } else { \
+ (match)->key->field = value; \
+ } \
+ } while (0)
+
+#define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \
+ do { \
+ update_range__(match, offsetof(struct sw_flow_key, field), \
+ len, is_mask); \
+ if (is_mask) { \
+ if ((match)->mask) \
+ memcpy(&(match)->mask->key.field, value_p, len);\
+ } else { \
+ memcpy(&(match)->key->field, value_p, len); \
+ } \
+ } while (0)
+
+static u16 range_n_bytes(const struct sw_flow_key_range *range)
+{
+ return range->end - range->start;
+}
+
+void ovs_match_init(struct sw_flow_match *match,
+ struct sw_flow_key *key,
+ struct sw_flow_mask *mask)
+{
+ memset(match, 0, sizeof(*match));
+ match->key = key;
+ match->mask = mask;
+
+ memset(key, 0, sizeof(*key));
+
+ if (mask) {
+ memset(&mask->key, 0, sizeof(mask->key));
+ mask->range.start = mask->range.end = 0;
+ }
+}
+
+static bool ovs_match_validate(const struct sw_flow_match *match,
+ u64 key_attrs, u64 mask_attrs)
+{
+ u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
+ u64 mask_allowed = key_attrs; /* At most allow all key attributes */
+
+ /* The following mask attributes are allowed only if they
+ * pass the validation tests. */
+ mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
+ | (1 << OVS_KEY_ATTR_IPV6)
+ | (1 << OVS_KEY_ATTR_TCP)
+ | (1 << OVS_KEY_ATTR_UDP)
+ | (1 << OVS_KEY_ATTR_SCTP)
+ | (1 << OVS_KEY_ATTR_ICMP)
+ | (1 << OVS_KEY_ATTR_ICMPV6)
+ | (1 << OVS_KEY_ATTR_ARP)
+ | (1 << OVS_KEY_ATTR_ND));
+
+ /* Always allowed mask fields. */
+ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
+ | (1 << OVS_KEY_ATTR_IN_PORT)
+ | (1 << OVS_KEY_ATTR_ETHERTYPE));
+
+ /* Check key attributes. */
+ if (match->key->eth.type == htons(ETH_P_ARP)
+ || match->key->eth.type == htons(ETH_P_RARP)) {
+ key_expected |= 1 << OVS_KEY_ATTR_ARP;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
+ }
+
+ if (match->key->eth.type == htons(ETH_P_IP)) {
+ key_expected |= 1 << OVS_KEY_ATTR_IPV4;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
+
+ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+ if (match->key->ip.proto == IPPROTO_UDP) {
+ key_expected |= 1 << OVS_KEY_ATTR_UDP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_SCTP) {
+ key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_TCP) {
+ key_expected |= 1 << OVS_KEY_ATTR_TCP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_ICMP) {
+ key_expected |= 1 << OVS_KEY_ATTR_ICMP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
+ }
+ }
+ }
+
+ if (match->key->eth.type == htons(ETH_P_IPV6)) {
+ key_expected |= 1 << OVS_KEY_ATTR_IPV6;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
+
+ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
+ if (match->key->ip.proto == IPPROTO_UDP) {
+ key_expected |= 1 << OVS_KEY_ATTR_UDP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_SCTP) {
+ key_expected |= 1 << OVS_KEY_ATTR_SCTP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_TCP) {
+ key_expected |= 1 << OVS_KEY_ATTR_TCP;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
+ }
+
+ if (match->key->ip.proto == IPPROTO_ICMPV6) {
+ key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
+ if (match->mask && (match->mask->key.ip.proto == 0xff))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
+
+ if (match->key->ipv6.tp.src ==
+ htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+ key_expected |= 1 << OVS_KEY_ATTR_ND;
+ if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_ND;
+ }
+ }
+ }
+ }
+
+ if ((key_attrs & key_expected) != key_expected) {
+ /* Key attributes check failed. */
+ OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
+ key_attrs, key_expected);
+ return false;
+ }
+
+ if ((mask_attrs & mask_allowed) != mask_attrs) {
+ /* Mask attributes check failed. */
+ OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
+ mask_attrs, mask_allowed);
+ return false;
+ }
+
+ return true;
+}
+
static int check_header(struct sk_buff *skb, int len)
{
if (unlikely(skb->len < len))
@@ -102,6 +299,12 @@ static bool udphdr_ok(struct sk_buff *skb)
sizeof(struct udphdr));
}
+static bool sctphdr_ok(struct sk_buff *skb)
+{
+ return pskb_may_pull(skb, skb_transport_offset(skb) +
+ sizeof(struct sctphdr));
+}
+
static bool icmphdr_ok(struct sk_buff *skb)
{
return pskb_may_pull(skb, skb_transport_offset(skb) +
@@ -121,12 +324,7 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)
return cur_ms - idle_ms;
}
-#define SW_FLOW_KEY_OFFSET(field) \
- (offsetof(struct sw_flow_key, field) + \
- FIELD_SIZEOF(struct sw_flow_key, field))
-
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
- int *key_lenp)
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
{
unsigned int nh_ofs = skb_network_offset(skb);
unsigned int nh_len;
@@ -136,8 +334,6 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
__be16 frag_off;
int err;
- *key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
-
err = check_header(skb, nh_ofs + sizeof(*nh));
if (unlikely(err))
return err;
@@ -176,6 +372,22 @@ static bool icmp6hdr_ok(struct sk_buff *skb)
sizeof(struct icmp6hdr));
}
+void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
+ const struct sw_flow_mask *mask)
+{
+ const long *m = (long *)((u8 *)&mask->key + mask->range.start);
+ const long *s = (long *)((u8 *)src + mask->range.start);
+ long *d = (long *)((u8 *)dst + mask->range.start);
+ int i;
+
+ /* The memory outside of 'mask->range' is not set, since
+ * further operations on 'dst' only use the contents within
+ * 'mask->range'.
+ */
+ for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
+ *d++ = *s++ & *m++;
+}
+
#define TCP_FLAGS_OFFSET 13
#define TCP_FLAG_MASK 0x3f
@@ -224,6 +436,7 @@ struct sw_flow *ovs_flow_alloc(void)
spin_lock_init(&flow->lock);
flow->sf_acts = NULL;
+ flow->mask = NULL;
return flow;
}
@@ -263,7 +476,7 @@ static void free_buckets(struct flex_array *buckets)
flex_array_free(buckets);
}
-struct flow_table *ovs_flow_tbl_alloc(int new_size)
+static struct flow_table *__flow_tbl_alloc(int new_size)
{
struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
@@ -281,17 +494,15 @@ struct flow_table *ovs_flow_tbl_alloc(int new_size)
table->node_ver = 0;
table->keep_flows = false;
get_random_bytes(&table->hash_seed, sizeof(u32));
+ table->mask_list = NULL;
return table;
}
-void ovs_flow_tbl_destroy(struct flow_table *table)
+static void __flow_tbl_destroy(struct flow_table *table)
{
int i;
- if (!table)
- return;
-
if (table->keep_flows)
goto skip_flows;
@@ -302,32 +513,56 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
int ver = table->node_ver;
hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
- hlist_del_rcu(&flow->hash_node[ver]);
- ovs_flow_free(flow);
+ hlist_del(&flow->hash_node[ver]);
+ ovs_flow_free(flow, false);
}
}
+ BUG_ON(!list_empty(table->mask_list));
+ kfree(table->mask_list);
+
skip_flows:
free_buckets(table->buckets);
kfree(table);
}
+struct flow_table *ovs_flow_tbl_alloc(int new_size)
+{
+ struct flow_table *table = __flow_tbl_alloc(new_size);
+
+ if (!table)
+ return NULL;
+
+ table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL);
+ if (!table->mask_list) {
+ table->keep_flows = true;
+ __flow_tbl_destroy(table);
+ return NULL;
+ }
+ INIT_LIST_HEAD(table->mask_list);
+
+ return table;
+}
+
static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
{
struct flow_table *table = container_of(rcu, struct flow_table, rcu);
- ovs_flow_tbl_destroy(table);
+ __flow_tbl_destroy(table);
}
-void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
{
if (!table)
return;
- call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+ if (deferred)
+ call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+ else
+ __flow_tbl_destroy(table);
}
-struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
+struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -353,11 +588,13 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la
return NULL;
}
-static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+static void __tbl_insert(struct flow_table *table, struct sw_flow *flow)
{
struct hlist_head *head;
+
head = find_bucket(table, flow->hash);
hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+
table->count++;
}
@@ -377,8 +614,10 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new
head = flex_array_get(old->buckets, i);
hlist_for_each_entry(flow, head, hash_node[old_ver])
- __flow_tbl_insert(new, flow);
+ __tbl_insert(new, flow);
}
+
+ new->mask_list = old->mask_list;
old->keep_flows = true;
}
@@ -386,7 +625,7 @@ static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buck
{
struct flow_table *new_table;
- new_table = ovs_flow_tbl_alloc(n_buckets);
+ new_table = __flow_tbl_alloc(n_buckets);
if (!new_table)
return ERR_PTR(-ENOMEM);
@@ -405,28 +644,30 @@ struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
return __flow_tbl_rehash(table, table->n_buckets * 2);
}
-void ovs_flow_free(struct sw_flow *flow)
+static void __flow_free(struct sw_flow *flow)
{
- if (unlikely(!flow))
- return;
-
kfree((struct sf_flow_acts __force *)flow->sf_acts);
kmem_cache_free(flow_cache, flow);
}
-/* RCU callback used by ovs_flow_deferred_free. */
static void rcu_free_flow_callback(struct rcu_head *rcu)
{
struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
- ovs_flow_free(flow);
+ __flow_free(flow);
}
-/* Schedules 'flow' to be freed after the next RCU grace period.
- * The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_flow_deferred_free(struct sw_flow *flow)
+void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
- call_rcu(&flow->rcu, rcu_free_flow_callback);
+ if (!flow)
+ return;
+
+ ovs_sw_flow_mask_del_ref(flow->mask, deferred);
+
+ if (deferred)
+ call_rcu(&flow->rcu, rcu_free_flow_callback);
+ else
+ __flow_free(flow);
}
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
@@ -497,18 +738,15 @@ static __be16 parse_ethertype(struct sk_buff *skb)
}
static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
- int *key_lenp, int nh_len)
+ int nh_len)
{
struct icmp6hdr *icmp = icmp6_hdr(skb);
- int error = 0;
- int key_len;
/* The ICMPv6 type and code fields use the 16-bit transport port
* fields, so we need to store them in 16-bit network byte order.
*/
key->ipv6.tp.src = htons(icmp->icmp6_type);
key->ipv6.tp.dst = htons(icmp->icmp6_code);
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (icmp->icmp6_code == 0 &&
(icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
@@ -517,21 +755,17 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
struct nd_msg *nd;
int offset;
- key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
-
/* In order to process neighbor discovery options, we need the
* entire packet.
*/
if (unlikely(icmp_len < sizeof(*nd)))
- goto out;
- if (unlikely(skb_linearize(skb))) {
- error = -ENOMEM;
- goto out;
- }
+ return 0;
+
+ if (unlikely(skb_linearize(skb)))
+ return -ENOMEM;
nd = (struct nd_msg *)skb_transport_header(skb);
key->ipv6.nd.target = nd->target;
- key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
icmp_len -= sizeof(*nd);
offset = 0;
@@ -541,7 +775,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
int opt_len = nd_opt->nd_opt_len * 8;
if (unlikely(!opt_len || opt_len > icmp_len))
- goto invalid;
+ return 0;
/* Store the link layer address if the appropriate
* option is provided. It is considered an error if
@@ -566,16 +800,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
}
}
- goto out;
+ return 0;
invalid:
memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
-out:
- *key_lenp = key_len;
- return error;
+ return 0;
}
/**
@@ -584,7 +816,6 @@ out:
* Ethernet header
* @in_port: port number on which @skb was received.
* @key: output flow key
- * @key_lenp: length of output flow key
*
* The caller must ensure that skb->len >= ETH_HLEN.
*
@@ -602,11 +833,9 @@ out:
* of a correct length, otherwise the same as skb->network_header.
* For other key->eth.type values it is left untouched.
*/
-int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
- int *key_lenp)
+int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
{
- int error = 0;
- int key_len = SW_FLOW_KEY_OFFSET(eth);
+ int error;
struct ethhdr *eth;
memset(key, 0, sizeof(*key));
@@ -649,15 +878,13 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
struct iphdr *nh;
__be16 offset;
- key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
-
error = check_iphdr(skb);
if (unlikely(error)) {
if (error == -EINVAL) {
skb->transport_header = skb->network_header;
error = 0;
}
- goto out;
+ return error;
}
nh = ip_hdr(skb);
@@ -671,7 +898,7 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
- goto out;
+ return 0;
}
if (nh->frag_off & htons(IP_MF) ||
skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
@@ -679,21 +906,24 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
/* Transport layer. */
if (key->ip.proto == IPPROTO_TCP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv4.tp.src = tcp->source;
key->ipv4.tp.dst = tcp->dest;
}
} else if (key->ip.proto == IPPROTO_UDP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv4.tp.src = udp->source;
key->ipv4.tp.dst = udp->dest;
}
+ } else if (key->ip.proto == IPPROTO_SCTP) {
+ if (sctphdr_ok(skb)) {
+ struct sctphdr *sctp = sctp_hdr(skb);
+ key->ipv4.tp.src = sctp->source;
+ key->ipv4.tp.dst = sctp->dest;
+ }
} else if (key->ip.proto == IPPROTO_ICMP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
@@ -722,102 +952,175 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
- key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
- nh_len = parse_ipv6hdr(skb, key, &key_len);
+ nh_len = parse_ipv6hdr(skb, key);
if (unlikely(nh_len < 0)) {
- if (nh_len == -EINVAL)
+ if (nh_len == -EINVAL) {
skb->transport_header = skb->network_header;
- else
+ error = 0;
+ } else {
error = nh_len;
- goto out;
+ }
+ return error;
}
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
- goto out;
+ return 0;
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
/* Transport layer. */
if (key->ip.proto == NEXTHDR_TCP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->ipv6.tp.src = tcp->source;
key->ipv6.tp.dst = tcp->dest;
}
} else if (key->ip.proto == NEXTHDR_UDP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->ipv6.tp.src = udp->source;
key->ipv6.tp.dst = udp->dest;
}
+ } else if (key->ip.proto == NEXTHDR_SCTP) {
+ if (sctphdr_ok(skb)) {
+ struct sctphdr *sctp = sctp_hdr(skb);
+ key->ipv6.tp.src = sctp->source;
+ key->ipv6.tp.dst = sctp->dest;
+ }
} else if (key->ip.proto == NEXTHDR_ICMP) {
- key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
if (icmp6hdr_ok(skb)) {
- error = parse_icmpv6(skb, key, &key_len, nh_len);
- if (error < 0)
- goto out;
+ error = parse_icmpv6(skb, key, nh_len);
+ if (error)
+ return error;
}
}
}
-out:
- *key_lenp = key_len;
- return error;
+ return 0;
}
-static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len)
+static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start,
+ int key_end)
{
- return jhash2((u32 *)((u8 *)key + key_start),
- DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0);
+ u32 *hash_key = (u32 *)((u8 *)key + key_start);
+ int hash_u32s = (key_end - key_start) >> 2;
+
+ /* Make sure number of hash bytes are multiple of u32. */
+ BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+
+ return jhash2(hash_key, hash_u32s, 0);
}
-static int flow_key_start(struct sw_flow_key *key)
+static int flow_key_start(const struct sw_flow_key *key)
{
if (key->tun_key.ipv4_dst)
return 0;
else
- return offsetof(struct sw_flow_key, phy);
+ return rounddown(offsetof(struct sw_flow_key, phy),
+ sizeof(long));
+}
+
+static bool __cmp_key(const struct sw_flow_key *key1,
+ const struct sw_flow_key *key2, int key_start, int key_end)
+{
+ const long *cp1 = (long *)((u8 *)key1 + key_start);
+ const long *cp2 = (long *)((u8 *)key2 + key_start);
+ long diffs = 0;
+ int i;
+
+ for (i = key_start; i < key_end; i += sizeof(long))
+ diffs |= *cp1++ ^ *cp2++;
+
+ return diffs == 0;
}
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
- struct sw_flow_key *key, int key_len)
+static bool __flow_cmp_masked_key(const struct sw_flow *flow,
+ const struct sw_flow_key *key, int key_start, int key_end)
+{
+ return __cmp_key(&flow->key, key, key_start, key_end);
+}
+
+static bool __flow_cmp_unmasked_key(const struct sw_flow *flow,
+ const struct sw_flow_key *key, int key_start, int key_end)
+{
+ return __cmp_key(&flow->unmasked_key, key, key_start, key_end);
+}
+
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+ const struct sw_flow_key *key, int key_end)
+{
+ int key_start;
+ key_start = flow_key_start(key);
+
+ return __flow_cmp_unmasked_key(flow, key, key_start, key_end);
+
+}
+
+struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
+ struct sw_flow_match *match)
+{
+ struct sw_flow_key *unmasked = match->key;
+ int key_end = match->range.end;
+ struct sw_flow *flow;
+
+ flow = ovs_flow_lookup(table, unmasked);
+ if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end)))
+ flow = NULL;
+
+ return flow;
+}
+
+static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table,
+ const struct sw_flow_key *unmasked,
+ struct sw_flow_mask *mask)
{
struct sw_flow *flow;
struct hlist_head *head;
- u8 *_key;
- int key_start;
+ int key_start = mask->range.start;
+ int key_end = mask->range.end;
u32 hash;
+ struct sw_flow_key masked_key;
- key_start = flow_key_start(key);
- hash = ovs_flow_hash(key, key_start, key_len);
-
- _key = (u8 *) key + key_start;
+ ovs_flow_key_mask(&masked_key, unmasked, mask);
+ hash = ovs_flow_hash(&masked_key, key_start, key_end);
head = find_bucket(table, hash);
hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) {
-
- if (flow->hash == hash &&
- !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) {
+ if (flow->mask == mask &&
+ __flow_cmp_masked_key(flow, &masked_key,
+ key_start, key_end))
return flow;
- }
}
return NULL;
}
-void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- struct sw_flow_key *key, int key_len)
+struct sw_flow *ovs_flow_lookup(struct flow_table *tbl,
+ const struct sw_flow_key *key)
+{
+ struct sw_flow *flow = NULL;
+ struct sw_flow_mask *mask;
+
+ list_for_each_entry_rcu(mask, tbl->mask_list, list) {
+ flow = ovs_masked_flow_lookup(tbl, key, mask);
+ if (flow) /* Found */
+ break;
+ }
+
+ return flow;
+}
+
+
+void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow)
{
- flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len);
- memcpy(&flow->key, key, sizeof(flow->key));
- __flow_tbl_insert(table, flow);
+ flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start,
+ flow->mask->range.end);
+ __tbl_insert(table, flow);
}
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow)
{
BUG_ON(table->count == 0);
hlist_del_rcu(&flow->hash_node[table->node_ver]);
@@ -837,6 +1140,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+ [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
@@ -844,149 +1148,85 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_TUNNEL] = -1,
};
-static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
- const struct nlattr *a[], u32 *attrs)
+static bool is_all_zero(const u8 *fp, size_t size)
{
- const struct ovs_key_icmp *icmp_key;
- const struct ovs_key_tcp *tcp_key;
- const struct ovs_key_udp *udp_key;
-
- switch (swkey->ip.proto) {
- case IPPROTO_TCP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- swkey->ipv4.tp.src = tcp_key->tcp_src;
- swkey->ipv4.tp.dst = tcp_key->tcp_dst;
- break;
-
- case IPPROTO_UDP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- swkey->ipv4.tp.src = udp_key->udp_src;
- swkey->ipv4.tp.dst = udp_key->udp_dst;
- break;
-
- case IPPROTO_ICMP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
- icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
- swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
- swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
- break;
- }
-
- return 0;
-}
-
-static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
- const struct nlattr *a[], u32 *attrs)
-{
- const struct ovs_key_icmpv6 *icmpv6_key;
- const struct ovs_key_tcp *tcp_key;
- const struct ovs_key_udp *udp_key;
-
- switch (swkey->ip.proto) {
- case IPPROTO_TCP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
- tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
- swkey->ipv6.tp.src = tcp_key->tcp_src;
- swkey->ipv6.tp.dst = tcp_key->tcp_dst;
- break;
-
- case IPPROTO_UDP:
- if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
- udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
- swkey->ipv6.tp.src = udp_key->udp_src;
- swkey->ipv6.tp.dst = udp_key->udp_dst;
- break;
-
- case IPPROTO_ICMPV6:
- if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
- icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
- swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
- swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
+ int i;
- if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
- swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
- const struct ovs_key_nd *nd_key;
+ if (!fp)
+ return false;
- if (!(*attrs & (1 << OVS_KEY_ATTR_ND)))
- return -EINVAL;
- *attrs &= ~(1 << OVS_KEY_ATTR_ND);
-
- *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
- nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
- memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
- sizeof(swkey->ipv6.nd.target));
- memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
- memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
- }
- break;
- }
+ for (i = 0; i < size; i++)
+ if (fp[i])
+ return false;
- return 0;
+ return true;
}
-static int parse_flow_nlattrs(const struct nlattr *attr,
- const struct nlattr *a[], u32 *attrsp)
+static int __parse_flow_nlattrs(const struct nlattr *attr,
+ const struct nlattr *a[],
+ u64 *attrsp, bool nz)
{
const struct nlattr *nla;
u32 attrs;
int rem;
- attrs = 0;
+ attrs = *attrsp;
nla_for_each_nested(nla, attr, rem) {
u16 type = nla_type(nla);
int expected_len;
- if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type))
+ if (type > OVS_KEY_ATTR_MAX) {
+ OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
+ type, OVS_KEY_ATTR_MAX);
return -EINVAL;
+ }
+
+ if (attrs & (1 << type)) {
+ OVS_NLERR("Duplicate key attribute (type %d).\n", type);
+ return -EINVAL;
+ }
expected_len = ovs_key_lens[type];
- if (nla_len(nla) != expected_len && expected_len != -1)
+ if (nla_len(nla) != expected_len && expected_len != -1) {
+ OVS_NLERR("Key attribute has unexpected length (type=%d"
+ ", length=%d, expected=%d).\n", type,
+ nla_len(nla), expected_len);
return -EINVAL;
+ }
- attrs |= 1 << type;
- a[type] = nla;
+ if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
+ attrs |= 1 << type;
+ a[type] = nla;
+ }
}
- if (rem)
+ if (rem) {
+ OVS_NLERR("Message has %d unknown bytes.\n", rem);
return -EINVAL;
+ }
*attrsp = attrs;
return 0;
}
+static int parse_flow_mask_nlattrs(const struct nlattr *attr,
+ const struct nlattr *a[], u64 *attrsp)
+{
+ return __parse_flow_nlattrs(attr, a, attrsp, true);
+}
+
+static int parse_flow_nlattrs(const struct nlattr *attr,
+ const struct nlattr *a[], u64 *attrsp)
+{
+ return __parse_flow_nlattrs(attr, a, attrsp, false);
+}
+
int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct ovs_key_ipv4_tunnel *tun_key)
+ struct sw_flow_match *match, bool is_mask)
{
struct nlattr *a;
int rem;
bool ttl = false;
-
- memset(tun_key, 0, sizeof(*tun_key));
+ __be16 tun_flags = 0;
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
@@ -1000,53 +1240,78 @@ int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
};
- if (type > OVS_TUNNEL_KEY_ATTR_MAX ||
- ovs_tunnel_key_lens[type] != nla_len(a))
+ if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
+ OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
+ type, OVS_TUNNEL_KEY_ATTR_MAX);
return -EINVAL;
+ }
+
+ if (ovs_tunnel_key_lens[type] != nla_len(a)) {
+ OVS_NLERR("IPv4 tunnel attribute type has unexpected "
+ " length (type=%d, length=%d, expected=%d).\n",
+ type, nla_len(a), ovs_tunnel_key_lens[type]);
+ return -EINVAL;
+ }
switch (type) {
case OVS_TUNNEL_KEY_ATTR_ID:
- tun_key->tun_id = nla_get_be64(a);
- tun_key->tun_flags |= TUNNEL_KEY;
+ SW_FLOW_KEY_PUT(match, tun_key.tun_id,
+ nla_get_be64(a), is_mask);
+ tun_flags |= TUNNEL_KEY;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
- tun_key->ipv4_src = nla_get_be32(a);
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+ nla_get_be32(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
- tun_key->ipv4_dst = nla_get_be32(a);
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+ nla_get_be32(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
- tun_key->ipv4_tos = nla_get_u8(a);
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+ nla_get_u8(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TTL:
- tun_key->ipv4_ttl = nla_get_u8(a);
+ SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+ nla_get_u8(a), is_mask);
ttl = true;
break;
case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
- tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT;
+ tun_flags |= TUNNEL_DONT_FRAGMENT;
break;
case OVS_TUNNEL_KEY_ATTR_CSUM:
- tun_key->tun_flags |= TUNNEL_CSUM;
+ tun_flags |= TUNNEL_CSUM;
break;
default:
return -EINVAL;
-
}
}
- if (rem > 0)
- return -EINVAL;
- if (!tun_key->ipv4_dst)
- return -EINVAL;
+ SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
- if (!ttl)
+ if (rem > 0) {
+ OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
return -EINVAL;
+ }
+
+ if (!is_mask) {
+ if (!match->key->tun_key.ipv4_dst) {
+ OVS_NLERR("IPv4 tunnel destination address is zero.\n");
+ return -EINVAL;
+ }
+
+ if (!ttl) {
+ OVS_NLERR("IPv4 tunnel TTL not specified.\n");
+ return -EINVAL;
+ }
+ }
return 0;
}
int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key)
+ const struct ovs_key_ipv4_tunnel *tun_key,
+ const struct ovs_key_ipv4_tunnel *output)
{
struct nlattr *nla;
@@ -1054,23 +1319,24 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
if (!nla)
return -EMSGSIZE;
- if (tun_key->tun_flags & TUNNEL_KEY &&
- nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id))
+ if (output->tun_flags & TUNNEL_KEY &&
+ nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (tun_key->ipv4_src &&
- nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src))
+ if (output->ipv4_src &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
return -EMSGSIZE;
- if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst))
+ if (output->ipv4_dst &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
return -EMSGSIZE;
- if (tun_key->ipv4_tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos))
+ if (output->ipv4_tos &&
+ nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
return -EMSGSIZE;
- if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl))
+ if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+ if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
return -EMSGSIZE;
- if ((tun_key->tun_flags & TUNNEL_CSUM) &&
+ if ((output->tun_flags & TUNNEL_CSUM) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
return -EMSGSIZE;
@@ -1078,176 +1344,390 @@ int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
return 0;
}
-/**
- * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key.
- * @swkey: receives the extracted flow key.
- * @key_lenp: number of bytes used in @swkey.
- * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
- * sequence.
- */
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
- const struct nlattr *attr)
+static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
+ const struct nlattr **a, bool is_mask)
{
- const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
- const struct ovs_key_ethernet *eth_key;
- int key_len;
- u32 attrs;
- int err;
+ if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+ SW_FLOW_KEY_PUT(match, phy.priority,
+ nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+ }
- memset(swkey, 0, sizeof(struct sw_flow_key));
- key_len = SW_FLOW_KEY_OFFSET(eth);
+ if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+ u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
- err = parse_flow_nlattrs(attr, a, &attrs);
- if (err)
- return err;
+ if (is_mask)
+ in_port = 0xffffffff; /* Always exact match in_port. */
+ else if (in_port >= DP_MAX_PORTS)
+ return -EINVAL;
- /* Metadata attributes. */
- if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
- swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
- attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+ SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+ } else if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
}
- if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
- u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
- if (in_port >= DP_MAX_PORTS)
- return -EINVAL;
- swkey->phy.in_port = in_port;
- attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
- } else {
- swkey->phy.in_port = DP_MAX_PORTS;
+
+ if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
+ uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
+
+ SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
+ *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
}
- if (attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
- swkey->phy.skb_mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
- attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
+ if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
+ if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+ is_mask))
+ return -EINVAL;
+ *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
+ return 0;
+}
- if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
- err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key);
- if (err)
- return err;
+static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
+ const struct nlattr **a, bool is_mask)
+{
+ int err;
+ u64 orig_attrs = attrs;
- attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
- }
+ err = metadata_from_nlattrs(match, &attrs, a, is_mask);
+ if (err)
+ return err;
- /* Data attributes. */
- if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+ if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
+ const struct ovs_key_ethernet *eth_key;
- eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
- memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
- memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
+ eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+ SW_FLOW_KEY_MEMCPY(match, eth.src,
+ eth_key->eth_src, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, eth.dst,
+ eth_key->eth_dst, ETH_ALEN, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+ }
- if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
- nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
- const struct nlattr *encap;
+ if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
__be16 tci;
- if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
- (1 << OVS_KEY_ATTR_ETHERTYPE) |
- (1 << OVS_KEY_ATTR_ENCAP)))
- return -EINVAL;
-
- encap = a[OVS_KEY_ATTR_ENCAP];
tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
- if (tci & htons(VLAN_TAG_PRESENT)) {
- swkey->eth.tci = tci;
-
- err = parse_flow_nlattrs(encap, a, &attrs);
- if (err)
- return err;
- } else if (!tci) {
- /* Corner case for truncated 802.1Q header. */
- if (nla_len(encap))
- return -EINVAL;
+ if (!(tci & htons(VLAN_TAG_PRESENT))) {
+ if (is_mask)
+ OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
+ else
+ OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");
- swkey->eth.type = htons(ETH_P_8021Q);
- *key_lenp = key_len;
- return 0;
- } else {
return -EINVAL;
}
- }
+
+ SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+ } else if (!is_mask)
+ SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
- swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
- if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
+ __be16 eth_type;
+
+ eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+ if (is_mask) {
+ /* Always exact match EtherType. */
+ eth_type = htons(0xffff);
+ } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+ OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
+ ntohs(eth_type), ETH_P_802_3_MIN);
return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
- } else {
- swkey->eth.type = htons(ETH_P_802_2);
+ } else if (!is_mask) {
+ SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
}
- if (swkey->eth.type == htons(ETH_P_IP)) {
+ if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
const struct ovs_key_ipv4 *ipv4_key;
- if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
-
- key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
- if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
+ if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
+ OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
+ ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
return -EINVAL;
- swkey->ip.proto = ipv4_key->ipv4_proto;
- swkey->ip.tos = ipv4_key->ipv4_tos;
- swkey->ip.ttl = ipv4_key->ipv4_ttl;
- swkey->ip.frag = ipv4_key->ipv4_frag;
- swkey->ipv4.addr.src = ipv4_key->ipv4_src;
- swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
-
- if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
- err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
- if (err)
- return err;
}
- } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- const struct ovs_key_ipv6 *ipv6_key;
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ipv4_key->ipv4_proto, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.tos,
+ ipv4_key->ipv4_tos, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.ttl,
+ ipv4_key->ipv4_ttl, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.frag,
+ ipv4_key->ipv4_frag, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+ ipv4_key->ipv4_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+ ipv4_key->ipv4_dst, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
+ }
- if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
- return -EINVAL;
- attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+ if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
+ const struct ovs_key_ipv6 *ipv6_key;
- key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
- if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
+ if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
+ OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
+ ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
return -EINVAL;
- swkey->ipv6.label = ipv6_key->ipv6_label;
- swkey->ip.proto = ipv6_key->ipv6_proto;
- swkey->ip.tos = ipv6_key->ipv6_tclass;
- swkey->ip.ttl = ipv6_key->ipv6_hlimit;
- swkey->ip.frag = ipv6_key->ipv6_frag;
- memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
- sizeof(swkey->ipv6.addr.src));
- memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
- sizeof(swkey->ipv6.addr.dst));
-
- if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
- err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
- if (err)
- return err;
}
- } else if (swkey->eth.type == htons(ETH_P_ARP) ||
- swkey->eth.type == htons(ETH_P_RARP)) {
+ SW_FLOW_KEY_PUT(match, ipv6.label,
+ ipv6_key->ipv6_label, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ipv6_key->ipv6_proto, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.tos,
+ ipv6_key->ipv6_tclass, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.ttl,
+ ipv6_key->ipv6_hlimit, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.frag,
+ ipv6_key->ipv6_frag, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
+ ipv6_key->ipv6_src,
+ sizeof(match->key->ipv6.addr.src),
+ is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
+ ipv6_key->ipv6_dst,
+ sizeof(match->key->ipv6.addr.dst),
+ is_mask);
+
+ attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
const struct ovs_key_arp *arp_key;
- if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
+ arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+ if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
+ OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
+ arp_key->arp_op);
return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, ipv4.addr.src,
+ arp_key->arp_sip, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
+ arp_key->arp_tip, is_mask);
+ SW_FLOW_KEY_PUT(match, ip.proto,
+ ntohs(arp_key->arp_op), is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
+ arp_key->arp_sha, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
+ arp_key->arp_tha, ETH_ALEN, is_mask);
+
attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+ }
- key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
- arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
- swkey->ipv4.addr.src = arp_key->arp_sip;
- swkey->ipv4.addr.dst = arp_key->arp_tip;
- if (arp_key->arp_op & htons(0xff00))
+ if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
+ const struct ovs_key_tcp *tcp_key;
+
+ tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ tcp_key->tcp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ tcp_key->tcp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ tcp_key->tcp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
+ const struct ovs_key_udp *udp_key;
+
+ udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ udp_key->udp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ udp_key->udp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ udp_key->udp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
+ const struct ovs_key_sctp *sctp_key;
+
+ sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
+ if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ sctp_key->sctp_dst, is_mask);
+ } else {
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ sctp_key->sctp_src, is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ sctp_key->sctp_dst, is_mask);
+ }
+ attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
+ const struct ovs_key_icmp *icmp_key;
+
+ icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.src,
+ htons(icmp_key->icmp_type), is_mask);
+ SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
+ htons(icmp_key->icmp_code), is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
+ const struct ovs_key_icmpv6 *icmpv6_key;
+
+ icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.src,
+ htons(icmpv6_key->icmpv6_type), is_mask);
+ SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
+ htons(icmpv6_key->icmpv6_code), is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+ }
+
+ if (attrs & (1 << OVS_KEY_ATTR_ND)) {
+ const struct ovs_key_nd *nd_key;
+
+ nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
+ nd_key->nd_target,
+ sizeof(match->key->ipv6.nd.target),
+ is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
+ nd_key->nd_sll, ETH_ALEN, is_mask);
+ SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
+ nd_key->nd_tll, ETH_ALEN, is_mask);
+ attrs &= ~(1 << OVS_KEY_ATTR_ND);
+ }
+
+ if (attrs != 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
+ * mask. If 'mask' is NULL, the flow is treated as an exact-match flow.
+ * Otherwise, it is treated as a wildcarded flow, except when the mask
+ * does not include any don't-care bits.
+ * @match: receives the extracted flow match information.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence. The fields should be those of the packet that triggered the
+ * creation of this flow.
+ * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
+ * attributes that specify the mask fields of the wildcarded flow.
+ */
+int ovs_match_from_nlattrs(struct sw_flow_match *match,
+ const struct nlattr *key,
+ const struct nlattr *mask)
+{
+ const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+ const struct nlattr *encap;
+ u64 key_attrs = 0;
+ u64 mask_attrs = 0;
+ bool encap_valid = false;
+ int err;
+
+ err = parse_flow_nlattrs(key, a, &key_attrs);
+ if (err)
+ return err;
+
+ if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
+ (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+ (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
+ __be16 tci;
+
+ if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+ (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+ OVS_NLERR("Invalid Vlan frame.\n");
return -EINVAL;
- swkey->ip.proto = ntohs(arp_key->arp_op);
- memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
- memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+ }
+
+ key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+ encap = a[OVS_KEY_ATTR_ENCAP];
+ key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+ encap_valid = true;
+
+ if (tci & htons(VLAN_TAG_PRESENT)) {
+ err = parse_flow_nlattrs(encap, a, &key_attrs);
+ if (err)
+ return err;
+ } else if (!tci) {
+ /* Corner case for truncated 802.1Q header. */
+ if (nla_len(encap)) {
+ OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
+ return -EINVAL;
+ }
+ } else {
+ OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
+ return -EINVAL;
+ }
}
- if (attrs)
+ err = ovs_key_from_nlattrs(match, key_attrs, a, false);
+ if (err)
+ return err;
+
+ if (mask) {
+ err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
+ if (err)
+ return err;
+
+ if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
+ __be16 eth_type = 0;
+ __be16 tci = 0;
+
+ if (!encap_valid) {
+ OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
+ return -EINVAL;
+ }
+
+ mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+ if (a[OVS_KEY_ATTR_ETHERTYPE])
+ eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+ if (eth_type == htons(0xffff)) {
+ mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+ encap = a[OVS_KEY_ATTR_ENCAP];
+ err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
+ } else {
+ OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
+ ntohs(eth_type));
+ return -EINVAL;
+ }
+
+ if (a[OVS_KEY_ATTR_VLAN])
+ tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+ if (!(tci & htons(VLAN_TAG_PRESENT))) {
+ OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci));
+ return -EINVAL;
+ }
+ }
+
+ err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
+ if (err)
+ return err;
+ } else {
+ /* Populate exact match flow's key mask. */
+ if (match->mask)
+ ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
+ }
+
+ if (!ovs_match_validate(match, key_attrs, mask_attrs))
return -EINVAL;
- *key_lenp = key_len;
return 0;
}
@@ -1255,7 +1735,6 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
/**
* ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
* @flow: Receives extracted in_port, priority, tun_key and skb_mark.
- * @key_len: Length of key in @flow. Used for calculating flow hash.
* @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence.
*
@@ -1264,102 +1743,100 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
* get the metadata, that is, the parts of the flow key that cannot be
* extracted from the packet itself.
*/
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
- const struct nlattr *attr)
+
+int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
+ const struct nlattr *attr)
{
struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key;
- const struct nlattr *nla;
- int rem;
+ const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+ u64 attrs = 0;
+ int err;
+ struct sw_flow_match match;
flow->key.phy.in_port = DP_MAX_PORTS;
flow->key.phy.priority = 0;
flow->key.phy.skb_mark = 0;
memset(tun_key, 0, sizeof(flow->key.tun_key));
- nla_for_each_nested(nla, attr, rem) {
- int type = nla_type(nla);
-
- if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
- int err;
-
- if (nla_len(nla) != ovs_key_lens[type])
- return -EINVAL;
-
- switch (type) {
- case OVS_KEY_ATTR_PRIORITY:
- flow->key.phy.priority = nla_get_u32(nla);
- break;
-
- case OVS_KEY_ATTR_TUNNEL:
- err = ovs_ipv4_tun_from_nlattr(nla, tun_key);
- if (err)
- return err;
- break;
-
- case OVS_KEY_ATTR_IN_PORT:
- if (nla_get_u32(nla) >= DP_MAX_PORTS)
- return -EINVAL;
- flow->key.phy.in_port = nla_get_u32(nla);
- break;
-
- case OVS_KEY_ATTR_SKB_MARK:
- flow->key.phy.skb_mark = nla_get_u32(nla);
- break;
- }
- }
- }
- if (rem)
+ err = parse_flow_nlattrs(attr, a, &attrs);
+ if (err)
return -EINVAL;
- flow->hash = ovs_flow_hash(&flow->key,
- flow_key_start(&flow->key), key_len);
+ memset(&match, 0, sizeof(match));
+ match.key = &flow->key;
+
+ err = metadata_from_nlattrs(&match, &attrs, a, false);
+ if (err)
+ return err;
return 0;
}
-int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey,
+ const struct sw_flow_key *output, struct sk_buff *skb)
{
struct ovs_key_ethernet *eth_key;
struct nlattr *nla, *encap;
+ bool is_mask = (swkey != output);
- if (swkey->phy.priority &&
- nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if (swkey->tun_key.ipv4_dst &&
- ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key))
+ if ((swkey->tun_key.ipv4_dst || is_mask) &&
+ ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key))
goto nla_put_failure;
- if (swkey->phy.in_port != DP_MAX_PORTS &&
- nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port))
- goto nla_put_failure;
+ if (swkey->phy.in_port == DP_MAX_PORTS) {
+ if (is_mask && (output->phy.in_port == 0xffff))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
+ goto nla_put_failure;
+ } else {
+ u16 upper_u16;
+ upper_u16 = !is_mask ? 0 : 0xffff;
- if (swkey->phy.skb_mark &&
- nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, swkey->phy.skb_mark))
+ if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
+ (upper_u16 << 16) | output->phy.in_port))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure;
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
+
eth_key = nla_data(nla);
- memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN);
- memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN);
+ memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
+ memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)) ||
- nla_put_be16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci))
+ __be16 eth_type;
+ eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+ nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
goto nla_put_failure;
encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
if (!swkey->eth.tci)
goto unencap;
- } else {
+ } else
encap = NULL;
- }
- if (swkey->eth.type == htons(ETH_P_802_2))
+ if (swkey->eth.type == htons(ETH_P_802_2)) {
+ /*
+ * Ethertype 802.2 is represented in netlink by omitting
+ * OVS_KEY_ATTR_ETHERTYPE from the flow key attribute and by
+ * 0xffff in the mask attribute. The Ethertype can also
+ * be wildcarded.
+ */
+ if (is_mask && output->eth.type)
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+ output->eth.type))
+ goto nla_put_failure;
goto unencap;
+ }
- if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type))
+ if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
goto nla_put_failure;
if (swkey->eth.type == htons(ETH_P_IP)) {
@@ -1369,12 +1846,12 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
if (!nla)
goto nla_put_failure;
ipv4_key = nla_data(nla);
- ipv4_key->ipv4_src = swkey->ipv4.addr.src;
- ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
- ipv4_key->ipv4_proto = swkey->ip.proto;
- ipv4_key->ipv4_tos = swkey->ip.tos;
- ipv4_key->ipv4_ttl = swkey->ip.ttl;
- ipv4_key->ipv4_frag = swkey->ip.frag;
+ ipv4_key->ipv4_src = output->ipv4.addr.src;
+ ipv4_key->ipv4_dst = output->ipv4.addr.dst;
+ ipv4_key->ipv4_proto = output->ip.proto;
+ ipv4_key->ipv4_tos = output->ip.tos;
+ ipv4_key->ipv4_ttl = output->ip.ttl;
+ ipv4_key->ipv4_frag = output->ip.frag;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
struct ovs_key_ipv6 *ipv6_key;
@@ -1382,15 +1859,15 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
if (!nla)
goto nla_put_failure;
ipv6_key = nla_data(nla);
- memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src,
+ memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
sizeof(ipv6_key->ipv6_src));
- memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
+ memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
sizeof(ipv6_key->ipv6_dst));
- ipv6_key->ipv6_label = swkey->ipv6.label;
- ipv6_key->ipv6_proto = swkey->ip.proto;
- ipv6_key->ipv6_tclass = swkey->ip.tos;
- ipv6_key->ipv6_hlimit = swkey->ip.ttl;
- ipv6_key->ipv6_frag = swkey->ip.frag;
+ ipv6_key->ipv6_label = output->ipv6.label;
+ ipv6_key->ipv6_proto = output->ip.proto;
+ ipv6_key->ipv6_tclass = output->ip.tos;
+ ipv6_key->ipv6_hlimit = output->ip.ttl;
+ ipv6_key->ipv6_frag = output->ip.frag;
} else if (swkey->eth.type == htons(ETH_P_ARP) ||
swkey->eth.type == htons(ETH_P_RARP)) {
struct ovs_key_arp *arp_key;
@@ -1400,11 +1877,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
goto nla_put_failure;
arp_key = nla_data(nla);
memset(arp_key, 0, sizeof(struct ovs_key_arp));
- arp_key->arp_sip = swkey->ipv4.addr.src;
- arp_key->arp_tip = swkey->ipv4.addr.dst;
- arp_key->arp_op = htons(swkey->ip.proto);
- memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
- memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+ arp_key->arp_sip = output->ipv4.addr.src;
+ arp_key->arp_tip = output->ipv4.addr.dst;
+ arp_key->arp_op = htons(output->ip.proto);
+ memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
+ memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1419,11 +1896,11 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
goto nla_put_failure;
tcp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) {
- tcp_key->tcp_src = swkey->ipv4.tp.src;
- tcp_key->tcp_dst = swkey->ipv4.tp.dst;
+ tcp_key->tcp_src = output->ipv4.tp.src;
+ tcp_key->tcp_dst = output->ipv4.tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- tcp_key->tcp_src = swkey->ipv6.tp.src;
- tcp_key->tcp_dst = swkey->ipv6.tp.dst;
+ tcp_key->tcp_src = output->ipv6.tp.src;
+ tcp_key->tcp_dst = output->ipv6.tp.dst;
}
} else if (swkey->ip.proto == IPPROTO_UDP) {
struct ovs_key_udp *udp_key;
@@ -1433,11 +1910,25 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
goto nla_put_failure;
udp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) {
- udp_key->udp_src = swkey->ipv4.tp.src;
- udp_key->udp_dst = swkey->ipv4.tp.dst;
+ udp_key->udp_src = output->ipv4.tp.src;
+ udp_key->udp_dst = output->ipv4.tp.dst;
+ } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+ udp_key->udp_src = output->ipv6.tp.src;
+ udp_key->udp_dst = output->ipv6.tp.dst;
+ }
+ } else if (swkey->ip.proto == IPPROTO_SCTP) {
+ struct ovs_key_sctp *sctp_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
+ if (!nla)
+ goto nla_put_failure;
+ sctp_key = nla_data(nla);
+ if (swkey->eth.type == htons(ETH_P_IP)) {
+ sctp_key->sctp_src = swkey->ipv4.tp.src;
+ sctp_key->sctp_dst = swkey->ipv4.tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- udp_key->udp_src = swkey->ipv6.tp.src;
- udp_key->udp_dst = swkey->ipv6.tp.dst;
+ sctp_key->sctp_src = swkey->ipv6.tp.src;
+ sctp_key->sctp_dst = swkey->ipv6.tp.dst;
}
} else if (swkey->eth.type == htons(ETH_P_IP) &&
swkey->ip.proto == IPPROTO_ICMP) {
@@ -1447,8 +1938,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
if (!nla)
goto nla_put_failure;
icmp_key = nla_data(nla);
- icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src);
- icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst);
+ icmp_key->icmp_type = ntohs(output->ipv4.tp.src);
+ icmp_key->icmp_code = ntohs(output->ipv4.tp.dst);
} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
swkey->ip.proto == IPPROTO_ICMPV6) {
struct ovs_key_icmpv6 *icmpv6_key;
@@ -1458,8 +1949,8 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
if (!nla)
goto nla_put_failure;
icmpv6_key = nla_data(nla);
- icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src);
- icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst);
+ icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src);
+ icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst);
if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
@@ -1469,10 +1960,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
if (!nla)
goto nla_put_failure;
nd_key = nla_data(nla);
- memcpy(nd_key->nd_target, &swkey->ipv6.nd.target,
+ memcpy(nd_key->nd_target, &output->ipv6.nd.target,
sizeof(nd_key->nd_target));
- memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN);
- memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN);
+ memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
+ memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
}
}
}
@@ -1491,6 +1982,9 @@ nla_put_failure:
* Returns zero if successful or a negative error code. */
int ovs_flow_init(void)
{
+ BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
+ BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
+
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
0, NULL);
if (flow_cache == NULL)
@@ -1504,3 +1998,84 @@ void ovs_flow_exit(void)
{
kmem_cache_destroy(flow_cache);
}
+
+struct sw_flow_mask *ovs_sw_flow_mask_alloc(void)
+{
+ struct sw_flow_mask *mask;
+
+ mask = kmalloc(sizeof(*mask), GFP_KERNEL);
+ if (mask)
+ mask->ref_count = 0;
+
+ return mask;
+}
+
+void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask)
+{
+ mask->ref_count++;
+}
+
+void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
+{
+ if (!mask)
+ return;
+
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count) {
+ list_del_rcu(&mask->list);
+ if (deferred)
+ kfree_rcu(mask, rcu);
+ else
+ kfree(mask);
+ }
+}
+
+static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a,
+ const struct sw_flow_mask *b)
+{
+ u8 *a_ = (u8 *)&a->key + a->range.start;
+ u8 *b_ = (u8 *)&b->key + b->range.start;
+
+ return (a->range.end == b->range.end)
+ && (a->range.start == b->range.start)
+ && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0);
+}
+
+struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl,
+ const struct sw_flow_mask *mask)
+{
+ struct list_head *ml;
+
+ list_for_each(ml, tbl->mask_list) {
+ struct sw_flow_mask *m;
+ m = container_of(ml, struct sw_flow_mask, list);
+ if (ovs_sw_flow_mask_equal(mask, m))
+ return m;
+ }
+
+ return NULL;
+}
+
+/**
+ * Add a new mask to the mask list.
+ * The caller needs to make sure that 'mask' is not the same
+ * as any mask that is already on the list.
+ */
+void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ list_add_rcu(&mask->list, tbl->mask_list);
+}
+
+/**
+ * Set 'range' fields in the mask to the value of 'val'.
+ */
+static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask,
+ struct sw_flow_key_range *range, u8 val)
+{
+ u8 *m = (u8 *)&mask->key + range->start;
+
+ mask->range = *range;
+ memset(m, val, range_n_bytes(range));
+}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 66ef7220293e..212fbf7510c4 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2011 Nicira, Inc.
+ * Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -33,6 +33,8 @@
#include <net/inet_ecn.h>
struct sk_buff;
+struct sw_flow_mask;
+struct flow_table;
struct sw_flow_actions {
struct rcu_head rcu;
@@ -97,8 +99,8 @@ struct sw_flow_key {
} addr;
union {
struct {
- __be16 src; /* TCP/UDP source port. */
- __be16 dst; /* TCP/UDP destination port. */
+ __be16 src; /* TCP/UDP/SCTP source port. */
+ __be16 dst; /* TCP/UDP/SCTP destination port. */
} tp;
struct {
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
@@ -113,8 +115,8 @@ struct sw_flow_key {
} addr;
__be32 label; /* IPv6 flow label. */
struct {
- __be16 src; /* TCP/UDP source port. */
- __be16 dst; /* TCP/UDP destination port. */
+ __be16 src; /* TCP/UDP/SCTP source port. */
+ __be16 dst; /* TCP/UDP/SCTP destination port. */
} tp;
struct {
struct in6_addr target; /* ND target address. */
@@ -123,7 +125,7 @@ struct sw_flow_key {
} nd;
} ipv6;
};
-};
+} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
struct sw_flow {
struct rcu_head rcu;
@@ -131,6 +133,8 @@ struct sw_flow {
u32 hash;
struct sw_flow_key key;
+ struct sw_flow_key unmasked_key;
+ struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
spinlock_t lock; /* Lock for values below. */
@@ -140,6 +144,20 @@ struct sw_flow {
u8 tcp_flags; /* Union of seen TCP flags. */
};
+struct sw_flow_key_range {
+ size_t start;
+ size_t end;
+};
+
+struct sw_flow_match {
+ struct sw_flow_key *key;
+ struct sw_flow_key_range range;
+ struct sw_flow_mask *mask;
+};
+
+void ovs_match_init(struct sw_flow_match *match,
+ struct sw_flow_key *key, struct sw_flow_mask *mask);
+
struct arp_eth_header {
__be16 ar_hrd; /* format of hardware address */
__be16 ar_pro; /* format of protocol address */
@@ -159,21 +177,21 @@ void ovs_flow_exit(void);
struct sw_flow *ovs_flow_alloc(void);
void ovs_flow_deferred_free(struct sw_flow *);
-void ovs_flow_free(struct sw_flow *flow);
+void ovs_flow_free(struct sw_flow *, bool deferred);
struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len);
void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
-int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
- int *key_lenp);
+int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
void ovs_flow_used(struct sw_flow *, struct sk_buff *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
-
-int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
-int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+int ovs_flow_to_nlattrs(const struct sw_flow_key *,
+ const struct sw_flow_key *, struct sk_buff *);
+int ovs_match_from_nlattrs(struct sw_flow_match *match,
+ const struct nlattr *,
const struct nlattr *);
-int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len,
- const struct nlattr *attr);
+int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow,
+ const struct nlattr *attr);
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
#define TBL_MIN_BUCKETS 1024
@@ -182,6 +200,7 @@ struct flow_table {
struct flex_array *buckets;
unsigned int count, n_buckets;
struct rcu_head rcu;
+ struct list_head *mask_list;
int node_ver;
u32 hash_seed;
bool keep_flows;
@@ -197,22 +216,44 @@ static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
return (table->count > table->n_buckets);
}
-struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
- struct sw_flow_key *key, int len);
-void ovs_flow_tbl_destroy(struct flow_table *table);
-void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
+struct sw_flow *ovs_flow_lookup(struct flow_table *,
+ const struct sw_flow_key *);
+struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table,
+ struct sw_flow_match *match);
+
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
struct flow_table *ovs_flow_tbl_alloc(int new_size);
struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
-void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
- struct sw_flow_key *key, int key_len);
-void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
-struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
+void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow);
+void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow);
+
+struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx);
extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct ovs_key_ipv4_tunnel *tun_key);
+ struct sw_flow_match *match, bool is_mask);
int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *tun_key);
+ const struct ovs_key_ipv4_tunnel *tun_key,
+ const struct ovs_key_ipv4_tunnel *output);
+
+bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
+ const struct sw_flow_key *key, int key_end);
+
+struct sw_flow_mask {
+ int ref_count;
+ struct rcu_head rcu;
+ struct list_head list;
+ struct sw_flow_key_range range;
+ struct sw_flow_key key;
+};
+struct sw_flow_mask *ovs_sw_flow_mask_alloc(void);
+void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *);
+void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred);
+void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *);
+struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *,
+ const struct sw_flow_mask *);
+void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src,
+ const struct sw_flow_mask *mask);
#endif /* flow.h */
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 493e9775dcda..c99dea543d64 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -16,7 +16,6 @@
* 02110-1301, USA
*/
-#ifdef CONFIG_OPENVSWITCH_GRE
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/if.h>
@@ -177,10 +176,10 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
skb->local_df = 1;
- return iptunnel_xmit(net, rt, skb, fl.saddr,
+ return iptunnel_xmit(rt, skb, fl.saddr,
OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
OVS_CB(skb)->tun_key->ipv4_tos,
- OVS_CB(skb)->tun_key->ipv4_ttl, df);
+ OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
err_free_rt:
ip_rt_put(rt);
error:
@@ -271,5 +270,3 @@ const struct vport_ops ovs_gre_vport_ops = {
.get_name = gre_get_name,
.send = gre_tnl_send,
};
-
-#endif /* OPENVSWITCH_GRE */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 5982f3f62835..09d93c13cfd6 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -25,6 +25,7 @@
#include <linux/llc.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/openvswitch.h>
#include <net/llc.h>
@@ -74,6 +75,15 @@ static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
return RX_HANDLER_CONSUMED;
}
+static struct net_device *get_dpdev(struct datapath *dp)
+{
+ struct vport *local;
+
+ local = ovs_vport_ovsl(dp, OVSP_LOCAL);
+ BUG_ON(!local);
+ return netdev_vport_priv(local)->dev;
+}
+
static struct vport *netdev_create(const struct vport_parms *parms)
{
struct vport *vport;
@@ -103,10 +113,15 @@ static struct vport *netdev_create(const struct vport_parms *parms)
}
rtnl_lock();
+ err = netdev_master_upper_dev_link(netdev_vport->dev,
+ get_dpdev(vport->dp));
+ if (err)
+ goto error_unlock;
+
err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
vport);
if (err)
- goto error_unlock;
+ goto error_master_upper_dev_unlink;
dev_set_promiscuity(netdev_vport->dev, 1);
netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
@@ -114,6 +129,8 @@ static struct vport *netdev_create(const struct vport_parms *parms)
return vport;
+error_master_upper_dev_unlink:
+ netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
error_unlock:
rtnl_unlock();
error_put:
@@ -140,6 +157,7 @@ static void netdev_destroy(struct vport *vport)
rtnl_lock();
netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
netdev_rx_handler_unregister(netdev_vport->dev);
+ netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
dev_set_promiscuity(netdev_vport->dev, -1);
rtnl_unlock();
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
new file mode 100644
index 000000000000..a481c03e2861
--- /dev/null
+++ b/net/openvswitch/vport-vxlan.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2013 Nicira, Inc.
+ * Copyright (c) 2013 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/net.h>
+#include <linux/rculist.h>
+#include <linux/udp.h>
+
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+#include <net/rtnetlink.h>
+#include <net/route.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/vxlan.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+/**
+ * struct vxlan_port - Keeps track of open UDP ports
+ * @vs: vxlan_sock created for the port.
+ * @name: vport name.
+ */
+struct vxlan_port {
+ struct vxlan_sock *vs;
+ char name[IFNAMSIZ];
+};
+
+static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
+{
+ return vport_priv(vport);
+}
+
+/* Called with rcu_read_lock and BH disabled. */
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+{
+ struct ovs_key_ipv4_tunnel tun_key;
+ struct vport *vport = vs->data;
+ struct iphdr *iph;
+ __be64 key;
+
+ /* Save outer tunnel values */
+ iph = ip_hdr(skb);
+ key = cpu_to_be64(ntohl(vx_vni) >> 8);
+ ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);
+
+ ovs_vport_receive(vport, skb, &tun_key);
+}
+
+static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
+{
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
+ __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+
+ if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static void vxlan_tnl_destroy(struct vport *vport)
+{
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
+
+ vxlan_sock_release(vxlan_port->vs);
+
+ ovs_vport_deferred_free(vport);
+}
+
+static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
+{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct nlattr *options = parms->options;
+ struct vxlan_port *vxlan_port;
+ struct vxlan_sock *vs;
+ struct vport *vport;
+ struct nlattr *a;
+ u16 dst_port;
+ int err;
+
+ if (!options) {
+ err = -EINVAL;
+ goto error;
+ }
+ a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
+ if (a && nla_len(a) == sizeof(u16)) {
+ dst_port = nla_get_u16(a);
+ } else {
+ /* Require destination port from userspace. */
+ err = -EINVAL;
+ goto error;
+ }
+
+ vport = ovs_vport_alloc(sizeof(struct vxlan_port),
+ &ovs_vxlan_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ vxlan_port = vxlan_vport(vport);
+ strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
+
+ vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, false);
+ if (IS_ERR(vs)) {
+ ovs_vport_free(vport);
+ return (void *)vs;
+ }
+ vxlan_port->vs = vs;
+
+ return vport;
+
+error:
+ return ERR_PTR(err);
+}
+
+static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
+{
+ struct net *net = ovs_dp_get_net(vport->dp);
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
+ __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ struct rtable *rt;
+ struct flowi4 fl;
+ __be16 src_port;
+ int port_min;
+ int port_max;
+ __be16 df;
+ int err;
+
+ if (unlikely(!OVS_CB(skb)->tun_key)) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ /* Route lookup */
+ memset(&fl, 0, sizeof(fl));
+ fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst;
+ fl.saddr = OVS_CB(skb)->tun_key->ipv4_src;
+ fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
+ fl.flowi4_mark = skb->mark;
+ fl.flowi4_proto = IPPROTO_UDP;
+
+ rt = ip_route_output_key(net, &fl);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ goto error;
+ }
+
+ df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
+ htons(IP_DF) : 0;
+
+ skb->local_df = 1;
+
+ inet_get_local_port_range(&port_min, &port_max);
+ src_port = vxlan_src_port(port_min, port_max, skb);
+
+ err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
+ fl.saddr, OVS_CB(skb)->tun_key->ipv4_dst,
+ OVS_CB(skb)->tun_key->ipv4_tos,
+ OVS_CB(skb)->tun_key->ipv4_ttl, df,
+ src_port, dst_port,
+ htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
+ if (err < 0)
+ ip_rt_put(rt);
+error:
+ return err;
+}
+
+static const char *vxlan_get_name(const struct vport *vport)
+{
+ struct vxlan_port *vxlan_port = vxlan_vport(vport);
+ return vxlan_port->name;
+}
+
+const struct vport_ops ovs_vxlan_vport_ops = {
+ .type = OVS_VPORT_TYPE_VXLAN,
+ .create = vxlan_tnl_create,
+ .destroy = vxlan_tnl_destroy,
+ .get_name = vxlan_get_name,
+ .get_options = vxlan_get_options,
+ .send = vxlan_tnl_send,
+};
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index d4c7fa04ce08..6f65dbe13812 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -42,6 +42,9 @@ static const struct vport_ops *vport_ops_list[] = {
#ifdef CONFIG_OPENVSWITCH_GRE
&ovs_gre_vport_ops,
#endif
+#ifdef CONFIG_OPENVSWITCH_VXLAN
+ &ovs_vxlan_vport_ops,
+#endif
};
/* Protected by RCU read lock for reading, ovs_mutex for writing. */
@@ -200,7 +203,7 @@ out:
* ovs_vport_set_options - modify existing vport device (for kernel callers)
*
* @vport: vport to modify.
- * @port: New configuration.
+ * @options: New configuration.
*
* Modifies an existing device with the specified configuration (which is
* dependent on device type). ovs_mutex must be held.
@@ -325,6 +328,7 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
*
* @vport: vport that received the packet
* @skb: skb that was received
+ * @tun_key: tunnel (if any) that carried packet
*
* Must be called with rcu_read_lock. The packet cannot be shared and
* skb->data should point to the Ethernet header.
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 376045c42f8b..1a9fbcec6e1b 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -199,6 +199,7 @@ void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
extern const struct vport_ops ovs_netdev_vport_ops;
extern const struct vport_ops ovs_internal_vport_ops;
extern const struct vport_ops ovs_gre_vport_ops;
+extern const struct vport_ops ovs_vxlan_vport_ops;
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 75c8bbf598c8..2e8286b47c28 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -88,7 +88,7 @@
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
-
+#include <linux/reciprocal_div.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
@@ -1135,7 +1135,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
- return (((u64)skb->rxhash) * num) >> 32;
+ return reciprocal_divide(skb->rxhash, num);
}
static unsigned int fanout_demux_lb(struct packet_fanout *f,
@@ -1158,6 +1158,13 @@ static unsigned int fanout_demux_cpu(struct packet_fanout *f,
return smp_processor_id() % num;
}
+static unsigned int fanout_demux_rnd(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int num)
+{
+ return reciprocal_divide(prandom_u32(), num);
+}
+
static unsigned int fanout_demux_rollover(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int idx, unsigned int skip,
@@ -1215,6 +1222,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
case PACKET_FANOUT_CPU:
idx = fanout_demux_cpu(f, skb, num);
break;
+ case PACKET_FANOUT_RND:
+ idx = fanout_demux_rnd(f, skb, num);
+ break;
case PACKET_FANOUT_ROLLOVER:
idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
break;
@@ -1284,6 +1294,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
case PACKET_FANOUT_HASH:
case PACKET_FANOUT_LB:
case PACKET_FANOUT_CPU:
+ case PACKET_FANOUT_RND:
break;
default:
return -EINVAL;
@@ -2181,7 +2192,7 @@ static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
linear = len;
skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
- err);
+ err, 0);
if (!skb)
return NULL;
@@ -2638,51 +2649,6 @@ out:
return err;
}
-static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
-{
- struct sock_exterr_skb *serr;
- struct sk_buff *skb, *skb2;
- int copied, err;
-
- err = -EAGAIN;
- skb = skb_dequeue(&sk->sk_error_queue);
- if (skb == NULL)
- goto out;
-
- copied = skb->len;
- if (copied > len) {
- msg->msg_flags |= MSG_TRUNC;
- copied = len;
- }
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
- if (err)
- goto out_free_skb;
-
- sock_recv_timestamp(msg, sk, skb);
-
- serr = SKB_EXT_ERR(skb);
- put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
- sizeof(serr->ee), &serr->ee);
-
- msg->msg_flags |= MSG_ERRQUEUE;
- err = copied;
-
- /* Reset and regenerate socket error */
- spin_lock_bh(&sk->sk_error_queue.lock);
- sk->sk_err = 0;
- if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
- sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
- spin_unlock_bh(&sk->sk_error_queue.lock);
- sk->sk_error_report(sk);
- } else
- spin_unlock_bh(&sk->sk_error_queue.lock);
-
-out_free_skb:
- kfree_skb(skb);
-out:
- return err;
-}
-
/*
* Pull a packet from our receive queue and hand it to the user.
* If necessary we block.
@@ -2708,7 +2674,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
#endif
if (flags & MSG_ERRQUEUE) {
- err = packet_recv_error(sk, msg, len);
+ err = sock_recv_errqueue(sk, msg, len,
+ SOL_PACKET, PACKET_TX_TIMESTAMP);
goto out;
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 1afd1381cdc7..77e38f733496 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -793,7 +793,7 @@ static int pn_res_seq_show(struct seq_file *seq, void *v)
struct sock **psk = v;
struct sock *sk = *psk;
- seq_printf(seq, "%02X %5d %lu%n",
+ seq_printf(seq, "%02X %5u %lu%n",
(int) (psk - pnres.sk),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk), &len);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 1cec5e4f3a5e..1bacc1079942 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -576,14 +576,14 @@ void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw)
}
EXPORT_SYMBOL(rfkill_set_states);
-static ssize_t rfkill_name_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t name_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct rfkill *rfkill = to_rfkill(dev);
return sprintf(buf, "%s\n", rfkill->name);
}
+static DEVICE_ATTR_RO(name);
static const char *rfkill_get_type_str(enum rfkill_type type)
{
@@ -611,54 +611,52 @@ static const char *rfkill_get_type_str(enum rfkill_type type)
}
}
-static ssize_t rfkill_type_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t type_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct rfkill *rfkill = to_rfkill(dev);
return sprintf(buf, "%s\n", rfkill_get_type_str(rfkill->type));
}
+static DEVICE_ATTR_RO(type);
-static ssize_t rfkill_idx_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t index_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct rfkill *rfkill = to_rfkill(dev);
return sprintf(buf, "%d\n", rfkill->idx);
}
+static DEVICE_ATTR_RO(index);
-static ssize_t rfkill_persistent_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t persistent_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct rfkill *rfkill = to_rfkill(dev);
return sprintf(buf, "%d\n", rfkill->persistent);
}
+static DEVICE_ATTR_RO(persistent);
-static ssize_t rfkill_hard_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t hard_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct rfkill *rfkill = to_rfkill(dev);
return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 );
}
+static DEVICE_ATTR_RO(hard);
-static ssize_t rfkill_soft_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t soft_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct rfkill *rfkill = to_rfkill(dev);
return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 );
}
-static ssize_t rfkill_soft_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t soft_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct rfkill *rfkill = to_rfkill(dev);
unsigned long state;
@@ -680,6 +678,7 @@ static ssize_t rfkill_soft_store(struct device *dev,
return count;
}
+static DEVICE_ATTR_RW(soft);
static u8 user_state_from_blocked(unsigned long state)
{
@@ -691,18 +690,16 @@ static u8 user_state_from_blocked(unsigned long state)
return RFKILL_USER_STATE_UNBLOCKED;
}
-static ssize_t rfkill_state_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t state_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
struct rfkill *rfkill = to_rfkill(dev);
return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state));
}
-static ssize_t rfkill_state_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t state_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct rfkill *rfkill = to_rfkill(dev);
unsigned long state;
@@ -725,32 +722,27 @@ static ssize_t rfkill_state_store(struct device *dev,
return count;
}
+static DEVICE_ATTR_RW(state);
-static ssize_t rfkill_claim_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t claim_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", 0);
}
-
-static ssize_t rfkill_claim_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- return -EOPNOTSUPP;
-}
-
-static struct device_attribute rfkill_dev_attrs[] = {
- __ATTR(name, S_IRUGO, rfkill_name_show, NULL),
- __ATTR(type, S_IRUGO, rfkill_type_show, NULL),
- __ATTR(index, S_IRUGO, rfkill_idx_show, NULL),
- __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL),
- __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
- __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
- __ATTR(soft, S_IRUGO|S_IWUSR, rfkill_soft_show, rfkill_soft_store),
- __ATTR(hard, S_IRUGO, rfkill_hard_show, NULL),
- __ATTR_NULL
+static DEVICE_ATTR_RO(claim);
+
+static struct attribute *rfkill_dev_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_type.attr,
+ &dev_attr_index.attr,
+ &dev_attr_persistent.attr,
+ &dev_attr_state.attr,
+ &dev_attr_claim.attr,
+ &dev_attr_soft.attr,
+ &dev_attr_hard.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(rfkill_dev);
static void rfkill_release(struct device *dev)
{
@@ -830,7 +822,7 @@ static int rfkill_resume(struct device *dev)
static struct class rfkill_class = {
.name = "rfkill",
.dev_release = rfkill_release,
- .dev_attrs = rfkill_dev_attrs,
+ .dev_groups = rfkill_dev_groups,
.dev_uevent = rfkill_dev_uevent,
.suspend = rfkill_suspend,
.resume = rfkill_resume,
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
index d11ac79246e4..cf5b145902e5 100644
--- a/net/rfkill/rfkill-regulator.c
+++ b/net/rfkill/rfkill-regulator.c
@@ -30,6 +30,7 @@ struct rfkill_regulator_data {
static int rfkill_regulator_set_block(void *data, bool blocked)
{
struct rfkill_regulator_data *rfkill_data = data;
+ int ret = 0;
pr_debug("%s: blocked: %d\n", __func__, blocked);
@@ -40,15 +41,16 @@ static int rfkill_regulator_set_block(void *data, bool blocked)
}
} else {
if (!rfkill_data->reg_enabled) {
- regulator_enable(rfkill_data->vcc);
- rfkill_data->reg_enabled = true;
+ ret = regulator_enable(rfkill_data->vcc);
+ if (!ret)
+ rfkill_data->reg_enabled = true;
}
}
pr_debug("%s: regulator_is_enabled after set_block: %d\n", __func__,
regulator_is_enabled(rfkill_data->vcc));
- return 0;
+ return ret;
}
static struct rfkill_ops rfkill_regulator_ops = {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 235e01acac51..c03a32a0418e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -272,6 +272,20 @@ config NET_SCH_FQ_CODEL
If unsure, say N.
+config NET_SCH_FQ
+ tristate "Fair Queue"
+ help
+ Say Y here if you want to use the FQ packet scheduling algorithm.
+
+ FQ does flow separation, and is able to respect pacing requirements
+ set by TCP stack into sk->sk_pacing_rate (for locally generated
+ traffic).
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_fq.
+
+ If unsure, say N.
+
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 978cbf004e80..e5f9abe9a5db 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
+obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3a294eb98d61..867b4a3e3980 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -23,19 +23,18 @@
#include <net/sock.h>
#include <net/cls_cgroup.h>
-static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
+static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state *css)
{
- return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id),
- struct cgroup_cls_state, css);
+ return css ? container_of(css, struct cgroup_cls_state, css) : NULL;
}
static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
{
- return container_of(task_subsys_state(p, net_cls_subsys_id),
- struct cgroup_cls_state, css);
+ return css_cls_state(task_css(p, net_cls_subsys_id));
}
-static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
+static struct cgroup_subsys_state *
+cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct cgroup_cls_state *cs;
@@ -45,17 +44,19 @@ static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
return &cs->css;
}
-static int cgrp_css_online(struct cgroup *cgrp)
+static int cgrp_css_online(struct cgroup_subsys_state *css)
{
- if (cgrp->parent)
- cgrp_cls_state(cgrp)->classid =
- cgrp_cls_state(cgrp->parent)->classid;
+ struct cgroup_cls_state *cs = css_cls_state(css);
+ struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+
+ if (parent)
+ cs->classid = parent->classid;
return 0;
}
-static void cgrp_css_free(struct cgroup *cgrp)
+static void cgrp_css_free(struct cgroup_subsys_state *css)
{
- kfree(cgrp_cls_state(cgrp));
+ kfree(css_cls_state(css));
}
static int update_classid(const void *v, struct file *file, unsigned n)
@@ -67,12 +68,13 @@ static int update_classid(const void *v, struct file *file, unsigned n)
return 0;
}
-static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static void cgrp_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
struct task_struct *p;
void *v;
- cgroup_taskset_for_each(p, cgrp, tset) {
+ cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
v = (void *)(unsigned long)task_cls_classid(p);
iterate_fd(p->files, 0, update_classid, v);
@@ -80,14 +82,15 @@ static void cgrp_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
}
}
-static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
+static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
{
- return cgrp_cls_state(cgrp)->classid;
+ return css_cls_state(css)->classid;
}
-static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
+static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 value)
{
- cgrp_cls_state(cgrp)->classid = (u32) value;
+ css_cls_state(css)->classid = (u32) value;
return 0;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 51b968d3febb..2adda7fa2d39 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -200,6 +200,58 @@ int unregister_qdisc(struct Qdisc_ops *qops)
}
EXPORT_SYMBOL(unregister_qdisc);
+/* Get default qdisc if not otherwise specified */
+void qdisc_get_default(char *name, size_t len)
+{
+ read_lock(&qdisc_mod_lock);
+ strlcpy(name, default_qdisc_ops->id, len);
+ read_unlock(&qdisc_mod_lock);
+}
+
+static struct Qdisc_ops *qdisc_lookup_default(const char *name)
+{
+ struct Qdisc_ops *q = NULL;
+
+ for (q = qdisc_base; q; q = q->next) {
+ if (!strcmp(name, q->id)) {
+ if (!try_module_get(q->owner))
+ q = NULL;
+ break;
+ }
+ }
+
+ return q;
+}
+
+/* Set new default qdisc to use */
+int qdisc_set_default(const char *name)
+{
+ const struct Qdisc_ops *ops;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ write_lock(&qdisc_mod_lock);
+ ops = qdisc_lookup_default(name);
+ if (!ops) {
+ /* Not found, drop lock and try to load module */
+ write_unlock(&qdisc_mod_lock);
+ request_module("sch_%s", name);
+ write_lock(&qdisc_mod_lock);
+
+ ops = qdisc_lookup_default(name);
+ }
+
+ if (ops) {
+ /* Set new default */
+ module_put(default_qdisc_ops->owner);
+ default_qdisc_ops = ops;
+ }
+ write_unlock(&qdisc_mod_lock);
+
+ return ops ? 0 : -ENOENT;
+}
+
/* We know handle. Find qdisc among all qdisc's attached to device
(root qdisc, all its children, children of children etc.)
*/
@@ -1854,6 +1906,7 @@ static int __init pktsched_init(void)
return err;
}
+ register_qdisc(&pfifo_fast_ops);
register_qdisc(&pfifo_qdisc_ops);
register_qdisc(&bfifo_qdisc_ops);
register_qdisc(&pfifo_head_drop_qdisc_ops);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index ef53ab8d0aae..ddd73cb2d7ba 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -438,7 +438,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
if (mask != q->tab_mask) {
struct sk_buff **ntab;
- ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
+ ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
+ GFP_KERNEL | __GFP_NOWARN);
if (!ntab)
ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
if (!ntab)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
new file mode 100644
index 000000000000..a9dfdda9ed1d
--- /dev/null
+++ b/net/sched/sch_fq.c
@@ -0,0 +1,815 @@
+/*
+ * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing)
+ *
+ * Copyright (C) 2013 Eric Dumazet <edumazet@google.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Meant to be mostly used for locally generated traffic :
+ * Fast classification depends on skb->sk being set before reaching us.
+ * If not, (router workload), we use rxhash as fallback, with 32 bits wide hash.
+ * All packets belonging to a socket are considered as a 'flow'.
+ *
+ * Flows are dynamically allocated and stored in a hash table of RB trees
+ * They are also part of one Round Robin 'queues' (new or old flows)
+ *
+ * Burst avoidance (aka pacing) capability :
+ *
+ * Transport (eg TCP) can set in sk->sk_pacing_rate a rate, enqueue a
+ * bunch of packets, and this packet scheduler adds delay between
+ * packets to respect rate limitation.
+ *
+ * enqueue() :
+ * - lookup one RB tree (out of 1024 or more) to find the flow.
+ * If non existent flow, create it, add it to the tree.
+ * Add skb to the per flow list of skb (fifo).
+ * - Use a special fifo for high prio packets
+ *
+ * dequeue() : serves flows in Round Robin
+ * Note : When a flow becomes empty, we do not immediately remove it from
+ * rb trees, for performance reasons (it's expected to send additional packets,
+ * or SLAB cache will reuse socket for another flow)
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+#include <linux/hash.h>
+#include <linux/prefetch.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+
+/*
+ * Per flow structure, dynamically allocated
+ */
+struct fq_flow {
+ struct sk_buff *head; /* list of skbs for this flow : first skb */
+ union {
+ struct sk_buff *tail; /* last skb in the list */
+ unsigned long age; /* jiffies when flow was emptied, for gc */
+ };
+ struct rb_node fq_node; /* anchor in fq_root[] trees */
+ struct sock *sk;
+ int qlen; /* number of packets in flow queue */
+ int credit;
+ u32 socket_hash; /* sk_hash */
+ struct fq_flow *next; /* next pointer in RR lists, or &detached */
+
+ struct rb_node rate_node; /* anchor in q->delayed tree */
+ u64 time_next_packet;
+};
+
+struct fq_flow_head {
+ struct fq_flow *first;
+ struct fq_flow *last;
+};
+
+struct fq_sched_data {
+ struct fq_flow_head new_flows;
+
+ struct fq_flow_head old_flows;
+
+ struct rb_root delayed; /* for rate limited flows */
+ u64 time_next_delayed_flow;
+
+ struct fq_flow internal; /* for non classified or high prio packets */
+ u32 quantum;
+ u32 initial_quantum;
+ u32 flow_default_rate;/* rate per flow : bytes per second */
+ u32 flow_max_rate; /* optional max rate per flow */
+ u32 flow_plimit; /* max packets per flow */
+ struct rb_root *fq_root;
+ u8 rate_enable;
+ u8 fq_trees_log;
+
+ u32 flows;
+ u32 inactive_flows;
+ u32 throttled_flows;
+
+ u64 stat_gc_flows;
+ u64 stat_internal_packets;
+ u64 stat_tcp_retrans;
+ u64 stat_throttled;
+ u64 stat_flows_plimit;
+ u64 stat_pkts_too_long;
+ u64 stat_allocation_errors;
+ struct qdisc_watchdog watchdog;
+};
+
+/* special value to mark a detached flow (not on old/new list) */
+static struct fq_flow detached, throttled;
+
+static void fq_flow_set_detached(struct fq_flow *f)
+{
+ f->next = &detached;
+}
+
+static bool fq_flow_is_detached(const struct fq_flow *f)
+{
+ return f->next == &detached;
+}
+
+static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
+{
+ struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
+
+ while (*p) {
+ struct fq_flow *aux;
+
+ parent = *p;
+ aux = container_of(parent, struct fq_flow, rate_node);
+ if (f->time_next_packet >= aux->time_next_packet)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&f->rate_node, parent, p);
+ rb_insert_color(&f->rate_node, &q->delayed);
+ q->throttled_flows++;
+ q->stat_throttled++;
+
+ f->next = &throttled;
+ if (q->time_next_delayed_flow > f->time_next_packet)
+ q->time_next_delayed_flow = f->time_next_packet;
+}
+
+
+static struct kmem_cache *fq_flow_cachep __read_mostly;
+
+static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow)
+{
+ if (head->first)
+ head->last->next = flow;
+ else
+ head->first = flow;
+ head->last = flow;
+ flow->next = NULL;
+}
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*HZ)
+
+static bool fq_gc_candidate(const struct fq_flow *f)
+{
+ return fq_flow_is_detached(f) &&
+ time_after(jiffies, f->age + FQ_GC_AGE);
+}
+
+static void fq_gc(struct fq_sched_data *q,
+ struct rb_root *root,
+ struct sock *sk)
+{
+ struct fq_flow *f, *tofree[FQ_GC_MAX];
+ struct rb_node **p, *parent;
+ int fcnt = 0;
+
+ p = &root->rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+
+ f = container_of(parent, struct fq_flow, fq_node);
+ if (f->sk == sk)
+ break;
+
+ if (fq_gc_candidate(f)) {
+ tofree[fcnt++] = f;
+ if (fcnt == FQ_GC_MAX)
+ break;
+ }
+
+ if (f->sk > sk)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+
+ q->flows -= fcnt;
+ q->inactive_flows -= fcnt;
+ q->stat_gc_flows += fcnt;
+ while (fcnt) {
+ struct fq_flow *f = tofree[--fcnt];
+
+ rb_erase(&f->fq_node, root);
+ kmem_cache_free(fq_flow_cachep, f);
+ }
+}
+
+static const u8 prio2band[TC_PRIO_MAX + 1] = {
+ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+};
+
+static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
+{
+ struct rb_node **p, *parent;
+ struct sock *sk = skb->sk;
+ struct rb_root *root;
+ struct fq_flow *f;
+ int band;
+
+ /* warning: no starvation prevention... */
+ band = prio2band[skb->priority & TC_PRIO_MAX];
+ if (unlikely(band == 0))
+ return &q->internal;
+
+ if (unlikely(!sk)) {
+ /* By forcing low order bit to 1, we make sure to not
+ * collide with a local flow (socket pointers are word aligned)
+ */
+ sk = (struct sock *)(skb_get_rxhash(skb) | 1L);
+ }
+
+ root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
+
+ if (q->flows >= (2U << q->fq_trees_log) &&
+ q->inactive_flows > q->flows/2)
+ fq_gc(q, root, sk);
+
+ p = &root->rb_node;
+ parent = NULL;
+ while (*p) {
+ parent = *p;
+
+ f = container_of(parent, struct fq_flow, fq_node);
+ if (f->sk == sk) {
+ /* socket might have been reallocated, so check
+ * if its sk_hash is the same.
+ * If not, we need to refill credit with
+ * initial quantum
+ */
+ if (unlikely(skb->sk &&
+ f->socket_hash != sk->sk_hash)) {
+ f->credit = q->initial_quantum;
+ f->socket_hash = sk->sk_hash;
+ }
+ return f;
+ }
+ if (f->sk > sk)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+
+ f = kmem_cache_zalloc(fq_flow_cachep, GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!f)) {
+ q->stat_allocation_errors++;
+ return &q->internal;
+ }
+ fq_flow_set_detached(f);
+ f->sk = sk;
+ if (skb->sk)
+ f->socket_hash = sk->sk_hash;
+ f->credit = q->initial_quantum;
+
+ rb_link_node(&f->fq_node, parent, p);
+ rb_insert_color(&f->fq_node, root);
+
+ q->flows++;
+ q->inactive_flows++;
+ return f;
+}
+
+
+/* remove one skb from head of flow queue */
+static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
+{
+ struct sk_buff *skb = flow->head;
+
+ if (skb) {
+ flow->head = skb->next;
+ skb->next = NULL;
+ flow->qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ sch->q.qlen--;
+ }
+ return skb;
+}
+
+/* We might add in the future detection of retransmits
+ * For the time being, just return false
+ */
+static bool skb_is_retransmit(struct sk_buff *skb)
+{
+ return false;
+}
+
+/* add skb to flow queue
+ * flow queue is a linked list, kind of FIFO, except for TCP retransmits
+ * We special case tcp retransmits to be transmitted before other packets.
+ * We rely on fact that TCP retransmits are unlikely, so we do not waste
+ * a separate queue or a pointer.
+ * head-> [retrans pkt 1]
+ * [retrans pkt 2]
+ * [ normal pkt 1]
+ * [ normal pkt 2]
+ * [ normal pkt 3]
+ * tail-> [ normal pkt 4]
+ */
+static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
+{
+ struct sk_buff *prev, *head = flow->head;
+
+ skb->next = NULL;
+ if (!head) {
+ flow->head = skb;
+ flow->tail = skb;
+ return;
+ }
+ if (likely(!skb_is_retransmit(skb))) {
+ flow->tail->next = skb;
+ flow->tail = skb;
+ return;
+ }
+
+ /* This skb is a tcp retransmit,
+ * find the last retrans packet in the queue
+ */
+ prev = NULL;
+ while (skb_is_retransmit(head)) {
+ prev = head;
+ head = head->next;
+ if (!head)
+ break;
+ }
+ if (!prev) { /* no rtx packet in queue, become the new head */
+ skb->next = flow->head;
+ flow->head = skb;
+ } else {
+ if (prev == flow->tail)
+ flow->tail = skb;
+ else
+ skb->next = prev->next;
+ prev->next = skb;
+ }
+}
+
+static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ struct fq_flow *f;
+
+ if (unlikely(sch->q.qlen >= sch->limit))
+ return qdisc_drop(skb, sch);
+
+ f = fq_classify(skb, q);
+ if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
+ q->stat_flows_plimit++;
+ return qdisc_drop(skb, sch);
+ }
+
+ f->qlen++;
+ flow_queue_add(f, skb);
+ if (skb_is_retransmit(skb))
+ q->stat_tcp_retrans++;
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+ if (fq_flow_is_detached(f)) {
+ fq_flow_add_tail(&q->new_flows, f);
+ if (q->quantum > f->credit)
+ f->credit = q->quantum;
+ q->inactive_flows--;
+ qdisc_unthrottled(sch);
+ }
+ if (unlikely(f == &q->internal)) {
+ q->stat_internal_packets++;
+ qdisc_unthrottled(sch);
+ }
+ sch->q.qlen++;
+
+ return NET_XMIT_SUCCESS;
+}
+
+static void fq_check_throttled(struct fq_sched_data *q, u64 now)
+{
+ struct rb_node *p;
+
+ if (q->time_next_delayed_flow > now)
+ return;
+
+ q->time_next_delayed_flow = ~0ULL;
+ while ((p = rb_first(&q->delayed)) != NULL) {
+ struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
+
+ if (f->time_next_packet > now) {
+ q->time_next_delayed_flow = f->time_next_packet;
+ break;
+ }
+ rb_erase(p, &q->delayed);
+ q->throttled_flows--;
+ fq_flow_add_tail(&q->old_flows, f);
+ }
+}
+
+static struct sk_buff *fq_dequeue(struct Qdisc *sch)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ u64 now = ktime_to_ns(ktime_get());
+ struct fq_flow_head *head;
+ struct sk_buff *skb;
+ struct fq_flow *f;
+ u32 rate;
+
+ skb = fq_dequeue_head(sch, &q->internal);
+ if (skb)
+ goto out;
+ fq_check_throttled(q, now);
+begin:
+ head = &q->new_flows;
+ if (!head->first) {
+ head = &q->old_flows;
+ if (!head->first) {
+ if (q->time_next_delayed_flow != ~0ULL)
+ qdisc_watchdog_schedule_ns(&q->watchdog,
+ q->time_next_delayed_flow);
+ return NULL;
+ }
+ }
+ f = head->first;
+
+ if (f->credit <= 0) {
+ f->credit += q->quantum;
+ head->first = f->next;
+ fq_flow_add_tail(&q->old_flows, f);
+ goto begin;
+ }
+
+ if (unlikely(f->head && now < f->time_next_packet)) {
+ head->first = f->next;
+ fq_flow_set_throttled(q, f);
+ goto begin;
+ }
+
+ skb = fq_dequeue_head(sch, f);
+ if (!skb) {
+ head->first = f->next;
+ /* force a pass through old_flows to prevent starvation */
+ if ((head == &q->new_flows) && q->old_flows.first) {
+ fq_flow_add_tail(&q->old_flows, f);
+ } else {
+ fq_flow_set_detached(f);
+ f->age = jiffies;
+ q->inactive_flows++;
+ }
+ goto begin;
+ }
+ prefetch(&skb->end);
+ f->time_next_packet = now;
+ f->credit -= qdisc_pkt_len(skb);
+
+ if (f->credit > 0 || !q->rate_enable)
+ goto out;
+
+ rate = q->flow_max_rate;
+ if (skb->sk && skb->sk->sk_state != TCP_TIME_WAIT)
+ rate = min(skb->sk->sk_pacing_rate, rate);
+
+ if (rate != ~0U) {
+ u32 plen = max(qdisc_pkt_len(skb), q->quantum);
+ u64 len = (u64)plen * NSEC_PER_SEC;
+
+ if (likely(rate))
+ do_div(len, rate);
+ /* Since socket rate can change later,
+ * clamp the delay to 125 ms.
+ * TODO: maybe segment the too big skb, as in commit
+ * e43ac79a4bc ("sch_tbf: segment too big GSO packets")
+ */
+ if (unlikely(len > 125 * NSEC_PER_MSEC)) {
+ len = 125 * NSEC_PER_MSEC;
+ q->stat_pkts_too_long++;
+ }
+
+ f->time_next_packet = now + len;
+ }
+out:
+ qdisc_bstats_update(sch, skb);
+ qdisc_unthrottled(sch);
+ return skb;
+}
+
+static void fq_reset(struct Qdisc *sch)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ struct rb_root *root;
+ struct sk_buff *skb;
+ struct rb_node *p;
+ struct fq_flow *f;
+ unsigned int idx;
+
+ while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL)
+ kfree_skb(skb);
+
+ if (!q->fq_root)
+ return;
+
+ for (idx = 0; idx < (1U << q->fq_trees_log); idx++) {
+ root = &q->fq_root[idx];
+ while ((p = rb_first(root)) != NULL) {
+ f = container_of(p, struct fq_flow, fq_node);
+ rb_erase(p, root);
+
+ while ((skb = fq_dequeue_head(sch, f)) != NULL)
+ kfree_skb(skb);
+
+ kmem_cache_free(fq_flow_cachep, f);
+ }
+ }
+ q->new_flows.first = NULL;
+ q->old_flows.first = NULL;
+ q->delayed = RB_ROOT;
+ q->flows = 0;
+ q->inactive_flows = 0;
+ q->throttled_flows = 0;
+}
+
+static void fq_rehash(struct fq_sched_data *q,
+ struct rb_root *old_array, u32 old_log,
+ struct rb_root *new_array, u32 new_log)
+{
+ struct rb_node *op, **np, *parent;
+ struct rb_root *oroot, *nroot;
+ struct fq_flow *of, *nf;
+ int fcnt = 0;
+ u32 idx;
+
+ for (idx = 0; idx < (1U << old_log); idx++) {
+ oroot = &old_array[idx];
+ while ((op = rb_first(oroot)) != NULL) {
+ rb_erase(op, oroot);
+ of = container_of(op, struct fq_flow, fq_node);
+ if (fq_gc_candidate(of)) {
+ fcnt++;
+ kmem_cache_free(fq_flow_cachep, of);
+ continue;
+ }
+ nroot = &new_array[hash_32((u32)(long)of->sk, new_log)];
+
+ np = &nroot->rb_node;
+ parent = NULL;
+ while (*np) {
+ parent = *np;
+
+ nf = container_of(parent, struct fq_flow, fq_node);
+ BUG_ON(nf->sk == of->sk);
+
+ if (nf->sk > of->sk)
+ np = &parent->rb_right;
+ else
+ np = &parent->rb_left;
+ }
+
+ rb_link_node(&of->fq_node, parent, np);
+ rb_insert_color(&of->fq_node, nroot);
+ }
+ }
+ q->flows -= fcnt;
+ q->inactive_flows -= fcnt;
+ q->stat_gc_flows += fcnt;
+}
+
+static int fq_resize(struct fq_sched_data *q, u32 log)
+{
+ struct rb_root *array;
+ u32 idx;
+
+ if (q->fq_root && log == q->fq_trees_log)
+ return 0;
+
+ array = kmalloc(sizeof(struct rb_root) << log, GFP_KERNEL);
+ if (!array)
+ return -ENOMEM;
+
+ for (idx = 0; idx < (1U << log); idx++)
+ array[idx] = RB_ROOT;
+
+ if (q->fq_root) {
+ fq_rehash(q, q->fq_root, q->fq_trees_log, array, log);
+ kfree(q->fq_root);
+ }
+ q->fq_root = array;
+ q->fq_trees_log = log;
+
+ return 0;
+}
+
+static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
+ [TCA_FQ_PLIMIT] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_PLIMIT] = { .type = NLA_U32 },
+ [TCA_FQ_QUANTUM] = { .type = NLA_U32 },
+ [TCA_FQ_INITIAL_QUANTUM] = { .type = NLA_U32 },
+ [TCA_FQ_RATE_ENABLE] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_DEFAULT_RATE] = { .type = NLA_U32 },
+ [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 },
+ [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
+};
+
+static int fq_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_FQ_MAX + 1];
+ int err, drop_count = 0;
+ u32 fq_log;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_FQ_MAX, opt, fq_policy);
+ if (err < 0)
+ return err;
+
+ sch_tree_lock(sch);
+
+ fq_log = q->fq_trees_log;
+
+ if (tb[TCA_FQ_BUCKETS_LOG]) {
+ u32 nval = nla_get_u32(tb[TCA_FQ_BUCKETS_LOG]);
+
+ if (nval >= 1 && nval <= ilog2(256*1024))
+ fq_log = nval;
+ else
+ err = -EINVAL;
+ }
+ if (tb[TCA_FQ_PLIMIT])
+ sch->limit = nla_get_u32(tb[TCA_FQ_PLIMIT]);
+
+ if (tb[TCA_FQ_FLOW_PLIMIT])
+ q->flow_plimit = nla_get_u32(tb[TCA_FQ_FLOW_PLIMIT]);
+
+ if (tb[TCA_FQ_QUANTUM])
+ q->quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
+
+ if (tb[TCA_FQ_INITIAL_QUANTUM])
+ q->initial_quantum = nla_get_u32(tb[TCA_FQ_INITIAL_QUANTUM]);
+
+ if (tb[TCA_FQ_FLOW_DEFAULT_RATE])
+ q->flow_default_rate = nla_get_u32(tb[TCA_FQ_FLOW_DEFAULT_RATE]);
+
+ if (tb[TCA_FQ_FLOW_MAX_RATE])
+ q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
+
+ if (tb[TCA_FQ_RATE_ENABLE]) {
+ u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
+
+ if (enable <= 1)
+ q->rate_enable = enable;
+ else
+ err = -EINVAL;
+ }
+
+ if (!err)
+ err = fq_resize(q, fq_log);
+
+ while (sch->q.qlen > sch->limit) {
+ struct sk_buff *skb = fq_dequeue(sch);
+
+ if (!skb)
+ break;
+ kfree_skb(skb);
+ drop_count++;
+ }
+ qdisc_tree_decrease_qlen(sch, drop_count);
+
+ sch_tree_unlock(sch);
+ return err;
+}
+
+static void fq_destroy(struct Qdisc *sch)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+
+ fq_reset(sch);
+ kfree(q->fq_root);
+ qdisc_watchdog_cancel(&q->watchdog);
+}
+
+static int fq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ int err;
+
+ sch->limit = 10000;
+ q->flow_plimit = 100;
+ q->quantum = 2 * psched_mtu(qdisc_dev(sch));
+ q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch));
+ q->flow_default_rate = 0;
+ q->flow_max_rate = ~0U;
+ q->rate_enable = 1;
+ q->new_flows.first = NULL;
+ q->old_flows.first = NULL;
+ q->delayed = RB_ROOT;
+ q->fq_root = NULL;
+ q->fq_trees_log = ilog2(1024);
+ qdisc_watchdog_init(&q->watchdog, sch);
+
+ if (opt)
+ err = fq_change(sch, opt);
+ else
+ err = fq_resize(q, q->fq_trees_log);
+
+ return err;
+}
+
+static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ struct nlattr *opts;
+
+ opts = nla_nest_start(skb, TCA_OPTIONS);
+ if (opts == NULL)
+ goto nla_put_failure;
+
+ /* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore,
+ * do not bother giving its value
+ */
+ if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
+ nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
+ nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
+ nla_put_u32(skb, TCA_FQ_INITIAL_QUANTUM, q->initial_quantum) ||
+ nla_put_u32(skb, TCA_FQ_RATE_ENABLE, q->rate_enable) ||
+ nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) ||
+ nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, opts);
+ return skb->len;
+
+nla_put_failure:
+ return -1;
+}
+
+static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct fq_sched_data *q = qdisc_priv(sch);
+ u64 now = ktime_to_ns(ktime_get());
+ struct tc_fq_qd_stats st = {
+ .gc_flows = q->stat_gc_flows,
+ .highprio_packets = q->stat_internal_packets,
+ .tcp_retrans = q->stat_tcp_retrans,
+ .throttled = q->stat_throttled,
+ .flows_plimit = q->stat_flows_plimit,
+ .pkts_too_long = q->stat_pkts_too_long,
+ .allocation_errors = q->stat_allocation_errors,
+ .flows = q->flows,
+ .inactive_flows = q->inactive_flows,
+ .throttled_flows = q->throttled_flows,
+ .time_next_delayed_flow = q->time_next_delayed_flow - now,
+ };
+
+ return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static struct Qdisc_ops fq_qdisc_ops __read_mostly = {
+ .id = "fq",
+ .priv_size = sizeof(struct fq_sched_data),
+
+ .enqueue = fq_enqueue,
+ .dequeue = fq_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = fq_init,
+ .reset = fq_reset,
+ .destroy = fq_destroy,
+ .change = fq_change,
+ .dump = fq_dump,
+ .dump_stats = fq_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init fq_module_init(void)
+{
+ int ret;
+
+ fq_flow_cachep = kmem_cache_create("fq_flow_cache",
+ sizeof(struct fq_flow),
+ 0, 0, NULL);
+ if (!fq_flow_cachep)
+ return -ENOMEM;
+
+ ret = register_qdisc(&fq_qdisc_ops);
+ if (ret)
+ kmem_cache_destroy(fq_flow_cachep);
+ return ret;
+}
+
+static void __exit fq_module_exit(void)
+{
+ unregister_qdisc(&fq_qdisc_ops);
+ kmem_cache_destroy(fq_flow_cachep);
+}
+
+module_init(fq_module_init)
+module_exit(fq_module_exit)
+MODULE_AUTHOR("Eric Dumazet");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 48be3d5c0d92..a74e278654aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -30,6 +30,10 @@
#include <net/pkt_sched.h>
#include <net/dst.h>
+/* Qdisc to use by default */
+const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
+EXPORT_SYMBOL(default_qdisc_ops);
+
/* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with
@@ -530,12 +534,11 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
.dump = pfifo_fast_dump,
.owner = THIS_MODULE,
};
-EXPORT_SYMBOL(pfifo_fast_ops);
static struct lock_class_key qdisc_tx_busylock;
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
- struct Qdisc_ops *ops)
+ const struct Qdisc_ops *ops)
{
void *p;
struct Qdisc *sch;
@@ -579,10 +582,14 @@ errout:
}
struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
- struct Qdisc_ops *ops, unsigned int parentid)
+ const struct Qdisc_ops *ops,
+ unsigned int parentid)
{
struct Qdisc *sch;
+ if (!try_module_get(ops->owner))
+ goto errout;
+
sch = qdisc_alloc(dev_queue, ops);
if (IS_ERR(sch))
goto errout;
@@ -686,7 +693,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
if (dev->tx_queue_len) {
qdisc = qdisc_create_dflt(dev_queue,
- &pfifo_fast_ops, TC_H_ROOT);
+ default_qdisc_ops, TC_H_ROOT);
if (!qdisc) {
netdev_info(dev, "activation failed\n");
return;
@@ -739,9 +746,8 @@ void dev_activate(struct net_device *dev)
int need_watchdog;
/* No queueing discipline is attached to device;
- create default one i.e. pfifo_fast for devices,
- which need queueing and noqueue_qdisc for
- virtual interfaces
+ * create default one for devices, which need queueing
+ * and noqueue_qdisc for virtual interfaces
*/
if (dev->qdisc == &noop_qdisc)
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c2178b15ca6e..863846cc5513 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1495,7 +1495,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
- cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer);
+ cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
sch_tree_unlock(sch);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 5da78a19ac9a..2e56185736d6 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -57,7 +57,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
dev_queue = netdev_get_tx_queue(dev, ntx);
- qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+ qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)));
if (qdisc == NULL)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index accec33c454c..d44c868cb537 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -124,7 +124,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < dev->num_tx_queues; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
- qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+ qdisc = qdisc_create_dflt(dev_queue, default_qdisc_ops,
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(i + 1)));
if (qdisc == NULL) {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 82f6016d89ab..b87e83d07478 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -358,6 +358,21 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
return PSCHED_NS2TICKS(ticks);
}
+static void tfifo_reset(struct Qdisc *sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ struct rb_node *p;
+
+ while ((p = rb_first(&q->t_root))) {
+ struct sk_buff *skb = netem_rb_to_skb(p);
+
+ rb_erase(p, &q->t_root);
+ skb->next = NULL;
+ skb->prev = NULL;
+ kfree_skb(skb);
+ }
+}
+
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -412,12 +427,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* If a delay is expected, orphan the skb. (orphaning usually takes
* place at TX completion time, so _before_ the link transit delay)
- * Ideally, this orphaning should be done after the rate limiting
- * module, because this breaks TCP Small Queue, and other mechanisms
- * based on socket sk_wmem_alloc.
*/
if (q->latency || q->jitter)
- skb_orphan(skb);
+ skb_orphan_partial(skb);
/*
* If we need to duplicate packet, then re-insert at top of the
@@ -523,6 +535,7 @@ static unsigned int netem_drop(struct Qdisc *sch)
skb->next = NULL;
skb->prev = NULL;
len = qdisc_pkt_len(skb);
+ sch->qstats.backlog -= len;
kfree_skb(skb);
}
}
@@ -612,6 +625,7 @@ static void netem_reset(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset_queue(sch);
+ tfifo_reset(sch);
if (q->qdisc)
qdisc_reset(q->qdisc);
qdisc_watchdog_cancel(&q->watchdog);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ab67efc64b24..cef509985192 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -28,10 +28,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
* Daisy Chang <daisyc@us.ibm.com>
* Ryan Layer <rmlayer@us.ibm.com>
* Kevin Gao <kevin.gao@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index ba1dfc3f8def..8c4fa5dec824 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -22,16 +22,10 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Vlad Yasevich <vladislav.yasevich@hp.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/slab.h>
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 64977ea0f9c5..077bb070052b 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -27,19 +27,13 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
* Karl Knutson <karl@athena.chicago.il.us>
* Jon Grimm <jgrimm@us.ibm.com>
* Daisy Chang <daisyc@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/types.h>
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 5780565f5b7d..7bd5ed4a8657 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -24,17 +24,11 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Jon Grimm <jgrimm@us.ibm.com>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -201,9 +195,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
/* This is the biggest possible DATA chunk that can fit into
* the packet
*/
- max_data = asoc->pathmtu -
+ max_data = (asoc->pathmtu -
sctp_sk(asoc->base.sk)->pf->af->net_header_len -
- sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+ sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
max = asoc->frag_point;
/* If the the peer requested that we authenticate DATA chunks
diff --git a/net/sctp/command.c b/net/sctp/command.c
index c0044019db9e..3d9a9ff69c03 100644
--- a/net/sctp/command.c
+++ b/net/sctp/command.c
@@ -25,17 +25,11 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
* Karl Knutson <karl@athena.chicago.il.us>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/types.h>
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index f4998780d6df..e89015d8935a 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -28,10 +28,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,9 +37,6 @@
* Jon Grimm <jgrimm@us.ibm.com>
* Daisy Chang <daisyc@us.ibm.com>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <net/sctp/sctp.h>
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 9e3d257de0e0..09b8daac87c8 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -29,10 +29,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,9 +37,6 @@
* Jon Grimm <jgrimm@austin.ibm.com>
* Daisy Chang <daisyc@us.ibm.com>
* Dajiang Zhang <dajiang.zhang@nokia.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/types.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 3fa4d858c35a..98b69bbecdd9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -29,10 +29,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
* Daisy Chang <daisyc@us.ibm.com>
* Sridhar Samudrala <sri@us.ibm.com>
* Ardelle Fan <ardelle.fan@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/types.h>
@@ -87,15 +81,7 @@ static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
{
struct sctphdr *sh = sctp_hdr(skb);
__le32 cmp = sh->checksum;
- struct sk_buff *list;
- __le32 val;
- __u32 tmp = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
-
- skb_walk_frags(skb, list)
- tmp = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
- tmp);
-
- val = sctp_end_cksum(tmp);
+ __le32 val = sctp_compute_cksum(skb, 0);
if (val != cmp) {
/* CRC failure, dump it. */
@@ -648,8 +634,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
break;
case ICMP_REDIRECT:
sctp_icmp_redirect(sk, transport, skb);
- err = 0;
- break;
+ /* Fall through to out_unlock. */
default:
goto out_unlock;
}
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cb25f040fed0..5856932fdc38 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -30,17 +30,11 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
* Karl Knutson <karl@athena.chicago.il.us>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 09ffcc912d23..e7b2d4fe2b6a 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -27,10 +27,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Le Yanqun <yanqun.le@nokia.com>
@@ -42,9 +39,6 @@
*
* Based on:
* linux/net/ipv6/tcp_ipv6.c
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -189,7 +183,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
case NDISC_REDIRECT:
sctp_icmp_redirect(sk, transport, skb);
- break;
+ goto out_unlock;
default:
break;
}
@@ -210,44 +204,23 @@ out:
in6_dev_put(idev);
}
-/* Based on tcp_v6_xmit() in tcp_ipv6.c. */
static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
{
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
-
- fl6.flowi6_proto = sk->sk_protocol;
-
- /* Fill in the dest address from the route entry passed with the skb
- * and the source address from the transport.
- */
- fl6.daddr = transport->ipaddr.v6.sin6_addr;
- fl6.saddr = transport->saddr.v6.sin6_addr;
-
- fl6.flowlabel = np->flow_label;
- IP6_ECN_flow_xmit(sk, fl6.flowlabel);
- if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL)
- fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id;
- else
- fl6.flowi6_oif = sk->sk_bound_dev_if;
-
- if (np->opt && np->opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
- fl6.daddr = *rt0->addr;
- }
+ struct flowi6 *fl6 = &transport->fl.u.ip6;
pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb,
- skb->len, &fl6.saddr, &fl6.daddr);
+ skb->len, &fl6->saddr, &fl6->daddr);
- SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
+ IP6_ECN_flow_xmit(sk, fl6->flowlabel);
if (!(transport->param_flags & SPP_PMTUD_ENABLE))
skb->local_df = 1;
- return ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
+
+ return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
}
/* Returns the dst cache entry for the given source and destination ip
@@ -260,10 +233,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct dst_entry *dst = NULL;
struct flowi6 *fl6 = &fl->u.ip6;
struct sctp_bind_addr *bp;
+ struct ipv6_pinfo *np = inet6_sk(sk);
struct sctp_sockaddr_entry *laddr;
union sctp_addr *baddr = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
+ struct in6_addr *final_p, final;
__u8 matchlen = 0;
__u8 bmatchlen;
sctp_scope_t scope;
@@ -287,7 +262,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
pr_debug("src=%pI6 - ", &fl6->saddr);
}
- dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
if (!asoc || saddr)
goto out;
@@ -339,10 +315,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
}
}
rcu_read_unlock();
+
if (baddr) {
fl6->saddr = baddr->v6.sin6_addr;
fl6->fl6_sport = baddr->v6.sin6_port;
- dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
}
out:
@@ -351,7 +329,7 @@ out:
rt = (struct rt6_info *)dst;
t->dst = dst;
-
+ t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr,
&fl6->saddr);
} else {
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index fe012c44f8df..5ea573b37648 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -26,16 +26,10 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Jon Grimm <jgrimm@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/output.c b/net/sctp/output.c
index a46d1eb41762..319137340d15 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -26,19 +26,13 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
* Karl Knutson <karl@athena.chicago.il.us>
* Jon Grimm <jgrimm@austin.ibm.com>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -542,7 +536,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
*/
if (!sctp_checksum_disable) {
- if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) {
+ if (!(dst->dev->features & NETIF_F_SCTP_CSUM) ||
+ (dst_xfrm(dst) != NULL) || packet->ipfragok) {
__u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
/* 3) Put the resultant value into the checksum field in the
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index ef9e2bbc0f2f..94df75877869 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -28,10 +28,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
* Hui Huang <hui.huang@nokia.com>
* Sridhar Samudrala <sri@us.ibm.com>
* Jon Grimm <jgrimm@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index 794bb14decde..ce1ffd811775 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -29,10 +29,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -40,9 +37,6 @@
* Karl Knutson <karl@athena.chicago.il.us>
* Ardelle Fan <ardelle.fan@intel.com>
* Kevin Gao <kevin.gao@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/types.h>
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index e62c22535be4..53c452efb40b 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -46,6 +46,10 @@ static int port __read_mostly = 0;
MODULE_PARM_DESC(port, "Port to match (0=all)");
module_param(port, int, 0);
+static unsigned int fwmark __read_mostly = 0;
+MODULE_PARM_DESC(fwmark, "skb mark to match (0=no mark)");
+module_param(fwmark, uint, 0);
+
static int bufsize __read_mostly = 64 * 1024;
MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
module_param(bufsize, int, 0);
@@ -129,15 +133,19 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
void *arg,
sctp_cmd_seq_t *commands)
{
+ struct sctp_chunk *chunk = arg;
+ struct sk_buff *skb = chunk->skb;
struct sctp_transport *sp;
static __u32 lcwnd = 0;
struct timespec now;
sp = asoc->peer.primary_path;
- if ((full || sp->cwnd != lcwnd) &&
- (!port || asoc->peer.port == port ||
- ep->base.bind_addr.port == port)) {
+ if (((port == 0 && fwmark == 0) ||
+ asoc->peer.port == port ||
+ ep->base.bind_addr.port == port ||
+ (fwmark > 0 && skb->mark == fwmark)) &&
+ (full || sp->cwnd != lcwnd)) {
lcwnd = sp->cwnd;
getnstimeofday(&now);
@@ -155,13 +163,8 @@ static sctp_disposition_t jsctp_sf_eat_sack(struct net *net,
if (sp == asoc->peer.primary_path)
printl("*");
- if (sp->ipaddr.sa.sa_family == AF_INET)
- printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
- else
- printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
-
- printl("%2u %8u %8u %8u %8u %8u ",
- sp->state, sp->cwnd, sp->ssthresh,
+ printl("%pISc %2u %8u %8u %8u %8u %8u ",
+ &sp->ipaddr, sp->state, sp->cwnd, sp->ssthresh,
sp->flight_size, sp->partial_bytes_acked,
sp->pathmtu);
}
@@ -203,8 +206,8 @@ static __init int sctpprobe_init(void)
if (ret)
goto remove_proc;
- pr_info("probe registered (port=%d)\n", port);
-
+ pr_info("probe registered (port=%d/fwmark=%u) bufsize=%u\n",
+ port, fwmark, bufsize);
return 0;
remove_proc:
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 62526c477050..0c0642156842 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -22,16 +22,10 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/types.h>
@@ -232,7 +226,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
sk = epb->sk;
if (!net_eq(sock_net(sk), seq_file_net(seq)))
continue;
- seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
+ seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk,
sctp_sk(sk)->type, sk->sk_state, hash,
epb->bind_addr.port,
from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)),
@@ -342,7 +336,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
continue;
seq_printf(seq,
"%8pK %8pK %-3d %-3d %-2d %-4d "
- "%4d %8d %8d %7d %5lu %-5d %5d ",
+ "%4d %8d %8d %7u %5lu %-5d %5d ",
assoc, sk, sctp_sk(sk)->type, sk->sk_state,
assoc->state, hash,
assoc->assoc_id,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4a17494d736c..5e17092f4ada 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -29,10 +29,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
* Sridhar Samudrala <sri@us.ibm.com>
* Daisy Chang <daisyc@us.ibm.com>
* Ardelle Fan <ardelle.fan@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1547,7 +1541,7 @@ module_exit(sctp_exit);
*/
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132");
MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132");
-MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>");
+MODULE_AUTHOR("Linux Kernel SCTP developers <linux-sctp@vger.kernel.org>");
MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
module_param_named(no_checksums, sctp_checksum_disable, bool, 0644);
MODULE_PARM_DESC(no_checksums, "Disable checksums computing and verification");
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 362ae6e2fd93..d244a23ab8d3 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -29,10 +29,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -45,9 +42,6 @@
* Daisy Chang <daisyc@us.ibm.com>
* Ardelle Fan <ardelle.fan@intel.com>
* Kevin Gao <kevin.gao@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -68,8 +62,12 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
-static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
- __u8 type, __u8 flags, int paylen);
+static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
+ __u8 type, __u8 flags, int paylen);
+static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
+ __u8 flags, int paylen);
+static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
+ __u8 type, __u8 flags, int paylen);
static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
const struct sctp_chunk *init_chunk,
@@ -82,6 +80,28 @@ static int sctp_process_param(struct sctp_association *asoc,
static void *sctp_addto_param(struct sctp_chunk *chunk, int len,
const void *data);
+/* Control chunk destructor */
+static void sctp_control_release_owner(struct sk_buff *skb)
+{
+ /*TODO: do memory release */
+}
+
+static void sctp_control_set_owner_w(struct sctp_chunk *chunk)
+{
+ struct sctp_association *asoc = chunk->asoc;
+ struct sk_buff *skb = chunk->skb;
+
+ /* TODO: properly account for control chunks.
+ * To do it right we'll need:
+ * 1) endpoint if association isn't known.
+ * 2) proper memory accounting.
+ *
+ * For now, don't do anything.
+ */
+ skb->sk = asoc ? asoc->base.sk : NULL;
+ skb->destructor = sctp_control_release_owner;
+}
+
/* What was the inbound interface for this chunk? */
int sctp_chunk_iif(const struct sctp_chunk *chunk)
{
@@ -296,7 +316,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
* PLEASE DO NOT FIXME [This version does not support Host Name.]
*/
- retval = sctp_make_chunk(asoc, SCTP_CID_INIT, 0, chunksize);
+ retval = sctp_make_control(asoc, SCTP_CID_INIT, 0, chunksize);
if (!retval)
goto nodata;
@@ -443,7 +463,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
num_ext);
/* Now allocate and fill out the chunk. */
- retval = sctp_make_chunk(asoc, SCTP_CID_INIT_ACK, 0, chunksize);
+ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize);
if (!retval)
goto nomem_chunk;
@@ -548,7 +568,7 @@ struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc,
cookie_len = asoc->peer.cookie_len;
/* Build a cookie echo chunk. */
- retval = sctp_make_chunk(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len);
+ retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ECHO, 0, cookie_len);
if (!retval)
goto nodata;
retval->subh.cookie_hdr =
@@ -593,7 +613,7 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc,
{
struct sctp_chunk *retval;
- retval = sctp_make_chunk(asoc, SCTP_CID_COOKIE_ACK, 0, 0);
+ retval = sctp_make_control(asoc, SCTP_CID_COOKIE_ACK, 0, 0);
/* RFC 2960 6.4 Multi-homed SCTP Endpoints
*
@@ -641,8 +661,8 @@ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc,
sctp_cwrhdr_t cwr;
cwr.lowest_tsn = htonl(lowest_tsn);
- retval = sctp_make_chunk(asoc, SCTP_CID_ECN_CWR, 0,
- sizeof(sctp_cwrhdr_t));
+ retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0,
+ sizeof(sctp_cwrhdr_t));
if (!retval)
goto nodata;
@@ -675,8 +695,8 @@ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc,
sctp_ecnehdr_t ecne;
ecne.lowest_tsn = htonl(lowest_tsn);
- retval = sctp_make_chunk(asoc, SCTP_CID_ECN_ECNE, 0,
- sizeof(sctp_ecnehdr_t));
+ retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0,
+ sizeof(sctp_ecnehdr_t));
if (!retval)
goto nodata;
retval->subh.ecne_hdr =
@@ -712,7 +732,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc,
dp.ssn = htons(ssn);
chunk_len = sizeof(dp) + data_len;
- retval = sctp_make_chunk(asoc, SCTP_CID_DATA, flags, chunk_len);
+ retval = sctp_make_data(asoc, flags, chunk_len);
if (!retval)
goto nodata;
@@ -759,7 +779,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
+ sizeof(__u32) * num_dup_tsns;
/* Create the chunk. */
- retval = sctp_make_chunk(asoc, SCTP_CID_SACK, 0, len);
+ retval = sctp_make_control(asoc, SCTP_CID_SACK, 0, len);
if (!retval)
goto nodata;
@@ -838,8 +858,8 @@ struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc,
ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
shut.cum_tsn_ack = htonl(ctsn);
- retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN, 0,
- sizeof(sctp_shutdownhdr_t));
+ retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
+ sizeof(sctp_shutdownhdr_t));
if (!retval)
goto nodata;
@@ -857,7 +877,7 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc,
{
struct sctp_chunk *retval;
- retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0);
+ retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_ACK, 0, 0);
/* RFC 2960 6.4 Multi-homed SCTP Endpoints
*
@@ -886,7 +906,7 @@ struct sctp_chunk *sctp_make_shutdown_complete(
*/
flags |= asoc ? 0 : SCTP_CHUNK_FLAG_T;
- retval = sctp_make_chunk(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0);
+ retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN_COMPLETE, flags, 0);
/* RFC 2960 6.4 Multi-homed SCTP Endpoints
*
@@ -925,7 +945,7 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc,
flags = SCTP_CHUNK_FLAG_T;
}
- retval = sctp_make_chunk(asoc, SCTP_CID_ABORT, flags, hint);
+ retval = sctp_make_control(asoc, SCTP_CID_ABORT, flags, hint);
/* RFC 2960 6.4 Multi-homed SCTP Endpoints
*
@@ -1117,7 +1137,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
struct sctp_chunk *retval;
sctp_sender_hb_info_t hbinfo;
- retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo));
+ retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo));
if (!retval)
goto nodata;
@@ -1145,7 +1165,7 @@ struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc,
{
struct sctp_chunk *retval;
- retval = sctp_make_chunk(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen);
+ retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT_ACK, 0, paylen);
if (!retval)
goto nodata;
@@ -1177,8 +1197,8 @@ static struct sctp_chunk *sctp_make_op_error_space(
{
struct sctp_chunk *retval;
- retval = sctp_make_chunk(asoc, SCTP_CID_ERROR, 0,
- sizeof(sctp_errhdr_t) + size);
+ retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0,
+ sizeof(sctp_errhdr_t) + size);
if (!retval)
goto nodata;
@@ -1248,7 +1268,7 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc)
if (unlikely(!hmac_desc))
return NULL;
- retval = sctp_make_chunk(asoc, SCTP_CID_AUTH, 0,
+ retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0,
hmac_desc->hmac_len + sizeof(sctp_authhdr_t));
if (!retval)
return NULL;
@@ -1351,8 +1371,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk)
/* Create a new chunk, setting the type and flags headers from the
* arguments, reserving enough space for a 'paylen' byte payload.
*/
-static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
- __u8 type, __u8 flags, int paylen)
+static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
+ __u8 type, __u8 flags, int paylen)
{
struct sctp_chunk *retval;
sctp_chunkhdr_t *chunk_hdr;
@@ -1385,14 +1405,27 @@ static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc,
if (sctp_auth_send_cid(type, asoc))
retval->auth = 1;
- /* Set the skb to the belonging sock for accounting. */
- skb->sk = sk;
-
return retval;
nodata:
return NULL;
}
+static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc,
+ __u8 flags, int paylen)
+{
+ return _sctp_make_chunk(asoc, SCTP_CID_DATA, flags, paylen);
+}
+
+static struct sctp_chunk *sctp_make_control(const struct sctp_association *asoc,
+ __u8 type, __u8 flags, int paylen)
+{
+ struct sctp_chunk *chunk = _sctp_make_chunk(asoc, type, flags, paylen);
+
+ if (chunk)
+ sctp_control_set_owner_w(chunk);
+
+ return chunk;
+}
/* Release the memory occupied by a chunk. */
static void sctp_chunk_destroy(struct sctp_chunk *chunk)
@@ -2207,25 +2240,23 @@ int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
struct sctp_chunk **errp)
{
union sctp_params param;
- int has_cookie = 0;
+ bool has_cookie = false;
int result;
- /* Verify stream values are non-zero. */
- if ((0 == peer_init->init_hdr.num_outbound_streams) ||
- (0 == peer_init->init_hdr.num_inbound_streams) ||
- (0 == peer_init->init_hdr.init_tag) ||
- (SCTP_DEFAULT_MINWINDOW > ntohl(peer_init->init_hdr.a_rwnd))) {
-
+ /* Check for missing mandatory parameters. Note: Initial TSN is
+ * also mandatory, but is not checked here since the valid range
+ * is 0..2**32-1. RFC4960, section 3.3.3.
+ */
+ if (peer_init->init_hdr.num_outbound_streams == 0 ||
+ peer_init->init_hdr.num_inbound_streams == 0 ||
+ peer_init->init_hdr.init_tag == 0 ||
+ ntohl(peer_init->init_hdr.a_rwnd) < SCTP_DEFAULT_MINWINDOW)
return sctp_process_inv_mandatory(asoc, chunk, errp);
- }
- /* Check for missing mandatory parameters. */
sctp_walk_params(param, peer_init, init_hdr.params) {
-
- if (SCTP_PARAM_STATE_COOKIE == param.p->type)
- has_cookie = 1;
-
- } /* for (loop through all parameters) */
+ if (param.p->type == SCTP_PARAM_STATE_COOKIE)
+ has_cookie = true;
+ }
/* There is a possibility that a parameter length was bad and
* in that case we would have stoped walking the parameters.
@@ -2733,7 +2764,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc,
length += addrlen;
/* Create the chunk. */
- retval = sctp_make_chunk(asoc, SCTP_CID_ASCONF, 0, length);
+ retval = sctp_make_control(asoc, SCTP_CID_ASCONF, 0, length);
if (!retval)
return NULL;
@@ -2917,7 +2948,7 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as
int length = sizeof(asconf) + vparam_len;
/* Create the chunk. */
- retval = sctp_make_chunk(asoc, SCTP_CID_ASCONF_ACK, 0, length);
+ retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length);
if (!retval)
return NULL;
@@ -3448,7 +3479,7 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc,
hint = (nstreams + 1) * sizeof(__u32);
- retval = sctp_make_chunk(asoc, SCTP_CID_FWD_TSN, 0, hint);
+ retval = sctp_make_control(asoc, SCTP_CID_FWD_TSN, 0, hint);
if (!retval)
return NULL;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 9da68852ee94..666c66842799 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -28,10 +28,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -42,9 +39,6 @@
* Daisy Chang <daisyc@us.ibm.com>
* Sridhar Samudrala <sri@us.ibm.com>
* Ardelle Fan <ardelle.fan@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f6b7109195a6..dfe3f36ff2aa 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -28,10 +28,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -45,9 +42,6 @@
* Ardelle Fan <ardelle.fan@intel.com>
* Ryan Layer <rmlayer@us.ibm.com>
* Kevin Gao <kevin.gao@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 84d98d8a5a74..c5999b2dde7d 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -28,10 +28,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -41,9 +38,6 @@
* Daisy Chang <daisyc@us.ibm.com>
* Ardelle Fan <ardelle.fan@intel.com>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c6670d2e3f8d..911b71b26b0e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -34,10 +34,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -52,9 +49,6 @@
* Ryan Layer <rmlayer@us.ibm.com>
* Anup Pemmaiah <pemmaiah@cc.usu.edu>
* Kevin Gao <kevin.gao@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -812,6 +806,9 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
goto skip_mkasconf;
}
+ if (laddr == NULL)
+ return -EINVAL;
+
/* We do not need RCU protection throughout this loop
* because this is done under a socket lock from the
* setsockopt call.
@@ -6182,7 +6179,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Is there any exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR |
- sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index da8603523808..6007124aefa0 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -24,16 +24,10 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Jon Grimm <jgrimm@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/types.h>
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 9a5c4c9eddaf..6b36561a1b3b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -25,10 +25,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Mingqin Liu <liuming@us.ibm.com>
@@ -36,9 +33,6 @@
* Ardelle Fan <ardelle.fan@intel.com>
* Ryan Layer <rmlayer@us.ibm.com>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <net/sctp/structs.h>
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 8fdd16046d66..e332efb124cc 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -30,10 +30,7 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
@@ -43,9 +40,6 @@
* Hui Huang <hui.huang@nokia.com>
* Sridhar Samudrala <sri@us.ibm.com>
* Ardelle Fan <ardelle.fan@intel.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index b46019568a86..fbda20028285 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -27,19 +27,13 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* La Monte H.P. Yarroll <piggy@acm.org>
* Jon Grimm <jgrimm@us.ibm.com>
* Karl Knutson <karl@athena.chicago.il.us>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/slab.h>
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 44a45dbee4df..81089ed65456 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -28,19 +28,13 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Jon Grimm <jgrimm@us.ibm.com>
* La Monte H.P. Yarroll <piggy@acm.org>
* Ardelle Fan <ardelle.fan@intel.com>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/slab.h>
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 04e3d470f877..1c1484ed605d 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -27,18 +27,12 @@
*
* Please send any bug reports or fixes you make to the
* email address(es):
- * lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- * http://www.sf.net/projects/lksctp
+ * lksctp developers <linux-sctp@vger.kernel.org>
*
* Written or modified by:
* Jon Grimm <jgrimm@us.ibm.com>
* La Monte H.P. Yarroll <piggy@acm.org>
* Sridhar Samudrala <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
*/
#include <linux/slab.h>
diff --git a/net/socket.c b/net/socket.c
index b2d7c629eeb9..c226aceee65b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -854,11 +854,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_recvmsg);
-static void sock_aio_dtor(struct kiocb *iocb)
-{
- kfree(iocb->private);
-}
-
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more)
{
@@ -889,12 +884,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
struct sock_iocb *siocb)
{
- if (!is_sync_kiocb(iocb)) {
- siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
- if (!siocb)
- return NULL;
- iocb->ki_dtor = sock_aio_dtor;
- }
+ if (!is_sync_kiocb(iocb))
+ BUG();
siocb->kiocb = iocb;
iocb->private = siocb;
@@ -931,7 +922,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
if (pos != 0)
return -ESPIPE;
- if (iocb->ki_left == 0) /* Match SYS5 behaviour */
+ if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
return 0;
@@ -1973,6 +1964,16 @@ struct used_address {
unsigned int name_len;
};
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+ struct msghdr __user *umsg)
+{
+ if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
+ return -EFAULT;
+ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
+ return -EINVAL;
+ return 0;
+}
+
static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags,
struct used_address *used_address)
@@ -1991,8 +1992,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(msg_sys, msg_compat))
return -EFAULT;
- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
- return -EFAULT;
+ } else {
+ err = copy_msghdr_from_user(msg_sys, msg);
+ if (err)
+ return err;
+ }
if (msg_sys->msg_iovlen > UIO_FASTIOV) {
err = -EMSGSIZE;
@@ -2200,8 +2204,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(msg_sys, msg_compat))
return -EFAULT;
- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
- return -EFAULT;
+ } else {
+ err = copy_msghdr_from_user(msg_sys, msg);
+ if (err)
+ return err;
+ }
if (msg_sys->msg_iovlen > UIO_FASTIOV) {
err = -EMSGSIZE;
@@ -3072,12 +3079,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
uifmap32 = &uifr32->ifr_ifru.ifru_map;
err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
- err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
+ err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
+ err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
+ err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
+ err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
+ err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
+ err |= get_user(ifr.ifr_map.port, &uifmap32->port);
if (err)
return -EFAULT;
@@ -3088,12 +3095,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
if (cmd == SIOCGIFMAP && !err) {
err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
- err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
- err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
- err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
- err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
- err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
- err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
+ err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
+ err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
+ err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
+ err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
+ err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
+ err |= put_user(ifr.ifr_map.port, &uifmap32->port);
if (err)
err = -EFAULT;
}
@@ -3167,25 +3174,25 @@ static int routing_ioctl(struct net *net, struct socket *sock,
struct in6_rtmsg32 __user *ur6 = argp;
ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
3 * sizeof(struct in6_addr));
- ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
- ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
- ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
- ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
- ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
- ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
- ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
+ ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
+ ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
+ ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
+ ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
+ ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
+ ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
+ ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
r = (void *) &r6;
} else { /* ipv4 */
struct rtentry32 __user *ur4 = argp;
ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
3 * sizeof(struct sockaddr));
- ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
- ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
- ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
- ret |= __get_user(r4.rt_window, &(ur4->rt_window));
- ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
- ret |= __get_user(rtdev, &(ur4->rt_dev));
+ ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
+ ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
+ ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
+ ret |= get_user(r4.rt_window, &(ur4->rt_window));
+ ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
+ ret |= get_user(rtdev, &(ur4->rt_dev));
if (rtdev) {
ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
r4.rt_dev = (char __user __force *)devname;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index ed2fdd210c0b..5285ead196c0 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -250,11 +250,11 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
struct rpc_auth *
-rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
+rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
struct rpc_auth *auth;
const struct rpc_authops *ops;
- u32 flavor = pseudoflavor_to_flavor(pseudoflavor);
+ u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
auth = ERR_PTR(-EINVAL);
if (flavor >= RPC_AUTH_MAXFLAVOR)
@@ -269,7 +269,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
goto out;
}
spin_unlock(&rpc_authflavor_lock);
- auth = ops->create(clnt, pseudoflavor);
+ auth = ops->create(args, clnt);
module_put(ops->owner);
if (IS_ERR(auth))
return auth;
@@ -343,6 +343,27 @@ out_nocache:
EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
/*
+ * Setup a credential key lifetime timeout notification
+ */
+int
+rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
+{
+ if (!cred->cr_auth->au_ops->key_timeout)
+ return 0;
+ return cred->cr_auth->au_ops->key_timeout(auth, cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
+
+bool
+rpcauth_cred_key_to_expire(struct rpc_cred *cred)
+{
+ if (!cred->cr_ops->crkey_to_expire)
+ return false;
+ return cred->cr_ops->crkey_to_expire(cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
+
+/*
* Destroy a list of credentials
*/
static inline
@@ -413,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
/*
* Remove stale credentials. Avoid sleeping inside the loop.
*/
-static int
+static long
rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
spinlock_t *cache_lock;
struct rpc_cred *cred, *next;
unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
+ long freed = 0;
list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
@@ -430,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
*/
if (time_in_range(cred->cr_expire, expired, jiffies) &&
test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
- return 0;
+ break;
list_del_init(&cred->cr_lru);
number_cred_unused--;
+ freed++;
if (atomic_read(&cred->cr_count) != 0)
continue;
@@ -446,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
}
spin_unlock(cache_lock);
}
- return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+ return freed;
}
/*
* Run memory cache shrinker.
*/
-static int
-rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+
{
LIST_HEAD(free);
- int res;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
+ unsigned long freed;
- if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
- return (nr_to_scan == 0) ? 0 : -1;
+ if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return SHRINK_STOP;
+
+ /* nothing left, don't come back */
if (list_empty(&cred_unused))
- return 0;
+ return SHRINK_STOP;
+
spin_lock(&rpc_credcache_lock);
- res = rpcauth_prune_expired(&free, nr_to_scan);
+ freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
- return res;
+
+ return freed;
+}
+
+static unsigned long
+rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+
+{
+ return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
}
/*
@@ -784,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task)
}
static struct shrinker rpc_cred_shrinker = {
- .shrink = rpcauth_cache_shrinker,
+ .count_objects = rpcauth_cache_shrink_count,
+ .scan_objects = rpcauth_cache_shrink_scan,
.seeks = DEFAULT_SEEKS,
};
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index b6badafc6494..ed04869b2d4f 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -89,6 +89,7 @@ generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
gcred->acred.uid = acred->uid;
gcred->acred.gid = acred->gid;
gcred->acred.group_info = acred->group_info;
+ gcred->acred.ac_flags = 0;
if (gcred->acred.group_info != NULL)
get_group_info(gcred->acred.group_info);
gcred->acred.machine_cred = acred->machine_cred;
@@ -182,11 +183,78 @@ void rpc_destroy_generic_auth(void)
rpcauth_destroy_credcache(&generic_auth);
}
+/*
+ * Test the current time (now) against the underlying credential key expiry
+ * minus a timeout and setup notification.
+ *
+ * The normal case:
+ * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
+ * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
+ * rpc_credops crmatch routine to notify this generic cred when its key
+ * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
+ *
+ * The error case:
+ * If the underlying cred lookup fails, return -EACCES.
+ *
+ * The 'almost' error case:
+ * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
+ * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit
+ * on the acred ac_flags and return 0.
+ */
+static int
+generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
+{
+ struct auth_cred *acred = &container_of(cred, struct generic_cred,
+ gc_base)->acred;
+ struct rpc_cred *tcred;
+ int ret = 0;
+
+
+ /* Fast track for non crkey_timeout (no key) underlying credentials */
+ if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags))
+ return 0;
+
+ /* Fast track for the normal case */
+ if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
+ return 0;
+
+ /* lookup_cred returns either a valid referenced rpc_cred or an ERR_PTR() */
+ tcred = auth->au_ops->lookup_cred(auth, acred, 0);
+ if (IS_ERR(tcred))
+ return -EACCES;
+
+ if (!tcred->cr_ops->crkey_timeout) {
+ set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags);
+ ret = 0;
+ goto out_put;
+ }
+
+ /* Test for the almost error case */
+ ret = tcred->cr_ops->crkey_timeout(tcred);
+ if (ret != 0) {
+ set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+ ret = 0;
+ } else {
+ /* In case underlying cred key has been reset */
+ if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
+ &acred->ac_flags))
+ dprintk("RPC: UID %d Credential key reset\n",
+ from_kuid(&init_user_ns, tcred->cr_uid));
+ /* set up fasttrack for the normal case */
+ set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
+ }
+
+out_put:
+ put_rpccred(tcred);
+ return ret;
+}
+
static const struct rpc_authops generic_auth_ops = {
.owner = THIS_MODULE,
.au_name = "Generic",
.lookup_cred = generic_lookup_cred,
.crcreate = generic_create_cred,
+ .key_timeout = generic_key_timeout,
};
static struct rpc_auth generic_auth = {
@@ -194,9 +262,23 @@ static struct rpc_auth generic_auth = {
.au_count = ATOMIC_INIT(0),
};
+static bool generic_key_to_expire(struct rpc_cred *cred)
+{
+ struct auth_cred *acred = &container_of(cred, struct generic_cred,
+ gc_base)->acred;
+ bool ret;
+
+ get_rpccred(cred);
+ ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+ put_rpccred(cred);
+
+ return ret;
+}
+
static const struct rpc_credops generic_credops = {
.cr_name = "Generic cred",
.crdestroy = generic_destroy_cred,
.crbind = generic_bind_cred,
.crmatch = generic_match,
+ .crkey_to_expire = generic_key_to_expire,
};
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fc2f78d6a9b4..084656671d6e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -51,6 +51,7 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/gss_api.h>
#include <asm/uaccess.h>
+#include <linux/hashtable.h>
#include "../netns.h"
@@ -62,6 +63,9 @@ static const struct rpc_credops gss_nullops;
#define GSS_RETRY_EXPIRED 5
static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
+#define GSS_KEY_EXPIRE_TIMEO 240
+static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
+
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
@@ -71,19 +75,33 @@ static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
* using integrity (two 4-byte integers): */
#define GSS_VERF_SLACK 100
+static DEFINE_HASHTABLE(gss_auth_hash_table, 4);
+static DEFINE_SPINLOCK(gss_auth_hash_lock);
+
+/*
+ * One upcall pipe, registered as a pipe directory object and reference
+ * counted through @kref.
+ */
+struct gss_pipe {
+ struct rpc_pipe_dir_object pdo; /* set up with gss_pipe_dir_object_ops */
+ struct rpc_pipe *pipe; /* pipe data from rpc_mkpipe_data() */
+ struct rpc_clnt *clnt; /* client passed to gss_pipe_alloc() */
+ const char *name; /* name used when creating the pipe's dentry */
+ struct kref kref; /* dropped via gss_pipe_free() */
+};
+
struct gss_auth {
struct kref kref;
+ struct hlist_node hash;
struct rpc_auth rpc_auth;
struct gss_api_mech *mech;
enum rpc_gss_svc service;
struct rpc_clnt *client;
+ struct net *net;
/*
* There are two upcall pipes; dentry[1], named "gssd", is used
* for the new text-based upcall; dentry[0] is named after the
* mechanism (for example, "krb5") and exists for
* backwards-compatibility with older gssd's.
*/
- struct rpc_pipe *pipe[2];
+ struct gss_pipe *gss_pipe[2];
+ const char *target_name;
};
/* pipe_version >= 0 if and only if someone has a pipe open. */
@@ -294,7 +312,7 @@ static void put_pipe_version(struct net *net)
static void
gss_release_msg(struct gss_upcall_msg *gss_msg)
{
- struct net *net = rpc_net_ns(gss_msg->auth->client);
+ struct net *net = gss_msg->auth->net;
if (!atomic_dec_and_test(&gss_msg->count))
return;
put_pipe_version(net);
@@ -406,8 +424,8 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
}
static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
- struct rpc_clnt *clnt,
- const char *service_name)
+ const char *service_name,
+ const char *target_name)
{
struct gss_api_mech *mech = gss_msg->auth->mech;
char *p = gss_msg->databuf;
@@ -417,8 +435,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
mech->gm_name,
from_kuid(&init_user_ns, gss_msg->uid));
p += gss_msg->msg.len;
- if (clnt->cl_principal) {
- len = sprintf(p, "target=%s ", clnt->cl_principal);
+ if (target_name) {
+ len = sprintf(p, "target=%s ", target_name);
p += len;
gss_msg->msg.len += len;
}
@@ -439,21 +457,8 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN);
}
-static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
- struct rpc_clnt *clnt,
- const char *service_name)
-{
- struct net *net = rpc_net_ns(clnt);
- struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
-
- if (sn->pipe_version == 0)
- gss_encode_v0_msg(gss_msg);
- else /* pipe_version == 1 */
- gss_encode_v1_msg(gss_msg, clnt, service_name);
-}
-
static struct gss_upcall_msg *
-gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
+gss_alloc_msg(struct gss_auth *gss_auth,
kuid_t uid, const char *service_name)
{
struct gss_upcall_msg *gss_msg;
@@ -462,31 +467,36 @@ gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
if (gss_msg == NULL)
return ERR_PTR(-ENOMEM);
- vers = get_pipe_version(rpc_net_ns(clnt));
+ vers = get_pipe_version(gss_auth->net);
if (vers < 0) {
kfree(gss_msg);
return ERR_PTR(vers);
}
- gss_msg->pipe = gss_auth->pipe[vers];
+ gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe;
INIT_LIST_HEAD(&gss_msg->list);
rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
init_waitqueue_head(&gss_msg->waitqueue);
atomic_set(&gss_msg->count, 1);
gss_msg->uid = uid;
gss_msg->auth = gss_auth;
- gss_encode_msg(gss_msg, clnt, service_name);
+ /*
+ * Encode exactly one message format for the pipe version returned by
+ * get_pipe_version(): without the break, a v0 message would fall
+ * through and be overwritten by the v1 encoding.
+ */
+ switch (vers) {
+ case 0:
+ gss_encode_v0_msg(gss_msg);
+ break;
+ default:
+ gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
+ }
return gss_msg;
}
static struct gss_upcall_msg *
-gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred)
+gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_new, *gss_msg;
kuid_t uid = cred->cr_uid;
- gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal);
+ gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
if (IS_ERR(gss_new))
return gss_new;
gss_msg = gss_add_msg(gss_new);
@@ -527,7 +537,7 @@ gss_refresh_upcall(struct rpc_task *task)
dprintk("RPC: %5u %s for uid %u\n",
task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
- gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
+ gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
* shouldn't normally hit this case on a refresh. */
@@ -566,7 +576,7 @@ out:
static inline int
gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
{
- struct net *net = rpc_net_ns(gss_auth->client);
+ struct net *net = gss_auth->net;
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
struct rpc_pipe *pipe;
struct rpc_cred *cred = &gss_cred->gc_base;
@@ -583,7 +593,7 @@ retry:
timeout = 15 * HZ;
if (!sn->gssd_running)
timeout = HZ >> 2;
- gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
+ gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
err = wait_event_interruptible_timeout(pipe_version_waitqueue,
sn->pipe_version >= 0, timeout);
@@ -797,83 +807,153 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
}
}
-static void gss_pipes_dentries_destroy(struct rpc_auth *auth)
+static void gss_pipe_dentry_destroy(struct dentry *dir,
+ struct rpc_pipe_dir_object *pdo)
{
- struct gss_auth *gss_auth;
+ struct gss_pipe *gss_pipe = pdo->pdo_data;
+ struct rpc_pipe *pipe = gss_pipe->pipe;
- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
- if (gss_auth->pipe[0]->dentry)
- rpc_unlink(gss_auth->pipe[0]->dentry);
- if (gss_auth->pipe[1]->dentry)
- rpc_unlink(gss_auth->pipe[1]->dentry);
+ if (pipe->dentry != NULL) {
+ rpc_unlink(pipe->dentry);
+ pipe->dentry = NULL;
+ }
}
-static int gss_pipes_dentries_create(struct rpc_auth *auth)
+static int gss_pipe_dentry_create(struct dentry *dir,
+ struct rpc_pipe_dir_object *pdo)
{
- int err;
- struct gss_auth *gss_auth;
- struct rpc_clnt *clnt;
+ struct gss_pipe *p = pdo->pdo_data;
+ struct dentry *dentry;
- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
- clnt = gss_auth->client;
-
- gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry,
- "gssd",
- clnt, gss_auth->pipe[1]);
- if (IS_ERR(gss_auth->pipe[1]->dentry))
- return PTR_ERR(gss_auth->pipe[1]->dentry);
- gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry,
- gss_auth->mech->gm_name,
- clnt, gss_auth->pipe[0]);
- if (IS_ERR(gss_auth->pipe[0]->dentry)) {
- err = PTR_ERR(gss_auth->pipe[0]->dentry);
- goto err_unlink_pipe_1;
- }
+ dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ p->pipe->dentry = dentry;
return 0;
+}
-err_unlink_pipe_1:
- rpc_unlink(gss_auth->pipe[1]->dentry);
- return err;
+static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
+ .create = gss_pipe_dentry_create,
+ .destroy = gss_pipe_dentry_destroy,
+};
+
+/*
+ * Allocate a gss_pipe for @clnt named @name, with pipe data created by
+ * rpc_mkpipe_data() using @upcall_ops and RPC_PIPE_WAIT_FOR_OPEN. The kref
+ * is initialised and the embedded pipe dir object is set up with
+ * gss_pipe_dir_object_ops. @name is stored by pointer, not copied.
+ *
+ * Returns the new gss_pipe, or an ERR_PTR(): -ENOMEM if the allocation
+ * fails, otherwise the error reported by rpc_mkpipe_data().
+ */
+static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt,
+ const char *name,
+ const struct rpc_pipe_ops *upcall_ops)
+{
+ struct gss_pipe *p;
+ int err = -ENOMEM;
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (p == NULL)
+ goto err;
+ p->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+ if (IS_ERR(p->pipe)) {
+ err = PTR_ERR(p->pipe);
+ goto err_free_gss_pipe;
+ }
+ p->name = name;
+ p->clnt = clnt;
+ kref_init(&p->kref);
+ rpc_init_pipe_dir_object(&p->pdo,
+ &gss_pipe_dir_object_ops,
+ p);
+ return p;
+err_free_gss_pipe:
+ kfree(p);
+err:
+ return ERR_PTR(err);
+}
+
+/*
+ * Arguments handed to gss_pipe_match_pdo() and gss_pipe_alloc_pdo()
+ * through their void *data parameter.
+ */
+struct gss_alloc_pdo {
+ struct rpc_clnt *clnt;
+ const char *name;
+ const struct rpc_pipe_ops *upcall_ops;
+};
+
+/*
+ * Match callback: returns 1 if @pdo is a gss_pipe (identified by its ops
+ * pointer) whose name equals args->name and on which a reference could be
+ * taken, otherwise 0. On a match the kref has been incremented.
+ */
+static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data)
+{
+ struct gss_pipe *gss_pipe;
+ struct gss_alloc_pdo *args = data;
+
+ if (pdo->pdo_ops != &gss_pipe_dir_object_ops)
+ return 0;
+ gss_pipe = container_of(pdo, struct gss_pipe, pdo);
+ if (strcmp(gss_pipe->name, args->name) != 0)
+ return 0;
+ if (!kref_get_unless_zero(&gss_pipe->kref))
+ return 0;
+ return 1;
+}
+
+/*
+ * Allocation callback: create a gss_pipe from @data (a struct
+ * gss_alloc_pdo) and return its embedded pdo. Returns NULL on failure;
+ * the specific error from gss_pipe_alloc() is not propagated.
+ */
+static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data)
+{
+ struct gss_pipe *gss_pipe;
+ struct gss_alloc_pdo *args = data;
+
+ gss_pipe = gss_pipe_alloc(args->clnt, args->name, args->upcall_ops);
+ if (!IS_ERR(gss_pipe))
+ return &gss_pipe->pdo;
+ return NULL;
}
-static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt,
- struct rpc_auth *auth)
+static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt,
+ const char *name,
+ const struct rpc_pipe_ops *upcall_ops)
{
struct net *net = rpc_net_ns(clnt);
- struct super_block *sb;
+ struct rpc_pipe_dir_object *pdo;
+ struct gss_alloc_pdo args = {
+ .clnt = clnt,
+ .name = name,
+ .upcall_ops = upcall_ops,
+ };
- sb = rpc_get_sb_net(net);
- if (sb) {
- if (clnt->cl_dentry)
- gss_pipes_dentries_destroy(auth);
- rpc_put_sb_net(net);
- }
+ pdo = rpc_find_or_alloc_pipe_dir_object(net,
+ &clnt->cl_pipedir_objects,
+ gss_pipe_match_pdo,
+ gss_pipe_alloc_pdo,
+ &args);
+ if (pdo != NULL)
+ return container_of(pdo, struct gss_pipe, pdo);
+ return ERR_PTR(-ENOMEM);
}
-static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt,
- struct rpc_auth *auth)
+static void __gss_pipe_free(struct gss_pipe *p)
{
+ struct rpc_clnt *clnt = p->clnt;
struct net *net = rpc_net_ns(clnt);
- struct super_block *sb;
- int err = 0;
- sb = rpc_get_sb_net(net);
- if (sb) {
- if (clnt->cl_dentry)
- err = gss_pipes_dentries_create(auth);
- rpc_put_sb_net(net);
- }
- return err;
+ rpc_remove_pipe_dir_object(net,
+ &clnt->cl_pipedir_objects,
+ &p->pdo);
+ rpc_destroy_pipe_data(p->pipe);
+ kfree(p);
+}
+
+/* kref release callback: free the gss_pipe that embeds @kref. */
+static void __gss_pipe_release(struct kref *kref)
+{
+ struct gss_pipe *p = container_of(kref, struct gss_pipe, kref);
+
+ __gss_pipe_free(p);
+}
+
+/*
+ * Drop one reference on @p; a NULL @p is accepted and ignored. The final
+ * put invokes __gss_pipe_release().
+ */
+static void gss_pipe_free(struct gss_pipe *p)
+{
+ if (p != NULL)
+ kref_put(&p->kref, __gss_pipe_release);
}
/*
* NOTE: we have the opportunity to use different
* parameters based on the input flavor (which must be a pseudoflavor)
*/
-static struct rpc_auth *
-gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+static struct gss_auth *
+gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
+ rpc_authflavor_t flavor = args->pseudoflavor;
struct gss_auth *gss_auth;
+ struct gss_pipe *gss_pipe;
struct rpc_auth * auth;
int err = -ENOMEM; /* XXX? */
@@ -883,12 +963,20 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
return ERR_PTR(err);
if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
goto out_dec;
+ INIT_HLIST_NODE(&gss_auth->hash);
+ gss_auth->target_name = NULL;
+ if (args->target_name) {
+ gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL);
+ if (gss_auth->target_name == NULL)
+ goto err_free;
+ }
gss_auth->client = clnt;
+ gss_auth->net = get_net(rpc_net_ns(clnt));
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
if (!gss_auth->mech) {
dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
- goto err_free;
+ goto err_put_net;
}
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
if (gss_auth->service == 0)
@@ -901,42 +989,41 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
atomic_set(&auth->au_count, 1);
kref_init(&gss_auth->kref);
+ err = rpcauth_init_credcache(auth);
+ if (err)
+ goto err_put_mech;
/*
* Note: if we created the old pipe first, then someone who
* examined the directory at the right moment might conclude
* that we supported only the old pipe. So we instead create
* the new pipe first.
*/
- gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1,
- RPC_PIPE_WAIT_FOR_OPEN);
- if (IS_ERR(gss_auth->pipe[1])) {
- err = PTR_ERR(gss_auth->pipe[1]);
- goto err_put_mech;
+ gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1);
+ if (IS_ERR(gss_pipe)) {
+ err = PTR_ERR(gss_pipe);
+ goto err_destroy_credcache;
}
+ gss_auth->gss_pipe[1] = gss_pipe;
- gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0,
- RPC_PIPE_WAIT_FOR_OPEN);
- if (IS_ERR(gss_auth->pipe[0])) {
- err = PTR_ERR(gss_auth->pipe[0]);
+ gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name,
+ &gss_upcall_ops_v0);
+ if (IS_ERR(gss_pipe)) {
+ err = PTR_ERR(gss_pipe);
goto err_destroy_pipe_1;
}
- err = gss_pipes_dentries_create_net(clnt, auth);
- if (err)
- goto err_destroy_pipe_0;
- err = rpcauth_init_credcache(auth);
- if (err)
- goto err_unlink_pipes;
+ gss_auth->gss_pipe[0] = gss_pipe;
- return auth;
-err_unlink_pipes:
- gss_pipes_dentries_destroy_net(clnt, auth);
-err_destroy_pipe_0:
- rpc_destroy_pipe_data(gss_auth->pipe[0]);
+ return gss_auth;
err_destroy_pipe_1:
- rpc_destroy_pipe_data(gss_auth->pipe[1]);
+ gss_pipe_free(gss_auth->gss_pipe[1]);
+err_destroy_credcache:
+ rpcauth_destroy_credcache(auth);
err_put_mech:
gss_mech_put(gss_auth->mech);
+err_put_net:
+ put_net(gss_auth->net);
err_free:
+ kfree(gss_auth->target_name);
kfree(gss_auth);
out_dec:
module_put(THIS_MODULE);
@@ -946,10 +1033,11 @@ out_dec:
static void
gss_free(struct gss_auth *gss_auth)
{
- gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth);
- rpc_destroy_pipe_data(gss_auth->pipe[0]);
- rpc_destroy_pipe_data(gss_auth->pipe[1]);
+ gss_pipe_free(gss_auth->gss_pipe[0]);
+ gss_pipe_free(gss_auth->gss_pipe[1]);
gss_mech_put(gss_auth->mech);
+ put_net(gss_auth->net);
+ kfree(gss_auth->target_name);
kfree(gss_auth);
module_put(THIS_MODULE);
@@ -966,18 +1054,113 @@ gss_free_callback(struct kref *kref)
static void
gss_destroy(struct rpc_auth *auth)
{
- struct gss_auth *gss_auth;
+ struct gss_auth *gss_auth = container_of(auth,
+ struct gss_auth, rpc_auth);
dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
auth, auth->au_flavor);
+ if (hash_hashed(&gss_auth->hash)) {
+ spin_lock(&gss_auth_hash_lock);
+ hash_del(&gss_auth->hash);
+ spin_unlock(&gss_auth_hash_lock);
+ }
+
+ gss_pipe_free(gss_auth->gss_pipe[0]);
+ gss_auth->gss_pipe[0] = NULL;
+ gss_pipe_free(gss_auth->gss_pipe[1]);
+ gss_auth->gss_pipe[1] = NULL;
rpcauth_destroy_credcache(auth);
- gss_auth = container_of(auth, struct gss_auth, rpc_auth);
kref_put(&gss_auth->kref, gss_free_callback);
}
/*
+ * Auths may be shared between rpc clients that were cloned from a
+ * common client with the same xprt, if they also share the flavor and
+ * target_name.
+ *
+ * The auth is looked up from the oldest parent sharing the same
+ * cl_xprt, and the auth itself references only that common parent
+ * (which is guaranteed to last as long as any of its descendants).
+ */
+/*
+ * Look up a gss_auth for @clnt while holding gss_auth_hash_lock. An entry
+ * matches when it has the same client pointer, a flavor equal to
+ * args->pseudoflavor and an equal target_name (same pointer, including
+ * both NULL, or equal strings), and its au_count could be raised from a
+ * non-zero value.
+ *
+ * Returns the matching entry with au_count incremented. If nothing
+ * matches, @new is added to the hash table when it is non-NULL and @new
+ * is returned; with a NULL @new this is a pure lookup returning NULL on
+ * a miss.
+ */
+static struct gss_auth *
+gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
+ struct rpc_clnt *clnt,
+ struct gss_auth *new)
+{
+ struct gss_auth *gss_auth;
+ unsigned long hashval = (unsigned long)clnt;
+
+ spin_lock(&gss_auth_hash_lock);
+ hash_for_each_possible(gss_auth_hash_table,
+ gss_auth,
+ hash,
+ hashval) {
+ if (gss_auth->client != clnt)
+ continue;
+ if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor)
+ continue;
+ if (gss_auth->target_name != args->target_name) {
+ if (gss_auth->target_name == NULL)
+ continue;
+ if (args->target_name == NULL)
+ continue;
+ if (strcmp(gss_auth->target_name, args->target_name))
+ continue;
+ }
+ if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+ continue;
+ goto out;
+ }
+ if (new)
+ hash_add(gss_auth_hash_table, &new->hash, hashval);
+ gss_auth = new;
+out:
+ spin_unlock(&gss_auth_hash_lock);
+ return gss_auth;
+}
+
+/*
+ * Return an existing hashed gss_auth matching @args and @clnt, or create
+ * a new one with gss_create_new() and try to hash it. If the second
+ * lookup finds a different matching entry, the freshly created auth is
+ * destroyed and the existing one is returned instead. Returns the
+ * ERR_PTR() from gss_create_new() if creation fails.
+ */
+static struct gss_auth *
+gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+{
+ struct gss_auth *gss_auth;
+ struct gss_auth *new;
+
+ gss_auth = gss_auth_find_or_add_hashed(args, clnt, NULL);
+ if (gss_auth != NULL)
+ goto out;
+ new = gss_create_new(args, clnt);
+ if (IS_ERR(new))
+ return new;
+ gss_auth = gss_auth_find_or_add_hashed(args, clnt, new);
+ if (gss_auth != new)
+ gss_destroy(&new->rpc_auth);
+out:
+ return gss_auth;
+}
+
+/*
+ * Walk up cl_parent for as long as the parent has the same cl_xprt as the
+ * original @clnt, then find or create the gss_auth against that ancestor.
+ * Returns the embedded rpc_auth, or an ERR_PTR() on failure.
+ */
+static struct rpc_auth *
+gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+{
+ struct gss_auth *gss_auth;
+ struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt);
+
+ while (clnt != clnt->cl_parent) {
+ struct rpc_clnt *parent = clnt->cl_parent;
+ /* Find the original parent for this transport */
+ if (rcu_access_pointer(parent->cl_xprt) != xprt)
+ break;
+ clnt = parent;
+ }
+
+ gss_auth = gss_create_hashed(args, clnt);
+ if (IS_ERR(gss_auth))
+ return ERR_CAST(gss_auth);
+ return &gss_auth->rpc_auth;
+}
+
+/*
* gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
* to the server with the GSS control procedure field set to
* RPC_GSS_PROC_DESTROY. This should normally cause the server to release
@@ -1126,10 +1309,32 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
return err;
}
+/*
+ * Returns -EACCES if the GSS context is NULL or will expire within
+ * gss_key_expire_timeo seconds; returns 0 otherwise.
+ */
+static int
+gss_key_timeout(struct rpc_cred *rc)
+{
+ struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+ unsigned long now = jiffies;
+ unsigned long expire;
+
+ if (gss_cred->gc_ctx == NULL)
+ return -EACCES;
+
+ expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ);
+
+ if (time_after(now, expire))
+ return -EACCES;
+ return 0;
+}
+
static int
gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+ int ret;
if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
goto out;
@@ -1142,11 +1347,26 @@ out:
if (acred->principal != NULL) {
if (gss_cred->gc_principal == NULL)
return 0;
- return strcmp(acred->principal, gss_cred->gc_principal) == 0;
+ ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
+ goto check_expire;
}
if (gss_cred->gc_principal != NULL)
return 0;
- return uid_eq(rc->cr_uid, acred->uid);
+ ret = uid_eq(rc->cr_uid, acred->uid);
+
+check_expire:
+ if (ret == 0)
+ return ret;
+
+ /* Notify acred users of GSS context expiration timeout */
+ if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
+ (gss_key_timeout(rc) != 0)) {
+ /* test will now be done from generic cred */
+ test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
+ /* tell NFS layer that key will expire soon */
+ set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+ }
+ return ret;
}
/*
@@ -1292,6 +1512,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
struct xdr_netobj mic;
u32 flav,len;
u32 maj_stat;
+ __be32 *ret = ERR_PTR(-EIO);
dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
@@ -1307,6 +1528,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
mic.data = (u8 *)p;
mic.len = len;
+ ret = ERR_PTR(-EACCES);
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
@@ -1324,8 +1546,9 @@ gss_validate(struct rpc_task *task, __be32 *p)
return p + XDR_QUADLEN(len);
out_bad:
gss_put_ctx(ctx);
- dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__);
- return NULL;
+ dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
+ PTR_ERR(ret));
+ return ret;
}
static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
@@ -1657,8 +1880,6 @@ static const struct rpc_authops authgss_ops = {
.destroy = gss_destroy,
.lookup_cred = gss_lookup_cred,
.crcreate = gss_create_cred,
- .pipes_create = gss_pipes_dentries_create,
- .pipes_destroy = gss_pipes_dentries_destroy,
.list_pseudoflavors = gss_mech_list_pseudoflavors,
.info2flavor = gss_mech_info2flavor,
.flavor2info = gss_mech_flavor2info,
@@ -1675,6 +1896,7 @@ static const struct rpc_credops gss_credops = {
.crvalidate = gss_validate,
.crwrap_req = gss_wrap_req,
.crunwrap_resp = gss_unwrap_resp,
+ .crkey_timeout = gss_key_timeout,
};
static const struct rpc_credops gss_nullops = {
@@ -1762,5 +1984,12 @@ module_param_named(expired_cred_retry_delay,
MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
"the RPC engine retries an expired credential");
+module_param_named(key_expire_timeo,
+ gss_key_expire_timeo,
+ uint, 0644);
+MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a "
+ "credential keys lifetime where the NFS layer cleans up "
+ "prior to key expiration");
+
module_init(init_rpcsec_gss)
module_exit(exit_rpcsec_gss)
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index af7ffd447fee..f1eb0d16666c 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -213,6 +213,26 @@ static int gssp_call(struct net *net, struct rpc_message *msg)
return status;
}
+/*
+ * Release everything set up for receiving into arg->pages: each page that
+ * was filled in (the array is zeroed at allocation, so the walk stops at
+ * the first NULL slot) and then the pointer array itself, which
+ * gssp_alloc_receive_pages() obtained from kzalloc().
+ */
+static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
+{
+ int i;
+
+ for (i = 0; i < arg->npages && arg->pages[i]; i++)
+ __free_page(arg->pages[i]);
+
+ kfree(arg->pages);
+}
+
+/*
+ * Set arg->npages to the number of pages needed to hold NGROUPS_MAX * 4
+ * bytes and allocate a zeroed arg->pages pointer array of that length.
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
+{
+ arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
+ arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL);
+ /*
+ * XXX: actual pages are allocated by xdr layer in
+ * xdr_partial_copy_from_skb.
+ */
+ if (!arg->pages)
+ return -ENOMEM;
+ return 0;
+}
/*
* Public functions
@@ -261,10 +281,16 @@ int gssp_accept_sec_context_upcall(struct net *net,
arg.context_handle = &ctxh;
res.output_token->len = GSSX_max_output_token_sz;
+ ret = gssp_alloc_receive_pages(&arg);
+ if (ret)
+ return ret;
+
/* use nfs/ for targ_name ? */
ret = gssp_call(net, &msg);
+ gssp_free_receive_pages(&arg);
+
/* we need to fetch all data even in case of error so
* that we can free special structures if they have been allocated */
data->major_status = res.status.major_status;
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 3c85d1c8a028..f0f78c5f1c7d 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -166,14 +166,15 @@ static int dummy_dec_opt_array(struct xdr_stream *xdr,
return 0;
}
-static int get_s32(void **p, void *max, s32 *res)
+static int get_host_u32(struct xdr_stream *xdr, u32 *res)
{
- void *base = *p;
- void *next = (void *)((char *)base + sizeof(s32));
- if (unlikely(next > max || next < base))
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (!p)
return -EINVAL;
- memcpy(res, base, sizeof(s32));
- *p = next;
+ /* Contents of linux creds are all host-endian: */
+ memcpy(res, p, sizeof(u32));
return 0;
}
@@ -182,9 +183,9 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
{
u32 length;
__be32 *p;
- void *q, *end;
- s32 tmp;
- int N, i, err;
+ u32 tmp;
+ u32 N;
+ int i, err;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
@@ -192,33 +193,28 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
length = be32_to_cpup(p);
- /* FIXME: we do not want to use the scratch buffer for this one
- * may need to use functions that allows us to access an io vector
- * directly */
- p = xdr_inline_decode(xdr, length);
- if (unlikely(p == NULL))
+ if (length > (3 + NGROUPS_MAX) * sizeof(u32))
return -ENOSPC;
- q = p;
- end = q + length;
-
/* uid */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_uid = make_kuid(&init_user_ns, tmp);
/* gid */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_gid = make_kgid(&init_user_ns, tmp);
/* number of additional gid's */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
N = tmp;
+ if ((3 + N) * sizeof(u32) != length)
+ return -EINVAL;
creds->cr_group_info = groups_alloc(N);
if (creds->cr_group_info == NULL)
return -ENOMEM;
@@ -226,7 +222,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
/* gid's */
for (i = 0; i < N; i++) {
kgid_t kgid;
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
goto out_free_groups;
err = -EINVAL;
@@ -784,6 +780,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
/* arg->options */
err = dummy_enc_opt_array(xdr, &arg->options);
+ xdr_inline_pages(&req->rq_rcv_buf,
+ PAGE_SIZE/2 /* pretty arbitrary */,
+ arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
done:
if (err)
dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 1c98b27d870c..685a688f3d8a 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -147,6 +147,8 @@ struct gssx_arg_accept_sec_context {
struct gssx_cb *input_cb;
u32 ret_deleg_cred;
struct gssx_option_array options;
+ struct page **pages;
+ unsigned int npages;
};
struct gssx_res_accept_sec_context {
@@ -240,7 +242,8 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
2 * GSSX_max_princ_sz + \
8 + 8 + 4 + 4 + 4)
#define GSSX_max_output_token_sz 1024
-#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4)
+/* grouplist not included; we allocate separate pages for that: */
+#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */)
#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
GSSX_default_ctx_sz + \
GSSX_max_output_token_sz + \
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index a5c36c01707b..f0ebe07978a2 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -18,7 +18,7 @@ static struct rpc_auth null_auth;
static struct rpc_cred null_cred;
static struct rpc_auth *
-nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
atomic_inc(&null_auth.au_count);
return &null_auth;
@@ -88,13 +88,13 @@ nul_validate(struct rpc_task *task, __be32 *p)
flavor = ntohl(*p++);
if (flavor != RPC_AUTH_NULL) {
printk("RPC: bad verf flavor: %u\n", flavor);
- return NULL;
+ return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size != 0) {
printk("RPC: bad verf size: %u\n", size);
- return NULL;
+ return ERR_PTR(-EIO);
}
return p;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index dc37021fc3e5..d5d692366294 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -33,7 +33,7 @@ static struct rpc_auth unix_auth;
static const struct rpc_credops unix_credops;
static struct rpc_auth *
-unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
dprintk("RPC: creating UNIX authenticator for client %p\n",
clnt);
@@ -192,13 +192,13 @@ unx_validate(struct rpc_task *task, __be32 *p)
flavor != RPC_AUTH_UNIX &&
flavor != RPC_AUTH_SHORT) {
printk("RPC: bad verf flavor: %u\n", flavor);
- return NULL;
+ return ERR_PTR(-EIO);
}
size = ntohl(*p++);
if (size > RPC_MAX_AUTH_SIZE) {
printk("RPC: giant verf size: %u\n", size);
- return NULL;
+ return ERR_PTR(-EIO);
}
task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
p += (size >> 2);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ecbc4e3d83ad..77479606a971 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -102,12 +102,7 @@ static void rpc_unregister_client(struct rpc_clnt *clnt)
static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
{
- if (clnt->cl_dentry) {
- if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy)
- clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth);
- rpc_remove_client_dir(clnt->cl_dentry);
- }
- clnt->cl_dentry = NULL;
+ rpc_remove_client_dir(clnt);
}
static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
@@ -123,10 +118,10 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
}
static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
- struct rpc_clnt *clnt,
- const char *dir_name)
+ struct rpc_clnt *clnt)
{
static uint32_t clntid;
+ const char *dir_name = clnt->cl_program->pipe_dir_name;
char name[15];
struct dentry *dir, *dentry;
@@ -153,28 +148,35 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
}
static int
-rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name,
- struct super_block *pipefs_sb)
+rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
struct dentry *dentry;
- clnt->cl_dentry = NULL;
- if (dir_name == NULL)
- return 0;
- dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- clnt->cl_dentry = dentry;
+ if (clnt->cl_program->pipe_dir_name != NULL) {
+ dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
+ }
return 0;
}
-static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
+static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
{
- if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
- ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
- return 1;
- if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0)
+ if (clnt->cl_program->pipe_dir_name == NULL)
return 1;
+
+ switch (event) {
+ case RPC_PIPEFS_MOUNT:
+ if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
+ return 1;
+ if (atomic_read(&clnt->cl_count) == 0)
+ return 1;
+ break;
+ case RPC_PIPEFS_UMOUNT:
+ if (clnt->cl_pipedir_objects.pdh_dentry == NULL)
+ return 1;
+ break;
+ }
return 0;
}
@@ -186,18 +188,11 @@ static int __rpc_clnt_handle_event(struct rpc_clnt *clnt, unsigned long event,
switch (event) {
case RPC_PIPEFS_MOUNT:
- dentry = rpc_setup_pipedir_sb(sb, clnt,
- clnt->cl_program->pipe_dir_name);
+ dentry = rpc_setup_pipedir_sb(sb, clnt);
if (!dentry)
return -ENOENT;
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- clnt->cl_dentry = dentry;
- if (clnt->cl_auth->au_ops->pipes_create) {
- err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth);
- if (err)
- __rpc_clnt_remove_pipedir(clnt);
- }
break;
case RPC_PIPEFS_UMOUNT:
__rpc_clnt_remove_pipedir(clnt);
@@ -230,8 +225,6 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event)
spin_lock(&sn->rpc_client_lock);
list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
- if (clnt->cl_program->pipe_dir_name == NULL)
- continue;
if (rpc_clnt_skip_event(clnt, event))
continue;
spin_unlock(&sn->rpc_client_lock);
@@ -282,7 +275,10 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
static int rpc_client_register(const struct rpc_create_args *args,
struct rpc_clnt *clnt)
{
- const struct rpc_program *program = args->program;
+ struct rpc_auth_create_args auth_args = {
+ .pseudoflavor = args->authflavor,
+ .target_name = args->client_name,
+ };
struct rpc_auth *auth;
struct net *net = rpc_net_ns(clnt);
struct super_block *pipefs_sb;
@@ -290,7 +286,7 @@ static int rpc_client_register(const struct rpc_create_args *args,
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
- err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb);
+ err = rpc_setup_pipedir(pipefs_sb, clnt);
if (err)
goto out;
}
@@ -299,7 +295,7 @@ static int rpc_client_register(const struct rpc_create_args *args,
if (pipefs_sb)
rpc_put_sb_net(net);
- auth = rpcauth_create(args->authflavor, clnt);
+ auth = rpcauth_create(&auth_args, clnt);
if (IS_ERR(auth)) {
dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
args->authflavor);
@@ -317,7 +313,27 @@ out:
return err;
}
-static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
+static DEFINE_IDA(rpc_clids);
+
+/*
+ * Allocate an id from the rpc_clids IDA and store it in clnt->cl_clid.
+ * Returns 0 on success, or the negative value returned by
+ * ida_simple_get() on failure.
+ */
+static int rpc_alloc_clid(struct rpc_clnt *clnt)
+{
+ int clid;
+
+ clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL);
+ if (clid < 0)
+ return clid;
+ clnt->cl_clid = clid;
+ return 0;
+}
+
+/* Return clnt->cl_clid to the rpc_clids IDA. */
+static void rpc_free_clid(struct rpc_clnt *clnt)
+{
+ ida_simple_remove(&rpc_clids, clnt->cl_clid);
+}
+
+static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
+ struct rpc_xprt *xprt,
+ struct rpc_clnt *parent)
{
const struct rpc_program *program = args->program;
const struct rpc_version *version;
@@ -343,16 +359,20 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
goto out_err;
- clnt->cl_parent = clnt;
+ clnt->cl_parent = parent ? : clnt;
+
+ err = rpc_alloc_clid(clnt);
+ if (err)
+ goto out_no_clid;
rcu_assign_pointer(clnt->cl_xprt, xprt);
clnt->cl_procinfo = version->procs;
clnt->cl_maxproc = version->nrprocs;
- clnt->cl_protname = program->name;
clnt->cl_prog = args->prognumber ? : program->number;
clnt->cl_vers = version->number;
clnt->cl_stats = program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
+ rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
err = -ENOMEM;
if (clnt->cl_metrics == NULL)
goto out_no_stats;
@@ -372,12 +392,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
clnt->cl_rtt = &clnt->cl_rtt_default;
rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
- clnt->cl_principal = NULL;
- if (args->client_name) {
- clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL);
- if (!clnt->cl_principal)
- goto out_no_principal;
- }
atomic_set(&clnt->cl_count, 1);
@@ -387,13 +401,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
err = rpc_client_register(args, clnt);
if (err)
goto out_no_path;
+ if (parent)
+ atomic_inc(&parent->cl_count);
return clnt;
out_no_path:
- kfree(clnt->cl_principal);
-out_no_principal:
rpc_free_iostats(clnt->cl_metrics);
out_no_stats:
+ rpc_free_clid(clnt);
+out_no_clid:
kfree(clnt);
out_err:
rpciod_down();
@@ -479,7 +495,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- clnt = rpc_new_client(args, xprt);
+ clnt = rpc_new_client(args, xprt, NULL);
if (IS_ERR(clnt))
return clnt;
@@ -526,15 +542,12 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
goto out_err;
args->servername = xprt->servername;
- new = rpc_new_client(args, xprt);
+ new = rpc_new_client(args, xprt, clnt);
if (IS_ERR(new)) {
err = PTR_ERR(new);
goto out_err;
}
- atomic_inc(&clnt->cl_count);
- new->cl_parent = clnt;
-
/* Turn off autobind on clones */
new->cl_autobind = 0;
new->cl_softrtry = clnt->cl_softrtry;
@@ -561,7 +574,6 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
- .client_name = clnt->cl_principal,
};
return __rpc_clone_client(&args, clnt);
}
@@ -583,7 +595,6 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
.prognumber = clnt->cl_prog,
.version = clnt->cl_vers,
.authflavor = flavor,
- .client_name = clnt->cl_principal,
};
return __rpc_clone_client(&args, clnt);
}
@@ -629,7 +640,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
might_sleep();
dprintk_rcu("RPC: shutting down %s client for %s\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
while (!list_empty(&clnt->cl_tasks)) {
@@ -649,17 +660,17 @@ static void
rpc_free_client(struct rpc_clnt *clnt)
{
dprintk_rcu("RPC: destroying %s client for %s\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
if (clnt->cl_parent != clnt)
rpc_release_client(clnt->cl_parent);
rpc_clnt_remove_pipedir(clnt);
rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
- kfree(clnt->cl_principal);
clnt->cl_metrics = NULL;
xprt_put(rcu_dereference_raw(clnt->cl_xprt));
rpciod_down();
+ rpc_free_clid(clnt);
kfree(clnt);
}
@@ -720,7 +731,6 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.prognumber = program->number,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
- .client_name = old->cl_principal,
};
struct rpc_clnt *clnt;
int err;
@@ -1299,7 +1309,7 @@ call_start(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
- clnt->cl_protname, clnt->cl_vers,
+ clnt->cl_program->name, clnt->cl_vers,
rpc_proc_name(task),
(RPC_IS_ASYNC(task) ? "async" : "sync"));
@@ -1423,9 +1433,9 @@ call_refreshresult(struct rpc_task *task)
return;
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
- case -EKEYEXPIRED:
case -EAGAIN:
status = -EACCES;
+ case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
@@ -1912,7 +1922,7 @@ call_status(struct rpc_task *task)
default:
if (clnt->cl_chatty)
printk("%s: RPC call returned error %d\n",
- clnt->cl_protname, -status);
+ clnt->cl_program->name, -status);
rpc_exit(task, status);
}
}
@@ -1943,7 +1953,7 @@ call_timeout(struct rpc_task *task)
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
@@ -1959,7 +1969,7 @@ call_timeout(struct rpc_task *task)
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
@@ -1994,7 +2004,7 @@ call_decode(struct rpc_task *task)
if (clnt->cl_chatty) {
rcu_read_lock();
printk(KERN_NOTICE "%s: server %s OK\n",
- clnt->cl_protname,
+ clnt->cl_program->name,
rcu_dereference(clnt->cl_xprt)->servername);
rcu_read_unlock();
}
@@ -2019,7 +2029,7 @@ call_decode(struct rpc_task *task)
goto out_retry;
}
dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
- clnt->cl_protname, task->tk_status);
+ clnt->cl_program->name, task->tk_status);
task->tk_action = call_timeout;
goto out_retry;
}
@@ -2091,7 +2101,8 @@ rpc_verify_header(struct rpc_task *task)
dprintk("RPC: %5u %s: XDR representation not a multiple of"
" 4 bytes: 0x%x\n", task->tk_pid, __func__,
task->tk_rqstp->rq_rcv_buf.len);
- goto out_eio;
+ error = -EIO;
+ goto out_err;
}
if ((len -= 3) < 0)
goto out_overflow;
@@ -2100,6 +2111,7 @@ rpc_verify_header(struct rpc_task *task)
if ((n = ntohl(*p++)) != RPC_REPLY) {
dprintk("RPC: %5u %s: not an RPC reply: %x\n",
task->tk_pid, __func__, n);
+ error = -EIO;
goto out_garbage;
}
@@ -2118,7 +2130,8 @@ rpc_verify_header(struct rpc_task *task)
dprintk("RPC: %5u %s: RPC call rejected, "
"unknown error: %x\n",
task->tk_pid, __func__, n);
- goto out_eio;
+ error = -EIO;
+ goto out_err;
}
if (--len < 0)
goto out_overflow;
@@ -2163,9 +2176,11 @@ rpc_verify_header(struct rpc_task *task)
task->tk_pid, __func__, n);
goto out_err;
}
- if (!(p = rpcauth_checkverf(task, p))) {
- dprintk("RPC: %5u %s: auth check failed\n",
- task->tk_pid, __func__);
+ p = rpcauth_checkverf(task, p);
+ if (IS_ERR(p)) {
+ error = PTR_ERR(p);
+ dprintk("RPC: %5u %s: auth check failed with %d\n",
+ task->tk_pid, __func__, error);
goto out_garbage; /* bad verifier, retry */
}
len = p - (__be32 *)iov->iov_base - 1;
@@ -2218,8 +2233,6 @@ out_garbage:
out_retry:
return ERR_PTR(-EAGAIN);
}
-out_eio:
- error = -EIO;
out_err:
rpc_exit(task, error);
dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
@@ -2291,7 +2304,7 @@ static void rpc_show_task(const struct rpc_clnt *clnt,
printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
task->tk_pid, task->tk_flags, task->tk_status,
clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
- clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
+ clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
task->tk_action, rpc_waitq);
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 406859cc68aa..f94567b45bb3 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -409,7 +409,7 @@ rpc_show_info(struct seq_file *m, void *v)
rcu_read_lock();
seq_printf(m, "RPC server: %s\n",
rcu_dereference(clnt->cl_xprt)->servername);
- seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
+ seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_program->name,
clnt->cl_prog, clnt->cl_vers);
seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
@@ -480,23 +480,6 @@ static const struct dentry_operations rpc_dentry_operations = {
.d_delete = rpc_delete_dentry,
};
-/*
- * Lookup the data. This is trivial - if the dentry didn't already
- * exist, we know it is negative.
- */
-static struct dentry *
-rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
-{
- if (dentry->d_name.len > NAME_MAX)
- return ERR_PTR(-ENAMETOOLONG);
- d_add(dentry, NULL);
- return NULL;
-}
-
-static const struct inode_operations rpc_dir_inode_operations = {
- .lookup = rpc_lookup,
-};
-
static struct inode *
rpc_get_inode(struct super_block *sb, umode_t mode)
{
@@ -509,7 +492,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
- inode->i_op = &rpc_dir_inode_operations;
+ inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
default:
break;
@@ -901,6 +884,159 @@ rpc_unlink(struct dentry *dentry)
}
EXPORT_SYMBOL_GPL(rpc_unlink);
+/**
+ * rpc_init_pipe_dir_head - initialise a struct rpc_pipe_dir_head
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ */
+void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh)
+{
+ INIT_LIST_HEAD(&pdh->pdh_entries);
+ pdh->pdh_dentry = NULL;
+}
+EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_head);
+
+/**
+ * rpc_init_pipe_dir_object - initialise a struct rpc_pipe_dir_object
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ * @pdo_ops: pointer to const struct rpc_pipe_dir_object_ops
+ * @pdo_data: pointer to caller-defined data
+ */
+void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo,
+ const struct rpc_pipe_dir_object_ops *pdo_ops,
+ void *pdo_data)
+{
+ INIT_LIST_HEAD(&pdo->pdo_head);
+ pdo->pdo_ops = pdo_ops;
+ pdo->pdo_data = pdo_data;
+}
+EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_object);
+
+static int
+rpc_add_pipe_dir_object_locked(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ struct rpc_pipe_dir_object *pdo)
+{
+ int ret = 0;
+
+ if (pdh->pdh_dentry)
+ ret = pdo->pdo_ops->create(pdh->pdh_dentry, pdo);
+ if (ret == 0)
+ list_add_tail(&pdo->pdo_head, &pdh->pdh_entries);
+ return ret;
+}
+
+static void
+rpc_remove_pipe_dir_object_locked(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ struct rpc_pipe_dir_object *pdo)
+{
+ if (pdh->pdh_dentry)
+ pdo->pdo_ops->destroy(pdh->pdh_dentry, pdo);
+ list_del_init(&pdo->pdo_head);
+}
+
+/**
+ * rpc_add_pipe_dir_object - associate a rpc_pipe_dir_object to a directory
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ *
+ */
+int
+rpc_add_pipe_dir_object(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ struct rpc_pipe_dir_object *pdo)
+{
+ int ret = 0;
+
+ if (list_empty(&pdo->pdo_head)) {
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ mutex_lock(&sn->pipefs_sb_lock);
+ ret = rpc_add_pipe_dir_object_locked(net, pdh, pdo);
+ mutex_unlock(&sn->pipefs_sb_lock);
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_add_pipe_dir_object);
+
+/**
+ * rpc_remove_pipe_dir_object - remove a rpc_pipe_dir_object from a directory
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ *
+ */
+void
+rpc_remove_pipe_dir_object(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ struct rpc_pipe_dir_object *pdo)
+{
+ if (!list_empty(&pdo->pdo_head)) {
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ mutex_lock(&sn->pipefs_sb_lock);
+ rpc_remove_pipe_dir_object_locked(net, pdh, pdo);
+ mutex_unlock(&sn->pipefs_sb_lock);
+ }
+}
+EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object);
+
+/**
+ * rpc_find_or_alloc_pipe_dir_object
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @match: match struct rpc_pipe_dir_object to data
+ * @alloc: allocate a new struct rpc_pipe_dir_object
+ * @data: user defined data for match() and alloc()
+ *
+ */
+struct rpc_pipe_dir_object *
+rpc_find_or_alloc_pipe_dir_object(struct net *net,
+ struct rpc_pipe_dir_head *pdh,
+ int (*match)(struct rpc_pipe_dir_object *, void *),
+ struct rpc_pipe_dir_object *(*alloc)(void *),
+ void *data)
+{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+ struct rpc_pipe_dir_object *pdo;
+
+ mutex_lock(&sn->pipefs_sb_lock);
+ list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) {
+ if (!match(pdo, data))
+ continue;
+ goto out;
+ }
+ pdo = alloc(data);
+ if (!pdo)
+ goto out;
+ rpc_add_pipe_dir_object_locked(net, pdh, pdo);
+out:
+ mutex_unlock(&sn->pipefs_sb_lock);
+ return pdo;
+}
+EXPORT_SYMBOL_GPL(rpc_find_or_alloc_pipe_dir_object);
+
+static void
+rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
+{
+ struct rpc_pipe_dir_object *pdo;
+ struct dentry *dir = pdh->pdh_dentry;
+
+ list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
+ pdo->pdo_ops->create(dir, pdo);
+}
+
+static void
+rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
+{
+ struct rpc_pipe_dir_object *pdo;
+ struct dentry *dir = pdh->pdh_dentry;
+
+ list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
+ pdo->pdo_ops->destroy(dir, pdo);
+}
+
enum {
RPCAUTH_info,
RPCAUTH_EOF
@@ -941,16 +1077,29 @@ struct dentry *rpc_create_client_dir(struct dentry *dentry,
const char *name,
struct rpc_clnt *rpc_client)
{
- return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
+ struct dentry *ret;
+
+ ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
rpc_clntdir_populate, rpc_client);
+ if (!IS_ERR(ret)) {
+ rpc_client->cl_pipedir_objects.pdh_dentry = ret;
+ rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ }
+ return ret;
}
/**
* rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
- * @dentry: dentry for the pipe
+ * @rpc_client: rpc_client for the pipe
*/
-int rpc_remove_client_dir(struct dentry *dentry)
+int rpc_remove_client_dir(struct rpc_clnt *rpc_client)
{
+ struct dentry *dentry = rpc_client->cl_pipedir_objects.pdh_dentry;
+
+ if (dentry == NULL)
+ return 0;
+ rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+ rpc_client->cl_pipedir_objects.pdh_dentry = NULL;
return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 93a7a4e94d80..ff3cc4bf4b24 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -258,7 +258,7 @@ static int rpc_wait_bit_killable(void *word)
return 0;
}
-#ifdef RPC_DEBUG
+#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
static void rpc_task_set_debuginfo(struct rpc_task *task)
{
static atomic_t rpc_pid;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 21b75cb08c03..54530490944e 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -188,7 +188,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
seq_printf(seq, "\tRPC iostats version: %s ", RPC_IOSTATS_VERS);
seq_printf(seq, "p/v: %u/%u (%s)\n",
- clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
+ clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
rcu_read_lock();
xprt = rcu_dereference(clnt->cl_xprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7762b9f8a8b7..9c9caaa5e0d3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -442,7 +442,7 @@ static void svc_tcp_write_space(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock)
+ if (sk_stream_is_writeable(sk) && sock)
clear_bit(SOCK_NOSPACE, &sock->flags);
svc_write_space(sk);
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ddf0602603bd..ee03d35677d9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,6 +47,8 @@
#include <net/udp.h>
#include <net/tcp.h>
+#include <trace/events/sunrpc.h>
+
#include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt);
@@ -665,8 +667,10 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
- if (sock != NULL)
+ if (sock != NULL) {
kernel_sock_shutdown(sock, SHUT_WR);
+ trace_rpc_socket_shutdown(xprt, sock);
+ }
}
/**
@@ -811,6 +815,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
sk->sk_no_check = 0;
+ trace_rpc_socket_close(&transport->xprt, sock);
sock_release(sock);
}
@@ -1492,6 +1497,7 @@ static void xs_tcp_state_change(struct sock *sk)
sock_flag(sk, SOCK_ZAPPED),
sk->sk_shutdown);
+ trace_rpc_socket_state_change(xprt, sk->sk_socket);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
spin_lock(&xprt->transport_lock);
@@ -1602,7 +1608,7 @@ static void xs_tcp_write_space(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
/* from net/core/stream.c:sk_stream_write_space */
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ if (sk_stream_is_writeable(sk))
xs_write_space(sk);
read_unlock_bh(&sk->sk_callback_lock);
@@ -1896,6 +1902,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
status = xs_local_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, status);
switch (status) {
case 0:
dprintk("RPC: xprt %p connected to %s\n",
@@ -2039,6 +2046,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
xprt->address_strings[RPC_DISPLAY_PORT]);
xs_udp_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, 0);
status = 0;
out:
xprt_clear_connecting(xprt);
@@ -2064,6 +2072,8 @@ static void xs_abort_connection(struct sock_xprt *transport)
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
+ trace_rpc_socket_reset_connection(&transport->xprt,
+ transport->sock, result);
if (!result)
xs_sock_reset_connection_flags(&transport->xprt);
dprintk("RPC: AF_UNSPEC connect return code %d\n", result);
@@ -2194,6 +2204,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
xprt->address_strings[RPC_DISPLAY_PORT]);
status = xs_tcp_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, status);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9bc6db04be3e..e7000be321b0 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
/* Allow network administrator to have same access as root. */
if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
- uid_eq(root_uid, current_uid())) {
+ uid_eq(root_uid, current_euid())) {
int mode = (table->mode >> 6) & 7;
return (mode << 6) | (mode << 3) | mode;
}
/* Allow netns root group to have the same access as the root group */
- if (gid_eq(root_gid, current_gid())) {
+ if (in_egroup_p(root_gid)) {
int mode = (table->mode >> 3) & 7;
return (mode << 3) | mode;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c4ce243824bb..c1f403bed683 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1246,6 +1246,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
return 0;
}
+static void unix_sock_inherit_flags(const struct socket *old,
+ struct socket *new)
+{
+ if (test_bit(SOCK_PASSCRED, &old->flags))
+ set_bit(SOCK_PASSCRED, &new->flags);
+ if (test_bit(SOCK_PASSSEC, &old->flags))
+ set_bit(SOCK_PASSSEC, &new->flags);
+}
+
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
struct sock *sk = sock->sk;
@@ -1280,6 +1289,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
/* attach accepted sock to socket */
unix_state_lock(tsk);
newsock->state = SS_CONNECTED;
+ unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
unix_state_unlock(tsk);
return 0;
@@ -1479,7 +1489,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
MAX_SKB_FRAGS * PAGE_SIZE);
skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
- msg->msg_flags & MSG_DONTWAIT, &err);
+ msg->msg_flags & MSG_DONTWAIT, &err,
+ PAGE_ALLOC_COSTLY_ORDER);
if (skb == NULL)
goto out;
@@ -1596,6 +1607,10 @@ out:
return err;
}
+/* We use paged skbs for stream sockets, and limit occupancy to 32768
+ * bytes, and a minimum of a full page.
+ */
+#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct msghdr *msg, size_t len)
@@ -1609,6 +1624,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct scm_cookie tmp_scm;
bool fds_sent = false;
int max_level;
+ int data_len;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1635,40 +1651,22 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto pipe_err;
while (sent < len) {
- /*
- * Optimisation for the fact that under 0.01% of X
- * messages typically need breaking up.
- */
-
- size = len-sent;
+ size = len - sent;
/* Keep two messages in the pipe so it schedules better */
- if (size > ((sk->sk_sndbuf >> 1) - 64))
- size = (sk->sk_sndbuf >> 1) - 64;
+ size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
- if (size > SKB_MAX_ALLOC)
- size = SKB_MAX_ALLOC;
+ /* allow fallback to order-0 allocations */
+ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
- /*
- * Grab a buffer
- */
+ data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
- skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
- &err);
-
- if (skb == NULL)
+ skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
+ msg->msg_flags & MSG_DONTWAIT, &err,
+ get_order(UNIX_SKB_FRAGS_SZ));
+ if (!skb)
goto out_err;
- /*
- * If you pass two values to the sock_alloc_send_skb
- * it tries to grab the large buffer with GFP_NOFS
- * (which can fail easily), and if it fails grab the
- * fallback size buffer which is under a page and will
- * succeed. [Alan]
- */
- size = min_t(int, size, skb_tailroom(skb));
-
-
/* Only send the fds in the first buffer */
err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
if (err < 0) {
@@ -1678,7 +1676,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
max_level = err + 1;
fds_sent = true;
- err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+ err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov,
+ sent, size);
if (err) {
kfree_skb(skb);
goto out_err;
@@ -1890,6 +1892,11 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
return timeo;
}
+static unsigned int unix_skb_len(const struct sk_buff *skb)
+{
+ return skb->len - UNIXCB(skb).consumed;
+}
+
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size,
int flags)
@@ -1977,8 +1984,8 @@ again:
}
skip = sk_peek_offset(sk, flags);
- while (skip >= skb->len) {
- skip -= skb->len;
+ while (skip >= unix_skb_len(skb)) {
+ skip -= unix_skb_len(skb);
last = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (!skb)
@@ -2005,8 +2012,9 @@ again:
sunaddr = NULL;
}
- chunk = min_t(unsigned int, skb->len - skip, size);
- if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
+ chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
+ if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
+ msg->msg_iov, chunk)) {
if (copied == 0)
copied = -EFAULT;
break;
@@ -2016,14 +2024,14 @@ again:
/* Mark read part of skb as used */
if (!(flags & MSG_PEEK)) {
- skb_pull(skb, chunk);
+ UNIXCB(skb).consumed += chunk;
sk_peek_offset_bwd(sk, chunk);
if (UNIXCB(skb).fp)
unix_detach_fds(siocb->scm, skb);
- if (skb->len)
+ if (unix_skb_len(skb))
break;
skb_unlink(skb, &sk->sk_receive_queue);
@@ -2107,7 +2115,7 @@ long unix_inq_len(struct sock *sk)
if (sk->sk_type == SOCK_STREAM ||
sk->sk_type == SOCK_SEQPACKET) {
skb_queue_walk(&sk->sk_receive_queue, skb)
- amount += skb->len;
+ amount += unix_skb_len(skb);
} else {
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
diff --git a/net/unix/diag.c b/net/unix/diag.c
index d591091603bf..86fa0f3b2caf 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
rep->udiag_family = AF_UNIX;
rep->udiag_type = sk->sk_type;
rep->udiag_state = sk->sk_state;
+ rep->pad = 0;
rep->udiag_ino = sk_ino;
sock_diag_save_cookie(sk, rep->udiag_cookie);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 4d9334683f84..545c08b8a1d4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -96,8 +96,7 @@
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <net/sock.h>
-
-#include "af_vsock.h"
+#include <net/af_vsock.h>
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
diff --git a/net/vmw_vsock/af_vsock.h b/net/vmw_vsock/af_vsock.h
deleted file mode 100644
index 7d64d3609ec9..000000000000
--- a/net/vmw_vsock/af_vsock.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * VMware vSockets Driver
- *
- * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-#ifndef __AF_VSOCK_H__
-#define __AF_VSOCK_H__
-
-#include <linux/kernel.h>
-#include <linux/workqueue.h>
-#include <linux/vm_sockets.h>
-
-#include "vsock_addr.h"
-
-#define LAST_RESERVED_PORT 1023
-
-#define vsock_sk(__sk) ((struct vsock_sock *)__sk)
-#define sk_vsock(__vsk) (&(__vsk)->sk)
-
-struct vsock_sock {
- /* sk must be the first member. */
- struct sock sk;
- struct sockaddr_vm local_addr;
- struct sockaddr_vm remote_addr;
- /* Links for the global tables of bound and connected sockets. */
- struct list_head bound_table;
- struct list_head connected_table;
- /* Accessed without the socket lock held. This means it can never be
- * modified outsided of socket create or destruct.
- */
- bool trusted;
- bool cached_peer_allow_dgram; /* Dgram communication allowed to
- * cached peer?
- */
- u32 cached_peer; /* Context ID of last dgram destination check. */
- const struct cred *owner;
- /* Rest are SOCK_STREAM only. */
- long connect_timeout;
- /* Listening socket that this came from. */
- struct sock *listener;
- /* Used for pending list and accept queue during connection handshake.
- * The listening socket is the head for both lists. Sockets created
- * for connection requests are placed in the pending list until they
- * are connected, at which point they are put in the accept queue list
- * so they can be accepted in accept(). If accept() cannot accept the
- * connection, it is marked as rejected so the cleanup function knows
- * to clean up the socket.
- */
- struct list_head pending_links;
- struct list_head accept_queue;
- bool rejected;
- struct delayed_work dwork;
- u32 peer_shutdown;
- bool sent_request;
- bool ignore_connecting_rst;
-
- /* Private to transport. */
- void *trans;
-};
-
-s64 vsock_stream_has_data(struct vsock_sock *vsk);
-s64 vsock_stream_has_space(struct vsock_sock *vsk);
-void vsock_pending_work(struct work_struct *work);
-struct sock *__vsock_create(struct net *net,
- struct socket *sock,
- struct sock *parent,
- gfp_t priority, unsigned short type);
-
-/**** TRANSPORT ****/
-
-struct vsock_transport_recv_notify_data {
- u64 data1; /* Transport-defined. */
- u64 data2; /* Transport-defined. */
- bool notify_on_block;
-};
-
-struct vsock_transport_send_notify_data {
- u64 data1; /* Transport-defined. */
- u64 data2; /* Transport-defined. */
-};
-
-struct vsock_transport {
- /* Initialize/tear-down socket. */
- int (*init)(struct vsock_sock *, struct vsock_sock *);
- void (*destruct)(struct vsock_sock *);
- void (*release)(struct vsock_sock *);
-
- /* Connections. */
- int (*connect)(struct vsock_sock *);
-
- /* DGRAM. */
- int (*dgram_bind)(struct vsock_sock *, struct sockaddr_vm *);
- int (*dgram_dequeue)(struct kiocb *kiocb, struct vsock_sock *vsk,
- struct msghdr *msg, size_t len, int flags);
- int (*dgram_enqueue)(struct vsock_sock *, struct sockaddr_vm *,
- struct iovec *, size_t len);
- bool (*dgram_allow)(u32 cid, u32 port);
-
- /* STREAM. */
- /* TODO: stream_bind() */
- ssize_t (*stream_dequeue)(struct vsock_sock *, struct iovec *,
- size_t len, int flags);
- ssize_t (*stream_enqueue)(struct vsock_sock *, struct iovec *,
- size_t len);
- s64 (*stream_has_data)(struct vsock_sock *);
- s64 (*stream_has_space)(struct vsock_sock *);
- u64 (*stream_rcvhiwat)(struct vsock_sock *);
- bool (*stream_is_active)(struct vsock_sock *);
- bool (*stream_allow)(u32 cid, u32 port);
-
- /* Notification. */
- int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
- int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
- int (*notify_recv_init)(struct vsock_sock *, size_t,
- struct vsock_transport_recv_notify_data *);
- int (*notify_recv_pre_block)(struct vsock_sock *, size_t,
- struct vsock_transport_recv_notify_data *);
- int (*notify_recv_pre_dequeue)(struct vsock_sock *, size_t,
- struct vsock_transport_recv_notify_data *);
- int (*notify_recv_post_dequeue)(struct vsock_sock *, size_t,
- ssize_t, bool, struct vsock_transport_recv_notify_data *);
- int (*notify_send_init)(struct vsock_sock *,
- struct vsock_transport_send_notify_data *);
- int (*notify_send_pre_block)(struct vsock_sock *,
- struct vsock_transport_send_notify_data *);
- int (*notify_send_pre_enqueue)(struct vsock_sock *,
- struct vsock_transport_send_notify_data *);
- int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
- struct vsock_transport_send_notify_data *);
-
- /* Shutdown. */
- int (*shutdown)(struct vsock_sock *, int);
-
- /* Buffer sizes. */
- void (*set_buffer_size)(struct vsock_sock *, u64);
- void (*set_min_buffer_size)(struct vsock_sock *, u64);
- void (*set_max_buffer_size)(struct vsock_sock *, u64);
- u64 (*get_buffer_size)(struct vsock_sock *);
- u64 (*get_min_buffer_size)(struct vsock_sock *);
- u64 (*get_max_buffer_size)(struct vsock_sock *);
-
- /* Addressing. */
- u32 (*get_local_cid)(void);
-};
-
-/**** CORE ****/
-
-int vsock_core_init(const struct vsock_transport *t);
-void vsock_core_exit(void);
-
-/**** UTILS ****/
-
-void vsock_release_pending(struct sock *pending);
-void vsock_add_pending(struct sock *listener, struct sock *pending);
-void vsock_remove_pending(struct sock *listener, struct sock *pending);
-void vsock_enqueue_accept(struct sock *listener, struct sock *connected);
-void vsock_insert_connected(struct vsock_sock *vsk);
-void vsock_remove_bound(struct vsock_sock *vsk);
-void vsock_remove_connected(struct vsock_sock *vsk);
-struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
-struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
- struct sockaddr_vm *dst);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
-
-#endif /* __AF_VSOCK_H__ */
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index ffc11df02af2..9d6986634e0b 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -34,8 +34,8 @@
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <net/sock.h>
+#include <net/af_vsock.h>
-#include "af_vsock.h"
#include "vmci_transport_notify.h"
static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index fd88ea8924e4..ce6c9623d5f0 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -19,8 +19,8 @@
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
-#include "vsock_addr.h"
-#include "af_vsock.h"
+#include <net/vsock_addr.h>
+#include <net/af_vsock.h>
/* If the packet format changes in a release then this should change too. */
#define VMCI_TRANSPORT_PACKET_VERSION 1
diff --git a/net/vmw_vsock/vsock_addr.c b/net/vmw_vsock/vsock_addr.c
index ec2611b4ea0e..82486ee55eac 100644
--- a/net/vmw_vsock/vsock_addr.c
+++ b/net/vmw_vsock/vsock_addr.c
@@ -17,8 +17,7 @@
#include <linux/socket.h>
#include <linux/stddef.h>
#include <net/sock.h>
-
-#include "vsock_addr.h"
+#include <net/vsock_addr.h>
void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port)
{
diff --git a/net/vmw_vsock/vsock_addr.h b/net/vmw_vsock/vsock_addr.h
deleted file mode 100644
index 9ccd5316eac0..000000000000
--- a/net/vmw_vsock/vsock_addr.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * VMware vSockets Driver
- *
- * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation version 2 and no later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-#ifndef _VSOCK_ADDR_H_
-#define _VSOCK_ADDR_H_
-
-#include <linux/vm_sockets.h>
-
-void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port);
-int vsock_addr_validate(const struct sockaddr_vm *addr);
-bool vsock_addr_bound(const struct sockaddr_vm *addr);
-void vsock_addr_unbind(struct sockaddr_vm *addr);
-bool vsock_addr_equals_addr(const struct sockaddr_vm *addr,
- const struct sockaddr_vm *other);
-int vsock_addr_cast(const struct sockaddr *addr, size_t len,
- struct sockaddr_vm **out_addr);
-
-#endif
diff --git a/net/wireless/core.c b/net/wireless/core.c
index a8c29fa4f1b3..aff959e5a1b3 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -462,6 +462,14 @@ int wiphy_register(struct wiphy *wiphy)
return -EINVAL;
#endif
+ if (WARN_ON(wiphy->coalesce &&
+ (!wiphy->coalesce->n_rules ||
+ !wiphy->coalesce->n_patterns) &&
+ (!wiphy->coalesce->pattern_min_len ||
+ wiphy->coalesce->pattern_min_len >
+ wiphy->coalesce->pattern_max_len)))
+ return -EINVAL;
+
if (WARN_ON(wiphy->ap_sme_capa &&
!(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME)))
return -EINVAL;
@@ -558,18 +566,13 @@ int wiphy_register(struct wiphy *wiphy)
/* check and set up bitrates */
ieee80211_set_bitrate_flags(wiphy);
-
+ rtnl_lock();
res = device_add(&rdev->wiphy.dev);
- if (res)
- return res;
-
- res = rfkill_register(rdev->rfkill);
if (res) {
- device_del(&rdev->wiphy.dev);
+ rtnl_unlock();
return res;
}
- rtnl_lock();
/* set up regulatory info */
wiphy_regulatory_register(wiphy);
@@ -598,6 +601,15 @@ int wiphy_register(struct wiphy *wiphy)
rdev->wiphy.registered = true;
rtnl_unlock();
+
+ res = rfkill_register(rdev->rfkill);
+ if (res) {
+ rfkill_destroy(rdev->rfkill);
+ rdev->rfkill = NULL;
+ wiphy_unregister(&rdev->wiphy);
+ return res;
+ }
+
return 0;
}
EXPORT_SYMBOL(wiphy_register);
@@ -632,7 +644,8 @@ void wiphy_unregister(struct wiphy *wiphy)
rtnl_unlock();
__count == 0; }));
- rfkill_unregister(rdev->rfkill);
+ if (rdev->rfkill)
+ rfkill_unregister(rdev->rfkill);
rtnl_lock();
rdev->wiphy.registered = false;
@@ -668,6 +681,7 @@ void wiphy_unregister(struct wiphy *wiphy)
rdev_set_wakeup(rdev, false);
#endif
cfg80211_rdev_free_wowlan(rdev);
+ cfg80211_rdev_free_coalesce(rdev);
}
EXPORT_SYMBOL(wiphy_unregister);
@@ -944,8 +958,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
case NETDEV_PRE_UP:
if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)))
return notifier_from_errno(-EOPNOTSUPP);
- if (rfkill_blocked(rdev->rfkill))
- return notifier_from_errno(-ERFKILL);
ret = cfg80211_can_add_interface(rdev, wdev->iftype);
if (ret)
return notifier_from_errno(ret);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index a6b45bf00f33..3159e9c284c5 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -79,6 +79,8 @@ struct cfg80211_registered_device {
/* netlink port which started critical protocol (0 means not started) */
u32 crit_proto_nlportid;
+ struct cfg80211_coalesce *coalesce;
+
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
struct wiphy wiphy __aligned(NETDEV_ALIGN);
@@ -409,6 +411,9 @@ static inline int
cfg80211_can_add_interface(struct cfg80211_registered_device *rdev,
enum nl80211_iftype iftype)
{
+ if (rfkill_blocked(rdev->rfkill))
+ return -ERFKILL;
+
return cfg80211_can_change_interface(rdev, NULL, iftype);
}
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 39bff7d36768..403fe29c024d 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -263,6 +263,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
if (chan->flags & IEEE80211_CHAN_DISABLED)
continue;
wdev->wext.ibss.chandef.chan = chan;
+ wdev->wext.ibss.chandef.center_freq1 =
+ chan->center_freq;
break;
}
@@ -347,6 +349,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
if (chan) {
wdev->wext.ibss.chandef.chan = chan;
wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
+ wdev->wext.ibss.chandef.center_freq1 = freq;
wdev->wext.ibss.channel_fixed = true;
} else {
/* cfg80211_ibss_wext_join will pick one if needed */
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 30c49202ee4d..0553fd4d85ae 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -167,9 +167,12 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
* basic rates
*/
if (!setup->basic_rates) {
+ enum nl80211_bss_scan_width scan_width;
struct ieee80211_supported_band *sband =
rdev->wiphy.bands[setup->chandef.chan->band];
- setup->basic_rates = ieee80211_mandatory_rates(sband);
+ scan_width = cfg80211_chandef_to_scan_width(&setup->chandef);
+ setup->basic_rates = ieee80211_mandatory_rates(sband,
+ scan_width);
}
if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef))
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index bfac5e186f57..8d49c1ce3dea 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -621,7 +621,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
}
bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
- const u8 *buf, size_t len, gfp_t gfp)
+ const u8 *buf, size_t len, u32 flags, gfp_t gfp)
{
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
@@ -664,7 +664,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
/* Indicate the received Action frame to user space */
if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
freq, sig_mbm,
- buf, len, gfp))
+ buf, len, flags, gfp))
continue;
result = true;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5f6e982cdcf4..626dc3b5fd8d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -349,6 +349,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_DATA_LEN },
[NL80211_ATTR_PEER_AID] = { .type = NLA_U16 },
+ [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 },
+ [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG },
+ [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED },
+ [NL80211_ATTR_CSA_C_OFF_BEACON] = { .type = NLA_U16 },
+ [NL80211_ATTR_CSA_C_OFF_PRESP] = { .type = NLA_U16 },
};
/* policy for the key attributes */
@@ -403,6 +408,14 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
[NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 },
};
+/* policy for coalesce rule attributes */
+static const struct nla_policy
+nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
+ [NL80211_ATTR_COALESCE_RULE_DELAY] = { .type = NLA_U32 },
+ [NL80211_ATTR_COALESCE_RULE_CONDITION] = { .type = NLA_U32 },
+ [NL80211_ATTR_COALESCE_RULE_PKT_PATTERN] = { .type = NLA_NESTED },
+};
+
/* policy for GTK rekey offload attributes */
static const struct nla_policy
nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
@@ -976,7 +989,7 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
return -ENOBUFS;
if (dev->wiphy.wowlan->n_patterns) {
- struct nl80211_wowlan_pattern_support pat = {
+ struct nl80211_pattern_support pat = {
.max_patterns = dev->wiphy.wowlan->n_patterns,
.min_pattern_len = dev->wiphy.wowlan->pattern_min_len,
.max_pattern_len = dev->wiphy.wowlan->pattern_max_len,
@@ -997,6 +1010,27 @@ static int nl80211_send_wowlan(struct sk_buff *msg,
}
#endif
+static int nl80211_send_coalesce(struct sk_buff *msg,
+ struct cfg80211_registered_device *dev)
+{
+ struct nl80211_coalesce_rule_support rule;
+
+ if (!dev->wiphy.coalesce)
+ return 0;
+
+ rule.max_rules = dev->wiphy.coalesce->n_rules;
+ rule.max_delay = dev->wiphy.coalesce->max_delay;
+ rule.pat.max_patterns = dev->wiphy.coalesce->n_patterns;
+ rule.pat.min_pattern_len = dev->wiphy.coalesce->pattern_min_len;
+ rule.pat.max_pattern_len = dev->wiphy.coalesce->pattern_max_len;
+ rule.pat.max_pkt_offset = dev->wiphy.coalesce->max_pkt_offset;
+
+ if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule))
+ return -ENOBUFS;
+
+ return 0;
+}
+
static int nl80211_send_band_rateinfo(struct sk_buff *msg,
struct ieee80211_supported_band *sband)
{
@@ -1395,6 +1429,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
if (state->split) {
CMD(crit_proto_start, CRIT_PROTOCOL_START);
CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
+ if (dev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)
+ CMD(channel_switch, CHANNEL_SWITCH);
}
#ifdef CONFIG_NL80211_TESTMODE
@@ -1515,6 +1551,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
dev->wiphy.vht_capa_mod_mask))
goto nla_put_failure;
+ state->split_start++;
+ break;
+ case 10:
+ if (nl80211_send_coalesce(msg, dev))
+ goto nla_put_failure;
+
/* done */
state->split_start = 0;
break;
@@ -2379,7 +2421,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
change = true;
}
- if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) &&
+ if (flags && (*flags & MONITOR_FLAG_ACTIVE) &&
!(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
return -EOPNOTSUPP;
@@ -2441,7 +2483,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
&flags);
- if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) &&
+ if (!err && (flags & MONITOR_FLAG_ACTIVE) &&
!(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR))
return -EOPNOTSUPP;
@@ -5580,6 +5622,111 @@ static int nl80211_start_radar_detection(struct sk_buff *skb,
return err;
}
+static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_csa_settings params;
+ /* csa_attrs is defined static to avoid waste of stack size - this
+ * function is called under RTNL lock, so this should not be a problem.
+ */
+ static struct nlattr *csa_attrs[NL80211_ATTR_MAX+1];
+ u8 radar_detect_width = 0;
+ int err;
+
+ if (!rdev->ops->channel_switch ||
+ !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
+ return -EOPNOTSUPP;
+
+ /* may add IBSS support later */
+ if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+ dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
+ return -EOPNOTSUPP;
+
+ memset(&params, 0, sizeof(params));
+
+ if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
+ !info->attrs[NL80211_ATTR_CH_SWITCH_COUNT])
+ return -EINVAL;
+
+ /* only important for AP, IBSS and mesh create IEs internally */
+ if (!info->attrs[NL80211_ATTR_CSA_IES])
+ return -EINVAL;
+
+ /* useless if AP is not running */
+ if (!wdev->beacon_interval)
+ return -EINVAL;
+
+ params.count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]);
+
+ err = nl80211_parse_beacon(info->attrs, &params.beacon_after);
+ if (err)
+ return err;
+
+ err = nla_parse_nested(csa_attrs, NL80211_ATTR_MAX,
+ info->attrs[NL80211_ATTR_CSA_IES],
+ nl80211_policy);
+ if (err)
+ return err;
+
+ err = nl80211_parse_beacon(csa_attrs, &params.beacon_csa);
+ if (err)
+ return err;
+
+ if (!csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON])
+ return -EINVAL;
+
+ params.counter_offset_beacon =
+ nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_BEACON]);
+ if (params.counter_offset_beacon >= params.beacon_csa.tail_len)
+ return -EINVAL;
+
+ /* sanity check - counters should be the same */
+ if (params.beacon_csa.tail[params.counter_offset_beacon] !=
+ params.count)
+ return -EINVAL;
+
+ if (csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]) {
+ params.counter_offset_presp =
+ nla_get_u16(csa_attrs[NL80211_ATTR_CSA_C_OFF_PRESP]);
+ if (params.counter_offset_presp >=
+ params.beacon_csa.probe_resp_len)
+ return -EINVAL;
+
+ if (params.beacon_csa.probe_resp[params.counter_offset_presp] !=
+ params.count)
+ return -EINVAL;
+ }
+
+ err = nl80211_parse_chandef(rdev, info, &params.chandef);
+ if (err)
+ return err;
+
+ if (!cfg80211_reg_can_beacon(&rdev->wiphy, &params.chandef))
+ return -EINVAL;
+
+ err = cfg80211_chandef_dfs_required(wdev->wiphy, &params.chandef);
+ if (err < 0) {
+ return err;
+ } else if (err) {
+ radar_detect_width = BIT(params.chandef.width);
+ params.radar_required = true;
+ }
+
+ err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype,
+ params.chandef.chan,
+ CHAN_MODE_SHARED,
+ radar_detect_width);
+ if (err)
+ return err;
+
+ if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
+ params.block_tx = true;
+
+ return rdev_channel_switch(rdev, dev, &params);
+}
+
static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
u32 seq, int flags,
struct cfg80211_registered_device *rdev,
@@ -5641,6 +5788,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
goto nla_put_failure;
if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) ||
nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) ||
+ nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) ||
nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO,
jiffies_to_msecs(jiffies - intbss->ts)))
goto nla_put_failure;
@@ -6321,6 +6469,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
switch (ibss.chandef.width) {
+ case NL80211_CHAN_WIDTH_5:
+ case NL80211_CHAN_WIDTH_10:
case NL80211_CHAN_WIDTH_20_NOHT:
break;
case NL80211_CHAN_WIDTH_20:
@@ -6348,6 +6498,19 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
return err;
}
+ if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+ memcpy(&ibss.ht_capa_mask,
+ nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
+ sizeof(ibss.ht_capa_mask));
+
+ if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
+ if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+ return -EINVAL;
+ memcpy(&ibss.ht_capa,
+ nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
+ sizeof(ibss.ht_capa));
+ }
+
if (info->attrs[NL80211_ATTR_MCAST_RATE] &&
!nl80211_parse_mcast_rate(rdev, ibss.mcast_rate,
nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE])))
@@ -6430,19 +6593,30 @@ static struct genl_multicast_group nl80211_testmode_mcgrp = {
static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct wireless_dev *wdev =
+ __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs);
int err;
+ if (!rdev->ops->testmode_cmd)
+ return -EOPNOTSUPP;
+
+ if (IS_ERR(wdev)) {
+ err = PTR_ERR(wdev);
+ if (err != -EINVAL)
+ return err;
+ wdev = NULL;
+ } else if (wdev->wiphy != &rdev->wiphy) {
+ return -EINVAL;
+ }
+
if (!info->attrs[NL80211_ATTR_TESTDATA])
return -EINVAL;
- err = -EOPNOTSUPP;
- if (rdev->ops->testmode_cmd) {
- rdev->testmode_info = info;
- err = rdev_testmode_cmd(rdev,
+ rdev->testmode_info = info;
+ err = rdev_testmode_cmd(rdev, wdev,
nla_data(info->attrs[NL80211_ATTR_TESTDATA]),
nla_len(info->attrs[NL80211_ATTR_TESTDATA]));
- rdev->testmode_info = NULL;
- }
+ rdev->testmode_info = NULL;
return err;
}
@@ -7404,14 +7578,12 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
u32 rate, u32 pkts, u32 intvl)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
- struct wireless_dev *wdev;
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
if (rate > 100 || intvl > NL80211_CQM_TXE_MAX_INTVL)
return -EINVAL;
- wdev = dev->ieee80211_ptr;
-
if (!rdev->ops->set_cqm_txe_config)
return -EOPNOTSUPP;
@@ -7426,13 +7598,15 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
s32 threshold, u32 hysteresis)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
- struct wireless_dev *wdev;
struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
if (threshold > 0)
return -EINVAL;
- wdev = dev->ieee80211_ptr;
+ /* disabling - hysteresis should also be zero then */
+ if (threshold == 0)
+ hysteresis = 0;
if (!rdev->ops->set_cqm_rssi_config)
return -EOPNOTSUPP;
@@ -7451,36 +7625,33 @@ static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
int err;
cqm = info->attrs[NL80211_ATTR_CQM];
- if (!cqm) {
- err = -EINVAL;
- goto out;
- }
+ if (!cqm)
+ return -EINVAL;
err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
nl80211_attr_cqm_policy);
if (err)
- goto out;
+ return err;
if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
- s32 threshold;
- u32 hysteresis;
- threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
- hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
- err = nl80211_set_cqm_rssi(info, threshold, hysteresis);
- } else if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
- attrs[NL80211_ATTR_CQM_TXE_PKTS] &&
- attrs[NL80211_ATTR_CQM_TXE_INTVL]) {
- u32 rate, pkts, intvl;
- rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]);
- pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]);
- intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]);
- err = nl80211_set_cqm_txe(info, rate, pkts, intvl);
- } else
- err = -EINVAL;
+ s32 threshold = nla_get_s32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
+ u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
-out:
- return err;
+ return nl80211_set_cqm_rssi(info, threshold, hysteresis);
+ }
+
+ if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
+ attrs[NL80211_ATTR_CQM_TXE_PKTS] &&
+ attrs[NL80211_ATTR_CQM_TXE_INTVL]) {
+ u32 rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]);
+ u32 pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]);
+ u32 intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]);
+
+ return nl80211_set_cqm_txe(info, rate, pkts, intvl);
+ }
+
+ return -EINVAL;
}
static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
@@ -7596,12 +7767,11 @@ static int nl80211_send_wowlan_patterns(struct sk_buff *msg,
if (!nl_pat)
return -ENOBUFS;
pat_len = wowlan->patterns[i].pattern_len;
- if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK,
- DIV_ROUND_UP(pat_len, 8),
+ if (nla_put(msg, NL80211_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8),
wowlan->patterns[i].mask) ||
- nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN,
- pat_len, wowlan->patterns[i].pattern) ||
- nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET,
+ nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len,
+ wowlan->patterns[i].pattern) ||
+ nla_put_u32(msg, NL80211_PKTPAT_OFFSET,
wowlan->patterns[i].pkt_offset))
return -ENOBUFS;
nla_nest_end(msg, nl_pat);
@@ -7942,7 +8112,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
struct nlattr *pat;
int n_patterns = 0;
int rem, pat_len, mask_len, pkt_offset;
- struct nlattr *pat_tb[NUM_NL80211_WOWLAN_PKTPAT];
+ struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
rem)
@@ -7961,26 +8131,25 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
rem) {
- nla_parse(pat_tb, MAX_NL80211_WOWLAN_PKTPAT,
- nla_data(pat), nla_len(pat), NULL);
+ nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
+ nla_len(pat), NULL);
err = -EINVAL;
- if (!pat_tb[NL80211_WOWLAN_PKTPAT_MASK] ||
- !pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN])
+ if (!pat_tb[NL80211_PKTPAT_MASK] ||
+ !pat_tb[NL80211_PKTPAT_PATTERN])
goto error;
- pat_len = nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]);
+ pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]);
mask_len = DIV_ROUND_UP(pat_len, 8);
- if (nla_len(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]) !=
- mask_len)
+ if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len)
goto error;
if (pat_len > wowlan->pattern_max_len ||
pat_len < wowlan->pattern_min_len)
goto error;
- if (!pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET])
+ if (!pat_tb[NL80211_PKTPAT_OFFSET])
pkt_offset = 0;
else
pkt_offset = nla_get_u32(
- pat_tb[NL80211_WOWLAN_PKTPAT_OFFSET]);
+ pat_tb[NL80211_PKTPAT_OFFSET]);
if (pkt_offset > wowlan->max_pkt_offset)
goto error;
new_triggers.patterns[i].pkt_offset = pkt_offset;
@@ -7994,11 +8163,11 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
new_triggers.patterns[i].pattern =
new_triggers.patterns[i].mask + mask_len;
memcpy(new_triggers.patterns[i].mask,
- nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_MASK]),
+ nla_data(pat_tb[NL80211_PKTPAT_MASK]),
mask_len);
new_triggers.patterns[i].pattern_len = pat_len;
memcpy(new_triggers.patterns[i].pattern,
- nla_data(pat_tb[NL80211_WOWLAN_PKTPAT_PATTERN]),
+ nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
pat_len);
i++;
}
@@ -8037,6 +8206,264 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
}
#endif
+static int nl80211_send_coalesce_rules(struct sk_buff *msg,
+ struct cfg80211_registered_device *rdev)
+{
+ struct nlattr *nl_pats, *nl_pat, *nl_rule, *nl_rules;
+ int i, j, pat_len;
+ struct cfg80211_coalesce_rules *rule;
+
+ if (!rdev->coalesce->n_rules)
+ return 0;
+
+ nl_rules = nla_nest_start(msg, NL80211_ATTR_COALESCE_RULE);
+ if (!nl_rules)
+ return -ENOBUFS;
+
+ for (i = 0; i < rdev->coalesce->n_rules; i++) {
+ nl_rule = nla_nest_start(msg, i + 1);
+ if (!nl_rule)
+ return -ENOBUFS;
+
+ rule = &rdev->coalesce->rules[i];
+ if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_DELAY,
+ rule->delay))
+ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_CONDITION,
+ rule->condition))
+ return -ENOBUFS;
+
+ nl_pats = nla_nest_start(msg,
+ NL80211_ATTR_COALESCE_RULE_PKT_PATTERN);
+ if (!nl_pats)
+ return -ENOBUFS;
+
+ for (j = 0; j < rule->n_patterns; j++) {
+ nl_pat = nla_nest_start(msg, j + 1);
+ if (!nl_pat)
+ return -ENOBUFS;
+ pat_len = rule->patterns[j].pattern_len;
+ if (nla_put(msg, NL80211_PKTPAT_MASK,
+ DIV_ROUND_UP(pat_len, 8),
+ rule->patterns[j].mask) ||
+ nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len,
+ rule->patterns[j].pattern) ||
+ nla_put_u32(msg, NL80211_PKTPAT_OFFSET,
+ rule->patterns[j].pkt_offset))
+ return -ENOBUFS;
+ nla_nest_end(msg, nl_pat);
+ }
+ nla_nest_end(msg, nl_pats);
+ nla_nest_end(msg, nl_rule);
+ }
+ nla_nest_end(msg, nl_rules);
+
+ return 0;
+}
+
+static int nl80211_get_coalesce(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct sk_buff *msg;
+ void *hdr;
+
+ if (!rdev->wiphy.coalesce)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+ NL80211_CMD_GET_COALESCE);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (rdev->coalesce && nl80211_send_coalesce_rules(msg, rdev))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+nla_put_failure:
+ nlmsg_free(msg);
+ return -ENOBUFS;
+}
+
+void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev)
+{
+ struct cfg80211_coalesce *coalesce = rdev->coalesce;
+ int i, j;
+ struct cfg80211_coalesce_rules *rule;
+
+ if (!coalesce)
+ return;
+
+ for (i = 0; i < coalesce->n_rules; i++) {
+ rule = &coalesce->rules[i];
+ for (j = 0; j < rule->n_patterns; j++)
+ kfree(rule->patterns[j].mask);
+ kfree(rule->patterns);
+ }
+ kfree(coalesce->rules);
+ kfree(coalesce);
+ rdev->coalesce = NULL;
+}
+
+static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
+ struct nlattr *rule,
+ struct cfg80211_coalesce_rules *new_rule)
+{
+ int err, i;
+ const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
+ struct nlattr *tb[NUM_NL80211_ATTR_COALESCE_RULE], *pat;
+ int rem, pat_len, mask_len, pkt_offset, n_patterns = 0;
+ struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
+
+ err = nla_parse(tb, NL80211_ATTR_COALESCE_RULE_MAX, nla_data(rule),
+ nla_len(rule), nl80211_coalesce_policy);
+ if (err)
+ return err;
+
+ if (tb[NL80211_ATTR_COALESCE_RULE_DELAY])
+ new_rule->delay =
+ nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_DELAY]);
+ if (new_rule->delay > coalesce->max_delay)
+ return -EINVAL;
+
+ if (tb[NL80211_ATTR_COALESCE_RULE_CONDITION])
+ new_rule->condition =
+ nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_CONDITION]);
+ if (new_rule->condition != NL80211_COALESCE_CONDITION_MATCH &&
+ new_rule->condition != NL80211_COALESCE_CONDITION_NO_MATCH)
+ return -EINVAL;
+
+ if (!tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN])
+ return -EINVAL;
+
+ nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
+ rem)
+ n_patterns++;
+ if (n_patterns > coalesce->n_patterns)
+ return -EINVAL;
+
+ new_rule->patterns = kcalloc(n_patterns, sizeof(new_rule->patterns[0]),
+ GFP_KERNEL);
+ if (!new_rule->patterns)
+ return -ENOMEM;
+
+ new_rule->n_patterns = n_patterns;
+ i = 0;
+
+ nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
+ rem) {
+ nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
+ nla_len(pat), NULL);
+ if (!pat_tb[NL80211_PKTPAT_MASK] ||
+ !pat_tb[NL80211_PKTPAT_PATTERN])
+ return -EINVAL;
+ pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]);
+ mask_len = DIV_ROUND_UP(pat_len, 8);
+ if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len)
+ return -EINVAL;
+ if (pat_len > coalesce->pattern_max_len ||
+ pat_len < coalesce->pattern_min_len)
+ return -EINVAL;
+
+ if (!pat_tb[NL80211_PKTPAT_OFFSET])
+ pkt_offset = 0;
+ else
+ pkt_offset = nla_get_u32(pat_tb[NL80211_PKTPAT_OFFSET]);
+ if (pkt_offset > coalesce->max_pkt_offset)
+ return -EINVAL;
+ new_rule->patterns[i].pkt_offset = pkt_offset;
+
+ new_rule->patterns[i].mask =
+ kmalloc(mask_len + pat_len, GFP_KERNEL);
+ if (!new_rule->patterns[i].mask)
+ return -ENOMEM;
+ new_rule->patterns[i].pattern =
+ new_rule->patterns[i].mask + mask_len;
+ memcpy(new_rule->patterns[i].mask,
+ nla_data(pat_tb[NL80211_PKTPAT_MASK]), mask_len);
+ new_rule->patterns[i].pattern_len = pat_len;
+ memcpy(new_rule->patterns[i].pattern,
+ nla_data(pat_tb[NL80211_PKTPAT_PATTERN]), pat_len);
+ i++;
+ }
+
+ return 0;
+}
+
+static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
+ struct cfg80211_coalesce new_coalesce = {};
+ struct cfg80211_coalesce *n_coalesce;
+ int err, rem_rule, n_rules = 0, i, j;
+ struct nlattr *rule;
+ struct cfg80211_coalesce_rules *tmp_rule;
+
+ if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce)
+ return -EOPNOTSUPP;
+
+ if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) {
+ cfg80211_rdev_free_coalesce(rdev);
+ rdev->ops->set_coalesce(&rdev->wiphy, NULL);
+ return 0;
+ }
+
+ nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
+ rem_rule)
+ n_rules++;
+ if (n_rules > coalesce->n_rules)
+ return -EINVAL;
+
+ new_coalesce.rules = kcalloc(n_rules, sizeof(new_coalesce.rules[0]),
+ GFP_KERNEL);
+ if (!new_coalesce.rules)
+ return -ENOMEM;
+
+ new_coalesce.n_rules = n_rules;
+ i = 0;
+
+ nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
+ rem_rule) {
+ err = nl80211_parse_coalesce_rule(rdev, rule,
+ &new_coalesce.rules[i]);
+ if (err)
+ goto error;
+
+ i++;
+ }
+
+ err = rdev->ops->set_coalesce(&rdev->wiphy, &new_coalesce);
+ if (err)
+ goto error;
+
+ n_coalesce = kmemdup(&new_coalesce, sizeof(new_coalesce), GFP_KERNEL);
+ if (!n_coalesce) {
+ err = -ENOMEM;
+ goto error;
+ }
+ cfg80211_rdev_free_coalesce(rdev);
+ rdev->coalesce = n_coalesce;
+
+ return 0;
+error:
+ for (i = 0; i < new_coalesce.n_rules; i++) {
+ tmp_rule = &new_coalesce.rules[i];
+ for (j = 0; j < tmp_rule->n_patterns; j++)
+ kfree(tmp_rule->patterns[j].mask);
+ kfree(tmp_rule->patterns);
+ }
+ kfree(new_coalesce.rules);
+
+ return err;
+}
+
static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -9043,7 +9470,30 @@ static struct genl_ops nl80211_ops[] = {
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
NL80211_FLAG_NEED_RTNL,
- }
+ },
+ {
+ .cmd = NL80211_CMD_GET_COALESCE,
+ .doit = nl80211_get_coalesce,
+ .policy = nl80211_policy,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_SET_COALESCE,
+ .doit = nl80211_set_coalesce,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_WIPHY |
+ NL80211_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL80211_CMD_CHANNEL_SWITCH,
+ .doit = nl80211_channel_switch,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+ NL80211_FLAG_NEED_RTNL,
+ },
};
static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -10000,7 +10450,7 @@ EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u32 nlportid,
int freq, int sig_dbm,
- const u8 *buf, size_t len, gfp_t gfp)
+ const u8 *buf, size_t len, u32 flags, gfp_t gfp)
{
struct net_device *netdev = wdev->netdev;
struct sk_buff *msg;
@@ -10023,7 +10473,9 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) ||
(sig_dbm &&
nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
- nla_put(msg, NL80211_ATTR_FRAME, len, buf))
+ nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
+ (flags &&
+ nla_put_u32(msg, NL80211_ATTR_RXMGMT_FLAGS, flags)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a4073e808c13..2c0f2b3c07cb 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -66,7 +66,7 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u32 nlpid,
int freq, int sig_dbm,
- const u8 *buf, size_t len, gfp_t gfp);
+ const u8 *buf, size_t len, u32 flags, gfp_t gfp);
void
nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -74,4 +74,6 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
enum nl80211_radar_event event,
struct net_device *netdev, gfp_t gfp);
+void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
+
#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 7d604c06c3dc..a271c27fac77 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -97,6 +97,10 @@ int ieee80211_radiotap_iterator_init(
struct ieee80211_radiotap_header *radiotap_header,
int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns)
{
+ /* check the radiotap header can actually be present */
+ if (max_length < sizeof(struct ieee80211_radiotap_header))
+ return -EINVAL;
+
/* Linux only supports version 0 radiotap format */
if (radiotap_header->it_version)
return -EINVAL;
@@ -131,7 +135,8 @@ int ieee80211_radiotap_iterator_init(
*/
if ((unsigned long)iterator->_arg -
- (unsigned long)iterator->_rtheader >
+ (unsigned long)iterator->_rtheader +
+ sizeof(uint32_t) >
(unsigned long)iterator->_max_length)
return -EINVAL;
}
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 9f15f0ac824d..37ce9fdfe934 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -516,11 +516,12 @@ static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev)
#ifdef CONFIG_NL80211_TESTMODE
static inline int rdev_testmode_cmd(struct cfg80211_registered_device *rdev,
+ struct wireless_dev *wdev,
void *data, int len)
{
int ret;
- trace_rdev_testmode_cmd(&rdev->wiphy);
- ret = rdev->ops->testmode_cmd(&rdev->wiphy, data, len);
+ trace_rdev_testmode_cmd(&rdev->wiphy, wdev);
+ ret = rdev->ops->testmode_cmd(&rdev->wiphy, wdev, data, len);
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -923,4 +924,16 @@ static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev,
trace_rdev_return_void(&rdev->wiphy);
}
+static inline int rdev_channel_switch(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_csa_settings *params)
+{
+ int ret;
+
+ trace_rdev_channel_switch(&rdev->wiphy, dev, params);
+ ret = rdev->ops->channel_switch(&rdev->wiphy, dev, params);
+ trace_rdev_return_int(&rdev->wiphy, ret);
+ return ret;
+}
+
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ae8c186b50d6..eeb71480f1af 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -465,10 +465,6 @@ static int cmp_bss(struct cfg80211_bss *a,
}
}
- /*
- * we can't use compare_ether_addr here since we need a < > operator.
- * The binary return value of compare_ether_addr isn't enough
- */
r = memcmp(a->bssid, b->bssid, sizeof(a->bssid));
if (r)
return r;
@@ -651,6 +647,8 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev,
continue;
if (bss->pub.channel != new->pub.channel)
continue;
+ if (bss->pub.scan_width != new->pub.scan_width)
+ continue;
if (rcu_access_pointer(bss->pub.beacon_ies))
continue;
ies = rcu_access_pointer(bss->pub.ies);
@@ -870,11 +868,12 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
/* Returned bss is reference counted and must be cleaned up appropriately. */
struct cfg80211_bss*
-cfg80211_inform_bss(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
- const u8 *bssid, u64 tsf, u16 capability,
- u16 beacon_interval, const u8 *ie, size_t ielen,
- s32 signal, gfp_t gfp)
+cfg80211_inform_bss_width(struct wiphy *wiphy,
+ struct ieee80211_channel *channel,
+ enum nl80211_bss_scan_width scan_width,
+ const u8 *bssid, u64 tsf, u16 capability,
+ u16 beacon_interval, const u8 *ie, size_t ielen,
+ s32 signal, gfp_t gfp)
{
struct cfg80211_bss_ies *ies;
struct cfg80211_internal_bss tmp = {}, *res;
@@ -892,6 +891,7 @@ cfg80211_inform_bss(struct wiphy *wiphy,
memcpy(tmp.pub.bssid, bssid, ETH_ALEN);
tmp.pub.channel = channel;
+ tmp.pub.scan_width = scan_width;
tmp.pub.signal = signal;
tmp.pub.beacon_interval = beacon_interval;
tmp.pub.capability = capability;
@@ -924,14 +924,15 @@ cfg80211_inform_bss(struct wiphy *wiphy,
/* cfg80211_bss_update gives us a referenced result */
return &res->pub;
}
-EXPORT_SYMBOL(cfg80211_inform_bss);
+EXPORT_SYMBOL(cfg80211_inform_bss_width);
/* Returned bss is reference counted and must be cleaned up appropriately. */
struct cfg80211_bss *
-cfg80211_inform_bss_frame(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
- struct ieee80211_mgmt *mgmt, size_t len,
- s32 signal, gfp_t gfp)
+cfg80211_inform_bss_width_frame(struct wiphy *wiphy,
+ struct ieee80211_channel *channel,
+ enum nl80211_bss_scan_width scan_width,
+ struct ieee80211_mgmt *mgmt, size_t len,
+ s32 signal, gfp_t gfp)
{
struct cfg80211_internal_bss tmp = {}, *res;
struct cfg80211_bss_ies *ies;
@@ -941,7 +942,8 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) !=
offsetof(struct ieee80211_mgmt, u.beacon.variable));
- trace_cfg80211_inform_bss_frame(wiphy, channel, mgmt, len, signal);
+ trace_cfg80211_inform_bss_width_frame(wiphy, channel, scan_width, mgmt,
+ len, signal);
if (WARN_ON(!mgmt))
return NULL;
@@ -976,6 +978,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN);
tmp.pub.channel = channel;
+ tmp.pub.scan_width = scan_width;
tmp.pub.signal = signal;
tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
@@ -991,7 +994,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
/* cfg80211_bss_update gives us a referenced result */
return &res->pub;
}
-EXPORT_SYMBOL(cfg80211_inform_bss_frame);
+EXPORT_SYMBOL(cfg80211_inform_bss_width_frame);
void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
{
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index a23253e06358..9ee6bc1a7610 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -30,7 +30,8 @@ static ssize_t name ## _show(struct device *dev, \
char *buf) \
{ \
return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \
-}
+} \
+static DEVICE_ATTR_RO(name)
SHOW_FMT(index, "%d", wiphy_idx);
SHOW_FMT(macaddress, "%pM", wiphy.perm_addr);
@@ -42,7 +43,7 @@ static ssize_t name_show(struct device *dev,
struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy;
return sprintf(buf, "%s\n", dev_name(&wiphy->dev));
}
-
+static DEVICE_ATTR_RO(name);
static ssize_t addresses_show(struct device *dev,
struct device_attribute *attr,
@@ -60,15 +61,17 @@ static ssize_t addresses_show(struct device *dev,
return buf - start;
}
-
-static struct device_attribute ieee80211_dev_attrs[] = {
- __ATTR_RO(index),
- __ATTR_RO(macaddress),
- __ATTR_RO(address_mask),
- __ATTR_RO(addresses),
- __ATTR_RO(name),
- {}
+static DEVICE_ATTR_RO(addresses);
+
+static struct attribute *ieee80211_attrs[] = {
+ &dev_attr_index.attr,
+ &dev_attr_macaddress.attr,
+ &dev_attr_address_mask.attr,
+ &dev_attr_addresses.attr,
+ &dev_attr_name.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(ieee80211);
static void wiphy_dev_release(struct device *dev)
{
@@ -146,7 +149,7 @@ struct class ieee80211_class = {
.name = "ieee80211",
.owner = THIS_MODULE,
.dev_release = wiphy_dev_release,
- .dev_attrs = ieee80211_dev_attrs,
+ .dev_groups = ieee80211_groups,
.dev_uevent = wiphy_uevent,
#ifdef CONFIG_PM
.suspend = wiphy_suspend,
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index e1534baf2ebb..ba5f0d6614d5 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1293,15 +1293,17 @@ TRACE_EVENT(rdev_return_int_int,
#ifdef CONFIG_NL80211_TESTMODE
TRACE_EVENT(rdev_testmode_cmd,
- TP_PROTO(struct wiphy *wiphy),
- TP_ARGS(wiphy),
+ TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+ TP_ARGS(wiphy, wdev),
TP_STRUCT__entry(
WIPHY_ENTRY
+ WDEV_ENTRY
),
TP_fast_assign(
WIPHY_ASSIGN;
+ WDEV_ASSIGN;
),
- TP_printk(WIPHY_PR_FMT, WIPHY_PR_ARG)
+ TP_printk(WIPHY_PR_FMT WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
);
TRACE_EVENT(rdev_testmode_dump,
@@ -1841,6 +1843,39 @@ TRACE_EVENT(rdev_crit_proto_stop,
WIPHY_PR_ARG, WDEV_PR_ARG)
);
+TRACE_EVENT(rdev_channel_switch,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_csa_settings *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ CHAN_DEF_ENTRY
+ __field(u16, counter_offset_beacon)
+ __field(u16, counter_offset_presp)
+ __field(bool, radar_required)
+ __field(bool, block_tx)
+ __field(u8, count)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ CHAN_DEF_ASSIGN(&params->chandef);
+ __entry->counter_offset_beacon = params->counter_offset_beacon;
+ __entry->counter_offset_presp = params->counter_offset_presp;
+ __entry->radar_required = params->radar_required;
+ __entry->block_tx = params->block_tx;
+ __entry->count = params->count;
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT
+ ", block_tx: %d, count: %u, radar_required: %d"
+ ", counter offsets (beacon/presp): %u/%u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG,
+ __entry->block_tx, __entry->count, __entry->radar_required,
+ __entry->counter_offset_beacon,
+ __entry->counter_offset_presp)
+);
+
/*************************************************************
* cfg80211 exported functions traces *
*************************************************************/
@@ -2391,26 +2426,30 @@ TRACE_EVENT(cfg80211_get_bss,
__entry->capa_mask, __entry->capa_val)
);
-TRACE_EVENT(cfg80211_inform_bss_frame,
+TRACE_EVENT(cfg80211_inform_bss_width_frame,
TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel,
+ enum nl80211_bss_scan_width scan_width,
struct ieee80211_mgmt *mgmt, size_t len,
s32 signal),
- TP_ARGS(wiphy, channel, mgmt, len, signal),
+ TP_ARGS(wiphy, channel, scan_width, mgmt, len, signal),
TP_STRUCT__entry(
WIPHY_ENTRY
CHAN_ENTRY
+ __field(enum nl80211_bss_scan_width, scan_width)
__dynamic_array(u8, mgmt, len)
__field(s32, signal)
),
TP_fast_assign(
WIPHY_ASSIGN;
CHAN_ASSIGN(channel);
+ __entry->scan_width = scan_width;
if (mgmt)
memcpy(__get_dynamic_array(mgmt), mgmt, len);
__entry->signal = signal;
),
- TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "signal: %d",
- WIPHY_PR_ARG, CHAN_PR_ARG, __entry->signal)
+ TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d",
+ WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
+ __entry->signal)
);
DECLARE_EVENT_CLASS(cfg80211_bss_evt,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 74458b7f61eb..ce090c1c5e4f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -33,7 +33,8 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
}
EXPORT_SYMBOL(ieee80211_get_response_rate);
-u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband)
+u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
+ enum nl80211_bss_scan_width scan_width)
{
struct ieee80211_rate *bitrates;
u32 mandatory_rates = 0;
@@ -43,10 +44,15 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband)
if (WARN_ON(!sband))
return 1;
- if (sband->band == IEEE80211_BAND_2GHZ)
- mandatory_flag = IEEE80211_RATE_MANDATORY_B;
- else
+ if (sband->band == IEEE80211_BAND_2GHZ) {
+ if (scan_width == NL80211_BSS_CHAN_WIDTH_5 ||
+ scan_width == NL80211_BSS_CHAN_WIDTH_10)
+ mandatory_flag = IEEE80211_RATE_MANDATORY_G;
+ else
+ mandatory_flag = IEEE80211_RATE_MANDATORY_B;
+ } else {
mandatory_flag = IEEE80211_RATE_MANDATORY_A;
+ }
bitrates = sband->bitrates;
for (i = 0; i < sband->n_bitrates; i++)
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 66c638730c7a..b8253250d723 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -156,6 +156,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
case X25_FAC_CALLING_AE:
if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
return -1;
+ if (p[2] > X25_MAX_AE_LEN)
+ return -1;
dte_facs->calling_len = p[2];
memcpy(dte_facs->calling_ae, &p[3], p[1] - 1);
*vc_fac_mask |= X25_MASK_CALLING_AE;
@@ -163,6 +165,8 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
case X25_FAC_CALLED_AE:
if (p[1] > X25_MAX_DTE_FACIL_LEN || p[1] <= 1)
return -1;
+ if (p[2] > X25_MAX_AE_LEN)
+ return -1;
dte_facs->called_len = p[2];
memcpy(dte_facs->called_ae, &p[3], p[1] - 1);
*vc_fac_mask |= X25_MASK_CALLED_AE;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f77c371ea72b..76e1873811d4 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -308,7 +308,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
{
BUG_ON(!policy->walk.dead);
- if (del_timer(&policy->timer))
+ if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
BUG();
security_xfrm_policy_free(policy->security);
@@ -334,7 +334,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
atomic_inc(&policy->genid);
- del_timer(&policy->polq.hold_timer);
+ if (del_timer(&policy->polq.hold_timer))
+ xfrm_pol_put(policy);
xfrm_queue_purge(&policy->polq.hold_queue);
if (del_timer(&policy->timer))
@@ -589,7 +590,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_lock_bh(&pq->hold_queue.lock);
skb_queue_splice_init(&pq->hold_queue, &list);
- del_timer(&pq->hold_timer);
+ if (del_timer(&pq->hold_timer))
+ xfrm_pol_put(old);
spin_unlock_bh(&pq->hold_queue.lock);
if (skb_queue_empty(&list))
@@ -600,7 +602,8 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_lock_bh(&pq->hold_queue.lock);
skb_queue_splice(&list, &pq->hold_queue);
pq->timeout = XFRM_QUEUE_TMO_MIN;
- mod_timer(&pq->hold_timer, jiffies);
+ if (!mod_timer(&pq->hold_timer, jiffies))
+ xfrm_pol_hold(new);
spin_unlock_bh(&pq->hold_queue.lock);
}
@@ -658,7 +661,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_pol_hold(policy);
net->xfrm.policy_count[dir]++;
atomic_inc(&flow_cache_genid);
- rt_genid_bump(net);
+
+ /* After previous checking, family can either be AF_INET or AF_INET6 */
+ if (policy->family == AF_INET)
+ rt_genid_bump_ipv4(net);
+ else
+ rt_genid_bump_ipv6(net);
+
if (delpol) {
xfrm_policy_requeue(delpol, policy);
__xfrm_policy_unlink(delpol, dir);
@@ -1763,6 +1772,10 @@ static void xfrm_policy_queue_process(unsigned long arg)
spin_lock(&pq->hold_queue.lock);
skb = skb_peek(&pq->hold_queue);
+ if (!skb) {
+ spin_unlock(&pq->hold_queue.lock);
+ goto out;
+ }
dst = skb_dst(skb);
sk = skb->sk;
xfrm_decode_session(skb, &fl, dst->ops->family);
@@ -1781,8 +1794,9 @@ static void xfrm_policy_queue_process(unsigned long arg)
goto purge_queue;
pq->timeout = pq->timeout << 1;
- mod_timer(&pq->hold_timer, jiffies + pq->timeout);
- return;
+ if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
+ xfrm_pol_hold(pol);
+ goto out;
}
dst_release(dst);
@@ -1813,11 +1827,14 @@ static void xfrm_policy_queue_process(unsigned long arg)
err = dst_output(skb);
}
+out:
+ xfrm_pol_put(pol);
return;
purge_queue:
pq->timeout = 0;
xfrm_queue_purge(&pq->hold_queue);
+ xfrm_pol_put(pol);
}
static int xdst_queue_output(struct sk_buff *skb)
@@ -1825,7 +1842,8 @@ static int xdst_queue_output(struct sk_buff *skb)
unsigned long sched_next;
struct dst_entry *dst = skb_dst(skb);
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
- struct xfrm_policy_queue *pq = &xdst->pols[0]->polq;
+ struct xfrm_policy *pol = xdst->pols[0];
+ struct xfrm_policy_queue *pq = &pol->polq;
if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
kfree_skb(skb);
@@ -1844,10 +1862,12 @@ static int xdst_queue_output(struct sk_buff *skb)
if (del_timer(&pq->hold_timer)) {
if (time_before(pq->hold_timer.expires, sched_next))
sched_next = pq->hold_timer.expires;
+ xfrm_pol_put(pol);
}
__skb_queue_tail(&pq->hold_queue, skb);
- mod_timer(&pq->hold_timer, sched_next);
+ if (!mod_timer(&pq->hold_timer, sched_next))
+ xfrm_pol_hold(pol);
spin_unlock_bh(&pq->hold_queue.lock);
@@ -2119,8 +2139,6 @@ restart:
* have the xfrm_state's. We need to wait for KM to
* negotiate new SA's or bail out with error.*/
if (net->xfrm.sysctl_larval_drop) {
- /* EREMOTE tells the caller to generate
- * a one-shot blackhole route. */
dst_release(dst);
xfrm_pols_put(pols, drop_pols);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 8dafe6d3c6e4..dab57daae408 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -61,9 +61,9 @@ static void xfrm_replay_notify(struct xfrm_state *x, int event)
switch (event) {
case XFRM_REPLAY_UPDATE:
- if (x->replay_maxdiff &&
- (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
- (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
+ if (!x->replay_maxdiff ||
+ ((x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
+ (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff))) {
if (x->xflags & XFRM_TIME_DEFER)
event = XFRM_REPLAY_TIMEOUT;
else
@@ -129,8 +129,7 @@ static int xfrm_replay_check(struct xfrm_state *x,
return 0;
diff = x->replay.seq - seq;
- if (diff >= min_t(unsigned int, x->props.replay_window,
- sizeof(x->replay.bitmap) * 8)) {
+ if (diff >= x->props.replay_window) {
x->stats.replay_window++;
goto err;
}
@@ -302,9 +301,10 @@ static void xfrm_replay_notify_bmp(struct xfrm_state *x, int event)
switch (event) {
case XFRM_REPLAY_UPDATE:
- if (x->replay_maxdiff &&
- (replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) &&
- (replay_esn->oseq - preplay_esn->oseq < x->replay_maxdiff)) {
+ if (!x->replay_maxdiff ||
+ ((replay_esn->seq - preplay_esn->seq < x->replay_maxdiff) &&
+ (replay_esn->oseq - preplay_esn->oseq
+ < x->replay_maxdiff))) {
if (x->xflags & XFRM_TIME_DEFER)
event = XFRM_REPLAY_TIMEOUT;
else
@@ -353,28 +353,30 @@ static void xfrm_replay_notify_esn(struct xfrm_state *x, int event)
switch (event) {
case XFRM_REPLAY_UPDATE:
- if (!x->replay_maxdiff)
- break;
-
- if (replay_esn->seq_hi == preplay_esn->seq_hi)
- seq_diff = replay_esn->seq - preplay_esn->seq;
- else
- seq_diff = ~preplay_esn->seq + replay_esn->seq + 1;
-
- if (replay_esn->oseq_hi == preplay_esn->oseq_hi)
- oseq_diff = replay_esn->oseq - preplay_esn->oseq;
- else
- oseq_diff = ~preplay_esn->oseq + replay_esn->oseq + 1;
-
- if (seq_diff < x->replay_maxdiff &&
- oseq_diff < x->replay_maxdiff) {
+ if (x->replay_maxdiff) {
+ if (replay_esn->seq_hi == preplay_esn->seq_hi)
+ seq_diff = replay_esn->seq - preplay_esn->seq;
+ else
+ seq_diff = ~preplay_esn->seq + replay_esn->seq
+ + 1;
- if (x->xflags & XFRM_TIME_DEFER)
- event = XFRM_REPLAY_TIMEOUT;
+ if (replay_esn->oseq_hi == preplay_esn->oseq_hi)
+ oseq_diff = replay_esn->oseq
+ - preplay_esn->oseq;
else
- return;
+ oseq_diff = ~preplay_esn->oseq
+ + replay_esn->oseq + 1;
+
+ if (seq_diff >= x->replay_maxdiff ||
+ oseq_diff >= x->replay_maxdiff)
+ break;
}
+ if (x->xflags & XFRM_TIME_DEFER)
+ event = XFRM_REPLAY_TIMEOUT;
+ else
+ return;
+
break;
case XFRM_REPLAY_TIMEOUT:
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 54c0acd29468..b9c3f9e943a9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -496,7 +496,8 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
- tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS);
+ tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
+ CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
setup_timer(&x->rtimer, xfrm_replay_timer_handler,
(unsigned long)x);
x->curlft.add_time = get_seconds();
@@ -987,11 +988,13 @@ void xfrm_state_insert(struct xfrm_state *x)
EXPORT_SYMBOL(xfrm_state_insert);
/* xfrm_state_lock is held */
-static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
+static struct xfrm_state *__find_acq_core(struct net *net,
+ const struct xfrm_mark *m,
unsigned short family, u8 mode,
u32 reqid, u8 proto,
const xfrm_address_t *daddr,
- const xfrm_address_t *saddr, int create)
+ const xfrm_address_t *saddr,
+ int create)
{
unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
struct xfrm_state *x;
@@ -1396,9 +1399,9 @@ xfrm_state_lookup_byaddr(struct net *net, u32 mark,
EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
struct xfrm_state *
-xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto,
- const xfrm_address_t *daddr, const xfrm_address_t *saddr,
- int create, unsigned short family)
+xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
+ u8 proto, const xfrm_address_t *daddr,
+ const xfrm_address_t *saddr, int create, unsigned short family)
{
struct xfrm_state *x;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 3f565e495ac6..f964d4c00ffb 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -446,7 +446,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *
memcpy(&x->sel, &p->sel, sizeof(x->sel));
memcpy(&x->lft, &p->lft, sizeof(x->lft));
x->props.mode = p->mode;
- x->props.replay_window = p->replay_window;
+ x->props.replay_window = min_t(unsigned int, p->replay_window,
+ sizeof(x->replay.bitmap) * 8);
x->props.reqid = p->reqid;
x->props.family = p->family;
memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr));
@@ -1856,7 +1857,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
if (x->km.state != XFRM_STATE_VALID)
goto out;
- err = xfrm_replay_verify_len(x->replay_esn, rp);
+ err = xfrm_replay_verify_len(x->replay_esn, re);
if (err)
goto out;