summaryrefslogtreecommitdiff
path: root/net/switchdev/switchdev.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 09:41:05 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 09:41:05 -0800
commitb0f85fa11aefc4f3e03306b4cd47f113bd57dcba (patch)
tree1333d36d99fde3f97210795941fc246f0ad08a75 /net/switchdev/switchdev.c
parentccc9d4a6d640cbde05d519edeb727881646cf71b (diff)
parentf32bfb9a8ca083f8d148ea90ae5ba66f4831836e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: Changes of note: 1) Allow to schedule ICMP packets in IPVS, from Alex Gartrell. 2) Provide FIB table ID in ipv4 route dumps just as ipv6 does, from David Ahern. 3) Allow the user to ask for the statistics to be filtered out of ipv4/ipv6 address netlink dumps. From Sowmini Varadhan. 4) More work to pass the network namespace context around deep into various packet path APIs, starting with the netfilter hooks. From Eric W Biederman. 5) Add layer 2 TX/RX checksum offloading to qeth driver, from Thomas Richter. 6) Use usec resolution for SYN/ACK RTTs in TCP, from Yuchung Cheng. 7) Support Very High Throughput in wireless MESH code, from Bob Copeland. 8) Allow setting the ageing_time in switchdev/rocker. From Scott Feldman. 9) Properly autoload L2TP type modules, from Stephen Hemminger. 10) Fix and enable offload features by default in 8139cp driver, from David Woodhouse. 11) Support both ipv4 and ipv6 sockets in a single vxlan device, from Jiri Benc. 12) Fix CWND limiting of thin streams in TCP, from Bendik Rønning Opstad. 13) Fix IPSEC flowcache overflows on large systems, from Steffen Klassert. 14) Convert bridging to track VLANs using rhashtable entries rather than a bitmap. From Nikolay Aleksandrov. 15) Make TCP listener handling completely lockless, this is a major accomplishment. Incoming request sockets now live in the established hash table just like any other socket too. From Eric Dumazet. 15) Provide more bridging attributes to netlink, from Nikolay Aleksandrov. 16) Use hash based algorithm for ipv4 multipath routing, this was very long overdue. From Peter Nørlund. 17) Several y2038 cures, mostly avoiding timespec. From Arnd Bergmann. 18) Allow non-root execution of EBPF programs, from Alexei Starovoitov. 19) Support SO_INCOMING_CPU as setsockopt, from Eric Dumazet. This influences the port binding selection logic used by SO_REUSEPORT. 20) Add ipv6 support to VRF, from David Ahern. 
21) Add support for Mellanox Spectrum switch ASIC, from Jiri Pirko. 22) Add rtl8xxxu Realtek wireless driver, from Jes Sorensen. 23) Implement RACK loss recovery in TCP, from Yuchung Cheng. 24) Support multipath routes in MPLS, from Roopa Prabhu. 25) Fix POLLOUT notification for listening sockets in AF_UNIX, from Eric Dumazet. 26) Add new QED Qlogic driver, from Yuval Mintz, Manish Chopra, and Sudarsana Kalluru. 27) Don't fetch timestamps on AF_UNIX sockets, from Hannes Frederic Sowa. 28) Support ipv6 geneve tunnels, from John W Linville. 29) Add flood control support to switchdev layer, from Ido Schimmel. 30) Fix CHECKSUM_PARTIAL handling of potentially fragmented frames, from Hannes Frederic Sowa. 31) Support persistent maps and progs in bpf, from Daniel Borkmann. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1790 commits) sh_eth: use DMA barriers switchdev: respect SKIP_EOPNOTSUPP flag in case there is no recursion net: sched: kill dead code in sch_choke.c irda: Delete an unnecessary check before the function call "irlmp_unregister_service" net: dsa: mv88e6xxx: include DSA ports in VLANs net: dsa: mv88e6xxx: disable SA learning for DSA and CPU ports net/core: fix for_each_netdev_feature vlan: Invoke driver vlan hooks only if device is present arcnet/com20020: add LEDS_CLASS dependency bpf, verifier: annotate verbose printer with __printf dp83640: Only wait for timestamps for packets with timestamping enabled. ptp: Change ptp_class to a proper bitmask dp83640: Prune rx timestamp list before reading from it dp83640: Delay scheduled work. dp83640: Include hash in timestamp/packet matching ipv6: fix tunnel error handling net/mlx5e: Fix LSO vlan insertion net/mlx5e: Re-eanble client vlan TX acceleration net/mlx5e: Return error in case mlx5e_set_features() fails net/mlx5e: Don't allow more than max supported channels ...
Diffstat (limited to 'net/switchdev/switchdev.c')
-rw-r--r--net/switchdev/switchdev.c638
1 files changed, 433 insertions, 205 deletions
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 77f5d17e2612..f34e535e93bd 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1,6 +1,6 @@
/*
* net/switchdev/switchdev.c - Switch device API
- * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
* Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -15,12 +15,166 @@
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
#include <linux/if_vlan.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
/**
+ * switchdev_trans_item_enqueue - Enqueue data item to transaction queue
+ *
+ * @trans: transaction
+ * @data: pointer to data being queued
+ * @destructor: data destructor
+ * @tritem: transaction item being queued
+ *
+ * Enqueue data item to transaction queue. tritem is typically placed in
+ * container pointed at by data pointer. Destructor is called on
+ * transaction abort and after successful commit phase in case
+ * the caller did not dequeue the item before.
+ */
+void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
+ void *data, void (*destructor)(void const *),
+ struct switchdev_trans_item *tritem)
+{
+ tritem->data = data;
+ tritem->destructor = destructor;
+ list_add_tail(&tritem->list, &trans->item_list);
+}
+EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
+
+static struct switchdev_trans_item *
+__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
+{
+ struct switchdev_trans_item *tritem;
+
+ if (list_empty(&trans->item_list))
+ return NULL;
+ tritem = list_first_entry(&trans->item_list,
+ struct switchdev_trans_item, list);
+ list_del(&tritem->list);
+ return tritem;
+}
+
+/**
+ * switchdev_trans_item_dequeue - Dequeue data item from transaction queue
+ *
+ * @trans: transaction
+ */
+void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
+{
+ struct switchdev_trans_item *tritem;
+
+ tritem = __switchdev_trans_item_dequeue(trans);
+ BUG_ON(!tritem);
+ return tritem->data;
+}
+EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
+
+static void switchdev_trans_init(struct switchdev_trans *trans)
+{
+ INIT_LIST_HEAD(&trans->item_list);
+}
+
+static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
+{
+ struct switchdev_trans_item *tritem;
+
+ while ((tritem = __switchdev_trans_item_dequeue(trans)))
+ tritem->destructor(tritem->data);
+}
+
+static void switchdev_trans_items_warn_destroy(struct net_device *dev,
+ struct switchdev_trans *trans)
+{
+ WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
+ dev->name);
+ switchdev_trans_items_destroy(trans);
+}
+
+static LIST_HEAD(deferred);
+static DEFINE_SPINLOCK(deferred_lock);
+
+typedef void switchdev_deferred_func_t(struct net_device *dev,
+ const void *data);
+
+struct switchdev_deferred_item {
+ struct list_head list;
+ struct net_device *dev;
+ switchdev_deferred_func_t *func;
+ unsigned long data[0];
+};
+
+static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
+{
+ struct switchdev_deferred_item *dfitem;
+
+ spin_lock_bh(&deferred_lock);
+ if (list_empty(&deferred)) {
+ dfitem = NULL;
+ goto unlock;
+ }
+ dfitem = list_first_entry(&deferred,
+ struct switchdev_deferred_item, list);
+ list_del(&dfitem->list);
+unlock:
+ spin_unlock_bh(&deferred_lock);
+ return dfitem;
+}
+
+/**
+ * switchdev_deferred_process - Process ops in deferred queue
+ *
+ * Called to flush the ops currently queued in deferred ops queue.
+ * rtnl_lock must be held.
+ */
+void switchdev_deferred_process(void)
+{
+ struct switchdev_deferred_item *dfitem;
+
+ ASSERT_RTNL();
+
+ while ((dfitem = switchdev_deferred_dequeue())) {
+ dfitem->func(dfitem->dev, dfitem->data);
+ dev_put(dfitem->dev);
+ kfree(dfitem);
+ }
+}
+EXPORT_SYMBOL_GPL(switchdev_deferred_process);
+
+static void switchdev_deferred_process_work(struct work_struct *work)
+{
+ rtnl_lock();
+ switchdev_deferred_process();
+ rtnl_unlock();
+}
+
+static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
+
+static int switchdev_deferred_enqueue(struct net_device *dev,
+ const void *data, size_t data_len,
+ switchdev_deferred_func_t *func)
+{
+ struct switchdev_deferred_item *dfitem;
+
+ dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
+ if (!dfitem)
+ return -ENOMEM;
+ dfitem->dev = dev;
+ dfitem->func = func;
+ memcpy(dfitem->data, data, data_len);
+ dev_hold(dev);
+ spin_lock_bh(&deferred_lock);
+ list_add_tail(&dfitem->list, &deferred);
+ spin_unlock_bh(&deferred_lock);
+ schedule_work(&deferred_process_work);
+ return 0;
+}
+
+/**
* switchdev_port_attr_get - Get port attribute
*
* @dev: port device
@@ -32,7 +186,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
struct net_device *lower_dev;
struct list_head *iter;
struct switchdev_attr first = {
- .id = SWITCHDEV_ATTR_UNDEFINED
+ .id = SWITCHDEV_ATTR_ID_UNDEFINED
};
int err = -EOPNOTSUPP;
@@ -52,7 +206,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
err = switchdev_port_attr_get(lower_dev, attr);
if (err)
break;
- if (first.id == SWITCHDEV_ATTR_UNDEFINED)
+ if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
first = *attr;
else if (memcmp(&first, attr, sizeof(*attr)))
return -ENODATA;
@@ -63,18 +217,21 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
static int __switchdev_port_attr_set(struct net_device *dev,
- struct switchdev_attr *attr)
+ const struct switchdev_attr *attr,
+ struct switchdev_trans *trans)
{
const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
- if (ops && ops->switchdev_port_attr_set)
- return ops->switchdev_port_attr_set(dev, attr);
+ if (ops && ops->switchdev_port_attr_set) {
+ err = ops->switchdev_port_attr_set(dev, attr, trans);
+ goto done;
+ }
if (attr->flags & SWITCHDEV_F_NO_RECURSE)
- return err;
+ goto done;
/* Switch device port(s) may be stacked under
* bond/team/vlan dev, so recurse down to set attr on
@@ -82,80 +239,25 @@ static int __switchdev_port_attr_set(struct net_device *dev,
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = __switchdev_port_attr_set(lower_dev, attr);
+ err = __switchdev_port_attr_set(lower_dev, attr, trans);
if (err)
break;
}
- return err;
-}
-
-struct switchdev_attr_set_work {
- struct work_struct work;
- struct net_device *dev;
- struct switchdev_attr attr;
-};
-
-static void switchdev_port_attr_set_work(struct work_struct *work)
-{
- struct switchdev_attr_set_work *asw =
- container_of(work, struct switchdev_attr_set_work, work);
- int err;
-
- rtnl_lock();
- err = switchdev_port_attr_set(asw->dev, &asw->attr);
- if (err && err != -EOPNOTSUPP)
- netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
- err, asw->attr.id);
- rtnl_unlock();
-
- dev_put(asw->dev);
- kfree(work);
-}
+done:
+ if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+ err = 0;
-static int switchdev_port_attr_set_defer(struct net_device *dev,
- struct switchdev_attr *attr)
-{
- struct switchdev_attr_set_work *asw;
-
- asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
- if (!asw)
- return -ENOMEM;
-
- INIT_WORK(&asw->work, switchdev_port_attr_set_work);
-
- dev_hold(dev);
- asw->dev = dev;
- memcpy(&asw->attr, attr, sizeof(asw->attr));
-
- schedule_work(&asw->work);
-
- return 0;
+ return err;
}
-/**
- * switchdev_port_attr_set - Set port attribute
- *
- * @dev: port device
- * @attr: attribute to set
- *
- * Use a 2-phase prepare-commit transaction model to ensure
- * system is not left in a partially updated state due to
- * failure from driver/device.
- */
-int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
+static int switchdev_port_attr_set_now(struct net_device *dev,
+ const struct switchdev_attr *attr)
{
+ struct switchdev_trans trans;
int err;
- if (!rtnl_is_locked()) {
- /* Running prepare-commit transaction across stacked
- * devices requires nothing moves, so if rtnl_lock is
- * not held, schedule a worker thread to hold rtnl_lock
- * while setting attr.
- */
-
- return switchdev_port_attr_set_defer(dev, attr);
- }
+ switchdev_trans_init(&trans);
/* Phase I: prepare for attr set. Driver/device should fail
* here if there are going to be issues in the commit phase,
@@ -164,18 +266,16 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
* but should not commit the attr.
*/
- attr->trans = SWITCHDEV_TRANS_PREPARE;
- err = __switchdev_port_attr_set(dev, attr);
+ trans.ph_prepare = true;
+ err = __switchdev_port_attr_set(dev, attr, &trans);
if (err) {
/* Prepare phase failed: abort the transaction. Any
* resources reserved in the prepare phase are
* released.
*/
- if (err != -EOPNOTSUPP) {
- attr->trans = SWITCHDEV_TRANS_ABORT;
- __switchdev_port_attr_set(dev, attr);
- }
+ if (err != -EOPNOTSUPP)
+ switchdev_trans_items_destroy(&trans);
return err;
}
@@ -185,17 +285,75 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
* because the driver said everything was OK in phase I.
*/
- attr->trans = SWITCHDEV_TRANS_COMMIT;
- err = __switchdev_port_attr_set(dev, attr);
+ trans.ph_prepare = false;
+ err = __switchdev_port_attr_set(dev, attr, &trans);
WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
dev->name, attr->id);
+ switchdev_trans_items_warn_destroy(dev, &trans);
return err;
}
+
+static void switchdev_port_attr_set_deferred(struct net_device *dev,
+ const void *data)
+{
+ const struct switchdev_attr *attr = data;
+ int err;
+
+ err = switchdev_port_attr_set_now(dev, attr);
+ if (err && err != -EOPNOTSUPP)
+ netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
+ err, attr->id);
+}
+
+static int switchdev_port_attr_set_defer(struct net_device *dev,
+ const struct switchdev_attr *attr)
+{
+ return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
+ switchdev_port_attr_set_deferred);
+}
+
+/**
+ * switchdev_port_attr_set - Set port attribute
+ *
+ * @dev: port device
+ * @attr: attribute to set
+ *
+ * Use a 2-phase prepare-commit transaction model to ensure
+ * system is not left in a partially updated state due to
+ * failure from driver/device.
+ *
+ * rtnl_lock must be held and must not be in atomic section,
+ * in case SWITCHDEV_F_DEFER flag is not set.
+ */
+int switchdev_port_attr_set(struct net_device *dev,
+ const struct switchdev_attr *attr)
+{
+ if (attr->flags & SWITCHDEV_F_DEFER)
+ return switchdev_port_attr_set_defer(dev, attr);
+ ASSERT_RTNL();
+ return switchdev_port_attr_set_now(dev, attr);
+}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
+static size_t switchdev_obj_size(const struct switchdev_obj *obj)
+{
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ return sizeof(struct switchdev_obj_port_vlan);
+ case SWITCHDEV_OBJ_ID_IPV4_FIB:
+ return sizeof(struct switchdev_obj_ipv4_fib);
+ case SWITCHDEV_OBJ_ID_PORT_FDB:
+ return sizeof(struct switchdev_obj_port_fdb);
+ default:
+ BUG();
+ }
+ return 0;
+}
+
static int __switchdev_port_obj_add(struct net_device *dev,
- struct switchdev_obj *obj)
+ const struct switchdev_obj *obj,
+ struct switchdev_trans *trans)
{
const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
@@ -203,7 +361,7 @@ static int __switchdev_port_obj_add(struct net_device *dev,
int err = -EOPNOTSUPP;
if (ops && ops->switchdev_port_obj_add)
- return ops->switchdev_port_obj_add(dev, obj);
+ return ops->switchdev_port_obj_add(dev, obj, trans);
/* Switch device port(s) may be stacked under
* bond/team/vlan dev, so recurse down to add object on
@@ -211,7 +369,7 @@ static int __switchdev_port_obj_add(struct net_device *dev,
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = __switchdev_port_obj_add(lower_dev, obj);
+ err = __switchdev_port_obj_add(lower_dev, obj, trans);
if (err)
break;
}
@@ -219,24 +377,16 @@ static int __switchdev_port_obj_add(struct net_device *dev,
return err;
}
-/**
- * switchdev_port_obj_add - Add port object
- *
- * @dev: port device
- * @obj: object to add
- *
- * Use a 2-phase prepare-commit transaction model to ensure
- * system is not left in a partially updated state due to
- * failure from driver/device.
- *
- * rtnl_lock must be held.
- */
-int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
+static int switchdev_port_obj_add_now(struct net_device *dev,
+ const struct switchdev_obj *obj)
{
+ struct switchdev_trans trans;
int err;
ASSERT_RTNL();
+ switchdev_trans_init(&trans);
+
/* Phase I: prepare for obj add. Driver/device should fail
* here if there are going to be issues in the commit phase,
* such as lack of resources or support. The driver/device
@@ -244,18 +394,16 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
* but should not commit the obj.
*/
- obj->trans = SWITCHDEV_TRANS_PREPARE;
- err = __switchdev_port_obj_add(dev, obj);
+ trans.ph_prepare = true;
+ err = __switchdev_port_obj_add(dev, obj, &trans);
if (err) {
/* Prepare phase failed: abort the transaction. Any
* resources reserved in the prepare phase are
* released.
*/
- if (err != -EOPNOTSUPP) {
- obj->trans = SWITCHDEV_TRANS_ABORT;
- __switchdev_port_obj_add(dev, obj);
- }
+ if (err != -EOPNOTSUPP)
+ switchdev_trans_items_destroy(&trans);
return err;
}
@@ -265,21 +413,59 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
* because the driver said everything was OK in phase I.
*/
- obj->trans = SWITCHDEV_TRANS_COMMIT;
- err = __switchdev_port_obj_add(dev, obj);
+ trans.ph_prepare = false;
+ err = __switchdev_port_obj_add(dev, obj, &trans);
WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
+ switchdev_trans_items_warn_destroy(dev, &trans);
return err;
}
-EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
+
+static void switchdev_port_obj_add_deferred(struct net_device *dev,
+ const void *data)
+{
+ const struct switchdev_obj *obj = data;
+ int err;
+
+ err = switchdev_port_obj_add_now(dev, obj);
+ if (err && err != -EOPNOTSUPP)
+ netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
+ err, obj->id);
+}
+
+static int switchdev_port_obj_add_defer(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
+ switchdev_port_obj_add_deferred);
+}
/**
- * switchdev_port_obj_del - Delete port object
+ * switchdev_port_obj_add - Add port object
*
* @dev: port device
- * @obj: object to delete
+ * @obj: object to add; the object ID is carried in obj->id and
+ *       flags such as SWITCHDEV_F_DEFER in obj->flags
+ *
+ * Use a 2-phase prepare-commit transaction model to ensure
+ * system is not left in a partially updated state due to
+ * failure from driver/device.
+ *
+ * rtnl_lock must be held and must not be in atomic section,
+ * in case SWITCHDEV_F_DEFER flag is not set.
*/
-int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
+int switchdev_port_obj_add(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ if (obj->flags & SWITCHDEV_F_DEFER)
+ return switchdev_port_obj_add_defer(dev, obj);
+ ASSERT_RTNL();
+ return switchdev_port_obj_add_now(dev, obj);
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
+
+static int switchdev_port_obj_del_now(struct net_device *dev,
+ const struct switchdev_obj *obj)
{
const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
@@ -295,30 +481,75 @@ int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = switchdev_port_obj_del(lower_dev, obj);
+ err = switchdev_port_obj_del_now(lower_dev, obj);
if (err)
break;
}
return err;
}
+
+static void switchdev_port_obj_del_deferred(struct net_device *dev,
+ const void *data)
+{
+ const struct switchdev_obj *obj = data;
+ int err;
+
+ err = switchdev_port_obj_del_now(dev, obj);
+ if (err && err != -EOPNOTSUPP)
+ netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
+ err, obj->id);
+}
+
+static int switchdev_port_obj_del_defer(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj),
+ switchdev_port_obj_del_deferred);
+}
+
+/**
+ * switchdev_port_obj_del - Delete port object
+ *
+ * @dev: port device
+ * @obj: object to delete; the object ID is carried in obj->id and
+ *       flags such as SWITCHDEV_F_DEFER in obj->flags
+ *
+ * rtnl_lock must be held and must not be in atomic section,
+ * in case SWITCHDEV_F_DEFER flag is not set.
+ */
+int switchdev_port_obj_del(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ if (obj->flags & SWITCHDEV_F_DEFER)
+ return switchdev_port_obj_del_defer(dev, obj);
+ ASSERT_RTNL();
+ return switchdev_port_obj_del_now(dev, obj);
+}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
/**
* switchdev_port_obj_dump - Dump port objects
*
* @dev: port device
+ * @id: object ID
* @obj: object to dump
+ * @cb: function to call with a filled object
+ *
+ * rtnl_lock must be held.
*/
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
+int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
+ switchdev_obj_dump_cb_t *cb)
{
const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
+ ASSERT_RTNL();
+
if (ops && ops->switchdev_port_obj_dump)
- return ops->switchdev_port_obj_dump(dev, obj);
+ return ops->switchdev_port_obj_dump(dev, obj, cb);
/* Switch device port(s) may be stacked under
* bond/team/vlan dev, so recurse down to dump objects on
@@ -326,7 +557,7 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = switchdev_port_obj_dump(lower_dev, obj);
+ err = switchdev_port_obj_dump(lower_dev, obj, cb);
break;
}
@@ -398,7 +629,7 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
struct switchdev_vlan_dump {
- struct switchdev_obj obj;
+ struct switchdev_obj_port_vlan vlan;
struct sk_buff *skb;
u32 filter_mask;
u16 flags;
@@ -406,8 +637,7 @@ struct switchdev_vlan_dump {
u16 end;
};
-static int switchdev_port_vlan_dump_put(struct net_device *dev,
- struct switchdev_vlan_dump *dump)
+static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
{
struct bridge_vlan_info vinfo;
@@ -437,12 +667,11 @@ static int switchdev_port_vlan_dump_put(struct net_device *dev,
return 0;
}
-static int switchdev_port_vlan_dump_cb(struct net_device *dev,
- struct switchdev_obj *obj)
+static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
{
+ struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
struct switchdev_vlan_dump *dump =
- container_of(obj, struct switchdev_vlan_dump, obj);
- struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
+ container_of(vlan, struct switchdev_vlan_dump, vlan);
int err = 0;
if (vlan->vid_begin > vlan->vid_end)
@@ -453,7 +682,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
for (dump->begin = dump->end = vlan->vid_begin;
dump->begin <= vlan->vid_end;
dump->begin++, dump->end++) {
- err = switchdev_port_vlan_dump_put(dev, dump);
+ err = switchdev_port_vlan_dump_put(dump);
if (err)
return err;
}
@@ -465,7 +694,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
/* prepend */
dump->begin = vlan->vid_begin;
} else {
- err = switchdev_port_vlan_dump_put(dev, dump);
+ err = switchdev_port_vlan_dump_put(dump);
dump->flags = vlan->flags;
dump->begin = vlan->vid_begin;
dump->end = vlan->vid_end;
@@ -477,7 +706,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
/* append */
dump->end = vlan->vid_end;
} else {
- err = switchdev_port_vlan_dump_put(dev, dump);
+ err = switchdev_port_vlan_dump_put(dump);
dump->flags = vlan->flags;
dump->begin = vlan->vid_begin;
dump->end = vlan->vid_end;
@@ -494,10 +723,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
u32 filter_mask)
{
struct switchdev_vlan_dump dump = {
- .obj = {
- .id = SWITCHDEV_OBJ_PORT_VLAN,
- .cb = switchdev_port_vlan_dump_cb,
- },
+ .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.skb = skb,
.filter_mask = filter_mask,
};
@@ -505,12 +731,13 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
- err = switchdev_port_obj_dump(dev, &dump.obj);
+ err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
+ switchdev_port_vlan_dump_cb);
if (err)
goto err_out;
if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
/* last one */
- err = switchdev_port_vlan_dump_put(dev, &dump);
+ err = switchdev_port_vlan_dump_put(&dump);
}
err_out:
@@ -530,10 +757,10 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
int nlflags)
{
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+ .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
};
u16 mode = BRIDGE_MODE_UNDEF;
- u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
+ u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
int err;
err = switchdev_port_attr_get(dev, &attr);
@@ -551,7 +778,7 @@ static int switchdev_port_br_setflag(struct net_device *dev,
unsigned long brport_flag)
{
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+ .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
};
u8 flag = nla_get_u8(nlattr);
int err;
@@ -604,6 +831,9 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
err = switchdev_port_br_setflag(dev, attr,
BR_LEARNING_SYNC);
break;
+ case IFLA_BRPORT_UNICAST_FLOOD:
+ err = switchdev_port_br_setflag(dev, attr, BR_FLOOD);
+ break;
default:
err = -EOPNOTSUPP;
break;
@@ -618,14 +848,13 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
static int switchdev_port_br_afspec(struct net_device *dev,
struct nlattr *afspec,
int (*f)(struct net_device *dev,
- struct switchdev_obj *obj))
+ const struct switchdev_obj *obj))
{
struct nlattr *attr;
struct bridge_vlan_info *vinfo;
- struct switchdev_obj obj = {
- .id = SWITCHDEV_OBJ_PORT_VLAN,
+ struct switchdev_obj_port_vlan vlan = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
};
- struct switchdev_obj_vlan *vlan = &obj.u.vlan;
int rem;
int err;
@@ -637,30 +866,33 @@ static int switchdev_port_br_afspec(struct net_device *dev,
vinfo = nla_data(attr);
if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
return -EINVAL;
- vlan->flags = vinfo->flags;
+ vlan.flags = vinfo->flags;
if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
- if (vlan->vid_begin)
+ if (vlan.vid_begin)
+ return -EINVAL;
+ vlan.vid_begin = vinfo->vid;
+ /* don't allow range of pvids */
+ if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
return -EINVAL;
- vlan->vid_begin = vinfo->vid;
} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
- if (!vlan->vid_begin)
+ if (!vlan.vid_begin)
return -EINVAL;
- vlan->vid_end = vinfo->vid;
- if (vlan->vid_end <= vlan->vid_begin)
+ vlan.vid_end = vinfo->vid;
+ if (vlan.vid_end <= vlan.vid_begin)
return -EINVAL;
- err = f(dev, &obj);
+ err = f(dev, &vlan.obj);
if (err)
return err;
- memset(vlan, 0, sizeof(*vlan));
+ vlan.vid_begin = 0;
} else {
- if (vlan->vid_begin)
+ if (vlan.vid_begin)
return -EINVAL;
- vlan->vid_begin = vinfo->vid;
- vlan->vid_end = vinfo->vid;
- err = f(dev, &obj);
+ vlan.vid_begin = vinfo->vid;
+ vlan.vid_end = vinfo->vid;
+ err = f(dev, &vlan.obj);
if (err)
return err;
- memset(vlan, 0, sizeof(*vlan));
+ vlan.vid_begin = 0;
}
}
@@ -742,15 +974,13 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr,
u16 vid, u16 nlm_flags)
{
- struct switchdev_obj obj = {
- .id = SWITCHDEV_OBJ_PORT_FDB,
- .u.fdb = {
- .addr = addr,
- .vid = vid,
- },
+ struct switchdev_obj_port_fdb fdb = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
+ .vid = vid,
};
- return switchdev_port_obj_add(dev, &obj);
+ ether_addr_copy(fdb.addr, addr);
+ return switchdev_port_obj_add(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
@@ -769,30 +999,29 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr,
u16 vid)
{
- struct switchdev_obj obj = {
- .id = SWITCHDEV_OBJ_PORT_FDB,
- .u.fdb = {
- .addr = addr,
- .vid = vid,
- },
+ struct switchdev_obj_port_fdb fdb = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
+ .vid = vid,
};
- return switchdev_port_obj_del(dev, &obj);
+ ether_addr_copy(fdb.addr, addr);
+ return switchdev_port_obj_del(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
struct switchdev_fdb_dump {
- struct switchdev_obj obj;
+ struct switchdev_obj_port_fdb fdb;
+ struct net_device *dev;
struct sk_buff *skb;
struct netlink_callback *cb;
int idx;
};
-static int switchdev_port_fdb_dump_cb(struct net_device *dev,
- struct switchdev_obj *obj)
+static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
{
+ struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
struct switchdev_fdb_dump *dump =
- container_of(obj, struct switchdev_fdb_dump, obj);
+ container_of(fdb, struct switchdev_fdb_dump, fdb);
u32 portid = NETLINK_CB(dump->cb->skb).portid;
u32 seq = dump->cb->nlh->nlmsg_seq;
struct nlmsghdr *nlh;
@@ -812,13 +1041,13 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev,
ndm->ndm_pad2 = 0;
ndm->ndm_flags = NTF_SELF;
ndm->ndm_type = 0;
- ndm->ndm_ifindex = dev->ifindex;
- ndm->ndm_state = obj->u.fdb.ndm_state;
+ ndm->ndm_ifindex = dump->dev->ifindex;
+ ndm->ndm_state = fdb->ndm_state;
- if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
+ if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
goto nla_put_failure;
- if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
+ if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
goto nla_put_failure;
nlmsg_end(dump->skb, nlh);
@@ -848,16 +1077,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *filter_dev, int idx)
{
struct switchdev_fdb_dump dump = {
- .obj = {
- .id = SWITCHDEV_OBJ_PORT_FDB,
- .cb = switchdev_port_fdb_dump_cb,
- },
+ .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
+ .dev = dev,
.skb = skb,
.cb = cb,
.idx = idx,
};
- switchdev_port_obj_dump(dev, &dump.obj);
+ switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
@@ -888,12 +1115,14 @@ static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
};
struct switchdev_attr prev_attr;
struct net_device *dev = NULL;
int nhsel;
+ ASSERT_RTNL();
+
/* For this route, all nexthop devs must be on the same switch. */
for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
@@ -935,21 +1164,20 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
- struct switchdev_obj fib_obj = {
- .id = SWITCHDEV_OBJ_IPV4_FIB,
- .u.ipv4_fib = {
- .dst = dst,
- .dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .nlflags = nlflags,
- .tb_id = tb_id,
- },
+ struct switchdev_obj_ipv4_fib ipv4_fib = {
+ .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
+ .dst = dst,
+ .dst_len = dst_len,
+ .tos = tos,
+ .type = type,
+ .nlflags = nlflags,
+ .tb_id = tb_id,
};
struct net_device *dev;
int err = 0;
+ memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
+
/* Don't offload route if using custom ip rules or if
* IPv4 FIB offloading has been disabled completely.
*/
@@ -966,7 +1194,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
if (!dev)
return 0;
- err = switchdev_port_obj_add(dev, &fib_obj);
+ err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
if (!err)
fi->fib_flags |= RTNH_F_OFFLOAD;
@@ -989,21 +1217,20 @@ EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
u8 tos, u8 type, u32 tb_id)
{
- struct switchdev_obj fib_obj = {
- .id = SWITCHDEV_OBJ_IPV4_FIB,
- .u.ipv4_fib = {
- .dst = dst,
- .dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .nlflags = 0,
- .tb_id = tb_id,
- },
+ struct switchdev_obj_ipv4_fib ipv4_fib = {
+ .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
+ .dst = dst,
+ .dst_len = dst_len,
+ .tos = tos,
+ .type = type,
+ .nlflags = 0,
+ .tb_id = tb_id,
};
struct net_device *dev;
int err = 0;
+ memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
+
if (!(fi->fib_flags & RTNH_F_OFFLOAD))
return 0;
@@ -1011,7 +1238,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
if (!dev)
return 0;
- err = switchdev_port_obj_del(dev, &fib_obj);
+ err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
if (!err)
fi->fib_flags &= ~RTNH_F_OFFLOAD;
@@ -1043,11 +1270,11 @@ static bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b)
{
struct switchdev_attr a_attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
struct switchdev_attr b_attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
@@ -1126,10 +1353,11 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
u32 mark = dev->ifindex;
u32 reset_mark = 0;
- if (group_dev && joining) {
- mark = switchdev_port_fwd_mark_get(dev, group_dev);
- } else if (group_dev && !joining) {
- if (dev->offload_fwd_mark == mark)
+ if (group_dev) {
+ ASSERT_RTNL();
+ if (joining)
+ mark = switchdev_port_fwd_mark_get(dev, group_dev);
+ else if (dev->offload_fwd_mark == mark)
/* Ohoh, this port was the mark reference port,
* but it's leaving the group, so reset the
* mark for the remaining ports in the group.