diff options
Diffstat (limited to 'drivers')
| -rw-r--r-- | drivers/net/Kconfig | 1 | ||||
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/core.h | 21 | ||||
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/pci.c | 55 | ||||
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 71 | ||||
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 26 | ||||
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 | ||||
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 71 | ||||
| -rw-r--r-- | drivers/net/netdevsim/Makefile | 4 | ||||
| -rw-r--r-- | drivers/net/netdevsim/dev.c | 17 | ||||
| -rw-r--r-- | drivers/net/netdevsim/netdevsim.h | 15 | ||||
| -rw-r--r-- | drivers/net/netdevsim/psample.c | 264 |
11 files changed, 512 insertions, 35 deletions
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index bcd31f458d1a..5895905b6aa1 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -579,6 +579,7 @@ config NETDEVSIM depends on DEBUG_FS depends on INET depends on IPV6 || IPV6=n + depends on PSAMPLE || PSAMPLE=n select NET_DEVLINK help This driver is a developer testing tool and software model that can diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 8af7d9d03475..80712dc803d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -58,6 +58,25 @@ struct mlxsw_tx_info { bool is_emad; }; +struct mlxsw_rx_md_info { + u32 cookie_index; + u32 latency; + u32 tx_congestion; + union { + /* Valid when 'tx_port_valid' is set. */ + u16 tx_sys_port; + u16 tx_lag_id; + }; + u8 tx_lag_port_index; /* Valid when 'tx_port_is_lag' is set. */ + u8 tx_tc; + u8 latency_valid:1, + tx_congestion_valid:1, + tx_tc_valid:1, + tx_port_valid:1, + tx_port_is_lag:1, + unused:3; +}; + bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, @@ -515,7 +534,7 @@ enum mlxsw_devlink_param_id { struct mlxsw_skb_cb { union { struct mlxsw_tx_info tx_info; - u32 cookie_index; /* Only used during receive */ + struct mlxsw_rx_md_info rx_md_info; }; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index d0052537e627..8e8456811384 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -540,6 +540,55 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, spin_unlock(&q->lock); } +static void mlxsw_pci_cqe_rdq_md_tx_port_init(struct sk_buff *skb, + const char *cqe) +{ + struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb); + + if (mlxsw_pci_cqe2_tx_lag_get(cqe)) { + cb->rx_md_info.tx_port_is_lag = true; 
+ cb->rx_md_info.tx_lag_id = mlxsw_pci_cqe2_tx_lag_id_get(cqe); + cb->rx_md_info.tx_lag_port_index = + mlxsw_pci_cqe2_tx_lag_subport_get(cqe); + } else { + cb->rx_md_info.tx_port_is_lag = false; + cb->rx_md_info.tx_sys_port = + mlxsw_pci_cqe2_tx_system_port_get(cqe); + } + + if (cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT && + cb->rx_md_info.tx_sys_port != MLXSW_PCI_CQE2_TX_PORT_INVALID) + cb->rx_md_info.tx_port_valid = 1; + else + cb->rx_md_info.tx_port_valid = 0; +} + +static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe) +{ + struct mlxsw_skb_cb *cb = mlxsw_skb_cb(skb); + + cb->rx_md_info.tx_congestion = mlxsw_pci_cqe2_mirror_cong_get(cqe); + if (cb->rx_md_info.tx_congestion != MLXSW_PCI_CQE2_MIRROR_CONG_INVALID) + cb->rx_md_info.tx_congestion_valid = 1; + else + cb->rx_md_info.tx_congestion_valid = 0; + cb->rx_md_info.tx_congestion <<= MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT; + + cb->rx_md_info.latency = mlxsw_pci_cqe2_mirror_latency_get(cqe); + if (cb->rx_md_info.latency != MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID) + cb->rx_md_info.latency_valid = 1; + else + cb->rx_md_info.latency_valid = 0; + + cb->rx_md_info.tx_tc = mlxsw_pci_cqe2_mirror_tclass_get(cqe); + if (cb->rx_md_info.tx_tc != MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID) + cb->rx_md_info.tx_tc_valid = 1; + else + cb->rx_md_info.tx_tc_valid = 0; + + mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe); +} + static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q, u16 consumer_counter_limit, @@ -581,11 +630,15 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, if (mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) cookie_index = mlxsw_pci_cqe2_user_def_val_orig_pkt_len_get(cqe); - mlxsw_skb_cb(skb)->cookie_index = cookie_index; + mlxsw_skb_cb(skb)->rx_md_info.cookie_index = cookie_index; } else if (rx_info.trap_id >= MLXSW_TRAP_ID_MIRROR_SESSION0 && rx_info.trap_id <= MLXSW_TRAP_ID_MIRROR_SESSION7 && mlxsw_pci->max_cqe_ver >= 
MLXSW_PCI_CQE_V2) { rx_info.mirror_reason = mlxsw_pci_cqe2_mirror_reason_get(cqe); + mlxsw_pci_cqe_rdq_md_init(skb, cqe); + } else if (rx_info.trap_id == MLXSW_TRAP_ID_PKT_SAMPLE && + mlxsw_pci->max_cqe_ver >= MLXSW_PCI_CQE_V2) { + mlxsw_pci_cqe_rdq_md_tx_port_init(skb, cqe); } byte_count = mlxsw_pci_cqe_byte_count_get(cqe); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index a2c1fbd3e0d1..7b531228d6c0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -173,6 +173,15 @@ MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16); */ MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14); +#define MLXSW_PCI_CQE2_MIRROR_CONG_INVALID 0xFFFF + +/* pci_cqe_mirror_cong_high + * Congestion level in units of 8KB of the egress traffic class of the original + * packet that does mirroring to the CPU. Value of 0xFFFF means that the + * congestion level is invalid. + */ +MLXSW_ITEM32(pci, cqe2, mirror_cong_high, 0x08, 16, 4); + /* pci_cqe_trap_id * Trap ID that captured the packet. */ @@ -208,6 +217,59 @@ MLXSW_ITEM32(pci, cqe0, dqn, 0x0C, 1, 5); MLXSW_ITEM32(pci, cqe12, dqn, 0x0C, 1, 6); mlxsw_pci_cqe_item_helpers(dqn, 0, 12, 12); +#define MLXSW_PCI_CQE2_MIRROR_TCLASS_INVALID 0x1F + +/* pci_cqe_mirror_tclass + * The egress traffic class of the original packet that does mirroring to the + * CPU. Value of 0x1F means that the traffic class is invalid. + */ +MLXSW_ITEM32(pci, cqe2, mirror_tclass, 0x10, 27, 5); + +/* pci_cqe_tx_lag + * The Tx port of a packet that is mirrored / sampled to the CPU is a LAG. + */ +MLXSW_ITEM32(pci, cqe2, tx_lag, 0x10, 24, 1); + +/* pci_cqe_tx_lag_subport + * The port index within the LAG of a packet that is mirrored / sampled to the + * CPU. Reserved when tx_lag is 0. 
+ */ +MLXSW_ITEM32(pci, cqe2, tx_lag_subport, 0x10, 16, 8); + +#define MLXSW_PCI_CQE2_TX_PORT_MULTI_PORT 0xFFFE +#define MLXSW_PCI_CQE2_TX_PORT_INVALID 0xFFFF + +/* pci_cqe_tx_lag_id + * The Tx LAG ID of the original packet that is mirrored / sampled to the CPU. + * Value of 0xFFFE means multi-port. Value of 0xFFFF means that the Tx LAG ID + * is invalid. Reserved when tx_lag is 0. + */ +MLXSW_ITEM32(pci, cqe2, tx_lag_id, 0x10, 0, 16); + +/* pci_cqe_tx_system_port + * The Tx port of the original packet that is mirrored / sampled to the CPU. + * Value of 0xFFFE means multi-port. Value of 0xFFFF means that the Tx port is + * invalid. Reserved when tx_lag is 1. + */ +MLXSW_ITEM32(pci, cqe2, tx_system_port, 0x10, 0, 16); + +/* pci_cqe_mirror_cong_low + * Congestion level in units of 8KB of the egress traffic class of the original + * packet that does mirroring to the CPU. Value of 0xFFFF means that the + * congestion level is invalid. + */ +MLXSW_ITEM32(pci, cqe2, mirror_cong_low, 0x14, 20, 12); + +#define MLXSW_PCI_CQE2_MIRROR_CONG_SHIFT 13 /* Units of 8KB. */ + +static inline u16 mlxsw_pci_cqe2_mirror_cong_get(const char *cqe) +{ + u16 cong_high = mlxsw_pci_cqe2_mirror_cong_high_get(cqe); + u16 cong_low = mlxsw_pci_cqe2_mirror_cong_low_get(cqe); + + return cong_high << 12 | cong_low; +} + /* pci_cqe_user_def_val_orig_pkt_len * When trap_id is an ACL: User defined value from policy engine action. */ @@ -218,6 +280,15 @@ MLXSW_ITEM32(pci, cqe2, user_def_val_orig_pkt_len, 0x14, 0, 20); */ MLXSW_ITEM32(pci, cqe2, mirror_reason, 0x18, 24, 8); +#define MLXSW_PCI_CQE2_MIRROR_LATENCY_INVALID 0xFFFFFF + +/* pci_cqe_mirror_latency + * End-to-end latency of the original packet that does mirroring to the CPU. + * Value of 0xFFFFFF means that the latency is invalid. Units are according to + * MOGCR.mirror_latency_units. + */ +MLXSW_ITEM32(pci, cqe2, mirror_latency, 0x1C, 8, 24); + /* pci_cqe_owner * Ownership bit.
*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 93b15b8c007e..6054147fd51c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2212,32 +2212,6 @@ void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port); } -void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, - u8 local_port) -{ - struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; - struct mlxsw_sp_port_sample *sample; - u32 size; - - if (unlikely(!mlxsw_sp_port)) { - dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received for non-existent port\n", - local_port); - goto out; - } - - rcu_read_lock(); - sample = rcu_dereference(mlxsw_sp_port->sample); - if (!sample) - goto out_unlock; - size = sample->truncate ? sample->trunc_size : skb->len; - psample_sample_packet(sample->psample_group, skb, size, - mlxsw_sp_port->dev->ifindex, 0, sample->rate); -out_unlock: - rcu_read_unlock(); -out: - consume_skb(skb); -} - #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bc7006de7873..0082f70daff3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -570,8 +570,6 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, u8 local_port, void *priv); void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, u8 local_port); -void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, - u8 local_port); int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port 
*mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index e0e6ee58d31a..056201029ce5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -108,7 +108,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port, static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u8 local_port, void *trap_ctx) { - u32 cookie_index = mlxsw_skb_cb(skb)->cookie_index; + u32 cookie_index = mlxsw_skb_cb(skb)->rx_md_info.cookie_index; const struct flow_action_cookie *fa_cookie; struct devlink_port *in_devlink_port; struct mlxsw_sp_port *mlxsw_sp_port; @@ -204,21 +204,86 @@ static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port, mlxsw_sp_ptp_receive(mlxsw_sp, skb, local_port); } +static struct mlxsw_sp_port * +mlxsw_sp_sample_tx_port_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_rx_md_info *rx_md_info) +{ + u8 local_port; + + if (!rx_md_info->tx_port_valid) + return NULL; + + if (rx_md_info->tx_port_is_lag) + local_port = mlxsw_core_lag_mapping_get(mlxsw_sp->core, + rx_md_info->tx_lag_id, + rx_md_info->tx_lag_port_index); + else + local_port = rx_md_info->tx_sys_port; + + if (local_port >= mlxsw_core_max_ports(mlxsw_sp->core)) + return NULL; + + return mlxsw_sp->ports[local_port]; +} + +/* The latency units are determined according to MOGCR.mirror_latency_units. It + * defaults to 64 nanoseconds. + */ +#define MLXSW_SP_MIRROR_LATENCY_SHIFT 6 + +static void mlxsw_sp_psample_md_init(struct mlxsw_sp *mlxsw_sp, + struct psample_metadata *md, + struct sk_buff *skb, int in_ifindex, + bool truncate, u32 trunc_size) +{ + struct mlxsw_rx_md_info *rx_md_info = &mlxsw_skb_cb(skb)->rx_md_info; + struct mlxsw_sp_port *mlxsw_sp_port; + + md->trunc_size = truncate ? 
trunc_size : skb->len; + md->in_ifindex = in_ifindex; + mlxsw_sp_port = mlxsw_sp_sample_tx_port_get(mlxsw_sp, rx_md_info); + md->out_ifindex = mlxsw_sp_port && mlxsw_sp_port->dev ? + mlxsw_sp_port->dev->ifindex : 0; + md->out_tc_valid = rx_md_info->tx_tc_valid; + md->out_tc = rx_md_info->tx_tc; + md->out_tc_occ_valid = rx_md_info->tx_congestion_valid; + md->out_tc_occ = rx_md_info->tx_congestion; + md->latency_valid = rx_md_info->latency_valid; + md->latency = rx_md_info->latency; + md->latency <<= MLXSW_SP_MIRROR_LATENCY_SHIFT; +} + static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port, void *trap_ctx) { struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx); + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp_port_sample *sample; + struct psample_metadata md = {}; int err; err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx); if (err) return; - /* The sample handler expects skb->data to point to the start of the + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto out; + + sample = rcu_dereference(mlxsw_sp_port->sample); + if (!sample) + goto out; + + /* The psample module expects skb->data to point to the start of the * Ethernet header. 
*/ skb_push(skb, ETH_HLEN); - mlxsw_sp_sample_receive(mlxsw_sp, skb, local_port); + mlxsw_sp_psample_md_init(mlxsw_sp, &md, skb, + mlxsw_sp_port->dev->ifindex, sample->truncate, + sample->trunc_size); + psample_sample_packet(sample->psample_group, skb, sample->rate, &md); +out: + consume_skb(skb); } #define MLXSW_SP_TRAP_DROP(_id, _group_id) \ diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index ade086eed955..a1cbfa44a1e1 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -13,3 +13,7 @@ endif ifneq ($(CONFIG_XFRM_OFFLOAD),) netdevsim-objs += ipsec.o endif + +ifneq ($(CONFIG_PSAMPLE),) +netdevsim-objs += psample.o +endif diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index dbeb29fa16e8..6189a4c0d39e 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -1032,10 +1032,14 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, if (err) goto err_fib_destroy; - err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + err = nsim_dev_psample_init(nsim_dev); if (err) goto err_health_exit; + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_psample_exit; + nsim_dev->take_snapshot = debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, @@ -1043,6 +1047,8 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, &nsim_dev_take_snapshot_fops); return 0; +err_psample_exit: + nsim_dev_psample_exit(nsim_dev); err_health_exit: nsim_dev_health_exit(nsim_dev); err_fib_destroy: @@ -1118,14 +1124,20 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_health_exit; - err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + err = nsim_dev_psample_init(nsim_dev); if (err) goto err_bpf_dev_exit; + err = nsim_dev_port_add_all(nsim_dev, nsim_bus_dev->port_count); + if (err) + goto err_psample_exit; + devlink_params_publish(devlink); devlink_reload_enable(devlink); return 0; 
+err_psample_exit: + nsim_dev_psample_exit(nsim_dev); err_bpf_dev_exit: nsim_bpf_dev_exit(nsim_dev); err_health_exit: @@ -1158,6 +1170,7 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) return; debugfs_remove(nsim_dev->take_snapshot); nsim_dev_port_del_all(nsim_dev); + nsim_dev_psample_exit(nsim_dev); nsim_dev_health_exit(nsim_dev); nsim_fib_destroy(devlink, nsim_dev->fib_data); nsim_dev_traps_exit(devlink); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 48163c5f2ec9..d735c21def4b 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -180,6 +180,20 @@ struct nsim_dev_health { int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink); void nsim_dev_health_exit(struct nsim_dev *nsim_dev); +#if IS_ENABLED(CONFIG_PSAMPLE) +int nsim_dev_psample_init(struct nsim_dev *nsim_dev); +void nsim_dev_psample_exit(struct nsim_dev *nsim_dev); +#else +static inline int nsim_dev_psample_init(struct nsim_dev *nsim_dev) +{ + return 0; +} + +static inline void nsim_dev_psample_exit(struct nsim_dev *nsim_dev) +{ +} +#endif + struct nsim_dev_port { struct list_head list; struct devlink_port devlink_port; @@ -229,6 +243,7 @@ struct nsim_dev { bool static_iana_vxlan; u32 sleep; } udp_ports; + struct nsim_dev_psample *psample; }; static inline struct net *nsim_dev_net(struct nsim_dev *nsim_dev) diff --git a/drivers/net/netdevsim/psample.c b/drivers/net/netdevsim/psample.c new file mode 100644 index 000000000000..5ec3bd7f891b --- /dev/null +++ b/drivers/net/netdevsim/psample.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Mellanox Technologies. 
All rights reserved */ + +#include <linux/debugfs.h> +#include <linux/err.h> +#include <linux/etherdevice.h> +#include <linux/inet.h> +#include <linux/kernel.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <net/devlink.h> +#include <net/ip.h> +#include <net/psample.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/udp.h> + +#include "netdevsim.h" + +#define NSIM_PSAMPLE_REPORT_INTERVAL_MS 100 +#define NSIM_PSAMPLE_INVALID_TC 0xFFFF +#define NSIM_PSAMPLE_L4_DATA_LEN 100 + +struct nsim_dev_psample { + struct delayed_work psample_dw; + struct dentry *ddir; + struct psample_group *group; + u32 rate; + u32 group_num; + u32 trunc_size; + int in_ifindex; + int out_ifindex; + u16 out_tc; + u64 out_tc_occ_max; + u64 latency_max; + bool is_active; +}; + +static struct sk_buff *nsim_dev_psample_skb_build(void) +{ + int tot_len, data_len = NSIM_PSAMPLE_L4_DATA_LEN; + struct sk_buff *skb; + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return NULL; + tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len; + + skb_reset_mac_header(skb); + eth = skb_put(skb, sizeof(struct ethhdr)); + eth_random_addr(eth->h_dest); + eth_random_addr(eth->h_source); + eth->h_proto = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); + + skb_set_network_header(skb, skb->len); + iph = skb_put(skb, sizeof(struct iphdr)); + iph->protocol = IPPROTO_UDP; + iph->saddr = in_aton("192.0.2.1"); + iph->daddr = in_aton("198.51.100.1"); + iph->version = 0x4; + iph->frag_off = 0; + iph->ihl = 0x5; + iph->tot_len = htons(tot_len); + iph->id = 0; + iph->ttl = 100; + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + + skb_set_transport_header(skb, skb->len); + udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len); + get_random_bytes(&udph->source, sizeof(u16)); + get_random_bytes(&udph->dest, sizeof(u16)); + udph->len = htons(sizeof(struct udphdr) + data_len); + + return 
skb; +} + +static void nsim_dev_psample_md_prepare(const struct nsim_dev_psample *psample, + struct psample_metadata *md) +{ + md->trunc_size = psample->trunc_size; + md->in_ifindex = psample->in_ifindex; + md->out_ifindex = psample->out_ifindex; + + if (psample->out_tc != NSIM_PSAMPLE_INVALID_TC) { + md->out_tc = psample->out_tc; + md->out_tc_valid = 1; + } + + if (psample->out_tc_occ_max) { + u64 out_tc_occ; + + get_random_bytes(&out_tc_occ, sizeof(u64)); + md->out_tc_occ = out_tc_occ & (psample->out_tc_occ_max - 1); + md->out_tc_occ_valid = 1; + } + + if (psample->latency_max) { + u64 latency; + + get_random_bytes(&latency, sizeof(u64)); + md->latency = latency & (psample->latency_max - 1); + md->latency_valid = 1; + } +} + +static void nsim_dev_psample_report_work(struct work_struct *work) +{ + struct nsim_dev_psample *psample; + struct psample_metadata md = {}; + struct sk_buff *skb; + unsigned long delay; + + psample = container_of(work, struct nsim_dev_psample, psample_dw.work); + + skb = nsim_dev_psample_skb_build(); + if (!skb) + goto out; + + nsim_dev_psample_md_prepare(psample, &md); + psample_sample_packet(psample->group, skb, psample->rate, &md); + consume_skb(skb); + +out: + delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS); + schedule_delayed_work(&psample->psample_dw, delay); +} + +static int nsim_dev_psample_enable(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_psample *psample = nsim_dev->psample; + struct devlink *devlink; + unsigned long delay; + + if (psample->is_active) + return -EBUSY; + + devlink = priv_to_devlink(nsim_dev); + psample->group = psample_group_get(devlink_net(devlink), + psample->group_num); + if (!psample->group) + return -EINVAL; + + delay = msecs_to_jiffies(NSIM_PSAMPLE_REPORT_INTERVAL_MS); + schedule_delayed_work(&psample->psample_dw, delay); + + psample->is_active = true; + + return 0; +} + +static int nsim_dev_psample_disable(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_psample *psample = nsim_dev->psample; + 
+ if (!psample->is_active) + return -EINVAL; + + psample->is_active = false; + + cancel_delayed_work_sync(&psample->psample_dw); + psample_group_put(psample->group); + + return 0; +} + +static ssize_t nsim_dev_psample_enable_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev *nsim_dev = file->private_data; + bool enable; + int err; + + err = kstrtobool_from_user(data, count, &enable); + if (err) + return err; + + if (enable) + err = nsim_dev_psample_enable(nsim_dev); + else + err = nsim_dev_psample_disable(nsim_dev); + + return err ? err : count; +} + +static const struct file_operations nsim_psample_enable_fops = { + .open = simple_open, + .write = nsim_dev_psample_enable_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + +int nsim_dev_psample_init(struct nsim_dev *nsim_dev) +{ + struct nsim_dev_psample *psample; + int err; + + psample = kzalloc(sizeof(*psample), GFP_KERNEL); + if (!psample) + return -ENOMEM; + nsim_dev->psample = psample; + + INIT_DELAYED_WORK(&psample->psample_dw, nsim_dev_psample_report_work); + + psample->ddir = debugfs_create_dir("psample", nsim_dev->ddir); + if (IS_ERR(psample->ddir)) { + err = PTR_ERR(psample->ddir); + goto err_psample_free; + } + + /* Populate sampling parameters with sane defaults. 
*/ + psample->rate = 100; + debugfs_create_u32("rate", 0600, psample->ddir, &psample->rate); + + psample->group_num = 10; + debugfs_create_u32("group_num", 0600, psample->ddir, + &psample->group_num); + + psample->trunc_size = 0; + debugfs_create_u32("trunc_size", 0600, psample->ddir, + &psample->trunc_size); + + psample->in_ifindex = 1; + debugfs_create_u32("in_ifindex", 0600, psample->ddir, + &psample->in_ifindex); + + psample->out_ifindex = 2; + debugfs_create_u32("out_ifindex", 0600, psample->ddir, + &psample->out_ifindex); + + psample->out_tc = 0; + debugfs_create_u16("out_tc", 0600, psample->ddir, &psample->out_tc); + + psample->out_tc_occ_max = 10000; + debugfs_create_u64("out_tc_occ_max", 0600, psample->ddir, + &psample->out_tc_occ_max); + + psample->latency_max = 50; + debugfs_create_u64("latency_max", 0600, psample->ddir, + &psample->latency_max); + + debugfs_create_file("enable", 0200, psample->ddir, nsim_dev, + &nsim_psample_enable_fops); + + return 0; + +err_psample_free: + kfree(nsim_dev->psample); + return err; +} + +void nsim_dev_psample_exit(struct nsim_dev *nsim_dev) +{ + debugfs_remove_recursive(nsim_dev->psample->ddir); + if (nsim_dev->psample->is_active) { + cancel_delayed_work_sync(&nsim_dev->psample->psample_dw); + psample_group_put(nsim_dev->psample->group); + } + kfree(nsim_dev->psample); +} |
