diff options
| author | David S. Miller <davem@davemloft.net> | 2023-08-27 06:56:54 +0100 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2023-08-27 06:56:54 +0100 |
| commit | 2cc88bbcbb612d478e73bcc6b8d8bc1f0612665d (patch) | |
| tree | 06a5cb6681b12a6de186dd8c4a081327f7c9c042 /drivers/net/ethernet/sfc/tc.c | |
| parent | b32add2d20ea6e62f30a3c0a7c2fb306ec5ceb3d (diff) | |
| parent | e8e0bd60e4833e5bcfa220d7b4c07456c83c1ea2 (diff) | |
Merge branch 'sfc-pedit-offloads'
Pieter Jansen van Vuuren says:
====================
sfc: introduce eth, ipv4 and ipv6 pedit offloads
This set introduces mac source and destination pedit set action offloads.
It also adds offloads for the ipv4 ttl and ipv6 hop limit pedit set actions, as
well as pedit add actions that would result in the same semantics as
decrementing the ttl and hop limit.
v2:
- fix 'efx_tc_mangle' kdoc which was orphaned when adding 'efx_tc_pedit_add'.
- add description of 'match' in 'efx_tc_mangle' kdoc.
- correct some inconsistent kdoc indentation.
v1: https://lore.kernel.org/netdev/20230823111725.28090-1-pieter.jansen-van-vuuren@amd.com/
====================
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/sfc/tc.c')
| -rw-r--r-- | drivers/net/ethernet/sfc/tc.c | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 039180c61c83..047322b04d4f 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -31,6 +31,9 @@ enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) return EFX_ENCAP_TYPE_NONE; } +#define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff) +/* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */ +#define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000) #define EFX_EFV_PF NULL /* Look up the representor information (efv) for a device. * May return NULL for the PF (us), or an error pointer for a device that @@ -86,6 +89,12 @@ s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) return mport; } +static const struct rhashtable_params efx_tc_mac_ht_params = { + .key_len = offsetofend(struct efx_tc_mac_pedit_action, h_addr), + .key_offset = 0, + .head_offset = offsetof(struct efx_tc_mac_pedit_action, linkage), +}; + static const struct rhashtable_params efx_tc_encap_match_ht_params = { .key_len = offsetof(struct efx_tc_encap_match, linkage), .key_offset = 0, @@ -110,6 +119,56 @@ static const struct rhashtable_params efx_tc_recirc_ht_params = { .head_offset = offsetof(struct efx_tc_recirc_id, linkage), }; +static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx, + unsigned char h_addr[ETH_ALEN], + struct netlink_ext_ack *extack) +{ + struct efx_tc_mac_pedit_action *ped, *old; + int rc; + + ped = kzalloc(sizeof(*ped), GFP_USER); + if (!ped) + return ERR_PTR(-ENOMEM); + memcpy(ped->h_addr, h_addr, ETH_ALEN); + old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht, + &ped->linkage, + efx_tc_mac_ht_params); + if (old) { + /* don't need our new entry */ + kfree(ped); + if (!refcount_inc_not_zero(&old->ref)) + return ERR_PTR(-EAGAIN); + /* existing entry found, ref taken */ + return old; + } + + rc = efx_mae_allocate_pedit_mac(efx, ped); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Failed 
to store pedit MAC address in hw"); + goto out_remove; + } + + /* ref and return */ + refcount_set(&ped->ref, 1); + return ped; +out_remove: + rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage, + efx_tc_mac_ht_params); + kfree(ped); + return ERR_PTR(rc); +} + +static void efx_tc_flower_put_mac(struct efx_nic *efx, + struct efx_tc_mac_pedit_action *ped) +{ + if (!refcount_dec_and_test(&ped->ref)) + return; /* still in use */ + rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage, + efx_tc_mac_ht_params); + efx_mae_free_pedit_mac(efx, ped); + kfree(ped); +} + static void efx_tc_free_action_set(struct efx_nic *efx, struct efx_tc_action_set *act, bool in_hw) { @@ -135,6 +194,10 @@ static void efx_tc_free_action_set(struct efx_nic *efx, list_del(&act->encap_user); efx_tc_flower_release_encap_md(efx, act->encap_md); } + if (act->src_mac) + efx_tc_flower_put_mac(efx, act->src_mac); + if (act->dst_mac) + efx_tc_flower_put_mac(efx, act->dst_mac); kfree(act); } @@ -697,6 +760,8 @@ static const char *efx_tc_encap_type_name(enum efx_encap_type typ) /* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */ enum efx_tc_action_order { EFX_TC_AO_DECAP, + EFX_TC_AO_DEC_TTL, + EFX_TC_AO_PEDIT_MAC_ADDRS, EFX_TC_AO_VLAN_POP, EFX_TC_AO_VLAN_PUSH, EFX_TC_AO_COUNT, @@ -711,6 +776,15 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act, case EFX_TC_AO_DECAP: if (act->decap) return false; + /* PEDIT_MAC_ADDRS must not happen before DECAP, though it + * can wait until much later + */ + if (act->dst_mac || act->src_mac) + return false; + + /* Decrementing ttl must not happen before DECAP */ + if (act->do_ttl_dec) + return false; fallthrough; case EFX_TC_AO_VLAN_POP: if (act->vlan_pop >= 2) @@ -730,12 +804,17 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act, if (act->count) return false; fallthrough; + case EFX_TC_AO_PEDIT_MAC_ADDRS: case EFX_TC_AO_ENCAP: if (act->encap_md) return false; fallthrough; case
EFX_TC_AO_DELIVER: return !act->deliver; + case EFX_TC_AO_DEC_TTL: + if (act->encap_md) + return false; + return !act->do_ttl_dec; default: /* Bad caller. Whatever they wanted to do, say they can't. */ WARN_ON_ONCE(1); @@ -900,6 +979,375 @@ static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx, efx_tc_flower_put_counter_index(efx, act->count); } +/** + * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields + * + * @dst_mac_32: dst_mac[0:3] has been populated + * @dst_mac_16: dst_mac[4:5] has been populated + * @src_mac_16: src_mac[0:1] has been populated + * @src_mac_32: src_mac[2:5] has been populated + * @dst_mac: h_dest field of ethhdr + * @src_mac: h_source field of ethhdr + * + * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not + * necessarily equate to whole fields of the packet header, this + * structure is used to hold the cumulative effect of the partial + * field pedits that have been processed so far. + */ +struct efx_tc_mangler_state { + u8 dst_mac_32:1; /* eth->h_dest[0:3] */ + u8 dst_mac_16:1; /* eth->h_dest[4:5] */ + u8 src_mac_16:1; /* eth->h_source[0:1] */ + u8 src_mac_32:1; /* eth->h_source[2:5] */ + unsigned char dst_mac[ETH_ALEN]; + unsigned char src_mac[ETH_ALEN]; +}; + +/** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung + * @efx: NIC we're installing a flow rule on + * @act: action set (cursor) to update + * @mung: accumulated partial mangles + * @extack: netlink extended ack for reporting errors + * + * Check @mung to find any combinations of partial mangles that can be + * combined into a complete packet field edit, add that edit to @act, + * and consume the partial mangles from @mung. 
+ */ + +static int efx_tc_complete_mac_mangle(struct efx_nic *efx, + struct efx_tc_action_set *act, + struct efx_tc_mangler_state *mung, + struct netlink_ext_ack *extack) +{ + struct efx_tc_mac_pedit_action *ped; + + if (mung->dst_mac_32 && mung->dst_mac_16) { + ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack); + if (IS_ERR(ped)) + return PTR_ERR(ped); + + /* Check that we have not already populated dst_mac */ + if (act->dst_mac) + efx_tc_flower_put_mac(efx, act->dst_mac); + + act->dst_mac = ped; + + /* consume the incomplete state */ + mung->dst_mac_32 = 0; + mung->dst_mac_16 = 0; + } + if (mung->src_mac_16 && mung->src_mac_32) { + ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack); + if (IS_ERR(ped)) + return PTR_ERR(ped); + + /* Check that we have not already populated src_mac */ + if (act->src_mac) + efx_tc_flower_put_mac(efx, act->src_mac); + + act->src_mac = ped; + + /* consume the incomplete state */ + mung->src_mac_32 = 0; + mung->src_mac_16 = 0; + } + return 0; +} + +static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act, + const struct flow_action_entry *fa, + struct netlink_ext_ack *extack) +{ + switch (fa->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + switch (fa->mangle.offset) { + case offsetof(struct iphdr, ttl): + /* check that pedit applies to ttl only */ + if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) + break; + + /* Adding 0xff is equivalent to decrementing the ttl. + * Other added values are not supported. 
+ */ + if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX) + break; + + /* check that we do not decrement ttl twice */ + if (!efx_tc_flower_action_order_ok(act, + EFX_TC_AO_DEC_TTL)) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); + return -EOPNOTSUPP; + } + act->do_ttl_dec = 1; + return 0; + default: + break; + } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + switch (fa->mangle.offset) { + case round_down(offsetof(struct ipv6hdr, hop_limit), 4): + /* check that pedit applies to hoplimit only */ + if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) + break; + + /* Adding 0xff is equivalent to decrementing the hoplimit. + * Other added values are not supported. + */ + if ((fa->mangle.val >> 24) != U8_MAX) + break; + + /* check that we do not decrement hoplimit twice */ + if (!efx_tc_flower_action_order_ok(act, + EFX_TC_AO_DEC_TTL)) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported: multiple dec ttl"); + return -EOPNOTSUPP; + } + act->do_ttl_dec = 1; + return 0; + default: + break; + } + break; + default: + break; + } + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: ttl add action type %x %x %x/%x", + fa->mangle.htype, fa->mangle.offset, + fa->mangle.val, fa->mangle.mask); + return -EOPNOTSUPP; +} + +/** + * efx_tc_mangle() - handle a single 32-bit (or less) pedit + * @efx: NIC we're installing a flow rule on + * @act: action set (cursor) to update + * @fa: FLOW_ACTION_MANGLE action metadata + * @mung: accumulator for partial mangles + * @extack: netlink extended ack for reporting errors + * @match: original match used along with the mangle action + * + * Identify the fields written by a FLOW_ACTION_MANGLE, and record + * the partial mangle state in @mung. If this mangle completes an + * earlier partial mangle, consume and apply to @act by calling + * efx_tc_complete_mac_mangle(). 
+ */ + +static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act, + const struct flow_action_entry *fa, + struct efx_tc_mangler_state *mung, + struct netlink_ext_ack *extack, + struct efx_tc_match *match) +{ + __le32 mac32; + __le16 mac16; + u8 tr_ttl; + + switch (fa->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_ETH: + BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0); + BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6); + if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) { + NL_SET_ERR_MSG_MOD(extack, + "Pedit mangle mac action violates action order"); + return -EOPNOTSUPP; + } + switch (fa->mangle.offset) { + case 0: + if (fa->mangle.mask) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: mask (%#x) of eth.dst32 mangle", + fa->mangle.mask); + return -EOPNOTSUPP; + } + /* Ethernet address is little-endian */ + mac32 = cpu_to_le32(fa->mangle.val); + memcpy(mung->dst_mac, &mac32, sizeof(mac32)); + mung->dst_mac_32 = 1; + return efx_tc_complete_mac_mangle(efx, act, mung, extack); + case 4: + if (fa->mangle.mask == 0xffff) { + mac16 = cpu_to_le16(fa->mangle.val >> 16); + memcpy(mung->src_mac, &mac16, sizeof(mac16)); + mung->src_mac_16 = 1; + } else if (fa->mangle.mask == 0xffff0000) { + mac16 = cpu_to_le16((u16)fa->mangle.val); + memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16)); + mung->dst_mac_16 = 1; + } else { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: mask (%#x) of eth+4 mangle is not high or low 16b", + fa->mangle.mask); + return -EOPNOTSUPP; + } + return efx_tc_complete_mac_mangle(efx, act, mung, extack); + case 8: + if (fa->mangle.mask) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: mask (%#x) of eth.src32 mangle", + fa->mangle.mask); + return -EOPNOTSUPP; + } + mac32 = cpu_to_le32(fa->mangle.val); + memcpy(mung->src_mac + 2, &mac32, sizeof(mac32)); + mung->src_mac_32 = 1; + return efx_tc_complete_mac_mangle(efx, act, mung, extack); + default: + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported: mangle eth+%u 
%x/%x", + fa->mangle.offset, fa->mangle.val, fa->mangle.mask); + return -EOPNOTSUPP; + } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + switch (fa->mangle.offset) { + case offsetof(struct iphdr, ttl): + /* we currently only support pedit IP4 when it applies + * to TTL and then only when it can be achieved with a + * decrement ttl action + */ + + /* check that pedit applies to ttl only */ + if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: mask (%#x) out of range, only support mangle action on ipv4.ttl", + fa->mangle.mask); + return -EOPNOTSUPP; + } + + /* we can only convert to a dec ttl when we have an + * exact match on the ttl field + */ + if (match->mask.ip_ttl != U8_MAX) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: only support mangle ipv4.ttl when we have an exact match on ttl, mask used for match (%#x)", + match->mask.ip_ttl); + return -EOPNOTSUPP; + } + + /* check that we don't try to decrement 0, which equates + * to setting the ttl to 0xff + */ + if (match->value.ip_ttl == 0) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported: we cannot decrement ttl past 0"); + return -EOPNOTSUPP; + } + + /* check that we do not decrement ttl twice */ + if (!efx_tc_flower_action_order_ok(act, + EFX_TC_AO_DEC_TTL)) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported: multiple dec ttl"); + return -EOPNOTSUPP; + } + + /* check pedit can be achieved with decrement action */ + tr_ttl = match->value.ip_ttl - 1; + if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) { + act->do_ttl_dec = 1; + return 0; + } + + fallthrough; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: only support mangle on the ttl field (offset is %u)", + fa->mangle.offset); + return -EOPNOTSUPP; + } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + switch (fa->mangle.offset) { + case round_down(offsetof(struct ipv6hdr, hop_limit), 4): + /* we currently only support pedit IP6 when it applies + * to the hoplimit and then only when it can be 
achieved + * with a decrement hoplimit action + */ + + /* check that pedit applies to ttl only */ + if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: mask (%#x) out of range, only support mangle action on ipv6.hop_limit", + fa->mangle.mask); + + return -EOPNOTSUPP; + } + + /* we can only convert to a dec ttl when we have an + * exact match on the ttl field + */ + if (match->mask.ip_ttl != U8_MAX) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: only support mangle ipv6.hop_limit when we have an exact match on ttl, mask used for match (%#x)", + match->mask.ip_ttl); + return -EOPNOTSUPP; + } + + /* check that we don't try to decrement 0, which equates + * to setting the ttl to 0xff + */ + if (match->value.ip_ttl == 0) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported: we cannot decrement hop_limit past 0"); + return -EOPNOTSUPP; + } + + /* check that we do not decrement hoplimit twice */ + if (!efx_tc_flower_action_order_ok(act, + EFX_TC_AO_DEC_TTL)) { + NL_SET_ERR_MSG_MOD(extack, + "Unsupported: multiple dec ttl"); + return -EOPNOTSUPP; + } + + /* check pedit can be achieved with decrement action */ + tr_ttl = match->value.ip_ttl - 1; + if ((fa->mangle.val >> 24) == tr_ttl) { + act->do_ttl_dec = 1; + return 0; + } + + fallthrough; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported: only support mangle on the hop_limit field"); + return -EOPNOTSUPP; + } + default: + NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule", + fa->mangle.htype); + return -EOPNOTSUPP; + } + return 0; +} + +/** + * efx_tc_incomplete_mangle() - check for leftover partial pedits + * @mung: accumulator for partial mangles + * @extack: netlink extended ack for reporting errors + * + * Since the MAE can only overwrite whole fields, any partial + * field mangle left over on reaching packet delivery (mirred or + * end of TC actions) cannot be offloaded. Check for any such + * and reject them with -%EOPNOTSUPP. 
+ */ + +static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung, + struct netlink_ext_ack *extack) +{ + if (mung->dst_mac_32 || mung->dst_mac_16) { + NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address"); + return -EOPNOTSUPP; + } + if (mung->src_mac_16 || mung->src_mac_32) { + NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address"); + return -EOPNOTSUPP; + } + return 0; +} + static int efx_tc_flower_replace_foreign(struct efx_nic *efx, struct net_device *net_dev, struct flow_cls_offload *tc) @@ -1295,6 +1743,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx, struct netlink_ext_ack *extack = tc->common.extack; const struct ip_tunnel_info *encap_info = NULL; struct efx_tc_flow_rule *rule = NULL, *old; + struct efx_tc_mangler_state mung = {}; struct efx_tc_action_set *act = NULL; const struct flow_action_entry *fa; struct efx_rep *from_efv, *to_efv; @@ -1631,6 +2080,16 @@ static int efx_tc_flower_replace(struct efx_nic *efx, act->vlan_proto[act->vlan_push] = fa->vlan.proto; act->vlan_push++; break; + case FLOW_ACTION_ADD: + rc = efx_tc_pedit_add(efx, act, fa, extack); + if (rc < 0) + goto release; + break; + case FLOW_ACTION_MANGLE: + rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match); + if (rc < 0) + goto release; + break; case FLOW_ACTION_TUNNEL_ENCAP: if (encap_info) { /* Can't specify encap multiple times. 
@@ -1670,6 +2129,9 @@ static int efx_tc_flower_replace(struct efx_nic *efx, } } + rc = efx_tc_incomplete_mangle(&mung, extack); + if (rc < 0) + goto release; if (act) { /* Not shot/redirected, so deliver to default dest */ if (from_efv == EFX_EFV_PF) @@ -2156,6 +2618,14 @@ static void efx_tc_lhs_free(void *ptr, void *arg) kfree(rule); } +static void efx_tc_mac_free(void *ptr, void *__unused) +{ + struct efx_tc_mac_pedit_action *ped = ptr; + + WARN_ON(refcount_read(&ped->ref)); + kfree(ped); +} + static void efx_tc_flow_free(void *ptr, void *arg) { struct efx_tc_flow_rule *rule = ptr; @@ -2196,6 +2666,9 @@ int efx_init_struct_tc(struct efx_nic *efx) rc = efx_tc_init_counters(efx); if (rc < 0) goto fail_counters; + rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params); + if (rc < 0) + goto fail_mac_ht; rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params); if (rc < 0) goto fail_encap_match_ht; @@ -2233,6 +2706,8 @@ fail_lhs_rule_ht: fail_match_action_ht: rhashtable_destroy(&efx->tc->encap_match_ht); fail_encap_match_ht: + rhashtable_destroy(&efx->tc->mac_ht); +fail_mac_ht: efx_tc_destroy_counters(efx); fail_counters: efx_tc_destroy_encap_actions(efx); @@ -2268,6 +2743,7 @@ void efx_fini_struct_tc(struct efx_nic *efx) rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx); WARN_ON(!ida_is_empty(&efx->tc->recirc_ida)); ida_destroy(&efx->tc->recirc_ida); + rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL); efx_tc_fini_counters(efx); efx_tc_fini_encap_actions(efx); mutex_unlock(&efx->tc->mutex); |
