summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rwxr-xr-xtools/net/ynl/pyynl/cli.py285
-rwxr-xr-xtools/net/ynl/pyynl/ethtool.py47
-rw-r--r--tools/net/ynl/pyynl/lib/__init__.py10
-rw-r--r--tools/net/ynl/pyynl/lib/doc_generator.py3
-rw-r--r--tools/net/ynl/pyynl/lib/nlspec.py77
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py208
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py178
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py2
-rw-r--r--tools/net/ynl/ynltool/Makefile2
-rw-r--r--tools/net/ynl/ynltool/qstats.c171
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/Makefile6
-rw-r--r--tools/testing/selftests/drivers/net/gro.c543
-rwxr-xr-xtools/testing/selftests/drivers/net/gro.py166
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c72
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py157
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/nic_timestamp.py128
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_drv.py88
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_flow_label.py11
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py44
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c28
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py8
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh35
-rw-r--r--tools/testing/selftests/drivers/net/netconsole/Makefile19
-rw-r--r--tools/testing/selftests/drivers/net/netconsole/config6
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_basic.sh (renamed from tools/testing/selftests/drivers/net/netcons_basic.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh (renamed from tools/testing/selftests/drivers/net/netcons_cmdline.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh (renamed from tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh (renamed from tools/testing/selftests/drivers/net/netcons_overflow.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_resume.sh124
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh (renamed from tools/testing/selftests/drivers/net/netcons_sysdata.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/netconsole/netcons_torture.sh (renamed from tools/testing/selftests/drivers/net/netcons_torture.sh)2
-rwxr-xr-xtools/testing/selftests/drivers/net/psp.py6
-rw-r--r--tools/testing/selftests/drivers/net/psp_responder.c50
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile18
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/double_udp_encap.sh393
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh28
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh18
-rw-r--r--tools/testing/selftests/net/hsr/Makefile2
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh207
-rwxr-xr-xtools/testing/selftests/net/hsr/link_faults.sh378
-rwxr-xr-xtools/testing/selftests/net/hsr/prp_ping.sh147
-rw-r--r--tools/testing/selftests/net/hsr/settings2
-rw-r--r--tools/testing/selftests/net/icmp_rfc4884.c679
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh2
-rw-r--r--tools/testing/selftests/net/ipsec.c11
-rw-r--r--tools/testing/selftests/net/lib/csum.c2
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py44
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py32
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile1
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c98
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_splice.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c27
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh107
-rw-r--r--tools/testing/selftests/net/netfilter/config1
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh69
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh142
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt22
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt12
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt31
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt70
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt12
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt14
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt16
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt19
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt16
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt22
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt15
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt70
-rw-r--r--tools/testing/selftests/net/tfo.c13
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh13
-rw-r--r--tools/testing/selftests/net/tls.c16
-rw-r--r--tools/testing/selftests/net/tun.c898
-rw-r--r--tools/testing/selftests/net/tuntap_helpers.h390
-rw-r--r--tools/testing/selftests/net/txtimestamp.c10
-rwxr-xr-xtools/testing/selftests/ptp/phc.sh60
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json559
-rw-r--r--tools/testing/selftests/vsock/settings2
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh1055
132 files changed, 8251 insertions, 1161 deletions
diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py
index af02a5b7e5a2..94a5ba348b69 100755
--- a/tools/net/ynl/pyynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -1,43 +1,85 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+"""
+YNL cli tool
+"""
+
import argparse
import json
import os
import pathlib
import pprint
+import shutil
import sys
import textwrap
+# pylint: disable=no-name-in-module,wrong-import-position
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
-from lib import YnlFamily, Netlink, NlError, SpecFamily
+from lib import YnlFamily, Netlink, NlError, SpecFamily, SpecException, YnlException
+
+SYS_SCHEMA_DIR='/usr/share/ynl'
+RELATIVE_SCHEMA_DIR='../../../../Documentation/netlink'
+
+# pylint: disable=too-few-public-methods,too-many-locals
+class Colors:
+ """ANSI color and font modifier codes"""
+ RESET = '\033[0m'
+
+ BOLD = '\033[1m'
+ ITALICS = '\033[3m'
+ UNDERLINE = '\033[4m'
+ INVERT = '\033[7m'
+
+
+def color(text, modifiers):
+ """Add color to text if output is a TTY
-sys_schema_dir='/usr/share/ynl'
-relative_schema_dir='../../../../Documentation/netlink'
+ Returns:
+ Colored text if stdout is a TTY, otherwise plain text
+ """
+ if sys.stdout.isatty():
+ # Join the colors if they are a list, if it's a string this a noop
+ modifiers = "".join(modifiers)
+ return f"{modifiers}{text}{Colors.RESET}"
+ return text
+
+def term_width():
+ """ Get terminal width in columns (80 if stdout is not a terminal) """
+ return shutil.get_terminal_size().columns
def schema_dir():
+ """
+ Return the effective schema directory, preferring in-tree before
+ system schema directory.
+ """
script_dir = os.path.dirname(os.path.abspath(__file__))
- schema_dir = os.path.abspath(f"{script_dir}/{relative_schema_dir}")
- if not os.path.isdir(schema_dir):
- schema_dir = sys_schema_dir
- if not os.path.isdir(schema_dir):
- raise Exception(f"Schema directory {schema_dir} does not exist")
- return schema_dir
+ schema_dir_ = os.path.abspath(f"{script_dir}/{RELATIVE_SCHEMA_DIR}")
+ if not os.path.isdir(schema_dir_):
+ schema_dir_ = SYS_SCHEMA_DIR
+ if not os.path.isdir(schema_dir_):
+ raise YnlException(f"Schema directory {schema_dir_} does not exist")
+ return schema_dir_
def spec_dir():
- spec_dir = schema_dir() + '/specs'
- if not os.path.isdir(spec_dir):
- raise Exception(f"Spec directory {spec_dir} does not exist")
- return spec_dir
+ """
+ Return the effective spec directory, relative to the effective
+ schema directory.
+ """
+ spec_dir_ = schema_dir() + '/specs'
+ if not os.path.isdir(spec_dir_):
+ raise YnlException(f"Spec directory {spec_dir_} does not exist")
+ return spec_dir_
class YnlEncoder(json.JSONEncoder):
- def default(self, obj):
- if isinstance(obj, bytes):
- return bytes.hex(obj)
- if isinstance(obj, set):
- return list(obj)
- return json.JSONEncoder.default(self, obj)
+ """A custom encoder for emitting JSON with ynl-specific instance types"""
+ def default(self, o):
+ if isinstance(o, bytes):
+ return bytes.hex(o)
+ if isinstance(o, set):
+ return list(o)
+ return json.JSONEncoder.default(self, o)
def print_attr_list(ynl, attr_names, attr_set, indent=2):
@@ -46,7 +88,7 @@ def print_attr_list(ynl, attr_names, attr_set, indent=2):
for attr_name in attr_names:
if attr_name in attr_set.attrs:
attr = attr_set.attrs[attr_name]
- attr_info = f'{prefix}- {attr_name}: {attr.type}'
+ attr_info = f'{prefix}- {color(attr_name, Colors.BOLD)}: {attr.type}'
if 'enum' in attr.yaml:
enum_name = attr.yaml['enum']
attr_info += f" (enum: {enum_name})"
@@ -54,7 +96,8 @@ def print_attr_list(ynl, attr_names, attr_set, indent=2):
if enum_name in ynl.consts:
const = ynl.consts[enum_name]
enum_values = list(const.entries.keys())
- attr_info += f"\n{prefix} {const.type.capitalize()}: {', '.join(enum_values)}"
+ type_fmted = color(const.type.capitalize(), Colors.ITALICS)
+ attr_info += f"\n{prefix} {type_fmted}: {', '.join(enum_values)}"
# Show nested attributes reference and recursively display them
nested_set_name = None
@@ -63,7 +106,10 @@ def print_attr_list(ynl, attr_names, attr_set, indent=2):
attr_info += f" -> {nested_set_name}"
if attr.yaml.get('doc'):
- doc_text = textwrap.indent(attr.yaml['doc'], prefix + ' ')
+ doc_prefix = prefix + ' ' * 4
+ doc_text = textwrap.fill(attr.yaml['doc'], width=term_width(),
+ initial_indent=doc_prefix,
+ subsequent_indent=doc_prefix)
attr_info += f"\n{doc_text}"
print(attr_info)
@@ -77,24 +123,62 @@ def print_attr_list(ynl, attr_names, attr_set, indent=2):
print_attr_list(ynl, nested_names, nested_set, indent + 4)
-def print_mode_attrs(ynl, mode, mode_spec, attr_set, print_request=True):
+def print_mode_attrs(ynl, mode, mode_spec, attr_set, consistent_dd_reply=None):
"""Print a given mode (do/dump/event/notify)."""
mode_title = mode.capitalize()
- if print_request and 'request' in mode_spec and 'attributes' in mode_spec['request']:
+ if 'request' in mode_spec and 'attributes' in mode_spec['request']:
print(f'\n{mode_title} request attributes:')
print_attr_list(ynl, mode_spec['request']['attributes'], attr_set)
if 'reply' in mode_spec and 'attributes' in mode_spec['reply']:
- print(f'\n{mode_title} reply attributes:')
- print_attr_list(ynl, mode_spec['reply']['attributes'], attr_set)
+ if consistent_dd_reply and mode == "do":
+ title = None # Dump handling will print in combined format
+ elif consistent_dd_reply and mode == "dump":
+ title = 'Do and Dump'
+ else:
+ title = f'{mode_title}'
+ if title:
+ print(f'\n{title} reply attributes:')
+ print_attr_list(ynl, mode_spec['reply']['attributes'], attr_set)
+
+
+def do_doc(ynl, op):
+ """Handle --list-attrs $op, print the attr information to stdout"""
+ print(f'Operation: {color(op.name, Colors.BOLD)}')
+ print(op.yaml['doc'])
+
+ consistent_dd_reply = False
+ if 'do' in op.yaml and 'dump' in op.yaml and 'reply' in op.yaml['do'] and \
+ op.yaml['do']['reply'] == op.yaml['dump'].get('reply'):
+ consistent_dd_reply = True
+
+ for mode in ['do', 'dump']:
+ if mode in op.yaml:
+ print_mode_attrs(ynl, mode, op.yaml[mode], op.attr_set,
+ consistent_dd_reply=consistent_dd_reply)
+
+ if 'attributes' in op.yaml.get('event', {}):
+ print('\nEvent attributes:')
+ print_attr_list(ynl, op.yaml['event']['attributes'], op.attr_set)
- if 'attributes' in mode_spec:
- print(f'\n{mode_title} attributes:')
- print_attr_list(ynl, mode_spec['attributes'], attr_set)
+ if 'notify' in op.yaml:
+ mode_spec = op.yaml['notify']
+ ref_spec = ynl.msgs.get(mode_spec).yaml.get('do')
+ if not ref_spec:
+ ref_spec = ynl.msgs.get(mode_spec).yaml.get('dump')
+ if ref_spec:
+ print('\nNotification attributes:')
+ print_attr_list(ynl, ref_spec['reply']['attributes'], op.attr_set)
+ if 'mcgrp' in op.yaml:
+ print(f"\nMulticast group: {op.yaml['mcgrp']}")
+
+# pylint: disable=too-many-locals,too-many-branches,too-many-statements
def main():
+ """YNL cli tool"""
+
description = """
YNL CLI utility - a general purpose netlink utility that uses YAML
specs to drive protocol encoding and decoding.
@@ -105,54 +189,85 @@ def main():
"""
parser = argparse.ArgumentParser(description=description,
- epilog=epilog)
- spec_group = parser.add_mutually_exclusive_group(required=True)
- spec_group.add_argument('--family', dest='family', type=str,
- help='name of the netlink FAMILY')
- spec_group.add_argument('--list-families', action='store_true',
- help='list all netlink families supported by YNL (has spec)')
- spec_group.add_argument('--spec', dest='spec', type=str,
- help='choose the family by SPEC file path')
-
- parser.add_argument('--schema', dest='schema', type=str)
- parser.add_argument('--no-schema', action='store_true')
- parser.add_argument('--json', dest='json_text', type=str)
-
- group = parser.add_mutually_exclusive_group()
- group.add_argument('--do', dest='do', metavar='DO-OPERATION', type=str)
- group.add_argument('--multi', dest='multi', nargs=2, action='append',
- metavar=('DO-OPERATION', 'JSON_TEXT'), type=str)
- group.add_argument('--dump', dest='dump', metavar='DUMP-OPERATION', type=str)
- group.add_argument('--list-ops', action='store_true')
- group.add_argument('--list-msgs', action='store_true')
- group.add_argument('--list-attrs', dest='list_attrs', metavar='OPERATION', type=str,
- help='List attributes for an operation')
- group.add_argument('--validate', action='store_true')
-
- parser.add_argument('--duration', dest='duration', type=int,
- help='when subscribed, watch for DURATION seconds')
- parser.add_argument('--sleep', dest='duration', type=int,
- help='alias for duration')
- parser.add_argument('--subscribe', dest='ntf', type=str)
- parser.add_argument('--replace', dest='flags', action='append_const',
- const=Netlink.NLM_F_REPLACE)
- parser.add_argument('--excl', dest='flags', action='append_const',
- const=Netlink.NLM_F_EXCL)
- parser.add_argument('--create', dest='flags', action='append_const',
- const=Netlink.NLM_F_CREATE)
- parser.add_argument('--append', dest='flags', action='append_const',
- const=Netlink.NLM_F_APPEND)
- parser.add_argument('--process-unknown', action=argparse.BooleanOptionalAction)
- parser.add_argument('--output-json', action='store_true')
- parser.add_argument('--dbg-small-recv', default=0, const=4000,
- action='store', nargs='?', type=int)
+ epilog=epilog, add_help=False)
+
+ gen_group = parser.add_argument_group('General options')
+ gen_group.add_argument('-h', '--help', action='help',
+ help='show this help message and exit')
+
+ spec_group = parser.add_argument_group('Netlink family selection')
+ spec_sel = spec_group.add_mutually_exclusive_group(required=True)
+ spec_sel.add_argument('--list-families', action='store_true',
+ help=('list Netlink families supported by YNL '
+ '(which have a spec available in the standard '
+ 'system path)'))
+ spec_sel.add_argument('--family', dest='family', type=str,
+ help='name of the Netlink FAMILY to use')
+ spec_sel.add_argument('--spec', dest='spec', type=str,
+ help='full file path to the YAML spec file')
+
+ ops_group = parser.add_argument_group('Operations')
+ ops = ops_group.add_mutually_exclusive_group()
+ ops.add_argument('--do', dest='do', metavar='DO-OPERATION', type=str)
+ ops.add_argument('--dump', dest='dump', metavar='DUMP-OPERATION', type=str)
+ ops.add_argument('--multi', dest='multi', nargs=2, action='append',
+ metavar=('DO-OPERATION', 'JSON_TEXT'), type=str,
+ help="Multi-message operation sequence (for nftables)")
+ ops.add_argument('--list-ops', action='store_true',
+ help="List available --do and --dump operations")
+ ops.add_argument('--list-msgs', action='store_true',
+ help="List all messages of the family (incl. notifications)")
+ ops.add_argument('--list-attrs', '--doc', dest='list_attrs', metavar='MSG',
+ type=str, help='List attributes for a message / operation')
+ ops.add_argument('--validate', action='store_true',
+ help="Validate the spec against schema and exit")
+
+ io_group = parser.add_argument_group('Input / Output')
+ io_group.add_argument('--json', dest='json_text', type=str,
+ help=('Specify attributes of the message to send '
+ 'to the kernel in JSON format. Can be left out '
+ 'if the message is expected to be empty.'))
+ io_group.add_argument('--output-json', action='store_true',
+ help='Format output as JSON')
+
+ ntf_group = parser.add_argument_group('Notifications')
+ ntf_group.add_argument('--subscribe', dest='ntf', type=str)
+ ntf_group.add_argument('--duration', dest='duration', type=int,
+ help='when subscribed, watch for DURATION seconds')
+ ntf_group.add_argument('--sleep', dest='duration', type=int,
+ help='alias for duration')
+
+ nlflags = parser.add_argument_group('Netlink message flags (NLM_F_*)',
+ ('Extra flags to set in nlmsg_flags of '
+ 'the request, used mostly by older '
+ 'Classic Netlink families.'))
+ nlflags.add_argument('--replace', dest='flags', action='append_const',
+ const=Netlink.NLM_F_REPLACE)
+ nlflags.add_argument('--excl', dest='flags', action='append_const',
+ const=Netlink.NLM_F_EXCL)
+ nlflags.add_argument('--create', dest='flags', action='append_const',
+ const=Netlink.NLM_F_CREATE)
+ nlflags.add_argument('--append', dest='flags', action='append_const',
+ const=Netlink.NLM_F_APPEND)
+
+ schema_group = parser.add_argument_group('Development options')
+ schema_group.add_argument('--schema', dest='schema', type=str,
+ help="JSON schema to validate the spec")
+ schema_group.add_argument('--no-schema', action='store_true')
+
+ dbg_group = parser.add_argument_group('Debug options')
+ dbg_group.add_argument('--dbg-small-recv', default=0, const=4000,
+ action='store', nargs='?', type=int, metavar='INT',
+ help="Length of buffers used for recv()")
+ dbg_group.add_argument('--process-unknown', action=argparse.BooleanOptionalAction)
+
args = parser.parse_args()
def output(msg):
if args.output_json:
print(json.dumps(msg, cls=YnlEncoder))
else:
- pprint.PrettyPrinter().pprint(msg)
+ pprint.pprint(msg, width=term_width(), compact=True)
if args.list_families:
for filename in sorted(os.listdir(spec_dir())):
@@ -172,18 +287,18 @@ def main():
else:
spec = args.spec
if not os.path.isfile(spec):
- raise Exception(f"Spec file {spec} does not exist")
+ raise YnlException(f"Spec file {spec} does not exist")
if args.validate:
try:
SpecFamily(spec, args.schema)
- except Exception as error:
+ except SpecException as error:
print(error)
- exit(1)
+ sys.exit(1)
return
if args.family: # set behaviour when using installed specs
- if args.schema is None and spec.startswith(sys_schema_dir):
+ if args.schema is None and spec.startswith(SYS_SCHEMA_DIR):
args.schema = '' # disable schema validation when installed
if args.process_unknown is None:
args.process_unknown = True
@@ -207,23 +322,9 @@ def main():
op = ynl.msgs.get(args.list_attrs)
if not op:
print(f'Operation {args.list_attrs} not found')
- exit(1)
-
- print(f'Operation: {op.name}')
- print(op.yaml['doc'])
-
- for mode in ['do', 'dump', 'event']:
- if mode in op.yaml:
- print_mode_attrs(ynl, mode, op.yaml[mode], op.attr_set, True)
-
- if 'notify' in op.yaml:
- mode_spec = op.yaml['notify']
- ref_spec = ynl.msgs.get(mode_spec).yaml.get('do')
- if ref_spec:
- print_mode_attrs(ynl, 'notify', ref_spec, op.attr_set, False)
+ sys.exit(1)
- if 'mcgrp' in op.yaml:
- print(f"\nMulticast group: {op.yaml['mcgrp']}")
+ do_doc(ynl, op)
try:
if args.do:
@@ -242,7 +343,7 @@ def main():
output(msg)
except NlError as e:
print(e)
- exit(1)
+ sys.exit(1)
except KeyboardInterrupt:
pass
except BrokenPipeError:
diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index fd0f6b8d54d1..f1a2a2a89985 100755
--- a/tools/net/ynl/pyynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -1,5 +1,10 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#
+# pylint: disable=too-many-locals, too-many-branches, too-many-statements
+# pylint: disable=too-many-return-statements
+
+""" YNL ethtool utility """
import argparse
import pathlib
@@ -8,9 +13,12 @@ import sys
import re
import os
+# pylint: disable=no-name-in-module,wrong-import-position
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
-from lib import YnlFamily
+# pylint: disable=import-error
from cli import schema_dir, spec_dir
+from lib import YnlFamily
+
def args_to_req(ynl, op_name, args, req):
"""
@@ -48,7 +56,8 @@ def print_field(reply, *desc):
return
if len(desc) == 0:
- return print_field(reply, *zip(reply.keys(), reply.keys()))
+ print_field(reply, *zip(reply.keys(), reply.keys()))
+ return
for spec in desc:
try:
@@ -88,11 +97,12 @@ def doit(ynl, args, op_name):
args_to_req(ynl, op_name, args.args, req)
ynl.do(op_name, req)
-def dumpit(ynl, args, op_name, extra = {}):
+def dumpit(ynl, args, op_name, extra=None):
"""
Prepare request header, parse arguments and dumpit (filtering out the
devices we're not interested in).
"""
+ extra = extra or {}
reply = ynl.dump(op_name, { 'header': {} } | extra)
if not reply:
return {}
@@ -114,9 +124,9 @@ def bits_to_dict(attr):
"""
ret = {}
if 'bits' not in attr:
- return dict()
+ return {}
if 'bit' not in attr['bits']:
- return dict()
+ return {}
for bit in attr['bits']['bit']:
if bit['name'] == '':
continue
@@ -126,6 +136,8 @@ def bits_to_dict(attr):
return ret
def main():
+ """ YNL ethtool utility """
+
parser = argparse.ArgumentParser(description='ethtool wannabe')
parser.add_argument('--json', action=argparse.BooleanOptionalAction)
parser.add_argument('--show-priv-flags', action=argparse.BooleanOptionalAction)
@@ -155,7 +167,7 @@ def main():
# TODO: rss-get
parser.add_argument('device', metavar='device', type=str)
parser.add_argument('args', metavar='args', type=str, nargs='*')
- global args
+
args = parser.parse_args()
spec = os.path.join(spec_dir(), 'ethtool.yaml')
@@ -169,13 +181,16 @@ def main():
return
if args.set_eee:
- return doit(ynl, args, 'eee-set')
+ doit(ynl, args, 'eee-set')
+ return
if args.set_pause:
- return doit(ynl, args, 'pause-set')
+ doit(ynl, args, 'pause-set')
+ return
if args.set_coalesce:
- return doit(ynl, args, 'coalesce-set')
+ doit(ynl, args, 'coalesce-set')
+ return
if args.set_features:
# TODO: parse the bitmask
@@ -183,10 +198,12 @@ def main():
return
if args.set_channels:
- return doit(ynl, args, 'channels-set')
+ doit(ynl, args, 'channels-set')
+ return
if args.set_ring:
- return doit(ynl, args, 'rings-set')
+ doit(ynl, args, 'rings-set')
+ return
if args.show_priv_flags:
flags = bits_to_dict(dumpit(ynl, args, 'privflags-get')['flags'])
@@ -337,25 +354,25 @@ def main():
print(f'Time stamping parameters for {args.device}:')
print('Capabilities:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])]
+ _ = [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])]
print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}')
if 'tx-types' in tsinfo:
print('Hardware Transmit Timestamp Modes:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])]
+ _ = [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])]
else:
print('Hardware Transmit Timestamp Modes: none')
if 'rx-filters' in tsinfo:
print('Hardware Receive Filter Modes:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])]
+ _ = [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])]
else:
print('Hardware Receive Filter Modes: none')
if 'stats' in tsinfo and tsinfo['stats']:
print('Statistics:')
- [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()]
+ _ = [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()]
return
diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index ec9ea00071be..33a96155fb3b 100644
--- a/tools/net/ynl/pyynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
@@ -1,11 +1,15 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+""" YNL library """
+
from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
- SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat
-from .ynl import YnlFamily, Netlink, NlError
+ SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat, \
+ SpecException
+from .ynl import YnlFamily, Netlink, NlError, YnlException
from .doc_generator import YnlDocGenerator
__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
"SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat",
- "YnlFamily", "Netlink", "NlError", "YnlDocGenerator"]
+ "SpecException",
+ "YnlFamily", "Netlink", "NlError", "YnlDocGenerator", "YnlException"]
diff --git a/tools/net/ynl/pyynl/lib/doc_generator.py b/tools/net/ynl/pyynl/lib/doc_generator.py
index 8b922d8f89e8..74f5d408e048 100644
--- a/tools/net/ynl/pyynl/lib/doc_generator.py
+++ b/tools/net/ynl/pyynl/lib/doc_generator.py
@@ -109,8 +109,7 @@ class RstFormatters:
'fixed-header': 'definition',
'nested-attributes': 'attribute-set',
'struct': 'definition'}
- if prefix in mappings:
- prefix = mappings[prefix]
+ prefix = mappings.get(prefix, prefix)
return f":ref:`{namespace}-{prefix}-{name}`"
def rst_header(self) -> str:
diff --git a/tools/net/ynl/pyynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py
index 85c17fe01e35..fcffeb5b7ba3 100644
--- a/tools/net/ynl/pyynl/lib/nlspec.py
+++ b/tools/net/ynl/pyynl/lib/nlspec.py
@@ -1,13 +1,21 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#
+# pylint: disable=missing-function-docstring, too-many-instance-attributes, too-many-branches
+
+"""
+The nlspec is a python library for parsing and using YNL netlink
+specifications.
+"""
import collections
import importlib
import os
-import yaml
+import yaml as pyyaml
-# To be loaded dynamically as needed
-jsonschema = None
+class SpecException(Exception):
+ """Netlink spec exception.
+ """
class SpecElement:
@@ -93,8 +101,7 @@ class SpecEnumEntry(SpecElement):
def user_value(self, as_flags=None):
if self.enum_set['type'] == 'flags' or as_flags:
return 1 << self.value
- else:
- return self.value
+ return self.value
class SpecEnumSet(SpecElement):
@@ -117,8 +124,8 @@ class SpecEnumSet(SpecElement):
prev_entry = None
value_start = self.yaml.get('value-start', 0)
- self.entries = dict()
- self.entries_by_val = dict()
+ self.entries = {}
+ self.entries_by_val = {}
for entry in self.yaml['entries']:
e = self.new_entry(entry, prev_entry, value_start)
self.entries[e.name] = e
@@ -182,7 +189,7 @@ class SpecAttr(SpecElement):
self.sub_message = yaml.get('sub-message')
self.selector = yaml.get('selector')
- self.is_auto_scalar = self.type == "sint" or self.type == "uint"
+ self.is_auto_scalar = self.type in ("sint", "uint")
class SpecAttrSet(SpecElement):
@@ -288,7 +295,7 @@ class SpecStruct(SpecElement):
yield from self.members
def items(self):
- return self.members.items()
+ return self.members
class SpecSubMessage(SpecElement):
@@ -306,11 +313,11 @@ class SpecSubMessage(SpecElement):
self.formats = collections.OrderedDict()
for elem in self.yaml['formats']:
- format = self.new_format(family, elem)
- self.formats[format.value] = format
+ msg_format = self.new_format(family, elem)
+ self.formats[msg_format.value] = msg_format
- def new_format(self, family, format):
- return SpecSubMessageFormat(family, format)
+ def new_format(self, family, msg_format):
+ return SpecSubMessageFormat(family, msg_format)
class SpecSubMessageFormat(SpecElement):
@@ -378,7 +385,7 @@ class SpecOperation(SpecElement):
elif self.is_resv:
attr_set_name = ''
else:
- raise Exception(f"Can't resolve attribute set for op '{self.name}'")
+ raise SpecException(f"Can't resolve attribute set for op '{self.name}'")
if attr_set_name:
self.attr_set = self.family.attr_sets[attr_set_name]
@@ -428,17 +435,22 @@ class SpecFamily(SpecElement):
mcast_groups dict of all multicast groups (index by name)
kernel_family dict of kernel family attributes
"""
+
+ # To be loaded dynamically as needed
+ jsonschema = None
+
def __init__(self, spec_path, schema_path=None, exclude_ops=None):
- with open(spec_path, "r") as stream:
+ with open(spec_path, "r", encoding='utf-8') as stream:
prefix = '# SPDX-License-Identifier: '
first = stream.readline().strip()
if not first.startswith(prefix):
- raise Exception('SPDX license tag required in the spec')
+ raise SpecException('SPDX license tag required in the spec')
self.license = first[len(prefix):]
stream.seek(0)
- spec = yaml.safe_load(stream)
+ spec = pyyaml.safe_load(stream)
+ self.fixed_header = None
self._resolution_list = []
super().__init__(self, spec)
@@ -451,15 +463,13 @@ class SpecFamily(SpecElement):
if schema_path is None:
schema_path = os.path.dirname(os.path.dirname(spec_path)) + f'/{self.proto}.yaml'
if schema_path:
- global jsonschema
-
- with open(schema_path, "r") as stream:
- schema = yaml.safe_load(stream)
+ with open(schema_path, "r", encoding='utf-8') as stream:
+ schema = pyyaml.safe_load(stream)
- if jsonschema is None:
- jsonschema = importlib.import_module("jsonschema")
+ if SpecFamily.jsonschema is None:
+ SpecFamily.jsonschema = importlib.import_module("jsonschema")
- jsonschema.validate(self.yaml, schema)
+ SpecFamily.jsonschema.validate(self.yaml, schema)
self.attr_sets = collections.OrderedDict()
self.sub_msgs = collections.OrderedDict()
@@ -548,7 +558,7 @@ class SpecFamily(SpecElement):
req_val_next = req_val + 1
rsp_val_next = rsp_val + rsp_inc
else:
- raise Exception("Can't parse directional ops")
+ raise SpecException("Can't parse directional ops")
if req_val == req_val_next:
req_val = None
@@ -560,20 +570,19 @@ class SpecFamily(SpecElement):
skip |= bool(exclude.match(elem['name']))
if not skip:
op = self.new_operation(elem, req_val, rsp_val)
+ self.msgs[op.name] = op
req_val = req_val_next
rsp_val = rsp_val_next
- self.msgs[op.name] = op
-
def find_operation(self, name):
- """
- For a given operation name, find and return operation spec.
- """
- for op in self.yaml['operations']['list']:
- if name == op['name']:
- return op
- return None
+ """
+ For a given operation name, find and return operation spec.
+ """
+ for op in self.yaml['operations']['list']:
+ if name == op['name']:
+ return op
+ return None
def resolve(self):
self.resolve_up(super())
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index 36d36eb7e3b8..9774005e7ad1 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -1,4 +1,14 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#
+# pylint: disable=missing-class-docstring, missing-function-docstring
+# pylint: disable=too-many-branches, too-many-locals, too-many-instance-attributes
+# pylint: disable=too-many-lines
+
+"""
+YAML Netlink Library
+
+An implementation of the genetlink and raw netlink protocols.
+"""
from collections import namedtuple
from enum import Enum
@@ -22,6 +32,11 @@ from .nlspec import SpecFamily
#
+class YnlException(Exception):
+ pass
+
+
+# pylint: disable=too-few-public-methods
class Netlink:
# Netlink socket
SOL_NETLINK = 270
@@ -144,22 +159,22 @@ class NlAttr:
@classmethod
def get_format(cls, attr_type, byte_order=None):
- format = cls.type_formats[attr_type]
+ format_ = cls.type_formats[attr_type]
if byte_order:
- return format.big if byte_order == "big-endian" \
- else format.little
- return format.native
+ return format_.big if byte_order == "big-endian" \
+ else format_.little
+ return format_.native
def as_scalar(self, attr_type, byte_order=None):
- format = self.get_format(attr_type, byte_order)
- return format.unpack(self.raw)[0]
+ format_ = self.get_format(attr_type, byte_order)
+ return format_.unpack(self.raw)[0]
def as_auto_scalar(self, attr_type, byte_order=None):
if len(self.raw) != 4 and len(self.raw) != 8:
- raise Exception(f"Auto-scalar len payload be 4 or 8 bytes, got {len(self.raw)}")
+ raise YnlException(f"Auto-scalar len payload be 4 or 8 bytes, got {len(self.raw)}")
real_type = attr_type[0] + str(len(self.raw) * 8)
- format = self.get_format(real_type, byte_order)
- return format.unpack(self.raw)[0]
+ format_ = self.get_format(real_type, byte_order)
+ return format_.unpack(self.raw)[0]
def as_strz(self):
return self.raw.decode('ascii')[:-1]
@@ -167,9 +182,9 @@ class NlAttr:
def as_bin(self):
return self.raw
- def as_c_array(self, type):
- format = self.get_format(type)
- return [ x[0] for x in format.iter_unpack(self.raw) ]
+ def as_c_array(self, c_type):
+ format_ = self.get_format(c_type)
+ return [ x[0] for x in format_.iter_unpack(self.raw) ]
def __repr__(self):
return f"[type:{self.type} len:{self._len}] {self.raw}"
@@ -220,7 +235,7 @@ class NlMsg:
self.extack = None
if self.nl_flags & Netlink.NLM_F_ACK_TLVS and extack_off:
- self.extack = dict()
+ self.extack = {}
extack_attrs = NlAttrs(self.raw[extack_off:])
for extack in extack_attrs:
if extack.type == Netlink.NLMSGERR_ATTR_MSG:
@@ -245,8 +260,8 @@ class NlMsg:
policy = {}
for attr in NlAttrs(raw):
if attr.type == Netlink.NL_POLICY_TYPE_ATTR_TYPE:
- type = attr.as_scalar('u32')
- policy['type'] = Netlink.AttrType(type).name
+ type_ = attr.as_scalar('u32')
+ policy['type'] = Netlink.AttrType(type_).name
elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_VALUE_S:
policy['min-value'] = attr.as_scalar('s64')
elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_VALUE_S:
@@ -281,7 +296,8 @@ class NlMsg:
return self.nl_type
def __repr__(self):
- msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}"
+ msg = (f"nl_len = {self.nl_len} ({len(self.raw)}) "
+ f"nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}")
if self.error:
msg += '\n\terror: ' + str(self.error)
if self.extack:
@@ -289,6 +305,7 @@ class NlMsg:
return msg
+# pylint: disable=too-few-public-methods
class NlMsgs:
def __init__(self, data):
self.msgs = []
@@ -303,9 +320,6 @@ class NlMsgs:
yield from self.msgs
-genl_family_name_to_id = None
-
-
def _genl_msg(nl_type, nl_flags, genl_cmd, genl_version, seq=None):
# we prepend length in _genl_msg_finalize()
if seq is None:
@@ -319,7 +333,10 @@ def _genl_msg_finalize(msg):
return struct.pack("I", len(msg) + 4) + msg
+# pylint: disable=too-many-nested-blocks
def _genl_load_families():
+ genl_family_name_to_id = {}
+
with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, Netlink.NETLINK_GENERIC) as sock:
sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1)
@@ -330,21 +347,17 @@ def _genl_load_families():
sock.send(msg, 0)
- global genl_family_name_to_id
- genl_family_name_to_id = dict()
-
while True:
reply = sock.recv(128 * 1024)
nms = NlMsgs(reply)
for nl_msg in nms:
if nl_msg.error:
- print("Netlink error:", nl_msg.error)
- return
+ raise YnlException(f"Netlink error: {nl_msg.error}")
if nl_msg.done:
- return
+ return genl_family_name_to_id
gm = GenlMsg(nl_msg)
- fam = dict()
+ fam = {}
for attr in NlAttrs(gm.raw):
if attr.type == Netlink.CTRL_ATTR_FAMILY_ID:
fam['id'] = attr.as_scalar('u16')
@@ -353,7 +366,7 @@ def _genl_load_families():
elif attr.type == Netlink.CTRL_ATTR_MAXATTR:
fam['maxattr'] = attr.as_scalar('u32')
elif attr.type == Netlink.CTRL_ATTR_MCAST_GROUPS:
- fam['mcast'] = dict()
+ fam['mcast'] = {}
for entry in NlAttrs(attr.raw):
mcast_name = None
mcast_id = None
@@ -373,6 +386,7 @@ class GenlMsg:
self.nl = nl_msg
self.genl_cmd, self.genl_version, _ = struct.unpack_from("BBH", nl_msg.raw, 0)
self.raw = nl_msg.raw[4:]
+ self.raw_attrs = []
def cmd(self):
return self.genl_cmd
@@ -396,7 +410,7 @@ class NetlinkProtocol:
nlmsg = struct.pack("HHII", nl_type, nl_flags, seq, 0)
return nlmsg
- def message(self, flags, command, version, seq=None):
+ def message(self, flags, command, _version, seq=None):
return self._message(command, flags, seq)
def _decode(self, nl_msg):
@@ -406,13 +420,13 @@ class NetlinkProtocol:
msg = self._decode(nl_msg)
if op is None:
op = ynl.rsp_by_value[msg.cmd()]
- fixed_header_size = ynl._struct_size(op.fixed_header)
+ fixed_header_size = ynl.struct_size(op.fixed_header)
msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size)
return msg
def get_mcast_id(self, mcast_name, mcast_groups):
if mcast_name not in mcast_groups:
- raise Exception(f'Multicast group "{mcast_name}" not present in the spec')
+ raise YnlException(f'Multicast group "{mcast_name}" not present in the spec')
return mcast_groups[mcast_name].value
def msghdr_size(self):
@@ -420,15 +434,16 @@ class NetlinkProtocol:
class GenlProtocol(NetlinkProtocol):
+ genl_family_name_to_id = {}
+
def __init__(self, family_name):
super().__init__(family_name, Netlink.NETLINK_GENERIC)
- global genl_family_name_to_id
- if genl_family_name_to_id is None:
- _genl_load_families()
+ if not GenlProtocol.genl_family_name_to_id:
+ GenlProtocol.genl_family_name_to_id = _genl_load_families()
- self.genl_family = genl_family_name_to_id[family_name]
- self.family_id = genl_family_name_to_id[family_name]['id']
+ self.genl_family = GenlProtocol.genl_family_name_to_id[family_name]
+ self.family_id = GenlProtocol.genl_family_name_to_id[family_name]['id']
def message(self, flags, command, version, seq=None):
nlmsg = self._message(self.family_id, flags, seq)
@@ -440,13 +455,14 @@ class GenlProtocol(NetlinkProtocol):
def get_mcast_id(self, mcast_name, mcast_groups):
if mcast_name not in self.genl_family['mcast']:
- raise Exception(f'Multicast group "{mcast_name}" not present in the family')
+ raise YnlException(f'Multicast group "{mcast_name}" not present in the family')
return self.genl_family['mcast'][mcast_name]
def msghdr_size(self):
return super().msghdr_size() + 4
+# pylint: disable=too-few-public-methods
class SpaceAttrs:
SpecValuesPair = namedtuple('SpecValuesPair', ['spec', 'values'])
@@ -461,9 +477,9 @@ class SpaceAttrs:
if name in scope.values:
return scope.values[name]
spec_name = scope.spec.yaml['name']
- raise Exception(
+ raise YnlException(
f"No value for '{name}' in attribute space '{spec_name}'")
- raise Exception(f"Attribute '{name}' not defined in any attribute-set")
+ raise YnlException(f"Attribute '{name}' not defined in any attribute-set")
#
@@ -485,8 +501,8 @@ class YnlFamily(SpecFamily):
self.yaml['protonum'])
else:
self.nlproto = GenlProtocol(self.yaml['name'])
- except KeyError:
- raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel")
+ except KeyError as err:
+ raise YnlException(f"Family '{self.yaml['name']}' not supported by the kernel") from err
self._recv_dbg = False
# Note that netlink will use conservative (min) message size for
@@ -542,8 +558,7 @@ class YnlFamily(SpecFamily):
for single_value in value:
scalar += enum.entries[single_value].user_value(as_flags = True)
return scalar
- else:
- return enum.entries[value].user_value()
+ return enum.entries[value].user_value()
def _get_scalar(self, attr_spec, value):
try:
@@ -555,11 +570,12 @@ class YnlFamily(SpecFamily):
return self._from_string(value, attr_spec)
raise e
+ # pylint: disable=too-many-statements
def _add_attr(self, space, name, value, search_attrs):
try:
attr = self.attr_sets[space][name]
- except KeyError:
- raise Exception(f"Space '{space}' has no attribute '{name}'")
+ except KeyError as err:
+ raise YnlException(f"Space '{space}' has no attribute '{name}'") from err
nl_type = attr.value
if attr.is_multi and isinstance(value, list):
@@ -597,18 +613,18 @@ class YnlFamily(SpecFamily):
elif isinstance(value, dict) and attr.struct_name:
attr_payload = self._encode_struct(attr.struct_name, value)
elif isinstance(value, list) and attr.sub_type in NlAttr.type_formats:
- format = NlAttr.get_format(attr.sub_type)
- attr_payload = b''.join([format.pack(x) for x in value])
+ format_ = NlAttr.get_format(attr.sub_type)
+ attr_payload = b''.join([format_.pack(x) for x in value])
else:
- raise Exception(f'Unknown type for binary attribute, value: {value}')
+ raise YnlException(f'Unknown type for binary attribute, value: {value}')
elif attr['type'] in NlAttr.type_formats or attr.is_auto_scalar:
scalar = self._get_scalar(attr, value)
if attr.is_auto_scalar:
attr_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64')
else:
attr_type = attr["type"]
- format = NlAttr.get_format(attr_type, attr.byte_order)
- attr_payload = format.pack(scalar)
+ format_ = NlAttr.get_format(attr_type, attr.byte_order)
+ attr_payload = format_.pack(scalar)
elif attr['type'] in "bitfield32":
scalar_value = self._get_scalar(attr, value["value"])
scalar_selector = self._get_scalar(attr, value["selector"])
@@ -626,9 +642,9 @@ class YnlFamily(SpecFamily):
attr_payload += self._add_attr(msg_format.attr_set,
subname, subvalue, sub_attrs)
else:
- raise Exception(f"Unknown attribute-set '{msg_format.attr_set}'")
+ raise YnlException(f"Unknown attribute-set '{msg_format.attr_set}'")
else:
- raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}')
+ raise YnlException(f'Unknown type at {space} {name} {value} {attr["type"]}')
return self._add_attr_raw(nl_type, attr_payload)
@@ -715,7 +731,7 @@ class YnlFamily(SpecFamily):
subattr = self._formatted_string(subattr, attr_spec.display_hint)
decoded.append(subattr)
else:
- raise Exception(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}')
+ raise YnlException(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}')
return decoded
def _decode_nest_type_value(self, attr, attr_spec):
@@ -731,12 +747,11 @@ class YnlFamily(SpecFamily):
def _decode_unknown(self, attr):
if attr.is_nest:
return self._decode(NlAttrs(attr.raw), None)
- else:
- return attr.as_bin()
+ return attr.as_bin()
def _rsp_add(self, rsp, name, is_multi, decoded):
if is_multi is None:
- if name in rsp and type(rsp[name]) is not list:
+ if name in rsp and not isinstance(rsp[name], list):
rsp[name] = [rsp[name]]
is_multi = True
else:
@@ -752,13 +767,13 @@ class YnlFamily(SpecFamily):
def _resolve_selector(self, attr_spec, search_attrs):
sub_msg = attr_spec.sub_message
if sub_msg not in self.sub_msgs:
- raise Exception(f"No sub-message spec named {sub_msg} for {attr_spec.name}")
+ raise YnlException(f"No sub-message spec named {sub_msg} for {attr_spec.name}")
sub_msg_spec = self.sub_msgs[sub_msg]
selector = attr_spec.selector
value = search_attrs.lookup(selector)
if value not in sub_msg_spec.formats:
- raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'")
+ raise YnlException(f"No message format for '{value}' in sub-message spec '{sub_msg}'")
spec = sub_msg_spec.formats[value]
return spec, value
@@ -769,17 +784,20 @@ class YnlFamily(SpecFamily):
offset = 0
if msg_format.fixed_header:
decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header))
- offset = self._struct_size(msg_format.fixed_header)
+ offset = self.struct_size(msg_format.fixed_header)
if msg_format.attr_set:
if msg_format.attr_set in self.attr_sets:
subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set)
decoded.update(subdict)
else:
- raise Exception(f"Unknown attribute-set '{msg_format.attr_set}' when decoding '{attr_spec.name}'")
+ raise YnlException(f"Unknown attribute-set '{msg_format.attr_set}' "
+ f"when decoding '{attr_spec.name}'")
return decoded
+ # pylint: disable=too-many-statements
def _decode(self, attrs, space, outer_attrs = None):
- rsp = dict()
+ rsp = {}
+ search_attrs = {}
if space:
attr_space = self.attr_sets[space]
search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs)
@@ -787,16 +805,19 @@ class YnlFamily(SpecFamily):
for attr in attrs:
try:
attr_spec = attr_space.attrs_by_val[attr.type]
- except (KeyError, UnboundLocalError):
+ except (KeyError, UnboundLocalError) as err:
if not self.process_unknown:
- raise Exception(f"Space '{space}' has no attribute with value '{attr.type}'")
+ raise YnlException(f"Space '{space}' has no attribute "
+ f"with value '{attr.type}'") from err
attr_name = f"UnknownAttr({attr.type})"
self._rsp_add(rsp, attr_name, None, self._decode_unknown(attr))
continue
try:
if attr_spec["type"] == 'nest':
- subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
+ subdict = self._decode(NlAttrs(attr.raw),
+ attr_spec['nested-attributes'],
+ search_attrs)
decoded = subdict
elif attr_spec["type"] == 'string':
decoded = attr.as_strz()
@@ -828,7 +849,8 @@ class YnlFamily(SpecFamily):
decoded = self._decode_nest_type_value(attr, attr_spec)
else:
if not self.process_unknown:
- raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
+ raise YnlException(f'Unknown {attr_spec["type"]} '
+ f'with name {attr_spec["name"]}')
decoded = self._decode_unknown(attr)
self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded)
@@ -838,12 +860,14 @@ class YnlFamily(SpecFamily):
return rsp
+ # pylint: disable=too-many-arguments, too-many-positional-arguments
def _decode_extack_path(self, attrs, attr_set, offset, target, search_attrs):
for attr in attrs:
try:
attr_spec = attr_set.attrs_by_val[attr.type]
- except KeyError:
- raise Exception(f"Space '{attr_set.name}' has no attribute with value '{attr.type}'")
+ except KeyError as err:
+ raise YnlException(
+ f"Space '{attr_set.name}' has no attribute with value '{attr.type}'") from err
if offset > target:
break
if offset == target:
@@ -860,11 +884,12 @@ class YnlFamily(SpecFamily):
elif attr_spec['type'] == 'sub-message':
msg_format, value = self._resolve_selector(attr_spec, search_attrs)
if msg_format is None:
- raise Exception(f"Can't resolve sub-message of {attr_spec['name']} for extack")
+ raise YnlException(f"Can't resolve sub-message of "
+ f"{attr_spec['name']} for extack")
sub_attrs = self.attr_sets[msg_format.attr_set]
pathname += f"({value})"
else:
- raise Exception(f"Can't dive into {attr.type} ({attr_spec['name']}) for extack")
+ raise YnlException(f"Can't dive into {attr.type} ({attr_spec['name']}) for extack")
offset += 4
subpath = self._decode_extack_path(NlAttrs(attr.raw), sub_attrs,
offset, target, search_attrs)
@@ -879,7 +904,7 @@ class YnlFamily(SpecFamily):
return
msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set), op)
- offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header)
+ offset = self.nlproto.msghdr_size() + self.struct_size(op.fixed_header)
search_attrs = SpaceAttrs(op.attr_set, vals)
path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset,
extack['bad-attr-offs'], search_attrs)
@@ -887,26 +912,25 @@ class YnlFamily(SpecFamily):
del extack['bad-attr-offs']
extack['bad-attr'] = path
- def _struct_size(self, name):
+ def struct_size(self, name):
if name:
members = self.consts[name].members
size = 0
for m in members:
if m.type in ['pad', 'binary']:
if m.struct:
- size += self._struct_size(m.struct)
+ size += self.struct_size(m.struct)
else:
size += m.len
else:
- format = NlAttr.get_format(m.type, m.byte_order)
- size += format.size
+ format_ = NlAttr.get_format(m.type, m.byte_order)
+ size += format_.size
return size
- else:
- return 0
+ return 0
def _decode_struct(self, data, name):
members = self.consts[name].members
- attrs = dict()
+ attrs = {}
offset = 0
for m in members:
value = None
@@ -914,17 +938,17 @@ class YnlFamily(SpecFamily):
offset += m.len
elif m.type == 'binary':
if m.struct:
- len = self._struct_size(m.struct)
- value = self._decode_struct(data[offset : offset + len],
+ len_ = self.struct_size(m.struct)
+ value = self._decode_struct(data[offset : offset + len_],
m.struct)
- offset += len
+ offset += len_
else:
value = data[offset : offset + m.len]
offset += m.len
else:
- format = NlAttr.get_format(m.type, m.byte_order)
- [ value ] = format.unpack_from(data, offset)
- offset += format.size
+ format_ = NlAttr.get_format(m.type, m.byte_order)
+ [ value ] = format_.unpack_from(data, offset)
+ offset += format_.size
if value is not None:
if m.enum:
value = self._decode_enum(value, m)
@@ -943,7 +967,7 @@ class YnlFamily(SpecFamily):
elif m.type == 'binary':
if m.struct:
if value is None:
- value = dict()
+ value = {}
attr_payload += self._encode_struct(m.struct, value)
else:
if value is None:
@@ -953,13 +977,13 @@ class YnlFamily(SpecFamily):
else:
if value is None:
value = 0
- format = NlAttr.get_format(m.type, m.byte_order)
- attr_payload += format.pack(value)
+ format_ = NlAttr.get_format(m.type, m.byte_order)
+ attr_payload += format_.pack(value)
return attr_payload
def _formatted_string(self, raw, display_hint):
if display_hint == 'mac':
- formatted = ':'.join('%02x' % b for b in raw)
+ formatted = ':'.join(f'{b:02x}' for b in raw)
elif display_hint == 'hex':
if isinstance(raw, int):
formatted = hex(raw)
@@ -991,16 +1015,16 @@ class YnlFamily(SpecFamily):
mac_bytes = [int(x, 16) for x in string.split(':')]
else:
if len(string) % 2 != 0:
- raise Exception(f"Invalid MAC address format: {string}")
+ raise YnlException(f"Invalid MAC address format: {string}")
mac_bytes = [int(string[i:i+2], 16) for i in range(0, len(string), 2)]
raw = bytes(mac_bytes)
else:
- raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented"
+ raise YnlException(f"Display hint '{attr_spec.display_hint}' not implemented"
f" when parsing '{attr_spec['name']}'")
return raw
def handle_ntf(self, decoded):
- msg = dict()
+ msg = {}
if self.include_raw:
msg['raw'] = decoded
op = self.rsp_by_value[decoded.cmd()]
@@ -1081,6 +1105,7 @@ class YnlFamily(SpecFamily):
msg = _genl_msg_finalize(msg)
return msg
+ # pylint: disable=too-many-statements
def _ops(self, ops):
reqs_by_seq = {}
req_seq = random.randint(1024, 65535)
@@ -1139,9 +1164,8 @@ class YnlFamily(SpecFamily):
if decoded.cmd() in self.async_msg_ids:
self.handle_ntf(decoded)
continue
- else:
- print('Unexpected message: ' + repr(decoded))
- continue
+ print('Unexpected message: ' + repr(decoded))
+ continue
rsp_msg = self._decode(decoded.raw_attrs, op.attr_set.name)
if op.fixed_header:
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index b517d0c605ad..0e1e486c1185 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -1,5 +1,17 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+#
+# pylint: disable=line-too-long, missing-class-docstring, missing-function-docstring
+# pylint: disable=too-many-positional-arguments, too-many-arguments, too-many-statements
+# pylint: disable=too-many-branches, too-many-locals, too-many-instance-attributes
+# pylint: disable=too-many-nested-blocks, too-many-lines, too-few-public-methods
+# pylint: disable=broad-exception-raised, broad-exception-caught, protected-access
+
+"""
+ynl_gen_c
+
+A YNL to C code generator for both kernel and userspace protocol stubs.
+"""
import argparse
import filecmp
@@ -9,8 +21,9 @@ import re
import shutil
import sys
import tempfile
-import yaml
+import yaml as pyyaml
+# pylint: disable=no-name-in-module,wrong-import-position
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
from lib import SpecSubMessage
@@ -157,7 +170,7 @@ class Type(SpecAttr):
def presence_member(self, space, type_filter):
if self.presence_type() != type_filter:
- return
+ return ''
if self.presence_type() == 'present':
pfx = '__' if space == 'user' else ''
@@ -166,14 +179,15 @@ class Type(SpecAttr):
if self.presence_type() in {'len', 'count'}:
pfx = '__' if space == 'user' else ''
return f"{pfx}u32 {self.c_name};"
+ return ''
- def _complex_member_type(self, ri):
+ def _complex_member_type(self, _ri):
return None
def free_needs_iter(self):
return False
- def _free_lines(self, ri, var, ref):
+ def _free_lines(self, _ri, var, ref):
if self.is_multi_val() or self.presence_type() in {'count', 'len'}:
return [f'free({var}->{ref}{self.c_name});']
return []
@@ -183,9 +197,10 @@ class Type(SpecAttr):
for line in lines:
ri.cw.p(line)
+ # pylint: disable=assignment-from-none
def arg_member(self, ri):
member = self._complex_member_type(ri)
- if member:
+ if member is not None:
spc = ' ' if member[-1] != '*' else ''
arg = [member + spc + '*' + self.c_name]
if self.presence_type() == 'count':
@@ -195,7 +210,7 @@ class Type(SpecAttr):
def struct_member(self, ri):
member = self._complex_member_type(ri)
- if member:
+ if member is not None:
ptr = '*' if self.is_multi_val() else ''
if self.is_recursive_for_op(ri):
ptr = '*'
@@ -243,9 +258,9 @@ class Type(SpecAttr):
def attr_get(self, ri, var, first):
lines, init_lines, _ = self._attr_get(ri, var)
- if type(lines) is str:
+ if isinstance(lines, str):
lines = [lines]
- if type(init_lines) is str:
+ if isinstance(init_lines, str):
init_lines = [init_lines]
kw = 'if' if first else 'else if'
@@ -270,7 +285,7 @@ class Type(SpecAttr):
def _setter_lines(self, ri, member, presence):
raise Exception(f"Setter not implemented for class type {self.type}")
- def setter(self, ri, space, direction, deref=False, ref=None, var="req"):
+ def setter(self, ri, _space, direction, deref=False, ref=None, var="req"):
ref = (ref if ref else []) + [self.c_name]
member = f"{var}->{'.'.join(ref)}"
@@ -280,6 +295,7 @@ class Type(SpecAttr):
code = []
presence = ''
+ # pylint: disable=consider-using-enumerate
for i in range(0, len(ref)):
presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}"
# Every layer below last is a nest, so we know it uses bit presence
@@ -414,6 +430,7 @@ class TypeScalar(Type):
if low < -32768 or high > 32767:
self.checks['full-range'] = True
+ # pylint: disable=too-many-return-statements
def _attr_policy(self, policy):
if 'flags-mask' in self.checks or self.is_bitfield:
if self.is_bitfield:
@@ -424,15 +441,15 @@ class TypeScalar(Type):
flag_cnt = len(flags['entries'])
mask = (1 << flag_cnt) - 1
return f"NLA_POLICY_MASK({policy}, 0x{mask:x})"
- elif 'full-range' in self.checks:
+ if 'full-range' in self.checks:
return f"NLA_POLICY_FULL_RANGE({policy}, &{c_lower(self.enum_name)}_range)"
- elif 'range' in self.checks:
+ if 'range' in self.checks:
return f"NLA_POLICY_RANGE({policy}, {self.get_limit_str('min')}, {self.get_limit_str('max')})"
- elif 'min' in self.checks:
+ if 'min' in self.checks:
return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})"
- elif 'max' in self.checks:
+ if 'max' in self.checks:
return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})"
- elif 'sparse' in self.checks:
+ if 'sparse' in self.checks:
return f"NLA_POLICY_VALIDATE_FN({policy}, &{c_lower(self.enum_name)}_validate)"
return super()._attr_policy(policy)
@@ -554,6 +571,8 @@ class TypeBinary(Type):
mem = 'NLA_POLICY_MIN_LEN(' + self.get_limit_str('min-len') + ')'
elif 'max-len' in self.checks:
mem = 'NLA_POLICY_MAX_LEN(' + self.get_limit_str('max-len') + ')'
+ else:
+ raise Exception('Failed to process policy check for binary type')
return mem
@@ -627,7 +646,7 @@ class TypeBinaryScalarArray(TypeBinary):
class TypeBitfield32(Type):
- def _complex_member_type(self, ri):
+ def _complex_member_type(self, _ri):
return "struct nla_bitfield32"
def _attr_typol(self):
@@ -655,7 +674,7 @@ class TypeNest(Type):
def is_recursive(self):
return self.family.pure_nested_structs[self.nested_attrs].recursive
- def _complex_member_type(self, ri):
+ def _complex_member_type(self, _ri):
return self.nested_struct_type
def _free_lines(self, ri, var, ref):
@@ -689,7 +708,7 @@ class TypeNest(Type):
f"parg.data = &{var}->{self.c_name};"]
return get_lines, init_lines, None
- def setter(self, ri, space, direction, deref=False, ref=None, var="req"):
+ def setter(self, ri, _space, direction, deref=False, ref=None, var="req"):
ref = (ref if ref else []) + [self.c_name]
for _, attr in ri.family.pure_nested_structs[self.nested_attrs].member_list():
@@ -714,19 +733,18 @@ class TypeMultiAttr(Type):
def _complex_member_type(self, ri):
if 'type' not in self.attr or self.attr['type'] == 'nest':
return self.nested_struct_type
- elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+ if self.attr['type'] == 'binary' and 'struct' in self.attr:
return None # use arg_member()
- elif self.attr['type'] == 'string':
+ if self.attr['type'] == 'string':
return 'struct ynl_string *'
- elif self.attr['type'] in scalars:
+ if self.attr['type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
if self.is_auto_scalar:
name = self.type[0] + '64'
else:
name = self.attr['type']
return scalar_pfx + name
- else:
- raise Exception(f"Sub-type {self.attr['type']} not supported yet")
+ raise Exception(f"Sub-type {self.attr['type']} not supported yet")
def arg_member(self, ri):
if self.type == 'binary' and 'struct' in self.attr:
@@ -737,7 +755,7 @@ class TypeMultiAttr(Type):
def free_needs_iter(self):
return self.attr['type'] in {'nest', 'string'}
- def _free_lines(self, ri, var, ref):
+ def _free_lines(self, _ri, var, ref):
lines = []
if self.attr['type'] in scalars:
lines += [f"free({var}->{ref}{self.c_name});"]
@@ -801,13 +819,12 @@ class TypeIndexedArray(Type):
def _complex_member_type(self, ri):
if 'sub-type' not in self.attr or self.attr['sub-type'] == 'nest':
return self.nested_struct_type
- elif self.attr['sub-type'] in scalars:
+ if self.attr['sub-type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['sub-type']
- elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+ if self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
return None # use arg_member()
- else:
- raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet")
+ raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet")
def arg_member(self, ri):
if self.sub_type == 'binary' and 'exact-len' in self.checks:
@@ -823,12 +840,11 @@ class TypeIndexedArray(Type):
def _attr_typol(self):
if self.attr['sub-type'] in scalars:
return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
- elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+ if self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, '
- elif self.attr['sub-type'] == 'nest':
+ if self.attr['sub-type'] == 'nest':
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
- else:
- raise Exception(f"Typol for IndexedArray sub-type {self.attr['sub-type']} not supported, yet")
+ raise Exception(f"Typol for IndexedArray sub-type {self.attr['sub-type']} not supported, yet")
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
@@ -864,18 +880,18 @@ class TypeIndexedArray(Type):
def free_needs_iter(self):
return self.sub_type == 'nest'
- def _free_lines(self, ri, var, ref):
+ def _free_lines(self, _ri, var, ref):
lines = []
if self.sub_type == 'nest':
lines += [
f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
]
- lines += f"free({var}->{ref}{self.c_name});",
+ lines += (f"free({var}->{ref}{self.c_name});",)
return lines
class TypeNestTypeValue(Type):
- def _complex_member_type(self, ri):
+ def _complex_member_type(self, _ri):
return self.nested_struct_type
def _attr_typol(self):
@@ -921,15 +937,15 @@ class TypeSubMessage(TypeNest):
return typol
def _attr_get(self, ri, var):
- sel = c_lower(self['selector'])
+ selector = self['selector']
+ sel = c_lower(selector)
if self.selector.is_external():
sel_var = f"_sel_{sel}"
else:
sel_var = f"{var}->{sel}"
get_lines = [f'if (!{sel_var})',
- 'return ynl_submsg_failed(yarg, "%s", "%s");' %
- (self.name, self['selector']),
- f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))",
+ f'return ynl_submsg_failed(yarg, "{self.name}", "{selector}");',
+ f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))",
"return YNL_PARSE_CB_ERROR;"]
init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;",
f"parg.data = &{var}->{self.c_name};"]
@@ -988,7 +1004,7 @@ class Struct:
self.in_multi_val = False # used by a MultiAttr or and legacy arrays
self.attr_list = []
- self.attrs = dict()
+ self.attrs = {}
if type_list is not None:
for t in type_list:
self.attr_list.append((t, self.attr_set[t]),)
@@ -1020,7 +1036,7 @@ class Struct:
def external_selectors(self):
sels = []
- for name, attr in self.attr_list:
+ for _name, attr in self.attr_list:
if isinstance(attr, TypeSubMessage) and attr.selector.is_external():
sels.append(attr.selector)
return sels
@@ -1037,9 +1053,9 @@ class EnumEntry(SpecEnumEntry):
super().__init__(enum_set, yaml, prev, value_start)
if prev:
- self.value_change = (self.value != prev.value + 1)
+ self.value_change = self.value != prev.value + 1
else:
- self.value_change = (self.value != 0)
+ self.value_change = self.value != 0
self.value_change = self.value_change or self.enum_set['type'] == 'flags'
# Added by resolve:
@@ -1080,8 +1096,8 @@ class EnumSet(SpecEnumSet):
return EnumEntry(self, entry, prev_entry, value_start)
def value_range(self):
- low = min([x.value for x in self.entries.values()])
- high = max([x.value for x in self.entries.values()])
+ low = min(x.value for x in self.entries.values())
+ high = max(x.value for x in self.entries.values())
if high - low + 1 != len(self.entries):
return None, None
@@ -1220,6 +1236,12 @@ class Family(SpecFamily):
self.hooks = None
delattr(self, "hooks")
+ self.root_sets = {}
+ self.pure_nested_structs = {}
+ self.kernel_policy = None
+ self.global_policy = None
+ self.global_policy_set = None
+
super().__init__(file_name, exclude_ops=exclude_ops)
self.fam_key = c_upper(self.yaml.get('c-family-name', self.yaml["name"] + '_FAMILY_NAME'))
@@ -1254,18 +1276,18 @@ class Family(SpecFamily):
self.mcgrps = self.yaml.get('mcast-groups', {'list': []})
- self.hooks = dict()
+ self.hooks = {}
for when in ['pre', 'post']:
- self.hooks[when] = dict()
+ self.hooks[when] = {}
for op_mode in ['do', 'dump']:
- self.hooks[when][op_mode] = dict()
+ self.hooks[when][op_mode] = {}
self.hooks[when][op_mode]['set'] = set()
self.hooks[when][op_mode]['list'] = []
# dict space-name -> 'request': set(attrs), 'reply': set(attrs)
- self.root_sets = dict()
+ self.root_sets = {}
# dict space-name -> Struct
- self.pure_nested_structs = dict()
+ self.pure_nested_structs = {}
self._mark_notify()
self._mock_up_events()
@@ -1311,7 +1333,7 @@ class Family(SpecFamily):
}
def _load_root_sets(self):
- for op_name, op in self.msgs.items():
+ for _op_name, op in self.msgs.items():
if 'attribute-set' not in op:
continue
@@ -1427,7 +1449,7 @@ class Family(SpecFamily):
attr_set_queue = list(self.root_sets.keys())
attr_set_seen = set(self.root_sets.keys())
- while len(attr_set_queue):
+ while attr_set_queue:
a_set = attr_set_queue.pop(0)
for attr, spec in self.attr_sets[a_set].items():
if 'nested-attributes' in spec:
@@ -1510,7 +1532,7 @@ class Family(SpecFamily):
for k, _ in self.root_sets.items():
yield k, None # we don't have a struct, but it must be terminal
- for attr_set, struct in all_structs():
+ for attr_set, _struct in all_structs():
for _, spec in self.attr_sets[attr_set].items():
if 'nested-attributes' in spec:
child_name = spec['nested-attributes']
@@ -1530,7 +1552,7 @@ class Family(SpecFamily):
def _load_global_policy(self):
global_set = set()
attr_set_name = None
- for op_name, op in self.ops.items():
+ for _op_name, op in self.ops.items():
if not op:
continue
if 'attribute-set' not in op:
@@ -1613,7 +1635,7 @@ class RenderInfo:
self.cw = cw
- self.struct = dict()
+ self.struct = {}
if op_mode == 'notify':
op_mode = 'do' if 'do' in op else 'dump'
for op_dir in ['request', 'reply']:
@@ -1650,6 +1672,7 @@ class CodeWriter:
if out_file is None:
self._out = os.sys.stdout
else:
+ # pylint: disable=consider-using-with
self._out = tempfile.NamedTemporaryFile('w+')
self._out_file = out_file
@@ -1664,7 +1687,7 @@ class CodeWriter:
if not self._overwrite and os.path.isfile(self._out_file):
if filecmp.cmp(self._out.name, self._out_file, shallow=False):
return
- with open(self._out_file, 'w+') as out_file:
+ with open(self._out_file, 'w+', encoding='utf-8') as out_file:
self._out.seek(0)
shutil.copyfileobj(self._out, out_file)
self._out.close()
@@ -1779,7 +1802,7 @@ class CodeWriter:
if not local_vars:
return
- if type(local_vars) is str:
+ if isinstance(local_vars, str):
local_vars = [local_vars]
local_vars.sort(key=len, reverse=True)
@@ -1799,20 +1822,19 @@ class CodeWriter:
def writes_defines(self, defines):
longest = 0
for define in defines:
- if len(define[0]) > longest:
- longest = len(define[0])
+ longest = max(len(define[0]), longest)
longest = ((longest + 8) // 8) * 8
for define in defines:
line = '#define ' + define[0]
line += '\t' * ((longest - len(define[0]) + 7) // 8)
- if type(define[1]) is int:
+ if isinstance(define[1], int):
line += str(define[1])
- elif type(define[1]) is str:
+ elif isinstance(define[1], str):
line += '"' + define[1] + '"'
self.p(line)
def write_struct_init(self, members):
- longest = max([len(x[0]) for x in members])
+ longest = max(len(x[0]) for x in members)
longest += 1 # because we prepend a .
longest = ((longest + 8) // 8) * 8
for one in members:
@@ -2038,12 +2060,12 @@ def put_op_name(family, cw):
_put_enum_to_str_helper(cw, family.c_name + '_op', map_name, 'op')
-def put_enum_to_str_fwd(family, cw, enum):
+def put_enum_to_str_fwd(_family, cw, enum):
args = [enum.user_type + ' value']
cw.write_func_prot('const char *', f'{enum.render_name}_str', args, suffix=';')
-def put_enum_to_str(family, cw, enum):
+def put_enum_to_str(_family, cw, enum):
map_name = f'{enum.render_name}_strmap'
cw.block_start(line=f"static const char * const {map_name}[] =")
for entry in enum.entries.values():
@@ -2324,7 +2346,8 @@ def parse_rsp_nested_prototype(ri, struct, suffix=';'):
def parse_rsp_nested(ri, struct):
if struct.submsg:
- return parse_rsp_submsg(ri, struct)
+ parse_rsp_submsg(ri, struct)
+ return
parse_rsp_nested_prototype(ri, struct, suffix='')
@@ -2654,7 +2677,7 @@ def print_req_free(ri):
def print_rsp_type(ri):
- if (ri.op_mode == 'do' or ri.op_mode == 'dump') and 'reply' in ri.op[ri.op_mode]:
+ if ri.op_mode in ('do', 'dump') and 'reply' in ri.op[ri.op_mode]:
direction = 'reply'
elif ri.op_mode == 'event':
direction = 'reply'
@@ -2667,7 +2690,7 @@ def print_wrapped_type(ri):
ri.cw.block_start(line=f"{type_name(ri, 'reply')}")
if ri.op_mode == 'dump':
ri.cw.p(f"{type_name(ri, 'reply')} *next;")
- elif ri.op_mode == 'notify' or ri.op_mode == 'event':
+ elif ri.op_mode in ('notify', 'event'):
ri.cw.p('__u16 family;')
ri.cw.p('__u8 cmd;')
ri.cw.p('struct ynl_ntf_base_type *next;')
@@ -2704,7 +2727,7 @@ def _free_type(ri, direction, struct):
def free_rsp_nested_prototype(ri):
- print_free_prototype(ri, "")
+ print_free_prototype(ri, "")
def free_rsp_nested(ri, struct):
@@ -2930,7 +2953,7 @@ def print_kernel_op_table_hdr(family, cw):
def print_kernel_op_table(family, cw):
print_kernel_op_table_fwd(family, cw, terminate=False)
- if family.kernel_policy == 'global' or family.kernel_policy == 'per-op':
+ if family.kernel_policy in ('global', 'per-op'):
for op_name, op in family.ops.items():
if op.is_async:
continue
@@ -3346,7 +3369,7 @@ def render_user_family(family, cw, prototype):
else:
raise Exception('Invalid notification ' + ntf_op_name)
_render_user_ntf_entry(ri, ntf_op)
- for op_name, op in family.ops.items():
+ for _op_name, op in family.ops.items():
if 'event' not in op:
continue
ri = RenderInfo(cw, family, "user", op, "event")
@@ -3418,12 +3441,11 @@ def main():
print('Spec license:', parsed.license)
print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)')
os.sys.exit(1)
- except yaml.YAMLError as exc:
+ except pyyaml.YAMLError as exc:
print(exc)
os.sys.exit(1)
- return
- cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out))
+ cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=not args.cmp_out)
_, spec_kernel = find_kernel_root(args.spec)
if args.mode == 'uapi' or args.header:
@@ -3524,7 +3546,7 @@ def main():
cw.nl()
if parsed.kernel_policy in {'per-op', 'split'}:
- for op_name, op in parsed.ops.items():
+ for _op_name, op in parsed.ops.items():
if 'do' in op and 'event' not in op:
ri = RenderInfo(cw, parsed, args.mode, op, "do")
print_req_policy_fwd(cw, ri.struct['request'], ri=ri)
@@ -3553,7 +3575,7 @@ def main():
print_req_policy(cw, struct)
cw.nl()
- for op_name, op in parsed.ops.items():
+ for _op_name, op in parsed.ops.items():
if parsed.kernel_policy in {'per-op', 'split'}:
for op_mode in ['do', 'dump']:
if op_mode in op and 'request' in op[op_mode]:
@@ -3581,7 +3603,7 @@ def main():
ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set)
print_type_full(ri, struct)
- for op_name, op in parsed.ops.items():
+ for _op_name, op in parsed.ops.items():
cw.p(f"/* ============== {op.enum_name} ============== */")
if 'do' in op and 'event' not in op:
@@ -3614,7 +3636,7 @@ def main():
raise Exception(f'Only notifications with consistent types supported ({op.name})')
print_wrapped_type(ri)
- for op_name, op in parsed.ntfs.items():
+ for _op_name, op in parsed.ntfs.items():
if 'event' in op:
ri = RenderInfo(cw, parsed, args.mode, op, 'event')
cw.p(f"/* {op.enum_name} - event */")
@@ -3664,7 +3686,7 @@ def main():
if struct.reply:
parse_rsp_nested(ri, struct)
- for op_name, op in parsed.ops.items():
+ for _op_name, op in parsed.ops.items():
cw.p(f"/* ============== {op.enum_name} ============== */")
if 'do' in op and 'event' not in op:
cw.p(f"/* {op.enum_name} - do */")
@@ -3692,7 +3714,7 @@ def main():
raise Exception(f'Only notifications with consistent types supported ({op.name})')
print_ntf_type_free(ri)
- for op_name, op in parsed.ntfs.items():
+ for _op_name, op in parsed.ntfs.items():
if 'event' in op:
cw.p(f"/* {op.enum_name} - event */")
diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 90ae19aac89d..30324e2fd682 100755
--- a/tools/net/ynl/pyynl/ynl_gen_rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
@@ -19,6 +19,7 @@ import sys
import argparse
import logging
+# pylint: disable=no-name-in-module,wrong-import-position
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import YnlDocGenerator # pylint: disable=C0413
@@ -60,6 +61,7 @@ def write_to_rstfile(content: str, filename: str) -> None:
rst_file.write(content)
+# pylint: disable=broad-exception-caught
def main() -> None:
"""Main function that reads the YAML files and generates the RST files"""
diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile
index f5b1de32daa5..48b0f32050f0 100644
--- a/tools/net/ynl/ynltool/Makefile
+++ b/tools/net/ynl/ynltool/Makefile
@@ -13,7 +13,7 @@ endif
CFLAGS += -I../lib -I../generated -I../../../include/uapi/
SRC_VERSION := \
- $(shell make --no-print-directory -sC ../../../.. kernelversion || \
+ $(shell make --no-print-directory -sC ../../../.. kernelversion 2>/dev/null || \
echo "unknown")
CFLAGS += -DSRC_VERSION='"$(SRC_VERSION)"'
diff --git a/tools/net/ynl/ynltool/qstats.c b/tools/net/ynl/ynltool/qstats.c
index 31fb45709ffa..a6c28ba4f25c 100644
--- a/tools/net/ynl/ynltool/qstats.c
+++ b/tools/net/ynl/ynltool/qstats.c
@@ -237,13 +237,47 @@ static void print_plain_qstats(struct netdev_qstats_get_list *qstats)
}
}
-static int do_show(int argc, char **argv)
+static struct netdev_qstats_get_list *
+qstats_dump(enum netdev_qstats_scope scope)
{
struct netdev_qstats_get_list *qstats;
struct netdev_qstats_get_req *req;
struct ynl_error yerr;
struct ynl_sock *ys;
- int ret = 0;
+
+ ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!ys) {
+ p_err("YNL: %s", yerr.msg);
+ return NULL;
+ }
+
+ req = netdev_qstats_get_req_alloc();
+ if (!req) {
+ p_err("failed to allocate qstats request");
+ goto err_close;
+ }
+
+ if (scope)
+ netdev_qstats_get_req_set_scope(req, scope);
+
+ qstats = netdev_qstats_get_dump(ys, req);
+ netdev_qstats_get_req_free(req);
+ if (!qstats) {
+ p_err("failed to get queue stats: %s", ys->err.msg);
+ goto err_close;
+ }
+
+ ynl_sock_destroy(ys);
+ return qstats;
+
+err_close:
+ ynl_sock_destroy(ys);
+ return NULL;
+}
+
+static int do_show(int argc, char **argv)
+{
+ struct netdev_qstats_get_list *qstats;
/* Parse options */
while (argc > 0) {
@@ -268,29 +302,9 @@ static int do_show(int argc, char **argv)
}
}
- ys = ynl_sock_create(&ynl_netdev_family, &yerr);
- if (!ys) {
- p_err("YNL: %s", yerr.msg);
+ qstats = qstats_dump(scope);
+ if (!qstats)
return -1;
- }
-
- req = netdev_qstats_get_req_alloc();
- if (!req) {
- p_err("failed to allocate qstats request");
- ret = -1;
- goto exit_close;
- }
-
- if (scope)
- netdev_qstats_get_req_set_scope(req, scope);
-
- qstats = netdev_qstats_get_dump(ys, req);
- netdev_qstats_get_req_free(req);
- if (!qstats) {
- p_err("failed to get queue stats: %s", ys->err.msg);
- ret = -1;
- goto exit_close;
- }
/* Print the stats as returned by the kernel */
if (json_output)
@@ -299,9 +313,7 @@ static int do_show(int argc, char **argv)
print_plain_qstats(qstats);
netdev_qstats_get_list_free(qstats);
-exit_close:
- ynl_sock_destroy(ys);
- return ret;
+ return 0;
}
static void compute_stats(__u64 *values, unsigned int count,
@@ -406,10 +418,7 @@ static int cmp_ifindex_type(const void *a, const void *b)
static int do_balance(int argc, char **argv __attribute__((unused)))
{
struct netdev_qstats_get_list *qstats;
- struct netdev_qstats_get_req *req;
struct netdev_qstats_get_rsp **sorted;
- struct ynl_error yerr;
- struct ynl_sock *ys;
unsigned int count = 0;
unsigned int i, j;
int ret = 0;
@@ -419,29 +428,9 @@ static int do_balance(int argc, char **argv __attribute__((unused)))
return -1;
}
- ys = ynl_sock_create(&ynl_netdev_family, &yerr);
- if (!ys) {
- p_err("YNL: %s", yerr.msg);
+ qstats = qstats_dump(NETDEV_QSTATS_SCOPE_QUEUE);
+ if (!qstats)
return -1;
- }
-
- req = netdev_qstats_get_req_alloc();
- if (!req) {
- p_err("failed to allocate qstats request");
- ret = -1;
- goto exit_close;
- }
-
- /* Always use queue scope for balance analysis */
- netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE);
-
- qstats = netdev_qstats_get_dump(ys, req);
- netdev_qstats_get_req_free(req);
- if (!qstats) {
- p_err("failed to get queue stats: %s", ys->err.msg);
- ret = -1;
- goto exit_close;
- }
/* Count and sort queues */
ynl_dump_foreach(qstats, qs)
@@ -576,11 +565,68 @@ exit_free_sorted:
free(sorted);
exit_free_qstats:
netdev_qstats_get_list_free(qstats);
-exit_close:
- ynl_sock_destroy(ys);
return ret;
}
+static int do_hw_gro(int argc, char **argv __attribute__((unused)))
+{
+ struct netdev_qstats_get_list *qstats;
+
+ if (argc > 0) {
+ p_err("hw-gro command takes no arguments");
+ return -1;
+ }
+
+ qstats = qstats_dump(0);
+ if (!qstats)
+ return -1;
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ ynl_dump_foreach(qstats, qs) {
+ char ifname[IF_NAMESIZE];
+ const char *name;
+ double savings;
+
+ if (!qs->_present.rx_packets ||
+ !qs->_present.rx_hw_gro_packets ||
+ !qs->_present.rx_hw_gro_wire_packets)
+ continue;
+
+ if (!qs->rx_packets)
+ continue;
+
+ /* How many skbs did we avoid allocating thanks to HW GRO */
+ savings = (double)(qs->rx_hw_gro_wire_packets -
+ qs->rx_hw_gro_packets) /
+ qs->rx_packets * 100.0;
+
+ name = if_indextoname(qs->ifindex, ifname);
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "ifindex", qs->ifindex);
+ if (name)
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_float_field(json_wtr, "savings", savings);
+ jsonw_end_object(json_wtr);
+ } else {
+ if (name)
+ printf("%s", name);
+ else
+ printf("ifindex:%u", qs->ifindex);
+ printf(": %.1f%% savings\n", savings);
+ }
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ netdev_qstats_get_list_free(qstats);
+ return 0;
+}
+
static int do_help(int argc __attribute__((unused)),
char **argv __attribute__((unused)))
{
@@ -590,9 +636,10 @@ static int do_help(int argc __attribute__((unused)),
}
fprintf(stderr,
- "Usage: %s qstats { COMMAND | help }\n"
- " %s qstats [ show ] [ OPTIONS ]\n"
- " %s qstats balance\n"
+ "Usage: %1$s qstats { COMMAND | help }\n"
+ " %1$s qstats [ show ] [ OPTIONS ]\n"
+ " %1$s qstats balance\n"
+ " %1$s qstats hw-gro\n"
"\n"
" OPTIONS := { scope queue | group-by { device | queue } }\n"
"\n"
@@ -601,9 +648,14 @@ static int do_help(int argc __attribute__((unused)),
" show scope queue - Display per-queue statistics\n"
" show group-by device - Display device-aggregated statistics (default)\n"
" show group-by queue - Display per-queue statistics\n"
- " balance - Analyze traffic distribution balance.\n"
+ "\n"
+ " Analysis:\n"
+ " balance - Traffic distribution between queues.\n"
+ " hw-gro - HW GRO effectiveness analysis\n"
+ " - savings - delta between packets received\n"
+ " on the wire and packets seen by the kernel.\n"
"",
- bin_name, bin_name, bin_name);
+ bin_name);
return 0;
}
@@ -611,6 +663,7 @@ static int do_help(int argc __attribute__((unused)),
static const struct cmd qstats_cmds[] = {
{ "show", do_show },
{ "balance", do_balance },
+ { "hw-gro", do_hw_gro },
{ "help", do_help },
{ 0 }
};
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 56e44a98d6a5..450f13ba4cca 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -22,6 +22,7 @@ TARGETS += drivers/ntsync
TARGETS += drivers/s390x/uvdevice
TARGETS += drivers/net
TARGETS += drivers/net/bonding
+TARGETS += drivers/net/netconsole
TARGETS += drivers/net/team
TARGETS += drivers/net/virtio_net
TARGETS += drivers/platform/x86/intel/ifs
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index f5c71d993750..8154d6d429d3 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -15,12 +15,6 @@ TEST_PROGS := \
hds.py \
napi_id.py \
napi_threaded.py \
- netcons_basic.sh \
- netcons_cmdline.sh \
- netcons_fragmented_msg.sh \
- netcons_overflow.sh \
- netcons_sysdata.sh \
- netcons_torture.sh \
netpoll_basic.py \
ping.py \
psp.py \
diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/drivers/net/gro.c
index e894037d2e3e..3c0745b68bfa 100644
--- a/tools/testing/selftests/drivers/net/gro.c
+++ b/tools/testing/selftests/drivers/net/gro.c
@@ -3,26 +3,45 @@
* This testsuite provides conformance testing for GRO coalescing.
*
* Test cases:
- * 1.data
+ *
+ * data_*:
* Data packets of the same size and same header setup with correct
* sequence numbers coalesce. The one exception being the last data
* packet coalesced: it can be smaller than the rest and coalesced
* as long as it is in the same flow.
- * 2.ack
+ * - data_same: same size packets coalesce
+ * - data_lrg_sml: large then small coalesces
+ * - data_sml_lrg: small then large doesn't coalesce
+ *
+ * ack:
* Pure ACK does not coalesce.
- * 3.flags
- * Specific test cases: no packets with PSH, SYN, URG, RST set will
- * be coalesced.
- * 4.tcp
+ *
+ * flags_*:
+ * No packets with PSH, SYN, URG, RST, CWR set will be coalesced.
+ * - flags_psh, flags_syn, flags_rst, flags_urg, flags_cwr
+ *
+ * tcp_*:
* Packets with incorrect checksum, non-consecutive seqno and
* different TCP header options shouldn't coalesce. Nit: given that
* some extension headers have paddings, such as timestamp, headers
- * that are padding differently would not be coalesced.
- * 5.ip:
- * Packets with different (ECN, TTL, TOS) header, ip options or
- * ip fragments (ipv6) shouldn't coalesce.
- * 6.large:
+ * that are padded differently would not be coalesced.
+ * - tcp_csum: incorrect checksum
+ * - tcp_seq: non-consecutive sequence numbers
+ * - tcp_ts: different timestamps
+ * - tcp_opt: different TCP options
+ *
+ * ip_*:
+ * Packets with different (ECN, TTL, TOS) header, IP options or
+ * IP fragments shouldn't coalesce.
+ * - ip_ecn, ip_tos: shared between IPv4/IPv6
+ * - ip_ttl, ip_opt, ip_frag4: IPv4 only
+ * - ip_id_df*: IPv4 IP ID field coalescing tests
+ * - ip_frag6, ip_v6ext_*: IPv6 only
+ *
+ * large_*:
* Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ * - large_max: exceeding max size
+ * - large_rem: remainder handling
*
* MSS is defined as 4096 - header because if it is too small
* (i.e. 1500 MTU - header), it will result in many packets,
@@ -79,6 +98,15 @@
#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+enum flush_id_case {
+ FLUSH_ID_DF1_INC,
+ FLUSH_ID_DF1_FIXED,
+ FLUSH_ID_DF0_INC,
+ FLUSH_ID_DF0_FIXED,
+ FLUSH_ID_DF1_INC_FIXED,
+ FLUSH_ID_DF1_FIXED_INC,
+};
+
static const char *addr6_src = "fdaa::2";
static const char *addr6_dst = "fdaa::1";
static const char *addr4_src = "192.168.1.200";
@@ -95,7 +123,6 @@ static int tcp_offset = -1;
static int total_hdr_len = -1;
static int ethhdr_proto = -1;
static bool ipip;
-static const int num_flush_id_cases = 6;
static void vlog(const char *fmt, ...)
{
@@ -127,19 +154,19 @@ static void setup_sock_filter(int fd)
/* Overridden later if exthdrs are used: */
opt_ipproto_off = ipproto_off;
- if (strcmp(testname, "ip") == 0) {
- if (proto == PF_INET)
- optlen = sizeof(struct ip_timestamp);
- else {
- BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
- BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
- BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
-
- /* same size for HBH and Fragment extension header types */
- optlen = MIN_EXTHDR_SIZE;
- opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
- + offsetof(struct ip6_ext, ip6e_nxt);
- }
+ if (strcmp(testname, "ip_opt") == 0) {
+ optlen = sizeof(struct ip_timestamp);
+ } else if (strcmp(testname, "ip_frag6") == 0 ||
+ strcmp(testname, "ip_v6ext_same") == 0 ||
+ strcmp(testname, "ip_v6ext_diff") == 0) {
+ BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
+
+ /* same size for HBH and Fragment extension header types */
+ optlen = MIN_EXTHDR_SIZE;
+ opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+ + offsetof(struct ip6_ext, ip6e_nxt);
}
/* this filter validates the following:
@@ -333,32 +360,58 @@ static void create_packet(void *buf, int seq_offset, int ack_offset,
fill_datalinklayer(buf);
}
-/* send one extra flag, not first and not last pkt */
-static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
- int rst, int urg)
+#ifndef TH_CWR
+#define TH_CWR 0x80
+#endif
+static void set_flags(struct tcphdr *tcph, int payload_len, int psh, int syn,
+ int rst, int urg, int cwr)
{
- static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- int payload_len, pkt_size, flag, i;
- struct tcphdr *tcph;
-
- payload_len = PAYLOAD_LEN * psh;
- pkt_size = total_hdr_len + payload_len;
- flag = NUM_PACKETS / 2;
-
- create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
-
- tcph = (struct tcphdr *)(flag_buf + tcp_offset);
tcph->psh = psh;
tcph->syn = syn;
tcph->rst = rst;
tcph->urg = urg;
+ if (cwr)
+ tcph->th_flags |= TH_CWR;
+ else
+ tcph->th_flags &= ~TH_CWR;
tcph->check = 0;
tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+/* send extra flags of the (NUM_PACKETS / 2) and (NUM_PACKETS / 2 - 1)
+ * pkts, not first and not last pkt
+ */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg, int cwr)
+{
+ static char flag_buf[2][MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, i;
+ struct tcphdr *tcph;
+ int flag[2];
+
+ payload_len = PAYLOAD_LEN * (psh || cwr);
+ pkt_size = total_hdr_len + payload_len;
+ flag[0] = NUM_PACKETS / 2;
+ flag[1] = NUM_PACKETS / 2 - 1;
+
+ /* Create and configure packets with flags
+ */
+ for (i = 0; i < 2; i++) {
+ if (flag[i] > 0) {
+ create_packet(flag_buf[i], flag[i] * payload_len, 0,
+ payload_len, 0);
+ tcph = (struct tcphdr *)(flag_buf[i] + tcp_offset);
+ set_flags(tcph, payload_len, psh, syn, rst, urg, cwr);
+ }
+ }
for (i = 0; i < NUM_PACKETS + 1; i++) {
- if (i == flag) {
- write_packet(fd, flag_buf, pkt_size, daddr);
+ if (i == flag[0]) {
+ write_packet(fd, flag_buf[0], pkt_size, daddr);
+ continue;
+ } else if (i == flag[1] && cwr) {
+ write_packet(fd, flag_buf[1], pkt_size, daddr);
continue;
}
create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
@@ -648,7 +701,8 @@ static void fix_ip4_checksum(struct iphdr *iph)
iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
}
-static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr,
+ enum flush_id_case tcase)
{
static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
@@ -667,7 +721,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
switch (tcase) {
- case 0: /* DF=1, Incrementing - should coalesce */
+ case FLUSH_ID_DF1_INC: /* DF=1, Incrementing - should coalesce */
iph1->frag_off |= htons(IP_DF);
iph1->id = htons(8);
@@ -675,7 +729,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(9);
break;
- case 1: /* DF=1, Fixed - should coalesce */
+ case FLUSH_ID_DF1_FIXED: /* DF=1, Fixed - should coalesce */
iph1->frag_off |= htons(IP_DF);
iph1->id = htons(8);
@@ -683,7 +737,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(8);
break;
- case 2: /* DF=0, Incrementing - should coalesce */
+ case FLUSH_ID_DF0_INC: /* DF=0, Incrementing - should coalesce */
iph1->frag_off &= ~htons(IP_DF);
iph1->id = htons(8);
@@ -691,7 +745,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(9);
break;
- case 3: /* DF=0, Fixed - should coalesce */
+ case FLUSH_ID_DF0_FIXED: /* DF=0, Fixed - should coalesce */
iph1->frag_off &= ~htons(IP_DF);
iph1->id = htons(8);
@@ -699,9 +753,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
iph2->id = htons(8);
break;
- case 4: /* DF=1, two packets incrementing, and one fixed - should
- * coalesce only the first two packets
- */
+ case FLUSH_ID_DF1_INC_FIXED: /* DF=1, two packets incrementing, and
+ * one fixed - should coalesce only the
+ * first two packets
+ */
iph1->frag_off |= htons(IP_DF);
iph1->id = htons(8);
@@ -713,9 +768,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
send_three = true;
break;
- case 5: /* DF=1, two packets fixed, and one incrementing - should
- * coalesce only the first two packets
- */
+ case FLUSH_ID_DF1_FIXED_INC: /* DF=1, two packets fixed, and one
+ * incrementing - should coalesce only
+ * the first two packets
+ */
iph1->frag_off |= htons(IP_DF);
iph1->id = htons(8);
@@ -739,16 +795,6 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
}
}
-static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
-{
- for (int i = 0; i < num_flush_id_cases; i++) {
- sleep(1);
- send_flush_id_case(fd, daddr, i);
- sleep(1);
- write_packet(fd, fin_pkt, total_hdr_len, daddr);
- }
-}
-
static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
{
static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
@@ -926,6 +972,28 @@ static void set_timeout(int fd)
error(1, errno, "cannot set timeout, setsockopt failed");
}
+static void set_rcvbuf(int fd)
+{
+ int bufsize = 1 * 1024 * 1024; /* 1 MB */
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)))
+ error(1, errno, "cannot set rcvbuf size, setsockopt failed");
+}
+
+static void recv_error(int fd, int rcv_errno)
+{
+ struct tpacket_stats stats;
+ socklen_t len;
+
+ len = sizeof(stats);
+ if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len))
+ error(1, errno, "can't get stats");
+
+ fprintf(stderr, "Socket stats: packets=%u, drops=%u\n",
+ stats.tp_packets, stats.tp_drops);
+ error(1, rcv_errno, "could not receive");
+}
+
static void check_recv_pkts(int fd, int *correct_payload,
int correct_num_pkts)
{
@@ -950,7 +1018,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
ip_ext_len = 0;
pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
if (pkt_size < 0)
- error(1, errno, "could not receive");
+ recv_error(fd, errno);
if (iph->version == 4)
ip_ext_len = (iph->ihl - 5) * 4;
@@ -1008,108 +1076,131 @@ static void gro_sender(void)
daddr.sll_halen = ETH_ALEN;
create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
- if (strcmp(testname, "data") == 0) {
+ /* data sub-tests */
+ if (strcmp(testname, "data_same") == 0) {
send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "data_lrg_sml") == 0) {
send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "data_sml_lrg") == 0) {
send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ /* ack test */
} else if (strcmp(testname, "ack") == 0) {
send_ack(txfd, &daddr);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "flags") == 0) {
- send_flags(txfd, &daddr, 1, 0, 0, 0);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- send_flags(txfd, &daddr, 0, 1, 0, 0);
+ /* flags sub-tests */
+ } else if (strcmp(testname, "flags_psh") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0, 0);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 0, 1, 0);
+ } else if (strcmp(testname, "flags_syn") == 0) {
+ send_flags(txfd, &daddr, 0, 1, 0, 0, 0);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 0, 0, 1);
+ } else if (strcmp(testname, "flags_rst") == 0) {
+ send_flags(txfd, &daddr, 0, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags_urg") == 0) {
+ send_flags(txfd, &daddr, 0, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags_cwr") == 0) {
+ send_flags(txfd, &daddr, 0, 0, 0, 0, 1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "tcp") == 0) {
+
+ /* tcp sub-tests */
+ } else if (strcmp(testname, "tcp_csum") == 0) {
send_changed_checksum(txfd, &daddr);
- /* Adding sleep before sending FIN so that it is not
- * received prior to other packets.
- */
usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "tcp_seq") == 0) {
send_changed_seq(txfd, &daddr);
usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "tcp_ts") == 0) {
send_changed_ts(txfd, &daddr);
usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "tcp_opt") == 0) {
send_diff_opt(txfd, &daddr);
usleep(fin_delay_us);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "ip") == 0) {
+
+ /* ip sub-tests - shared between IPv4 and IPv6 */
+ } else if (strcmp(testname, "ip_ecn") == 0) {
send_changed_ECN(txfd, &daddr);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+ } else if (strcmp(testname, "ip_tos") == 0) {
send_changed_tos(txfd, &daddr);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- if (proto == PF_INET) {
- /* Modified packets may be received out of order.
- * Sleep function added to enforce test boundaries
- * so that fin pkts are not received prior to other pkts.
- */
- sleep(1);
- send_changed_ttl(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- send_ip_options(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- send_fragment4(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- test_flush_id(txfd, &daddr, fin_pkt);
- } else if (proto == PF_INET6) {
- sleep(1);
- send_fragment6(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- /* send IPv6 packets with ext header with same payload */
- send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- /* send IPv6 packets with ext header with different payload */
- send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- }
- } else if (strcmp(testname, "large") == 0) {
- /* 20 is the difference between min iphdr size
- * and min ipv6hdr size. Like MAX_HDR_SIZE,
- * MAX_PAYLOAD is defined with the larger header of the two.
- */
+
+ /* ip sub-tests - IPv4 only */
+ } else if (strcmp(testname, "ip_ttl") == 0) {
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_opt") == 0) {
+ send_ip_options(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_frag4") == 0) {
+ send_fragment4(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df1_inc") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df0_inc") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_INC);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_FIXED);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC_FIXED);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
+ send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED_INC);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ /* ip sub-tests - IPv6 only */
+ } else if (strcmp(testname, "ip_frag6") == 0) {
+ send_fragment6(txfd, &daddr);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_v6ext_same") == 0) {
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip_v6ext_diff") == 0) {
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
+ usleep(fin_delay_us);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ /* large sub-tests */
+ } else if (strcmp(testname, "large_max") == 0) {
int offset = (proto == PF_INET && !ipip) ? 20 : 0;
int remainder = (MAX_PAYLOAD + offset) % MSS;
send_large(txfd, &daddr, remainder);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "large_rem") == 0) {
+ int offset = (proto == PF_INET && !ipip) ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
send_large(txfd, &daddr, remainder + 1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
} else {
- error(1, 0, "Unknown testcase");
+ error(1, 0, "Unknown testcase: %s", testname);
}
if (close(txfd))
@@ -1126,132 +1217,166 @@ static void gro_receiver(void)
error(1, 0, "socket creation");
setup_sock_filter(rxfd);
set_timeout(rxfd);
+ set_rcvbuf(rxfd);
bind_packetsocket(rxfd);
ksft_ready();
memset(correct_payload, 0, sizeof(correct_payload));
- if (strcmp(testname, "data") == 0) {
+ /* data sub-tests */
+ if (strcmp(testname, "data_same") == 0) {
printf("pure data packet of same size: ");
correct_payload[0] = PAYLOAD_LEN * 2;
check_recv_pkts(rxfd, correct_payload, 1);
-
+ } else if (strcmp(testname, "data_lrg_sml") == 0) {
printf("large data packets followed by a smaller one: ");
correct_payload[0] = PAYLOAD_LEN * 1.5;
check_recv_pkts(rxfd, correct_payload, 1);
-
+ } else if (strcmp(testname, "data_sml_lrg") == 0) {
printf("small data packets followed by a larger one: ");
correct_payload[0] = PAYLOAD_LEN / 2;
correct_payload[1] = PAYLOAD_LEN;
check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* ack test */
} else if (strcmp(testname, "ack") == 0) {
printf("duplicate ack and pure ack: ");
check_recv_pkts(rxfd, correct_payload, 3);
- } else if (strcmp(testname, "flags") == 0) {
+
+ /* flags sub-tests */
+ } else if (strcmp(testname, "flags_psh") == 0) {
correct_payload[0] = PAYLOAD_LEN * 3;
correct_payload[1] = PAYLOAD_LEN * 2;
-
printf("psh flag ends coalescing: ");
check_recv_pkts(rxfd, correct_payload, 2);
-
+ } else if (strcmp(testname, "flags_syn") == 0) {
correct_payload[0] = PAYLOAD_LEN * 2;
correct_payload[1] = 0;
correct_payload[2] = PAYLOAD_LEN * 2;
printf("syn flag ends coalescing: ");
check_recv_pkts(rxfd, correct_payload, 3);
-
+ } else if (strcmp(testname, "flags_rst") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
printf("rst flag ends coalescing: ");
check_recv_pkts(rxfd, correct_payload, 3);
-
+ } else if (strcmp(testname, "flags_urg") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
printf("urg flag ends coalescing: ");
check_recv_pkts(rxfd, correct_payload, 3);
- } else if (strcmp(testname, "tcp") == 0) {
+ } else if (strcmp(testname, "flags_cwr") == 0) {
correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- correct_payload[2] = PAYLOAD_LEN;
- correct_payload[3] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN * 2;
+ correct_payload[2] = PAYLOAD_LEN * 2;
+ printf("cwr flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ /* tcp sub-tests */
+ } else if (strcmp(testname, "tcp_csum") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
printf("changed checksum does not coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
-
+ } else if (strcmp(testname, "tcp_seq") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
printf("Wrong Seq number doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("Different timestamp doesn't coalesce: ");
+ } else if (strcmp(testname, "tcp_ts") == 0) {
correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ correct_payload[3] = PAYLOAD_LEN;
+ printf("Different timestamp doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 4);
-
- printf("Different options doesn't coalesce: ");
+ } else if (strcmp(testname, "tcp_opt") == 0) {
correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ printf("Different options doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
- } else if (strcmp(testname, "ip") == 0) {
+
+ /* ip sub-tests - shared between IPv4 and IPv6 */
+ } else if (strcmp(testname, "ip_ecn") == 0) {
correct_payload[0] = PAYLOAD_LEN;
correct_payload[1] = PAYLOAD_LEN;
-
printf("different ECN doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
-
+ } else if (strcmp(testname, "ip_tos") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
printf("different tos doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
- if (proto == PF_INET) {
- printf("different ttl doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("ip options doesn't coalesce: ");
- correct_payload[2] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("fragmented ip4 doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- /* is_atomic checks */
- printf("DF=1, Incrementing - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=1, Fixed - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=0, Incrementing - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=0, Fixed - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- } else if (proto == PF_INET6) {
- /* GRO doesn't check for ipv6 hop limit when flushing.
- * Hence no corresponding test to the ipv4 case.
- */
- printf("fragmented ip6 doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- correct_payload[2] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("ipv6 with ext header does coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("ipv6 with ext header with different payloads doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- }
- } else if (strcmp(testname, "large") == 0) {
+ /* ip sub-tests - IPv4 only */
+ } else if (strcmp(testname, "ip_ttl") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ printf("different ttl doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip_opt") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ printf("ip options doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "ip_frag4") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ printf("fragmented ip4 doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip_id_df1_inc") == 0) {
+ printf("DF=1, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
+ printf("DF=1, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_id_df0_inc") == 0) {
+ printf("DF=0, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
+ printf("DF=0, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
+ printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
+ printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* ip sub-tests - IPv6 only */
+ } else if (strcmp(testname, "ip_frag6") == 0) {
+ /* GRO doesn't check for ipv6 hop limit when flushing.
+ * Hence no corresponding test to the ipv4 case.
+ */
+ printf("fragmented ip6 doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "ip_v6ext_same") == 0) {
+ printf("ipv6 with ext header does coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+ } else if (strcmp(testname, "ip_v6ext_diff") == 0) {
+ printf("ipv6 with ext header with different payloads doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* large sub-tests */
+ } else if (strcmp(testname, "large_max") == 0) {
int offset = (proto == PF_INET && !ipip) ? 20 : 0;
int remainder = (MAX_PAYLOAD + offset) % MSS;
@@ -1259,14 +1384,18 @@ static void gro_receiver(void)
correct_payload[1] = remainder;
printf("Shouldn't coalesce if exceed IP max pkt size: ");
check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "large_rem") == 0) {
+ int offset = (proto == PF_INET && !ipip) ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
/* last segment sent individually, doesn't start new segment */
- correct_payload[0] = correct_payload[0] - remainder;
+ correct_payload[0] = (MAX_PAYLOAD + offset) - remainder;
correct_payload[1] = remainder + 1;
correct_payload[2] = remainder + 1;
+ printf("last segment sent individually: ");
check_recv_pkts(rxfd, correct_payload, 3);
} else {
- error(1, 0, "Test case error, should never trigger");
+ error(1, 0, "Test case error: unknown testname %s", testname);
}
if (close(rxfd))
diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py
index ba83713bf7b5..cbc1b19dbc91 100755
--- a/tools/testing/selftests/drivers/net/gro.py
+++ b/tools/testing/selftests/drivers/net/gro.py
@@ -9,18 +9,36 @@ binary in different configurations and checking for correct packet
coalescing behavior.
Test cases:
- - data: Data packets with same size/headers and correct seq numbers coalesce
+ - data_same: Same size data packets coalesce
+ - data_lrg_sml: Large packet followed by smaller one coalesces
+ - data_sml_lrg: Small packet followed by larger one doesn't coalesce
- ack: Pure ACK packets do not coalesce
- - flags: Packets with PSH, SYN, URG, RST flags do not coalesce
- - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce
- - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce
- - large: Packets larger than GRO_MAX_SIZE don't coalesce
+ - flags_psh: Packets with PSH flag don't coalesce
+ - flags_syn: Packets with SYN flag don't coalesce
+ - flags_rst: Packets with RST flag don't coalesce
+ - flags_urg: Packets with URG flag don't coalesce
+ - flags_cwr: Packets with CWR flag don't coalesce
+ - tcp_csum: Packets with incorrect checksum don't coalesce
+ - tcp_seq: Packets with non-consecutive seqno don't coalesce
+ - tcp_ts: Packets with different timestamp options don't coalesce
+ - tcp_opt: Packets with different TCP options don't coalesce
+ - ip_ecn: Packets with different ECN don't coalesce
+ - ip_tos: Packets with different TOS don't coalesce
+ - ip_ttl: (IPv4) Packets with different TTL don't coalesce
+ - ip_opt: (IPv4) Packets with IP options don't coalesce
+ - ip_frag4: (IPv4) IPv4 fragments don't coalesce
+ - ip_id_df*: (IPv4) IP ID field coalescing tests
+ - ip_frag6: (IPv6) IPv6 fragments don't coalesce
+ - ip_v6ext_same: (IPv6) IPv6 ext header with same payload coalesces
+ - ip_v6ext_diff: (IPv6) IPv6 ext header with different payload doesn't coalesce
+ - large_max: Packets exceeding GRO_MAX_SIZE don't coalesce
+ - large_rem: Large packet remainder handling
"""
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import NetDrvEpEnv, KsftXfailEx
-from lib.py import cmd, defer, bkg, ip
+from lib.py import bkg, cmd, defer, ethtool, ip
from lib.py import ksft_variants
@@ -70,49 +88,150 @@ def _set_mtu_restore(dev, mtu, host):
defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host)
-def _setup(cfg, test_name):
+def _set_ethtool_feat(dev, current, feats, host=None):
+ s2n = {True: "on", False: "off"}
+
+ new = ["-K", dev]
+ old = ["-K", dev]
+ no_change = True
+ for name, state in feats.items():
+ new += [name, s2n[state]]
+ old += [name, s2n[current[name]["active"]]]
+
+ if current[name]["active"] != state:
+ no_change = False
+ if current[name]["fixed"]:
+ raise KsftXfailEx(f"Device does not support {name}")
+ if no_change:
+ return
+
+ eth_cmd = ethtool(" ".join(new), host=host)
+ defer(ethtool, " ".join(old), host=host)
+
+ # If ethtool printed something, the kernel must have modified some features
+ if eth_cmd.stdout:
+ ksft_pr(eth_cmd)
+
+
+def _setup(cfg, mode, test_name):
""" Setup hardware loopback mode for GRO testing. """
if not hasattr(cfg, "bin_remote"):
cfg.bin_local = cfg.test_dir / "gro"
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
- # "large" test needs at least 4k MTU
- if test_name == "large":
+ if not hasattr(cfg, "feat"):
+ cfg.feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ cfg.remote_feat = ethtool(f"-k {cfg.remote_ifname}",
+ host=cfg.remote, json=True)[0]
+
+ # "large_*" tests need at least 4k MTU
+ if test_name.startswith("large_"):
_set_mtu_restore(cfg.dev, 4096, None)
_set_mtu_restore(cfg.remote_dev, 4096, cfg.remote)
- flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
- irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
-
- _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
- _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+ if mode == "sw":
+ flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout"
+ irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs"
+
+ _write_defer_restore(cfg, flush_path, "200000", defer_undo=True)
+ _write_defer_restore(cfg, irq_path, "10", defer_undo=True)
+
+ _set_ethtool_feat(cfg.ifname, cfg.feat,
+ {"generic-receive-offload": True,
+ "rx-gro-hw": False,
+ "large-receive-offload": False})
+ elif mode == "hw":
+ _set_ethtool_feat(cfg.ifname, cfg.feat,
+ {"generic-receive-offload": False,
+ "rx-gro-hw": True,
+ "large-receive-offload": False})
+
+ # Some NICs treat HW GRO as a GRO sub-feature so disabling GRO
+ # will also clear HW GRO. Use a hack of installing XDP generic
+ # to skip SW GRO, even when enabled.
+ feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ if not feat["rx-gro-hw"]["active"]:
+ ksft_pr("Driver clears HW GRO and SW GRO is cleared, using generic XDP workaround")
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ ip(f"link set dev {cfg.ifname} xdpgeneric obj {prog} sec xdp")
+ defer(ip, f"link set dev {cfg.ifname} xdpgeneric off")
+
+ # Attaching XDP may change features, fetch the latest state
+ feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
+
+ _set_ethtool_feat(cfg.ifname, feat,
+ {"generic-receive-offload": True,
+ "rx-gro-hw": True,
+ "large-receive-offload": False})
+ elif mode == "lro":
+ # netdevsim advertises LRO for feature inheritance testing with
+ # bonding/team tests but it doesn't actually perform the offload
+ cfg.require_nsim(nsim_test=False)
+
+ _set_ethtool_feat(cfg.ifname, cfg.feat,
+ {"generic-receive-offload": False,
+ "rx-gro-hw": False,
+ "large-receive-offload": True})
try:
# Disable TSO for local tests
cfg.require_nsim() # will raise KsftXfailEx if not running on nsim
- cmd(f"ethtool -K {cfg.ifname} gro on tso off")
- cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote)
+ _set_ethtool_feat(cfg.remote_ifname, cfg.remote_feat,
+ {"tcp-segmentation-offload": False},
+ host=cfg.remote)
except KsftXfailEx:
pass
+
def _gro_variants():
"""Generator that yields all combinations of protocol and test types."""
- for protocol in ["ipv4", "ipv6", "ipip"]:
- for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]:
- yield protocol, test_name
+ # Tests that work for all protocols
+ common_tests = [
+ "data_same", "data_lrg_sml", "data_sml_lrg",
+ "ack",
+ "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr",
+ "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt",
+ "ip_ecn", "ip_tos",
+ "large_max", "large_rem",
+ ]
+
+ # Tests specific to IPv4
+ ipv4_tests = [
+ "ip_ttl", "ip_opt", "ip_frag4",
+ "ip_id_df1_inc", "ip_id_df1_fixed",
+ "ip_id_df0_inc", "ip_id_df0_fixed",
+ "ip_id_df1_inc_fixed", "ip_id_df1_fixed_inc",
+ ]
+
+ # Tests specific to IPv6
+ ipv6_tests = [
+ "ip_frag6", "ip_v6ext_same", "ip_v6ext_diff",
+ ]
+
+ for mode in ["sw", "hw", "lro"]:
+ for protocol in ["ipv4", "ipv6", "ipip"]:
+ for test_name in common_tests:
+ yield mode, protocol, test_name
+
+ if protocol in ["ipv4", "ipip"]:
+ for test_name in ipv4_tests:
+ yield mode, protocol, test_name
+ elif protocol == "ipv6":
+ for test_name in ipv6_tests:
+ yield mode, protocol, test_name
@ksft_variants(_gro_variants())
-def test(cfg, protocol, test_name):
+def test(cfg, mode, protocol, test_name):
"""Run a single GRO test with retries."""
ipver = "6" if protocol[-1] == "6" else "4"
cfg.require_ipver(ipver)
- _setup(cfg, test_name)
+ _setup(cfg, mode, test_name)
base_cmd_args = [
f"--{protocol}",
@@ -142,10 +261,9 @@ def test(cfg, protocol, test_name):
if rx_proc.ret == 0:
return
- ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# '))
- ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# '))
+ ksft_pr(rx_proc)
- if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"):
+ if test_name.startswith("large_") and os.environ.get("KSFT_MACHINE_SLOW"):
ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
return
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 9c163ba6feee..a64140333a46 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -35,6 +35,7 @@ TEST_PROGS = \
pp_alloc_fail.py \
rss_api.py \
rss_ctx.py \
+ rss_drv.py \
rss_flow_label.py \
rss_input_xfrm.py \
toeplitz.py \
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index 62456df947bc..240d13dbc54e 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -12,6 +12,7 @@
#include <unistd.h>
#include <arpa/inet.h>
+#include <linux/mman.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
@@ -37,6 +38,23 @@
#include <liburing.h>
+#define SKIP_CODE 42
+
+struct t_io_uring_zcrx_ifq_reg {
+ __u32 if_idx;
+ __u32 if_rxq;
+ __u32 rq_entries;
+ __u32 flags;
+
+ __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
+ __u64 region_ptr; /* struct io_uring_region_desc * */
+
+ struct io_uring_zcrx_offsets offsets;
+ __u32 zcrx_id;
+ __u32 rx_buf_len;
+ __u64 __resv[3];
+};
+
static long page_size;
#define AREA_SIZE (8192 * page_size)
#define SEND_SIZE (512 * 4096)
@@ -65,6 +83,8 @@ static bool cfg_oneshot;
static int cfg_oneshot_recvs;
static int cfg_send_size = SEND_SIZE;
static struct sockaddr_in6 cfg_addr;
+static unsigned int cfg_rx_buf_len;
+static bool cfg_dry_run;
static char *payload;
static void *area_ptr;
@@ -128,14 +148,28 @@ static void setup_zcrx(struct io_uring *ring)
if (!ifindex)
error(1, 0, "bad interface name: %s", cfg_ifname);
- area_ptr = mmap(NULL,
- AREA_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE,
- 0,
- 0);
- if (area_ptr == MAP_FAILED)
- error(1, 0, "mmap(): zero copy area");
+ if (cfg_rx_buf_len && cfg_rx_buf_len != page_size) {
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE |
+ MAP_HUGETLB | MAP_HUGE_2MB,
+ -1,
+ 0);
+ if (area_ptr == MAP_FAILED) {
+ printf("Can't allocate huge pages\n");
+ exit(SKIP_CODE);
+ }
+ } else {
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+ if (area_ptr == MAP_FAILED)
+ error(1, 0, "mmap(): zero copy area");
+ }
ring_size = get_refill_ring_size(rq_entries);
ring_ptr = mmap(NULL,
@@ -157,17 +191,23 @@ static void setup_zcrx(struct io_uring *ring)
.flags = 0,
};
- struct io_uring_zcrx_ifq_reg reg = {
+ struct t_io_uring_zcrx_ifq_reg reg = {
.if_idx = ifindex,
.if_rxq = cfg_queue_id,
.rq_entries = rq_entries,
.area_ptr = (__u64)(unsigned long)&area_reg,
.region_ptr = (__u64)(unsigned long)&region_reg,
+ .rx_buf_len = cfg_rx_buf_len,
};
- ret = io_uring_register_ifq(ring, &reg);
- if (ret)
+ ret = io_uring_register_ifq(ring, (void *)&reg);
+ if (cfg_rx_buf_len && (ret == -EINVAL || ret == -EOPNOTSUPP ||
+ ret == -ERANGE)) {
+ printf("Large chunks are not supported %i\n", ret);
+ exit(SKIP_CODE);
+ } else if (ret) {
error(1, 0, "io_uring_register_ifq(): %d", ret);
+ }
rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head);
rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail);
@@ -323,6 +363,8 @@ static void run_server(void)
io_uring_queue_init(512, &ring, flags);
setup_zcrx(&ring);
+ if (cfg_dry_run)
+ return;
add_accept(&ring, fd);
@@ -383,7 +425,7 @@ static void parse_opts(int argc, char **argv)
usage(argv[0]);
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
+ while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:x:d")) != -1) {
switch (c) {
case 's':
if (cfg_client)
@@ -418,6 +460,12 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_send_size = strtoul(optarg, NULL, 0);
break;
+ case 'x':
+ cfg_rx_buf_len = page_size * strtoul(optarg, NULL, 0);
+ break;
+ case 'd':
+ cfg_dry_run = true;
+ break;
}
}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 712c806508b5..c63d6d6450d2 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -3,104 +3,121 @@
import re
from os import path
-from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import ksft_run, ksft_exit, KsftSkipEx, ksft_variants, KsftNamedVariant
from lib.py import NetDrvEpEnv
from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
+from lib.py import EthtoolFamily
+SKIP_CODE = 42
-def _get_current_settings(cfg):
- output = ethtool(f"-g {cfg.ifname}", json=True)[0]
- return (output['rx'], output['hds-thresh'])
-
-
-def _get_combined_channels(cfg):
- output = ethtool(f"-l {cfg.ifname}").stdout
- values = re.findall(r'Combined:\s+(\d+)', output)
- return int(values[1])
-
-
-def _create_rss_ctx(cfg, chan):
- output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout
+def create_rss_ctx(cfg):
+ output = ethtool(f"-X {cfg.ifname} context new start {cfg.target} equal 1").stdout
values = re.search(r'New RSS context is (\d+)', output).group(1)
- ctx_id = int(values)
- return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}"))
+ return int(values)
-def _set_flow_rule(cfg, port, chan):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout
+def set_flow_rule(cfg):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.target}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
-def _set_flow_rule_rss(cfg, port, ctx_id):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout
+def set_flow_rule_rss(cfg, rss_ctx_id):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} context {rss_ctx_id}").stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
-def test_zcrx(cfg) -> None:
- cfg.require_ipver('6')
-
- combined_chans = _get_combined_channels(cfg)
- if combined_chans < 2:
- raise KsftSkipEx('at least 2 combined channels required')
- (rx_ring, hds_thresh) = _get_current_settings(cfg)
- port = rand_port()
-
- ethtool(f"-G {cfg.ifname} tcp-data-split on")
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+def single(cfg):
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ channels = channels['combined-count']
+ if channels < 2:
+ raise KsftSkipEx('Test requires NETIF with at least 2 combined channels')
- ethtool(f"-G {cfg.ifname} hds-thresh 0")
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+ rings = cfg.ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ rx_rings = rings['rx']
+ hds_thresh = rings.get('hds-thresh', 0)
- ethtool(f"-G {cfg.ifname} rx 64")
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+ cfg.ethnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled',
+ 'hds-thresh': 0,
+ 'rx': 64})
+ defer(cfg.ethnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown',
+ 'hds-thresh': hds_thresh,
+ 'rx': rx_rings})
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ cfg.target = channels - 1
+ ethtool(f"-X {cfg.ifname} equal {cfg.target}")
defer(ethtool, f"-X {cfg.ifname} default")
- flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ flow_rule_id = set_flow_rule(cfg)
defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
- with bkg(rx_cmd, exit_wait=True):
- wait_port_listen(port, proto="tcp")
- cmd(tx_cmd, host=cfg.remote)
+def rss(cfg):
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ channels = channels['combined-count']
+ if channels < 2:
+ raise KsftSkipEx('Test requires NETIF with at least 2 combined channels')
-def test_zcrx_oneshot(cfg) -> None:
- cfg.require_ipver('6')
+ rings = cfg.ethnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ rx_rings = rings['rx']
+ hds_thresh = rings.get('hds-thresh', 0)
- combined_chans = _get_combined_channels(cfg)
- if combined_chans < 2:
- raise KsftSkipEx('at least 2 combined channels required')
- (rx_ring, hds_thresh) = _get_current_settings(cfg)
- port = rand_port()
+ cfg.ethnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled',
+ 'hds-thresh': 0,
+ 'rx': 64})
+ defer(cfg.ethnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown',
+ 'hds-thresh': hds_thresh,
+ 'rx': rx_rings})
- ethtool(f"-G {cfg.ifname} tcp-data-split on")
- defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+ cfg.target = channels - 1
+ ethtool(f"-X {cfg.ifname} equal {cfg.target}")
+ defer(ethtool, f"-X {cfg.ifname} default")
- ethtool(f"-G {cfg.ifname} hds-thresh 0")
- defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+ rss_ctx_id = create_rss_ctx(cfg)
+ defer(ethtool, f"-X {cfg.ifname} delete context {rss_ctx_id}")
- ethtool(f"-G {cfg.ifname} rx 64")
- defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+ flow_rule_id = set_flow_rule_rss(cfg, rss_ctx_id)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
- defer(ethtool, f"-X {cfg.ifname} default")
- flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
- defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+@ksft_variants([
+ KsftNamedVariant("single", single),
+ KsftNamedVariant("rss", rss),
+])
+def test_zcrx(cfg, setup) -> None:
+ cfg.require_ipver('6')
- rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384"
+ setup(cfg)
+ rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target}"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840"
with bkg(rx_cmd, exit_wait=True):
- wait_port_listen(port, proto="tcp")
+ wait_port_listen(cfg.port, proto="tcp")
cmd(tx_cmd, host=cfg.remote)
-def test_zcrx_rss(cfg) -> None:
+@ksft_variants([
+ KsftNamedVariant("single", single),
+ KsftNamedVariant("rss", rss),
+])
+def test_zcrx_oneshot(cfg, setup) -> None:
+ cfg.require_ipver('6')
+
+ setup(cfg)
+ rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -o 4"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 4096 -z 16384"
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(cfg.port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_zcrx_large_chunks(cfg) -> None:
+ """Test zcrx with large buffer chunks."""
+
cfg.require_ipver('6')
combined_chans = _get_combined_channels(cfg)
@@ -121,12 +138,16 @@ def test_zcrx_rss(cfg) -> None:
ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
defer(ethtool, f"-X {cfg.ifname} default")
- (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
- flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
- rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -x 2"
tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+
+ probe = cmd(rx_cmd + " -d", fail=False)
+ if probe.ret == SKIP_CODE:
+ raise KsftSkipEx(probe.stdout)
+
with bkg(rx_cmd, exit_wait=True):
wait_port_listen(port, proto="tcp")
cmd(tx_cmd, host=cfg.remote)
@@ -137,7 +158,9 @@ def main() -> None:
cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ cfg.ethnl = EthtoolFamily()
+ cfg.port = rand_port()
+ ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot], args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 3288ed04ce08..16864c844108 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -48,6 +48,7 @@
#include <errno.h>
#define __iovec_defined
#include <fcntl.h>
+#include <limits.h>
#include <malloc.h>
#include <error.h>
#include <poll.h>
diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
index c1e943d53f19..c632b41e7a23 100755
--- a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
+++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py
@@ -1,15 +1,38 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+# pylint: disable=locally-disabled, invalid-name, attribute-defined-outside-init, too-few-public-methods
"""
Tests related to configuration of HW timestamping
"""
import errno
+import ctypes
+import fcntl
+import socket
from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx
from lib.py import NetDrvEnv, EthtoolFamily, NlError
+SIOCSHWTSTAMP = 0x89b0
+SIOCGHWTSTAMP = 0x89b1
+class hwtstamp_config(ctypes.Structure):
+ """ Python copy of struct hwtstamp_config """
+ _fields_ = [
+ ("flags", ctypes.c_int),
+ ("tx_type", ctypes.c_int),
+ ("rx_filter", ctypes.c_int),
+ ]
+
+
+class ifreq(ctypes.Structure):
+ """ Python copy of struct ifreq """
+ _fields_ = [
+ ("ifr_name", ctypes.c_char * 16),
+ ("ifr_data", ctypes.POINTER(hwtstamp_config)),
+ ]
+
+
def __get_hwtimestamp_support(cfg):
""" Retrieve supported configuration information """
@@ -31,8 +54,29 @@ def __get_hwtimestamp_support(cfg):
return ctx
+def __get_hwtimestamp_config_ioctl(cfg):
+ """ Retrieve current TS configuration information (via ioctl) """
+
+ config = hwtstamp_config()
+
+ req = ifreq()
+ req.ifr_name = cfg.ifname.encode()
+ req.ifr_data = ctypes.pointer(config)
+
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ fcntl.ioctl(sock.fileno(), SIOCGHWTSTAMP, req)
+ sock.close()
+
+ except OSError as e:
+ if e.errno == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via ioctl") from e
+ raise
+ return config
+
+
def __get_hwtimestamp_config(cfg):
- """ Retrieve current TS configuration information """
+ """ Retrieve current TS configuration information (via netlink) """
try:
tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}})
@@ -43,8 +87,27 @@ def __get_hwtimestamp_config(cfg):
return tscfg
+def __set_hwtimestamp_config_ioctl(cfg, ts):
+ """ Setup new TS configuration information (via ioctl) """
+ config = hwtstamp_config()
+ config.rx_filter = ts['rx-filters']['bits']['bit'][0]['index']
+ config.tx_type = ts['tx-types']['bits']['bit'][0]['index']
+ req = ifreq()
+ req.ifr_name = cfg.ifname.encode()
+ req.ifr_data = ctypes.pointer(config)
+ try:
+ sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ fcntl.ioctl(sock.fileno(), SIOCSHWTSTAMP, req)
+ sock.close()
+
+ except OSError as e:
+ if e.errno == errno.EOPNOTSUPP:
+ raise KsftSkipEx("timestamping configuration is not supported via ioctl") from e
+ raise
+
+
def __set_hwtimestamp_config(cfg, ts):
- """ Setup new TS configuration information """
+ """ Setup new TS configuration information (via netlink) """
ts['header'] = {'dev-name': cfg.ifname}
try:
@@ -56,9 +119,9 @@ def __set_hwtimestamp_config(cfg, ts):
return res
-def test_hwtstamp_tx(cfg):
+def __perform_hwtstamp_tx(cfg, is_ioctl):
"""
- Test TX timestamp configuration.
+ Test TX timestamp configuration via either netlink or ioctl.
The driver should apply provided config and report back proper state.
"""
@@ -66,16 +129,37 @@ def test_hwtstamp_tx(cfg):
ts = __get_hwtimestamp_support(cfg)
tx = ts['tx']
for t in tx:
+ res = None
tscfg = orig_tscfg
tscfg['tx-types']['bits']['bit'] = [t]
- res = __set_hwtimestamp_config(cfg, tscfg)
+ if is_ioctl:
+ __set_hwtimestamp_config_ioctl(cfg, tscfg)
+ else:
+ res = __set_hwtimestamp_config(cfg, tscfg)
if res is None:
res = __get_hwtimestamp_config(cfg)
+ resioctl = __get_hwtimestamp_config_ioctl(cfg)
ksft_eq(res['tx-types']['bits']['bit'], [t])
+ ksft_eq(resioctl.tx_type, t['index'])
__set_hwtimestamp_config(cfg, orig_tscfg)
+def test_hwtstamp_tx_netlink(cfg):
+ """
+ Test TX timestamp configuration setup via netlink.
+ The driver should apply provided config and report back proper state.
+ """
+ __perform_hwtstamp_tx(cfg, False)
+
+
+def test_hwtstamp_tx_ioctl(cfg):
+ """
+ Test TX timestamp configuration setup via ioctl.
+ The driver should apply provided config and report back proper state.
+ """
+ __perform_hwtstamp_tx(cfg, True)
+
-def test_hwtstamp_rx(cfg):
+def __perform_hwtstamp_rx(cfg, is_ioctl):
"""
Test RX timestamp configuration.
The filter configuration is taken from the list of supported filters.
@@ -87,11 +171,17 @@ def test_hwtstamp_rx(cfg):
ts = __get_hwtimestamp_support(cfg)
rx = ts['rx']
for r in rx:
+ res = None
tscfg = orig_tscfg
tscfg['rx-filters']['bits']['bit'] = [r]
- res = __set_hwtimestamp_config(cfg, tscfg)
+ if is_ioctl:
+ __set_hwtimestamp_config_ioctl(cfg, tscfg)
+ else:
+ res = __set_hwtimestamp_config(cfg, tscfg)
if res is None:
res = __get_hwtimestamp_config(cfg)
+ resioctl = __get_hwtimestamp_config_ioctl(cfg)
+ ksft_eq(resioctl.rx_filter, res['rx-filters']['bits']['bit'][0]['index'])
if r['index'] == 0 or r['index'] == 1:
ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index'])
else:
@@ -100,12 +190,34 @@ def test_hwtstamp_rx(cfg):
__set_hwtimestamp_config(cfg, orig_tscfg)
+def test_hwtstamp_rx_netlink(cfg):
+ """
+ Test RX timestamp configuration via netlink.
+ The filter configuration is taken from the list of supported filters.
+ The driver should apply the config without error and report back proper state.
+ Some extension of the timestamping scope is allowed for PTP filters.
+ """
+ __perform_hwtstamp_rx(cfg, False)
+
+
+def test_hwtstamp_rx_ioctl(cfg):
+ """
+ Test RX timestamp configuration via ioctl.
+ The filter configuration is taken from the list of supported filters.
+ The driver should apply the config without error and report back proper state.
+ Some extension of the timestamping scope is allowed for PTP filters.
+ """
+ __perform_hwtstamp_rx(cfg, True)
+
+
def main() -> None:
""" Ksft boiler plate main """
with NetDrvEnv(__file__, nsim_test=False) as cfg:
cfg.ethnl = EthtoolFamily()
- ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,))
+ ksft_run([test_hwtstamp_tx_ioctl, test_hwtstamp_tx_netlink,
+ test_hwtstamp_rx_ioctl, test_hwtstamp_rx_netlink],
+ args=(cfg,))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_drv.py b/tools/testing/selftests/drivers/net/hw/rss_drv.py
new file mode 100755
index 000000000000..2d1a33189076
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_drv.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Driver-related behavior tests for RSS.
+"""
+
+from lib.py import ksft_run, ksft_exit, ksft_ge
+from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx
+from lib.py import defer, ethtool
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+
+def _is_power_of_two(n):
+ return n > 0 and (n & (n - 1)) == 0
+
+
+def _get_rss(cfg, context=0):
+ return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0]
+
+
+def _test_rss_indir_size(cfg, qcnt, context=0):
+ """Test that indirection table size is at least 4x queue count."""
+ ethtool(f"-L {cfg.ifname} combined {qcnt}")
+
+ rss = _get_rss(cfg, context=context)
+ indir = rss['rss-indirection-table']
+ ksft_ge(len(indir), 4 * qcnt, "Table smaller than 4x")
+ return len(indir)
+
+
+def _maybe_create_context(cfg, create_context):
+ """ Either create a context and return its ID or return 0 for main ctx """
+ if not create_context:
+ return 0
+ try:
+ ctx = cfg.ethnl.rss_create_act({'header': {'dev-index': cfg.ifindex}})
+ ctx_id = ctx['context']
+ defer(cfg.ethnl.rss_delete_act,
+ {'header': {'dev-index': cfg.ifindex}, 'context': ctx_id})
+ except NlError:
+ raise KsftSkipEx("Device does not support additional RSS contexts")
+
+ return ctx_id
+
+
+@ksft_variants([
+ KsftNamedVariant("main", False),
+ KsftNamedVariant("ctx", True),
+])
+def indir_size_4x(cfg, create_context):
+ """
+ Test that the indirection table has at least 4 entries per queue.
+ Empirically network-heavy workloads like memcache suffer with the 33%
+ imbalance of a 2x indirection table size.
+ 4x table translates to a 16% imbalance.
+ """
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ ch_max = channels.get('combined-max', 0)
+ qcnt = channels['combined-count']
+
+ if ch_max < 3:
+ raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}")
+
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ ethtool(f"-L {cfg.ifname} combined 3")
+
+ ctx_id = _maybe_create_context(cfg, create_context)
+
+ indir_sz = _test_rss_indir_size(cfg, 3, context=ctx_id)
+
+ # Test with max queue count (max - 1 if max is a power of two)
+ test_max = ch_max - 1 if _is_power_of_two(ch_max) else ch_max
+ if test_max > 3 and indir_sz < test_max * 4:
+ _test_rss_indir_size(cfg, test_max, context=ctx_id)
+
+
+def main() -> None:
+ """ Ksft boiler plate main """
+ with NetDrvEnv(__file__) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ ksft_run([indir_size_4x], args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
index 6fa95fe27c47..7dc80070884a 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py
@@ -145,9 +145,14 @@ def test_rss_flow_label_6only(cfg):
# Try to enable Flow Labels and check again, in case it leaks thru
initial = _ethtool_get_cfg(cfg, "udp6")
- changed = initial.replace("l", "") if "l" in initial else initial + "l"
-
- cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}")
+ no_lbl = initial.replace("l", "")
+ if "l" not in initial:
+ try:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}")
+ except CmdExitFailure as exc:
+ raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc
+ else:
+ cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}")
restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}")
_check_v4_flow_types(cfg)
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 72880e388478..503f1a2a2872 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -5,9 +5,9 @@ import multiprocessing
import socket
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout
from lib.py import NetDrvEpEnv
-from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import KsftSkipEx, KsftFailEx
-from lib.py import rand_port
+from lib.py import defer, ksft_pr, rand_port
def traffic(cfg, local_port, remote_port, ipver):
@@ -21,6 +21,40 @@ def traffic(cfg, local_port, remote_port, ipver):
return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU)
+def _rss_input_xfrm_try_enable(cfg):
+ """
+ Check if symmetric input-xfrm is already enabled, if not try to enable it
+ and register a cleanup.
+ """
+ rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
+ orig_xfrm = rss.get('input-xfrm', set())
+ sym_xfrm = set(filter(lambda x: 'sym' in x, orig_xfrm))
+
+ if sym_xfrm:
+ ksft_pr("Sym input xfrm already enabled:", sym_xfrm)
+ return sym_xfrm
+
+ for xfrm in cfg.ethnl.consts["input-xfrm"].entries:
+ # Skip non-symmetric transforms
+ if "sym" not in xfrm:
+ continue
+
+ try_xfrm = {xfrm} | orig_xfrm
+ try:
+ cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": try_xfrm})
+ except NlError:
+ continue
+
+ ksft_pr("Sym input xfrm configured:", try_xfrm)
+ defer(cfg.ethnl.rss_set,
+ {"header": {"dev-index": cfg.ifindex},
+ "input-xfrm": orig_xfrm})
+ return {xfrm}
+
+ return set()
+
+
def test_rss_input_xfrm(cfg, ipver):
"""
Test symmetric input_xfrm.
@@ -37,12 +71,10 @@ def test_rss_input_xfrm(cfg, ipver):
if not hasattr(socket, "SO_INCOMING_CPU"):
raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
- rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}})
- input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {})))
-
# Check for symmetric xor/or-xor
+ input_xfrm = _rss_input_xfrm_try_enable(cfg)
if not input_xfrm:
- raise KsftSkipEx("Symmetric RSS hash not requested")
+ raise KsftSkipEx("Symmetric RSS hash not supported by device")
cpus = set()
successful = 0
diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c
index 285bb17df9c2..035bf908d8d9 100644
--- a/tools/testing/selftests/drivers/net/hw/toeplitz.c
+++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c
@@ -59,7 +59,7 @@
#include "../../../net/lib/ksft.h"
#define TOEPLITZ_KEY_MIN_LEN 40
-#define TOEPLITZ_KEY_MAX_LEN 60
+#define TOEPLITZ_KEY_MAX_LEN 256
#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
@@ -72,6 +72,8 @@
#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+#define MIN_PKT_SAMPLES 40 /* minimum number of packets to receive */
+
/* configuration options (cmdline arguments) */
static uint16_t cfg_dport = 8000;
static int cfg_family = AF_INET6;
@@ -251,15 +253,31 @@ static bool recv_block(struct ring_state *ring)
return true;
}
-/* simple test: sleep once unconditionally and then process all rings */
+/* simple test: process all rings until MIN_PKT_SAMPLES packets are received,
+ * or the test times out.
+ */
static void process_rings(void)
{
+ struct timeval start, now;
+ bool pkts_found = true;
+ long elapsed_usec;
int i;
- usleep(1000 * cfg_timeout_msec);
+ gettimeofday(&start, NULL);
- for (i = 0; i < num_cpus; i++)
- do {} while (recv_block(&rings[i]));
+ do {
+ if (!pkts_found)
+ usleep(100);
+
+ pkts_found = false;
+ for (i = 0; i < num_cpus; i++)
+ pkts_found |= recv_block(&rings[i]);
+
+ gettimeofday(&now, NULL);
+ elapsed_usec = (now.tv_sec - start.tv_sec) * 1000000 +
+ (now.tv_usec - start.tv_usec);
+ } while (frames_received - frames_nohash < MIN_PKT_SAMPLES &&
+ elapsed_usec < cfg_timeout_msec * 1000);
fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
frames_received - frames_nohash - frames_error,
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 8b644fd84ff2..41cc248ac848 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -170,6 +170,7 @@ class NetDrvEpEnv(NetDrvEnvBase):
self.remote_ifname = self.resolve_remote_ifc()
self.remote_dev = ip("-d link show dev " + self.remote_ifname,
host=self.remote, json=True)[0]
+ self.remote_ifindex = self.remote_dev['ifindex']
self._required_cmd = {}
@@ -247,9 +248,12 @@ class NetDrvEpEnv(NetDrvEnvBase):
if not self.addr_v[ipver] or not self.remote_addr_v[ipver]:
raise KsftSkipEx(f"Test requires IPv{ipver} connectivity")
- def require_nsim(self):
- if self._ns is None:
+ def require_nsim(self, nsim_test=True):
+ """Require or exclude netdevsim for this test"""
+ if nsim_test and self._ns is None:
raise KsftXfailEx("Test only works on netdevsim")
+ if nsim_test is False and self._ns is not None:
+ raise KsftXfailEx("Test does not work on netdevsim")
def _require_cmd(self, comm, key, host=None):
cached = self._required_cmd.get(comm, {})
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
index ae8abff4be40..b6093bcf2b06 100644
--- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -203,19 +203,21 @@ function do_cleanup() {
function cleanup_netcons() {
# delete netconsole dynamic reconfiguration
# do not fail if the target is already disabled
- if [[ ! -d "${NETCONS_PATH}" ]]
+ local TARGET_PATH=${1:-${NETCONS_PATH}}
+
+ if [[ ! -d "${TARGET_PATH}" ]]
then
# in some cases this is called before netcons path is created
return
fi
- if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]]
+ if [[ $(cat "${TARGET_PATH}"/enabled) != 0 ]]
then
- echo 0 > "${NETCONS_PATH}"/enabled || true
+ echo 0 > "${TARGET_PATH}"/enabled || true
fi
# Remove all the keys that got created during the selftest
- find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ find "${TARGET_PATH}/userdata/" -mindepth 1 -type d -delete
# Remove the configfs entry
- rmdir "${NETCONS_PATH}"
+ rmdir "${TARGET_PATH}"
}
function cleanup() {
@@ -377,6 +379,29 @@ function check_netconsole_module() {
fi
}
+function wait_target_state() {
+ local TARGET=${1}
+ local STATE=${2}
+ local TARGET_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+ local ENABLED=0
+
+ if [ "${STATE}" == "enabled" ]
+ then
+ ENABLED=1
+ fi
+
+ if [ ! -d "$TARGET_PATH" ]; then
+ echo "FAIL: Target does not exist." >&2
+ exit "${ksft_fail}"
+ fi
+
+ local CHECK_CMD="grep \"$ENABLED\" \"$TARGET_PATH/enabled\""
+ slowwait 2 sh -c "test -n \"\$($CHECK_CMD)\"" || {
+ echo "FAIL: ${TARGET} is not ${STATE}." >&2
+ exit "${ksft_fail}"
+ }
+}
+
# A wrapper to translate protocol version to udp version
function wait_for_port() {
local NAMESPACE=${1}
diff --git a/tools/testing/selftests/drivers/net/netconsole/Makefile b/tools/testing/selftests/drivers/net/netconsole/Makefile
new file mode 100644
index 000000000000..b56c70b7e274
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netconsole/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_INCLUDES := \
+ ../../../net/lib.sh \
+ ../lib/sh/lib_netcons.sh \
+# end of TEST_INCLUDES
+
+TEST_PROGS := \
+ netcons_basic.sh \
+ netcons_cmdline.sh \
+ netcons_fragmented_msg.sh \
+ netcons_overflow.sh \
+ netcons_resume.sh \
+ netcons_sysdata.sh \
+ netcons_torture.sh \
+# end of TEST_PROGS
+
+include ../../../lib.mk
+
diff --git a/tools/testing/selftests/drivers/net/netconsole/config b/tools/testing/selftests/drivers/net/netconsole/config
new file mode 100644
index 000000000000..a3f6b0fd44ef
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netconsole/config
@@ -0,0 +1,6 @@
+CONFIG_CONFIGFS_FS=y
+CONFIG_IPV6=y
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
+CONFIG_NETDEVSIM=m
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh
index 2022f3061738..59cf10013ecd 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh
@@ -18,7 +18,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
modprobe netdevsim 2> /dev/null || true
modprobe netconsole 2> /dev/null || true
diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh
index d1d23dc67f99..96d704b8d9d9 100755
--- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh
@@ -12,7 +12,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
check_netconsole_module
diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh
index 4a71e01a230c..0dc7280c3080 100755
--- a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh
@@ -16,7 +16,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
modprobe netdevsim 2> /dev/null || true
modprobe netconsole 2> /dev/null || true
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh
index 06089643b771..a8e43d08c166 100755
--- a/tools/testing/selftests/drivers/net/netcons_overflow.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh
@@ -13,7 +13,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
MAX_USERDATA_ITEMS=256
diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh
new file mode 100755
index 000000000000..cb59cf436dd0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test validates that netconsole is able to resume a target that was
+# deactivated when its interface was removed, once that interface is brought
+# back up.
+#
+# The test configures a netconsole target and then removes netdevsim module to
+# cause the interface to disappear. Targets are configured via cmdline to ensure
+# targets bound by interface name and mac address can be resumed.
+# The test verifies that the target moved to disabled state before adding
+# netdevsim and the interface back.
+#
+# Finally, the test verifies that the target is re-enabled automatically and
+# the message is received on the destination interface.
+#
+# Author: Andre Carvalho <asantostc@gmail.com>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
+
+SAVED_SRCMAC="" # to be populated later
+SAVED_DSTMAC="" # to be populated later
+
+modprobe netdevsim 2> /dev/null || true
+rmmod netconsole 2> /dev/null || true
+
+check_netconsole_module
+
+function cleanup() {
+ cleanup_netcons "${NETCONS_CONFIGFS}/cmdline0"
+ do_cleanup
+ rmmod netconsole
+}
+
+function trigger_reactivation() {
+ # Add back low level module
+ modprobe netdevsim
+ # Recreate namespace and two interfaces
+ set_network
+ # Restore MACs
+ ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" \
+ address "${SAVED_DSTMAC}"
+ if [ "${BINDMODE}" == "mac" ]; then
+ ip link set dev "${SRCIF}" down
+ ip link set dev "${SRCIF}" address "${SAVED_SRCMAC}"
+ # Rename device in order to trigger target resume, as initially,
+ # when device was recreated, it didn't have correct mac address.
+ ip link set dev "${SRCIF}" name "${TARGET}"
+ fi
+}
+
+function trigger_deactivation() {
+ # Start by storing mac addresses so they can be restored on reactivation
+ SAVED_DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ cat /sys/class/net/"$DSTIF"/address)
+ SAVED_SRCMAC=$(mac_get "${SRCIF}")
+ # Remove low level module
+ rmmod netdevsim
+}
+
+trap cleanup EXIT
+
+# Run the test twice, with different cmdline parameters
+for BINDMODE in "ifname" "mac"
+do
+ echo "Running with bind mode: ${BINDMODE}" >&2
+ # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+ echo "6 5" > /proc/sys/kernel/printk
+
+ # Create one namespace and two interfaces
+ set_network
+
+ # Create the command line for netconsole, with the configuration from
+ # the function above
+ CMDLINE=$(create_cmdline_str "${BINDMODE}")
+
+ # The content of kmsg will be saved to the following file
+ OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}"
+
+ # Load the module, with the cmdline set
+ modprobe netconsole "${CMDLINE}"
+ # Expose cmdline target in configfs
+ mkdir "${NETCONS_CONFIGFS}/cmdline0"
+
+ # Target should be enabled
+ wait_target_state "cmdline0" "enabled"
+
+ # Trigger deactivation by unloading netdevsim module. Target should be
+ # disabled.
+ trigger_deactivation
+ wait_target_state "cmdline0" "disabled"
+
+ # Trigger reactivation by loading netdevsim, recreating the network and
+ # restoring mac addresses. Target should be re-enabled.
+ trigger_reactivation
+ wait_target_state "cmdline0" "enabled"
+
+ # Listen for netconsole port inside the namespace and destination
+ # interface
+ listen_port_and_save_to "${OUTPUT_FILE}" &
+ # Wait for socat to start and listen to the port.
+ wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+ # Send the message
+ echo "${MSG}: ${TARGET}" > /dev/kmsg
+ # Wait until socat saves the file to disk
+ busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+ # Make sure the message was received in the dst part
+ # and exit
+ validate_msg "${OUTPUT_FILE}"
+
+ # kill socat in case it is still running
+ pkill_socat
+ # Cleanup & unload the module
+ cleanup
+
+ echo "${BINDMODE} : Test passed" >&2
+done
+
+trap - EXIT
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh
index baf69031089e..3fb8c4afe3d2 100755
--- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh
@@ -18,7 +18,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
# Enable the sysdata cpu_nr feature
function set_cpu_nr() {
diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh
index 2ce9ee3719d1..33a44adb6f8f 100755
--- a/tools/testing/selftests/drivers/net/netcons_torture.sh
+++ b/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh
@@ -17,7 +17,7 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh
# Number of times the main loop run
ITERATIONS=${1:-150}
diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py
index 52523bdad240..864d9fce1094 100755
--- a/tools/testing/selftests/drivers/net/psp.py
+++ b/tools/testing/selftests/drivers/net/psp.py
@@ -266,6 +266,7 @@ def assoc_sk_only_mismatch(cfg):
the_exception = cm.exception
ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+ _close_conn(cfg, s)
def assoc_sk_only_mismatch_tx(cfg):
@@ -283,6 +284,7 @@ def assoc_sk_only_mismatch_tx(cfg):
the_exception = cm.exception
ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id")
ksft_eq(the_exception.nl_msg.error, -errno.EINVAL)
+ _close_conn(cfg, s)
def assoc_sk_only_unconn(cfg):
@@ -601,8 +603,8 @@ def main() -> None:
cfg.comm_port = rand_port()
srv = None
try:
- with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote,
- exit_wait=True) as srv:
+ with bkg(responder + f" -p {cfg.comm_port} -i {cfg.remote_ifindex}",
+ host=cfg.remote, exit_wait=True) as srv:
wait_port_listen(cfg.comm_port, host=cfg.remote)
cfg.comm_sock = socket.create_connection((cfg.remote_addr,
diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c
index f309e0d73cbf..a26e7628bbb1 100644
--- a/tools/testing/selftests/drivers/net/psp_responder.c
+++ b/tools/testing/selftests/drivers/net/psp_responder.c
@@ -22,7 +22,7 @@ static bool should_quit;
struct opts {
int port;
- int devid;
+ int ifindex;
bool verbose;
};
@@ -360,7 +360,7 @@ static void usage(const char *name, const char *miss)
if (miss)
fprintf(stderr, "Missing argument: %s\n", miss);
- fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name);
+ fprintf(stderr, "Usage: %s -p port [-v] [-i ifindex]\n", name);
exit(EXIT_FAILURE);
}
@@ -368,7 +368,7 @@ static void parse_cmd_opts(int argc, char **argv, struct opts *opts)
{
int opt;
- while ((opt = getopt(argc, argv, "vp:d:")) != -1) {
+ while ((opt = getopt(argc, argv, "vp:i:")) != -1) {
switch (opt) {
case 'v':
opts->verbose = 1;
@@ -376,8 +376,8 @@ static void parse_cmd_opts(int argc, char **argv, struct opts *opts)
case 'p':
opts->port = atoi(optarg);
break;
- case 'd':
- opts->devid = atoi(optarg);
+ case 'i':
+ opts->ifindex = atoi(optarg);
break;
default:
usage(argv[0], NULL);
@@ -410,12 +410,11 @@ static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions)
int main(int argc, char **argv)
{
struct psp_dev_get_list *dev_list;
- bool devid_found = false;
__u32 ver_ena, ver_cap;
struct opts opts = {};
struct ynl_error yerr;
struct ynl_sock *ys;
- int first_id = 0;
+ int devid = -1;
int ret;
parse_cmd_opts(argc, argv, &opts);
@@ -429,20 +428,19 @@ int main(int argc, char **argv)
}
dev_list = psp_dev_get_dump(ys);
- if (ynl_dump_empty(dev_list)) {
- if (ys->err.code)
- goto err_close;
- fprintf(stderr, "No PSP devices\n");
- goto err_close_silent;
- }
+ if (ynl_dump_empty(dev_list) && ys->err.code)
+ goto err_close;
ynl_dump_foreach(dev_list, d) {
- if (opts.devid) {
- devid_found = true;
+ if (opts.ifindex) {
+ if (d->ifindex != opts.ifindex)
+ continue;
+ devid = d->id;
ver_ena = d->psp_versions_ena;
ver_cap = d->psp_versions_cap;
- } else if (!first_id) {
- first_id = d->id;
+ break;
+ } else if (devid < 0) {
+ devid = d->id;
ver_ena = d->psp_versions_ena;
ver_cap = d->psp_versions_cap;
} else {
@@ -452,23 +450,21 @@ int main(int argc, char **argv)
}
psp_dev_get_list_free(dev_list);
- if (opts.devid && !devid_found) {
- fprintf(stderr, "PSP device %d requested on cmdline, not found\n",
- opts.devid);
- goto err_close_silent;
- } else if (!opts.devid) {
- opts.devid = first_id;
- }
+ if (opts.ifindex && devid < 0)
+ fprintf(stderr,
+ "WARN: PSP device with ifindex %d requested on cmdline, not found\n",
+ opts.ifindex);
- if (ver_ena != ver_cap) {
- ret = psp_dev_set_ena(ys, opts.devid, ver_cap);
+ if (devid >= 0 && ver_ena != ver_cap) {
+ ret = psp_dev_set_ena(ys, devid, ver_cap);
if (ret)
goto err_close;
}
ret = run_responder(ys, &opts);
- if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena))
+ if (devid >= 0 && ver_ena != ver_cap &&
+ psp_dev_set_ena(ys, devid, ver_ena))
fprintf(stderr, "WARN: failed to set the PSP versions back\n");
ynl_sock_destroy(ys);
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 6930fe926c58..97ad4d551d44 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -7,6 +7,7 @@ cmsg_sender
epoll_busy_poll
fin_ack_lat
hwtstamp_config
+icmp_rfc4884
io_uring_zerocopy_tx
ioam6_parser
ip_defrag
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 45c4ea381bc3..afdea6d95bde 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -22,6 +22,7 @@ TEST_PROGS := \
cmsg_so_mark.sh \
cmsg_so_priority.sh \
cmsg_time.sh \
+ double_udp_encap.sh \
drop_monitor_tests.sh \
fcnal-ipv4.sh \
fcnal-ipv6.sh \
@@ -167,6 +168,7 @@ TEST_GEN_PROGS := \
bind_timewait \
bind_wildcard \
epoll_busy_poll \
+ icmp_rfc4884 \
ipv6_fragmentation \
proc_net_pktgen \
reuseaddr_conflict \
@@ -181,7 +183,6 @@ TEST_GEN_PROGS := \
tap \
tcp_port_share \
tls \
- tun \
# end of TEST_GEN_PROGS
TEST_FILES := \
@@ -193,7 +194,11 @@ TEST_FILES := \
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller
-YNL_GEN_PROGS := netlink-dumps
+YNL_GEN_PROGS := \
+ netlink-dumps \
+ tun \
+# end of YNL_GEN_PROGS
+
TEST_GEN_FILES += $(YNL_GEN_FILES)
TEST_GEN_PROGS += $(YNL_GEN_PROGS)
@@ -204,7 +209,14 @@ TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
# YNL build
-YNL_GENS := netdev
+YNL_GENS := \
+ netdev \
+ rt-addr \
+ rt-link \
+ rt-neigh \
+ rt-route \
+# end of YNL_GENS
+
include ynl.mk
$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index b84362b9b508..cd49b7dfe216 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -77,6 +77,7 @@ CONFIG_NET_DROP_MONITOR=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_NETFILTER_XT_NAT=m
diff --git a/tools/testing/selftests/net/double_udp_encap.sh b/tools/testing/selftests/net/double_udp_encap.sh
new file mode 100755
index 000000000000..9aaf97cdf141
--- /dev/null
+++ b/tools/testing/selftests/net/double_udp_encap.sh
@@ -0,0 +1,393 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+# shellcheck disable=SC2155 # prefer RO variable over return value from cmd
+readonly CLI="$(dirname "$(readlink -f "$0")")/../../../net/ynl/pyynl/cli.py"
+
+readonly SRC=1
+readonly DST=2
+
+readonly NET_V4=192.168.1.
+readonly NET_V6=2001:db8::
+readonly OL1_NET_V4=172.16.1.
+readonly OL1_NET_V6=2001:db8:1::
+readonly OL2_NET_V4=172.16.2.
+readonly OL2_NET_V6=2001:db8:2::
+
+trap cleanup_all_ns EXIT
+
+# shellcheck disable=SC2329 # can't figure out usage through a variable
+is_ipv6() {
+ if [[ $1 =~ .*:.* ]]; then
+ return 0
+ fi
+ return 1
+}
+
+# shellcheck disable=SC2329 # can't figure out usage through a variable
+create_gnv_endpoint() { # create a geneve device through the YNL CLI and bring it up
+ local -r netns=$1 # namespace the device is created in
+ local -r bm_rem_addr=$2 # remote tunnel endpoint address (IPv4 or IPv6)
+ local -r gnv_dev=$3 # name of the new geneve device
+ local -r gnv_id=$4 # value of the geneve "id" attribute
+ local opts=$5 # optional extra '"key":value' attributes, comma separated
+ local gnv_json
+ local rem
+
+ if is_ipv6 "$bm_rem_addr"; then
+ rem=remote6
+ else
+ rem=remote
+ fi
+
+ # add ynl opt separator, if needed
+ [ -n "$opts" ] && opts=", $opts"
+
+ gnv_json="{ \"id\": $gnv_id, \"$rem\": \"$bm_rem_addr\"$opts }"
+ ip netns exec "$netns" "$CLI" --family rt-link --create --excl \
+ --do newlink --json "{\"ifname\": \"$gnv_dev\",
+ \"linkinfo\": {\"kind\":\"geneve\",
+ \"data\": $gnv_json } }" > /dev/null
+ ip -n "$netns" link set dev "$gnv_dev" up
+}
+
+# shellcheck disable=SC2329 # can't figure out usage through a variable
+create_vxlan_endpoint() { # create a vxlan device with iproute2 and bring it up
+ local -r netns=$1 # namespace the device is created in
+ local -r bm_rem_addr=$2 # remote tunnel endpoint address
+ local -r vxlan_dev=$3 # name of the new vxlan device
+ local -r vxlan_id=$4 # vxlan id
+ local -r opts_str=$5 # comma separated '"key":value' options, as passed by create_ns
+ local oldifs
+ local -a opts
+ local opt
+
+ # convert the arguments from yaml format
+ oldifs=$IFS
+ IFS=','
+ for opt in $opts_str; do
+ local pattern='"port":'
+
+ [ -n "$opt" ] || continue
+
+ opts+=("${opt/$pattern*/dstport}" "${opt/$pattern/}") # '"port":N' becomes 'dstport' 'N'
+ done
+ IFS=$oldifs
+ [ ${#opts[@]} -gt 0 ] || opts+=("dstport" "4789") # used when no option was given
+
+ ip -n "$netns" link add "$vxlan_dev" type vxlan id "$vxlan_id" \
+ remote "$bm_rem_addr" "${opts[@]}"
+ ip -n "$netns" link set dev "$vxlan_dev" up
+}
+
+create_ns() { # build the two-namespace, double-tunnel topology for $ENCAP and $FAMILY
+ local nested_opt='"port":6082' # the nested tunnel always gets port 6082
+ local create_endpoint
+ local options="$1" # options for the lower tunnel endpoints; $2 extends nested_opt
+ local feature
+ local dev
+ local id
+ local ns
+
+ RET=0
+
+ # +-------------+ +-------------+
+ # | NS_SRC | | NS_DST |
+ # | | | |
+ # | gnv_nst1 | | gnv_nst2 |
+ # | + | | + |
+ # | | | | | |
+ # | + | | + |
+ # | gnv1 | | gnv2 |
+ # | + | | + |
+ # | | | | | |
+ # | + veth1 +--------+ veth2 + |
+ # | | | |
+ # +-------------+ +-------------+
+
+ setup_ns NS_SRC NS_DST
+
+ # concatenate caller provided options and default one
+ [ -n "$2" ] && nested_opt="$nested_opt,$2"
+
+ ip link add name "veth$SRC" netns "$NS_SRC" type veth \
+ peer name "veth$DST" netns "$NS_DST"
+ case "$ENCAP" in
+ vxlan)
+ create_endpoint=create_vxlan_endpoint
+ dev=vx
+ ;;
+ geneve)
+ create_endpoint=create_gnv_endpoint
+ dev=gnv
+ ;;
+ esac
+
+ id=1
+ for ns in "${NS_LIST[@]}"; do
+ ip -n "$ns" link set dev "veth$id" up
+
+ # ensure the sender can do large write just after 3whs
+ ip netns exec "$ns" \
+ sysctl -qw net.ipv4.tcp_wmem="4096 4194304 4194304"
+
+ # note that 3 - $SRC == $DST and 3 - $DST == $SRC
+ if [ "$FAMILY" = "4" ]; then
+ ip -n "$ns" addr add dev "veth$id" "$NET_V4$id/24"
+ $create_endpoint "$ns" "$NET_V4$((3 - id))" \
+ "$dev$id" 4 "$options"
+ ip -n "$ns" addr add dev "$dev$id" "$OL1_NET_V4$id/24"
+
+ # nested tunnel devices
+ # pmtu can't be propagated to upper layer devices;
+ # need manual adjust
+ $create_endpoint "$ns" "$OL1_NET_V4$((3 - id))" \
+ "$dev"_nst"$id" 40 "$nested_opt"
+ ip -n "$ns" addr add dev "$dev"_nst"$id" \
+ "$OL2_NET_V4$id/24"
+ ip -n "$ns" link set dev "$dev"_nst"$id" mtu 1392
+ else
+ ip -n "$ns" addr add dev "veth$id" "$NET_V6$id/64" \
+ nodad
+ $create_endpoint "$ns" "$NET_V6$((3 - id))" \
+ "$dev"6"$id" 6 "$options"
+ ip -n "$ns" addr add dev "$dev"6"$id" \
+ "$OL1_NET_V6$id/64" nodad
+
+ $create_endpoint "$ns" "$OL1_NET_V6$((3 - id))" \
+ "$dev"6_nst"$id" 60 "$nested_opt"
+ ip -n "$ns" addr add dev "$dev"6_nst"$id" \
+ "$OL2_NET_V6$id/64" nodad
+ ip -n "$ns" link set dev "$dev"6_nst"$id" mtu 1352
+ fi
+ id=$((id+1))
+ done
+
+ # enable GRO heuristic on the veth peer and ensure UDP L4 over tunnel is
+ # actually segmented
+ for feature in tso tx-udp_tnl-segmentation; do
+ ip netns exec "$NS_SRC" ethtool -K "veth$SRC" \
+ "$feature" off 2>/dev/null
+ done
+}
+
+create_ns_gso() { # create_ns, then enable tunnel GSO offloads on the sender's lower tunnel device
+ local dev
+
+ create_ns "$@"
+ if [ "$ENCAP" = "geneve" ]; then
+ dev=gnv
+ else
+ dev=vx
+ fi
+ [ "$FAMILY" = "6" ] && dev="$dev"6 # create_ns names the IPv6 devices with a '6' infix
+ ip netns exec "$NS_SRC" ethtool -K "$dev$SRC" \
+ tx-gso-partial on \
+ tx-udp_tnl-segmentation on \
+ tx-udp_tnl-csum-segmentation on
+}
+
+create_ns_gso_gro() { # create_ns_gso, then turn GRO on for the receiver veth
+ create_ns_gso "$@"
+ ip netns exec "$NS_DST" ethtool -K "veth$DST" gro on
+ ip netns exec "$NS_SRC" ethtool -K "veth$SRC" tx off >/dev/null 2>&1 # output and errors are discarded
+}
+
+run_test() { # run one TCP transfer over the nested tunnel and verify the packet counters
+ local -r dst=$NET$DST
+ local -r msg=$1 # test description handed to log_test
+ local -r total_size=$2 # passed as '-s' to udpgso_bench_tx
+ local -r encappkts=$3 # packets expected on the receiver INPUT rule
+ local inner_proto_offset=0
+ local inner_maclen=14
+ local rx_family="-4"
+ local ipt=iptables
+ local bpf_filter
+ local -a rx_args
+ local wire_pkts
+ local rcvpkts
+ local encl=8
+ local dport
+ local pkts
+ local snd
+
+ if [ "$FAMILY" = "6" ]; then
+ ipt=ip6tables
+ else
+ # rx program does not support '-6' and implies ipv6 usage by
+ # default
+ rx_args=("$rx_family")
+ fi
+
+ # The receiver can only check fixed size packets
+ pkts=$((total_size / GSO_SIZE))
+ if [ -n "$4" ]; then # optional $4: packets expected on the sender OUTPUT rule
+ wire_pkts=$4
+ elif [ $((total_size % GSO_SIZE)) -eq 0 ]; then
+ wire_pkts=1
+ rx_args+=("-l" "$GSO_SIZE")
+ else
+ wire_pkts=2
+ pkts=$((pkts + 1))
+ fi
+
+ if [ "$ENCAP" = "geneve" ]; then
+ dport=6081
+ else
+ dport=4789
+ fi
+
+ # Either:
+ # - IPv4, nested tunnel carries UDP over IPv4, with dport 6082,
+ # innermost is TCP over IPv4 on port 8000
+ # - IPv6, nested tunnel carries UDP over IPv6, with dport 6082,
+ # innermost is TCP over IPv6 on port 8000
+ # The nested tunnel port is 6082 and the nested encap len is 8
+ # regardless of the encap type (no geneve opts).
+ # In inherit protocol mode there is no nested mac hdr and the nested
+ # l3 protocol type field belongs to the geneve hdr.
+ [ "$USE_HINT" = true ] && encl=16
+ [ "$INHERIT" = true ] && inner_maclen=0
+ [ "$INHERIT" = true ] && inner_proto_offset=-4
+ local inner=$((inner_maclen+encl))
+ local proto=$((inner_maclen+encl+inner_proto_offset))
+ bpf_filter=$(nfbpf_compile "(ip &&
+ ip[$((40+encl))] == 0x08 && ip[$((41+encl))] == 0x00 &&
+ ip[$((51+encl))] == 0x11 &&
+ ip[$((64+encl))] == 0x17 && ip[$((65+encl))] == 0xc2 &&
+ ip[$((76+proto))] == 0x08 && ip[$((77+proto))] == 0x00 &&
+ ip[$((87+inner))] == 0x6 &&
+ ip[$((100+inner))] == 0x1f && ip[$((101+inner))] == 0x40) ||
+ (ip6 &&
+ ip6[$((60+encl))] == 0x86 && ip6[$((61+encl))] == 0xdd &&
+ ip6[$((68+encl))] == 0x11 &&
+ ip6[$((104+encl))] == 0x17 && ip6[$((105+encl))] == 0xc2 &&
+ ip6[$((116+proto))] == 0x86 && ip6[$((117+proto))] == 0xdd &&
+ ip6[$((124+inner))] == 0x6 &&
+ ip6[$((160+inner))] == 0x1f && ip6[$((161+inner))] == 0x40)")
+
+ # ignore short packets, to avoid arp/mld induced noise
+ ip netns exec "$NS_SRC" "$ipt" -A OUTPUT -p udp --dport "$dport" \
+ -m length --length 600:65535 -m bpf --bytecode "$bpf_filter"
+ ip netns exec "$NS_DST" "$ipt" -A INPUT -p udp --dport "$dport" \
+ -m length --length 600:65535 -m bpf --bytecode "$bpf_filter"
+ ip netns exec "$NS_DST" ./udpgso_bench_rx -C 2000 -t -R 100 \
+ -n "$pkts" "${rx_args[@]}" &
+ local pid=$!
+ wait_local_port_listen "$NS_DST" 8000 tcp
+ ip netns exec "$NS_SRC" ./udpgso_bench_tx -"$FAMILY" -t -M 1 \
+ -s "$total_size" -D "$dst"
+ local ret=$?
+ check_err "$ret" "client failure exit code $ret"
+ wait "$pid"
+ ret=$?
+ check_err "$ret" "server failure exit code $ret"
+
+ snd=$(ip netns exec "$NS_SRC" "$ipt"-save -c |
+ grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//')
+
+ [ "$snd" = "$wire_pkts" ]
+ # shellcheck disable=SC2319 # known false positive
+ check_err $? "send $snd packets on the lowest link, expected $wire_pkts"
+
+ rcvpkts=$(ip netns exec "$NS_DST" "$ipt"-save -c | \
+ grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//')
+
+ [ "$rcvpkts" = "$encappkts" ]
+ check_err $? "received $rcvpkts $ENCAP packets, expected $encappkts"
+ log_test "$msg"
+}
+
+run_tests() { # run every scenario, first for IPv4 and then for IPv6
+ for FAMILY in 4 6; do
+ NET=$OL2_NET_V4 # run_test targets $NET$DST, an address on the nested tunnel device
+ GSO_SIZE=1340 # 1392 - 20 - 32
+
+ if [ $FAMILY = 6 ]; then
+ NET=$OL2_NET_V6
+ GSO_SIZE=1280 # 1352 - 40 - 32
+ fi
+
+ echo "IPv$FAMILY"
+
+ unset USE_HINT
+ unset INHERIT
+
+ # "geneve" must be last encap in list, so that later
+ # test cases will run on it
+ for ENCAP in "vxlan" "geneve"; do
+ create_ns
+ run_test "No GSO - $ENCAP" $((GSO_SIZE * 4)) 4 4
+ cleanup_all_ns
+
+ create_ns_gso
+ run_test "GSO without GRO - $ENCAP" $((GSO_SIZE * 4)) \
+ 4 1
+ cleanup_all_ns
+
+ # IPv4 only test
+ [ $FAMILY = "4" ] || continue # moves on to the next ENCAP
+ create_ns_gso
+ ip netns exec "$NS_SRC" \
+ sysctl -qw net.ipv4.ip_no_pmtu_disc=1
+ run_test "GSO disable due to no fixedid - $ENCAP" \
+ $((GSO_SIZE * 4)) 4 4
+ cleanup_all_ns
+ done
+
+ # GRO tests imply/require geneve encap, the only one providing
+ # GRO hints
+ create_ns_gso_gro
+ run_test "double tunnel GRO, no hints" $((GSO_SIZE * 4)) 4
+ cleanup_all_ns
+
+ # hint option is expected for all the following tests in the RX
+ # path
+ USE_HINT=true
+ create_ns_gso_gro \
+ '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \
+ '"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1'
+ run_test "double tunnel GRO" $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+
+ create_ns_gso_gro '"gro-hint":1,"udp-csum":1' '"udp-csum":1'
+ run_test "double tunnel GRO - csum complete" $((GSO_SIZE * 4))\
+ 1
+ cleanup_all_ns
+
+ create_ns_gso_gro '"gro-hint":1' \
+ '"udp-csum":0,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1'
+ run_test "double tunnel GRO - no nested csum" \
+ $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+
+ create_ns_gso_gro \
+ '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \
+ '"udp-csum":1'
+ run_test "double tunnel GRO - nested csum, outer 0-csum, skip"\
+ $((GSO_SIZE * 4)) 4
+ cleanup_all_ns
+
+ INHERIT=true # makes run_test drop the nested mac header from its offsets
+ create_ns_gso_gro '"gro-hint":1,"udp-csum":1' \
+ '"udp-csum":1,"inner-proto-inherit":1'
+ run_test "double tunnel GRO - nested inherit proto" \
+ $((GSO_SIZE * 4)) 1
+ cleanup_all_ns
+ unset INHERIT
+
+ create_ns_gso_gro '"gro-hint":1'
+ run_test "double tunnel GRO - short last pkt" \
+ $((GSO_SIZE * 4 + GSO_SIZE / 2)) 2
+ cleanup_all_ns
+ done
+}
+
+require_command nfbpf_compile
+require_command jq
+
+# tcp retransmissions will break the accounting
+xfail_on_slow run_tests
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index c01be076b210..e0d45292a298 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -72,7 +72,8 @@ declare -A TEST_NET4IN6IN6
TEST_NET4IN6[1]=10.1.1.254
TEST_NET4IN6[2]=10.2.1.254
-# mcast address
+# mcast addresses
+MCAST4=233.252.0.1
MCAST6=ff02::1
VRF=lisa
@@ -260,11 +261,15 @@ valid_onlink_ipv4()
run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+ run_ip 254 ${TEST_NET4[1]}.9 ${CONGW[1]} ${NETIFS[p3]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF ${VRF}"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.10 ${CONGW[3]} ${NETIFS[p7]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF device, PBR table"
@@ -300,17 +305,15 @@ invalid_onlink_ipv4()
{
run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
"Invalid gw - local unicast address"
+ run_ip 254 ${TEST_NET4[1]}.12 ${MCAST4} ${NETIFS[p1]} 2 \
+ "Invalid gw - multicast address"
run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
"Invalid gw - local unicast address, VRF"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.12 ${MCAST4} ${NETIFS[p5]} 2 \
+ "Invalid gw - multicast address, VRF"
run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
-
- run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
- "Gateway resolves to wrong nexthop device"
-
- run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
- "Gateway resolves to wrong nexthop device - VRF"
}
################################################################################
@@ -357,12 +360,16 @@ valid_onlink_ipv6()
run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+ run_ip6 254 ${TEST_NET6[1]}::a ${V6ADDRS[p1]/::*}::64 ${NETIFS[p3]} 0 \
+ "nexthop device mismatch"
log_subsection "VRF ${VRF}"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::b ${V6ADDRS[p5]/::*}::64 \
+ ${NETIFS[p7]} 0 "nexthop device mismatch"
log_subsection "VRF device, PBR table"
@@ -428,13 +435,6 @@ invalid_onlink_ipv6()
run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
"No nexthop device given"
-
- # default VRF validation is done against LOCAL table
- # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
- # "Gateway resolves to wrong nexthop device"
-
- run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
- "Gateway resolves to wrong nexthop device - VRF"
}
run_onlink_tests()
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index 892895659c7e..1f2bf6e81847 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -306,39 +306,39 @@ run_test()
if [ $skip_ptp = false ]; then
check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \
- "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \
- "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: sync msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: follow up msg" \
true "$test_name"
check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \
- "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: peer delay req msg" \
true "$test_name"
fi
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 4b6afc0fe9f8..31fb9326cf53 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -5,6 +5,8 @@ top_srcdir = ../../../../..
TEST_PROGS := \
hsr_ping.sh \
hsr_redbox.sh \
+ link_faults.sh \
+ prp_ping.sh \
# end of TEST_PROGS
TEST_FILES += hsr_common.sh
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 5a65f4f836be..f4d685df4345 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -27,31 +27,34 @@ while getopts "$optstring" option;do
esac
done
-do_complete_ping_test()
+do_ping_tests()
{
- echo "INFO: Initial validation ping."
- # Each node has to be able each one.
- do_ping "$ns1" 100.64.0.2
- do_ping "$ns2" 100.64.0.1
- do_ping "$ns3" 100.64.0.1
- stop_if_error "Initial validation failed."
-
- do_ping "$ns1" 100.64.0.3
- do_ping "$ns2" 100.64.0.3
- do_ping "$ns3" 100.64.0.2
+ local netid="$1"
- do_ping "$ns1" dead:beef:1::2
- do_ping "$ns1" dead:beef:1::3
- do_ping "$ns2" dead:beef:1::1
- do_ping "$ns2" dead:beef:1::2
- do_ping "$ns3" dead:beef:1::1
- do_ping "$ns3" dead:beef:1::2
+ echo "INFO: Running ping tests."
- stop_if_error "Initial validation failed."
+ echo "INFO: Initial validation ping."
+ # Each node has to be able to reach each one.
+ do_ping "$ns1" "100.64.$netid.2"
+ do_ping "$ns1" "100.64.$netid.3"
+ do_ping "$ns2" "100.64.$netid.1"
+ do_ping "$ns2" "100.64.$netid.3"
+ do_ping "$ns3" "100.64.$netid.1"
+ do_ping "$ns3" "100.64.$netid.2"
+ stop_if_error "Initial validation failed on IPv4."
+
+ do_ping "$ns1" "dead:beef:$netid::2"
+ do_ping "$ns1" "dead:beef:$netid::3"
+ do_ping "$ns2" "dead:beef:$netid::1"
+ do_ping "$ns2" "dead:beef:$netid::2"
+ do_ping "$ns3" "dead:beef:$netid::1"
+ do_ping "$ns3" "dead:beef:$netid::2"
+ stop_if_error "Initial validation failed on IPv6."
# Wait until supervisor all supervision frames have been processed and the node
# entries have been merged. Otherwise duplicate frames will be observed which is
# valid at this stage.
+ echo "INFO: Wait for node table entries to be merged."
WAIT=5
while [ ${WAIT} -gt 0 ]
do
@@ -68,62 +71,30 @@ do_complete_ping_test()
sleep 1
echo "INFO: Longer ping test."
- do_ping_long "$ns1" 100.64.0.2
- do_ping_long "$ns1" dead:beef:1::2
- do_ping_long "$ns1" 100.64.0.3
- do_ping_long "$ns1" dead:beef:1::3
-
- stop_if_error "Longer ping test failed."
-
- do_ping_long "$ns2" 100.64.0.1
- do_ping_long "$ns2" dead:beef:1::1
- do_ping_long "$ns2" 100.64.0.3
- do_ping_long "$ns2" dead:beef:1::2
- stop_if_error "Longer ping test failed."
-
- do_ping_long "$ns3" 100.64.0.1
- do_ping_long "$ns3" dead:beef:1::1
- do_ping_long "$ns3" 100.64.0.2
- do_ping_long "$ns3" dead:beef:1::2
- stop_if_error "Longer ping test failed."
-
- echo "INFO: Cutting one link."
- do_ping_long "$ns1" 100.64.0.3 &
-
- sleep 3
- ip -net "$ns3" link set ns3eth1 down
- wait
-
- ip -net "$ns3" link set ns3eth1 up
-
- stop_if_error "Failed with one link down."
-
- echo "INFO: Delay the link and drop a few packages."
- tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
- tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
-
- do_ping_long "$ns1" 100.64.0.2
- do_ping_long "$ns1" 100.64.0.3
-
- stop_if_error "Failed with delay and packetloss."
-
- do_ping_long "$ns2" 100.64.0.1
- do_ping_long "$ns2" 100.64.0.3
-
- stop_if_error "Failed with delay and packetloss."
-
- do_ping_long "$ns3" 100.64.0.1
- do_ping_long "$ns3" 100.64.0.2
- stop_if_error "Failed with delay and packetloss."
-
- echo "INFO: All good."
+ do_ping_long "$ns1" "100.64.$netid.2"
+ do_ping_long "$ns1" "dead:beef:$netid::2"
+ do_ping_long "$ns1" "100.64.$netid.3"
+ do_ping_long "$ns1" "dead:beef:$netid::3"
+ stop_if_error "Longer ping test failed (ns1)."
+
+ do_ping_long "$ns2" "100.64.$netid.1"
+ do_ping_long "$ns2" "dead:beef:$netid::1"
+ do_ping_long "$ns2" "100.64.$netid.3"
+ do_ping_long "$ns2" "dead:beef:$netid::3"
+ stop_if_error "Longer ping test failed (ns2)."
+
+ do_ping_long "$ns3" "100.64.$netid.1"
+ do_ping_long "$ns3" "dead:beef:$netid::1"
+ do_ping_long "$ns3" "100.64.$netid.2"
+ do_ping_long "$ns3" "dead:beef:$netid::2"
+ stop_if_error "Longer ping test failed (ns3)."
}
setup_hsr_interfaces()
{
local HSRv="$1"
- echo "INFO: preparing interfaces for HSRv${HSRv}."
+ echo "INFO: Preparing interfaces for HSRv${HSRv}."
# Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
#
# ns1eth1 ----- ns2eth1
@@ -140,17 +111,20 @@ setup_hsr_interfaces()
ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
# HSRv0/1
- ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0
- ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0
- ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0
+ ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 \
+ slave2 ns1eth2 supervision 45 version "$HSRv" proto 0
+ ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 \
+ slave2 ns2eth2 supervision 45 version "$HSRv" proto 0
+ ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 \
+ slave2 ns3eth2 supervision 45 version "$HSRv" proto 0
# IP for HSR
ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
- ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad
+ ip -net "$ns1" addr add dead:beef:0::1/64 dev hsr1 nodad
ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2
- ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad
+ ip -net "$ns2" addr add dead:beef:0::2/64 dev hsr2 nodad
ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
- ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
+ ip -net "$ns3" addr add dead:beef:0::3/64 dev hsr3 nodad
ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1
ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2
@@ -177,113 +151,56 @@ setup_hsr_interfaces()
setup_vlan_interfaces() {
ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2
- ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3
- ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4
- ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5
-
ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2
- ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3
- ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4
- ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5
-
ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2
- ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3
- ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4
- ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5
ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2
- ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3
- ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4
- ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5
+ ip -net "$ns1" addr add dead:beef:2::1/64 dev hsr1.2 nodad
ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2
- ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3
- ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4
- ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5
+ ip -net "$ns2" addr add dead:beef:2::2/64 dev hsr2.2 nodad
ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2
- ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3
- ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4
- ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5
+ ip -net "$ns3" addr add dead:beef:2::3/64 dev hsr3.2 nodad
ip -net "$ns1" link set dev hsr1.2 up
- ip -net "$ns1" link set dev hsr1.3 up
- ip -net "$ns1" link set dev hsr1.4 up
- ip -net "$ns1" link set dev hsr1.5 up
-
ip -net "$ns2" link set dev hsr2.2 up
- ip -net "$ns2" link set dev hsr2.3 up
- ip -net "$ns2" link set dev hsr2.4 up
- ip -net "$ns2" link set dev hsr2.5 up
-
ip -net "$ns3" link set dev hsr3.2 up
- ip -net "$ns3" link set dev hsr3.3 up
- ip -net "$ns3" link set dev hsr3.4 up
- ip -net "$ns3" link set dev hsr3.5 up
}
-hsr_vlan_ping() {
- do_ping "$ns1" 100.64.2.2
- do_ping "$ns1" 100.64.3.2
- do_ping "$ns1" 100.64.4.2
- do_ping "$ns1" 100.64.5.2
-
- do_ping "$ns1" 100.64.2.3
- do_ping "$ns1" 100.64.3.3
- do_ping "$ns1" 100.64.4.3
- do_ping "$ns1" 100.64.5.3
-
- do_ping "$ns2" 100.64.2.1
- do_ping "$ns2" 100.64.3.1
- do_ping "$ns2" 100.64.4.1
- do_ping "$ns2" 100.64.5.1
-
- do_ping "$ns2" 100.64.2.3
- do_ping "$ns2" 100.64.3.3
- do_ping "$ns2" 100.64.4.3
- do_ping "$ns2" 100.64.5.3
-
- do_ping "$ns3" 100.64.2.1
- do_ping "$ns3" 100.64.3.1
- do_ping "$ns3" 100.64.4.1
- do_ping "$ns3" 100.64.5.1
-
- do_ping "$ns3" 100.64.2.2
- do_ping "$ns3" 100.64.3.2
- do_ping "$ns3" 100.64.4.2
- do_ping "$ns3" 100.64.5.2
+run_ping_tests()
+{
+ # do_ping_tests prints the "Running ping tests" banner itself; 0 is the netid
+ do_ping_tests 0
+}
-run_vlan_tests() {
+run_vlan_tests()
+{
vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}')
vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}')
vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}')
if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then
- echo "INFO: Running VLAN tests"
+ echo "INFO: Running VLAN ping tests"
setup_vlan_interfaces
- hsr_vlan_ping
+ do_ping_tests 2
else
echo "INFO: Not Running VLAN tests as the device does not support VLAN"
fi
}
check_prerequisites
-setup_ns ns1 ns2 ns3
-
trap cleanup_all_ns EXIT
+setup_ns ns1 ns2 ns3
setup_hsr_interfaces 0
-do_complete_ping_test
-
+run_ping_tests
run_vlan_tests
setup_ns ns1 ns2 ns3
-
setup_hsr_interfaces 1
-do_complete_ping_test
-
+run_ping_tests
run_vlan_tests
exit $ret
diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh
new file mode 100755
index 000000000000..be526281571c
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/link_faults.sh
@@ -0,0 +1,378 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2329
+
+source ../lib.sh
+
+ALL_TESTS="
+ test_clean_hsrv0
+ test_cut_link_hsrv0
+ test_packet_loss_hsrv0
+ test_high_packet_loss_hsrv0
+ test_reordering_hsrv0
+
+ test_clean_hsrv1
+ test_cut_link_hsrv1
+ test_packet_loss_hsrv1
+ test_high_packet_loss_hsrv1
+ test_reordering_hsrv1
+
+ test_clean_prp
+ test_cut_link_prp
+ test_packet_loss_prp
+ test_high_packet_loss_prp
+ test_reordering_prp
+"
+
+# The tests are running ping for 5sec with a relatively short interval in
+# different scenarios with faulty links (cut links, packet loss, delay,
+# reordering) that should be recoverable by HSR/PRP. The ping interval (10ms)
+# is short enough that the base delay (50ms) leads to a queue in the netem
+# qdiscs which is needed for reordering.
+
+setup_hsr_topo() # create three namespaces joined in an HSR ring and bring all links up
+{
+ # Three HSR nodes in a ring, every node has a LAN A interface connected
+ # to the LAN B interface of the next node.
+ #
+ # node1 node2
+ #
+ # vethA -------- vethB
+ # hsr1 hsr2
+ # vethB vethA
+ # \ /
+ # vethA vethB
+ # hsr3
+ #
+ # node3
+
+ local ver="$1" # passed as the hsr "version" (setup_topo uses 0 and 1)
+
+ setup_ns node1 node2 node3
+
+ # veth links
+ # shellcheck disable=SC2154 # variables assigned by setup_ns
+ ip link add vethA netns "$node1" type veth peer name vethB netns "$node2"
+ # shellcheck disable=SC2154 # variables assigned by setup_ns
+ ip link add vethA netns "$node2" type veth peer name vethB netns "$node3"
+ ip link add vethA netns "$node3" type veth peer name vethB netns "$node1"
+
+ # MAC addresses (not needed for HSR operation, but helps with debugging)
+ ip -net "$node1" link set address 00:11:22:00:01:01 dev vethA
+ ip -net "$node1" link set address 00:11:22:00:01:02 dev vethB
+
+ ip -net "$node2" link set address 00:11:22:00:02:01 dev vethA
+ ip -net "$node2" link set address 00:11:22:00:02:02 dev vethB
+
+ ip -net "$node3" link set address 00:11:22:00:03:01 dev vethA
+ ip -net "$node3" link set address 00:11:22:00:03:02 dev vethB
+
+ # HSR interfaces
+ ip -net "$node1" link add name hsr1 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+ ip -net "$node2" link add name hsr2 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+ ip -net "$node3" link add name hsr3 type hsr proto 0 version "$ver" \
+ slave1 vethA slave2 vethB supervision 45
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.0.1/24 dev hsr1
+ ip -net "$node2" addr add 100.64.0.2/24 dev hsr2
+ ip -net "$node3" addr add 100.64.0.3/24 dev hsr3
+
+ # Set all links up
+ ip -net "$node1" link set vethA up
+ ip -net "$node1" link set vethB up
+ ip -net "$node1" link set hsr1 up
+
+ ip -net "$node2" link set vethA up
+ ip -net "$node2" link set vethB up
+ ip -net "$node2" link set hsr2 up
+
+ ip -net "$node3" link set vethA up
+ ip -net "$node3" link set vethB up
+ ip -net "$node3" link set hsr3 up
+}
+
+setup_prp_topo() # create two namespaces joined by two veth pairs, with a PRP device on each side
+{
+ # Two PRP nodes, connected by two links (treated as LAN A and LAN B).
+ #
+ # vethA ----- vethA
+ # prp1 prp2
+ # vethB ----- vethB
+ #
+ # node1 node2
+
+ setup_ns node1 node2
+
+ # veth links
+ ip link add vethA netns "$node1" type veth peer name vethA netns "$node2"
+ ip link add vethB netns "$node1" type veth peer name vethB netns "$node2"
+
+ # MAC addresses will be copied from LAN A interface
+ ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA
+ ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA
+
+ # PRP interfaces
+ ip -net "$node1" link add name prp1 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+ ip -net "$node2" link add name prp2 type hsr \
+ slave1 vethA slave2 vethB supervision 45 proto 1
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.0.1/24 dev prp1
+ ip -net "$node2" addr add 100.64.0.2/24 dev prp2
+
+ # All links up
+ ip -net "$node1" link set vethA up
+ ip -net "$node1" link set vethB up
+ ip -net "$node1" link set prp1 up
+
+ ip -net "$node2" link set vethA up
+ ip -net "$node2" link set vethB up
+ ip -net "$node2" link set prp2 up
+}
+
+wait_for_hsr_node_table() # poll the hsr debugfs node tables, up to 5 tries one second apart
+{
+ log_info "Wait for node table entries to be merged."
+ local WAIT=5 nts # kept local so the counter and table dump do not leak to callers
+ while [ "${WAIT}" -gt 0 ]; do
+ nts=$(cat /sys/kernel/debug/hsr/hsr*/node_table)
+
+ # We need entries in the node tables, and they need to be merged
+ if (echo "$nts" | grep -qE "^([0-9a-f]{2}:){5}") && \
+ ! (echo "$nts" | grep -q "00:00:00:00:00:00"); then
+ return
+ fi
+
+ sleep 1
+ ((WAIT--))
+ done
+ check_err 1 "Failed to wait for merged node table entries"
+}
+
+setup_topo() # build the topology for the requested protocol; unknown names go to check_err
+{
+ local proto="$1" # one of "HSRv0", "HSRv1" or "PRP"
+
+ if [ "$proto" = "HSRv0" ]; then
+ setup_hsr_topo 0
+ wait_for_hsr_node_table
+ elif [ "$proto" = "HSRv1" ]; then
+ setup_hsr_topo 1
+ wait_for_hsr_node_table
+ elif [ "$proto" = "PRP" ]; then
+ setup_prp_topo # no node table wait is done for PRP
+ else
+ check_err 1 "Unknown protocol (${proto})"
+ fi
+}
+
+check_ping() # ping $2 from namespace $1; report loss or more than $3 duplicates via check_err
+{
+ local node="$1"
+ local dst="$2"
+ local accepted_dups="$3"
+ local ping_args="-q -i 0.01 -c 400"
+ local output
+
+ log_info "Running ping $node -> $dst"
+ # shellcheck disable=SC2086
+ output=$(ip netns exec "$node" ping $ping_args "$dst" | \
+ grep "packets transmitted")
+ log_info "$output"
+
+ local dups=0 loss=0 # local: must not clobber a caller's own "loss" variable
+
+ if [[ "$output" =~ \+([0-9]+)" duplicates" ]]; then
+ dups="${BASH_REMATCH[1]}"
+ fi
+ if [[ "$output" =~ ([0-9\.]+\%)" packet loss" ]]; then
+ loss="${BASH_REMATCH[1]}"
+ fi
+
+ if [ "$dups" -gt "$accepted_dups" ]; then
+ check_err 1 "Unexpected duplicate packets (${dups})"
+ fi
+ if [ "$loss" != "0%" ]; then # also true when no summary line was captured (loss stays 0)
+ check_err 1 "Unexpected packet loss (${loss})"
+ fi
+}
+
+test_clean() # baseline: ping node1 -> 100.64.0.2 on an unimpaired topology, no duplicates accepted
+{
+ local proto="$1" # protocol name understood by setup_topo
+
+ RET=0
+ local tname="${FUNCNAME[0]} - ${proto}" # local: only used by this test's log_test calls
+
+ setup_topo "$proto"
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ check_ping "$node1" "100.64.0.2" 0
+
+ log_test "${tname}"
+}
+
+test_clean_hsrv0() # ALL_TESTS entry: test_clean on HSRv0
+{
+ test_clean "HSRv0"
+}
+
+test_clean_hsrv1() # ALL_TESTS entry: test_clean on HSRv1
+{
+ test_clean "HSRv1"
+}
+
+test_clean_prp() # ALL_TESTS entry: test_clean on PRP
+{
+ test_clean "PRP"
+}
+
+test_cut_link() # ping node1 -> 100.64.0.2 while node1's vethB is taken down after 2s
+{
+ local proto="$1" # protocol name understood by setup_topo
+
+ RET=0
+ local tname="${FUNCNAME[0]} - ${proto}" # local: only used by this test's log_test calls
+
+ setup_topo "$proto"
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ # Cutting link from subshell, so check_ping can run in the normal shell
+ # with access to global variables from the test harness.
+ (
+ sleep 2
+ log_info "Cutting link"
+ ip -net "$node1" link set vethB down
+ ) &
+ check_ping "$node1" "100.64.0.2" 0
+
+ wait
+ log_test "${tname}"
+}
+
+
+test_cut_link_hsrv0() # ALL_TESTS entry: test_cut_link on HSRv0
+{
+ test_cut_link "HSRv0"
+}
+
+test_cut_link_hsrv1() # ALL_TESTS entry: test_cut_link on HSRv1
+{
+ test_cut_link "HSRv1"
+}
+
+test_cut_link_prp() # ALL_TESTS entry: test_cut_link on PRP
+{
+ test_cut_link "PRP"
+}
+
+test_packet_loss() # ping node1 -> 100.64.0.2 with netem loss on vethB; up to 40 duplicates accepted
+{
+ local proto="$1" # protocol name understood by setup_topo
+ local loss="$2" # netem loss argument applied to node1's vethB, e.g. "20%"
+
+ RET=0
+ local tname="${FUNCNAME[0]} - ${proto}, ${loss}" # local: only used by this test's log_test calls
+
+ setup_topo "$proto"
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ # Packet loss with lower delay makes sure the packets on the lossy link
+ # arrive first.
+ tc -net "$node1" qdisc add dev vethA root netem delay 50ms
+ tc -net "$node1" qdisc add dev vethB root netem delay 20ms loss "$loss"
+
+ check_ping "$node1" "100.64.0.2" 40
+
+ log_test "${tname}"
+}
+
+test_packet_loss_hsrv0() # ALL_TESTS entry: 20% loss on HSRv0
+{
+ test_packet_loss "HSRv0" "20%"
+}
+
+test_packet_loss_hsrv1() # ALL_TESTS entry: 20% loss on HSRv1
+{
+ test_packet_loss "HSRv1" "20%"
+}
+
+test_packet_loss_prp() # ALL_TESTS entry: 20% loss on PRP
+{
+ test_packet_loss "PRP" "20%"
+}
+
+test_high_packet_loss_hsrv0() # ALL_TESTS entry: 80% loss on HSRv0
+{
+ test_packet_loss "HSRv0" "80%"
+}
+
+test_high_packet_loss_hsrv1() # ALL_TESTS entry: 80% loss on HSRv1
+{
+ test_packet_loss "HSRv1" "80%"
+}
+
+test_high_packet_loss_prp() # ALL_TESTS entry: 80% loss on PRP
+{
+ test_packet_loss "PRP" "80%"
+}
+
+test_reordering() # ping node1 -> 100.64.0.2 with netem reordering on vethB; up to 40 duplicates accepted
+{
+ local proto="$1" # protocol name understood by setup_topo
+
+ RET=0
+ local tname="${FUNCNAME[0]} - ${proto}" # local: only used by this test's log_test calls
+
+ setup_topo "$proto"
+ if ((RET != ksft_pass)); then
+ log_test "${tname} setup"
+ return
+ fi
+
+ tc -net "$node1" qdisc add dev vethA root netem delay 50ms
+ tc -net "$node1" qdisc add dev vethB root netem delay 50ms reorder 20%
+
+ check_ping "$node1" "100.64.0.2" 40
+
+ log_test "${tname}"
+}
+
+test_reordering_hsrv0() # ALL_TESTS entry: test_reordering on HSRv0
+{
+ test_reordering "HSRv0"
+}
+
+test_reordering_hsrv1() # ALL_TESTS entry: test_reordering on HSRv1
+{
+ test_reordering "HSRv1"
+}
+
+test_reordering_prp() # ALL_TESTS entry: test_reordering on PRP
+{
+ test_reordering "PRP"
+}
+
+cleanup() # EXIT trap handler: delegates to cleanup_all_ns
+{
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/hsr/prp_ping.sh b/tools/testing/selftests/net/hsr/prp_ping.sh
new file mode 100755
index 000000000000..fd2ba9f05d4c
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/prp_ping.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ipv6=true
+
+source ./hsr_common.sh
+
+# Option letters accepted by the getopts loop below.
+optstring="h4"
+
+# Print the command-line synopsis on stdout. Arguments are ignored.
+usage() {
+	echo "Usage: $0 [OPTION]"
+	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
+}
+
+while getopts "$optstring" option;do
+ case "$option" in
+ "h")
+ usage "$0"
+ exit 0
+ ;;
+ "4")
+ ipv6=false
+ ;;
+ "?")
+ usage "$0"
+ exit 1
+ ;;
+esac
+done
+
+# Build the two-node PRP topology inside the namespaces $node1 and $node2:
+#
+#    vethA ----- vethA
+#    prp1         prp2
+#    vethB ----- vethB
+#
+#    node1       node2
+#
+# The two veth links are treated as LAN A and LAN B. Node N ends up with the
+# device prpN carrying 100.64.0.N/24 and dead:beef:0::N/64.
+setup_prp_interfaces()
+{
+	echo "INFO: Preparing interfaces for PRP"
+
+	local lan idx ns dev
+	# shellcheck disable=SC2154 # variables assigned by setup_ns
+	local -a ns_list=("$node1" "$node2")
+
+	# One veth pair per LAN, with one end in each namespace
+	for lan in A B; do
+		ip link add "veth${lan}" netns "${ns_list[0]}" type veth \
+			peer name "veth${lan}" netns "${ns_list[1]}"
+	done
+
+	# MAC addresses will be copied from the LAN A interface
+	for idx in 1 2; do
+		ip -net "${ns_list[idx - 1]}" link set \
+			address "00:11:22:00:00:0${idx}" dev vethA
+	done
+
+	# PRP device on top of both ports
+	for idx in 1 2; do
+		ip -net "${ns_list[idx - 1]}" link add name "prp${idx}" type hsr \
+			slave1 vethA slave2 vethB supervision 45 proto 1
+	done
+
+	# IPv4 and IPv6 addresses; the host part is the node index
+	for idx in 1 2; do
+		ns="${ns_list[idx - 1]}"
+		ip -net "$ns" addr add "100.64.0.${idx}/24" dev "prp${idx}"
+		ip -net "$ns" addr add "dead:beef:0::${idx}/64" dev "prp${idx}" nodad
+	done
+
+	# Bring up both ports and then the PRP device, one node at a time
+	for idx in 1 2; do
+		ns="${ns_list[idx - 1]}"
+		for dev in vethA vethB "prp${idx}"; do
+			ip -net "$ns" link set "$dev" up
+		done
+	done
+}
+
+setup_vlan_interfaces()
+{
+ # Interfaces
+ ip -net "$node1" link add link prp1 name prp1.2 type vlan id 2
+ ip -net "$node2" link add link prp2 name prp2.2 type vlan id 2
+
+ # IP addresses
+ ip -net "$node1" addr add 100.64.2.1/24 dev prp1.2
+ ip -net "$node1" addr add dead:beef:2::1/64 dev prp1.2 nodad
+
+ ip -net "$node2" addr add 100.64.2.2/24 dev prp2.2
+ ip -net "$node2" addr add dead:beef:2::2/64 dev prp2.2 nodad
+
+ # All links up
+ ip -net "$node1" link set prp1.2 up
+ ip -net "$node2" link set prp2.2 up
+}
+
+do_ping_tests()
+{
+ local netid="$1"
+
+ echo "INFO: Initial validation ping"
+
+ do_ping "$node1" "100.64.$netid.2"
+ do_ping "$node2" "100.64.$netid.1"
+ stop_if_error "Initial validation failed on IPv4"
+
+ do_ping "$node1" "dead:beef:$netid::2"
+ do_ping "$node2" "dead:beef:$netid::1"
+ stop_if_error "Initial validation failed on IPv6"
+
+ echo "INFO: Longer ping test."
+
+ do_ping_long "$node1" "100.64.$netid.2"
+ do_ping_long "$node2" "100.64.$netid.1"
+ stop_if_error "Longer ping test failed on IPv4."
+
+ do_ping_long "$node1" "dead:beef:$netid::2"
+ do_ping_long "$node2" "dead:beef:$netid::1"
+ stop_if_error "Longer ping test failed on IPv6."
+}
+
+run_ping_tests()
+{
+ echo "INFO: Running ping tests"
+ do_ping_tests 0
+}
+
+# Run the ping tests over VLAN 2 on top of the PRP devices.
+#
+# The VLAN interfaces are created on both prp1 and prp2, so the tests only
+# run when ethtool reports "vlan-challenged: off" for both devices; if
+# either one cannot carry VLANs the VLAN tests are skipped with an INFO
+# message.
+run_vlan_ping_tests()
+{
+	local vlan_challenged_prp1 vlan_challenged_prp2
+
+	# Second field of the "vlan-challenged: <on|off> ..." feature line
+	vlan_challenged_prp1=$(ip net exec "$node1" ethtool -k prp1 | \
+		awk '/vlan-challenged/ {print $2}')
+	vlan_challenged_prp2=$(ip net exec "$node2" ethtool -k prp2 | \
+		awk '/vlan-challenged/ {print $2}')
+
+	if [[ "$vlan_challenged_prp1" = "off" && \
+	      "$vlan_challenged_prp2" = "off" ]]; then
+		echo "INFO: Running VLAN ping tests"
+		setup_vlan_interfaces
+		do_ping_tests 2
+	else
+		echo "INFO: Not Running VLAN tests as the device does not support VLAN"
+	fi
+}
+
+check_prerequisites
+trap cleanup_all_ns EXIT	# run cleanup_all_ns on every exit path
+
+setup_ns node1 node2	# assigns $node1 and $node2 (see the shellcheck note in setup_prp_interfaces)
+setup_prp_interfaces
+
+run_ping_tests
+run_vlan_ping_tests
+
+exit $ret	# ret is not assigned in this file; presumably kept by the hsr_common.sh helpers
diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings
index 0fbc037f2aa8..a953c96aa16e 100644
--- a/tools/testing/selftests/net/hsr/settings
+++ b/tools/testing/selftests/net/hsr/settings
@@ -1 +1 @@
-timeout=50
+timeout=180
diff --git a/tools/testing/selftests/net/icmp_rfc4884.c b/tools/testing/selftests/net/icmp_rfc4884.c
new file mode 100644
index 000000000000..cd826b913557
--- /dev/null
+++ b/tools/testing/selftests/net/icmp_rfc4884.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <linux/errqueue.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+static const unsigned short src_port = 44444;
+static const unsigned short dst_port = 55555;
+static const int min_orig_dgram_len = 128;
+static const int min_payload_len_v4 =
+ min_orig_dgram_len - sizeof(struct iphdr) - sizeof(struct udphdr);
+static const int min_payload_len_v6 =
+ min_orig_dgram_len - sizeof(struct ipv6hdr) - sizeof(struct udphdr);
+static const uint8_t orig_payload_byte = 0xAA;
+
+struct sockaddr_inet {
+ union {	/* one socket address, viewed per family or generically */
+ struct sockaddr_in6 v6;
+ struct sockaddr_in v4;
+ struct sockaddr sa;	/* view handed to bind() and sendto() */
+ };
+ socklen_t len;	/* size of the member filled in by set_addr() */
+};
+
+struct ip_case_info {
+ int domain;	/* address family for socket() and set_addr() */
+ int level;	/* setsockopt() level; also the expected cmsg_level */
+ int opt1;	/* first option enabled on the socket; also the expected cmsg_type */
+ int opt2;	/* second option enabled on the socket */
+ int proto;	/* protocol of the raw socket that sends the ICMP error */
+ int (*build_func)(uint8_t *buf, ssize_t buflen, bool with_ext,	/* ICMP error packet builder */
+ int payload_len, bool bad_csum, bool bad_len,
+ bool smaller_len);
+ int min_payload;	/* smallest payload_len for which a non-zero offset is expected */
+};
+
+/*
+ * Set IFF_UP on the "lo" interface, keeping its other flags.
+ * Returns 0 on success, -1 if the socket or either ioctl fails.
+ */
+static int bringup_loopback(void)
+{
+	struct ifreq ifr = { .ifr_name = "lo" };
+	int ret = -1;
+	int fd;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -1;
+
+	/* Read-modify-write of the interface flags */
+	if (ioctl(fd, SIOCGIFFLAGS, &ifr) >= 0) {
+		ifr.ifr_flags |= IFF_UP;
+		if (ioctl(fd, SIOCSIFFLAGS, &ifr) >= 0)
+			ret = 0;
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * 16-bit ones' complement checksum over @len bytes at @buf.
+ * Bytes are combined as big-endian 16-bit words; an odd trailing byte is
+ * used as the high byte of a final word. The result is in host order.
+ */
+static uint16_t csum(const void *buf, size_t len)
+{
+	const uint8_t *p = buf;
+	uint32_t acc = 0;
+	size_t i;
+
+	for (i = 0; i + 1 < len; i += 2)
+		acc += (p[i] << 8) | p[i + 1];
+
+	if (len & 1)
+		acc += p[len - 1] << 8;
+
+	/* Fold the carries back into the low 16 bits */
+	while (acc >> 16)
+		acc = (acc & 0xFFFF) + (acc >> 16);
+
+	return ~acc & 0xFFFF;
+}
+
+/*
+ * Wait up to 5 seconds for an error condition on @fd.
+ * No events are requested; success requires poll() to report exactly
+ * POLLERR on the descriptor. Returns 0 in that case, -1 otherwise.
+ */
+static int poll_err(int fd)
+{
+	struct pollfd pfd = { .fd = fd };
+
+	if (poll(&pfd, 1, 5000) == 1 && pfd.revents == POLLERR)
+		return 0;
+
+	return -1;
+}
+
+/*
+ * Fill @addr with the loopback address of family @domain and port @port
+ * (host order), and record the matching sockaddr size in addr->len.
+ * For any other family @addr is left zeroed.
+ */
+static void set_addr(struct sockaddr_inet *addr, int domain,
+		     unsigned short port)
+{
+	memset(addr, 0, sizeof(*addr));
+
+	if (domain == AF_INET) {
+		struct sockaddr_in *sin = &addr->v4;
+
+		sin->sin_family = AF_INET;
+		sin->sin_port = htons(port);
+		sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		addr->len = sizeof(*sin);
+	} else if (domain == AF_INET6) {
+		struct sockaddr_in6 *sin6 = &addr->v6;
+
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(port);
+		sin6->sin6_addr = in6addr_loopback;
+		addr->len = sizeof(*sin6);
+	}
+}
+
+/*
+ * Enable info->opt1 and info->opt2 (at info->level) on @fd, then bind it to
+ * the loopback address on src_port.
+ * Returns -1 if a setsockopt() fails, otherwise the result of bind().
+ */
+static int bind_and_setsockopt(int fd, const struct ip_case_info *info)
+{
+	const int opts[] = { info->opt1, info->opt2 };
+	struct sockaddr_inet addr;
+	unsigned int i;
+	int on = 1;
+
+	for (i = 0; i < sizeof(opts) / sizeof(opts[0]); i++) {
+		if (setsockopt(fd, info->level, opts[i], &on, sizeof(on)) < 0)
+			return -1;
+	}
+
+	set_addr(&addr, info->domain, src_port);
+	return bind(fd, &addr.sa, addr.len);
+}
+
+/*
+ * Write an ICMP extension structure at @buf: the extension header followed
+ * by one object header with a 4-byte object payload. @buf is expected to be
+ * zeroed by the caller.
+ *
+ * @bad_csum: store the checksum minus one instead of the checksum.
+ * @bad_len:  encode a wrong object length; @smaller_len selects a length
+ *            one byte short of the object header, otherwise twice the real
+ *            object length.
+ *
+ * Returns the number of bytes used, or -EINVAL if @buflen is too small.
+ */
+static int build_rfc4884_ext(uint8_t *buf, size_t buflen, bool bad_csum,
+			     bool bad_len, bool smaller_len)
+{
+	struct icmp_extobj_hdr *objh;
+	struct icmp_ext_hdr *exthdr;
+	/* Object header plus a 4-byte object payload */
+	const size_t real_obj_len = sizeof(*objh) + sizeof(uint32_t);
+	const size_t ext_len = sizeof(*exthdr) + real_obj_len;
+	size_t enc_len = real_obj_len;
+	uint16_t sum;
+
+	if (buflen < ext_len)
+		return -EINVAL;
+
+	exthdr = (struct icmp_ext_hdr *)buf;
+	objh = (struct icmp_extobj_hdr *)(exthdr + 1);
+
+	exthdr->version = 2;
+
+	/* A bad length is either too small to fit the object header or too
+	 * big to fit in the packet.
+	 */
+	if (bad_len) {
+		if (smaller_len)
+			enc_len = sizeof(*objh) - 1;
+		else
+			enc_len = real_obj_len * 2;
+	}
+	objh->length = htons(enc_len);
+
+	/* The checksum always covers the real extent of the structure */
+	sum = csum(buf, ext_len);
+	if (bad_csum)
+		sum -= 1;
+	exthdr->checksum = htons(sum);
+
+	return ext_len;
+}
+
+/*
+ * Write the "original datagram" quoted by the ICMP error: an IPv4 header
+ * and a UDP header (loopback to loopback, src_port to dst_port) followed by
+ * @payload_len bytes of orig_payload_byte. @buf is expected to be zeroed by
+ * the caller.
+ *
+ * Returns the datagram length, or -EINVAL if it does not fit in @buflen.
+ */
+static int build_orig_dgram_v4(uint8_t *buf, ssize_t buflen, int payload_len)
+{
+	struct udphdr *udph;
+	struct iphdr *iph;
+	const size_t udp_len = sizeof(*udph) + payload_len;
+	const size_t len = sizeof(*iph) + udp_len;
+
+	if (len > buflen)
+		return -EINVAL;
+
+	iph = (struct iphdr *)buf;
+	udph = (struct udphdr *)(iph + 1);
+
+	iph->version = 4;
+	iph->ihl = 5;
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = htonl(INADDR_LOOPBACK);
+	iph->daddr = htonl(INADDR_LOOPBACK);
+	iph->tot_len = htons(len);
+	/* Computed last, once every other header field is in place */
+	iph->check = htons(csum(iph, sizeof(*iph)));
+
+	udph->source = htons(src_port);
+	udph->dest = htons(dst_port);
+	udph->len = htons(udp_len);
+
+	memset(udph + 1, orig_payload_byte, payload_len);
+
+	return len;
+}
+
+/*
+ * Write the "original datagram" quoted by the ICMPv6 error: an IPv6 header
+ * and a UDP header (loopback to loopback, src_port to dst_port) followed by
+ * @payload_len bytes of orig_payload_byte. @buf is expected to be zeroed by
+ * the caller.
+ *
+ * Returns the datagram length, or -EINVAL if it does not fit in @buflen.
+ */
+static int build_orig_dgram_v6(uint8_t *buf, ssize_t buflen, int payload_len)
+{
+	struct udphdr *udph;
+	struct ipv6hdr *iph;
+	const size_t udp_len = sizeof(*udph) + payload_len;
+	const size_t len = sizeof(*iph) + udp_len;
+
+	if (len > buflen)
+		return -EINVAL;
+
+	iph = (struct ipv6hdr *)buf;
+	udph = (struct udphdr *)(iph + 1);
+
+	iph->version = 6;
+	iph->payload_len = htons(udp_len);
+	iph->nexthdr = IPPROTO_UDP;
+	iph->saddr = in6addr_loopback;
+	iph->daddr = in6addr_loopback;
+
+	udph->source = htons(src_port);
+	udph->dest = htons(dst_port);
+	udph->len = htons(udp_len);
+
+	memset(udph + 1, orig_payload_byte, payload_len);
+
+	return len;
+}
+
+/*
+ * Build an ICMPv4 "port unreachable" error in @buf: ICMP header, quoted
+ * original datagram with @payload_len bytes of UDP payload and, when
+ * @with_ext is set, an ICMP extension structure (see build_rfc4884_ext()
+ * for @bad_csum, @bad_len and @smaller_len).
+ *
+ * The length field of the header is the quoted datagram size in 32-bit
+ * words. Returns the packet length, or a negative value if @buf is too
+ * small.
+ */
+static int build_icmpv4_pkt(uint8_t *buf, ssize_t buflen, bool with_ext,
+			    int payload_len, bool bad_csum, bool bad_len,
+			    bool smaller_len)
+{
+	struct icmphdr *icmph = (struct icmphdr *)buf;
+	int orig_len, ext_len;
+	int len;
+
+	memset(buf, 0, buflen);
+
+	icmph->type = ICMP_DEST_UNREACH;
+	icmph->code = ICMP_PORT_UNREACH;
+	icmph->checksum = 0;
+	len = sizeof(*icmph);
+
+	orig_len = build_orig_dgram_v4(buf + len, buflen - len, payload_len);
+	if (orig_len < 0)
+		return orig_len;
+	len += orig_len;
+
+	icmph->un.reserved[1] = orig_len / sizeof(uint32_t);
+
+	if (with_ext) {
+		ext_len = build_rfc4884_ext(buf + len, buflen - len,
+					    bad_csum, bad_len, smaller_len);
+		if (ext_len < 0)
+			return ext_len;
+		len += ext_len;
+	}
+
+	/* Checksum over the whole message, with the field still zero */
+	icmph->checksum = htons(csum(icmph, len));
+	return len;
+}
+
+/*
+ * Build an ICMPv6 "port unreachable" error in @buf: ICMPv6 header, quoted
+ * original datagram with @payload_len bytes of UDP payload and, when
+ * @with_ext is set, an ICMP extension structure (see build_rfc4884_ext()
+ * for @bad_csum, @bad_len and @smaller_len).
+ *
+ * The length field of the header is the quoted datagram size in 64-bit
+ * words. Returns the packet length, or a negative value if @buf is too
+ * small.
+ */
+static int build_icmpv6_pkt(uint8_t *buf, ssize_t buflen, bool with_ext,
+			    int payload_len, bool bad_csum, bool bad_len,
+			    bool smaller_len)
+{
+	struct icmp6hdr *icmph = (struct icmp6hdr *)buf;
+	int orig_len, ext_len;
+	int len;
+
+	memset(buf, 0, buflen);
+
+	icmph->icmp6_type = ICMPV6_DEST_UNREACH;
+	icmph->icmp6_code = ICMPV6_PORT_UNREACH;
+	icmph->icmp6_cksum = 0;
+	len = sizeof(*icmph);
+
+	orig_len = build_orig_dgram_v6(buf + len, buflen - len, payload_len);
+	if (orig_len < 0)
+		return orig_len;
+	len += orig_len;
+
+	icmph->icmp6_datagram_len = orig_len / sizeof(uint64_t);
+
+	if (with_ext) {
+		ext_len = build_rfc4884_ext(buf + len, buflen - len,
+					    bad_csum, bad_len, smaller_len);
+		if (ext_len < 0)
+			return ext_len;
+		len += ext_len;
+	}
+
+	/* Checksum over the message only, with the field still zero */
+	icmph->icmp6_cksum = htons(csum(icmph, len));
+	return len;
+}
+
+FIXTURE(rfc4884) {};
+
+FIXTURE_SETUP(rfc4884)
+{
+ int ret;
+
+ ret = unshare(CLONE_NEWNET);	/* move the test into a new network namespace */
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+ }
+
+ ret = bringup_loopback();	/* packets are sent to the loopback address */
+ ASSERT_EQ(ret, 0) TH_LOG("Failed to bring up loopback interface");
+}
+
+FIXTURE_TEARDOWN(rfc4884)
+{
+}
+
+const struct ip_case_info ipv4_info = {
+ .domain = AF_INET,
+ .level = SOL_IP,
+ .opt1 = IP_RECVERR,
+ .opt2 = IP_RECVERR_RFC4884,
+ .proto = IPPROTO_ICMP,
+ .build_func = build_icmpv4_pkt,
+ .min_payload = min_payload_len_v4,
+};
+
+const struct ip_case_info ipv6_info = {
+ .domain = AF_INET6,
+ .level = SOL_IPV6,
+ .opt1 = IPV6_RECVERR,
+ .opt2 = IPV6_RECVERR_RFC4884,
+ .proto = IPPROTO_ICMPV6,
+ .build_func = build_icmpv6_pkt,
+ .min_payload = min_payload_len_v6,
+};
+
+FIXTURE_VARIANT(rfc4884) {
+ /* IPv4/v6 related information */
+ struct ip_case_info info;
+ /* Whether to append an ICMP extension or not */
+ bool with_ext;
+ /* UDP payload length */
+ int payload_len;
+ /* Whether to generate a bad checksum in the ICMP extension structure */
+ bool bad_csum;
+ /* Whether to generate a bad length in the ICMP object header */
+ bool bad_len;
+ /* Whether it is too small to fit the object header or too big to fit
+ * in the packet
+ */
+ bool smaller_len;
+};
+
+/* Tests that a valid ICMPv4 error message with extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_small_payload) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message with extension and 128 bytes original
+ * datagram, generates an error with the expected offset, and does not raise the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message with extension and the original
+ * datagram is larger than 128 bytes, generates an error with the expected
+ * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_large_payload) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_small_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and 128 bytes
+ * original datagram, generates an error with zero offset, and does not raise
+ * the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_min_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and the original
+ * datagram is larger than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_large_payload) {
+ .info = ipv4_info,
+ .with_ext = false,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv4 error message with extension and an invalid checksum,
+ * generates an error with the expected offset, and raises the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_checksum) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = true,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv4 error message with extension and an object length
+ * smaller than the object header, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_small) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = true,
+};
+
+/* Tests that an ICMPv4 error message with extension and an object length that
+ * is too big to fit in the packet, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_large) {
+ .info = ipv4_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v4,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_small_payload) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and 128 bytes original
+ * datagram, generates an error with the expected offset, and does not raise the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and the original
+ * datagram is larger than 128 bytes, generates an error with the expected
+ * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_large_payload) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+/* Tests that a valid ICMPv6 error message without extension and the original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_small_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = 64,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message without extension and 128 bytes
+ * original datagram, generates an error with zero offset, and does not
+ * raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_min_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that a valid ICMPv6 error message without extension and the original
+ * datagram is larger than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_large_payload) {
+ .info = ipv6_info,
+ .with_ext = false,
+ .payload_len = 256,
+ .bad_csum = false,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv6 error message with extension and an invalid checksum,
+ * generates an error with the expected offset, and raises the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_checksum) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = true,
+ .bad_len = false,
+};
+
+/* Tests that an ICMPv6 error message with extension and an object length
+ * smaller than the object header, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_small) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = true,
+};
+
+/* Tests that an ICMPv6 error message with extension and an object length that
+ * is too big to fit in the packet, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_large) {
+ .info = ipv6_info,
+ .with_ext = true,
+ .payload_len = min_payload_len_v6,
+ .bad_csum = false,
+ .bad_len = true,
+ .smaller_len = false,
+};
+
+/*
+ * Read one entry from the error queue of @sock and check the RFC 4884 data
+ * in its extended-error control message against what variant @v expects:
+ *  - SO_EE_RFC4884_FLAG_INVALID is set exactly when the variant asked for
+ *    a bad extension checksum or a bad object length;
+ *  - the reported length is the UDP payload length when an extension is
+ *    present and the payload reaches the per-family minimum, 0 otherwise.
+ *
+ * Control messages of another level or type are logged and skipped. The
+ * test fails if no matching control message is found, so a missing error
+ * report cannot pass unnoticed.
+ */
+static void
+check_rfc4884_offset(struct __test_metadata *_metadata, int sock,
+		     const FIXTURE_VARIANT(rfc4884) *v)
+{
+	char rxbuf[1024];
+	char ctrl[1024];
+	struct iovec iov = {
+		.iov_base = rxbuf,
+		.iov_len = sizeof(rxbuf)
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = ctrl,
+		.msg_controllen = sizeof(ctrl),
+	};
+	struct cmsghdr *cmsg;
+	bool found = false;
+	int recv;
+
+	ASSERT_EQ(poll_err(sock), 0);
+
+	recv = recvmsg(sock, &msg, MSG_ERRQUEUE);
+	ASSERT_GE(recv, 0) TH_LOG("recvmsg(MSG_ERRQUEUE) failed");
+
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		bool is_invalid, expected_invalid;
+		struct sock_extended_err *ee;
+		int expected_off;
+		uint16_t off;
+
+		if (cmsg->cmsg_level != v->info.level ||
+		    cmsg->cmsg_type != v->info.opt1) {
+			TH_LOG("Unrelated cmsgs were encountered in recvmsg()");
+			continue;
+		}
+
+		found = true;
+
+		ee = (struct sock_extended_err *)CMSG_DATA(cmsg);
+		off = ee->ee_rfc4884.len;
+		is_invalid = ee->ee_rfc4884.flags & SO_EE_RFC4884_FLAG_INVALID;
+
+		expected_invalid = v->bad_csum || v->bad_len;
+		ASSERT_EQ(is_invalid, expected_invalid) {
+			TH_LOG("Expected invalidity flag to be %d, but got %d",
+			       expected_invalid, is_invalid);
+		}
+
+		expected_off =
+			(v->with_ext && v->payload_len >= v->info.min_payload) ?
+			v->payload_len : 0;
+		ASSERT_EQ(off, expected_off) {
+			TH_LOG("Expected RFC4884 offset %u, got %u",
+			       expected_off, off);
+		}
+		/* Only the first matching control message is checked */
+		break;
+	}
+
+	ASSERT_TRUE(found) {
+		TH_LOG("No extended error cmsg (level %d, type %d) received",
+		       v->info.level, v->info.opt1);
+	}
+}
+
+TEST_F(rfc4884, rfc4884)
+{
+ const typeof(variant) v = variant;
+ struct sockaddr_inet addr;
+ uint8_t pkt[1024];
+ int dgram, raw;
+ int len, sent;
+ int err;
+
+ dgram = socket(v->info.domain, SOCK_DGRAM, 0);	/* socket whose error queue is checked */
+ ASSERT_GE(dgram, 0) TH_LOG("Opening datagram socket failed");
+
+ err = bind_and_setsockopt(dgram, &v->info);	/* enable both options, bind to src_port */
+ ASSERT_EQ(err, 0) TH_LOG("Bind failed");
+
+ raw = socket(v->info.domain, SOCK_RAW, v->info.proto);	/* sends the hand-built ICMP error */
+ ASSERT_GE(raw, 0) TH_LOG("Opening raw socket failed");
+
+ len = v->info.build_func(pkt, sizeof(pkt), v->with_ext, v->payload_len,
+ v->bad_csum, v->bad_len, v->smaller_len);
+ ASSERT_GT(len, 0) TH_LOG("Building packet failed");
+
+ set_addr(&addr, v->info.domain, 0);	/* loopback destination, port 0 */
+ sent = sendto(raw, pkt, len, 0, &addr.sa, addr.len);
+ ASSERT_EQ(len, sent) TH_LOG("Sending packet failed");
+
+ check_rfc4884_offset(_metadata, dgram, v);	/* asserts on the reported offset and flag */
+
+ close(dgram);
+ close(raw);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index 845c26dd01a9..b2b99889942f 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -273,8 +273,8 @@ setup()
ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null
ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null
- ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
ip -netns $ioam_node_alpha link set veth0 up &>/dev/null
ip -netns $ioam_node_alpha link set lo up &>/dev/null
ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 0ccf484b1d9d..f4afef51b930 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -43,6 +43,10 @@
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
+#endif
+
#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */
#define MAX_PAYLOAD 2048
#define XFRM_ALGO_KEY_BUF_SIZE 512
@@ -827,13 +831,16 @@ static int xfrm_fill_key(char *name, char *buf,
static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
struct xfrm_desc *desc)
{
- struct {
+ union {
union {
struct xfrm_algo alg;
struct xfrm_algo_aead aead;
struct xfrm_algo_auth auth;
} u;
- char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ struct {
+ unsigned char __offset_to_FAM[offsetof(struct xfrm_algo_auth, alg_key)];
+ char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ };
} alg = {};
size_t alen, elen, clen, aelen;
unsigned short type;
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index 27437590eeb5..e28884ce3ab3 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -707,7 +707,7 @@ static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
cm->cmsg_level, cm->cmsg_type);
if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
- error(1, 0, "cmsg: len=%lu expected=%lu",
+ error(1, 0, "cmsg: len=%zu expected=%zu",
cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata)));
aux = (void *)CMSG_DATA(cm);
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 531e7fa1b3ea..6cdfb8afccb5 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -8,7 +8,7 @@ import time
import traceback
from collections import namedtuple
from .consts import KSFT_MAIN_NAME
-from .utils import global_defer_queue
+from . import utils
KSFT_RESULT = None
KSFT_RESULT_ALL = True
@@ -32,8 +32,23 @@ class KsftTerminate(KeyboardInterrupt):
def ksft_pr(*objs, **kwargs):
+ """
+ Print logs to stdout.
+
+ Behaves like print() but log lines will be prefixed
+ with # to prevent breaking the TAP output formatting.
+
+ Extra arguments (on top of what print() supports):
+ line_pfx - add extra string before each line
+ """
+ sep = kwargs.pop("sep", " ")
+ pfx = kwargs.pop("line_pfx", "")
+ pfx = "#" + (" " + pfx if pfx else "")
kwargs["flush"] = True
- print("#", *objs, **kwargs)
+
+ text = sep.join(str(obj) for obj in objs)
+ prefixed = f"\n{pfx} ".join(text.split('\n'))
+ print(pfx, prefixed, **kwargs)
def _fail(*args):
@@ -153,21 +168,24 @@ def ktap_result(ok, cnt=1, case_name="", comment=""):
print(res, flush=True)
+def _ksft_defer_arm(state):
+ """ Allow (True) or disallow (False) the use of defer() by setting utils.GLOBAL_DEFER_ARMED """
+ utils.GLOBAL_DEFER_ARMED = state
+
+
def ksft_flush_defer():
global KSFT_RESULT
i = 0
- qlen_start = len(global_defer_queue)
- while global_defer_queue:
+ qlen_start = len(utils.GLOBAL_DEFER_QUEUE)
+ while utils.GLOBAL_DEFER_QUEUE:
i += 1
- entry = global_defer_queue.pop()
+ entry = utils.GLOBAL_DEFER_QUEUE.pop()
try:
entry.exec_only()
except Exception:
ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Defer Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Defer Exception|")
KSFT_RESULT = False
@@ -315,6 +333,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
comment = ""
cnt_key = ""
+ _ksft_defer_arm(True)
try:
func(*args)
except KsftSkipEx as e:
@@ -325,20 +344,17 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cnt_key = 'xfail'
except BaseException as e:
stop |= isinstance(e, KeyboardInterrupt)
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Exception|")
if stop:
ksft_pr(f"Stopping tests due to {type(e).__name__}.")
KSFT_RESULT = False
cnt_key = 'fail'
+ _ksft_defer_arm(False)
try:
ksft_flush_defer()
except BaseException as e:
- tb = traceback.format_exc()
- for line in tb.strip().split('\n'):
- ksft_pr("Exception|", line)
+ ksft_pr(traceback.format_exc(), line_pfx="Exception|")
if isinstance(e, KeyboardInterrupt):
ksft_pr()
ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.")
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 106ee1f2df86..85884f3e827b 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -41,7 +41,9 @@ class cmd:
self.ret = None
self.ksft_term_fd = None
+ self.host = host
self.comm = comm
+
if host:
self.proc = host.cmd(comm)
else:
@@ -99,6 +101,27 @@ class cmd:
raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
(self.proc.args, stdout, stderr), self)
+ def __repr__(self):
+ def str_fmt(name, s):
+ name += ': '
+ return (name + s.strip().replace('\n', '\n' + ' ' * len(name)))
+
+ ret = "CMD"
+ if self.host:
+ ret += "[remote]"
+ if self.ret is None:
+ ret += f" (unterminated): {self.comm}\n"
+ elif self.ret == 0:
+ ret += f" (success): {self.comm}\n"
+ else:
+ ret += f": {self.comm}\n"
+ ret += f" EXIT: {self.ret}\n"
+ if self.stdout:
+ ret += str_fmt(" STDOUT", self.stdout) + "\n"
+ if self.stderr:
+ ret += str_fmt(" STDERR", self.stderr) + "\n"
+ return ret.strip()
+
class bkg(cmd):
"""
@@ -137,11 +160,12 @@ class bkg(cmd):
def __exit__(self, ex_type, ex_value, ex_tb):
# Force termination on exception
- terminate = self.terminate or (self._exit_wait and ex_type)
+ terminate = self.terminate or (self._exit_wait and ex_type is not None)
return self.process(terminate=terminate, fail=self.check_fail)
-global_defer_queue = []
+GLOBAL_DEFER_QUEUE = []
+GLOBAL_DEFER_ARMED = False
class defer:
@@ -153,7 +177,9 @@ class defer:
self.args = args
self.kwargs = kwargs
- self._queue = global_defer_queue
+ if not GLOBAL_DEFER_ARMED:
+ raise Exception("defer queue not armed, did you use defer() outside of a test case?")
+ self._queue = GLOBAL_DEFER_QUEUE
self._queue.append(self)
def __enter__(self):
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 4dd6278cd3dd..22ba0da2adb8 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,6 +11,7 @@ TEST_PROGS := \
mptcp_connect_checksum.sh \
mptcp_connect_mmap.sh \
mptcp_connect_sendfile.sh \
+ mptcp_connect_splice.sh \
mptcp_join.sh \
mptcp_sockopt.sh \
pm_netlink.sh \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 10f6f99cfd4e..cbe573c4ab3a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -52,6 +52,7 @@ enum cfg_mode {
CFG_MODE_POLL,
CFG_MODE_MMAP,
CFG_MODE_SENDFILE,
+ CFG_MODE_SPLICE,
};
enum cfg_peek {
@@ -124,7 +125,7 @@ static void die_usage(void)
fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down "
"-- for MPJ tests\n");
fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n");
- fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile|splice] -- use poll(default)/mmap+write/sendfile/splice\n");
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-o option -- test sockopt <option>\n");
fprintf(stderr, "\t-p num -- use port num\n");
@@ -258,7 +259,7 @@ static void set_transparent(int fd, int pf)
}
}
-static void set_mptfo(int fd, int pf)
+static void set_mptfo(int fd)
{
int qlen = 25;
@@ -335,7 +336,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
set_transparent(sock, pf);
if (cfg_sockopt_types.mptfo)
- set_mptfo(sock, pf);
+ set_mptfo(sock);
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@@ -406,21 +407,18 @@ static int sock_connect_mptcp(const char * const remoteaddr,
*peer = a;
break; /* success */
}
+ perror("sendto()");
} else {
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
*peer = a;
break; /* success */
}
- }
- if (cfg_sockopt_types.mptfo) {
- perror("sendto()");
- close(sock);
- sock = -1;
- } else {
perror("connect()");
- close(sock);
- sock = -1;
}
+
+ /* error */
+ close(sock);
+ sock = -1;
}
freeaddrinfo(addr);
@@ -935,6 +933,71 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
return err;
}
+/*
+ * Move data from @infd to @outfd through a temporary pipe using splice(),
+ * until a splice() from @infd returns 0 or a step fails.
+ *
+ * Each round asks for up to @len - winfo->total_len bytes and then pushes
+ * exactly what was read into @outfd.
+ *
+ * Returns 0 on success, 2 if the pipe cannot be created, 3 if reading from
+ * @infd fails, 4 if writing to @outfd fails and 5 if fewer bytes were
+ * written than read.
+ */
+static int do_splice(const int infd, const int outfd, const size_t len,
+		     struct wstate *winfo)
+{
+	ssize_t in_bytes, out_bytes;
+	int pipefd[2];
+	int err;
+
+	err = pipe(pipefd);
+	if (err) {
+		perror("pipe");
+		return 2;
+	}
+
+	/* err is 0 from here on and only changes when a step fails */
+	for (;;) {
+		in_bytes = splice(infd, NULL, pipefd[1], NULL,
+				  len - winfo->total_len,
+				  SPLICE_F_MOVE | SPLICE_F_MORE);
+		if (in_bytes < 0) {
+			perror("splice in");
+			err = 3;
+			break;
+		}
+		if (in_bytes == 0)
+			break;	/* nothing left to move */
+
+		out_bytes = splice(pipefd[0], NULL, outfd, NULL, in_bytes,
+				   SPLICE_F_MOVE | SPLICE_F_MORE);
+		if (out_bytes < 0) {
+			perror("splice out");
+			err = 4;
+			break;
+		}
+		if (in_bytes != out_bytes) {
+			/* both counters are ssize_t, hence %zd */
+			fprintf(stderr, "Unexpected transfer: %zd vs %zd\n",
+				in_bytes, out_bytes);
+			err = 5;
+			break;
+		}
+	}
+
+	close(pipefd[0]);
+	close(pipefd[1]);
+
+	return err;
+}
+
+/*
+ * splice()-based transfer between the input file, the peer socket and the
+ * output file.
+ *
+ * In listen mode the peer's data is received into @outfd first, then @infd
+ * is sent to the peer. Otherwise @infd is sent first, the write side of
+ * @peerfd is shut down, the response is received into @outfd and
+ * *@in_closed_after_out is set.
+ *
+ * Returns 0 on success or the first non-zero do_splice() result.
+ */
+static int copyfd_io_splice(int infd, int peerfd, int outfd, unsigned int size,
+			    bool *in_closed_after_out, struct wstate *winfo)
+{
+	int ret;
+
+	if (listen_mode) {
+		ret = do_splice(peerfd, outfd, size, winfo);
+		return ret ? ret : do_splice(infd, peerfd, size, winfo);
+	}
+
+	ret = do_splice(infd, peerfd, size, winfo);
+	if (ret)
+		return ret;
+
+	shut_wr(peerfd);
+
+	ret = do_splice(peerfd, outfd, size, winfo);
+	*in_closed_after_out = true;
+
+	return ret;
+}
+
static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
{
bool in_closed_after_out = false;
@@ -967,6 +1030,14 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct
&in_closed_after_out, winfo);
break;
+ case CFG_MODE_SPLICE:
+ file_size = get_infd_size(infd);
+ if (file_size < 0)
+ return file_size;
+ ret = copyfd_io_splice(infd, peerfd, outfd, file_size,
+ &in_closed_after_out, winfo);
+ break;
+
default:
fprintf(stderr, "Invalid mode %d\n", cfg_mode);
@@ -1296,8 +1367,8 @@ void xdisconnect(int fd)
int main_loop(void)
{
+ struct addrinfo *peer = NULL;
int fd = 0, ret, fd_in = 0;
- struct addrinfo *peer;
struct wstate winfo;
if (cfg_input && cfg_sockopt_types.mptfo) {
@@ -1380,12 +1451,15 @@ int parse_mode(const char *mode)
return CFG_MODE_MMAP;
if (!strcasecmp(mode, "sendfile"))
return CFG_MODE_SENDFILE;
+ if (!strcasecmp(mode, "splice"))
+ return CFG_MODE_SPLICE;
fprintf(stderr, "Unknown test mode: %s\n", mode);
fprintf(stderr, "Supported modes are:\n");
fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n");
fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n");
fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n");
+ fprintf(stderr, "\t\t\"splice\" - send entire input file (splice), then read response (-l will read input first)\n");
die_usage();
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh
new file mode 100755
index 000000000000..241254a966c9
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m splice "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index 8e0b1b8d84b6..5e222ba977e4 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -1,21 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2025, Kylin Software */
-#include <linux/sock_diag.h>
-#include <linux/rtnetlink.h>
-#include <linux/inet_diag.h>
-#include <linux/netlink.h>
-#include <linux/compiler.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
#include <sys/socket.h>
-#include <netinet/in.h>
-#include <linux/tcp.h>
+
#include <arpa/inet.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <stdio.h>
+#include <netinet/in.h>
+
+#include <linux/compiler.h>
+#include <linux/inet_diag.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/tcp.h>
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index e70d3420954f..dc1f200aaa81 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -603,8 +603,7 @@ wait_rm_addr()
local old_cnt="${2}"
local cnt
- local i
- for i in $(seq 10); do
+ for _ in $(seq 10); do
cnt=$(rm_addr_count ${ns})
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
@@ -623,25 +622,22 @@ wait_rm_sf()
local old_cnt="${2}"
local cnt
- local i
- for i in $(seq 10); do
+ for _ in $(seq 10); do
cnt=$(rm_sf_count ${ns})
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
done
}
+# $1: expected MPJ ACK Rx counter in $ns1
wait_mpj()
{
- local ns="${1}"
- local cnt old_cnt
-
- old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
+ local exp_cnt="${1}"
+ local cnt
- local i
- for i in $(seq 10); do
- cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
- [ "$cnt" = "${old_cnt}" ] || break
+ for _ in $(seq 10); do
+ cnt=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+ [ "${cnt}" = "${exp_cnt}" ] && break
sleep 0.1
done
}
@@ -650,8 +646,7 @@ wait_ll_ready()
{
local ns="${1}"
- local i
- for i in $(seq 50); do
+ for _ in $(seq 50); do
ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" |
grep -qw "tentative" || break
sleep 0.1
@@ -1407,7 +1402,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$create" ]; then
rc=${KSFT_FAIL}
print_check "syn tx create socket error"
@@ -1416,7 +1411,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$bind" ]; then
rc=${KSFT_FAIL}
print_check "syn tx bind error"
@@ -1425,7 +1420,7 @@ chk_join_tx_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$connect" ]; then
rc=${KSFT_FAIL}
print_check "syn tx connect error"
@@ -1451,7 +1446,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$infinite_map_tx" ]; then
rc=${KSFT_FAIL}
print_check "$ns infinite map tx fallback"
@@ -1460,7 +1455,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$dss_corruption" ]; then
rc=${KSFT_FAIL}
print_check "$ns dss corruption fallback"
@@ -1469,7 +1464,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$simult_conn" ]; then
rc=${KSFT_FAIL}
print_check "$ns simult conn fallback"
@@ -1478,7 +1473,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_passive" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc passive fallback"
@@ -1487,7 +1482,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_active" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc active fallback"
@@ -1496,7 +1491,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$mpc_data" ]; then
rc=${KSFT_FAIL}
print_check "$ns mpc data fallback"
@@ -1505,7 +1500,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$md5_sig" ]; then
rc=${KSFT_FAIL}
print_check "$ns MD5 Sig fallback"
@@ -1514,7 +1509,7 @@ chk_fallback_nr()
count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$dss" ]; then
rc=${KSFT_FAIL}
print_check "$ns dss fallback"
@@ -1590,7 +1585,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "0" ]; then
rc=${KSFT_FAIL}
print_check "synack HMAC"
@@ -1599,7 +1594,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$ack_nr" ]; then
rc=${KSFT_FAIL}
print_check "ack rx"
@@ -1608,7 +1603,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "0" ]; then
rc=${KSFT_FAIL}
print_check "ack HMAC"
@@ -1617,7 +1612,7 @@ chk_join_nr()
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
if [ -z "$count" ]; then
- rc=${KSFT_SKIP}
+ : # ignore skip
elif [ "$count" != "$syn_rej" ]; then
rc=${KSFT_FAIL}
print_check "syn rejected"
@@ -1650,7 +1645,6 @@ chk_stale_nr()
local stale_min=$2
local stale_max=$3
local stale_delta=$4
- local dump_stats
local stale_nr
local recover_nr
@@ -1666,16 +1660,11 @@ chk_stale_nr()
fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \
" expected stale in range [$stale_min..$stale_max]," \
" stale-recover delta $stale_delta"
- dump_stats=1
+ echo $ns stats
+ ip -n $ns -s link show
else
print_ok
fi
-
- if [ "${dump_stats}" = 1 ]; then
- echo $ns stats
- ip netns exec $ns ip -s link show
- ip netns exec $ns nstat -as | grep MPTcp
- fi
}
chk_add_nr()
@@ -3718,7 +3707,6 @@ userspace_pm_add_addr()
tk=$(mptcp_lib_evts_get_info token "$evts")
ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
- sleep 1
}
# $1: ns ; $2: id
@@ -3749,7 +3737,6 @@ userspace_pm_add_sf()
ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
rip $da rport $dp token $tk
- sleep 1
}
# $1: ns ; $2: addr $3: event type
@@ -3999,9 +3986,11 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_addr $ns1 10.0.2.1 10
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1
userspace_pm_add_addr $ns1 10.0.3.1 20
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 2
chk_join_nr 2 2 2
chk_add_nr 2 2
chk_mptcp_info subflows 2 subflows 2
@@ -4032,8 +4021,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -4060,10 +4050,11 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
userspace_pm_add_sf $ns2 10.0.3.2 0
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
userspace_pm_chk_dump_addr "${ns2}" \
"id 0 flags subflow 10.0.3.2" "id 0 subflow"
chk_join_nr 1 1 1
@@ -4081,8 +4072,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -4105,8 +4097,9 @@ userspace_tests()
{ timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_addr $ns1 10.0.2.1 10
+ wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_mptcp_info subflows 1 subflows 1
@@ -4133,6 +4126,7 @@ userspace_tests()
local tests_pid=$!
wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
userspace_pm_add_sf $ns2 10.0.3.2 20
+ wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
@@ -4158,7 +4152,7 @@ endpoint_tests()
{
# subflow_rebuild_header is needed to support the implicit flag
# userspace pm type prevents add_addr
- if reset "implicit EP" &&
+ if reset_with_events "implicit EP" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
@@ -4167,7 +4161,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns1
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 1 flags implicit
chk_mptcp_info subflows 1 subflows 1
@@ -4181,6 +4175,7 @@ endpoint_tests()
pm_nl_check_endpoint "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
mptcp_lib_kill_group_wait $tests_pid
+ kill_events_pids
fi
if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
@@ -4194,7 +4189,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete id 2" 2
@@ -4206,7 +4201,7 @@ endpoint_tests()
chk_mptcp_info subflows 0 subflows 0
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- wait_mpj $ns2
+ wait_mpj 2
chk_subflow_nr "after re-add id 2" 2
chk_mptcp_info subflows 1 subflows 1
@@ -4218,7 +4213,7 @@ endpoint_tests()
ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
pm_nl_del_endpoint $ns2 3 10.0.3.2
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- wait_mpj $ns2
+ wait_mpj 3
chk_subflow_nr "after no reject" 3
chk_mptcp_info subflows 2 subflows 2
@@ -4230,7 +4225,7 @@ endpoint_tests()
chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf
pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
- wait_mpj $ns2
+ wait_mpj $((3 + i))
chk_subflow_nr "after re-add id 0 ($i)" 3
chk_mptcp_info subflows 3 subflows 3
done
@@ -4272,7 +4267,7 @@ endpoint_tests()
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
- wait_mpj $ns2
+ wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
pm_nl_check_endpoint "creation" \
$ns1 10.0.2.1 id 1 flags signal
chk_subflow_nr "before delete" 2
@@ -4288,7 +4283,7 @@ endpoint_tests()
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
- wait_mpj $ns2
+ wait_mpj 3
chk_subflow_nr "after re-add" 3
chk_mptcp_info subflows 2 subflows 2
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
@@ -4300,7 +4295,7 @@ endpoint_tests()
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
- wait_mpj $ns2
+ wait_mpj 4
chk_subflow_nr "after re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
@@ -4312,7 +4307,7 @@ endpoint_tests()
chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
- wait_mpj $ns2
+ wait_mpj 5
chk_subflow_nr "after re-re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
@@ -4361,9 +4356,9 @@ endpoint_tests()
wait_rm_addr $ns2 0
ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- wait_mpj $ns2
+ wait_mpj 1
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
- wait_mpj $ns2
+ wait_mpj 2
mptcp_lib_kill_group_wait $tests_pid
join_syn_tx=3 join_connect_err=1 \
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 12ce61fa15a8..979cff56e1f5 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -29,6 +29,7 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_SCTP=m
CONFIG_IPV6=y
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_TUNNEL=m
CONFIG_IP_VS=m
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_RR=m
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a68bc882fa4e..7a34ef468975 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -592,16 +592,33 @@ ip -net "$nsr1" link set tun0 up
ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link set tun6 up
+ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
+
ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
ip -net "$nsr2" link set tun0 up
ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1
+ip -net "$nsr2" link set tun6 up
+ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
+
ip -net "$nsr1" route change default via 192.168.100.2
ip -net "$nsr2" route change default via 192.168.100.1
+
+# do not use "route change" and delete old default so
+# socat fails to connect in case new default can't be added.
+ip -6 -net "$nsr1" route delete default
+ip -6 -net "$nsr1" route add default via fee1:3::2
+ip -6 -net "$nsr2" route delete default
+ip -6 -net "$nsr2" route add default via fee1:3::1
ip -net "$ns2" route add default via 10.0.2.1
+ip -6 -net "$ns2" route add default via dead:2::1
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
ip netns exec "$nsr1" nft -a insert rule inet filter forward \
'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
@@ -611,28 +628,53 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
+else
+ echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# Create vlan tagged devices for IPIP traffic.
ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
ip -net "$nsr1" link set veth1.10 up
ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
-ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
-ip -net "$nsr1" link set tun1 up
-ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+
+ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun0.10 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
ip -net "$nsr1" route change default via 192.168.200.2
-ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
-ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
+
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link set tun6.10 up
+ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
+ip -6 -net "$nsr1" route delete default
+ip -6 -net "$nsr1" route add default via fee1:5::2
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
ip -net "$nsr2" link set veth0.10 up
ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
-ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
-ip -net "$nsr2" link set tun1 up
-ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+
+ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun0.10 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
ip -net "$nsr2" route change default via 192.168.200.1
-ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1
+ip -net "$nsr2" link set tun6.10 up
+ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
+ip -6 -net "$nsr2" route delete default
+ip -6 -net "$nsr2" route add default via fee1:5::1
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
@@ -640,10 +682,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
+else
+ echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# Restore the previous configuration
ip -net "$nsr1" route change default via 192.168.10.2
ip -net "$nsr2" route change default via 192.168.10.1
ip -net "$ns2" route del default via 10.0.2.1
+ip -6 -net "$ns2" route del default via dead:2::1
}
# Another test:
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 6136ceec45e0..139bc1211878 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -510,7 +510,7 @@ EOF
udp_listener_ready()
{
- ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345
+ ss -S -N "$1" -uln -o "sport = :$2" | grep -q "$2"
}
output_files_written()
@@ -518,7 +518,7 @@ output_files_written()
test -s "$1" && test -s "$2"
}
-test_udp_ct_race()
+test_udp_nat_race()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
flush ruleset
@@ -545,8 +545,8 @@ EOF
ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
local nfqpid=$!
- busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
- busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12
# Send two packets, one should end up in ns2, other in ns3.
@@ -557,7 +557,7 @@ EOF
busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"
- kill "$nfqpid"
+ kill "$nfqpid" "$rpid1" "$rpid2"
if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then
echo "FAIL: Expected One udp conntrack entry"
@@ -585,6 +585,135 @@ EOF
echo "PASS: both udp receivers got one packet each"
}
+# Make sure UDPGRO aggregated packets don't lose
+# their skb->nfct entry when nfqueue passes the
+# skb to userspace with software gso segmentation on.
+test_udp_gro_ct()
+{
+ local errprefix="FAIL: test_udp_gro_ct:"
+
+ ip netns exec "$nsrouter" conntrack -F 2>/dev/null
+
+ ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet udpq {
+ # Number of packets/bytes queued to userspace
+ counter toqueue { }
+ # Number of packets/bytes reinjected from userspace with 'ct new' intact
+ counter fromqueue { }
+ # These two counters should be identical and not 0.
+
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+
+ # userspace sends small packets, if < 1000, UDPGRO did
+ # not kick in, but test needs a 'new' conntrack with udpgro skb.
+ meta iifname veth0 meta l4proto udp meta length > 1000 accept
+
+ # don't pick up non-gso packets and don't queue them to
+ # userspace.
+ notrack
+ }
+
+ chain postrouting {
+ type filter hook postrouting priority 0; policy accept;
+
+ # Only queue unconfirmed fraglist gro skbs to userspace.
+ udp dport 12346 ct status ! confirmed counter name "toqueue" mark set 1 queue num 1
+ }
+
+ chain validate {
+ type filter hook postrouting priority 1; policy accept;
+ # ... and only count those that were reinjected with the
+ # skb->nfct intact.
+ mark 1 counter name "fromqueue"
+ }
+}
+EOF
+ timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" &
+ local nfqpid=$!
+
+ ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on
+
+ busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12346
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+ local bs=512
+ local count=$(((32 * 1024 * 1024) / bs))
+ dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do
+ timeout 5 ip netns exec "$ns1" \
+ socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 &
+ done
+
+ busywait 10000 test -s "$TMPFILE1"
+
+ kill "$rpid"
+
+ wait
+
+ local p
+ local b
+ local pqueued
+ local bqueued
+
+ c=$(ip netns exec "$nsrouter" nft list counter inet udpq "toqueue" | grep packets)
+ read p pqueued b bqueued <<EOF
+$c
+EOF
+ local preinject
+ local breinject
+ c=$(ip netns exec "$nsrouter" nft list counter inet udpq "fromqueue" | grep packets)
+ read p preinject b breinject <<EOF
+$c
+EOF
+ ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding off
+ ip netns exec "$nsrouter" ethtool -K "veth1" rx-udp-gro-forwarding off
+
+ if [ "$pqueued" -eq 0 ];then
+ # happens when gro did not build at least one aggregate
+ echo "SKIP: No packets were queued"
+ return
+ fi
+
+ local saw_ct_entry=0
+ if ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12346 2>/dev/null | wc -l | grep -q "^1"'; then
+ saw_ct_entry=1
+ else
+ echo "$errprefix Expected udp conntrack entry"
+ ip netns exec "$nsrouter" conntrack -L
+ ret=1
+ fi
+
+ if [ "$pqueued" -ge "$preinject" ] ;then
+ echo "$errprefix Expected software segmentation to occur, had $pqueued and $preinject"
+ ret=1
+ return
+ fi
+
+ # sw segmentation adds extra udp and ip headers.
+ local breinject_expect=$((preinject * (512 + 20 + 8)))
+
+ if [ "$breinject" -eq "$breinject_expect" ]; then
+ if [ "$saw_ct_entry" -eq 1 ];then
+ echo "PASS: fraglist gro skb passed with conntrack entry"
+ else
+ echo "$errprefix fraglist gro skb passed without conntrack entry"
+ ret=1
+ fi
+ else
+ echo "$errprefix Counter mismatch, conntrack entry dropped by nfqueue? Queued: $pqueued, $bqueued. Post-queue: $preinject, $breinject. Expected $breinject_expect"
+ ret=1
+ fi
+
+ if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
+ echo "$errprefix: Could not delete udpq table"
+ ret=1
+ fi
+}
+
test_queue_removal()
{
read tainted_then < /proc/sys/kernel/tainted
@@ -663,7 +792,8 @@ test_tcp_localhost_connectclose
test_tcp_localhost_requeue
test_sctp_forward
test_sctp_output
-test_udp_ct_race
+test_udp_nat_race
+test_udp_gro_ct
# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt
new file mode 100644
index 000000000000..07e9936e70e6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt
@@ -0,0 +1,24 @@
+// 3rd ACK + 1st data segment lost, data segments with ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
+// 1st data segment lost
++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 1000 e0b 1,nop,nop,nop,sack 1001:2001>
++.002 accept(3, ..., ...) = 4
+
++0.2 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1,nop>
+
++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 264
++.001 > [ect0] . 1:1(0) ack 3001 <ECN e1b 1 ceb 3000 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt
new file mode 100644
index 000000000000..76b8422b34dc
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt
@@ -0,0 +1,30 @@
+// 3rd ACK + 1st data segment lost, 2nd data segment with CE
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 2000) = 2000
+// 1st data segment lost + 2nd gets CE
++.002 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.000 > [ect0] P.5 1005:2001(996) ack 1 <ECN e1b 1 ceb 0 e0b 1, nop>
++0.05 < [ect0] .6 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 996 e1b 1,nop,nop,nop,sack 1005:2001>
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
+
++0.002~+0.1 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.05 < [ect0] .6 1:1(0) ack 2001 win 264 <ECN e0b 1005 ceb 996 e1b 1,nop>
+
++0.01 write(4, ..., 1000) = 1000
++0~+0.002 > [ect0] P.5 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.1 < [ect0] .5 1:1001(1000) ack 3001 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++0~+0.01 > [ect0] .5 3001:3001(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt
new file mode 100644
index 000000000000..84060e490589
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt
@@ -0,0 +1,19 @@
+// Test 3rd ACK flags when SYN-ACK is rexmitted
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.1 < [ect0] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Our code currently sends a challenge ACK
+// when it receives a SYN in ESTABLISHED state
+// based on the latest SYN
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt
new file mode 100644
index 000000000000..d3fe09d0606f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt
@@ -0,0 +1,18 @@
+// Third ACK CE increases r.cep
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ce] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] WAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt
new file mode 100644
index 000000000000..d28722db42b1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt
@@ -0,0 +1,22 @@
+// 3rd ACK lost, CE for the first data segment
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1 ,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt
new file mode 100644
index 000000000000..a4d808116e34
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt
@@ -0,0 +1,26 @@
+// Test SYN/ACK rexmit triggered 3rd ACK duplicate + CE on first data seg
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// SYN/ACK rexmitted => two 3rd ACKs in-flight
++1.0~+1.1 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// Delivered 1st 3rd ACK
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// Duplicate 3rd ACK delivered
++1.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
+
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt
new file mode 100644
index 000000000000..410a303c6d49
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt
@@ -0,0 +1,13 @@
+// Test that when accurate ECN is disabled,
+// client uses RFC3168 ECN for SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt
new file mode 100644
index 000000000000..10728114b11b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt
@@ -0,0 +1,28 @@
+// Test that a server which receives an AccECN SYN and
+// then a retransmitted SYN without ECN flags (ACE=0)
+// still negotiates AccECN, but sends its data as
+// not-ECT (ip-noecn) because of the non-ECN SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN
++0.1 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0
++0.01 write(4, ..., 100) = 100
++.002 > [noecn] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt
new file mode 100644
index 000000000000..04d928f0d44d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt
@@ -0,0 +1,18 @@
+// Test AccECN -> RFC3168 fallback when sysctl asks for RFC3168 ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt
new file mode 100644
index 000000000000..788af6bea69c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt
@@ -0,0 +1,34 @@
+// Client negotiates AccECN and starts sending
+// AccECN option in last ACK and data segments
+// Middlebox drops AccECN option and client
+// reverts to ACE flags only
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+sysctl -q net.ipv4.tcp_ecn_option_beacon=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 2001,nop,nop,nop,sack 1:1001>
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <nop,nop,sack 1:1001>
+
++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 2001
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt
new file mode 100644
index 000000000000..f5839c2e682d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt
@@ -0,0 +1,38 @@
+// Client negotiates AccECN and starts sending
+// AccECN option in last ACK and data segments
+// Middlebox accepts AccECN option but some packets
+// are lost due to congestion. Client should
+// continue to send AccECN option
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.102 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1024,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// Send
++0.01 write(4, ..., 3000) = 3000
++.002 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 1013:2025(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 2025:3001(976) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// First two segments were lost due to congestion as SACK was
+// received acknowledging 3rd segment
++0.1 < [ect0] .5 1:1(0) ack 1 win 264 <ECN e1b 1 ceb 0 e0b 977,nop,nop,nop,sack 2025:3001>
+
+// Since data with option was SACKed, we can
+// continue to use AccECN option for the rest of
+// the connection. The next packet is a retransmission
++.02~+0.5 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.1 < [ect0] .5 1:1(0) ack 3001 win 264 <ECN e1b 1 ceb 0 e0b 3000,nop>
+
+// Send new data, it should contain AccECN option
++0.01 write(4, ..., 2000) = 2000
++.002 > [ect0] .5 3001:4013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 4013:5001(988) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt
new file mode 100644
index 000000000000..c00b36d6a833
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt
@@ -0,0 +1,12 @@
+// tcp_ecn=5 enables AccECN on the server side only: the client SYN requests no ECN/AccECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=5
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8>
++.002 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt
new file mode 100644
index 000000000000..f9c27f39f354
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt
@@ -0,0 +1,25 @@
+// Test basic connection teardown where local process closes first:
+// the local process calls close() first, so we send a FIN, and receive an ACK.
+// Then we receive a FIN and ACK it.
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.01...0.011 connect(3, ..., ...) = 0
+ +0 > [noecn] SEWA 0:0(0) <...>
+ +0 < [ect1] SW. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > [ect0] EW. 1:1(0) ack 1
+
+ +0 write(3, ..., 1000) = 1000
+ +0 > [ect0] P5. 1:1001(1000) ack 1
+ +0 < [ect0] .5 1:1(0) ack 1001 win 257
+
+ +0 close(3) = 0
+ +0 > [ect0] F5. 1001:1001(0) ack 1
+ +0 < [ect0] .5 1:1(0) ack 1002 win 257
+
+ +0 < [ect0] F5. 1:1(0) ack 1002 win 257
+ +0 > [ect0] . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt
new file mode 100644
index 000000000000..6d771234124a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
++0.05 < [ect0] .5 1:1(0) ack 1461 win 264
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 8, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt
new file mode 100644
index 000000000000..76384f52b021
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt
@@ -0,0 +1,31 @@
+// Test false overflow detection with option used to rule out overflow
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
+// Stop sending option to allow easier testing
++0 `sysctl -q net.ipv4.tcp_ecn_option=0`
+
++0.002 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+
++0.05 < [ect0] .5 1:1(0) ack 1460 win 264 <ECN e0b 1461 ceb 0 e1b 1,nop>
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 <ECN e0b 14601 ceb 0 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_e0_bytes == 14600, tcpi_delivered_e0_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt
new file mode 100644
index 000000000000..8bce5dce35a2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt
@@ -0,0 +1,24 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt
new file mode 100644
index 000000000000..5f2b147214f4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+ // Fake CE
++0.05 < [ect0] .6 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt
new file mode 100644
index 000000000000..fd07bdc14f37
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (at ACE field max delta)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+ // Fake CE
++0.05 < [ect0] .4 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 7, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt
new file mode 100644
index 000000000000..cb1e70ff2d26
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt
@@ -0,0 +1,70 @@
+// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake CE
++0.05 < [ect0] WA. 1:1(0) ack 1001 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 1, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 1000, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ect0
++0.05 < [ect0] WA. 1:1(0) ack 2001 win 264 <ECN e0b 1001 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 1, tcpi_delivered_ce
+assert tcpi_delivered_e0_bytes == 1000, tcpi_delivered_e0_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ce
++0.05 < [ect0] EWA. 1:1(0) ack 3001 win 264 <ECN e0b 1001 ceb 2000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 2, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 2000, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 3001:4001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ect1
++0.05 < [ect0] EWA. 1:1(0) ack 4001 win 264 <ECN e0b 1001 ceb 2000 e1b 1001,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 2, tcpi_delivered_ce
+assert tcpi_delivered_e1_bytes == 1000, tcpi_delivered_e1_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 4001:5001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake ce
++0.05 < [ect0] . 1:1(0) ack 5001 win 264 <ECN e0b 1001 ceb 3000 e1b 1001,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 3, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 3000, tcpi_delivered_ce_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt
new file mode 100644
index 000000000000..6627c7bb2d26
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt
@@ -0,0 +1,12 @@
+// Test that tcp_ecn=4 uses RFC3168 ECN for SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=4
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.05 connect(4, ..., ...) = 0
+
++.002 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt
new file mode 100644
index 000000000000..51879477bb50
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt
@@ -0,0 +1,35 @@
+// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 2001 <ECN e1b 1 ceb 1000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EWA. 1:1(0) ack 3001 <ECN e1b 1 ceb 2000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect1] EAP. 3001:4001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EWA. 1:1(0) ack 4001 <ECN e1b 1001 ceb 2000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
+
++0.05 < [ce] EAP. 4001:5001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] . 1:1(0) ack 5001 <ECN e1b 1001 ceb 3000 e0b 1001,nop>
+ +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt
new file mode 100644
index 000000000000..0c72fa4a1251
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt
@@ -0,0 +1,14 @@
+// Test IP flags drop
+--tolerance_usecs=50000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 1.1 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02 ~ +1.1 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt
new file mode 100644
index 000000000000..171f9433e55f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt
@@ -0,0 +1,16 @@
+// SYN/ACK option drop test
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.02 ~+2 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.02 ~+5 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.02 ~+8 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt
new file mode 100644
index 000000000000..0f65cf56cd2b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++4~+4.4 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK after sending 3rd retransmission, not a blackhole
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt
new file mode 100644
index 000000000000..343181633980
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt
@@ -0,0 +1,18 @@
+// Test that SYN with ACE flags and without
+// ACE flags got dropped. Although we disable
+// ECN, we shouldn't consider this as blackholed
+// as these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 3.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt
new file mode 100644
index 000000000000..37dabc4603c8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt
@@ -0,0 +1,23 @@
+// Test AccECN flags bleach
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] . 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [noecn] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt
new file mode 100644
index 000000000000..5b14892fda51
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 0,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt
new file mode 100644
index 000000000000..25f7cb2feb25
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt
@@ -0,0 +1,26 @@
+// Test basic AccECN negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt
new file mode 100644
index 000000000000..50e08c492a69
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation without option
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt
new file mode 100644
index 000000000000..2904f1ba9975
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation, late option enable
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt
new file mode 100644
index 000000000000..64e0fc1c1f14
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt
@@ -0,0 +1,20 @@
+// Test client behavior on receiving a non ECN SYN-ACK
+// after receiving an AccECN SYN-ACK and moving to
+// ESTABLISHED state
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+// Receive an AccECN SYN-ACK and move to ESTABLISHED
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// Receive a non ECN SYN-ACK and send a challenge ACK with ACE feedback
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt
new file mode 100644
index 000000000000..f407c629a3f7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt
@@ -0,0 +1,27 @@
+// Test basic AccECN negotiation with option off using sysctl
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt
new file mode 100644
index 000000000000..32454e7187f9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt
@@ -0,0 +1,27 @@
+// Test no progress filtering
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+ // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt
new file mode 100644
index 000000000000..6597d5f2d778
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN
++0.1 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
+// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0
++0.01 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt
new file mode 100644
index 000000000000..0f97dfcfa82d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt
@@ -0,0 +1,18 @@
+// Test RFC3168 fallback when sysctl asks for AccECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt
new file mode 100644
index 000000000000..9baffdd66fe5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt
@@ -0,0 +1,18 @@
+// Test RFC3168 ECN when sysctl asks for RFC3168 ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt
new file mode 100644
index 000000000000..3fc56f9c6a6f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt
@@ -0,0 +1,28 @@
+// Test SACK space grab to fit AccECN option
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264
++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001>
++.01 < [ect0] EAP. 3001:4001(1000) ack 1 win 264
++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1001,nop,nop,nop,sack 3001:4001 1001:2001>
++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 1001,nop,nop,nop,sack 5001:6001 3001:4001 1001:2001>
+// DSACK works?
++.01 < [ect0] EAP. 5001:6001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:6001 5001:6001 3001:4001>
++.01 < [ect1] EAP. 6001:7001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 2001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:7001 3001:4001 1001:2001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt
new file mode 100644
index 000000000000..1c075b5d81ae
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt
@@ -0,0 +1,39 @@
+// Test SACK space grab to fit AccECN option
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 100,ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// One SACK block should allow all 3 AccECN fields:
++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 <nop,nop,TS val 3 ecr 100>
++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 160 ecr 2,ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001>
+
+// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check ect1 arriving.
++.01 < [ect1] EAP. 3001:4001(1000) ack 1 win 264 <nop,nop,TS val 4 ecr 100>
++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 172 ecr 2,ECN e1b 2001 ceb 0,nop,nop,sack 3001:4001 1001:2001>
+
+// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check CE arriving.
++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 184 ecr 2,ECN e1b 2001 ceb 1000,nop,nop,sack 5001:6001 3001:4001>
+
+// Check that DSACK works, using 2 SACK blocks in total, if we only need to use 2 AccECN fields: check ect1 arriving.
++.01 < [ect1] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 196 ecr 2,ECN e1b 3001 ceb 1000,nop,nop,sack 5001:6001 5001:6001>
+
+// Check the case where the AccECN option doesn't fit, because sending ect0
+// with order 1 would require 3 AccECN fields,
+// and TS (12 bytes) + 2 SACK blocks (20 bytes) + 3 AccECN fields (2 + 3*3 bytes) > 40 bytes.
+// That's OK; Linux TCP AccECN is optimized for the ECT1 case, not ECT0.
++.01 < [ect0] EAP. 6001:7001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 204 ecr 2,nop,nop,sack 5001:7001 3001:4001 1001:2001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt
new file mode 100644
index 000000000000..6b88ab78bfce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt
@@ -0,0 +1,20 @@
+// Test against classic ECN server
+// Not-ECT on SYN and server sets 1|0|1 (AE is unused for classic ECN)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SEA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [ect0] F.5 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt
new file mode 100644
index 000000000000..d24ada008ece
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt
@@ -0,0 +1,20 @@
+// Test against classic ECN server
+// Not-ECT on SYN and server sets 0|0|1
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SE. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [ect0] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++0 > [noecn] F. 101:101(0) ack 1 <...>
++0.1 < R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt
new file mode 100644
index 000000000000..a20d7e890ee1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt
@@ -0,0 +1,19 @@
+// Test against broken server (1|1|1)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SEWA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [noecn] F. 101:101(0) ack 1 <...>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt
new file mode 100644
index 000000000000..428255bedab7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt
@@ -0,0 +1,19 @@
+// Test against Non ECN server (0|0|0)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [noecn] F. 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt
new file mode 100644
index 000000000000..e9a5a0d3677c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt
@@ -0,0 +1,18 @@
+// Test AccECN with sysctl set to server-side only
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=5
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt
new file mode 100644
index 000000000000..412fa903105c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt
@@ -0,0 +1,18 @@
+// Test that SYN with ACE flags was Acked
+// after 2nd retransmission. In this case,
+// since we got SYN-ACK that supports Accurate
+// ECN, we consider this as successful negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 2.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+
++0.1 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt
new file mode 100644
index 000000000000..4622754a2270
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt
@@ -0,0 +1,16 @@
+// Test that SYN with ACE flags got dropped
+// We retry one more time with ACE and then
+// fall back to disabled ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 2.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt
new file mode 100644
index 000000000000..ee15f108cafe
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt
@@ -0,0 +1,27 @@
+// Test that SYN-ACK with ACE flags was Acked
+// after 2nd retransmission. In this case,
+// since we got the last ACK that supports Accurate
+// ECN, we consider this as successful negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK with ACE flags, state should be set to negotiation succeeded
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt
new file mode 100644
index 000000000000..ccfe353a8ee4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt
@@ -0,0 +1,26 @@
+// Test that SYN-ACK with ACE flags got dropped
+// We retry one more time with ACE and then
+// fall back to disabled ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK with no ACE flags, state should be set to blackholed
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++0 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt
new file mode 100644
index 000000000000..dc83f7a18180
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ce] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SWA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt
new file mode 100644
index 000000000000..e63a8d018c37
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ect0] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt
new file mode 100644
index 000000000000..23c0e43b3dbe
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < [ect1] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SEW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt
new file mode 100644
index 000000000000..c3497738f680
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt
@@ -0,0 +1,27 @@
+// Test SYNACK CE & received_ce update
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ce] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.6 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .6 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.6 101:201(100) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
+
++0.1 < [ect1] P.5 201:301(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .6 201:201(0) ack 101 <ECN e1b 101 ceb 0 e0b 101,nop,nop,nop,sack 201:301>
+
++0.01 < [ce] .6 401:501(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .7 201:201(0) ack 101 <ECN e1b 101 ceb 100 e0b 101,nop,nop,nop,sack 401:501 201:301>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt
new file mode 100644
index 000000000000..5fd77f466572
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt
@@ -0,0 +1,22 @@
+// Reflected SYNACK CE mark increases delivered_ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_fallback=0
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// Fake ce for prev, ECT validator must be disabled for this to work
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt
new file mode 100644
index 000000000000..f6ad1ea5c0c4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt
@@ -0,0 +1,24 @@
+// Test SYN=0 reflector
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
+
++0.01 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
++0 read(4, ..., 100) = 100
+
++0 close(4) = 0
++0 > F.5 101:101(0) ack 101 <...>
++0.1 < R. 101:101(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt
new file mode 100644
index 000000000000..7ecfc5fb9dbb
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt
@@ -0,0 +1,24 @@
+// Test SYN=0 reflector
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ect1] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] EW. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect1] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
+
++0.01 < [ect1] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 101 ceb 0 e0b 1,nop>
++0 read(4, ..., 100) = 100
+
++0 close(4) = 0
++0 > F5. 101:101(0) ack 101 <...>
++0.1 < R. 101:101(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt
new file mode 100644
index 000000000000..9e0959782ef5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt
@@ -0,0 +1,15 @@
+// Test 3rd ACK flags when SYN-ACK is rexmitted
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt
new file mode 100644
index 000000000000..a5a41633af07
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt
@@ -0,0 +1,25 @@
+// Test that we retransmit SYN-ACK with ACE and without
+// AccECN options after
+// SYN-ACK was lost and TCP moved to TCPS_SYN_RECEIVED
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// We try to write with AccECN option
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt
new file mode 100644
index 000000000000..f3fe2f098966
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt
@@ -0,0 +1,26 @@
+// Test TS progress filtering
+--tcp_ts_tick_usecs=1000
+--tolerance_usecs=7000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2>
+ // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 83>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt
new file mode 100644
index 000000000000..1446799d2481
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt
@@ -0,0 +1,25 @@
+// Test TS progress filtering
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2>
+ // Fake CE with no ACK-number progress; only the TS val advances (2 -> 3)
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 3 ecr 83>
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt
new file mode 100644
index 000000000000..319f81dd717d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Minimal active open.
+// First to close connection.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+
+ // Connect to server: active open: three-way handshake
+ +0...0 connect(4, ..., ...) = 0
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 65535 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ // Send data
+ +0 send(4, ..., 1000, 0) = 1000
+ +0 > P. 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1001 win 257
+
+ +0 close(4) = 0
+ +0 > F. 1001:1001(0) ack 1
+ +0 < F. 1:1(0) ack 1002 win 257
+ +0 > . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt
new file mode 100644
index 000000000000..e72a291b666e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Minimal passive open.
+// Peer is first to close.
+
+`./defaults.sh`
+
+ // Open listener socket
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ // Incoming connection: passive open: three-way handshake
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ // Open connection socket and close listener socket
+ +0 accept(3, ..., ...) = 4
+ +0 close(3) = 0
+
+ // Peer sends data: acknowledge and receive
+ +0 < P. 1:1001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 1001
+ +0 recv(4, ..., 1000, 0) = 1000
+
+ // Peer initiates connection close
+ +0 < F. 1001:1001(0) ack 1 win 257
+ +.04 > . 1:1(0) ack 1002
+
+ // Local socket also closes its side
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 1002
+ +0 < . 1002:1002(0) ack 2 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt
new file mode 100644
index 000000000000..95a1957a2cf9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test after "tcp: tcp_tx_timestamp() must look at the rtx queue"
+
+// This test is about receiving the SCM_TSTAMP_ACK,
+// we do not care about its SCM_TIMESTAMPING precision.
+--tolerance_usecs=1000000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_min_tso_segs=70
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.010 < S. 0:0(0) ack 1 win 65535 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [30000], 4) = 0
+
+ +0 write(3, ..., 9880) = 9880
+ +0 > P. 1:9881(9880) ack 1 <nop,nop,TS val 200 ecr 700>
++.010 < . 1:1(0) ack 9881 win 10000 <nop,nop,TS val 701 ecr 200>
+
+ +0 write(3, ..., 19760) = 19760
+ +0 > P. 9881:29641(19760) ack 1 <nop,nop,TS val 201 ecr 701>
++.010 < . 1:1(0) ack 29641 win 10000 <nop,nop,TS val 702 ecr 201>
+
+ +0 write(3, ..., 39520) = 39520
+ +0 > P. 29641:69161(39520) ack 1 <nop,nop,TS val 202 ecr 702>
++.010 < . 1:1(0) ack 69161 win 10000 <nop,nop,TS val 703 ecr 202>
+
+// One more write to increase cwnd
+ +0 write(3, ..., 79040) = 79040
+ +0 > P. 69161:108681(39520) ack 1 <nop,nop,TS val 203 ecr 703>
+ +0 > P. 108681:148201(39520) ack 1 <nop,nop,TS val 203 ecr 703>
++.010 < . 1:1(0) ack 148201 win 1000 <nop,nop,TS val 704 ecr 203>
+
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// We have one write filling one skb
+// last byte can not be stored because of our small SO_SNDBUF
+ +0 write(3, ..., 65209) = 65208
+ +0 > P. 148201:213409(65208) ack 1 <nop,nop,TS val 204 ecr 704>
++.010 < . 1:1(0) ack 213409 win 1000 <nop,nop,TS val 705 ecr 204>
+
+// SCM_TSTAMP_ACK should be received after the last ack at
+// t=60ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=60000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=65207}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
index 8d82140f0f76..3b1ee2d3d417 100644
--- a/tools/testing/selftests/net/tfo.c
+++ b/tools/testing/selftests/net/tfo.c
@@ -82,8 +82,10 @@ static void run_server(void)
error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
if (read(connfd, buf, 64) < 0)
- perror("read()");
- fprintf(outfile, "%d\n", opt);
+ error(1, errno, "read()");
+
+ if (fprintf(outfile, "%d\n", opt) < 0)
+ error(1, errno, "fprintf()");
fclose(outfile);
close(connfd);
@@ -92,14 +94,17 @@ static void run_server(void)
static void run_client(void)
{
- int fd;
+ int fd, ret;
char *msg = "Hello, world!";
fd = socket(AF_INET6, SOCK_STREAM, 0);
if (fd == -1)
error(1, errno, "socket()");
- sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ ret = sendto(fd, msg, strlen(msg), MSG_FASTOPEN,
+ (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+ if (ret < 0)
+ error(1, errno, "sendto()");
close(fd);
}
diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
index a4550511830a..f116f888b794 100755
--- a/tools/testing/selftests/net/tfo_passive.sh
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -85,12 +85,15 @@ timeout -k 1s 30s ip netns exec nssv ./tfo \
-s \
-p ${SERVER_PORT} \
-o ${out_file}&
+server_pid="$!"
wait_local_port_listen nssv ${SERVER_PORT} tcp
ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+client_exit_status="$?"
-wait
+wait "$server_pid"
+server_exit_status="$?"
res=$(cat $out_file)
rm $out_file
@@ -101,6 +104,14 @@ if [ "$res" = "0" ]; then
exit 1
fi
+if [ "$client_exit_status" -ne 0 ] || [ "$server_exit_status" -ne 0 ]; then
+ # Note: timeout(1) exits with 124 if it timed out
+ echo "client exited with ${client_exit_status}"
+ echo "server exited with ${server_exit_status}"
+ cleanup_ns
+ exit 1
+fi
+
echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index a4d16a460fbe..9e2ccea13d70 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -3260,17 +3260,25 @@ TEST(data_steal) {
ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
/* Spawn a child and get it into the read wait path of the underlying
- * TCP socket.
+ * TCP socket (before kernel .recvmsg is replaced with the TLS one).
*/
pid = fork();
ASSERT_GE(pid, 0);
if (!pid) {
- EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL),
- sizeof(buf) / 2);
+ EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2 + 1, MSG_WAITALL),
+ sizeof(buf) / 2 + 1);
exit(!__test_passed(_metadata));
}
- usleep(10000);
+ /* Send a sync byte and poll until it's consumed to ensure
+ * the child is in recv() before we proceed to install TLS.
+ */
+ ASSERT_EQ(send(fd, buf, 1, 0), 1);
+ do {
+ usleep(500);
+ } while (recv(cfd, buf, 1, MSG_PEEK | MSG_DONTWAIT) == 1);
+ EXPECT_EQ(errno, EAGAIN);
+
ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0);
ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0);
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index 0efc67b0357a..8a5cd5cb5472 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -8,14 +8,119 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <linux/if.h>
#include <linux/if_tun.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include "kselftest_harness.h"
+#include "tuntap_helpers.h"
+
+static const char param_dev_geneve_name[] = "geneve1";
+static unsigned char param_hwaddr_outer_dst[] = { 0x00, 0xfe, 0x98,
+ 0x14, 0x22, 0x42 };
+static unsigned char param_hwaddr_outer_src[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0xd2, 0x43 };
+static unsigned char param_hwaddr_inner_dst[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0x22, 0xcc };
+static unsigned char param_hwaddr_inner_src[] = { 0x00, 0xfe, 0x98,
+ 0x94, 0xd2, 0xdd };
+
+static struct in_addr param_ipaddr4_outer_dst = {
+ __constant_htonl(0xac100001),
+};
+
+static struct in_addr param_ipaddr4_outer_src = {
+ __constant_htonl(0xac100002),
+};
+
+static struct in_addr param_ipaddr4_inner_dst = {
+ __constant_htonl(0xac100101),
+};
+
+static struct in_addr param_ipaddr4_inner_src = {
+ __constant_htonl(0xac100102),
+};
+
+static struct in6_addr param_ipaddr6_outer_dst = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
+};
+
+static struct in6_addr param_ipaddr6_outer_src = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
+};
+
+static struct in6_addr param_ipaddr6_inner_dst = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
+};
+
+static struct in6_addr param_ipaddr6_inner_src = {
+ { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
+};
+
+#ifndef BIT
+#define BIT(nr) (1UL << (nr))
+#endif
+
+#define VN_ID 1
+#define VN_PORT 4789
+#define UDP_SRC_PORT 22
+#define UDP_DST_PORT 48878
+#define IPPREFIX_LEN 24
+#define IP6PREFIX_LEN 64
+#define TIMEOUT_SEC 10
+#define TIMEOUT_USEC 100000
+#define MAX_RETRIES 20
+
+#define UDP_TUNNEL_GENEVE_4IN4 0x01
+#define UDP_TUNNEL_GENEVE_6IN4 0x02
+#define UDP_TUNNEL_GENEVE_4IN6 0x04
+#define UDP_TUNNEL_GENEVE_6IN6 0x08
+
+#define UDP_TUNNEL_MAX_SEGMENTS BIT(7)
+
+#define UDP_TUNNEL_OUTER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_6IN4)
+#define UDP_TUNNEL_INNER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_4IN6)
+
+#define UDP_TUNNEL_GENEVE_4IN4_HDRLEN \
+ (ETH_HLEN + 2 * sizeof(struct iphdr) + GENEVE_HLEN + \
+ 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_6IN6_HDRLEN \
+ (ETH_HLEN + 2 * sizeof(struct ipv6hdr) + GENEVE_HLEN + \
+ 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_4IN6_HDRLEN \
+ (ETH_HLEN + sizeof(struct iphdr) + sizeof(struct ipv6hdr) + \
+ GENEVE_HLEN + 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_6IN4_HDRLEN \
+ (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct iphdr) + \
+ GENEVE_HLEN + 2 * sizeof(struct udphdr))
+
+#define UDP_TUNNEL_HDRLEN(type) \
+ ((type) == UDP_TUNNEL_GENEVE_4IN4 ? UDP_TUNNEL_GENEVE_4IN4_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_6IN6 ? UDP_TUNNEL_GENEVE_6IN6_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_4IN6 ? UDP_TUNNEL_GENEVE_4IN6_HDRLEN : \
+ (type) == UDP_TUNNEL_GENEVE_6IN4 ? UDP_TUNNEL_GENEVE_6IN4_HDRLEN : \
+ 0)
+
+#define UDP_TUNNEL_MSS(type) (ETH_DATA_LEN - UDP_TUNNEL_HDRLEN(type))
+#define UDP_TUNNEL_MAX(type, is_tap) \
+ (ETH_MAX_MTU - UDP_TUNNEL_HDRLEN(type) - ((is_tap) ? ETH_HLEN : 0))
+
+#define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel)
+#define MAX_VNET_TUNNEL_PACKET_SZ \
+ (TUN_VNET_TNL_SIZE + ETH_HLEN + UDP_TUNNEL_GENEVE_6IN6_HDRLEN + \
+ ETH_MAX_MTU)
+
+/*
+ * Parameters handed from geneve_create() to geneve_fill_newlink() through
+ * the opaque callback pointer of ip_link_add().
+ */
+struct geneve_setup_config {
+ int family; /* AF_INET or AF_INET6: selects which member of @remote is valid */
+ union {
+ struct in_addr r4;
+ struct in6_addr r6;
+ } remote; /* remote tunnel endpoint address */
+ /*
+ * NOTE(review): declared __be32, but geneve_create() stores VN_ID here
+ * without a byte swap - confirm which byte order the netlink setter expects.
+ */
+ __be32 vnid;
+ __be16 vnport; /* filled with htons(VN_PORT) by geneve_create() */
+ unsigned char hwaddr[6]; /* link-layer address for the geneve device */
+ uint8_t csum; /* passed to the geneve udp_csum link attribute */
+};
static int tun_attach(int fd, char *dev)
{
@@ -25,7 +130,7 @@ static int tun_attach(int fd, char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_ATTACH_QUEUE;
- return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+ return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
static int tun_detach(int fd, char *dev)
@@ -36,7 +141,7 @@ static int tun_detach(int fd, char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_DETACH_QUEUE;
- return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+ return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
static int tun_alloc(char *dev)
@@ -54,7 +159,7 @@ static int tun_alloc(char *dev)
strcpy(ifr.ifr_name, dev);
ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;
- err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ err = ioctl(fd, TUNSETIFF, (void *)&ifr);
if (err < 0) {
fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
close(fd);
@@ -66,42 +171,315 @@ static int tun_alloc(char *dev)
static int tun_delete(char *dev)
{
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifm;
- unsigned char data[64];
- } req;
- struct rtattr *rta;
- int ret, rtnl;
+ return ip_link_del(dev);
+}
+
+/*
+ * Create a tun/tap device and bring it up.
+ *
+ * @dev:      output buffer; receives the interface name reported back by
+ *            TUNSETIFF (the request is issued with an empty ifr_name)
+ * @flags:    IFF_* flags passed to TUNSETIFF
+ * @hdrlen:   vnet header size to configure, skipped when <= 0
+ * @features: TUN_F_* offload bits for TUNSETOFFLOAD, skipped when 0
+ * @mac_addr: optional hardware address, applied only when IFF_TAP is set
+ *
+ * Returns the tun file descriptor on success, -1 on any failure (the
+ * failing call is reported with perror()).
+ */
+static int tun_open(char *dev, const int flags, const int hdrlen,
+		    const int features, const unsigned char *mac_addr)
+{
+	struct ifreq ifr = { 0 };
+	int ctl = -1;
+	int tfd;
+
+	tfd = open("/dev/net/tun", O_RDWR);
+	if (tfd < 0) {
+		perror("open");
+		return -1;
+	}
+
+	ifr.ifr_flags = flags;
+	if (ioctl(tfd, TUNSETIFF, (void *)&ifr) < 0) {
+		perror("ioctl(TUNSETIFF)");
+		goto fail;
+	}
+	strcpy(dev, ifr.ifr_name);
+
+	if (hdrlen > 0 && ioctl(tfd, TUNSETVNETHDRSZ, &hdrlen) < 0) {
+		perror("ioctl(TUNSETVNETHDRSZ)");
+		goto fail;
+	}
+
+	if (features && ioctl(tfd, TUNSETOFFLOAD, features) < 0) {
+		perror("ioctl(TUNSETOFFLOAD)");
+		goto fail;
+	}
+
+	/* Interface flags and hwaddr are set through a throw-away socket. */
+	ctl = socket(PF_INET, SOCK_DGRAM, 0);
+	if (ctl < 0) {
+		perror("socket");
+		goto fail;
+	}
+
+	if (ioctl(ctl, SIOCGIFFLAGS, &ifr) < 0) {
+		perror("ioctl(SIOCGIFFLAGS)");
+		goto fail;
+	}
+
+	ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+	if (ioctl(ctl, SIOCSIFFLAGS, &ifr) < 0) {
+		perror("ioctl(SIOCSIFFLAGS)");
+		goto fail;
+	}
+
+	if ((flags & IFF_TAP) && mac_addr) {
+		ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+		memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETH_ALEN);
+
+		if (ioctl(ctl, SIOCSIFHWADDR, &ifr) < 0) {
+			perror("ioctl(SIOCSIFHWADDR)");
+			goto fail;
+		}
+	}
+
+	close(ctl);
+	return tfd;
+
+fail:
+	close(tfd);
+	if (ctl >= 0)
+		close(ctl);
+	return -1;
+}
+
+/*
+ * Size of the socket address structure for @family: sockaddr_in for
+ * AF_INET, sockaddr_in6 for anything else.
+ */
+static size_t sockaddr_len(int family)
+{
+	if (family == AF_INET)
+		return sizeof(struct sockaddr_in);
+
+	return sizeof(struct sockaddr_in6);
+}
+
+/*
+ * ip_link_add() callback: copy the geneve parameters from @data (a
+ * struct geneve_setup_config) into the newlink request @req.
+ * Always returns 0.
+ */
+static int geneve_fill_newlink(struct rt_link_newlink_req *req, void *data)
+{
+ struct geneve_setup_config *cfg = data;
+
+/* Short aliases for the long generated setter names; not #undef'd afterwards. */
+#define SET_GENEVE_REMOTE rt_link_newlink_req_set_linkinfo_data_geneve_remote
+#define SET_GENEVE_REMOTE6 rt_link_newlink_req_set_linkinfo_data_geneve_remote6
+
+ rt_link_newlink_req_set_address(req, cfg->hwaddr, ETH_ALEN);
+ rt_link_newlink_req_set_linkinfo_data_geneve_id(req, cfg->vnid);
+ rt_link_newlink_req_set_linkinfo_data_geneve_port(req, cfg->vnport);
+ rt_link_newlink_req_set_linkinfo_data_geneve_udp_csum(req, cfg->csum);
+
+ /* Exactly one remote attribute is set, chosen by address family. */
+ if (cfg->family == AF_INET)
+ SET_GENEVE_REMOTE(req, cfg->remote.r4.s_addr);
+ else
+ SET_GENEVE_REMOTE6(req, &cfg->remote.r6,
+ sizeof(cfg->remote.r6));
+
+ return 0;
+}
+
+/*
+ * Create geneve device @dev whose remote endpoint is @remote (a struct
+ * in_addr for AF_INET, a struct in6_addr otherwise) and whose link-layer
+ * address is the ETH_ALEN bytes at @hwaddr.
+ *
+ * Returns whatever ip_link_add() returns.
+ */
+static int geneve_create(const char *dev, int family, void *remote,
+			 void *hwaddr)
+{
+	struct geneve_setup_config cfg;
+	size_t addr_sz;
+	void *addr_dst;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.family = family;
+	cfg.vnid = VN_ID;
+	cfg.vnport = htons(VN_PORT);
+	cfg.csum = 1;
+	memcpy(cfg.hwaddr, hwaddr, ETH_ALEN);
+
+	/* Pick the union member matching the address family. */
+	if (family == AF_INET) {
+		addr_dst = &cfg.remote.r4;
+		addr_sz = sizeof(struct in_addr);
+	} else {
+		addr_dst = &cfg.remote.r6;
+		addr_sz = sizeof(struct in6_addr);
+	}
+	memcpy(addr_dst, remote, addr_sz);
+
+	return ip_link_add(dev, "geneve", geneve_fill_newlink, (void *)&cfg);
+}
+
+/*
+ * Set the path-MTU discovery mode of @fd to the *_PMTUDISC_DO value of
+ * the given IP version. Returns the setsockopt() result.
+ */
+static int set_pmtu_discover(int fd, bool is_ipv4)
+{
+	int mode;
+
+	if (is_ipv4) {
+		mode = IP_PMTUDISC_DO;
+		return setsockopt(fd, SOL_IP, IP_MTU_DISCOVER, &mode,
+				  sizeof(mode));
+	}
+
+	mode = IPV6_PMTUDISC_DO;
+	return setsockopt(fd, SOL_IPV6, IPV6_MTU_DISCOVER, &mode,
+			  sizeof(mode));
+}
+
+/*
+ * Open a UDP socket bound to @ssa with a TIMEOUT_SEC receive timeout.
+ *
+ * @ssa:        local address to bind to; its ss_family selects the socket
+ *              family and the sockaddr length used for bind()/connect()
+ * @do_frag:    when false, path-MTU discovery is forced on through
+ *              set_pmtu_discover()
+ * @do_connect: when true, the socket is connect()ed to @dsa
+ * @dsa:        peer address, only read when @do_connect is true
+ *
+ * Returns the socket fd, or -1 on failure. Every failing step, including
+ * socket() itself, is reported with perror().
+ */
+static int udp_socket_open(struct sockaddr_storage *ssa, bool do_frag,
+			   bool do_connect, struct sockaddr_storage *dsa)
+{
+	struct timeval to = { .tv_sec = TIMEOUT_SEC };
+	int fd, family = ssa->ss_family;
+	int salen = sockaddr_len(family);
+
+	fd = socket(family, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		perror("socket");
+		return -1;
+	}
+
+	if (bind(fd, (struct sockaddr *)ssa, salen) < 0) {
+		perror("bind");
+		goto err;
+	}
+
+	if (do_connect && connect(fd, (struct sockaddr *)dsa, salen) < 0) {
+		perror("connect");
+		goto err;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &to, sizeof(to)) < 0) {
+		perror("setsockopt(SO_RCVTIMEO)");
+		goto err;
+	}
+
+	if (!do_frag && set_pmtu_discover(fd, family == AF_INET) < 0) {
+		perror("set_pmtu_discover");
+		goto err;
+	}
+	return fd;
+
+err:
+	close(fd);
+	return -1;
+}
+
+/*
+ * ip_route_get() callback: store the route type from the getroute
+ * response header into the uint8_t that @rtm_type points to.
+ */
+static void parse_route_rsp(struct rt_route_getroute_rsp *rsp, void *rtm_type)
+{
+ *(uint8_t *)rtm_type = rsp->_hdr.rtm_type;
+}
+
+static int ip_route_check(const char *intf, int family, void *addr)
+{
+ uint8_t rtm_type, table = RT_TABLE_LOCAL;
+ int retries = MAX_RETRIES;
- rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
- if (rtnl < 0) {
- fprintf(stderr, "can't open rtnl: %s\n", strerror(errno));
- return 1;
+ while (retries-- > 0) {
+ if (ip_route_get(intf, family, table, addr, parse_route_rsp,
+ &rtm_type) == 0 &&
+ rtm_type == RTN_LOCAL)
+ break;
+
+ usleep(TIMEOUT_USEC);
}
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm)));
- req.nh.nlmsg_flags = NLM_F_REQUEST;
- req.nh.nlmsg_type = RTM_DELLINK;
+ if (retries < 0)
+ return -1;
+
+ return 0;
+}
+
+static int send_gso_udp_msg(int socket, struct sockaddr_storage *addr,
+ uint8_t *send_buf, int send_len, int gso_size)
+{
+ char control[CMSG_SPACE(sizeof(uint16_t))] = { 0 };
+ int alen = sockaddr_len(addr->ss_family);
+ struct msghdr msg = { 0 };
+ struct iovec iov = { 0 };
+ int ret;
+
+ iov.iov_base = send_buf;
+ iov.iov_len = send_len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_name = addr;
+ msg.msg_namelen = alen;
- req.ifm.ifi_family = AF_UNSPEC;
+ if (gso_size > 0) {
+ struct cmsghdr *cmsg;
- rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
- rta->rta_type = IFLA_IFNAME;
- rta->rta_len = RTA_LENGTH(IFNAMSIZ);
- req.nh.nlmsg_len += rta->rta_len;
- memcpy(RTA_DATA(rta), dev, IFNAMSIZ);
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
- ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_UDP;
+ cmsg->cmsg_type = UDP_SEGMENT;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+ *(uint16_t *)CMSG_DATA(cmsg) = gso_size;
+ }
+
+ ret = sendmsg(socket, &msg, 0);
if (ret < 0)
- fprintf(stderr, "can't send: %s\n", strerror(errno));
- ret = (unsigned int)ret != req.nh.nlmsg_len;
+ perror("sendmsg");
- close(rtnl);
return ret;
}
+/*
+ * Consume @x bytes from the parse cursor.
+ *
+ * When at least @x bytes remain, *cur is advanced and *len reduced by @x
+ * and 0 is returned. Otherwise both are left untouched and -1 is returned.
+ */
+static int validate_hdrlen(uint8_t **cur, int *len, int x)
+{
+	if (x > *len)
+		return -1;
+
+	*len -= x;
+	*cur += x;
+	return 0;
+}
+
+/*
+ * Step over one IP header (fixed-size IPv4 or IPv6, chosen by @is_ipv4)
+ * followed by one UDP header, checking that the IP version matches, that
+ * the next protocol is UDP and that the UDP destination port is @dport.
+ * Returns 0 on success, -1 on a short buffer or any mismatch.
+ */
+static int skip_ip_udp_hdrs(uint8_t **cur, int *len, bool is_ipv4, int dport)
+{
+	struct udphdr *uh;
+
+	if (is_ipv4) {
+		struct iphdr *ip4 = (struct iphdr *)*cur;
+
+		if (validate_hdrlen(cur, len, sizeof(struct iphdr)))
+			return -1;
+		if (ip4->version != 4 || ip4->protocol != IPPROTO_UDP)
+			return -1;
+	} else {
+		struct ipv6hdr *ip6 = (struct ipv6hdr *)*cur;
+
+		if (validate_hdrlen(cur, len, sizeof(struct ipv6hdr)))
+			return -1;
+		if (ip6->version != 6 || ip6->nexthdr != IPPROTO_UDP)
+			return -1;
+	}
+
+	uh = (struct udphdr *)*cur;
+	if (validate_hdrlen(cur, len, sizeof(struct udphdr)))
+		return -1;
+	if (ntohs(uh->dest) != dport)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Validate a packet read from the tun fd and strip all of its headers.
+ *
+ * Expected layout: vnet tunnel header, outer Ethernet (tap only), outer
+ * IP + UDP to VN_PORT, geneve header, inner Ethernet, inner IP + UDP to
+ * UDP_DST_PORT. IP versions are selected by @tunnel_type.
+ *
+ * Returns the number of bytes left after the inner UDP header, or -1 if
+ * the buffer is too short or any header check fails.
+ */
+static int parse_udp_tunnel_vnet_packet(uint8_t *buf, int len, int tunnel_type,
+					bool is_tap)
+{
+	bool outer_v4 = !!(tunnel_type & UDP_TUNNEL_OUTER_IPV4);
+	bool inner_v4 = !!(tunnel_type & UDP_TUNNEL_INNER_IPV4);
+	uint8_t *pos = buf;
+
+	if (validate_hdrlen(&pos, &len, TUN_VNET_TNL_SIZE))
+		return -1;
+
+	if (is_tap && validate_hdrlen(&pos, &len, ETH_HLEN))
+		return -1;
+
+	if (skip_ip_udp_hdrs(&pos, &len, outer_v4, VN_PORT))
+		return -1;
+
+	if (validate_hdrlen(&pos, &len, GENEVE_HLEN))
+		return -1;
+	if (validate_hdrlen(&pos, &len, ETH_HLEN))
+		return -1;
+
+	if (skip_ip_udp_hdrs(&pos, &len, inner_v4, UDP_DST_PORT))
+		return -1;
+
+	return len;
+}
+
FIXTURE(tun)
{
char ifname[IFNAMSIZ];
@@ -127,31 +505,36 @@ FIXTURE_TEARDOWN(tun)
close(self->fd2);
}
-TEST_F(tun, delete_detach_close) {
+TEST_F(tun, delete_detach_close)
+{
EXPECT_EQ(tun_delete(self->ifname), 0);
EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
EXPECT_EQ(errno, 22);
}
-TEST_F(tun, detach_delete_close) {
+TEST_F(tun, detach_delete_close)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, detach_close_delete) {
+TEST_F(tun, detach_close_delete)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
close(self->fd);
self->fd = -1;
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, reattach_delete_close) {
+TEST_F(tun, reattach_delete_close)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_delete(self->ifname), 0);
}
-TEST_F(tun, reattach_close_delete) {
+TEST_F(tun, reattach_close_delete)
+{
EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
close(self->fd);
@@ -159,4 +542,447 @@ TEST_F(tun, reattach_close_delete) {
EXPECT_EQ(tun_delete(self->ifname), 0);
}
+FIXTURE(tun_vnet_udptnl)
+{
+ char ifname[IFNAMSIZ];
+ int fd, sock;
+};
+
+FIXTURE_VARIANT(tun_vnet_udptnl)
+{
+ int tunnel_type;
+ int gso_size;
+ int data_size;
+ int r_num_mss;
+ bool is_tap, no_gso;
+};
+
+/* clang-format off */
+#define TUN_VNET_UDPTNL_VARIANT_ADD(type, desc) \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1byte) { \
+ /* no GSO: send a single byte */ \
+ .tunnel_type = type, \
+ .data_size = 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1mss) { \
+ /* no GSO: send a single MSS, fall back to no GSO */ \
+ .tunnel_type = type, \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_gtmss) { \
+ /* no GSO: send a single MSS + 1B: fail */ \
+ .tunnel_type = type, \
+ .data_size = UDP_TUNNEL_MSS(type) + 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1byte) { \
+ /* GSO: send 1 byte, gso 1 byte, fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = 1, \
+ .data_size = 1, \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1mss) { \
+ /* send a single MSS: fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_ltgso) { \
+ /* data <= MSS < gso: will fall back to no GSO */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type) + 1, \
+ .data_size = UDP_TUNNEL_MSS(type), \
+ .r_num_mss = 1, \
+ .is_tap = true, \
+ .no_gso = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_gtgso) { \
+ /* GSO: a single MSS + 1B */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type) + 1, \
+ .r_num_mss = 2, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_2mss) { \
+ /* GSO: send exactly 2 MSS */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MSS(type) * 2, \
+ .r_num_mss = 2, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxbytes) { \
+ /* GSO: send max bytes */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = UDP_TUNNEL_MAX(type, true), \
+ .r_num_mss = UDP_TUNNEL_MAX(type, true) / \
+ UDP_TUNNEL_MSS(type) + 1, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_over_maxbytes) { \
+ /* GSO: send oversize max bytes: fail */ \
+ .tunnel_type = type, \
+ .gso_size = UDP_TUNNEL_MSS(type), \
+ .data_size = ETH_MAX_MTU, \
+ .r_num_mss = ETH_MAX_MTU / UDP_TUNNEL_MSS(type) + 1, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxsegs) { \
+ /* GSO: send max number of min sized segments */ \
+ .tunnel_type = type, \
+ .gso_size = 1, \
+ .data_size = UDP_TUNNEL_MAX_SEGMENTS, \
+ .r_num_mss = UDP_TUNNEL_MAX_SEGMENTS, \
+ .is_tap = true, \
+ }; \
+ FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_5byte) { \
+ /* GSO: send 5 bytes, gso 2 bytes */ \
+ .tunnel_type = type, \
+ .gso_size = 2, \
+ .data_size = 5, \
+ .r_num_mss = 3, \
+ .is_tap = true, \
+ } /* clang-format on */
+
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN4, 4in4);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN4, 6in4);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN6, 4in6);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN6, 6in6);
+
+/*
+ * Point the four output pointers at the static test addresses for the
+ * requested side of the tunnel.
+ *
+ * @family:   AF_INET selects the IPv4 parameters, anything else IPv6
+ * @is_outer: non-zero selects the outer addresses, zero the inner ones
+ */
+static void assign_ifaddr_vars(int family, int is_outer, void **srcip,
+			       void **dstip, void **srcmac, void **dstmac)
+{
+	bool v4 = family == AF_INET;
+
+	if (is_outer) {
+		*srcip = v4 ? (void *)&param_ipaddr4_outer_src :
+			      (void *)&param_ipaddr6_outer_src;
+		*dstip = v4 ? (void *)&param_ipaddr4_outer_dst :
+			      (void *)&param_ipaddr6_outer_dst;
+		*srcmac = param_hwaddr_outer_src;
+		*dstmac = param_hwaddr_outer_dst;
+		return;
+	}
+
+	*srcip = v4 ? (void *)&param_ipaddr4_inner_src :
+		      (void *)&param_ipaddr6_inner_src;
+	*dstip = v4 ? (void *)&param_ipaddr4_inner_dst :
+		      (void *)&param_ipaddr6_inner_dst;
+	*srcmac = param_hwaddr_inner_src;
+	*dstmac = param_hwaddr_inner_dst;
+}
+
+/*
+ * Fill @src and @dst with the static test addresses for one side of the
+ * tunnel.
+ *
+ * @family:   AF_INET fills them as sockaddr_in, anything else as
+ *            sockaddr_in6
+ * @is_outer: non-zero selects the outer addresses and leaves the ports
+ *            at 0; zero selects the inner addresses and sets the ports
+ *            to UDP_SRC_PORT / UDP_DST_PORT
+ *
+ * Both structures are zeroed first. Callers pass uninitialised stack
+ * storage, and without this the fields not assigned below (the port for
+ * the outer case, sin6_flowinfo and sin6_scope_id for IPv6) would reach
+ * bind()/connect()/sendmsg() holding indeterminate values.
+ */
+static void assign_sockaddr_vars(int family, int is_outer,
+				 struct sockaddr_storage *src,
+				 struct sockaddr_storage *dst)
+{
+	memset(src, 0, sizeof(*src));
+	memset(dst, 0, sizeof(*dst));
+
+	src->ss_family = family;
+	dst->ss_family = family;
+
+	if (family == AF_INET) {
+		struct sockaddr_in *s4 = (struct sockaddr_in *)src;
+		struct sockaddr_in *d4 = (struct sockaddr_in *)dst;
+
+		s4->sin_addr = is_outer ? param_ipaddr4_outer_src :
+					  param_ipaddr4_inner_src;
+		d4->sin_addr = is_outer ? param_ipaddr4_outer_dst :
+					  param_ipaddr4_inner_dst;
+		if (!is_outer) {
+			s4->sin_port = htons(UDP_SRC_PORT);
+			d4->sin_port = htons(UDP_DST_PORT);
+		}
+	} else {
+		struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)src;
+		struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst;
+
+		s6->sin6_addr = is_outer ? param_ipaddr6_outer_src :
+					   param_ipaddr6_inner_src;
+		d6->sin6_addr = is_outer ? param_ipaddr6_outer_dst :
+					   param_ipaddr6_inner_dst;
+		if (!is_outer) {
+			s6->sin6_port = htons(UDP_SRC_PORT);
+			d6->sin6_port = htons(UDP_DST_PORT);
+		}
+	}
+}
+
+/*
+ * Build the test topology for one variant: a tun/tap device carrying the
+ * outer addresses, a geneve device on top of it carrying the inner
+ * addresses, and a UDP socket bound and connected with the inner addresses.
+ */
+FIXTURE_SETUP(tun_vnet_udptnl)
+{
+ int ret, family, prefix, flags, features;
+ int tunnel_type = variant->tunnel_type;
+ struct sockaddr_storage ssa, dsa;
+ void *sip, *dip, *smac, *dmac;
+
+ /* tun/tap device with a tunnel-sized vnet header and UDP tunnel GSO offloads */
+ flags = (variant->is_tap ? IFF_TAP : IFF_TUN) | IFF_VNET_HDR |
+ IFF_MULTI_QUEUE | IFF_NO_PI;
+ features = TUN_F_CSUM | TUN_F_UDP_TUNNEL_GSO |
+ TUN_F_UDP_TUNNEL_GSO_CSUM | TUN_F_USO4 | TUN_F_USO6;
+ self->fd = tun_open(self->ifname, flags, TUN_VNET_TNL_SIZE, features,
+ param_hwaddr_outer_src);
+ ASSERT_GE(self->fd, 0);
+
+ /* Outer side: address and neighbour entry on the tun/tap device */
+ family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : AF_INET6;
+ prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
+ assign_ifaddr_vars(family, 1, &sip, &dip, &smac, &dmac);
+
+ ret = ip_addr_add(self->ifname, family, sip, prefix);
+ ASSERT_EQ(ret, 0);
+ ret = ip_neigh_add(self->ifname, family, dip, dmac);
+ ASSERT_EQ(ret, 0);
+ /* ip_route_check() retries until the route lookup for sip is RTN_LOCAL */
+ ret = ip_route_check(self->ifname, family, sip);
+ ASSERT_EQ(ret, 0);
+
+ /* geneve device whose remote endpoint is the outer destination address */
+ ret = geneve_create(param_dev_geneve_name, family, dip,
+ param_hwaddr_inner_src);
+ ASSERT_EQ(ret, 0);
+
+ /* Inner side: same steps, applied to the geneve device */
+ family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : AF_INET6;
+ prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
+ assign_ifaddr_vars(family, 0, &sip, &dip, &smac, &dmac);
+
+ ret = ip_addr_add(param_dev_geneve_name, family, sip, prefix);
+ ASSERT_EQ(ret, 0);
+ ret = ip_neigh_add(param_dev_geneve_name, family, dip, dmac);
+ ASSERT_EQ(ret, 0);
+ ret = ip_route_check(param_dev_geneve_name, family, sip);
+ ASSERT_EQ(ret, 0);
+
+ /* UDP socket: inner src -> inner dst, connected, PMTU discovery forced (do_frag == false) */
+ assign_sockaddr_vars(family, 0, &ssa, &dsa);
+ self->sock = udp_socket_open(&ssa, false, true, &dsa);
+ ASSERT_GE(self->sock, 0);
+}
+
+/*
+ * Undo FIXTURE_SETUP: close the UDP socket, then delete the geneve device
+ * and the tun/tap device, expecting both deletions to succeed.
+ */
+FIXTURE_TEARDOWN(tun_vnet_udptnl)
+{
+ int ret;
+
+ if (self->sock != -1)
+ close(self->sock);
+
+ ret = ip_link_del(param_dev_geneve_name);
+ EXPECT_EQ(ret, 0);
+
+ ret = tun_delete(self->ifname);
+ EXPECT_EQ(ret, 0);
+}
+
+/*
+ * Assemble in @buf the frame that the test writes into the tun fd: vnet
+ * tunnel header, optional outer Ethernet (tap), outer IP, outer UDP to
+ * VN_PORT, geneve header, inner Ethernet, inner IP and inner UDP carrying
+ * variant->data_size bytes of payload.
+ *
+ * Returns the total number of bytes written to @buf.
+ * NOTE(review): each build_*() helper is assumed to return the number of
+ * bytes it wrote - confirm against tuntap_helpers.h.
+ */
+static int build_gso_packet_into_tun(const FIXTURE_VARIANT(tun_vnet_udptnl) *
+ variant,
+ uint8_t *buf)
+{
+ int pktlen, hlen, proto, inner_family, outer_family;
+ int tunnel_type = variant->tunnel_type;
+ int payload_len = variant->data_size;
+ int gso_size = variant->gso_size;
+ uint8_t *outer_udph, *cur = buf;
+ void *sip, *dip, *smac, *dmac;
+ bool is_tap = variant->is_tap;
+
+ /* All headers in front of the payload, excluding the vnet header */
+ hlen = (is_tap ? ETH_HLEN : 0) + UDP_TUNNEL_HDRLEN(tunnel_type);
+ inner_family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET :
+ AF_INET6;
+ outer_family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET :
+ AF_INET6;
+
+ cur += build_virtio_net_hdr_v1_hash_tunnel(cur, is_tap, hlen, gso_size,
+ outer_family, inner_family);
+
+ /* pktlen tracks what is still to be written; each layer subtracts its own header */
+ pktlen = hlen + payload_len;
+ assign_ifaddr_vars(outer_family, 1, &sip, &dip, &smac, &dmac);
+
+ /* Outer headers are built with dst/src swapped relative to the local setup */
+ if (is_tap) {
+ proto = outer_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
+ pktlen -= ETH_HLEN;
+ cur += build_eth(cur, proto, dmac, smac);
+ }
+
+ if (outer_family == AF_INET) {
+ pktlen = pktlen - sizeof(struct iphdr);
+ cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
+ } else {
+ pktlen = pktlen - sizeof(struct ipv6hdr);
+ cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
+ }
+
+ /* Remember where the outer UDP header starts; it is checksummed at the end */
+ outer_udph = cur;
+ assign_ifaddr_vars(inner_family, 0, &sip, &dip, &smac, &dmac);
+
+ pktlen -= sizeof(struct udphdr);
+ proto = inner_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
+ cur += build_udp_header(cur, UDP_SRC_PORT, VN_PORT, pktlen);
+ cur += build_geneve_header(cur, VN_ID);
+ cur += build_eth(cur, proto, dmac, smac);
+
+ /* Inner IP length covers only the inner UDP header plus payload */
+ pktlen = sizeof(struct udphdr) + payload_len;
+ if (inner_family == AF_INET)
+ cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
+ else
+ cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
+
+ /* Ports are swapped too: the inner datagram is addressed to UDP_SRC_PORT */
+ cur += build_udp_packet(cur, UDP_DST_PORT, UDP_SRC_PORT, payload_len,
+ inner_family, false);
+
+ /* Done last, once everything after the outer UDP header is in place */
+ build_udp_packet_csum(outer_udph, outer_family, false);
+
+ return cur - buf;
+}
+
+/*
+ * Receive datagrams on the test UDP socket until at least
+ * variant->data_size bytes have arrived, or until recv() returns 0 or
+ * fails. *r_num_mss is incremented once per datagram received.
+ *
+ * Returns the number of bytes received. recv() errors other than
+ * EAGAIN/EWOULDBLOCK are reported with perror().
+ */
+static int
+receive_gso_packet_from_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
+			       const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
+			       int *r_num_mss)
+{
+	uint8_t rxbuf[MAX_VNET_TUNNEL_PACKET_SZ];
+	int wanted = variant->data_size;
+	int received, n;
+
+	for (received = 0; received < wanted; received += n) {
+		n = recv(self->sock, rxbuf, sizeof(rxbuf), 0);
+		if (n > 0) {
+			(*r_num_mss)++;
+			continue;
+		}
+
+		if (n < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+			perror("recv");
+		break;
+	}
+
+	return received;
+}
+
+/*
+ * Send variant->data_size zero bytes from the test UDP socket to the
+ * inner destination address, using variant->gso_size as the segment size.
+ * Returns the send_gso_udp_msg() result.
+ */
+static int send_gso_packet_into_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
+				       const FIXTURE_VARIANT(tun_vnet_udptnl) *
+					       variant)
+{
+	uint8_t payload[MAX_VNET_TUNNEL_PACKET_SZ] = { 0 };
+	struct sockaddr_storage local, peer;
+	int inner_family = AF_INET6;
+
+	if (variant->tunnel_type & UDP_TUNNEL_INNER_IPV4)
+		inner_family = AF_INET;
+
+	/* Only the peer address is used; the local one is filled as a by-product. */
+	assign_sockaddr_vars(inner_family, 0, &local, &peer);
+	return send_gso_udp_msg(self->sock, &peer, payload, variant->data_size,
+				variant->gso_size);
+}
+
+/*
+ * Read frames from the tun fd until variant->data_size bytes of inner UDP
+ * payload have been collected.
+ *
+ * Each frame is validated with parse_udp_tunnel_vnet_packet(); frames
+ * that fail validation are skipped. The vnet header of the first
+ * accepted frame is copied to @vnet_hdr.
+ *
+ * Returns the accumulated payload length, which is short if select()
+ * fails or times out, or if read() returns 0 or fails.
+ */
+static int
+receive_gso_packet_from_tun(FIXTURE_DATA(tun_vnet_udptnl) * self,
+			    const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
+			    struct virtio_net_hdr_v1_hash_tunnel *vnet_hdr)
+{
+	/*
+	 * Deliberately initialised once: Linux select() updates the timeout
+	 * in place with the time left, so this is a budget for the whole
+	 * loop rather than a per-frame timeout.
+	 */
+	struct timeval timeout = { .tv_sec = TIMEOUT_SEC };
+	uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ];
+	int tunnel_type = variant->tunnel_type;
+	int payload_len = variant->data_size;
+	bool is_tap = variant->is_tap;
+	int ret, len, total_len = 0;
+	int tun_fd = self->fd;
+	fd_set fdset;
+
+	while (total_len < payload_len) {
+		FD_ZERO(&fdset);
+		FD_SET(tun_fd, &fdset);
+
+		ret = select(tun_fd + 1, &fdset, NULL, NULL, &timeout);
+		if (ret < 0) {
+			perror("select");
+			break;
+		}
+		if (ret == 0) {
+			/* Timeout: select() does not set errno, so no perror() */
+			fprintf(stderr, "select: timed out\n");
+			break;
+		}
+		if (!FD_ISSET(tun_fd, &fdset))
+			continue;
+
+		len = read(tun_fd, buf, sizeof(buf));
+		if (len <= 0) {
+			if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+				perror("read");
+			break;
+		}
+
+		/* On success len becomes the payload left after all headers */
+		len = parse_udp_tunnel_vnet_packet(buf, len, tunnel_type,
+						   is_tap);
+		if (len < 0)
+			continue;
+
+		if (total_len == 0)
+			memcpy(vnet_hdr, buf, TUN_VNET_TNL_SIZE);
+
+		total_len += len;
+	}
+
+	return total_len;
+}
+
+/*
+ * tun -> tunnel direction: write one hand-built encapsulated frame into
+ * the tun fd and check that the UDP socket receives variant->data_size
+ * bytes split into variant->r_num_mss datagrams.
+ */
+TEST_F(tun_vnet_udptnl, send_gso_packet)
+{
+ uint8_t pkt[MAX_VNET_TUNNEL_PACKET_SZ];
+ int r_num_mss = 0;
+ int ret, off;
+
+ memset(pkt, 0, sizeof(pkt));
+ off = build_gso_packet_into_tun(variant, pkt);
+ ret = write(self->fd, pkt, off);
+ ASSERT_EQ(ret, off);
+
+ ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss);
+ ASSERT_EQ(ret, variant->data_size);
+ ASSERT_EQ(r_num_mss, variant->r_num_mss);
+}
+
+/*
+ * tunnel -> tun direction: send data on the UDP socket and check that the
+ * tun fd delivers variant->data_size payload bytes. Unless the variant is
+ * marked no_gso, the vnet header must also carry the requested gso_size
+ * and a UDP_L4 gso_type combined with the outer-family tunnel bit.
+ */
+TEST_F(tun_vnet_udptnl, recv_gso_packet)
+{
+ struct virtio_net_hdr_v1_hash_tunnel vnet_hdr = { 0 };
+ struct virtio_net_hdr_v1 *vh = &vnet_hdr.hash_hdr.hdr;
+ int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
+
+ ret = send_gso_packet_into_tunnel(self, variant);
+ ASSERT_EQ(ret, variant->data_size);
+
+ memset(&vnet_hdr, 0, sizeof(vnet_hdr));
+ ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr);
+ ASSERT_EQ(ret, variant->data_size);
+
+ if (!variant->no_gso) {
+ /* NOTE(review): gso_size is compared without byte-order conversion - confirm for big-endian hosts */
+ ASSERT_EQ(vh->gso_size, variant->gso_size);
+ gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ?
+ (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) :
+ (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6);
+ ASSERT_EQ(vh->gso_type, gso_type);
+ }
+}
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_nogsosz_gtmss, recv_gso_packet);
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, send_gso_packet);
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, recv_gso_packet);
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/tuntap_helpers.h b/tools/testing/selftests/net/tuntap_helpers.h
new file mode 100644
index 000000000000..d6c0437136ec
--- /dev/null
+++ b/tools/testing/selftests/net/tuntap_helpers.h
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _TUNTAP_HELPERS_H
+#define _TUNTAP_HELPERS_H
+
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/virtio_net.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/udp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ynl.h>
+
+#include "rt-route-user.h"
+#include "rt-addr-user.h"
+#include "rt-neigh-user.h"
+#include "rt-link-user.h"
+
+#define GENEVE_HLEN 8
+#define PKT_DATA 0xCB
+#define TUNTAP_DEFAULT_TTL 8
+#define TUNTAP_DEFAULT_IPID 1337
+
+unsigned int if_nametoindex(const char *ifname);
+
+/*
+ * Size in bytes of an address of @family: sizeof(struct in_addr) for
+ * AF_INET, sizeof(struct in6_addr) for every other value.
+ */
+static inline int ip_addr_len(int family)
+{
+	if (family == AF_INET)
+		return sizeof(struct in_addr);
+
+	return sizeof(struct in6_addr);
+}
+
+/*
+ * Fill the ifaddrmsg header of an address request for interface @dev.
+ * The interface index is looked up with if_nametoindex() and the scope is
+ * always RT_SCOPE_UNIVERSE.
+ */
+static inline void fill_ifaddr_msg(struct ifaddrmsg *ifam, int family,
+				   int prefix, int flags, const char *dev)
+{
+	unsigned int ifindex = if_nametoindex(dev);
+
+	ifam->ifa_scope = RT_SCOPE_UNIVERSE;
+	ifam->ifa_flags = flags;
+	ifam->ifa_index = ifindex;
+	ifam->ifa_prefixlen = prefix;
+	ifam->ifa_family = family;
+}
+
+/*
+ * Add address @addr/@prefix of @family to interface @dev using a ynl
+ * rt-addr "newaddr" request (create + exclusive, permanent, no DAD).
+ *
+ * Returns the result of rt_addr_newaddr(), or -1 when the ynl socket or the
+ * request object cannot be allocated.
+ */
+static inline int ip_addr_add(const char *dev, int family, void *addr,
+			      uint8_t prefix)
+{
+	struct rt_addr_newaddr_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_addr_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_addr_newaddr_req_alloc();
+	if (req) {
+		fill_ifaddr_msg(&req->_hdr, family, prefix,
+				IFA_F_PERMANENT | IFA_F_NODAD, dev);
+		rt_addr_newaddr_req_set_nlflags(req, NLM_F_REQUEST |
+						     NLM_F_CREATE |
+						     NLM_F_EXCL);
+		rt_addr_newaddr_req_set_local(req, addr, ip_addr_len(family));
+
+		ret = rt_addr_newaddr(ys, req);
+		rt_addr_newaddr_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Fill the ndmsg header of a neighbour request for interface @dev.
+ * Flags are cleared and the type is always RTN_UNICAST.
+ */
+static inline void fill_neigh_req_header(struct ndmsg *ndm, int family,
+					 int state, const char *dev)
+{
+	unsigned int ifindex = if_nametoindex(dev);
+
+	ndm->ndm_type = RTN_UNICAST;
+	ndm->ndm_flags = 0;
+	ndm->ndm_state = state;
+	ndm->ndm_ifindex = ifindex;
+	ndm->ndm_family = family;
+}
+
+/*
+ * Install a permanent neighbour entry mapping @addr (of @family) to the
+ * link-layer address @lladdr (ETH_ALEN bytes) on interface @dev, using a
+ * ynl rt-neigh "newneigh" request (create + exclusive).
+ *
+ * Returns the result of rt_neigh_newneigh(), or -1 when the ynl socket or
+ * the request object cannot be allocated.
+ */
+static inline int ip_neigh_add(const char *dev, int family, void *addr,
+			       unsigned char *lladdr)
+{
+	struct rt_neigh_newneigh_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_neigh_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_neigh_newneigh_req_alloc();
+	if (req) {
+		fill_neigh_req_header(&req->_hdr, family, NUD_PERMANENT, dev);
+		rt_neigh_newneigh_req_set_nlflags(req, NLM_F_REQUEST |
+						       NLM_F_CREATE |
+						       NLM_F_EXCL);
+		rt_neigh_newneigh_req_set_dst(req, addr, ip_addr_len(family));
+		rt_neigh_newneigh_req_set_lladdr(req, lladdr, ETH_ALEN);
+		rt_neigh_newneigh_req_set_ifindex(req, if_nametoindex(dev));
+
+		ret = rt_neigh_newneigh(ys, req);
+		rt_neigh_newneigh_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/* Set the address family and routing table of a route request header. */
+static inline void fill_route_req_header(struct rtmsg *rtm, int family,
+					 int table)
+{
+	rtm->rtm_table = table;
+	rtm->rtm_family = family;
+}
+
+/*
+ * Query the route towards @dst (of @family) in @table with output interface
+ * @dev, using a ynl rt-route "getroute" request.
+ *
+ * When a response is received and @parse_rsp is non-NULL, it is called with
+ * the response and @out before the response is freed.
+ *
+ * Returns 0 when a response was received, -1 otherwise (socket creation,
+ * request allocation or the request itself failed).
+ */
+static inline int
+ip_route_get(const char *dev, int family, int table, void *dst,
+	     void (*parse_rsp)(struct rt_route_getroute_rsp *rsp, void *out),
+	     void *out)
+{
+	struct rt_route_getroute_req *req;
+	struct rt_route_getroute_rsp *rsp;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_route_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_route_getroute_req_alloc();
+	if (!req) {
+		ynl_sock_destroy(ys);
+		return -1;
+	}
+
+	fill_route_req_header(&req->_hdr, family, table);
+	rt_route_getroute_req_set_nlflags(req, NLM_F_REQUEST);
+	rt_route_getroute_req_set_dst(req, dst, ip_addr_len(family));
+	rt_route_getroute_req_set_oif(req, if_nametoindex(dev));
+
+	rsp = rt_route_getroute(ys, req);
+	if (rsp) {
+		if (parse_rsp)
+			parse_rsp(rsp, out);
+		rt_route_getroute_rsp_free(rsp);
+		ret = 0;
+	}
+
+	rt_route_getroute_req_free(req);
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Create link @dev of kind @link_type, flagged IFF_UP, using a ynl rt-link
+ * "newlink" request (create + exclusive).
+ *
+ * If @fill_link_attr is non-NULL it is called with the request and @data to
+ * add further attributes; a negative return from it aborts the operation
+ * before anything is sent.
+ *
+ * Returns the result of rt_link_newlink(), or -1 when the socket or request
+ * cannot be allocated or @fill_link_attr fails.
+ */
+static inline int
+ip_link_add(const char *dev, char *link_type,
+	    int (*fill_link_attr)(struct rt_link_newlink_req *req, void *data),
+	    void *data)
+{
+	struct rt_link_newlink_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_link_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_link_newlink_req_alloc();
+	if (req) {
+		req->_hdr.ifi_flags = IFF_UP;
+		rt_link_newlink_req_set_nlflags(req, NLM_F_REQUEST |
+						     NLM_F_CREATE |
+						     NLM_F_EXCL);
+		rt_link_newlink_req_set_ifname(req, dev);
+		rt_link_newlink_req_set_linkinfo_kind(req, link_type);
+
+		/* Only send the request when the optional filler succeeded. */
+		if (!fill_link_attr || fill_link_attr(req, data) >= 0)
+			ret = rt_link_newlink(ys, req);
+
+		rt_link_newlink_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Delete the link named @dev using a ynl rt-link "dellink" request.
+ *
+ * Returns the result of rt_link_dellink(), or -1 when the ynl socket or the
+ * request object cannot be allocated.
+ */
+static inline int ip_link_del(const char *dev)
+{
+	struct rt_link_dellink_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_link_family, NULL);
+	if (!ys)
+		return -1;
+
+	req = rt_link_dellink_req_alloc();
+	if (req) {
+		rt_link_dellink_req_set_nlflags(req, NLM_F_REQUEST);
+		rt_link_dellink_req_set_ifname(req, dev);
+
+		ret = rt_link_dellink(ys, req);
+		rt_link_dellink_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Write an Ethernet header at @buf: destination @dest, source @src and
+ * EtherType @proto (given in host byte order). Returns ETH_HLEN.
+ */
+static inline size_t build_eth(uint8_t *buf, uint16_t proto, unsigned char *src,
+			       unsigned char *dest)
+{
+	struct ethhdr *hdr = (struct ethhdr *)buf;
+
+	memcpy(hdr->h_dest, dest, ETH_ALEN);
+	memcpy(hdr->h_source, src, ETH_ALEN);
+	hdr->h_proto = htons(proto);
+
+	return ETH_HLEN;
+}
+
+/*
+ * Sum @len bytes at @buf as native-endian 16-bit words, without folding the
+ * carries; the result is meant to be passed to finish_ip_csum().
+ *
+ * Words are fetched with memcpy() so @buf may have any alignment. An odd
+ * trailing byte is treated as a 16-bit word padded with a zero byte, which
+ * keeps the sum correct on big-endian as well as little-endian hosts.
+ * A @len of zero or less yields 0.
+ */
+static inline uint32_t add_csum(const uint8_t *buf, int len)
+{
+	uint32_t sum = 0;
+	uint16_t word;
+
+	for (; len > 1; len -= 2, buf += 2) {
+		memcpy(&word, buf, sizeof(word));
+		sum += word;
+	}
+
+	if (len > 0) {
+		/* Pad the last byte with zero in memory order. */
+		word = 0;
+		memcpy(&word, buf, 1);
+		sum += word;
+	}
+
+	return sum;
+}
+
+/*
+ * Fold the carries of a 32-bit partial sum into 16 bits and return the
+ * one's complement of the result.
+ */
+static inline uint16_t finish_ip_csum(uint32_t sum)
+{
+	uint32_t carry;
+
+	for (carry = sum >> 16; carry; carry = sum >> 16)
+		sum = (sum & 0xffff) + carry;
+
+	return ~((uint16_t)sum);
+}
+
+/*
+ * Checksum @len bytes at @buf on top of the partial sum @sum and return the
+ * folded, complemented 16-bit result.
+ */
+static inline uint16_t build_ip_csum(const uint8_t *buf, int len, uint32_t sum)
+{
+	return finish_ip_csum(sum + add_csum(buf, len));
+}
+
+/*
+ * Write a 20-byte IPv4 header (no options) at @buf for a payload of
+ * @payload_len bytes carrying protocol @proto from @src to @dst.
+ *
+ * TTL and IP ID come from TUNTAP_DEFAULT_TTL / TUNTAP_DEFAULT_IPID. The tos
+ * and frag_off fields are not written here, so they keep whatever the
+ * caller's buffer contains.
+ *
+ * Returns the header length in bytes.
+ */
+static inline int build_ipv4_header(uint8_t *buf, uint8_t proto,
+				    int payload_len, struct in_addr *src,
+				    struct in_addr *dst)
+{
+	struct iphdr *iph = (struct iphdr *)buf;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = TUNTAP_DEFAULT_TTL;
+	iph->tot_len = htons(sizeof(*iph) + payload_len);
+	iph->id = htons(TUNTAP_DEFAULT_IPID);
+	iph->protocol = proto;
+	iph->saddr = src->s_addr;
+	iph->daddr = dst->s_addr;
+
+	/*
+	 * The checksum field must be zero while the header is summed;
+	 * otherwise stale buffer contents leak into the result.
+	 */
+	iph->check = 0;
+	iph->check = build_ip_csum(buf, iph->ihl << 2, 0);
+
+	return iph->ihl << 2;
+}
+
+/*
+ * Store @dsfield in the traffic-class bits of the IPv6 header: the first
+ * 16 bits are read in host order, bits 4-11 are replaced and the other bits
+ * (mask 0xF00F) are kept.
+ */
+static inline void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+	uint16_t *first = (uint16_t *)ip6h;
+	uint16_t host = ntohs(*first);
+
+	host = (host & 0xF00F) | (((uint16_t)dsfield) << 4);
+	*first = htons(host);
+}
+
+/*
+ * Write an IPv6 header at @buf for a payload of @payload_len bytes with
+ * next header @proto, traffic class @dsfield, hop limit TUNTAP_DEFAULT_TTL
+ * and the given @src / @dst addresses.
+ *
+ * Returns sizeof(struct ipv6hdr).
+ */
+static inline int build_ipv6_header(uint8_t *buf, uint8_t proto,
+				    uint8_t dsfield, int payload_len,
+				    struct in6_addr *src, struct in6_addr *dst)
+{
+	struct ipv6hdr *hdr = (struct ipv6hdr *)buf;
+
+	memcpy(&hdr->saddr, src, sizeof(hdr->saddr));
+	memcpy(&hdr->daddr, dst, sizeof(hdr->daddr));
+
+	hdr->hop_limit = TUNTAP_DEFAULT_TTL;
+	hdr->nexthdr = proto;
+	hdr->payload_len = htons(payload_len);
+
+	hdr->version = 6;
+	ipv6_set_dsfield(hdr, dsfield);
+
+	return sizeof(*hdr);
+}
+
+/*
+ * Fill the protocol type (ETH_P_TEB) and the VNI of a GENEVE header at
+ * @buf and return GENEVE_HLEN.
+ *
+ * Only bytes 2-7 are written: bytes 2-3 get the protocol type and bytes 4-7
+ * get @vni shifted into the upper 24 bits (the low byte is zero). Bytes 0-1
+ * are left as the caller provided them.
+ */
+static inline int build_geneve_header(uint8_t *buf, uint32_t vni)
+{
+	const uint32_t vni_field = htonl(vni << 8);
+	const uint16_t proto_field = htons(ETH_P_TEB);
+
+	memcpy(&buf[2], &proto_field, sizeof(proto_field));
+	memcpy(&buf[4], &vni_field, sizeof(vni_field));
+
+	return GENEVE_HLEN;
+}
+
+/*
+ * Write the source port, destination port and length of a UDP header at
+ * @buf; the length covers the header plus @payload_len. The checksum field
+ * is not touched. Returns the UDP header size.
+ */
+static inline int build_udp_header(uint8_t *buf, uint16_t sport, uint16_t dport,
+				   int payload_len)
+{
+	struct udphdr *hdr = (struct udphdr *)buf;
+
+	hdr->len = htons(sizeof(*hdr) + payload_len);
+	hdr->dest = htons(dport);
+	hdr->source = htons(sport);
+
+	return sizeof(*hdr);
+}
+
+/*
+ * Fill in the checksum of the UDP header at @buf.
+ *
+ * The IP source and destination addresses are read from the 2 * address-size
+ * bytes immediately preceding @buf, so @buf must directly follow an IPv4
+ * header without options or an IPv6 header without extension headers.
+ *
+ * With @csum_off set only the pseudo-header (addresses, protocol, length)
+ * is summed; otherwise the UDP header and payload are included as well.
+ */
+static inline void build_udp_packet_csum(uint8_t *buf, int family,
+					 bool csum_off)
+{
+	struct udphdr *udph = (struct udphdr *)buf;
+	size_t ipalen = ip_addr_len(family);
+	uint32_t sum;
+
+	/* No extension IPv4 and IPv6 headers addresses are the last fields */
+	sum = add_csum(buf - 2 * ipalen, 2 * ipalen);
+	sum += htons(IPPROTO_UDP) + udph->len;
+
+	if (!csum_off) {
+		/* The checksum field counts as zero while summing. */
+		udph->check = 0;
+		/* udph->len is big-endian on the wire; convert for the byte count. */
+		sum += add_csum(buf, ntohs(udph->len));
+	}
+
+	udph->check = finish_ip_csum(sum);
+}
+
+/*
+ * Build a UDP datagram at @buf: header, @payload_len bytes of PKT_DATA and
+ * the checksum (see build_udp_packet_csum() for @family and @csum_off).
+ *
+ * Returns the total datagram length, header included.
+ */
+static inline int build_udp_packet(uint8_t *buf, uint16_t sport, uint16_t dport,
+				   int payload_len, int family, bool csum_off)
+{
+	int hdr_len = build_udp_header(buf, sport, dport, payload_len);
+
+	memset(buf + hdr_len, PKT_DATA, payload_len);
+	build_udp_packet_csum(buf, family, csum_off);
+
+	return hdr_len + payload_len;
+}
+
+/*
+ * Fill a virtio_net_hdr_v1_hash_tunnel at @buf for a UDP-in-GENEVE packet.
+ *
+ * @is_tap adds ETH_HLEN in front of the outer IP header. The outer
+ * transport offset, inner network offset and checksum start are derived
+ * from the outer/inner address families; @hdr_len and @gso_size are stored
+ * as given. gso_type is only written when @gso_size is non-zero (UDP_L4
+ * plus the tunnel flag matching @outer_family); otherwise it keeps the
+ * buffer's previous value.
+ *
+ * Returns sizeof(struct virtio_net_hdr_v1_hash_tunnel).
+ */
+static inline int build_virtio_net_hdr_v1_hash_tunnel(uint8_t *buf, bool is_tap,
+						      int hdr_len, int gso_size,
+						      int outer_family,
+						      int inner_family)
+{
+	struct virtio_net_hdr_v1_hash_tunnel *tnl = (void *)buf;
+	struct virtio_net_hdr_v1 *hdr = &tnl->hash_hdr.hdr;
+	int l2_len = 0, outer_l3_len, inner_l3_len, tunnel_flag;
+
+	if (is_tap)
+		l2_len = ETH_HLEN;
+
+	outer_l3_len = sizeof(struct ipv6hdr);
+	if (outer_family == AF_INET)
+		outer_l3_len = sizeof(struct iphdr);
+
+	inner_l3_len = sizeof(struct ipv6hdr);
+	if (inner_family == AF_INET)
+		inner_l3_len = sizeof(struct iphdr);
+
+	tnl->outer_th_offset = l2_len + outer_l3_len;
+	tnl->inner_nh_offset = tnl->outer_th_offset + ETH_HLEN +
+			       GENEVE_HLEN + sizeof(struct udphdr);
+
+	hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+	hdr->csum_start = tnl->inner_nh_offset + inner_l3_len;
+	hdr->csum_offset = __builtin_offsetof(struct udphdr, check);
+	hdr->hdr_len = hdr_len;
+	hdr->gso_size = gso_size;
+
+	if (gso_size) {
+		if (outer_family == AF_INET)
+			tunnel_flag = VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4;
+		else
+			tunnel_flag = VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6;
+
+		hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4 | tunnel_flag;
+	}
+
+	return sizeof(*tnl);
+}
+
+#endif /* _TUNTAP_HELPERS_H */
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index bcc14688661d..170be192f5c7 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -206,12 +206,10 @@ static void __print_timestamp(const char *name, struct timespec *cur,
fprintf(stderr, "\n");
}
-static void print_timestamp_usr(void)
+/* Store the current CLOCK_REALTIME time in ts_usr; error() out if clock_gettime() fails. */
+static void record_timestamp_usr(void)
{
if (clock_gettime(CLOCK_REALTIME, &ts_usr))
error(1, errno, "clock_gettime");
-
- __print_timestamp(" USR", &ts_usr, 0, 0);
}
static void print_timestamp(struct scm_timestamping *tss, int tstype,
@@ -599,8 +597,6 @@ static void do_test(int family, unsigned int report_opt)
fill_header_udp(buf + off, family == PF_INET);
}
- print_timestamp_usr();
-
iov.iov_base = buf;
iov.iov_len = total_len;
@@ -655,10 +651,14 @@ static void do_test(int family, unsigned int report_opt)
}
+ record_timestamp_usr();
+
val = sendmsg(fd, &msg, 0);
if (val != total_len)
error(1, errno, "send");
+ __print_timestamp(" USR", &ts_usr, 0, 0);
+
/* wait for all errors to be queued, else ACKs arrive OOO */
if (cfg_sleep_usec)
usleep(cfg_sleep_usec);
diff --git a/tools/testing/selftests/ptp/phc.sh b/tools/testing/selftests/ptp/phc.sh
index ac6e5a6e1d3a..9f61c1579edf 100755
--- a/tools/testing/selftests/ptp/phc.sh
+++ b/tools/testing/selftests/ptp/phc.sh
@@ -8,17 +8,20 @@ ALL_TESTS="
"
DEV=$1
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
##############################################################################
# Sanity checks
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [[ "$DEV" == "" ]]; then
echo "SKIP: PTP device not provided"
- exit 0
+ exit $ksft_skip
fi
require_command()
@@ -27,7 +30,7 @@ require_command()
if [[ ! -x "$(command -v "$cmd")" ]]; then
echo "SKIP: $cmd not installed"
- exit 1
+ exit $ksft_skip
fi
}
@@ -37,7 +40,7 @@ phc_sanity()
if [ $? != 0 ]; then
echo "SKIP: unknown clock $DEV: No such device"
- exit 1
+ exit $ksft_skip
fi
}
@@ -49,6 +52,7 @@ phc_sanity
# Exit status to return at the end. Set in case one of the tests fails.
EXIT_STATUS=0
+PASS_COUNT=0
# Per-test return value. Clear at the beginning of each test.
RET=0
@@ -65,12 +69,18 @@ log_test()
{
local test_name=$1
+ if [[ $RET -eq $ksft_skip ]]; then
+ printf "TEST: %-60s [SKIP]\n" "$test_name"
+ return 0
+ fi
+
if [[ $RET -ne 0 ]]; then
EXIT_STATUS=1
printf "TEST: %-60s [FAIL]\n" "$test_name"
return 1
fi
+ ((PASS_COUNT++))
printf "TEST: %-60s [ OK ]\n" "$test_name"
return 0
}
@@ -89,34 +99,49 @@ tests_run()
settime_do()
{
- local res
+ # Set the clock to 0, wait 120.5 s and read it back. Succeeds when the
+ # whole-second part of the reported time is 120.
+ local res out
- res=$(phc_ctl $DEV set 0 wait 120.5 get 2> /dev/null \
- | awk '/clock time is/{print $5}' \
- | awk -F. '{print $1}')
+ # LC_ALL=C presumably keeps phc_ctl's messages untranslated so the grep
+ # below can match them - confirm.
+ out=$(LC_ALL=C phc_ctl $DEV set 0 wait 120.5 get 2>&1)
+ if [[ $? -ne 0 ]]; then
+ # phc_ctl failed: return the SKIP code when the output reports an
+ # unsupported operation, 1 otherwise.
+ if echo "$out" | grep -qi "Operation not supported"; then
+ return $ksft_skip
+ fi
+ return 1
+ fi
+ # Field 5 of the "clock time is" line, with the fractional part removed.
+ res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}')
(( res == 120 ))
}
adjtime_do()
{
- local res
+ # Set the clock to 0, apply "adj 10" and read it back. Succeeds when the
+ # whole-second part of the reported time is 10.
+ local res out
- res=$(phc_ctl $DEV set 0 adj 10 get 2> /dev/null \
- | awk '/clock time is/{print $5}' \
- | awk -F. '{print $1}')
+ out=$(LC_ALL=C phc_ctl $DEV set 0 adj 10 get 2>&1)
+ if [[ $? -ne 0 ]]; then
+ # phc_ctl failed: return the SKIP code when the output reports an
+ # unsupported operation, 1 otherwise.
+ if echo "$out" | grep -qi "Operation not supported"; then
+ return $ksft_skip
+ fi
+ return 1
+ fi
+ # Field 5 of the "clock time is" line, with the fractional part removed.
+ res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}')
(( res == 10 ))
}
adjfreq_do()
{
- local res
+ # Succeeds when, after the frequency change below and a 100.5 s wait from
+ # time 0, the whole-second part of the reported time is 101.
+ local res out
# Set the clock to be 1% faster
- res=$(phc_ctl $DEV freq 10000000 set 0 wait 100.5 get 2> /dev/null \
- | awk '/clock time is/{print $5}' \
- | awk -F. '{print $1}')
+ out=$(LC_ALL=C phc_ctl $DEV freq 10000000 set 0 wait 100.5 get 2>&1)
+ if [[ $? -ne 0 ]]; then
+ # phc_ctl failed: return the SKIP code when the output reports an
+ # unsupported operation, 1 otherwise.
+ if echo "$out" | grep -qi "Operation not supported"; then
+ return $ksft_skip
+ fi
+ return 1
+ fi
+ # Field 5 of the "clock time is" line, with the fractional part removed.
+ res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}')
(( res == 101 ))
}
@@ -163,4 +188,7 @@ trap cleanup EXIT
tests_run
+if [[ $EXIT_STATUS -eq 0 && $PASS_COUNT -eq 0 ]]; then
+ exit $ksft_skip
+fi
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json
new file mode 100644
index 000000000000..0efe229fb86e
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json
@@ -0,0 +1,559 @@
+[
+ {
+ "id": "684b",
+ "name": "Create CAKE_MQ with default setting (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device || true",
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7ee8",
+ "name": "Create CAKE_MQ with bandwidth limit (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq bandwidth 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth 1Kbit diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "1f87",
+ "name": "Create CAKE_MQ with rtt time (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq rtt 200",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 200us raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "e9cf",
+ "name": "Create CAKE_MQ with besteffort flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq besteffort",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited besteffort triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7c05",
+ "name": "Create CAKE_MQ with diffserv8 flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq diffserv8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv8 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "5a77",
+ "name": "Create CAKE_MQ with diffserv4 flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq diffserv4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv4 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "8f7a",
+ "name": "Create CAKE_MQ with flowblind flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq flowblind",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7ef7",
+ "name": "Create CAKE_MQ with dsthost and nat flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dsthost nat",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dsthost nat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2e4d",
+ "name": "Create CAKE_MQ with wash flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq hosts wash",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 hosts nonat wash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "b3e6",
+ "name": "Create CAKE_MQ with flowblind and no-split-gso flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq flowblind no-split-gso",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter no-split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "62cd",
+ "name": "Create CAKE_MQ with dual-srchost and ack-filter flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dual-srchost ack-filter",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dual-srchost nonat nowash ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "0df3",
+ "name": "Create CAKE_MQ with dual-dsthost and ack-filter-aggressive flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dual-dsthost ack-filter-aggressive",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dual-dsthost nonat nowash ack-filter-aggressive split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "9a75",
+ "name": "Create CAKE_MQ with memlimit and ptm flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq memlimit 10000 ptm",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw ptm overhead 0 memlimit 10000b ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "cdef",
+ "name": "Create CAKE_MQ with fwmark and atm flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq fwmark 8 atm",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw atm overhead 0 fwmark 0x8 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "93dd",
+ "name": "Create CAKE_MQ with overhead 128 and mpu (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 256 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "1475",
+ "name": "Create CAKE_MQ with conservative and ingress flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq conservative ingress",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7bf1",
+ "name": "Delete CAKE_MQ with conservative and ingress flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq conservative ingress"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48 ",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "ee55",
+ "name": "Replace CAKE_MQ with mpu (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH handle 1: root cake_mq mpu 128",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "6df9",
+ "name": "Change CAKE_MQ with mpu (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $ETH handle 1: root cake_mq mpu 128",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "67e2",
+ "name": "Show CAKE_MQ class (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $ETH",
+ "matchPattern": "class cake_mq",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2de4",
+ "name": "Change bandwidth of CAKE_MQ (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH handle 1: root cake_mq bandwidth 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth 1Kbit diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "5f62",
+ "name": "Fail to create CAKE_MQ with autorate-ingress flag (4 queues)",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq autorate-ingress",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited autorate-ingress diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "038e",
+ "name": "Fail to change setting of sub-qdisc under CAKE_MQ",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH parent 1:1 cake besteffort flows",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7bdc",
+ "name": "Fail to replace sub-qdisc under CAKE_MQ",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH handle 1: root cake_mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH parent 1:1 fq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "5",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "18e0",
+ "name": "Fail to install CAKE_MQ on single queue device",
+ "category": [
+ "qdisc",
+ "cake_mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings
index 694d70710ff0..79b65bdf05db 100644
--- a/tools/testing/selftests/vsock/settings
+++ b/tools/testing/selftests/vsock/settings
@@ -1 +1 @@
-timeout=300
+timeout=1200
diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh
index c7b270dd77a9..dc8dbe74a6d0 100755
--- a/tools/testing/selftests/vsock/vmtest.sh
+++ b/tools/testing/selftests/vsock/vmtest.sh
@@ -7,6 +7,7 @@
# * virtme-ng
# * busybox-static (used by virtme-ng)
# * qemu (used by virtme-ng)
+# * socat
#
# shellcheck disable=SC2317,SC2119
@@ -41,14 +42,119 @@ readonly KERNEL_CMDLINE="\
virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \
"
readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log)
-readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback)
+
+# Namespace tests must use the ns_ prefix. This is checked in check_netns() and
+# is used to determine if a test needs namespace setup before test execution.
+readonly TEST_NAMES=(
+ vm_server_host_client
+ vm_client_host_server
+ vm_loopback
+ ns_host_vsock_ns_mode_ok
+ ns_host_vsock_child_ns_mode_ok
+ ns_global_same_cid_fails
+ ns_local_same_cid_ok
+ ns_global_local_same_cid_ok
+ ns_local_global_same_cid_ok
+ ns_diff_global_host_connect_to_global_vm_ok
+ ns_diff_global_host_connect_to_local_vm_fails
+ ns_diff_global_vm_connect_to_global_host_ok
+ ns_diff_global_vm_connect_to_local_host_fails
+ ns_diff_local_host_connect_to_local_vm_fails
+ ns_diff_local_vm_connect_to_local_host_fails
+ ns_diff_global_to_local_loopback_local_fails
+ ns_diff_local_to_global_loopback_fails
+ ns_diff_local_to_local_loopback_fails
+ ns_diff_global_to_global_loopback_ok
+ ns_same_local_loopback_ok
+ ns_same_local_host_connect_to_local_vm_ok
+ ns_same_local_vm_connect_to_local_host_ok
+ ns_delete_vm_ok
+ ns_delete_host_ok
+ ns_delete_both_ok
+)
readonly TEST_DESCS=(
+ # vm_server_host_client
"Run vsock_test in server mode on the VM and in client mode on the host."
+
+ # vm_client_host_server
"Run vsock_test in client mode on the VM and in server mode on the host."
+
+ # vm_loopback
"Run vsock_test using the loopback transport in the VM."
+
+ # ns_host_vsock_ns_mode_ok
+ "Check /proc/sys/net/vsock/ns_mode strings on the host."
+
+ # ns_host_vsock_child_ns_mode_ok
+ "Check /proc/sys/net/vsock/ns_mode is read-only and child_ns_mode is writable."
+
+ # ns_global_same_cid_fails
+ "Check QEMU fails to start two VMs with same CID in two different global namespaces."
+
+ # ns_local_same_cid_ok
+ "Check QEMU successfully starts two VMs with same CID in two different local namespaces."
+
+ # ns_global_local_same_cid_ok
+ "Check QEMU successfully starts one VM in a global ns and then another VM in a local ns with the same CID."
+
+ # ns_local_global_same_cid_ok
+ "Check QEMU successfully starts one VM in a local ns and then another VM in a global ns with the same CID."
+
+ # ns_diff_global_host_connect_to_global_vm_ok
+ "Run vsock_test client in global ns with server in VM in another global ns."
+
+ # ns_diff_global_host_connect_to_local_vm_fails
+ "Run socat to test a process in a global ns fails to connect to a VM in a local ns."
+
+ # ns_diff_global_vm_connect_to_global_host_ok
+ "Run vsock_test client in VM in a global ns with server in another global ns."
+
+ # ns_diff_global_vm_connect_to_local_host_fails
+ "Run socat to test a VM in a global ns fails to connect to a host process in a local ns."
+
+ # ns_diff_local_host_connect_to_local_vm_fails
+ "Run socat to test a host process in a local ns fails to connect to a VM in another local ns."
+
+ # ns_diff_local_vm_connect_to_local_host_fails
+ "Run socat to test a VM in a local ns fails to connect to a host process in another local ns."
+
+ # ns_diff_global_to_local_loopback_local_fails
+ "Run socat to test a loopback vsock in a global ns fails to connect to a vsock in a local ns."
+
+ # ns_diff_local_to_global_loopback_fails
+ "Run socat to test a loopback vsock in a local ns fails to connect to a vsock in a global ns."
+
+ # ns_diff_local_to_local_loopback_fails
+ "Run socat to test a loopback vsock in a local ns fails to connect to a vsock in another local ns."
+
+ # ns_diff_global_to_global_loopback_ok
+ "Run socat to test a loopback vsock in a global ns successfully connects to a vsock in another global ns."
+
+ # ns_same_local_loopback_ok
+ "Run socat to test a loopback vsock in a local ns successfully connects to a vsock in the same ns."
+
+ # ns_same_local_host_connect_to_local_vm_ok
+ "Run vsock_test client in a local ns with server in VM in same ns."
+
+ # ns_same_local_vm_connect_to_local_host_ok
+ "Run vsock_test client in VM in a local ns with server in same ns."
+
+ # ns_delete_vm_ok
+ "Check that deleting the VM's namespace does not break the socket connection"
+
+ # ns_delete_host_ok
+ "Check that deleting the host's namespace does not break the socket connection"
+
+ # ns_delete_both_ok
+ "Check that deleting the VM and host's namespaces does not break the socket connection"
)
-readonly USE_SHARED_VM=(vm_server_host_client vm_client_host_server vm_loopback)
+readonly USE_SHARED_VM=(
+ vm_server_host_client
+ vm_client_host_server
+ vm_loopback
+)
+readonly NS_MODES=("local" "global")
VERBOSE=0
@@ -71,7 +177,7 @@ usage() {
for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do
name=${TEST_NAMES[${i}]}
desc=${TEST_DESCS[${i}]}
- printf "\t%-35s%-35s\n" "${name}" "${desc}"
+ printf "\t%-55s%-35s\n" "${name}" "${desc}"
done
echo
@@ -103,13 +209,55 @@ check_result() {
fi
}
+# Create the namespaces used by the ns_ tests: <mode>0 and <mode>1 for every
+# mode listed in NS_MODES.
+#
+# Before each pair is created the mode is written to
+# /proc/sys/net/vsock/child_ns_mode; the value that file held on entry is
+# written back once all namespaces exist.
+add_namespaces() {
+	local orig_mode
+	local mode
+
+	orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode)
+
+	for mode in "${NS_MODES[@]}"; do
+		echo "${mode}" > /proc/sys/net/vsock/child_ns_mode
+		# stderr is discarded; presumably so that an already existing
+		# namespace does not produce noise.
+		ip netns add "${mode}0" 2>/dev/null
+		ip netns add "${mode}1" 2>/dev/null
+	done
+
+	echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode
+}
+
+init_namespaces() {
+ for mode in "${NS_MODES[@]}"; do
+ # we need lo for qemu port forwarding
+ ip netns exec "${mode}0" ip link set dev lo up
+ ip netns exec "${mode}1" ip link set dev lo up
+ done
+}
+
+del_namespaces() {
+ for mode in "${NS_MODES[@]}"; do
+ ip netns del "${mode}0" &>/dev/null
+ ip netns del "${mode}1" &>/dev/null
+ log_host "removed ns ${mode}0"
+ log_host "removed ns ${mode}1"
+ done
+}
+
vm_ssh() {
- ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@"
+ local ns_exec
+
+ if [[ "${1}" == init_ns ]]; then
+ ns_exec=""
+ else
+ ns_exec="ip netns exec ${1}"
+ fi
+
+ shift
+
+ ${ns_exec} ssh -q -o UserKnownHostsFile=/dev/null -p "${SSH_HOST_PORT}" localhost "$@"
+
return $?
}
cleanup() {
terminate_pidfiles "${!PIDFILES[@]}"
+ del_namespaces
}
check_args() {
@@ -139,7 +287,7 @@ check_args() {
}
check_deps() {
- for dep in vng ${QEMU} busybox pkill ssh; do
+ for dep in vng ${QEMU} busybox pkill ssh ss socat; do
if [[ ! -x $(command -v "${dep}") ]]; then
echo -e "skip: dependency ${dep} not found!\n"
exit "${KSFT_SKIP}"
@@ -153,6 +301,20 @@ check_deps() {
fi
}
+check_netns() {
+ local tname=$1
+
+ # If the test requires NS support, check if NS support exists
+ # using /proc/self/ns
+ if [[ "${tname}" =~ ^ns_ ]] &&
+ [[ ! -e /proc/self/ns ]]; then
+ log_host "No NS support detected for test ${tname}"
+ return 1
+ fi
+
+ return 0
+}
+
check_vng() {
local tested_versions
local version
@@ -176,6 +338,20 @@ check_vng() {
fi
}
+check_socat() {
+ local support_string
+
+ support_string="$(socat -V)"
+
+ if [[ "${support_string}" != *"WITH_VSOCK 1"* ]]; then
+ die "err: socat is missing vsock support"
+ fi
+
+ if [[ "${support_string}" != *"WITH_UNIX 1"* ]]; then
+ die "err: socat is missing unix support"
+ fi
+}
+
handle_build() {
if [[ ! "${BUILD}" -eq 1 ]]; then
return
@@ -224,12 +400,22 @@ terminate_pidfiles() {
done
}
+terminate_pids() {
+ local pid
+
+ for pid in "$@"; do
+ kill -SIGTERM "${pid}" &>/dev/null || :
+ done
+}
+
vm_start() {
local pidfile=$1
+ local ns=$2
local logfile=/dev/null
local verbose_opt=""
local kernel_opt=""
local qemu_opts=""
+ local ns_exec=""
local qemu
qemu=$(command -v "${QEMU}")
@@ -250,7 +436,11 @@ vm_start() {
kernel_opt="${KERNEL_CHECKOUT}"
fi
- vng \
+ if [[ "${ns}" != "init_ns" ]]; then
+ ns_exec="ip netns exec ${ns}"
+ fi
+
+ ${ns_exec} vng \
--run \
${kernel_opt} \
${verbose_opt} \
@@ -265,6 +455,7 @@ vm_start() {
}
vm_wait_for_ssh() {
+ local ns=$1
local i
i=0
@@ -272,7 +463,8 @@ vm_wait_for_ssh() {
if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then
die "Timed out waiting for guest ssh"
fi
- if vm_ssh -- true; then
+
+ if vm_ssh "${ns}" -- true; then
break
fi
i=$(( i + 1 ))
@@ -286,50 +478,107 @@ wait_for_listener()
local port=$1
local interval=$2
local max_intervals=$3
- local protocol=tcp
- local pattern
+ local protocol=$4
local i
- pattern=":$(printf "%04X" "${port}") "
-
- # for tcp protocol additionally check the socket state
- [ "${protocol}" = "tcp" ] && pattern="${pattern}0A"
-
for i in $(seq "${max_intervals}"); do
- if awk -v pattern="${pattern}" \
- 'BEGIN {rc=1} $2" "$4 ~ pattern {rc=0} END {exit rc}' \
- /proc/net/"${protocol}"*; then
+ case "${protocol}" in
+ tcp)
+ if ss --listening --tcp --numeric | grep -q ":${port} "; then
+ break
+ fi
+ ;;
+ vsock)
+ if ss --listening --vsock --numeric | grep -q ":${port} "; then
+ break
+ fi
+ ;;
+ unix)
+ # For unix sockets, port is actually the socket path
+ if ss --listening --unix | grep -q "${port}"; then
+ break
+ fi
+ ;;
+ *)
+ echo "Unknown protocol: ${protocol}" >&2
break
- fi
+ ;;
+ esac
sleep "${interval}"
done
}
vm_wait_for_listener() {
- local port=$1
+ local ns=$1
+ local port=$2
+ local protocol=$3
- vm_ssh <<EOF
+ vm_ssh "${ns}" <<EOF
$(declare -f wait_for_listener)
-wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX}
+wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} ${protocol}
EOF
}
host_wait_for_listener() {
- local port=$1
+ local ns=$1
+ local port=$2
+ local protocol=$3
+
+ if [[ "${ns}" == "init_ns" ]]; then
+ wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}" "${protocol}"
+ else
+ ip netns exec "${ns}" bash <<-EOF
+ $(declare -f wait_for_listener)
+ wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} ${protocol}
+ EOF
+ fi
+}
+
+# Print how many lines of the VM's dmesg contain "Oops" (case-insensitive).
+# $1 is the namespace argument handed to vm_ssh.
+vm_dmesg_oops_count() {
+	local netns=$1
+
+	vm_ssh "${netns}" -- dmesg 2>/dev/null |
+		grep -ci 'Oops'
+}
+
+# Print how many warn-level lines of the VM's dmesg contain "vsock"
+# (case-insensitive). $1 is the namespace argument handed to vm_ssh.
+vm_dmesg_warn_count() {
+	local netns=$1
+
+	vm_ssh "${netns}" -- dmesg --level=warn 2>/dev/null |
+		grep -ci 'vsock'
+}
+
+vm_dmesg_check() {
+ local pidfile=$1
+ local ns=$2
+ local oops_before=$3
+ local warn_before=$4
+ local oops_after warn_after
+
+ oops_after=$(vm_dmesg_oops_count "${ns}")
+ if [[ "${oops_after}" -gt "${oops_before}" ]]; then
+ echo "FAIL: kernel oops detected on vm in ns ${ns}" | log_host
+ return 1
+ fi
+
+ warn_after=$(vm_dmesg_warn_count "${ns}")
+ if [[ "${warn_after}" -gt "${warn_before}" ]]; then
+ echo "FAIL: kernel warning detected on vm in ns ${ns}" | log_host
+ return 1
+ fi
- wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}"
+ return 0
}
vm_vsock_test() {
- local host=$1
- local cid=$2
- local port=$3
+ local ns=$1
+ local host=$2
+ local cid=$3
+ local port=$4
local rc
# log output and use pipefail to respect vsock_test errors
set -o pipefail
if [[ "${host}" != server ]]; then
- vm_ssh -- "${VSOCK_TEST}" \
+ vm_ssh "${ns}" -- "${VSOCK_TEST}" \
--mode=client \
--control-host="${host}" \
--peer-cid="${cid}" \
@@ -337,7 +586,7 @@ vm_vsock_test() {
2>&1 | log_guest
rc=$?
else
- vm_ssh -- "${VSOCK_TEST}" \
+ vm_ssh "${ns}" -- "${VSOCK_TEST}" \
--mode=server \
--peer-cid="${cid}" \
--control-port="${port}" \
@@ -349,7 +598,7 @@ vm_vsock_test() {
return $rc
fi
- vm_wait_for_listener "${port}"
+ vm_wait_for_listener "${ns}" "${port}" "tcp"
rc=$?
fi
set +o pipefail
@@ -358,25 +607,35 @@ vm_vsock_test() {
}
host_vsock_test() {
- local host=$1
- local cid=$2
- local port=$3
+ local ns=$1
+ local host=$2
+ local cid=$3
+ local port=$4
+ shift 4
+ local extra_args=("$@")
local rc
+ local cmd="${VSOCK_TEST}"
+ if [[ "${ns}" != "init_ns" ]]; then
+ cmd="ip netns exec ${ns} ${cmd}"
+ fi
+
# log output and use pipefail to respect vsock_test errors
set -o pipefail
if [[ "${host}" != server ]]; then
- ${VSOCK_TEST} \
+ ${cmd} \
--mode=client \
--peer-cid="${cid}" \
--control-host="${host}" \
- --control-port="${port}" 2>&1 | log_host
+ --control-port="${port}" \
+ "${extra_args[@]}" 2>&1 | log_host
rc=$?
else
- ${VSOCK_TEST} \
+ ${cmd} \
--mode=server \
--peer-cid="${cid}" \
- --control-port="${port}" 2>&1 | log_host &
+ --control-port="${port}" \
+ "${extra_args[@]}" 2>&1 | log_host &
rc=$?
if [[ $rc -ne 0 ]]; then
@@ -384,7 +643,7 @@ host_vsock_test() {
return $rc
fi
- host_wait_for_listener "${port}"
+ host_wait_for_listener "${ns}" "${port}" "tcp"
rc=$?
fi
set +o pipefail
@@ -427,12 +686,584 @@ log_guest() {
LOG_PREFIX=guest log "$@"
}
+# Print the content of /proc/sys/net/vsock/ns_mode as read from inside
+# namespace $1. Error output is discarded.
+ns_get_mode() {
+	local netns=$1
+
+	ip netns exec "${netns}" \
+		cat /proc/sys/net/vsock/ns_mode 2>/dev/null
+}
+
+# Check that, for every mode in NS_MODES, namespace <mode>0 reports exactly
+# that mode through /proc/sys/net/vsock/ns_mode.
+#
+# Returns KSFT_PASS if all namespaces match, KSFT_FAIL on the first mismatch.
+test_ns_host_vsock_ns_mode_ok() {
+	local mode
+	local actual
+
+	for mode in "${NS_MODES[@]}"; do
+		actual=$(ns_get_mode "${mode}0")
+		if [[ "${actual}" != "${mode}" ]]; then
+			log_host "expected mode ${mode}, got ${actual}"
+			return "${KSFT_FAIL}"
+		fi
+	done
+
+	return "${KSFT_PASS}"
+}
+
+test_ns_diff_global_host_connect_to_global_vm_ok() {
+ local oops_before warn_before
+ local pids pid pidfile
+ local ns0 ns1 port
+ declare -a pids
+ local unixfile
+ ns0="global0"
+ ns1="global1"
+ port=1234
+ local rc
+
+ init_namespaces
+
+ pidfile="$(create_pidfile)"
+
+ if ! vm_start "${pidfile}" "${ns0}"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ vm_wait_for_ssh "${ns0}"
+ oops_before=$(vm_dmesg_oops_count "${ns0}")
+ warn_before=$(vm_dmesg_warn_count "${ns0}")
+
+ unixfile=$(mktemp -u /tmp/XXXX.sock)
+ ip netns exec "${ns1}" \
+ socat TCP-LISTEN:"${TEST_HOST_PORT}",fork \
+ UNIX-CONNECT:"${unixfile}" &
+ pids+=($!)
+ host_wait_for_listener "${ns1}" "${TEST_HOST_PORT}" "tcp"
+
+ ip netns exec "${ns0}" socat UNIX-LISTEN:"${unixfile}",fork \
+ TCP-CONNECT:localhost:"${TEST_HOST_PORT}" &
+ pids+=($!)
+ host_wait_for_listener "${ns0}" "${unixfile}" "unix"
+
+ vm_vsock_test "${ns0}" "server" 2 "${TEST_GUEST_PORT}"
+ vm_wait_for_listener "${ns0}" "${TEST_GUEST_PORT}" "tcp"
+ host_vsock_test "${ns1}" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"
+ rc=$?
+
+ vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}"
+ dmesg_rc=$?
+
+ terminate_pids "${pids[@]}"
+ terminate_pidfiles "${pidfile}"
+
+ if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+test_ns_diff_global_host_connect_to_local_vm_fails() {
+ local oops_before warn_before
+ local ns0="global0"
+ local ns1="local0"
+ local port=12345
+ local dmesg_rc
+ local pidfile
+ local result
+ local pid
+
+ init_namespaces
+
+ outfile=$(mktemp)
+
+ pidfile="$(create_pidfile)"
+ if ! vm_start "${pidfile}" "${ns1}"; then
+ log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns0})"
+ return "${KSFT_FAIL}"
+ fi
+
+ vm_wait_for_ssh "${ns1}"
+ oops_before=$(vm_dmesg_oops_count "${ns1}")
+ warn_before=$(vm_dmesg_warn_count "${ns1}")
+
+ vm_ssh "${ns1}" -- socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" &
+ vm_wait_for_listener "${ns1}" "${port}" "vsock"
+ echo TEST | ip netns exec "${ns0}" \
+ socat STDIN VSOCK-CONNECT:"${VSOCK_CID}":"${port}" 2>/dev/null
+
+ vm_dmesg_check "${pidfile}" "${ns1}" "${oops_before}" "${warn_before}"
+ dmesg_rc=$?
+
+ terminate_pidfiles "${pidfile}"
+ result=$(cat "${outfile}")
+ rm -f "${outfile}"
+
+ if [[ "${result}" == "TEST" ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+# Run vsock_test with the client in a VM started in global0 and the server
+# on the host in global1.
+#
+# The vsock_test control connection is relayed between the namespaces by two
+# socat processes joined through a unix socket: a TCP listener on ${port} in
+# global0 forwards to the unix socket, and a unix listener in global1
+# forwards to TCP 127.0.0.1:${port} in that namespace.
+#
+# Returns KSFT_PASS if the client exits 0 and vm_dmesg_check finds no new
+# oops or vsock warning on the VM, KSFT_FAIL otherwise.
+test_ns_diff_global_vm_connect_to_global_host_ok() {
+	local oops_before warn_before
+	local ns0="global0"
+	local ns1="global1"
+	local port=12345
+	local -a pids=()
+	local unixfile
+	local dmesg_rc
+	local pidfile
+	local rc
+
+	init_namespaces
+
+	log_host "Setup socat bridge from ns ${ns0} to ns ${ns1} over port ${port}"
+
+	# Only a name is needed here; socat creates the socket itself.
+	unixfile=$(mktemp -u /tmp/XXXX.sock)
+
+	ip netns exec "${ns0}" \
+		socat TCP-LISTEN:"${port}" UNIX-CONNECT:"${unixfile}" &
+	pids+=($!)
+	host_wait_for_listener "${ns0}" "${port}" "tcp"
+
+	ip netns exec "${ns1}" \
+		socat UNIX-LISTEN:"${unixfile}" TCP-CONNECT:127.0.0.1:"${port}" &
+	pids+=($!)
+	host_wait_for_listener "${ns1}" "${unixfile}" "unix"
+
+	log_host "Launching ${VSOCK_TEST} in ns ${ns1}"
+	host_vsock_test "${ns1}" "server" "${VSOCK_CID}" "${port}"
+
+	pidfile="$(create_pidfile)"
+	if ! vm_start "${pidfile}" "${ns0}"; then
+		log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns0})"
+		terminate_pids "${pids[@]}"
+		rm -f "${unixfile}"
+		return "${KSFT_FAIL}"
+	fi
+
+	vm_wait_for_ssh "${ns0}"
+
+	oops_before=$(vm_dmesg_oops_count "${ns0}")
+	warn_before=$(vm_dmesg_warn_count "${ns0}")
+
+	vm_vsock_test "${ns0}" "10.0.2.2" 2 "${port}"
+	rc=$?
+
+	vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}"
+	dmesg_rc=$?
+
+	terminate_pidfiles "${pidfile}"
+	terminate_pids "${pids[@]}"
+	rm -f "${unixfile}"
+
+	if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+		return "${KSFT_FAIL}"
+	fi
+
+	return "${KSFT_PASS}"
+}
+
+# Check that a VM started in global0 cannot reach a vsock listener that runs
+# on the host in local0.
+#
+# A socat vsock listener in local0 writes its output to a temporary file; the
+# VM then tries to send "TEST" to CID 2 on the same port.
+#
+# Returns KSFT_PASS if "TEST" did not arrive and vm_dmesg_check is clean,
+# KSFT_FAIL otherwise.
+test_ns_diff_global_vm_connect_to_local_host_fails() {
+	local ns0="global0"
+	local ns1="local0"
+	local port=12345
+	local oops_before warn_before
+	local dmesg_rc
+	local pidfile
+	local outfile
+	local result
+	local pid
+
+	init_namespaces
+
+	log_host "Launching socat in ns ${ns1}"
+	outfile=$(mktemp)
+
+	ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT &> "${outfile}" &
+	pid=$!
+	host_wait_for_listener "${ns1}" "${port}" "vsock"
+
+	pidfile="$(create_pidfile)"
+	if ! vm_start "${pidfile}" "${ns0}"; then
+		log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns0})"
+		terminate_pids "${pid}"
+		rm -f "${outfile}"
+		return "${KSFT_FAIL}"
+	fi
+
+	vm_wait_for_ssh "${ns0}"
+
+	oops_before=$(vm_dmesg_oops_count "${ns0}")
+	warn_before=$(vm_dmesg_warn_count "${ns0}")
+
+	vm_ssh "${ns0}" -- \
+		bash -c "echo TEST | socat STDIN VSOCK-CONNECT:2:${port}" 2>&1 | log_guest
+
+	vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}"
+	dmesg_rc=$?
+
+	terminate_pidfiles "${pidfile}"
+	terminate_pids "${pid}"
+
+	result=$(cat "${outfile}")
+	rm -f "${outfile}"
+
+	if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then
+		return "${KSFT_PASS}"
+	fi
+
+	return "${KSFT_FAIL}"
+}
+
+test_ns_diff_local_host_connect_to_local_vm_fails() {
+ local ns0="local0"
+ local ns1="local1"
+ local port=12345
+ local oops_before warn_before
+ local dmesg_rc
+ local pidfile
+ local result
+ local pid
+
+ init_namespaces
+
+ outfile=$(mktemp)
+
+ pidfile="$(create_pidfile)"
+ if ! vm_start "${pidfile}" "${ns1}"; then
+ log_host "failed to start vm (cid=${cid}, ns=${ns0})"
+ return "${KSFT_FAIL}"
+ fi
+
+ vm_wait_for_ssh "${ns1}"
+ oops_before=$(vm_dmesg_oops_count "${ns1}")
+ warn_before=$(vm_dmesg_warn_count "${ns1}")
+
+ vm_ssh "${ns1}" -- socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" &
+ vm_wait_for_listener "${ns1}" "${port}" "vsock"
+
+ echo TEST | ip netns exec "${ns0}" \
+ socat STDIN VSOCK-CONNECT:"${VSOCK_CID}":"${port}" 2>/dev/null
+
+ vm_dmesg_check "${pidfile}" "${ns1}" "${oops_before}" "${warn_before}"
+ dmesg_rc=$?
+
+ terminate_pidfiles "${pidfile}"
+
+ result=$(cat "${outfile}")
+ rm -f "${outfile}"
+
+ if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+# Check that a VM started in local0 cannot reach a vsock listener that runs
+# on the host in local1.
+#
+# A socat vsock listener in local1 writes its output to a temporary file; the
+# VM then tries to send "TEST" to CID 2 on the same port.
+#
+# Returns KSFT_PASS if "TEST" did not arrive and vm_dmesg_check is clean,
+# KSFT_FAIL otherwise.
+test_ns_diff_local_vm_connect_to_local_host_fails() {
+	local oops_before warn_before
+	local ns0="local0"
+	local ns1="local1"
+	local port=12345
+	local dmesg_rc
+	local pidfile
+	local outfile
+	local result
+	local pid
+
+	init_namespaces
+
+	log_host "Launching socat in ns ${ns1}"
+	outfile=$(mktemp)
+	ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT &> "${outfile}" &
+	pid=$!
+	host_wait_for_listener "${ns1}" "${port}" "vsock"
+
+	pidfile="$(create_pidfile)"
+	if ! vm_start "${pidfile}" "${ns0}"; then
+		log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns0})"
+		# Do not leave the background listener running.
+		terminate_pids "${pid}"
+		rm -f "${outfile}"
+		return "${KSFT_FAIL}"
+	fi
+
+	vm_wait_for_ssh "${ns0}"
+	oops_before=$(vm_dmesg_oops_count "${ns0}")
+	warn_before=$(vm_dmesg_warn_count "${ns0}")
+
+	vm_ssh "${ns0}" -- \
+		bash -c "echo TEST | socat STDIN VSOCK-CONNECT:2:${port}" 2>&1 | log_guest
+
+	vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}"
+	dmesg_rc=$?
+
+	terminate_pidfiles "${pidfile}"
+	terminate_pids "${pid}"
+
+	result=$(cat "${outfile}")
+	rm -f "${outfile}"
+
+	if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then
+		return "${KSFT_PASS}"
+	fi
+
+	return "${KSFT_FAIL}"
+}
+
+__test_loopback_two_netns() {
+ local ns0=$1
+ local ns1=$2
+ local port=12345
+ local result
+ local pid
+
+ modprobe vsock_loopback &> /dev/null || :
+
+ log_host "Launching socat in ns ${ns1}"
+ outfile=$(mktemp)
+
+ ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" 2>/dev/null &
+ pid=$!
+ host_wait_for_listener "${ns1}" "${port}" "vsock"
+
+ log_host "Launching socat in ns ${ns0}"
+ echo TEST | ip netns exec "${ns0}" socat STDIN VSOCK-CONNECT:1:"${port}" 2>/dev/null
+ terminate_pids "${pid}"
+
+ result=$(cat "${outfile}")
+ rm -f "${outfile}"
+
+ if [[ "${result}" == TEST ]]; then
+ return 0
+ fi
+
+ return 1
+}
+
+test_ns_diff_global_to_local_loopback_local_fails() {
+ init_namespaces
+
+ if ! __test_loopback_two_netns "global0" "local0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_diff_local_to_global_loopback_fails() {
+ init_namespaces
+
+ if ! __test_loopback_two_netns "local0" "global0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_diff_local_to_local_loopback_fails() {
+ init_namespaces
+
+ if ! __test_loopback_two_netns "local0" "local1"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_diff_global_to_global_loopback_ok() {
+ init_namespaces
+
+ if __test_loopback_two_netns "global0" "global1"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_same_local_loopback_ok() {
+ init_namespaces
+
+ if __test_loopback_two_netns "local0" "local0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+# Run vsock_test with the server in a VM started in local0 and the client on
+# the host in the same namespace.
+#
+# Returns KSFT_PASS if the client exits 0 and vm_dmesg_check reports no
+# problem on the VM, KSFT_FAIL otherwise (including when the VM fails to
+# start).
+test_ns_same_local_host_connect_to_local_vm_ok() {
+ local oops_before warn_before
+ local ns="local0"
+ # NOTE(review): port is not referenced anywhere below.
+ local port=1234
+ local dmesg_rc
+ local pidfile
+ local rc
+
+ init_namespaces
+
+ pidfile="$(create_pidfile)"
+
+ if ! vm_start "${pidfile}" "${ns}"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ vm_wait_for_ssh "${ns}"
+ # Baseline dmesg counts, compared against by vm_dmesg_check below.
+ oops_before=$(vm_dmesg_oops_count "${ns}")
+ warn_before=$(vm_dmesg_warn_count "${ns}")
+
+ # The exit status of the server start is not checked here.
+ vm_vsock_test "${ns}" "server" 2 "${TEST_GUEST_PORT}"
+
+ # Skip test 29 (transport release use-after-free): This test attempts
+ # binding both G2H and H2G CIDs. Because virtio-vsock (G2H) doesn't
+ # support local namespaces the test will fail when
+ # transport_g2h->stream_allow() returns false. This edge case only
+ # happens for vsock_test in client mode on the host in a local
+ # namespace. This is a false positive.
+ host_vsock_test "${ns}" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}" --skip=29
+ rc=$?
+
+ vm_dmesg_check "${pidfile}" "${ns}" "${oops_before}" "${warn_before}"
+ dmesg_rc=$?
+
+ terminate_pidfiles "${pidfile}"
+
+ if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+# Run vsock_test with the server on the host in local0 and the client in a
+# VM started in the same namespace.
+#
+# Returns KSFT_PASS if the client exits 0 and vm_dmesg_check reports no
+# problem on the VM, KSFT_FAIL otherwise (including when the VM fails to
+# start).
+test_ns_same_local_vm_connect_to_local_host_ok() {
+ local oops_before warn_before
+ local ns="local0"
+ local port=1234
+ local dmesg_rc
+ local pidfile
+ local rc
+
+ init_namespaces
+
+ pidfile="$(create_pidfile)"
+
+ if ! vm_start "${pidfile}" "${ns}"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ vm_wait_for_ssh "${ns}"
+ # Baseline dmesg counts, compared against by vm_dmesg_check below.
+ oops_before=$(vm_dmesg_oops_count "${ns}")
+ warn_before=$(vm_dmesg_warn_count "${ns}")
+
+ # The exit status of the server start is not checked; only the client's
+ # status is captured in rc.
+ host_vsock_test "${ns}" "server" "${VSOCK_CID}" "${port}"
+ vm_vsock_test "${ns}" "10.0.2.2" 2 "${port}"
+ rc=$?
+
+ vm_dmesg_check "${pidfile}" "${ns}" "${oops_before}" "${warn_before}"
+ dmesg_rc=$?
+
+ terminate_pidfiles "${pidfile}"
+
+ if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+namespaces_can_boot_same_cid() {
+ local ns0=$1
+ local ns1=$2
+ local pidfile1 pidfile2
+ local rc
+
+ pidfile1="$(create_pidfile)"
+
+ # The first VM should be able to start. If it can't then we have
+ # problems and need to return non-zero.
+ if ! vm_start "${pidfile1}" "${ns0}"; then
+ return 1
+ fi
+
+ pidfile2="$(create_pidfile)"
+ vm_start "${pidfile2}" "${ns1}"
+ rc=$?
+ terminate_pidfiles "${pidfile1}" "${pidfile2}"
+
+ return "${rc}"
+}
+
+test_ns_global_same_cid_fails() {
+ init_namespaces
+
+ if namespaces_can_boot_same_cid "global0" "global1"; then
+ return "${KSFT_FAIL}"
+ fi
+
+ return "${KSFT_PASS}"
+}
+
+test_ns_local_global_same_cid_ok() {
+ init_namespaces
+
+ if namespaces_can_boot_same_cid "local0" "global0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_global_local_same_cid_ok() {
+ init_namespaces
+
+ if namespaces_can_boot_same_cid "global0" "local0"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+test_ns_local_same_cid_ok() {
+ init_namespaces
+
+ if namespaces_can_boot_same_cid "local0" "local1"; then
+ return "${KSFT_PASS}"
+ fi
+
+ return "${KSFT_FAIL}"
+}
+
+# Check that /proc/sys/net/vsock/ns_mode rejects writes and that
+# /proc/sys/net/vsock/child_ns_mode accepts every mode in NS_MODES.
+#
+# All modes are tried even after a failure. The value child_ns_mode held on
+# entry is written back before returning.
+#
+# Returns KSFT_PASS if every check succeeded, KSFT_FAIL otherwise.
+test_ns_host_vsock_child_ns_mode_ok() {
+	local orig_mode
+	local mode
+	local rc
+
+	orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode)
+
+	rc="${KSFT_PASS}"
+	for mode in "${NS_MODES[@]}"; do
+		if echo "${mode}" 2>/dev/null > /proc/sys/net/vsock/ns_mode; then
+			log_host "ns_mode should be read-only but write succeeded"
+			rc="${KSFT_FAIL}"
+			continue
+		fi
+
+		if ! echo "${mode}" > /proc/sys/net/vsock/child_ns_mode; then
+			log_host "child_ns_mode should be writable to ${mode}"
+			rc="${KSFT_FAIL}"
+		fi
+	done
+
+	echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode
+
+	return "${rc}"
+}
+
test_vm_server_host_client() {
- if ! vm_vsock_test "server" 2 "${TEST_GUEST_PORT}"; then
+ if ! vm_vsock_test "init_ns" "server" 2 "${TEST_GUEST_PORT}"; then
return "${KSFT_FAIL}"
fi
- if ! host_vsock_test "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then
+ if ! host_vsock_test "init_ns" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then
return "${KSFT_FAIL}"
fi
@@ -440,11 +1271,11 @@ test_vm_server_host_client() {
}
test_vm_client_host_server() {
- if ! host_vsock_test "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then
+ if ! host_vsock_test "init_ns" "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then
return "${KSFT_FAIL}"
fi
- if ! vm_vsock_test "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then
+ if ! vm_vsock_test "init_ns" "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then
return "${KSFT_FAIL}"
fi
@@ -454,19 +1285,92 @@ test_vm_client_host_server() {
test_vm_loopback() {
local port=60000 # non-forwarded local port
- vm_ssh -- modprobe vsock_loopback &> /dev/null || :
+ vm_ssh "init_ns" -- modprobe vsock_loopback &> /dev/null || :
- if ! vm_vsock_test "server" 1 "${port}"; then
+ if ! vm_vsock_test "init_ns" "server" 1 "${port}"; then
return "${KSFT_FAIL}"
fi
- if ! vm_vsock_test "127.0.0.1" 1 "${port}"; then
+
+ if ! vm_vsock_test "init_ns" "127.0.0.1" 1 "${port}"; then
return "${KSFT_FAIL}"
fi
return "${KSFT_PASS}"
}
+# Check that an established vsock connection between a host process and a VM
+# keeps working after one or both of their namespaces are deleted.
+#
+# $1 selects what to delete once the connection is set up: "vm" (global0,
+# where the VM runs), "host" (global1, where the host-side socat runs) or
+# "both". Any other value deletes nothing.
+#
+# Returns KSFT_PASS if "TEST" written on the host side shows up in the VM
+# listener's output, KSFT_FAIL otherwise (including when the VM fails to
+# start).
+check_ns_delete_doesnt_break_connection() {
+ local pipefile pidfile outfile
+ local ns0="global0"
+ local ns1="global1"
+ local port=12345
+ local pids=()
+ local rc=0
+
+ init_namespaces
+
+ pidfile="$(create_pidfile)"
+ if ! vm_start "${pidfile}" "${ns0}"; then
+ return "${KSFT_FAIL}"
+ fi
+ vm_wait_for_ssh "${ns0}"
+
+ # Listener inside the VM; everything it receives is captured in outfile
+ # on the host through the ssh session's stdout.
+ outfile=$(mktemp)
+ vm_ssh "${ns0}" -- \
+ socat VSOCK-LISTEN:"${port}",fork STDOUT > "${outfile}" 2>/dev/null &
+ pids+=($!)
+ vm_wait_for_listener "${ns0}" "${port}" "vsock"
+
+ # We use a pipe here so that we can echo into the pipe instead of using
+ # socat and a unix socket file. We just need a name for the pipe (not a
+ # regular file) so use -u.
+ pipefile=$(mktemp -u /tmp/vmtest_pipe_XXXX)
+ ip netns exec "${ns1}" \
+ socat PIPE:"${pipefile}" VSOCK-CONNECT:"${VSOCK_CID}":"${port}" &
+ pids+=($!)
+
+ # Wait (bounded by WAIT_PERIOD) until the pipe exists.
+ timeout "${WAIT_PERIOD}" \
+ bash -c 'while [[ ! -e '"${pipefile}"' ]]; do sleep 1; done; exit 0'
+
+ # Delete the requested namespace(s) while both socat processes are
+ # still running.
+ if [[ "$1" == "vm" ]]; then
+ ip netns del "${ns0}"
+ elif [[ "$1" == "host" ]]; then
+ ip netns del "${ns1}"
+ elif [[ "$1" == "both" ]]; then
+ ip netns del "${ns0}"
+ ip netns del "${ns1}"
+ fi
+
+ echo "TEST" > "${pipefile}"
+
+ # Wait (bounded by WAIT_PERIOD) until the listener has written something.
+ timeout "${WAIT_PERIOD}" \
+ bash -c 'while [[ ! -s '"${outfile}"' ]]; do sleep 1; done; exit 0'
+
+ if grep -q "TEST" "${outfile}"; then
+ rc="${KSFT_PASS}"
+ else
+ rc="${KSFT_FAIL}"
+ fi
+
+ terminate_pidfiles "${pidfile}"
+ terminate_pids "${pids[@]}"
+ rm -f "${outfile}" "${pipefile}"
+
+ return "${rc}"
+}
+
+# Connection must survive deleting the namespace the VM was started in.
+test_ns_delete_vm_ok() {
+ check_ns_delete_doesnt_break_connection "vm"
+}
+
+# Connection must survive deleting the namespace of the host-side process.
+test_ns_delete_host_ok() {
+ check_ns_delete_doesnt_break_connection "host"
+}
+
+# Connection must survive deleting both the VM's and the host's namespace.
+test_ns_delete_both_ok() {
+ check_ns_delete_doesnt_break_connection "both"
+}
+
shared_vm_test() {
local tname
@@ -499,6 +1403,11 @@ run_shared_vm_tests() {
continue
fi
+ if ! check_netns "${arg}"; then
+ check_result "${KSFT_SKIP}" "${arg}"
+ continue
+ fi
+
run_shared_vm_test "${arg}"
check_result "$?" "${arg}"
done
@@ -518,8 +1427,8 @@ run_shared_vm_test() {
host_oops_cnt_before=$(dmesg | grep -c -i 'Oops')
host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock')
- vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops')
- vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
+ vm_oops_cnt_before=$(vm_dmesg_oops_count "init_ns")
+ vm_warn_cnt_before=$(vm_dmesg_warn_count "init_ns")
name=$(echo "${1}" | awk '{ print $1 }')
eval test_"${name}"
@@ -537,13 +1446,13 @@ run_shared_vm_test() {
rc=$KSFT_FAIL
fi
- vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l)
+ vm_oops_cnt_after=$(vm_dmesg_oops_count "init_ns")
if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then
echo "FAIL: kernel oops detected on vm" | log_host
rc=$KSFT_FAIL
fi
- vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock')
+ vm_warn_cnt_after=$(vm_dmesg_warn_count "init_ns")
if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then
echo "FAIL: kernel warning detected on vm" | log_host
rc=$KSFT_FAIL
@@ -552,6 +1461,49 @@ run_shared_vm_test() {
return "${rc}"
}
+# Run every requested test that is not handled by the shared VM.
+#
+# Entries of ARGS for which shared_vm_test succeeds are ignored here. Entries
+# rejected by check_netns are reported as skipped. For every remaining test
+# the namespaces are created, test_<name> is executed and the namespaces are
+# removed again. A test is reported as failed, regardless of its own exit
+# status, if the host dmesg gained an 'Oops' line or a warn-level 'vsock'
+# line while it ran.
+run_ns_tests() {
+	local arg name rc
+	local host_oops_before host_oops_after
+	local host_warn_before host_warn_after
+
+	for arg in "${ARGS[@]}"; do
+		if shared_vm_test "${arg}"; then
+			continue
+		fi
+
+		if ! check_netns "${arg}"; then
+			check_result "${KSFT_SKIP}" "${arg}"
+			continue
+		fi
+
+		add_namespaces
+
+		name=$(echo "${arg}" | awk '{ print $1 }')
+		log_host "Executing test_${name}"
+
+		host_oops_before=$(dmesg 2>/dev/null | grep -c -i 'Oops')
+		host_warn_before=$(dmesg --level=warn 2>/dev/null | grep -c -i 'vsock')
+		eval test_"${name}"
+		rc=$?
+
+		host_oops_after=$(dmesg 2>/dev/null | grep -c -i 'Oops')
+		host_warn_after=$(dmesg --level=warn 2>/dev/null | grep -c -i 'vsock')
+
+		# An oops takes precedence over a warning; either one overrides
+		# the test's own result.
+		if [[ "${host_oops_after}" -gt "${host_oops_before}" ]]; then
+			echo "FAIL: kernel oops detected on host" | log_host
+			rc="${KSFT_FAIL}"
+		elif [[ "${host_warn_after}" -gt "${host_warn_before}" ]]; then
+			echo "FAIL: kernel warning detected on host" | log_host
+			rc="${KSFT_FAIL}"
+		fi
+
+		check_result "${rc}" "${name}"
+
+		del_namespaces
+	done
+}
+
BUILD=0
QEMU="qemu-system-$(uname -m)"
@@ -577,6 +1529,7 @@ fi
check_args "${ARGS[@]}"
check_deps
check_vng
+check_socat
handle_build
echo "1..${#ARGS[@]}"
@@ -589,14 +1542,16 @@ cnt_total=0
if shared_vm_tests_requested "${ARGS[@]}"; then
log_host "Booting up VM"
pidfile="$(create_pidfile)"
- vm_start "${pidfile}"
- vm_wait_for_ssh
+ vm_start "${pidfile}" "init_ns"
+ vm_wait_for_ssh "init_ns"
log_host "VM booted up"
run_shared_vm_tests "${ARGS[@]}"
terminate_pidfiles "${pidfile}"
fi
+run_ns_tests "${ARGS[@]}"
+
echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}"
echo "Log: ${LOG}"