diff options
Diffstat (limited to 'tools')
132 files changed, 8251 insertions, 1161 deletions
diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py index af02a5b7e5a2..94a5ba348b69 100755 --- a/tools/net/ynl/pyynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -1,43 +1,85 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +""" +YNL cli tool +""" + import argparse import json import os import pathlib import pprint +import shutil import sys import textwrap +# pylint: disable=no-name-in-module,wrong-import-position sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) -from lib import YnlFamily, Netlink, NlError, SpecFamily +from lib import YnlFamily, Netlink, NlError, SpecFamily, SpecException, YnlException + +SYS_SCHEMA_DIR='/usr/share/ynl' +RELATIVE_SCHEMA_DIR='../../../../Documentation/netlink' + +# pylint: disable=too-few-public-methods,too-many-locals +class Colors: + """ANSI color and font modifier codes""" + RESET = '\033[0m' + + BOLD = '\033[1m' + ITALICS = '\033[3m' + UNDERLINE = '\033[4m' + INVERT = '\033[7m' + + +def color(text, modifiers): + """Add color to text if output is a TTY -sys_schema_dir='/usr/share/ynl' -relative_schema_dir='../../../../Documentation/netlink' + Returns: + Colored text if stdout is a TTY, otherwise plain text + """ + if sys.stdout.isatty(): + # Join the colors if they are a list, if it's a string this a noop + modifiers = "".join(modifiers) + return f"{modifiers}{text}{Colors.RESET}" + return text + +def term_width(): + """ Get terminal width in columns (80 if stdout is not a terminal) """ + return shutil.get_terminal_size().columns def schema_dir(): + """ + Return the effective schema directory, preferring in-tree before + system schema directory. + """ script_dir = os.path.dirname(os.path.abspath(__file__)) - schema_dir = os.path.abspath(f"{script_dir}/{relative_schema_dir}") - if not os.path.isdir(schema_dir): - schema_dir = sys_schema_dir - if not os.path.isdir(schema_dir): - raise Exception(f"Schema directory {schema_dir} does not exist") - return schema_dir + schema_dir_ = os.path.abspath(f"{script_dir}/{RELATIVE_SCHEMA_DIR}") + if not os.path.isdir(schema_dir_): + schema_dir_ = SYS_SCHEMA_DIR + if not os.path.isdir(schema_dir_): + raise YnlException(f"Schema directory {schema_dir_} does not exist") + return schema_dir_ def spec_dir(): - spec_dir = schema_dir() + '/specs' - if not os.path.isdir(spec_dir): - raise Exception(f"Spec directory {spec_dir} does not exist") - return spec_dir + """ + Return the effective spec directory, relative to the effective + schema directory. + """ + spec_dir_ = schema_dir() + '/specs' + if not os.path.isdir(spec_dir_): + raise YnlException(f"Spec directory {spec_dir_} does not exist") + return spec_dir_ class YnlEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, bytes): - return bytes.hex(obj) - if isinstance(obj, set): - return list(obj) - return json.JSONEncoder.default(self, obj) + """A custom encoder for emitting JSON with ynl-specific instance types""" + def default(self, o): + if isinstance(o, bytes): + return bytes.hex(o) + if isinstance(o, set): + return list(o) + return json.JSONEncoder.default(self, o) def print_attr_list(ynl, attr_names, attr_set, indent=2): @@ -46,7 +88,7 @@ def print_attr_list(ynl, attr_names, attr_set, indent=2): for attr_name in attr_names: if attr_name in attr_set.attrs: attr = attr_set.attrs[attr_name] - attr_info = f'{prefix}- {attr_name}: {attr.type}' + attr_info = f'{prefix}- {color(attr_name, Colors.BOLD)}: {attr.type}' if 'enum' in attr.yaml: enum_name = attr.yaml['enum'] attr_info += f" (enum: {enum_name})" @@ -54,7 +96,8 @@ def print_attr_list(ynl, attr_names, attr_set, indent=2): if enum_name in ynl.consts: const = ynl.consts[enum_name] enum_values = list(const.entries.keys()) - attr_info += f"\n{prefix} {const.type.capitalize()}: {', '.join(enum_values)}" + type_fmted = color(const.type.capitalize(), Colors.ITALICS) + attr_info += f"\n{prefix} {type_fmted}: {', '.join(enum_values)}" # Show nested attributes reference and recursively display them nested_set_name = None @@ -63,7 +106,10 @@ def print_attr_list(ynl, attr_names, attr_set, indent=2): attr_info += f" -> {nested_set_name}" if attr.yaml.get('doc'): - doc_text = textwrap.indent(attr.yaml['doc'], prefix + ' ') + doc_prefix = prefix + ' ' * 4 + doc_text = textwrap.fill(attr.yaml['doc'], width=term_width(), + initial_indent=doc_prefix, + subsequent_indent=doc_prefix) attr_info += f"\n{doc_text}" print(attr_info) @@ -77,24 +123,62 @@ def print_attr_list(ynl, attr_names, attr_set, indent=2): print_attr_list(ynl, nested_names, nested_set, indent + 4) -def print_mode_attrs(ynl, mode, mode_spec, attr_set, print_request=True): +def print_mode_attrs(ynl, mode, mode_spec, attr_set, consistent_dd_reply=None): """Print a given mode (do/dump/event/notify).""" mode_title = mode.capitalize() - if print_request and 'request' in mode_spec and 'attributes' in mode_spec['request']: + if 'request' in mode_spec and 'attributes' in mode_spec['request']: print(f'\n{mode_title} request attributes:') print_attr_list(ynl, mode_spec['request']['attributes'], attr_set) if 'reply' in mode_spec and 'attributes' in mode_spec['reply']: - print(f'\n{mode_title} reply attributes:') - print_attr_list(ynl, mode_spec['reply']['attributes'], attr_set) + if consistent_dd_reply and mode == "do": + title = None # Dump handling will print in combined format + elif consistent_dd_reply and mode == "dump": + title = 'Do and Dump' + else: + title = f'{mode_title}' + if title: + print(f'\n{title} reply attributes:') + print_attr_list(ynl, mode_spec['reply']['attributes'], attr_set) + + +def do_doc(ynl, op): + """Handle --list-attrs $op, print the attr information to stdout""" + print(f'Operation: {color(op.name, Colors.BOLD)}') + print(op.yaml['doc']) + + consistent_dd_reply = False + if 'do' in op.yaml and 'dump' in op.yaml and 'reply' in op.yaml['do'] and \ + op.yaml['do']['reply'] == op.yaml['dump'].get('reply'): + consistent_dd_reply = True + + for mode in ['do', 'dump']: + if mode in op.yaml: + print_mode_attrs(ynl, mode, op.yaml[mode], op.attr_set, + consistent_dd_reply=consistent_dd_reply) + + if 'attributes' in op.yaml.get('event', {}): + print('\nEvent attributes:') + print_attr_list(ynl, op.yaml['event']['attributes'], op.attr_set) - if 'attributes' in mode_spec: - print(f'\n{mode_title} attributes:') - print_attr_list(ynl, mode_spec['attributes'], attr_set) + if 'notify' in op.yaml: + mode_spec = op.yaml['notify'] + ref_spec = ynl.msgs.get(mode_spec).yaml.get('do') + if not ref_spec: + ref_spec = ynl.msgs.get(mode_spec).yaml.get('dump') + if ref_spec: + print('\nNotification attributes:') + print_attr_list(ynl, ref_spec['reply']['attributes'], op.attr_set) + if 'mcgrp' in op.yaml: + print(f"\nMulticast group: {op.yaml['mcgrp']}") + +# pylint: disable=too-many-locals,too-many-branches,too-many-statements def main(): + """YNL cli tool""" + description = """ YNL CLI utility - a general purpose netlink utility that uses YAML specs to drive protocol encoding and decoding. @@ -105,54 +189,85 @@ def main(): """ parser = argparse.ArgumentParser(description=description, - epilog=epilog) - spec_group = parser.add_mutually_exclusive_group(required=True) - spec_group.add_argument('--family', dest='family', type=str, - help='name of the netlink FAMILY') - spec_group.add_argument('--list-families', action='store_true', - help='list all netlink families supported by YNL (has spec)') - spec_group.add_argument('--spec', dest='spec', type=str, - help='choose the family by SPEC file path') - - parser.add_argument('--schema', dest='schema', type=str) - parser.add_argument('--no-schema', action='store_true') - parser.add_argument('--json', dest='json_text', type=str) - - group = parser.add_mutually_exclusive_group() - group.add_argument('--do', dest='do', metavar='DO-OPERATION', type=str) - group.add_argument('--multi', dest='multi', nargs=2, action='append', - metavar=('DO-OPERATION', 'JSON_TEXT'), type=str) - group.add_argument('--dump', dest='dump', metavar='DUMP-OPERATION', type=str) - group.add_argument('--list-ops', action='store_true') - group.add_argument('--list-msgs', action='store_true') - group.add_argument('--list-attrs', dest='list_attrs', metavar='OPERATION', type=str, - help='List attributes for an operation') - group.add_argument('--validate', action='store_true') - - parser.add_argument('--duration', dest='duration', type=int, - help='when subscribed, watch for DURATION seconds') - parser.add_argument('--sleep', dest='duration', type=int, - help='alias for duration') - parser.add_argument('--subscribe', dest='ntf', type=str) - parser.add_argument('--replace', dest='flags', action='append_const', - const=Netlink.NLM_F_REPLACE) - parser.add_argument('--excl', dest='flags', action='append_const', - const=Netlink.NLM_F_EXCL) - parser.add_argument('--create', dest='flags', action='append_const', - const=Netlink.NLM_F_CREATE) - parser.add_argument('--append', dest='flags', action='append_const', - const=Netlink.NLM_F_APPEND) - parser.add_argument('--process-unknown', action=argparse.BooleanOptionalAction) - parser.add_argument('--output-json', action='store_true') - parser.add_argument('--dbg-small-recv', default=0, const=4000, - action='store', nargs='?', type=int) + epilog=epilog, add_help=False) + + gen_group = parser.add_argument_group('General options') + gen_group.add_argument('-h', '--help', action='help', + help='show this help message and exit') + + spec_group = parser.add_argument_group('Netlink family selection') + spec_sel = spec_group.add_mutually_exclusive_group(required=True) + spec_sel.add_argument('--list-families', action='store_true', + help=('list Netlink families supported by YNL ' + '(which have a spec available in the standard ' + 'system path)')) + spec_sel.add_argument('--family', dest='family', type=str, + help='name of the Netlink FAMILY to use') + spec_sel.add_argument('--spec', dest='spec', type=str, + help='full file path to the YAML spec file') + + ops_group = parser.add_argument_group('Operations') + ops = ops_group.add_mutually_exclusive_group() + ops.add_argument('--do', dest='do', metavar='DO-OPERATION', type=str) + ops.add_argument('--dump', dest='dump', metavar='DUMP-OPERATION', type=str) + ops.add_argument('--multi', dest='multi', nargs=2, action='append', + metavar=('DO-OPERATION', 'JSON_TEXT'), type=str, + help="Multi-message operation sequence (for nftables)") + ops.add_argument('--list-ops', action='store_true', + help="List available --do and --dump operations") + ops.add_argument('--list-msgs', action='store_true', + help="List all messages of the family (incl. notifications)") + ops.add_argument('--list-attrs', '--doc', dest='list_attrs', metavar='MSG', + type=str, help='List attributes for a message / operation') + ops.add_argument('--validate', action='store_true', + help="Validate the spec against schema and exit") + + io_group = parser.add_argument_group('Input / Output') + io_group.add_argument('--json', dest='json_text', type=str, + help=('Specify attributes of the message to send ' + 'to the kernel in JSON format. Can be left out ' + 'if the message is expected to be empty.')) + io_group.add_argument('--output-json', action='store_true', + help='Format output as JSON') + + ntf_group = parser.add_argument_group('Notifications') + ntf_group.add_argument('--subscribe', dest='ntf', type=str) + ntf_group.add_argument('--duration', dest='duration', type=int, + help='when subscribed, watch for DURATION seconds') + ntf_group.add_argument('--sleep', dest='duration', type=int, + help='alias for duration') + + nlflags = parser.add_argument_group('Netlink message flags (NLM_F_*)', + ('Extra flags to set in nlmsg_flags of ' + 'the request, used mostly by older ' + 'Classic Netlink families.')) + nlflags.add_argument('--replace', dest='flags', action='append_const', + const=Netlink.NLM_F_REPLACE) + nlflags.add_argument('--excl', dest='flags', action='append_const', + const=Netlink.NLM_F_EXCL) + nlflags.add_argument('--create', dest='flags', action='append_const', + const=Netlink.NLM_F_CREATE) + nlflags.add_argument('--append', dest='flags', action='append_const', + const=Netlink.NLM_F_APPEND) + + schema_group = parser.add_argument_group('Development options') + schema_group.add_argument('--schema', dest='schema', type=str, + help="JSON schema to validate the spec") + schema_group.add_argument('--no-schema', action='store_true') + + dbg_group = parser.add_argument_group('Debug options') + dbg_group.add_argument('--dbg-small-recv', default=0, const=4000, + action='store', nargs='?', type=int, metavar='INT', + help="Length of buffers used for recv()") + dbg_group.add_argument('--process-unknown', action=argparse.BooleanOptionalAction) + args = parser.parse_args() def output(msg): if args.output_json: print(json.dumps(msg, cls=YnlEncoder)) else: - pprint.PrettyPrinter().pprint(msg) + pprint.pprint(msg, width=term_width(), compact=True) if args.list_families: for filename in sorted(os.listdir(spec_dir())): @@ -172,18 +287,18 @@ def main(): else: spec = args.spec if not os.path.isfile(spec): - raise Exception(f"Spec file {spec} does not exist") + raise YnlException(f"Spec file {spec} does not exist") if args.validate: try: SpecFamily(spec, args.schema) - except Exception as error: + except SpecException as error: print(error) - exit(1) + sys.exit(1) return if args.family: # set behaviour when using installed specs - if args.schema is None and spec.startswith(sys_schema_dir): + if args.schema is None and spec.startswith(SYS_SCHEMA_DIR): args.schema = '' # disable schema validation when installed if args.process_unknown is None: args.process_unknown = True @@ -207,23 +322,9 @@ def main(): op = ynl.msgs.get(args.list_attrs) if not op: print(f'Operation {args.list_attrs} not found') - exit(1) - - print(f'Operation: {op.name}') - print(op.yaml['doc']) - - for mode in ['do', 'dump', 'event']: - if mode in op.yaml: - print_mode_attrs(ynl, mode, op.yaml[mode], op.attr_set, True) - - if 'notify' in op.yaml: - mode_spec = op.yaml['notify'] - ref_spec = ynl.msgs.get(mode_spec).yaml.get('do') - if ref_spec: - print_mode_attrs(ynl, 'notify', ref_spec, op.attr_set, False) + sys.exit(1) - if 'mcgrp' in op.yaml: - print(f"\nMulticast group: {op.yaml['mcgrp']}") + do_doc(ynl, op) try: if args.do: @@ -242,7 +343,7 @@ def main(): output(msg) except NlError as e: print(e) - exit(1) + sys.exit(1) except KeyboardInterrupt: pass except BrokenPipeError: diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index fd0f6b8d54d1..f1a2a2a89985 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -1,5 +1,10 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# pylint: disable=too-many-locals, too-many-branches, too-many-statements +# pylint: disable=too-many-return-statements + +""" YNL ethtool utility """ import argparse import pathlib @@ -8,9 +13,12 @@ import sys import re import os +# pylint: disable=no-name-in-module,wrong-import-position sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) -from lib import YnlFamily +# pylint: disable=import-error from cli import schema_dir, spec_dir +from lib import YnlFamily + def args_to_req(ynl, op_name, args, req): """ @@ -48,7 +56,8 @@ def print_field(reply, *desc): return if len(desc) == 0: - return print_field(reply, *zip(reply.keys(), reply.keys())) + print_field(reply, *zip(reply.keys(), reply.keys())) + return for spec in desc: try: @@ -88,11 +97,12 @@ def doit(ynl, args, op_name): args_to_req(ynl, op_name, args.args, req) ynl.do(op_name, req) -def dumpit(ynl, args, op_name, extra = {}): +def dumpit(ynl, args, op_name, extra=None): """ Prepare request header, parse arguments and dumpit (filtering out the devices we're not interested in). """ + extra = extra or {} reply = ynl.dump(op_name, { 'header': {} } | extra) if not reply: return {} @@ -114,9 +124,9 @@ def bits_to_dict(attr): """ ret = {} if 'bits' not in attr: - return dict() + return {} if 'bit' not in attr['bits']: - return dict() + return {} for bit in attr['bits']['bit']: if bit['name'] == '': continue @@ -126,6 +136,8 @@ def bits_to_dict(attr): return ret def main(): + """ YNL ethtool utility """ + parser = argparse.ArgumentParser(description='ethtool wannabe') parser.add_argument('--json', action=argparse.BooleanOptionalAction) parser.add_argument('--show-priv-flags', action=argparse.BooleanOptionalAction) @@ -155,7 +167,7 @@ def main(): # TODO: rss-get parser.add_argument('device', metavar='device', type=str) parser.add_argument('args', metavar='args', type=str, nargs='*') - global args + args = parser.parse_args() spec = os.path.join(spec_dir(), 'ethtool.yaml') @@ -169,13 +181,16 @@ def main(): return if args.set_eee: - return doit(ynl, args, 'eee-set') + doit(ynl, args, 'eee-set') + return if args.set_pause: - return doit(ynl, args, 'pause-set') + doit(ynl, args, 'pause-set') + return if args.set_coalesce: - return doit(ynl, args, 'coalesce-set') + doit(ynl, args, 'coalesce-set') + return if args.set_features: # TODO: parse the bitmask @@ -183,10 +198,12 @@ def main(): return if args.set_channels: - return doit(ynl, args, 'channels-set') + doit(ynl, args, 'channels-set') + return if args.set_ring: - return doit(ynl, args, 'rings-set') + doit(ynl, args, 'rings-set') + return if args.show_priv_flags: flags = bits_to_dict(dumpit(ynl, args, 'privflags-get')['flags']) @@ -337,25 +354,25 @@ def main(): print(f'Time stamping parameters for {args.device}:') print('Capabilities:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])] + _ = [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])] print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}') if 'tx-types' in tsinfo: print('Hardware Transmit Timestamp Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + _ = [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] else: print('Hardware Transmit Timestamp Modes: none') if 'rx-filters' in tsinfo: print('Hardware Receive Filter Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + _ = [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] else: print('Hardware Receive Filter Modes: none') if 'stats' in tsinfo and tsinfo['stats']: print('Statistics:') - [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] + _ = [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] return diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py index ec9ea00071be..33a96155fb3b 100644 --- a/tools/net/ynl/pyynl/lib/__init__.py +++ b/tools/net/ynl/pyynl/lib/__init__.py @@ -1,11 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +""" YNL library """ + from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \ - SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat -from .ynl import YnlFamily, Netlink, NlError + SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat, \ + SpecException +from .ynl import YnlFamily, Netlink, NlError, YnlException from .doc_generator import YnlDocGenerator __all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet", "SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat", - "YnlFamily", "Netlink", "NlError", "YnlDocGenerator"] + "SpecException", + "YnlFamily", "Netlink", "NlError", "YnlDocGenerator", "YnlException"] diff --git a/tools/net/ynl/pyynl/lib/doc_generator.py b/tools/net/ynl/pyynl/lib/doc_generator.py index 8b922d8f89e8..74f5d408e048 100644 --- a/tools/net/ynl/pyynl/lib/doc_generator.py +++ b/tools/net/ynl/pyynl/lib/doc_generator.py @@ -109,8 +109,7 @@ class RstFormatters: 'fixed-header': 'definition', 'nested-attributes': 'attribute-set', 'struct': 'definition'} - if prefix in mappings: - prefix = mappings[prefix] + prefix = mappings.get(prefix, prefix) return f":ref:`{namespace}-{prefix}-{name}`" def rst_header(self) -> str: diff --git a/tools/net/ynl/pyynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py index 85c17fe01e35..fcffeb5b7ba3 100644 --- a/tools/net/ynl/pyynl/lib/nlspec.py +++ b/tools/net/ynl/pyynl/lib/nlspec.py @@ -1,13 +1,21 @@ # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# pylint: disable=missing-function-docstring, too-many-instance-attributes, too-many-branches + +""" +The nlspec is a python library for parsing and using YNL netlink +specifications. +""" import collections import importlib import os -import yaml +import yaml as pyyaml -# To be loaded dynamically as needed -jsonschema = None +class SpecException(Exception): + """Netlink spec exception. + """ class SpecElement: @@ -93,8 +101,7 @@ class SpecEnumEntry(SpecElement): def user_value(self, as_flags=None): if self.enum_set['type'] == 'flags' or as_flags: return 1 << self.value - else: - return self.value + return self.value class SpecEnumSet(SpecElement): @@ -117,8 +124,8 @@ class SpecEnumSet(SpecElement): prev_entry = None value_start = self.yaml.get('value-start', 0) - self.entries = dict() - self.entries_by_val = dict() + self.entries = {} + self.entries_by_val = {} for entry in self.yaml['entries']: e = self.new_entry(entry, prev_entry, value_start) self.entries[e.name] = e @@ -182,7 +189,7 @@ class SpecAttr(SpecElement): self.sub_message = yaml.get('sub-message') self.selector = yaml.get('selector') - self.is_auto_scalar = self.type == "sint" or self.type == "uint" + self.is_auto_scalar = self.type in ("sint", "uint") class SpecAttrSet(SpecElement): @@ -288,7 +295,7 @@ class SpecStruct(SpecElement): yield from self.members def items(self): - return self.members.items() + return self.members class SpecSubMessage(SpecElement): @@ -306,11 +313,11 @@ class SpecSubMessage(SpecElement): self.formats = collections.OrderedDict() for elem in self.yaml['formats']: - format = self.new_format(family, elem) - self.formats[format.value] = format + msg_format = self.new_format(family, elem) + self.formats[msg_format.value] = msg_format - def new_format(self, family, format): - return SpecSubMessageFormat(family, format) + def new_format(self, family, msg_format): + return SpecSubMessageFormat(family, msg_format) class SpecSubMessageFormat(SpecElement): @@ -378,7 +385,7 @@ class SpecOperation(SpecElement): elif self.is_resv: attr_set_name = '' else: - raise Exception(f"Can't resolve attribute set for op '{self.name}'") + raise SpecException(f"Can't resolve attribute set for op '{self.name}'") if attr_set_name: self.attr_set = self.family.attr_sets[attr_set_name] @@ -428,17 +435,22 @@ class SpecFamily(SpecElement): mcast_groups dict of all multicast groups (index by name) kernel_family dict of kernel family attributes """ + + # To be loaded dynamically as needed + jsonschema = None + def __init__(self, spec_path, schema_path=None, exclude_ops=None): - with open(spec_path, "r") as stream: + with open(spec_path, "r", encoding='utf-8') as stream: prefix = '# SPDX-License-Identifier: ' first = stream.readline().strip() if not first.startswith(prefix): - raise Exception('SPDX license tag required in the spec') + raise SpecException('SPDX license tag required in the spec') self.license = first[len(prefix):] stream.seek(0) - spec = yaml.safe_load(stream) + spec = pyyaml.safe_load(stream) + self.fixed_header = None self._resolution_list = [] super().__init__(self, spec) @@ -451,15 +463,13 @@ class SpecFamily(SpecElement): if schema_path is None: schema_path = os.path.dirname(os.path.dirname(spec_path)) + f'/{self.proto}.yaml' if schema_path: - global jsonschema - - with open(schema_path, "r") as stream: - schema = yaml.safe_load(stream) + with open(schema_path, "r", encoding='utf-8') as stream: + schema = pyyaml.safe_load(stream) - if jsonschema is None: - jsonschema = importlib.import_module("jsonschema") + if SpecFamily.jsonschema is None: + SpecFamily.jsonschema = importlib.import_module("jsonschema") - jsonschema.validate(self.yaml, schema) + SpecFamily.jsonschema.validate(self.yaml, schema) self.attr_sets = collections.OrderedDict() self.sub_msgs = collections.OrderedDict() @@ -548,7 +558,7 @@ class SpecFamily(SpecElement): req_val_next = req_val + 1 rsp_val_next = rsp_val + rsp_inc else: - raise Exception("Can't parse directional ops") + raise SpecException("Can't parse directional ops") if req_val == req_val_next: req_val = None @@ -560,20 +570,19 @@ class SpecFamily(SpecElement): skip |= bool(exclude.match(elem['name'])) if not skip: op = self.new_operation(elem, req_val, rsp_val) + self.msgs[op.name] = op req_val = req_val_next rsp_val = rsp_val_next - self.msgs[op.name] = op - def find_operation(self, name): - """ - For a given operation name, find and return operation spec. - """ - for op in self.yaml['operations']['list']: - if name == op['name']: - return op - return None + """ + For a given operation name, find and return operation spec. + """ + for op in self.yaml['operations']['list']: + if name == op['name']: + return op + return None def resolve(self): self.resolve_up(super()) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 36d36eb7e3b8..9774005e7ad1 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -1,4 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# pylint: disable=missing-class-docstring, missing-function-docstring +# pylint: disable=too-many-branches, too-many-locals, too-many-instance-attributes +# pylint: disable=too-many-lines + +""" +YAML Netlink Library + +An implementation of the genetlink and raw netlink protocols. +""" from collections import namedtuple from enum import Enum @@ -22,6 +32,11 @@ from .nlspec import SpecFamily # +class YnlException(Exception): + pass + + +# pylint: disable=too-few-public-methods class Netlink: # Netlink socket SOL_NETLINK = 270 @@ -144,22 +159,22 @@ class NlAttr: @classmethod def get_format(cls, attr_type, byte_order=None): - format = cls.type_formats[attr_type] + format_ = cls.type_formats[attr_type] if byte_order: - return format.big if byte_order == "big-endian" \ - else format.little - return format.native + return format_.big if byte_order == "big-endian" \ + else format_.little + return format_.native def as_scalar(self, attr_type, byte_order=None): - format = self.get_format(attr_type, byte_order) - return format.unpack(self.raw)[0] + format_ = self.get_format(attr_type, byte_order) + return format_.unpack(self.raw)[0] def as_auto_scalar(self, attr_type, byte_order=None): if len(self.raw) != 4 and len(self.raw) != 8: - raise Exception(f"Auto-scalar len payload be 4 or 8 bytes, got {len(self.raw)}") + raise YnlException(f"Auto-scalar len payload be 4 or 8 bytes, got {len(self.raw)}") real_type = attr_type[0] + str(len(self.raw) * 8) - format = self.get_format(real_type, byte_order) - return format.unpack(self.raw)[0] + format_ = self.get_format(real_type, byte_order) + return format_.unpack(self.raw)[0] def as_strz(self): return self.raw.decode('ascii')[:-1] @@ -167,9 +182,9 @@ class NlAttr: def as_bin(self): return self.raw - def as_c_array(self, type): - format = self.get_format(type) - return [ x[0] for x in format.iter_unpack(self.raw) ] + def as_c_array(self, c_type): + format_ = self.get_format(c_type) + return [ x[0] for x in format_.iter_unpack(self.raw) ] def __repr__(self): return f"[type:{self.type} len:{self._len}] {self.raw}" @@ -220,7 +235,7 @@ class NlMsg: self.extack = None if self.nl_flags & Netlink.NLM_F_ACK_TLVS and extack_off: - self.extack = dict() + self.extack = {} extack_attrs = NlAttrs(self.raw[extack_off:]) for extack in extack_attrs: if extack.type == Netlink.NLMSGERR_ATTR_MSG: @@ -245,8 +260,8 @@ class NlMsg: policy = {} for attr in NlAttrs(raw): if attr.type == Netlink.NL_POLICY_TYPE_ATTR_TYPE: - type = attr.as_scalar('u32') - policy['type'] = Netlink.AttrType(type).name + type_ = attr.as_scalar('u32') + policy['type'] = Netlink.AttrType(type_).name elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MIN_VALUE_S: policy['min-value'] = attr.as_scalar('s64') elif attr.type == Netlink.NL_POLICY_TYPE_ATTR_MAX_VALUE_S: @@ -281,7 +296,8 @@ class NlMsg: return self.nl_type def __repr__(self): - msg = f"nl_len = {self.nl_len} ({len(self.raw)}) nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}" + msg = (f"nl_len = {self.nl_len} ({len(self.raw)}) " + f"nl_flags = 0x{self.nl_flags:x} nl_type = {self.nl_type}") if self.error: msg += '\n\terror: ' + str(self.error) if self.extack: @@ -289,6 +305,7 @@ class NlMsg: return msg +# pylint: disable=too-few-public-methods class NlMsgs: def __init__(self, data): self.msgs = [] @@ -303,9 +320,6 @@ class NlMsgs: yield from self.msgs -genl_family_name_to_id = None - - def _genl_msg(nl_type, nl_flags, genl_cmd, genl_version, seq=None): # we prepend length in _genl_msg_finalize() if seq is None: @@ -319,7 +333,10 @@ def _genl_msg_finalize(msg): return struct.pack("I", len(msg) + 4) + msg +# pylint: disable=too-many-nested-blocks def _genl_load_families(): + genl_family_name_to_id = {} + with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW, Netlink.NETLINK_GENERIC) as sock: sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_CAP_ACK, 1) @@ -330,21 +347,17 @@ def _genl_load_families(): sock.send(msg, 0) - global genl_family_name_to_id - genl_family_name_to_id = dict() - while True: reply = sock.recv(128 * 1024) nms = NlMsgs(reply) for nl_msg in nms: if nl_msg.error: - print("Netlink error:", nl_msg.error) - return + raise YnlException(f"Netlink error: {nl_msg.error}") if nl_msg.done: - return + return genl_family_name_to_id gm = GenlMsg(nl_msg) - fam = dict() + fam = {} for attr in NlAttrs(gm.raw): if attr.type == Netlink.CTRL_ATTR_FAMILY_ID: fam['id'] = attr.as_scalar('u16') @@ -353,7 +366,7 @@ def _genl_load_families(): elif attr.type == Netlink.CTRL_ATTR_MAXATTR: fam['maxattr'] = attr.as_scalar('u32') elif attr.type == Netlink.CTRL_ATTR_MCAST_GROUPS: - fam['mcast'] = dict() + fam['mcast'] = {} for entry in NlAttrs(attr.raw): mcast_name = None mcast_id = None @@ -373,6 +386,7 @@ class GenlMsg: self.nl = nl_msg self.genl_cmd, self.genl_version, _ = struct.unpack_from("BBH", nl_msg.raw, 0) self.raw = nl_msg.raw[4:] + self.raw_attrs = [] def cmd(self): return self.genl_cmd @@ -396,7 +410,7 @@ class NetlinkProtocol: nlmsg = struct.pack("HHII", nl_type, nl_flags, seq, 0) return nlmsg - def message(self, flags, command, version, seq=None): + def message(self, flags, command, _version, seq=None): return self._message(command, flags, seq) def _decode(self, nl_msg): @@ -406,13 +420,13 @@ class NetlinkProtocol: msg = self._decode(nl_msg) if op is None: op = ynl.rsp_by_value[msg.cmd()] - fixed_header_size = ynl._struct_size(op.fixed_header) + fixed_header_size = ynl.struct_size(op.fixed_header) msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size) return msg def get_mcast_id(self, mcast_name, mcast_groups): if mcast_name not in mcast_groups: - raise Exception(f'Multicast group "{mcast_name}" not present in the spec') + raise YnlException(f'Multicast group "{mcast_name}" not present in the spec') return mcast_groups[mcast_name].value def msghdr_size(self): @@ -420,15 +434,16 @@ class NetlinkProtocol: class GenlProtocol(NetlinkProtocol): + genl_family_name_to_id = {} + def __init__(self, family_name): super().__init__(family_name, Netlink.NETLINK_GENERIC) - global genl_family_name_to_id - if genl_family_name_to_id is None: - _genl_load_families() + if not GenlProtocol.genl_family_name_to_id: + GenlProtocol.genl_family_name_to_id = _genl_load_families() - self.genl_family = genl_family_name_to_id[family_name] - self.family_id = genl_family_name_to_id[family_name]['id'] + self.genl_family = GenlProtocol.genl_family_name_to_id[family_name] + self.family_id = GenlProtocol.genl_family_name_to_id[family_name]['id'] def message(self, flags, command, version, seq=None): nlmsg = self._message(self.family_id, flags, seq) @@ -440,13 +455,14 @@ class GenlProtocol(NetlinkProtocol): def get_mcast_id(self, mcast_name, mcast_groups): if mcast_name not in self.genl_family['mcast']: - raise Exception(f'Multicast group "{mcast_name}" not present in the family') + raise YnlException(f'Multicast group "{mcast_name}" not present in the family') return self.genl_family['mcast'][mcast_name] def msghdr_size(self): return super().msghdr_size() + 4 +# pylint: disable=too-few-public-methods class SpaceAttrs: SpecValuesPair = namedtuple('SpecValuesPair', ['spec', 'values']) @@ -461,9 +477,9 @@ class SpaceAttrs: if name in scope.values: return scope.values[name] spec_name = scope.spec.yaml['name'] - raise Exception( + raise YnlException( f"No value for '{name}' in attribute space '{spec_name}'") - raise Exception(f"Attribute '{name}' not defined in any attribute-set") + raise YnlException(f"Attribute '{name}' not defined in any attribute-set") # @@ -485,8 +501,8 @@ class YnlFamily(SpecFamily): self.yaml['protonum']) else: self.nlproto = GenlProtocol(self.yaml['name']) - except KeyError: - raise Exception(f"Family '{self.yaml['name']}' not supported by the kernel") + except KeyError as err: + raise YnlException(f"Family '{self.yaml['name']}' not supported by the kernel") from err self._recv_dbg = False # Note that netlink will use conservative (min) message size for @@ -542,8 +558,7 @@ class YnlFamily(SpecFamily): for single_value in value: scalar += enum.entries[single_value].user_value(as_flags = True) return scalar - else: - return enum.entries[value].user_value() + return enum.entries[value].user_value() def _get_scalar(self, attr_spec, value): try: @@ -555,11 +570,12 @@ class YnlFamily(SpecFamily): return self._from_string(value, attr_spec) raise e + # pylint: disable=too-many-statements def _add_attr(self, space, name, value, search_attrs): try: attr = self.attr_sets[space][name] - except KeyError: - raise Exception(f"Space '{space}' has no attribute '{name}'") + except KeyError as err: + raise YnlException(f"Space '{space}' has no attribute '{name}'") from err nl_type = attr.value if attr.is_multi and isinstance(value, list): @@ -597,18 +613,18 @@ class YnlFamily(SpecFamily): elif isinstance(value, dict) and attr.struct_name: attr_payload = self._encode_struct(attr.struct_name, value) elif isinstance(value, list) and attr.sub_type in NlAttr.type_formats: - format = NlAttr.get_format(attr.sub_type) - attr_payload = b''.join([format.pack(x) for x in value]) + format_ = NlAttr.get_format(attr.sub_type) + attr_payload = b''.join([format_.pack(x) for x in value]) else: - raise Exception(f'Unknown type for binary attribute, value: {value}') + raise YnlException(f'Unknown type for binary attribute, value: {value}') elif attr['type'] in NlAttr.type_formats or attr.is_auto_scalar: scalar = self._get_scalar(attr, value) if attr.is_auto_scalar: attr_type = attr["type"][0] + ('32' if scalar.bit_length() <= 32 else '64') else: attr_type = attr["type"] - format = NlAttr.get_format(attr_type, attr.byte_order) - attr_payload = format.pack(scalar) + format_ = NlAttr.get_format(attr_type, attr.byte_order) + attr_payload = format_.pack(scalar) elif attr['type'] in "bitfield32": scalar_value = self._get_scalar(attr, value["value"]) scalar_selector = self._get_scalar(attr, value["selector"]) @@ -626,9 +642,9 @@ class YnlFamily(SpecFamily): attr_payload += self._add_attr(msg_format.attr_set, subname, subvalue, sub_attrs) else: - raise Exception(f"Unknown attribute-set '{msg_format.attr_set}'") + raise YnlException(f"Unknown attribute-set '{msg_format.attr_set}'") else: - raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}') + raise YnlException(f'Unknown type at {space} {name} {value} {attr["type"]}') return self._add_attr_raw(nl_type, attr_payload) @@ -715,7 +731,7 @@ class YnlFamily(SpecFamily): subattr = self._formatted_string(subattr, attr_spec.display_hint) decoded.append(subattr) else: - raise Exception(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}') + raise YnlException(f'Unknown {attr_spec["sub-type"]} with name {attr_spec["name"]}') return decoded def _decode_nest_type_value(self, attr, attr_spec): @@ -731,12 +747,11 @@ class YnlFamily(SpecFamily): def _decode_unknown(self, attr): if attr.is_nest: return self._decode(NlAttrs(attr.raw), None) - else: - return attr.as_bin() + return attr.as_bin() def _rsp_add(self, rsp, name, is_multi, decoded): if is_multi is None: - if name in rsp and type(rsp[name]) is not list: + if name in rsp and not isinstance(rsp[name], list): rsp[name] = [rsp[name]] is_multi = True else: @@ -752,13 +767,13 @@ class YnlFamily(SpecFamily): def _resolve_selector(self, attr_spec, search_attrs): sub_msg = attr_spec.sub_message if sub_msg not in self.sub_msgs: - raise Exception(f"No sub-message spec named {sub_msg} for {attr_spec.name}") + raise YnlException(f"No sub-message spec named {sub_msg} for {attr_spec.name}") sub_msg_spec = self.sub_msgs[sub_msg] selector = attr_spec.selector value = search_attrs.lookup(selector) if value not in sub_msg_spec.formats: - raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'") + raise YnlException(f"No message format for '{value}' in sub-message spec '{sub_msg}'") spec = sub_msg_spec.formats[value] return spec, value @@ -769,17 +784,20 @@ class YnlFamily(SpecFamily): offset = 0 if msg_format.fixed_header: decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header)) - offset = self._struct_size(msg_format.fixed_header) + offset = self.struct_size(msg_format.fixed_header) if msg_format.attr_set: if msg_format.attr_set in self.attr_sets: subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set) decoded.update(subdict) else: - raise Exception(f"Unknown attribute-set '{msg_format.attr_set}' when decoding '{attr_spec.name}'") + raise YnlException(f"Unknown attribute-set '{msg_format.attr_set}' " + f"when decoding '{attr_spec.name}'") return decoded + # pylint: disable=too-many-statements def _decode(self, attrs, space, outer_attrs = None): - rsp = dict() + rsp = {} + search_attrs = {} if space: attr_space = self.attr_sets[space] search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs) @@ -787,16 +805,19 @@ class YnlFamily(SpecFamily): for attr in attrs: try: attr_spec = attr_space.attrs_by_val[attr.type] - except (KeyError, UnboundLocalError): + except (KeyError, UnboundLocalError) as err: if not self.process_unknown: - raise Exception(f"Space '{space}' has no attribute with value '{attr.type}'") + raise YnlException(f"Space '{space}' has no attribute " + f"with value '{attr.type}'") from err attr_name = f"UnknownAttr({attr.type})" self._rsp_add(rsp, attr_name, None, self._decode_unknown(attr)) continue try: if attr_spec["type"] == 'nest': - subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs) + subdict = self._decode(NlAttrs(attr.raw), + attr_spec['nested-attributes'], + search_attrs) decoded = subdict elif attr_spec["type"] == 'string': decoded = attr.as_strz() @@ -828,7 +849,8 @@ class YnlFamily(SpecFamily): decoded = self._decode_nest_type_value(attr, attr_spec) else: if not self.process_unknown: - raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') + raise YnlException(f'Unknown {attr_spec["type"]} ' + f'with name {attr_spec["name"]}') decoded = self._decode_unknown(attr) self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded) @@ -838,12 +860,14 @@ class YnlFamily(SpecFamily): return rsp + # pylint: disable=too-many-arguments, too-many-positional-arguments def _decode_extack_path(self, attrs, attr_set, offset, target, search_attrs): for attr in attrs: try: attr_spec = attr_set.attrs_by_val[attr.type] - except KeyError: - raise Exception(f"Space '{attr_set.name}' has no attribute with value '{attr.type}'") + except KeyError as err: + raise YnlException( + f"Space '{attr_set.name}' has no attribute with value '{attr.type}'") from err if offset > target: break if offset == target: @@ -860,11 +884,12 @@ class YnlFamily(SpecFamily): elif attr_spec['type'] == 'sub-message': msg_format, value = self._resolve_selector(attr_spec, search_attrs) if msg_format is None: - raise Exception(f"Can't resolve sub-message of {attr_spec['name']} for extack") + raise YnlException(f"Can't resolve sub-message of " + f"{attr_spec['name']} for extack") sub_attrs = self.attr_sets[msg_format.attr_set] pathname += f"({value})" else: - raise Exception(f"Can't dive into {attr.type} ({attr_spec['name']}) for extack") + raise YnlException(f"Can't dive into {attr.type} ({attr_spec['name']}) for extack") offset += 4 subpath = self._decode_extack_path(NlAttrs(attr.raw), sub_attrs, offset, target, search_attrs) @@ -879,7 +904,7 @@ class YnlFamily(SpecFamily): return msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set), op) - offset = self.nlproto.msghdr_size() + self._struct_size(op.fixed_header) + offset = self.nlproto.msghdr_size() + self.struct_size(op.fixed_header) search_attrs = SpaceAttrs(op.attr_set, vals) path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset, extack['bad-attr-offs'], search_attrs) @@ -887,26 +912,25 @@ class YnlFamily(SpecFamily): del extack['bad-attr-offs'] extack['bad-attr'] = path - def _struct_size(self, name): + def struct_size(self, name): if name: members = self.consts[name].members size = 0 for m in members: if m.type in ['pad', 'binary']: if m.struct: - size += self._struct_size(m.struct) + size += self.struct_size(m.struct) else: size += m.len else: - format = NlAttr.get_format(m.type, m.byte_order) - size += format.size + format_ = NlAttr.get_format(m.type, m.byte_order) + size += format_.size return size - else: - return 0 + return 0 def _decode_struct(self, data, name): members = self.consts[name].members - attrs = dict() + attrs = {} offset = 0 for m in members: value = None @@ -914,17 +938,17 @@ class YnlFamily(SpecFamily): offset += m.len elif m.type == 'binary': if m.struct: - len = self._struct_size(m.struct) - value = self._decode_struct(data[offset : offset + len], + len_ = self.struct_size(m.struct) + value = self._decode_struct(data[offset : offset + len_], m.struct) - offset += len + offset += len_ else: value = data[offset : offset + m.len] offset += m.len else: - format = NlAttr.get_format(m.type, m.byte_order) - [ value ] = format.unpack_from(data, offset) - offset += format.size + format_ = NlAttr.get_format(m.type, m.byte_order) + [ value ] = format_.unpack_from(data, offset) + offset += format_.size if value is not None: if m.enum: value = self._decode_enum(value, m) @@ -943,7 +967,7 @@ class YnlFamily(SpecFamily): elif m.type == 'binary': if m.struct: if value is None: - value = dict() + value = {} attr_payload += self._encode_struct(m.struct, value) else: if value is None: @@ -953,13 +977,13 @@ class YnlFamily(SpecFamily): else: if value is None: value = 0 - format = NlAttr.get_format(m.type, m.byte_order) - attr_payload += format.pack(value) + format_ = NlAttr.get_format(m.type, m.byte_order) + attr_payload += format_.pack(value) return attr_payload def _formatted_string(self, raw, display_hint): if display_hint == 'mac': - formatted = ':'.join('%02x' % b for b in raw) + formatted = ':'.join(f'{b:02x}' for b in raw) elif display_hint == 'hex': if isinstance(raw, int): formatted = hex(raw) @@ -991,16 +1015,16 @@ class YnlFamily(SpecFamily): mac_bytes = [int(x, 16) for x in string.split(':')] else: if len(string) % 2 != 0: - raise Exception(f"Invalid MAC address format: {string}") + raise YnlException(f"Invalid MAC address format: {string}") mac_bytes = [int(string[i:i+2], 16) for i in range(0, len(string), 2)] raw = bytes(mac_bytes) else: - raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented" + raise YnlException(f"Display hint '{attr_spec.display_hint}' not implemented" f" when parsing '{attr_spec['name']}'") return raw def handle_ntf(self, decoded): - msg = dict() + msg = {} if self.include_raw: msg['raw'] = decoded op = self.rsp_by_value[decoded.cmd()] @@ -1081,6 +1105,7 @@ class YnlFamily(SpecFamily): msg = _genl_msg_finalize(msg) return msg + # pylint: disable=too-many-statements def _ops(self, ops): reqs_by_seq = {} req_seq = random.randint(1024, 65535) @@ -1139,9 +1164,8 @@ class YnlFamily(SpecFamily): if decoded.cmd() in self.async_msg_ids: self.handle_ntf(decoded) continue - else: - print('Unexpected message: ' + repr(decoded)) - continue + print('Unexpected message: ' + repr(decoded)) + continue rsp_msg = self._decode(decoded.raw_attrs, op.attr_set.name) if op.fixed_header: diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index b517d0c605ad..0e1e486c1185 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -1,5 +1,17 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +# +# pylint: disable=line-too-long, missing-class-docstring, missing-function-docstring +# pylint: disable=too-many-positional-arguments, too-many-arguments, too-many-statements +# pylint: disable=too-many-branches, too-many-locals, too-many-instance-attributes +# pylint: disable=too-many-nested-blocks, too-many-lines, too-few-public-methods +# pylint: disable=broad-exception-raised, broad-exception-caught, protected-access + +""" +ynl_gen_c + +A YNL to C code generator for both kernel and userspace protocol stubs. +""" import argparse import filecmp @@ -9,8 +21,9 @@ import re import shutil import sys import tempfile -import yaml +import yaml as pyyaml +# pylint: disable=no-name-in-module,wrong-import-position sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry from lib import SpecSubMessage @@ -157,7 +170,7 @@ class Type(SpecAttr): def presence_member(self, space, type_filter): if self.presence_type() != type_filter: - return + return '' if self.presence_type() == 'present': pfx = '__' if space == 'user' else '' @@ -166,14 +179,15 @@ class Type(SpecAttr): if self.presence_type() in {'len', 'count'}: pfx = '__' if space == 'user' else '' return f"{pfx}u32 {self.c_name};" + return '' - def _complex_member_type(self, ri): + def _complex_member_type(self, _ri): return None def free_needs_iter(self): return False - def _free_lines(self, ri, var, ref): + def _free_lines(self, _ri, var, ref): if self.is_multi_val() or self.presence_type() in {'count', 'len'}: return [f'free({var}->{ref}{self.c_name});'] return [] @@ -183,9 +197,10 @@ class Type(SpecAttr): for line in lines: ri.cw.p(line) + # pylint: disable=assignment-from-none def arg_member(self, ri): member = self._complex_member_type(ri) - if member: + if member is not None: spc = ' ' if member[-1] != '*' else '' arg = [member + spc + '*' + self.c_name] if self.presence_type() == 'count': @@ -195,7 +210,7 @@ class Type(SpecAttr): def struct_member(self, ri): member = self._complex_member_type(ri) - if member: + if member is not None: ptr = '*' if self.is_multi_val() else '' if self.is_recursive_for_op(ri): ptr = '*' @@ -243,9 +258,9 @@ class Type(SpecAttr): def attr_get(self, ri, var, first): lines, init_lines, _ = self._attr_get(ri, var) - if type(lines) is str: + if isinstance(lines, str): lines = [lines] - if type(init_lines) is str: + if isinstance(init_lines, str): init_lines = [init_lines] kw = 'if' if first else 'else if' @@ -270,7 +285,7 @@ class Type(SpecAttr): def _setter_lines(self, ri, member, presence): raise Exception(f"Setter not implemented for class type {self.type}") - def setter(self, ri, space, direction, deref=False, ref=None, var="req"): + def setter(self, ri, _space, direction, deref=False, ref=None, var="req"): ref = (ref if ref else []) + [self.c_name] member = f"{var}->{'.'.join(ref)}" @@ -280,6 +295,7 @@ class Type(SpecAttr): code = [] presence = '' + # pylint: disable=consider-using-enumerate for i in range(0, len(ref)): presence = f"{var}->{'.'.join(ref[:i] + [''])}_present.{ref[i]}" # Every layer below last is a nest, so we know it uses bit presence @@ -414,6 +430,7 @@ class TypeScalar(Type): if low < -32768 or high > 32767: self.checks['full-range'] = True + # pylint: disable=too-many-return-statements def _attr_policy(self, policy): if 'flags-mask' in self.checks or self.is_bitfield: if self.is_bitfield: @@ -424,15 +441,15 @@ class TypeScalar(Type): flag_cnt = len(flags['entries']) mask = (1 << flag_cnt) - 1 return f"NLA_POLICY_MASK({policy}, 0x{mask:x})" - elif 'full-range' in self.checks: + if 'full-range' in self.checks: return f"NLA_POLICY_FULL_RANGE({policy}, &{c_lower(self.enum_name)}_range)" - elif 'range' in self.checks: + if 'range' in self.checks: return f"NLA_POLICY_RANGE({policy}, {self.get_limit_str('min')}, {self.get_limit_str('max')})" - elif 'min' in self.checks: + if 'min' in self.checks: return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})" - elif 'max' in self.checks: + if 'max' in self.checks: return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})" - elif 'sparse' in self.checks: + if 'sparse' in self.checks: return f"NLA_POLICY_VALIDATE_FN({policy}, &{c_lower(self.enum_name)}_validate)" return super()._attr_policy(policy) @@ -554,6 +571,8 @@ class TypeBinary(Type): mem = 'NLA_POLICY_MIN_LEN(' + self.get_limit_str('min-len') + ')' elif 'max-len' in self.checks: mem = 'NLA_POLICY_MAX_LEN(' + self.get_limit_str('max-len') + ')' + else: + raise Exception('Failed to process policy check for binary type') return mem @@ -627,7 +646,7 @@ class TypeBinaryScalarArray(TypeBinary): class TypeBitfield32(Type): - def _complex_member_type(self, ri): + def _complex_member_type(self, _ri): return "struct nla_bitfield32" def _attr_typol(self): @@ -655,7 +674,7 @@ class TypeNest(Type): def is_recursive(self): return self.family.pure_nested_structs[self.nested_attrs].recursive - def _complex_member_type(self, ri): + def _complex_member_type(self, _ri): return self.nested_struct_type def _free_lines(self, ri, var, ref): @@ -689,7 +708,7 @@ class TypeNest(Type): f"parg.data = &{var}->{self.c_name};"] return get_lines, init_lines, None - def setter(self, ri, space, direction, deref=False, ref=None, var="req"): + def setter(self, ri, _space, direction, deref=False, ref=None, var="req"): ref = (ref if ref else []) + [self.c_name] for _, attr in ri.family.pure_nested_structs[self.nested_attrs].member_list(): @@ -714,19 +733,18 @@ class TypeMultiAttr(Type): def _complex_member_type(self, ri): if 'type' not in self.attr or self.attr['type'] == 'nest': return self.nested_struct_type - elif self.attr['type'] == 'binary' and 'struct' in self.attr: + if self.attr['type'] == 'binary' and 'struct' in self.attr: return None # use arg_member() - elif self.attr['type'] == 'string': + if self.attr['type'] == 'string': return 'struct ynl_string *' - elif self.attr['type'] in scalars: + if self.attr['type'] in scalars: scalar_pfx = '__' if ri.ku_space == 'user' else '' if self.is_auto_scalar: name = self.type[0] + '64' else: name = self.attr['type'] return scalar_pfx + name - else: - raise Exception(f"Sub-type {self.attr['type']} not supported yet") + raise Exception(f"Sub-type {self.attr['type']} not supported yet") def arg_member(self, ri): if self.type == 'binary' and 'struct' in self.attr: @@ -737,7 +755,7 @@ class TypeMultiAttr(Type): def free_needs_iter(self): return self.attr['type'] in {'nest', 'string'} - def _free_lines(self, ri, var, ref): + def _free_lines(self, _ri, var, ref): lines = [] if self.attr['type'] in scalars: lines += [f"free({var}->{ref}{self.c_name});"] @@ -801,13 +819,12 @@ class TypeIndexedArray(Type): def _complex_member_type(self, ri): if 'sub-type' not in self.attr or self.attr['sub-type'] == 'nest': return self.nested_struct_type - elif self.attr['sub-type'] in scalars: + if self.attr['sub-type'] in scalars: scalar_pfx = '__' if ri.ku_space == 'user' else '' return scalar_pfx + self.attr['sub-type'] - elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks: + if self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks: return None # use arg_member() - else: - raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet") + raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet") def arg_member(self, ri): if self.sub_type == 'binary' and 'exact-len' in self.checks: @@ -823,12 +840,11 @@ class TypeIndexedArray(Type): def _attr_typol(self): if self.attr['sub-type'] in scalars: return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, ' - elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks: + if self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks: return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, ' - elif self.attr['sub-type'] == 'nest': + if self.attr['sub-type'] == 'nest': return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' - else: - raise Exception(f"Typol for IndexedArray sub-type {self.attr['sub-type']} not supported, yet") + raise Exception(f"Typol for IndexedArray sub-type {self.attr['sub-type']} not supported, yet") def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] @@ -864,18 +880,18 @@ class TypeIndexedArray(Type): def free_needs_iter(self): return self.sub_type == 'nest' - def _free_lines(self, ri, var, ref): + def _free_lines(self, _ri, var, ref): lines = [] if self.sub_type == 'nest': lines += [ f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)", f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', ] - lines += f"free({var}->{ref}{self.c_name});", + lines += (f"free({var}->{ref}{self.c_name});",) return lines class TypeNestTypeValue(Type): - def _complex_member_type(self, ri): + def _complex_member_type(self, _ri): return self.nested_struct_type def _attr_typol(self): @@ -921,15 +937,15 @@ class TypeSubMessage(TypeNest): return typol def _attr_get(self, ri, var): - sel = c_lower(self['selector']) + selector = self['selector'] + sel = c_lower(selector) if self.selector.is_external(): sel_var = f"_sel_{sel}" else: sel_var = f"{var}->{sel}" get_lines = [f'if (!{sel_var})', - 'return ynl_submsg_failed(yarg, "%s", "%s");' % - (self.name, self['selector']), - f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))", + f'return ynl_submsg_failed(yarg, "{self.name}", "{selector}");', + f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))", "return YNL_PARSE_CB_ERROR;"] init_lines = [f"parg.rsp_policy = &{self.nested_render_name}_nest;", f"parg.data = &{var}->{self.c_name};"] @@ -988,7 +1004,7 @@ class Struct: self.in_multi_val = False # used by a MultiAttr or and legacy arrays self.attr_list = [] - self.attrs = dict() + self.attrs = {} if type_list is not None: for t in type_list: self.attr_list.append((t, self.attr_set[t]),) @@ -1020,7 +1036,7 @@ class Struct: def external_selectors(self): sels = [] - for name, attr in self.attr_list: + for _name, attr in self.attr_list: if isinstance(attr, TypeSubMessage) and attr.selector.is_external(): sels.append(attr.selector) return sels @@ -1037,9 +1053,9 @@ class EnumEntry(SpecEnumEntry): super().__init__(enum_set, yaml, prev, value_start) if prev: - self.value_change = (self.value != prev.value + 1) + self.value_change = self.value != prev.value + 1 else: - self.value_change = (self.value != 0) + self.value_change = self.value != 0 self.value_change = self.value_change or self.enum_set['type'] == 'flags' # Added by resolve: @@ -1080,8 +1096,8 @@ class EnumSet(SpecEnumSet): return EnumEntry(self, entry, prev_entry, value_start) def value_range(self): - low = min([x.value for x in self.entries.values()]) - high = max([x.value for x in self.entries.values()]) + low = min(x.value for x in self.entries.values()) + high = max(x.value for x in self.entries.values()) if high - low + 1 != len(self.entries): return None, None @@ -1220,6 +1236,12 @@ class Family(SpecFamily): self.hooks = None delattr(self, "hooks") + self.root_sets = {} + self.pure_nested_structs = {} + self.kernel_policy = None + self.global_policy = None + self.global_policy_set = None + super().__init__(file_name, exclude_ops=exclude_ops) self.fam_key = c_upper(self.yaml.get('c-family-name', self.yaml["name"] + '_FAMILY_NAME')) @@ -1254,18 +1276,18 @@ class Family(SpecFamily): self.mcgrps = self.yaml.get('mcast-groups', {'list': []}) - self.hooks = dict() + self.hooks = {} for when in ['pre', 'post']: - self.hooks[when] = dict() + self.hooks[when] = {} for op_mode in ['do', 'dump']: - self.hooks[when][op_mode] = dict() + self.hooks[when][op_mode] = {} self.hooks[when][op_mode]['set'] = set() self.hooks[when][op_mode]['list'] = [] # dict space-name -> 'request': set(attrs), 'reply': set(attrs) - self.root_sets = dict() + self.root_sets = {} # dict space-name -> Struct - self.pure_nested_structs = dict() + self.pure_nested_structs = {} self._mark_notify() self._mock_up_events() @@ -1311,7 +1333,7 @@ class Family(SpecFamily): } def _load_root_sets(self): - for op_name, op in self.msgs.items(): + for _op_name, op in self.msgs.items(): if 'attribute-set' not in op: continue @@ -1427,7 +1449,7 @@ class Family(SpecFamily): attr_set_queue = list(self.root_sets.keys()) attr_set_seen = set(self.root_sets.keys()) - while len(attr_set_queue): + while attr_set_queue: a_set = attr_set_queue.pop(0) for attr, spec in self.attr_sets[a_set].items(): if 'nested-attributes' in spec: @@ -1510,7 +1532,7 @@ class Family(SpecFamily): for k, _ in self.root_sets.items(): yield k, None # we don't have a struct, but it must be terminal - for attr_set, struct in all_structs(): + for attr_set, _struct in all_structs(): for _, spec in self.attr_sets[attr_set].items(): if 'nested-attributes' in spec: child_name = spec['nested-attributes'] @@ -1530,7 +1552,7 @@ class Family(SpecFamily): def _load_global_policy(self): global_set = set() attr_set_name = None - for op_name, op in self.ops.items(): + for _op_name, op in self.ops.items(): if not op: continue if 'attribute-set' not in op: @@ -1613,7 +1635,7 @@ class RenderInfo: self.cw = cw - self.struct = dict() + self.struct = {} if op_mode == 'notify': op_mode = 'do' if 'do' in op else 'dump' for op_dir in ['request', 'reply']: @@ -1650,6 +1672,7 @@ class CodeWriter: if out_file is None: self._out = os.sys.stdout else: + # pylint: disable=consider-using-with self._out = tempfile.NamedTemporaryFile('w+') self._out_file = out_file @@ -1664,7 +1687,7 @@ class CodeWriter: if not self._overwrite and os.path.isfile(self._out_file): if filecmp.cmp(self._out.name, self._out_file, shallow=False): return - with open(self._out_file, 'w+') as out_file: + with open(self._out_file, 'w+', encoding='utf-8') as out_file: self._out.seek(0) shutil.copyfileobj(self._out, out_file) self._out.close() @@ -1779,7 +1802,7 @@ class CodeWriter: if not local_vars: return - if type(local_vars) is str: + if isinstance(local_vars, str): local_vars = [local_vars] local_vars.sort(key=len, reverse=True) @@ -1799,20 +1822,19 @@ class CodeWriter: def writes_defines(self, defines): longest = 0 for define in defines: - if len(define[0]) > longest: - longest = len(define[0]) + longest = max(len(define[0]), longest) longest = ((longest + 8) // 8) * 8 for define in defines: line = '#define ' + define[0] line += '\t' * ((longest - len(define[0]) + 7) // 8) - if type(define[1]) is int: + if isinstance(define[1], int): line += str(define[1]) - elif type(define[1]) is str: + elif isinstance(define[1], str): line += '"' + define[1] + '"' self.p(line) def write_struct_init(self, members): - longest = max([len(x[0]) for x in members]) + longest = max(len(x[0]) for x in members) longest += 1 # because we prepend a . longest = ((longest + 8) // 8) * 8 for one in members: @@ -2038,12 +2060,12 @@ def put_op_name(family, cw): _put_enum_to_str_helper(cw, family.c_name + '_op', map_name, 'op') -def put_enum_to_str_fwd(family, cw, enum): +def put_enum_to_str_fwd(_family, cw, enum): args = [enum.user_type + ' value'] cw.write_func_prot('const char *', f'{enum.render_name}_str', args, suffix=';') -def put_enum_to_str(family, cw, enum): +def put_enum_to_str(_family, cw, enum): map_name = f'{enum.render_name}_strmap' cw.block_start(line=f"static const char * const {map_name}[] =") for entry in enum.entries.values(): @@ -2324,7 +2346,8 @@ def parse_rsp_nested_prototype(ri, struct, suffix=';'): def parse_rsp_nested(ri, struct): if struct.submsg: - return parse_rsp_submsg(ri, struct) + parse_rsp_submsg(ri, struct) + return parse_rsp_nested_prototype(ri, struct, suffix='') @@ -2654,7 +2677,7 @@ def print_req_free(ri): def print_rsp_type(ri): - if (ri.op_mode == 'do' or ri.op_mode == 'dump') and 'reply' in ri.op[ri.op_mode]: + if ri.op_mode in ('do', 'dump') and 'reply' in ri.op[ri.op_mode]: direction = 'reply' elif ri.op_mode == 'event': direction = 'reply' @@ -2667,7 +2690,7 @@ def print_wrapped_type(ri): ri.cw.block_start(line=f"{type_name(ri, 'reply')}") if ri.op_mode == 'dump': ri.cw.p(f"{type_name(ri, 'reply')} *next;") - elif ri.op_mode == 'notify' or ri.op_mode == 'event': + elif ri.op_mode in ('notify', 'event'): ri.cw.p('__u16 family;') ri.cw.p('__u8 cmd;') ri.cw.p('struct ynl_ntf_base_type *next;') @@ -2704,7 +2727,7 @@ def _free_type(ri, direction, struct): def free_rsp_nested_prototype(ri): - print_free_prototype(ri, "") + print_free_prototype(ri, "") def free_rsp_nested(ri, struct): @@ -2930,7 +2953,7 @@ def print_kernel_op_table_hdr(family, cw): def print_kernel_op_table(family, cw): print_kernel_op_table_fwd(family, cw, terminate=False) - if family.kernel_policy == 'global' or family.kernel_policy == 'per-op': + if family.kernel_policy in ('global', 'per-op'): for op_name, op in family.ops.items(): if op.is_async: continue @@ -3346,7 +3369,7 @@ def render_user_family(family, cw, prototype): else: raise Exception('Invalid notification ' + ntf_op_name) _render_user_ntf_entry(ri, ntf_op) - for op_name, op in family.ops.items(): + for _op_name, op in family.ops.items(): if 'event' not in op: continue ri = RenderInfo(cw, family, "user", op, "event") @@ -3418,12 +3441,11 @@ def main(): print('Spec license:', parsed.license) print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)') os.sys.exit(1) - except yaml.YAMLError as exc: + except pyyaml.YAMLError as exc: print(exc) os.sys.exit(1) - return - cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out)) + cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=not args.cmp_out) _, spec_kernel = find_kernel_root(args.spec) if args.mode == 'uapi' or args.header: @@ -3524,7 +3546,7 @@ def main(): cw.nl() if parsed.kernel_policy in {'per-op', 'split'}: - for op_name, op in parsed.ops.items(): + for _op_name, op in parsed.ops.items(): if 'do' in op and 'event' not in op: ri = RenderInfo(cw, parsed, args.mode, op, "do") print_req_policy_fwd(cw, ri.struct['request'], ri=ri) @@ -3553,7 +3575,7 @@ def main(): print_req_policy(cw, struct) cw.nl() - for op_name, op in parsed.ops.items(): + for _op_name, op in parsed.ops.items(): if parsed.kernel_policy in {'per-op', 'split'}: for op_mode in ['do', 'dump']: if op_mode in op and 'request' in op[op_mode]: @@ -3581,7 +3603,7 @@ def main(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) print_type_full(ri, struct) - for op_name, op in parsed.ops.items(): + for _op_name, op in parsed.ops.items(): cw.p(f"/* ============== {op.enum_name} ============== */") if 'do' in op and 'event' not in op: @@ -3614,7 +3636,7 @@ def main(): raise Exception(f'Only notifications with consistent types supported ({op.name})') print_wrapped_type(ri) - for op_name, op in parsed.ntfs.items(): + for _op_name, op in parsed.ntfs.items(): if 'event' in op: ri = RenderInfo(cw, parsed, args.mode, op, 'event') cw.p(f"/* {op.enum_name} - event */") @@ -3664,7 +3686,7 @@ def main(): if struct.reply: parse_rsp_nested(ri, struct) - for op_name, op in parsed.ops.items(): + for _op_name, op in parsed.ops.items(): cw.p(f"/* ============== {op.enum_name} ============== */") if 'do' in op and 'event' not in op: cw.p(f"/* {op.enum_name} - do */") @@ -3692,7 +3714,7 @@ def main(): raise Exception(f'Only notifications with consistent types supported ({op.name})') print_ntf_type_free(ri) - for op_name, op in parsed.ntfs.items(): + for _op_name, op in parsed.ntfs.items(): if 'event' in op: cw.p(f"/* {op.enum_name} - event */") diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py index 90ae19aac89d..30324e2fd682 100755 --- a/tools/net/ynl/pyynl/ynl_gen_rst.py +++ b/tools/net/ynl/pyynl/ynl_gen_rst.py @@ -19,6 +19,7 @@ import sys import argparse import logging +# pylint: disable=no-name-in-module,wrong-import-position sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlDocGenerator # pylint: disable=C0413 @@ -60,6 +61,7 @@ def write_to_rstfile(content: str, filename: str) -> None: rst_file.write(content) +# pylint: disable=broad-exception-caught def main() -> None: """Main function that reads the YAML files and generates the RST files""" diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile index f5b1de32daa5..48b0f32050f0 100644 --- a/tools/net/ynl/ynltool/Makefile +++ b/tools/net/ynl/ynltool/Makefile @@ -13,7 +13,7 @@ endif CFLAGS += -I../lib -I../generated -I../../../include/uapi/ SRC_VERSION := \ - $(shell make --no-print-directory -sC ../../../.. kernelversion || \ + $(shell make --no-print-directory -sC ../../../.. kernelversion 2>/dev/null || \ echo "unknown") CFLAGS += -DSRC_VERSION='"$(SRC_VERSION)"' diff --git a/tools/net/ynl/ynltool/qstats.c b/tools/net/ynl/ynltool/qstats.c index 31fb45709ffa..a6c28ba4f25c 100644 --- a/tools/net/ynl/ynltool/qstats.c +++ b/tools/net/ynl/ynltool/qstats.c @@ -237,13 +237,47 @@ static void print_plain_qstats(struct netdev_qstats_get_list *qstats) } } -static int do_show(int argc, char **argv) +static struct netdev_qstats_get_list * +qstats_dump(enum netdev_qstats_scope scope) { struct netdev_qstats_get_list *qstats; struct netdev_qstats_get_req *req; struct ynl_error yerr; struct ynl_sock *ys; - int ret = 0; + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) { + p_err("YNL: %s", yerr.msg); + return NULL; + } + + req = netdev_qstats_get_req_alloc(); + if (!req) { + p_err("failed to allocate qstats request"); + goto err_close; + } + + if (scope) + netdev_qstats_get_req_set_scope(req, scope); + + qstats = netdev_qstats_get_dump(ys, req); + netdev_qstats_get_req_free(req); + if (!qstats) { + p_err("failed to get queue stats: %s", ys->err.msg); + goto err_close; + } + + ynl_sock_destroy(ys); + return qstats; + +err_close: + ynl_sock_destroy(ys); + return NULL; +} + +static int do_show(int argc, char **argv) +{ + struct netdev_qstats_get_list *qstats; /* Parse options */ while (argc > 0) { @@ -268,29 +302,9 @@ static int do_show(int argc, char **argv) } } - ys = ynl_sock_create(&ynl_netdev_family, &yerr); - if (!ys) { - p_err("YNL: %s", yerr.msg); + qstats = qstats_dump(scope); + if (!qstats) return -1; - } - - req = netdev_qstats_get_req_alloc(); - if (!req) { - p_err("failed to allocate qstats request"); - ret = -1; - goto exit_close; - } - - if (scope) - netdev_qstats_get_req_set_scope(req, scope); - - qstats = netdev_qstats_get_dump(ys, req); - netdev_qstats_get_req_free(req); - if (!qstats) { - p_err("failed to get queue stats: %s", ys->err.msg); - ret = -1; - goto exit_close; - } /* Print the stats as returned by the kernel */ if (json_output) @@ -299,9 +313,7 @@ static int do_show(int argc, char **argv) print_plain_qstats(qstats); netdev_qstats_get_list_free(qstats); -exit_close: - ynl_sock_destroy(ys); - return ret; + return 0; } static void compute_stats(__u64 *values, unsigned int count, @@ -406,10 +418,7 @@ static int cmp_ifindex_type(const void *a, const void *b) static int do_balance(int argc, char **argv __attribute__((unused))) { struct netdev_qstats_get_list *qstats; - struct netdev_qstats_get_req *req; struct netdev_qstats_get_rsp **sorted; - struct ynl_error yerr; - struct ynl_sock *ys; unsigned int count = 0; unsigned int i, j; int ret = 0; @@ -419,29 +428,9 @@ static int do_balance(int argc, char **argv __attribute__((unused))) return -1; } - ys = ynl_sock_create(&ynl_netdev_family, &yerr); - if (!ys) { - p_err("YNL: %s", yerr.msg); + qstats = qstats_dump(NETDEV_QSTATS_SCOPE_QUEUE); + if (!qstats) return -1; - } - - req = netdev_qstats_get_req_alloc(); - if (!req) { - p_err("failed to allocate qstats request"); - ret = -1; - goto exit_close; - } - - /* Always use queue scope for balance analysis */ - netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE); - - qstats = netdev_qstats_get_dump(ys, req); - netdev_qstats_get_req_free(req); - if (!qstats) { - p_err("failed to get queue stats: %s", ys->err.msg); - ret = -1; - goto exit_close; - } /* Count and sort queues */ ynl_dump_foreach(qstats, qs) @@ -576,11 +565,68 @@ exit_free_sorted: free(sorted); exit_free_qstats: netdev_qstats_get_list_free(qstats); -exit_close: - ynl_sock_destroy(ys); return ret; } +static int do_hw_gro(int argc, char **argv __attribute__((unused))) +{ + struct netdev_qstats_get_list *qstats; + + if (argc > 0) { + p_err("hw-gro command takes no arguments"); + return -1; + } + + qstats = qstats_dump(0); + if (!qstats) + return -1; + + if (json_output) + jsonw_start_array(json_wtr); + + ynl_dump_foreach(qstats, qs) { + char ifname[IF_NAMESIZE]; + const char *name; + double savings; + + if (!qs->_present.rx_packets || + !qs->_present.rx_hw_gro_packets || + !qs->_present.rx_hw_gro_wire_packets) + continue; + + if (!qs->rx_packets) + continue; + + /* How many skbs did we avoid allocating thanks to HW GRO */ + savings = (double)(qs->rx_hw_gro_wire_packets - + qs->rx_hw_gro_packets) / + qs->rx_packets * 100.0; + + name = if_indextoname(qs->ifindex, ifname); + + if (json_output) { + jsonw_start_object(json_wtr); + jsonw_uint_field(json_wtr, "ifindex", qs->ifindex); + if (name) + jsonw_string_field(json_wtr, "ifname", name); + jsonw_float_field(json_wtr, "savings", savings); + jsonw_end_object(json_wtr); + } else { + if (name) + printf("%s", name); + else + printf("ifindex:%u", qs->ifindex); + printf(": %.1f%% savings\n", savings); + } + } + + if (json_output) + jsonw_end_array(json_wtr); + + netdev_qstats_get_list_free(qstats); + return 0; +} + static int do_help(int argc __attribute__((unused)), char **argv __attribute__((unused))) { @@ -590,9 +636,10 @@ static int do_help(int argc __attribute__((unused)), } fprintf(stderr, - "Usage: %s qstats { COMMAND | help }\n" - " %s qstats [ show ] [ OPTIONS ]\n" - " %s qstats balance\n" + "Usage: %1$s qstats { COMMAND | help }\n" + " %1$s qstats [ show ] [ OPTIONS ]\n" + " %1$s qstats balance\n" + " %1$s qstats hw-gro\n" "\n" " OPTIONS := { scope queue | group-by { device | queue } }\n" "\n" @@ -601,9 +648,14 @@ static int do_help(int argc __attribute__((unused)), " show scope queue - Display per-queue statistics\n" " show group-by device - Display device-aggregated statistics (default)\n" " show group-by queue - Display per-queue statistics\n" - " balance - Analyze traffic distribution balance.\n" + "\n" + " Analysis:\n" + " balance - Traffic distribution between queues.\n" + " hw-gro - HW GRO effectiveness analysis\n" + " - savings - delta between packets received\n" + " on the wire and packets seen by the kernel.\n" "", - bin_name, bin_name, bin_name); + bin_name); return 0; } @@ -611,6 +663,7 @@ static int do_help(int argc __attribute__((unused)), static const struct cmd qstats_cmds[] = { { "show", do_show }, { "balance", do_balance }, + { "hw-gro", do_hw_gro }, { "help", do_help }, { 0 } }; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 56e44a98d6a5..450f13ba4cca 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -22,6 +22,7 @@ TARGETS += drivers/ntsync TARGETS += drivers/s390x/uvdevice TARGETS += drivers/net TARGETS += drivers/net/bonding +TARGETS += drivers/net/netconsole TARGETS += drivers/net/team TARGETS += drivers/net/virtio_net TARGETS += drivers/platform/x86/intel/ifs diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index f5c71d993750..8154d6d429d3 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -15,12 +15,6 @@ TEST_PROGS := \ hds.py \ napi_id.py \ napi_threaded.py \ - netcons_basic.sh \ - netcons_cmdline.sh \ - netcons_fragmented_msg.sh \ - netcons_overflow.sh \ - netcons_sysdata.sh \ - netcons_torture.sh \ netpoll_basic.py \ ping.py \ psp.py \ diff --git a/tools/testing/selftests/drivers/net/gro.c b/tools/testing/selftests/drivers/net/gro.c index e894037d2e3e..3c0745b68bfa 100644 --- a/tools/testing/selftests/drivers/net/gro.c +++ b/tools/testing/selftests/drivers/net/gro.c @@ -3,26 +3,45 @@ * This testsuite provides conformance testing for GRO coalescing. * * Test cases: - * 1.data + * + * data_*: * Data packets of the same size and same header setup with correct * sequence numbers coalesce. The one exception being the last data * packet coalesced: it can be smaller than the rest and coalesced * as long as it is in the same flow. - * 2.ack + * - data_same: same size packets coalesce + * - data_lrg_sml: large then small coalesces + * - data_sml_lrg: small then large doesn't coalesce + * + * ack: * Pure ACK does not coalesce. - * 3.flags - * Specific test cases: no packets with PSH, SYN, URG, RST set will - * be coalesced. - * 4.tcp + * + * flags_*: + * No packets with PSH, SYN, URG, RST, CWR set will be coalesced. + * - flags_psh, flags_syn, flags_rst, flags_urg, flags_cwr + * + * tcp_*: * Packets with incorrect checksum, non-consecutive seqno and * different TCP header options shouldn't coalesce. Nit: given that * some extension headers have paddings, such as timestamp, headers - * that are padding differently would not be coalesced. - * 5.ip: - * Packets with different (ECN, TTL, TOS) header, ip options or - * ip fragments (ipv6) shouldn't coalesce. - * 6.large: + * that are padded differently would not be coalesced. + * - tcp_csum: incorrect checksum + * - tcp_seq: non-consecutive sequence numbers + * - tcp_ts: different timestamps + * - tcp_opt: different TCP options + * + * ip_*: + * Packets with different (ECN, TTL, TOS) header, IP options or + * IP fragments shouldn't coalesce. + * - ip_ecn, ip_tos: shared between IPv4/IPv6 + * - ip_ttl, ip_opt, ip_frag4: IPv4 only + * - ip_id_df*: IPv4 IP ID field coalescing tests + * - ip_frag6, ip_v6ext_*: IPv6 only + * + * large_*: * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce. + * - large_max: exceeding max size + * - large_rem: remainder handling * * MSS is defined as 4096 - header because if it is too small * (i.e. 1500 MTU - header), it will result in many packets, @@ -79,6 +98,15 @@ #define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +enum flush_id_case { + FLUSH_ID_DF1_INC, + FLUSH_ID_DF1_FIXED, + FLUSH_ID_DF0_INC, + FLUSH_ID_DF0_FIXED, + FLUSH_ID_DF1_INC_FIXED, + FLUSH_ID_DF1_FIXED_INC, +}; + static const char *addr6_src = "fdaa::2"; static const char *addr6_dst = "fdaa::1"; static const char *addr4_src = "192.168.1.200"; @@ -95,7 +123,6 @@ static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; static bool ipip; -static const int num_flush_id_cases = 6; static void vlog(const char *fmt, ...) { @@ -127,19 +154,19 @@ static void setup_sock_filter(int fd) /* Overridden later if exthdrs are used: */ opt_ipproto_off = ipproto_off; - if (strcmp(testname, "ip") == 0) { - if (proto == PF_INET) - optlen = sizeof(struct ip_timestamp); - else { - BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); - BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); - BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); - - /* same size for HBH and Fragment extension header types */ - optlen = MIN_EXTHDR_SIZE; - opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) - + offsetof(struct ip6_ext, ip6e_nxt); - } + if (strcmp(testname, "ip_opt") == 0) { + optlen = sizeof(struct ip_timestamp); + } else if (strcmp(testname, "ip_frag6") == 0 || + strcmp(testname, "ip_v6ext_same") == 0 || + strcmp(testname, "ip_v6ext_diff") == 0) { + BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); + + /* same size for HBH and Fragment extension header types */ + optlen = MIN_EXTHDR_SIZE; + opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) + + offsetof(struct ip6_ext, ip6e_nxt); } /* this filter validates the following: @@ -333,32 +360,58 @@ static void create_packet(void *buf, int seq_offset, int ack_offset, fill_datalinklayer(buf); } -/* send one extra flag, not first and not last pkt */ -static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, - int rst, int urg) +#ifndef TH_CWR +#define TH_CWR 0x80 +#endif +static void set_flags(struct tcphdr *tcph, int payload_len, int psh, int syn, + int rst, int urg, int cwr) { - static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - int payload_len, pkt_size, flag, i; - struct tcphdr *tcph; - - payload_len = PAYLOAD_LEN * psh; - pkt_size = total_hdr_len + payload_len; - flag = NUM_PACKETS / 2; - - create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); - - tcph = (struct tcphdr *)(flag_buf + tcp_offset); tcph->psh = psh; tcph->syn = syn; tcph->rst = rst; tcph->urg = urg; + if (cwr) + tcph->th_flags |= TH_CWR; + else + tcph->th_flags &= ~TH_CWR; tcph->check = 0; tcph->check = tcp_checksum(tcph, payload_len); +} + +/* send extra flags of the (NUM_PACKETS / 2) and (NUM_PACKETS / 2 - 1) + * pkts, not first and not last pkt + */ +static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, + int rst, int urg, int cwr) +{ + static char flag_buf[2][MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int payload_len, pkt_size, i; + struct tcphdr *tcph; + int flag[2]; + + payload_len = PAYLOAD_LEN * (psh || cwr); + pkt_size = total_hdr_len + payload_len; + flag[0] = NUM_PACKETS / 2; + flag[1] = NUM_PACKETS / 2 - 1; + + /* Create and configure packets with flags + */ + for (i = 0; i < 2; i++) { + if (flag[i] > 0) { + create_packet(flag_buf[i], flag[i] * payload_len, 0, + payload_len, 0); + tcph = (struct tcphdr *)(flag_buf[i] + tcp_offset); + set_flags(tcph, payload_len, psh, syn, rst, urg, cwr); + } + } for (i = 0; i < NUM_PACKETS + 1; i++) { - if (i == flag) { - write_packet(fd, flag_buf, pkt_size, daddr); + if (i == flag[0]) { + write_packet(fd, flag_buf[0], pkt_size, daddr); + continue; + } else if (i == flag[1] && cwr) { + write_packet(fd, flag_buf[1], pkt_size, daddr); continue; } create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); @@ -648,7 +701,8 @@ static void fix_ip4_checksum(struct iphdr *iph) iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); } -static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) +static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, + enum flush_id_case tcase) { static char buf1[MAX_HDR_LEN + PAYLOAD_LEN]; static char buf2[MAX_HDR_LEN + PAYLOAD_LEN]; @@ -667,7 +721,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); switch (tcase) { - case 0: /* DF=1, Incrementing - should coalesce */ + case FLUSH_ID_DF1_INC: /* DF=1, Incrementing - should coalesce */ iph1->frag_off |= htons(IP_DF); iph1->id = htons(8); @@ -675,7 +729,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(9); break; - case 1: /* DF=1, Fixed - should coalesce */ + case FLUSH_ID_DF1_FIXED: /* DF=1, Fixed - should coalesce */ iph1->frag_off |= htons(IP_DF); iph1->id = htons(8); @@ -683,7 +737,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(8); break; - case 2: /* DF=0, Incrementing - should coalesce */ + case FLUSH_ID_DF0_INC: /* DF=0, Incrementing - should coalesce */ iph1->frag_off &= ~htons(IP_DF); iph1->id = htons(8); @@ -691,7 +745,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(9); break; - case 3: /* DF=0, Fixed - should coalesce */ + case FLUSH_ID_DF0_FIXED: /* DF=0, Fixed - should coalesce */ iph1->frag_off &= ~htons(IP_DF); iph1->id = htons(8); @@ -699,9 +753,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) iph2->id = htons(8); break; - case 4: /* DF=1, two packets incrementing, and one fixed - should - * coalesce only the first two packets - */ + case FLUSH_ID_DF1_INC_FIXED: /* DF=1, two packets incrementing, and + * one fixed - should coalesce only the + * first two packets + */ iph1->frag_off |= htons(IP_DF); iph1->id = htons(8); @@ -713,9 +768,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) send_three = true; break; - case 5: /* DF=1, two packets fixed, and one incrementing - should - * coalesce only the first two packets - */ + case FLUSH_ID_DF1_FIXED_INC: /* DF=1, two packets fixed, and one + * incrementing - should coalesce only + * the first two packets + */ iph1->frag_off |= htons(IP_DF); iph1->id = htons(8); @@ -739,16 +795,6 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) } } -static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt) -{ - for (int i = 0; i < num_flush_id_cases; i++) { - sleep(1); - send_flush_id_case(fd, daddr, i); - sleep(1); - write_packet(fd, fin_pkt, total_hdr_len, daddr); - } -} - static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) { static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; @@ -926,6 +972,28 @@ static void set_timeout(int fd) error(1, errno, "cannot set timeout, setsockopt failed"); } +static void set_rcvbuf(int fd) +{ + int bufsize = 1 * 1024 * 1024; /* 1 MB */ + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize))) + error(1, errno, "cannot set rcvbuf size, setsockopt failed"); +} + +static void recv_error(int fd, int rcv_errno) +{ + struct tpacket_stats stats; + socklen_t len; + + len = sizeof(stats); + if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len)) + error(1, errno, "can't get stats"); + + fprintf(stderr, "Socket stats: packets=%u, drops=%u\n", + stats.tp_packets, stats.tp_drops); + error(1, rcv_errno, "could not receive"); +} + static void check_recv_pkts(int fd, int *correct_payload, int correct_num_pkts) { @@ -950,7 +1018,7 @@ static void check_recv_pkts(int fd, int *correct_payload, ip_ext_len = 0; pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); if (pkt_size < 0) - error(1, errno, "could not receive"); + recv_error(fd, errno); if (iph->version == 4) ip_ext_len = (iph->ihl - 5) * 4; @@ -1008,108 +1076,131 @@ static void gro_sender(void) daddr.sll_halen = ETH_ALEN; create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); - if (strcmp(testname, "data") == 0) { + /* data sub-tests */ + if (strcmp(testname, "data_same") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "data_lrg_sml") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "data_sml_lrg") == 0) { send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + /* ack test */ } else if (strcmp(testname, "ack") == 0) { send_ack(txfd, &daddr); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "flags") == 0) { - send_flags(txfd, &daddr, 1, 0, 0, 0); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - send_flags(txfd, &daddr, 0, 1, 0, 0); + /* flags sub-tests */ + } else if (strcmp(testname, "flags_psh") == 0) { + send_flags(txfd, &daddr, 1, 0, 0, 0, 0); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_flags(txfd, &daddr, 0, 0, 1, 0); + } else if (strcmp(testname, "flags_syn") == 0) { + send_flags(txfd, &daddr, 0, 1, 0, 0, 0); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_flags(txfd, &daddr, 0, 0, 0, 1); + } else if (strcmp(testname, "flags_rst") == 0) { + send_flags(txfd, &daddr, 0, 0, 1, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags_urg") == 0) { + send_flags(txfd, &daddr, 0, 0, 0, 1, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags_cwr") == 0) { + send_flags(txfd, &daddr, 0, 0, 0, 0, 1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "tcp") == 0) { + + /* tcp sub-tests */ + } else if (strcmp(testname, "tcp_csum") == 0) { send_changed_checksum(txfd, &daddr); - /* Adding sleep before sending FIN so that it is not - * received prior to other packets. - */ usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "tcp_seq") == 0) { send_changed_seq(txfd, &daddr); usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "tcp_ts") == 0) { send_changed_ts(txfd, &daddr); usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "tcp_opt") == 0) { send_diff_opt(txfd, &daddr); usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "ip") == 0) { + + /* ip sub-tests - shared between IPv4 and IPv6 */ + } else if (strcmp(testname, "ip_ecn") == 0) { send_changed_ECN(txfd, &daddr); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - + } else if (strcmp(testname, "ip_tos") == 0) { send_changed_tos(txfd, &daddr); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - if (proto == PF_INET) { - /* Modified packets may be received out of order. - * Sleep function added to enforce test boundaries - * so that fin pkts are not received prior to other pkts. - */ - sleep(1); - send_changed_ttl(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - send_ip_options(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - send_fragment4(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - test_flush_id(txfd, &daddr, fin_pkt); - } else if (proto == PF_INET6) { - sleep(1); - send_fragment6(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - /* send IPv6 packets with ext header with same payload */ - send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - /* send IPv6 packets with ext header with different payload */ - send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } - } else if (strcmp(testname, "large") == 0) { - /* 20 is the difference between min iphdr size - * and min ipv6hdr size. Like MAX_HDR_SIZE, - * MAX_PAYLOAD is defined with the larger header of the two. - */ + + /* ip sub-tests - IPv4 only */ + } else if (strcmp(testname, "ip_ttl") == 0) { + send_changed_ttl(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_opt") == 0) { + send_ip_options(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_frag4") == 0) { + send_fragment4(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df1_inc") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df1_fixed") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df0_inc") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_INC); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df0_fixed") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_FIXED); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC_FIXED); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) { + send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED_INC); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + /* ip sub-tests - IPv6 only */ + } else if (strcmp(testname, "ip_frag6") == 0) { + send_fragment6(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_v6ext_same") == 0) { + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip_v6ext_diff") == 0) { + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + /* large sub-tests */ + } else if (strcmp(testname, "large_max") == 0) { int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; send_large(txfd, &daddr, remainder); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "large_rem") == 0) { + int offset = (proto == PF_INET && !ipip) ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; send_large(txfd, &daddr, remainder + 1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else { - error(1, 0, "Unknown testcase"); + error(1, 0, "Unknown testcase: %s", testname); } if (close(txfd)) @@ -1126,132 +1217,166 @@ static void gro_receiver(void) error(1, 0, "socket creation"); setup_sock_filter(rxfd); set_timeout(rxfd); + set_rcvbuf(rxfd); bind_packetsocket(rxfd); ksft_ready(); memset(correct_payload, 0, sizeof(correct_payload)); - if (strcmp(testname, "data") == 0) { + /* data sub-tests */ + if (strcmp(testname, "data_same") == 0) { printf("pure data packet of same size: "); correct_payload[0] = PAYLOAD_LEN * 2; check_recv_pkts(rxfd, correct_payload, 1); - + } else if (strcmp(testname, "data_lrg_sml") == 0) { printf("large data packets followed by a smaller one: "); correct_payload[0] = PAYLOAD_LEN * 1.5; check_recv_pkts(rxfd, correct_payload, 1); - + } else if (strcmp(testname, "data_sml_lrg") == 0) { printf("small data packets followed by a larger one: "); correct_payload[0] = PAYLOAD_LEN / 2; correct_payload[1] = PAYLOAD_LEN; check_recv_pkts(rxfd, correct_payload, 2); + + /* ack test */ } else if (strcmp(testname, "ack") == 0) { printf("duplicate ack and pure ack: "); check_recv_pkts(rxfd, correct_payload, 3); - } else if (strcmp(testname, "flags") == 0) { + + /* flags sub-tests */ + } else if (strcmp(testname, "flags_psh") == 0) { correct_payload[0] = PAYLOAD_LEN * 3; correct_payload[1] = PAYLOAD_LEN * 2; - printf("psh flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 2); - + } else if (strcmp(testname, "flags_syn") == 0) { correct_payload[0] = PAYLOAD_LEN * 2; correct_payload[1] = 0; correct_payload[2] = PAYLOAD_LEN * 2; printf("syn flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 3); - + } else if (strcmp(testname, "flags_rst") == 0) { + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; printf("rst flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 3); - + } else if (strcmp(testname, "flags_urg") == 0) { + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; printf("urg flag ends coalescing: "); check_recv_pkts(rxfd, correct_payload, 3); - } else if (strcmp(testname, "tcp") == 0) { + } else if (strcmp(testname, "flags_cwr") == 0) { correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - correct_payload[2] = PAYLOAD_LEN; - correct_payload[3] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN * 2; + correct_payload[2] = PAYLOAD_LEN * 2; + printf("cwr flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + /* tcp sub-tests */ + } else if (strcmp(testname, "tcp_csum") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; printf("changed checksum does not coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - + } else if (strcmp(testname, "tcp_seq") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; printf("Wrong Seq number doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - - printf("Different timestamp doesn't coalesce: "); + } else if (strcmp(testname, "tcp_ts") == 0) { correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + correct_payload[3] = PAYLOAD_LEN; + printf("Different timestamp doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 4); - - printf("Different options doesn't coalesce: "); + } else if (strcmp(testname, "tcp_opt") == 0) { correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + printf("Different options doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - } else if (strcmp(testname, "ip") == 0) { + + /* ip sub-tests - shared between IPv4 and IPv6 */ + } else if (strcmp(testname, "ip_ecn") == 0) { correct_payload[0] = PAYLOAD_LEN; correct_payload[1] = PAYLOAD_LEN; - printf("different ECN doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - + } else if (strcmp(testname, "ip_tos") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; printf("different tos doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); - if (proto == PF_INET) { - printf("different ttl doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - printf("ip options doesn't coalesce: "); - correct_payload[2] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 3); - - printf("fragmented ip4 doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - /* is_atomic checks */ - printf("DF=1, Incrementing - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=1, Fixed - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=0, Incrementing - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=0, Fixed - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - - printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - } else if (proto == PF_INET6) { - /* GRO doesn't check for ipv6 hop limit when flushing. - * Hence no corresponding test to the ipv4 case. - */ - printf("fragmented ip6 doesn't coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - correct_payload[2] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 3); - - printf("ipv6 with ext header does coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("ipv6 with ext header with different payloads doesn't coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - } - } else if (strcmp(testname, "large") == 0) { + /* ip sub-tests - IPv4 only */ + } else if (strcmp(testname, "ip_ttl") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + printf("different ttl doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip_opt") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + printf("ip options doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "ip_frag4") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + printf("fragmented ip4 doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip_id_df1_inc") == 0) { + printf("DF=1, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_id_df1_fixed") == 0) { + printf("DF=1, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_id_df0_inc") == 0) { + printf("DF=0, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_id_df0_fixed") == 0) { + printf("DF=0, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) { + printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) { + printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + + /* ip sub-tests - IPv6 only */ + } else if (strcmp(testname, "ip_frag6") == 0) { + /* GRO doesn't check for ipv6 hop limit when flushing. + * Hence no corresponding test to the ipv4 case. + */ + printf("fragmented ip6 doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "ip_v6ext_same") == 0) { + printf("ipv6 with ext header does coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + } else if (strcmp(testname, "ip_v6ext_diff") == 0) { + printf("ipv6 with ext header with different payloads doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + + /* large sub-tests */ + } else if (strcmp(testname, "large_max") == 0) { int offset = (proto == PF_INET && !ipip) ? 20 : 0; int remainder = (MAX_PAYLOAD + offset) % MSS; @@ -1259,14 +1384,18 @@ static void gro_receiver(void) correct_payload[1] = remainder; printf("Shouldn't coalesce if exceed IP max pkt size: "); check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "large_rem") == 0) { + int offset = (proto == PF_INET && !ipip) ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; /* last segment sent individually, doesn't start new segment */ - correct_payload[0] = correct_payload[0] - remainder; + correct_payload[0] = (MAX_PAYLOAD + offset) - remainder; correct_payload[1] = remainder + 1; correct_payload[2] = remainder + 1; + printf("last segment sent individually: "); check_recv_pkts(rxfd, correct_payload, 3); } else { - error(1, 0, "Test case error, should never trigger"); + error(1, 0, "Test case error: unknown testname %s", testname); } if (close(rxfd)) diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index ba83713bf7b5..cbc1b19dbc91 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -9,18 +9,36 @@ binary in different configurations and checking for correct packet coalescing behavior. Test cases: - - data: Data packets with same size/headers and correct seq numbers coalesce + - data_same: Same size data packets coalesce + - data_lrg_sml: Large packet followed by smaller one coalesces + - data_sml_lrg: Small packet followed by larger one doesn't coalesce - ack: Pure ACK packets do not coalesce - - flags: Packets with PSH, SYN, URG, RST flags do not coalesce - - tcp: Packets with incorrect checksum, non-consecutive seqno don't coalesce - - ip: Packets with different ECN, TTL, TOS, or IP options don't coalesce - - large: Packets larger than GRO_MAX_SIZE don't coalesce + - flags_psh: Packets with PSH flag don't coalesce + - flags_syn: Packets with SYN flag don't coalesce + - flags_rst: Packets with RST flag don't coalesce + - flags_urg: Packets with URG flag don't coalesce + - flags_cwr: Packets with CWR flag don't coalesce + - tcp_csum: Packets with incorrect checksum don't coalesce + - tcp_seq: Packets with non-consecutive seqno don't coalesce + - tcp_ts: Packets with different timestamp options don't coalesce + - tcp_opt: Packets with different TCP options don't coalesce + - ip_ecn: Packets with different ECN don't coalesce + - ip_tos: Packets with different TOS don't coalesce + - ip_ttl: (IPv4) Packets with different TTL don't coalesce + - ip_opt: (IPv4) Packets with IP options don't coalesce + - ip_frag4: (IPv4) IPv4 fragments don't coalesce + - ip_id_df*: (IPv4) IP ID field coalescing tests + - ip_frag6: (IPv6) IPv6 fragments don't coalesce + - ip_v6ext_same: (IPv6) IPv6 ext header with same payload coalesces + - ip_v6ext_diff: (IPv6) IPv6 ext header with different payload doesn't coalesce + - large_max: Packets exceeding GRO_MAX_SIZE don't coalesce + - large_rem: Large packet remainder handling """ import os from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import NetDrvEpEnv, KsftXfailEx -from lib.py import cmd, defer, bkg, ip +from lib.py import bkg, cmd, defer, ethtool, ip from lib.py import ksft_variants @@ -70,49 +88,150 @@ def _set_mtu_restore(dev, mtu, host): defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host) -def _setup(cfg, test_name): +def _set_ethtool_feat(dev, current, feats, host=None): + s2n = {True: "on", False: "off"} + + new = ["-K", dev] + old = ["-K", dev] + no_change = True + for name, state in feats.items(): + new += [name, s2n[state]] + old += [name, s2n[current[name]["active"]]] + + if current[name]["active"] != state: + no_change = False + if current[name]["fixed"]: + raise KsftXfailEx(f"Device does not support {name}") + if no_change: + return + + eth_cmd = ethtool(" ".join(new), host=host) + defer(ethtool, " ".join(old), host=host) + + # If ethtool printed something kernel must have modified some features + if eth_cmd.stdout: + ksft_pr(eth_cmd) + + +def _setup(cfg, mode, test_name): """ Setup hardware loopback mode for GRO testing. """ if not hasattr(cfg, "bin_remote"): cfg.bin_local = cfg.test_dir / "gro" cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) - # "large" test needs at least 4k MTU - if test_name == "large": + if not hasattr(cfg, "feat"): + cfg.feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + cfg.remote_feat = ethtool(f"-k {cfg.remote_ifname}", + host=cfg.remote, json=True)[0] + + # "large_*" tests need at least 4k MTU + if test_name.startswith("large_"): _set_mtu_restore(cfg.dev, 4096, None) _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote) - flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout" - irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs" - - _write_defer_restore(cfg, flush_path, "200000", defer_undo=True) - _write_defer_restore(cfg, irq_path, "10", defer_undo=True) + if mode == "sw": + flush_path = f"/sys/class/net/{cfg.ifname}/gro_flush_timeout" + irq_path = f"/sys/class/net/{cfg.ifname}/napi_defer_hard_irqs" + + _write_defer_restore(cfg, flush_path, "200000", defer_undo=True) + _write_defer_restore(cfg, irq_path, "10", defer_undo=True) + + _set_ethtool_feat(cfg.ifname, cfg.feat, + {"generic-receive-offload": True, + "rx-gro-hw": False, + "large-receive-offload": False}) + elif mode == "hw": + _set_ethtool_feat(cfg.ifname, cfg.feat, + {"generic-receive-offload": False, + "rx-gro-hw": True, + "large-receive-offload": False}) + + # Some NICs treat HW GRO as a GRO sub-feature so disabling GRO + # will also clear HW GRO. Use a hack of installing XDP generic + # to skip SW GRO, even when enabled. + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not feat["rx-gro-hw"]["active"]: + ksft_pr("Driver clears HW GRO and SW GRO is cleared, using generic XDP workaround") + prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" + ip(f"link set dev {cfg.ifname} xdpgeneric obj {prog} sec xdp") + defer(ip, f"link set dev {cfg.ifname} xdpgeneric off") + + # Attaching XDP may change features, fetch the latest state + feat = ethtool(f"-k {cfg.ifname}", json=True)[0] + + _set_ethtool_feat(cfg.ifname, feat, + {"generic-receive-offload": True, + "rx-gro-hw": True, + "large-receive-offload": False}) + elif mode == "lro": + # netdevsim advertises LRO for feature inheritance testing with + # bonding/team tests but it doesn't actually perform the offload + cfg.require_nsim(nsim_test=False) + + _set_ethtool_feat(cfg.ifname, cfg.feat, + {"generic-receive-offload": False, + "rx-gro-hw": False, + "large-receive-offload": True}) try: # Disable TSO for local tests cfg.require_nsim() # will raise KsftXfailEx if not running on nsim - cmd(f"ethtool -K {cfg.ifname} gro on tso off") - cmd(f"ethtool -K {cfg.remote_ifname} gro on tso off", host=cfg.remote) + _set_ethtool_feat(cfg.remote_ifname, cfg.remote_feat, + {"tcp-segmentation-offload": False}, + host=cfg.remote) except KsftXfailEx: pass + def _gro_variants(): """Generator that yields all combinations of protocol and test types.""" - for protocol in ["ipv4", "ipv6", "ipip"]: - for test_name in ["data", "ack", "flags", "tcp", "ip", "large"]: - yield protocol, test_name + # Tests that work for all protocols + common_tests = [ + "data_same", "data_lrg_sml", "data_sml_lrg", + "ack", + "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr", + "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt", + "ip_ecn", "ip_tos", + "large_max", "large_rem", + ] + + # Tests specific to IPv4 + ipv4_tests = [ + "ip_ttl", "ip_opt", "ip_frag4", + "ip_id_df1_inc", "ip_id_df1_fixed", + "ip_id_df0_inc", "ip_id_df0_fixed", + "ip_id_df1_inc_fixed", "ip_id_df1_fixed_inc", + ] + + # Tests specific to IPv6 + ipv6_tests = [ + "ip_frag6", "ip_v6ext_same", "ip_v6ext_diff", + ] + + for mode in ["sw", "hw", "lro"]: + for protocol in ["ipv4", "ipv6", "ipip"]: + for test_name in common_tests: + yield mode, protocol, test_name + + if protocol in ["ipv4", "ipip"]: + for test_name in ipv4_tests: + yield mode, protocol, test_name + elif protocol == "ipv6": + for test_name in ipv6_tests: + yield mode, protocol, test_name @ksft_variants(_gro_variants()) -def test(cfg, protocol, test_name): +def test(cfg, mode, protocol, test_name): """Run a single GRO test with retries.""" ipver = "6" if protocol[-1] == "6" else "4" cfg.require_ipver(ipver) - _setup(cfg, test_name) + _setup(cfg, mode, test_name) base_cmd_args = [ f"--{protocol}", @@ -142,10 +261,9 @@ def test(cfg, protocol, test_name): if rx_proc.ret == 0: return - ksft_pr(rx_proc.stdout.strip().replace('\n', '\n# ')) - ksft_pr(rx_proc.stderr.strip().replace('\n', '\n# ')) + ksft_pr(rx_proc) - if test_name == "large" and os.environ.get("KSFT_MACHINE_SLOW"): + if test_name.startswith("large_") and os.environ.get("KSFT_MACHINE_SLOW"): ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment") return diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 9c163ba6feee..a64140333a46 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -35,6 +35,7 @@ TEST_PROGS = \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ + rss_drv.py \ rss_flow_label.py \ rss_input_xfrm.py \ toeplitz.py \ diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c index 62456df947bc..240d13dbc54e 100644 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -12,6 +12,7 @@ #include <unistd.h> #include <arpa/inet.h> +#include <linux/mman.h> #include <linux/errqueue.h> #include <linux/if_packet.h> #include <linux/ipv6.h> @@ -37,6 +38,23 @@ #include <liburing.h> +#define SKIP_CODE 42 + +struct t_io_uring_zcrx_ifq_reg { + __u32 if_idx; + __u32 if_rxq; + __u32 rq_entries; + __u32 flags; + + __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */ + __u64 region_ptr; /* struct io_uring_region_desc * */ + + struct io_uring_zcrx_offsets offsets; + __u32 zcrx_id; + __u32 rx_buf_len; + __u64 __resv[3]; +}; + static long page_size; #define AREA_SIZE (8192 * page_size) #define SEND_SIZE (512 * 4096) @@ -65,6 +83,8 @@ static bool cfg_oneshot; static int cfg_oneshot_recvs; static int cfg_send_size = SEND_SIZE; static struct sockaddr_in6 cfg_addr; +static unsigned int cfg_rx_buf_len; +static bool cfg_dry_run; static char *payload; static void *area_ptr; @@ -128,14 +148,28 @@ static void setup_zcrx(struct io_uring *ring) if (!ifindex) error(1, 0, "bad interface name: %s", cfg_ifname); - area_ptr = mmap(NULL, - AREA_SIZE, - PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, - 0, - 0); - if (area_ptr == MAP_FAILED) - error(1, 0, "mmap(): zero copy area"); + if (cfg_rx_buf_len && cfg_rx_buf_len != page_size) { + area_ptr = mmap(NULL, + AREA_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | + MAP_HUGETLB | MAP_HUGE_2MB, + -1, + 0); + if (area_ptr == MAP_FAILED) { + printf("Can't allocate huge pages\n"); + exit(SKIP_CODE); + } + } else { + area_ptr = mmap(NULL, + AREA_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + if (area_ptr == MAP_FAILED) + error(1, 0, "mmap(): zero copy area"); + } ring_size = get_refill_ring_size(rq_entries); ring_ptr = mmap(NULL, @@ -157,17 +191,23 @@ static void setup_zcrx(struct io_uring *ring) .flags = 0, }; - struct io_uring_zcrx_ifq_reg reg = { + struct t_io_uring_zcrx_ifq_reg reg = { .if_idx = ifindex, .if_rxq = cfg_queue_id, .rq_entries = rq_entries, .area_ptr = (__u64)(unsigned long)&area_reg, .region_ptr = (__u64)(unsigned long)®ion_reg, + .rx_buf_len = cfg_rx_buf_len, }; - ret = io_uring_register_ifq(ring, ®); - if (ret) + ret = io_uring_register_ifq(ring, (void *)®); + if (cfg_rx_buf_len && (ret == -EINVAL || ret == -EOPNOTSUPP || + ret == -ERANGE)) { + printf("Large chunks are not supported %i\n", ret); + exit(SKIP_CODE); + } else if (ret) { error(1, 0, "io_uring_register_ifq(): %d", ret); + } rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head); rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail); @@ -323,6 +363,8 @@ static void run_server(void) io_uring_queue_init(512, &ring, flags); setup_zcrx(&ring); + if (cfg_dry_run) + return; add_accept(&ring, fd); @@ -383,7 +425,7 @@ static void parse_opts(int argc, char **argv) usage(argv[0]); cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) { + while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:x:d")) != -1) { switch (c) { case 's': if (cfg_client) @@ -418,6 +460,12 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_send_size = strtoul(optarg, NULL, 0); break; + case 'x': + cfg_rx_buf_len = page_size * strtoul(optarg, NULL, 0); + break; + case 'd': + cfg_dry_run = true; + break; } } diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 712c806508b5..c63d6d6450d2 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -3,104 +3,121 @@ import re from os import path -from lib.py import ksft_run, ksft_exit, KsftSkipEx +from lib.py import ksft_run, ksft_exit, KsftSkipEx, ksft_variants, KsftNamedVariant from lib.py import NetDrvEpEnv from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen +from lib.py import EthtoolFamily +SKIP_CODE = 42 -def _get_current_settings(cfg): - output = ethtool(f"-g {cfg.ifname}", json=True)[0] - return (output['rx'], output['hds-thresh']) - - -def _get_combined_channels(cfg): - output = ethtool(f"-l {cfg.ifname}").stdout - values = re.findall(r'Combined:\s+(\d+)', output) - return int(values[1]) - - -def _create_rss_ctx(cfg, chan): - output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout +def create_rss_ctx(cfg): + output = ethtool(f"-X {cfg.ifname} context new start {cfg.target} equal 1").stdout values = re.search(r'New RSS context is (\d+)', output).group(1) - ctx_id = int(values) - return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}")) + return int(values) -def _set_flow_rule(cfg, port, chan): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout +def set_flow_rule(cfg): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.target}").stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) -def _set_flow_rule_rss(cfg, port, ctx_id): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout +def set_flow_rule_rss(cfg, rss_ctx_id): + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} context {rss_ctx_id}").stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) -def test_zcrx(cfg) -> None: - cfg.require_ipver('6') - - combined_chans = _get_combined_channels(cfg) - if combined_chans < 2: - raise KsftSkipEx('at least 2 combined channels required') - (rx_ring, hds_thresh) = _get_current_settings(cfg) - port = rand_port() - - ethtool(f"-G {cfg.ifname} tcp-data-split on") - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") +def single(cfg): + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + channels = channels['combined-count'] + if channels < 2: + raise KsftSkipEx('Test requires NETIF with at least 2 combined channels') - ethtool(f"-G {cfg.ifname} hds-thresh 0") - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") + rings = cfg.ethnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + rx_rings = rings['rx'] + hds_thresh = rings.get('hds-thresh', 0) - ethtool(f"-G {cfg.ifname} rx 64") - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") + cfg.ethnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled', + 'hds-thresh': 0, + 'rx': 64}) + defer(cfg.ethnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown', + 'hds-thresh': hds_thresh, + 'rx': rx_rings}) - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + cfg.target = channels - 1 + ethtool(f"-X {cfg.ifname} equal {cfg.target}") defer(ethtool, f"-X {cfg.ifname} default") - flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) + flow_rule_id = set_flow_rule(cfg) defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" - with bkg(rx_cmd, exit_wait=True): - wait_port_listen(port, proto="tcp") - cmd(tx_cmd, host=cfg.remote) +def rss(cfg): + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + channels = channels['combined-count'] + if channels < 2: + raise KsftSkipEx('Test requires NETIF with at least 2 combined channels') -def test_zcrx_oneshot(cfg) -> None: - cfg.require_ipver('6') + rings = cfg.ethnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + rx_rings = rings['rx'] + hds_thresh = rings.get('hds-thresh', 0) - combined_chans = _get_combined_channels(cfg) - if combined_chans < 2: - raise KsftSkipEx('at least 2 combined channels required') - (rx_ring, hds_thresh) = _get_current_settings(cfg) - port = rand_port() + cfg.ethnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled', + 'hds-thresh': 0, + 'rx': 64}) + defer(cfg.ethnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown', + 'hds-thresh': hds_thresh, + 'rx': rx_rings}) - ethtool(f"-G {cfg.ifname} tcp-data-split on") - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") + cfg.target = channels - 1 + ethtool(f"-X {cfg.ifname} equal {cfg.target}") + defer(ethtool, f"-X {cfg.ifname} default") - ethtool(f"-G {cfg.ifname} hds-thresh 0") - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") + rss_ctx_id = create_rss_ctx(cfg) + defer(ethtool, f"-X {cfg.ifname} delete context {rss_ctx_id}") - ethtool(f"-G {cfg.ifname} rx 64") - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") + flow_rule_id = set_flow_rule_rss(cfg, rss_ctx_id) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") - defer(ethtool, f"-X {cfg.ifname} default") - flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") +@ksft_variants([ + KsftNamedVariant("single", single), + KsftNamedVariant("rss", rss), +]) +def test_zcrx(cfg, setup) -> None: + cfg.require_ipver('6') - rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" - tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384" + setup(cfg) + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 12840" with bkg(rx_cmd, exit_wait=True): - wait_port_listen(port, proto="tcp") + wait_port_listen(cfg.port, proto="tcp") cmd(tx_cmd, host=cfg.remote) -def test_zcrx_rss(cfg) -> None: +@ksft_variants([ + KsftNamedVariant("single", single), + KsftNamedVariant("rss", rss), +]) +def test_zcrx_oneshot(cfg, setup) -> None: + cfg.require_ipver('6') + + setup(cfg) + rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -o 4" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {cfg.port} -l 4096 -z 16384" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(cfg.port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) + + +def test_zcrx_large_chunks(cfg) -> None: + """Test zcrx with large buffer chunks.""" + cfg.require_ipver('6') combined_chans = _get_combined_channels(cfg) @@ -121,12 +138,16 @@ def test_zcrx_rss(cfg) -> None: ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") defer(ethtool, f"-X {cfg.ifname} default") - (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) - flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) + flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -x 2" tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + + probe = cmd(rx_cmd + " -d", fail=False) + if probe.ret == SKIP_CODE: + raise KsftSkipEx(probe.stdout) + with bkg(rx_cmd, exit_wait=True): wait_port_listen(port, proto="tcp") cmd(tx_cmd, host=cfg.remote) @@ -137,7 +158,9 @@ def main() -> None: cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx") cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) - ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + cfg.ethnl = EthtoolFamily() + cfg.port = rand_port() + ksft_run(globs=globals(), cases=[test_zcrx, test_zcrx_oneshot], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 3288ed04ce08..16864c844108 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -48,6 +48,7 @@ #include <errno.h> #define __iovec_defined #include <fcntl.h> +#include <limits.h> #include <malloc.h> #include <error.h> #include <poll.h> diff --git a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py index c1e943d53f19..c632b41e7a23 100755 --- a/tools/testing/selftests/drivers/net/hw/nic_timestamp.py +++ b/tools/testing/selftests/drivers/net/hw/nic_timestamp.py @@ -1,15 +1,38 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +# pylint: disable=locally-disabled, invalid-name, attribute-defined-outside-init, too-few-public-methods """ Tests related to configuration of HW timestamping """ import errno +import ctypes +import fcntl +import socket from lib.py import ksft_run, ksft_exit, ksft_ge, ksft_eq, KsftSkipEx from lib.py import NetDrvEnv, EthtoolFamily, NlError +SIOCSHWTSTAMP = 0x89b0 +SIOCGHWTSTAMP = 0x89b1 +class hwtstamp_config(ctypes.Structure): + """ Python copy of struct hwtstamp_config """ + _fields_ = [ + ("flags", ctypes.c_int), + ("tx_type", ctypes.c_int), + ("rx_filter", ctypes.c_int), + ] + + +class ifreq(ctypes.Structure): + """ Python copy of struct ifreq """ + _fields_ = [ + ("ifr_name", ctypes.c_char * 16), + ("ifr_data", ctypes.POINTER(hwtstamp_config)), + ] + + def __get_hwtimestamp_support(cfg): """ Retrieve supported configuration information """ @@ -31,8 +54,29 @@ def __get_hwtimestamp_support(cfg): return ctx +def __get_hwtimestamp_config_ioctl(cfg): + """ Retrieve current TS configuration information (via ioctl) """ + + config = hwtstamp_config() + + req = ifreq() + req.ifr_name = cfg.ifname.encode() + req.ifr_data = ctypes.pointer(config) + + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + fcntl.ioctl(sock.fileno(), SIOCGHWTSTAMP, req) + sock.close() + + except OSError as e: + if e.errno == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via ioctl") from e + raise + return config + + def __get_hwtimestamp_config(cfg): - """ Retrieve current TS configuration information """ + """ Retrieve current TS configuration information (via netLink) """ try: tscfg = cfg.ethnl.tsconfig_get({'header': {'dev-name': cfg.ifname}}) @@ -43,8 +87,27 @@ def __get_hwtimestamp_config(cfg): return tscfg +def __set_hwtimestamp_config_ioctl(cfg, ts): + """ Setup new TS configuration information (via ioctl) """ + config = hwtstamp_config() + config.rx_filter = ts['rx-filters']['bits']['bit'][0]['index'] + config.tx_type = ts['tx-types']['bits']['bit'][0]['index'] + req = ifreq() + req.ifr_name = cfg.ifname.encode() + req.ifr_data = ctypes.pointer(config) + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + fcntl.ioctl(sock.fileno(), SIOCSHWTSTAMP, req) + sock.close() + + except OSError as e: + if e.errno == errno.EOPNOTSUPP: + raise KsftSkipEx("timestamping configuration is not supported via ioctl") from e + raise + + def __set_hwtimestamp_config(cfg, ts): - """ Setup new TS configuration information """ + """ Setup new TS configuration information (via netlink) """ ts['header'] = {'dev-name': cfg.ifname} try: @@ -56,9 +119,9 @@ def __set_hwtimestamp_config(cfg, ts): return res -def test_hwtstamp_tx(cfg): +def __perform_hwtstamp_tx(cfg, is_ioctl): """ - Test TX timestamp configuration. + Test TX timestamp configuration via either netlink or ioctl. The driver should apply provided config and report back proper state. """ @@ -66,16 +129,37 @@ def test_hwtstamp_tx(cfg): ts = __get_hwtimestamp_support(cfg) tx = ts['tx'] for t in tx: + res = None tscfg = orig_tscfg tscfg['tx-types']['bits']['bit'] = [t] - res = __set_hwtimestamp_config(cfg, tscfg) + if is_ioctl: + __set_hwtimestamp_config_ioctl(cfg, tscfg) + else: + res = __set_hwtimestamp_config(cfg, tscfg) if res is None: res = __get_hwtimestamp_config(cfg) + resioctl = __get_hwtimestamp_config_ioctl(cfg) ksft_eq(res['tx-types']['bits']['bit'], [t]) + ksft_eq(resioctl.tx_type, t['index']) __set_hwtimestamp_config(cfg, orig_tscfg) +def test_hwtstamp_tx_netlink(cfg): + """ + Test TX timestamp configuration setup via netlink. + The driver should apply provided config and report back proper state. + """ + __perform_hwtstamp_tx(cfg, False) + + +def test_hwtstamp_tx_ioctl(cfg): + """ + Test TX timestamp configuration setup via ioctl. + The driver should apply provided config and report back proper state. + """ + __perform_hwtstamp_tx(cfg, True) + -def test_hwtstamp_rx(cfg): +def __perform_hwtstamp_rx(cfg, is_ioctl): """ Test RX timestamp configuration. The filter configuration is taken from the list of supported filters. @@ -87,11 +171,17 @@ def test_hwtstamp_rx(cfg): ts = __get_hwtimestamp_support(cfg) rx = ts['rx'] for r in rx: + res = None tscfg = orig_tscfg tscfg['rx-filters']['bits']['bit'] = [r] - res = __set_hwtimestamp_config(cfg, tscfg) + if is_ioctl: + __set_hwtimestamp_config_ioctl(cfg, tscfg) + else: + res = __set_hwtimestamp_config(cfg, tscfg) if res is None: res = __get_hwtimestamp_config(cfg) + resioctl = __get_hwtimestamp_config_ioctl(cfg) + ksft_eq(resioctl.rx_filter, res['rx-filters']['bits']['bit'][0]['index']) if r['index'] == 0 or r['index'] == 1: ksft_eq(res['rx-filters']['bits']['bit'][0]['index'], r['index']) else: @@ -100,12 +190,34 @@ def test_hwtstamp_rx(cfg): __set_hwtimestamp_config(cfg, orig_tscfg) +def test_hwtstamp_rx_netlink(cfg): + """ + Test RX timestamp configuration via netlink. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + __perform_hwtstamp_rx(cfg, False) + + +def test_hwtstamp_rx_ioctl(cfg): + """ + Test RX timestamp configuration via ioctl. + The filter configuration is taken from the list of supported filters. + The driver should apply the config without error and report back proper state. + Some extension of the timestamping scope is allowed for PTP filters. + """ + __perform_hwtstamp_rx(cfg, True) + + def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, nsim_test=False) as cfg: cfg.ethnl = EthtoolFamily() - ksft_run([test_hwtstamp_tx, test_hwtstamp_rx], args=(cfg,)) + ksft_run([test_hwtstamp_tx_ioctl, test_hwtstamp_tx_netlink, + test_hwtstamp_rx_ioctl, test_hwtstamp_rx_netlink], + args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/rss_drv.py b/tools/testing/selftests/drivers/net/hw/rss_drv.py new file mode 100755 index 000000000000..2d1a33189076 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_drv.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Driver-related behavior tests for RSS. +""" + +from lib.py import ksft_run, ksft_exit, ksft_ge +from lib.py import ksft_variants, KsftNamedVariant, KsftSkipEx +from lib.py import defer, ethtool +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + + +def _is_power_of_two(n): + return n > 0 and (n & (n - 1)) == 0 + + +def _get_rss(cfg, context=0): + return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] + + +def _test_rss_indir_size(cfg, qcnt, context=0): + """Test that indirection table size is at least 4x queue count.""" + ethtool(f"-L {cfg.ifname} combined {qcnt}") + + rss = _get_rss(cfg, context=context) + indir = rss['rss-indirection-table'] + ksft_ge(len(indir), 4 * qcnt, "Table smaller than 4x") + return len(indir) + + +def _maybe_create_context(cfg, create_context): + """ Either create a context and return its ID or return 0 for main ctx """ + if not create_context: + return 0 + try: + ctx = cfg.ethnl.rss_create_act({'header': {'dev-index': cfg.ifindex}}) + ctx_id = ctx['context'] + defer(cfg.ethnl.rss_delete_act, + {'header': {'dev-index': cfg.ifindex}, 'context': ctx_id}) + except NlError: + raise KsftSkipEx("Device does not support additional RSS contexts") + + return ctx_id + + +@ksft_variants([ + KsftNamedVariant("main", False), + KsftNamedVariant("ctx", True), +]) +def indir_size_4x(cfg, create_context): + """ + Test that the indirection table has at least 4 entries per queue. + Empirically network-heavy workloads like memcache suffer with the 33% + imbalance of a 2x indirection table size. + 4x table translates to a 16% imbalance. + """ + channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + ch_max = channels.get('combined-max', 0) + qcnt = channels['combined-count'] + + if ch_max < 3: + raise KsftSkipEx(f"Not enough queues for the test: max={ch_max}") + + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") + ethtool(f"-L {cfg.ifname} combined 3") + + ctx_id = _maybe_create_context(cfg, create_context) + + indir_sz = _test_rss_indir_size(cfg, 3, context=ctx_id) + + # Test with max queue count (max - 1 if max is a power of two) + test_max = ch_max - 1 if _is_power_of_two(ch_max) else ch_max + if test_max > 3 and indir_sz < test_max * 4: + _test_rss_indir_size(cfg, test_max, context=ctx_id) + + +def main() -> None: + """ Ksft boiler plate main """ + with NetDrvEnv(__file__) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run([indir_size_4x], args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py index 6fa95fe27c47..7dc80070884a 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -145,9 +145,14 @@ def test_rss_flow_label_6only(cfg): # Try to enable Flow Labels and check again, in case it leaks thru initial = _ethtool_get_cfg(cfg, "udp6") - changed = initial.replace("l", "") if "l" in initial else initial + "l" - - cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + else: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") _check_v4_flow_types(cfg) diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index 72880e388478..503f1a2a2872 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -5,9 +5,9 @@ import multiprocessing import socket from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, cmd, fd_read_timeout from lib.py import NetDrvEpEnv -from lib.py import EthtoolFamily, NetdevFamily +from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import KsftSkipEx, KsftFailEx -from lib.py import rand_port +from lib.py import defer, ksft_pr, rand_port def traffic(cfg, local_port, remote_port, ipver): @@ -21,6 +21,40 @@ def traffic(cfg, local_port, remote_port, ipver): return sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) +def _rss_input_xfrm_try_enable(cfg): + """ + Check if symmetric input-xfrm is already enabled, if not try to enable it + and register a cleanup. + """ + rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}) + orig_xfrm = rss.get('input-xfrm', set()) + sym_xfrm = set(filter(lambda x: 'sym' in x, orig_xfrm)) + + if sym_xfrm: + ksft_pr("Sym input xfrm already enabled:", sym_xfrm) + return sym_xfrm + + for xfrm in cfg.ethnl.consts["input-xfrm"].entries: + # Skip non-symmetric transforms + if "sym" not in xfrm: + continue + + try_xfrm = {xfrm} | orig_xfrm + try: + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": try_xfrm}) + except NlError: + continue + + ksft_pr("Sym input xfrm configured:", try_xfrm) + defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, + "input-xfrm": orig_xfrm}) + return {xfrm} + + return set() + + def test_rss_input_xfrm(cfg, ipver): """ Test symmetric input_xfrm. @@ -37,12 +71,10 @@ def test_rss_input_xfrm(cfg, ipver): if not hasattr(socket, "SO_INCOMING_CPU"): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") - rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}) - input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {}))) - # Check for symmetric xor/or-xor + input_xfrm = _rss_input_xfrm_try_enable(cfg) if not input_xfrm: - raise KsftSkipEx("Symmetric RSS hash not requested") + raise KsftSkipEx("Symmetric RSS hash not supported by device") cpus = set() successful = 0 diff --git a/tools/testing/selftests/drivers/net/hw/toeplitz.c b/tools/testing/selftests/drivers/net/hw/toeplitz.c index 285bb17df9c2..035bf908d8d9 100644 --- a/tools/testing/selftests/drivers/net/hw/toeplitz.c +++ b/tools/testing/selftests/drivers/net/hw/toeplitz.c @@ -59,7 +59,7 @@ #include "../../../net/lib/ksft.h" #define TOEPLITZ_KEY_MIN_LEN 40 -#define TOEPLITZ_KEY_MAX_LEN 60 +#define TOEPLITZ_KEY_MAX_LEN 256 #define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ #define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) @@ -72,6 +72,8 @@ #define RPS_MAX_CPUS 16UL /* must be a power of 2 */ +#define MIN_PKT_SAMPLES 40 /* minimum number of packets to receive */ + /* configuration options (cmdline arguments) */ static uint16_t cfg_dport = 8000; static int cfg_family = AF_INET6; @@ -251,15 +253,31 @@ static bool recv_block(struct ring_state *ring) return true; } -/* simple test: sleep once unconditionally and then process all rings */ +/* simple test: process all rings until MIN_PKT_SAMPLES packets are received, + * or the test times out. + */ static void process_rings(void) { + struct timeval start, now; + bool pkts_found = true; + long elapsed_usec; int i; - usleep(1000 * cfg_timeout_msec); + gettimeofday(&start, NULL); - for (i = 0; i < num_cpus; i++) - do {} while (recv_block(&rings[i])); + do { + if (!pkts_found) + usleep(100); + + pkts_found = false; + for (i = 0; i < num_cpus; i++) + pkts_found |= recv_block(&rings[i]); + + gettimeofday(&now, NULL); + elapsed_usec = (now.tv_sec - start.tv_sec) * 1000000 + + (now.tv_usec - start.tv_usec); + } while (frames_received - frames_nohash < MIN_PKT_SAMPLES && + elapsed_usec < cfg_timeout_msec * 1000); fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", frames_received - frames_nohash - frames_error, diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 8b644fd84ff2..41cc248ac848 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -170,6 +170,7 @@ class NetDrvEpEnv(NetDrvEnvBase): self.remote_ifname = self.resolve_remote_ifc() self.remote_dev = ip("-d link show dev " + self.remote_ifname, host=self.remote, json=True)[0] + self.remote_ifindex = self.remote_dev['ifindex'] self._required_cmd = {} @@ -247,9 +248,12 @@ class NetDrvEpEnv(NetDrvEnvBase): if not self.addr_v[ipver] or not self.remote_addr_v[ipver]: raise KsftSkipEx(f"Test requires IPv{ipver} connectivity") - def require_nsim(self): - if self._ns is None: + def require_nsim(self, nsim_test=True): + """Require or exclude netdevsim for this test""" + if nsim_test and self._ns is None: raise KsftXfailEx("Test only works on netdevsim") + if nsim_test is False and self._ns is not None: + raise KsftXfailEx("Test does not work on netdevsim") def _require_cmd(self, comm, key, host=None): cached = self._required_cmd.get(comm, {}) diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index ae8abff4be40..b6093bcf2b06 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -203,19 +203,21 @@ function do_cleanup() { function cleanup_netcons() { # delete netconsole dynamic reconfiguration # do not fail if the target is already disabled - if [[ ! -d "${NETCONS_PATH}" ]] + local TARGET_PATH=${1:-${NETCONS_PATH}} + + if [[ ! -d "${TARGET_PATH}" ]] then # in some cases this is called before netcons path is created return fi - if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]] + if [[ $(cat "${TARGET_PATH}"/enabled) != 0 ]] then - echo 0 > "${NETCONS_PATH}"/enabled || true + echo 0 > "${TARGET_PATH}"/enabled || true fi # Remove all the keys that got created during the selftest - find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete + find "${TARGET_PATH}/userdata/" -mindepth 1 -type d -delete # Remove the configfs entry - rmdir "${NETCONS_PATH}" + rmdir "${TARGET_PATH}" } function cleanup() { @@ -377,6 +379,29 @@ function check_netconsole_module() { fi } +function wait_target_state() { + local TARGET=${1} + local STATE=${2} + local TARGET_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" + local ENABLED=0 + + if [ "${STATE}" == "enabled" ] + then + ENABLED=1 + fi + + if [ ! -d "$TARGET_PATH" ]; then + echo "FAIL: Target does not exist." >&2 + exit "${ksft_fail}" + fi + + local CHECK_CMD="grep \"$ENABLED\" \"$TARGET_PATH/enabled\"" + slowwait 2 sh -c "test -n \"\$($CHECK_CMD)\"" || { + echo "FAIL: ${TARGET} is not ${STATE}." >&2 + exit "${ksft_fail}" + } +} + # A wrapper to translate protocol version to udp version function wait_for_port() { local NAMESPACE=${1} diff --git a/tools/testing/selftests/drivers/net/netconsole/Makefile b/tools/testing/selftests/drivers/net/netconsole/Makefile new file mode 100644 index 000000000000..b56c70b7e274 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := \ + ../../../net/lib.sh \ + ../lib/sh/lib_netcons.sh \ +# end of TEST_INCLUDES + +TEST_PROGS := \ + netcons_basic.sh \ + netcons_cmdline.sh \ + netcons_fragmented_msg.sh \ + netcons_overflow.sh \ + netcons_resume.sh \ + netcons_sysdata.sh \ + netcons_torture.sh \ +# end of TEST_PROGS + +include ../../../lib.mk + diff --git a/tools/testing/selftests/drivers/net/netconsole/config b/tools/testing/selftests/drivers/net/netconsole/config new file mode 100644 index 000000000000..a3f6b0fd44ef --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/config @@ -0,0 +1,6 @@ +CONFIG_CONFIGFS_FS=y +CONFIG_IPV6=y +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh index 2022f3061738..59cf10013ecd 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_basic.sh @@ -18,7 +18,7 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh modprobe netdevsim 2> /dev/null || true modprobe netconsole 2> /dev/null || true diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh index d1d23dc67f99..96d704b8d9d9 100755 --- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_cmdline.sh @@ -12,7 +12,7 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh check_netconsole_module diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh index 4a71e01a230c..0dc7280c3080 100755 --- a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_fragmented_msg.sh @@ -16,7 +16,7 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh modprobe netdevsim 2> /dev/null || true modprobe netconsole 2> /dev/null || true diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh index 06089643b771..a8e43d08c166 100755 --- a/tools/testing/selftests/drivers/net/netcons_overflow.sh +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_overflow.sh @@ -13,7 +13,7 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh # This is coming from netconsole code. Check for it in drivers/net/netconsole.c MAX_USERDATA_ITEMS=256 diff --git a/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh new file mode 100755 index 000000000000..cb59cf436dd0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_resume.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test validates that netconsole is able to resume a target that was +# deactivated when its interface was removed when the interface is brought +# back up. +# +# The test configures a netconsole target and then removes netdevsim module to +# cause the interface to disappear. Targets are configured via cmdline to ensure +# targets bound by interface name and mac address can be resumed. +# The test verifies that the target moved to disabled state before adding +# netdevsim and the interface back. +# +# Finally, the test verifies that the target is re-enabled automatically and +# the message is received on the destination interface. +# +# Author: Andre Carvalho <asantostc@gmail.com> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +SAVED_SRCMAC="" # to be populated later +SAVED_DSTMAC="" # to be populated later + +modprobe netdevsim 2> /dev/null || true +rmmod netconsole 2> /dev/null || true + +check_netconsole_module + +function cleanup() { + cleanup_netcons "${NETCONS_CONFIGFS}/cmdline0" + do_cleanup + rmmod netconsole +} + +function trigger_reactivation() { + # Add back low level module + modprobe netdevsim + # Recreate namespace and two interfaces + set_network + # Restore MACs + ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" \ + address "${SAVED_DSTMAC}" + if [ "${BINDMODE}" == "mac" ]; then + ip link set dev "${SRCIF}" down + ip link set dev "${SRCIF}" address "${SAVED_SRCMAC}" + # Rename device in order to trigger target resume, as initial + # when device was recreated it didn't have correct mac address. + ip link set dev "${SRCIF}" name "${TARGET}" + fi +} + +function trigger_deactivation() { + # Start by storing mac addresses so we can be restored in reactivate + SAVED_DSTMAC=$(ip netns exec "${NAMESPACE}" \ + cat /sys/class/net/"$DSTIF"/address) + SAVED_SRCMAC=$(mac_get "${SRCIF}") + # Remove low level module + rmmod netdevsim +} + +trap cleanup EXIT + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) + echo "6 5" > /proc/sys/kernel/printk + + # Create one namespace and two interfaces + set_network + + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + # Expose cmdline target in configfs + mkdir "${NETCONS_CONFIGFS}/cmdline0" + + # Target should be enabled + wait_target_state "cmdline0" "enabled" + + # Trigger deactivation by unloading netdevsim module. Target should be + # disabled. + trigger_deactivation + wait_target_state "cmdline0" "disabled" + + # Trigger reactivation by loading netdevsim, recreating the network and + # restoring mac addresses. Target should be re-enabled. + trigger_reactivation + wait_target_state "cmdline0" "enabled" + + # Listen for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Cleanup & unload the module + cleanup + + echo "${BINDMODE} : Test passed" >&2 +done + +trap - EXIT +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh index baf69031089e..3fb8c4afe3d2 100755 --- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_sysdata.sh @@ -18,7 +18,7 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh # Enable the sysdata cpu_nr feature function set_cpu_nr() { diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh index 2ce9ee3719d1..33a44adb6f8f 100755 --- a/tools/testing/selftests/drivers/net/netcons_torture.sh +++ b/tools/testing/selftests/drivers/net/netconsole/netcons_torture.sh @@ -17,7 +17,7 @@ set -euo pipefail SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh # Number of times the main loop run ITERATIONS=${1:-150} diff --git a/tools/testing/selftests/drivers/net/psp.py b/tools/testing/selftests/drivers/net/psp.py index 52523bdad240..864d9fce1094 100755 --- a/tools/testing/selftests/drivers/net/psp.py +++ b/tools/testing/selftests/drivers/net/psp.py @@ -266,6 +266,7 @@ def assoc_sk_only_mismatch(cfg): the_exception = cm.exception ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + _close_conn(cfg, s) def assoc_sk_only_mismatch_tx(cfg): @@ -283,6 +284,7 @@ def assoc_sk_only_mismatch_tx(cfg): the_exception = cm.exception ksft_eq(the_exception.nl_msg.extack['bad-attr'], ".dev-id") ksft_eq(the_exception.nl_msg.error, -errno.EINVAL) + _close_conn(cfg, s) def assoc_sk_only_unconn(cfg): @@ -601,8 +603,8 @@ def main() -> None: cfg.comm_port = rand_port() srv = None try: - with bkg(responder + f" -p {cfg.comm_port}", host=cfg.remote, - exit_wait=True) as srv: + with bkg(responder + f" -p {cfg.comm_port} -i {cfg.remote_ifindex}", + host=cfg.remote, exit_wait=True) as srv: wait_port_listen(cfg.comm_port, host=cfg.remote) cfg.comm_sock = socket.create_connection((cfg.remote_addr, diff --git a/tools/testing/selftests/drivers/net/psp_responder.c b/tools/testing/selftests/drivers/net/psp_responder.c index f309e0d73cbf..a26e7628bbb1 100644 --- a/tools/testing/selftests/drivers/net/psp_responder.c +++ b/tools/testing/selftests/drivers/net/psp_responder.c @@ -22,7 +22,7 @@ static bool should_quit; struct opts { int port; - int devid; + int ifindex; bool verbose; }; @@ -360,7 +360,7 @@ static void usage(const char *name, const char *miss) if (miss) fprintf(stderr, "Missing argument: %s\n", miss); - fprintf(stderr, "Usage: %s -p port [-v] [-d psp-dev-id]\n", name); + fprintf(stderr, "Usage: %s -p port [-v] [-i ifindex]\n", name); exit(EXIT_FAILURE); } @@ -368,7 +368,7 @@ static void parse_cmd_opts(int argc, char **argv, struct opts *opts) { int opt; - while ((opt = getopt(argc, argv, "vp:d:")) != -1) { + while ((opt = getopt(argc, argv, "vp:i:")) != -1) { switch (opt) { case 'v': opts->verbose = 1; @@ -376,8 +376,8 @@ static void parse_cmd_opts(int argc, char **argv, struct opts *opts) case 'p': opts->port = atoi(optarg); break; - case 'd': - opts->devid = atoi(optarg); + case 'i': + opts->ifindex = atoi(optarg); break; default: usage(argv[0], NULL); @@ -410,12 +410,11 @@ static int psp_dev_set_ena(struct ynl_sock *ys, __u32 dev_id, __u32 versions) int main(int argc, char **argv) { struct psp_dev_get_list *dev_list; - bool devid_found = false; __u32 ver_ena, ver_cap; struct opts opts = {}; struct ynl_error yerr; struct ynl_sock *ys; - int first_id = 0; + int devid = -1; int ret; parse_cmd_opts(argc, argv, &opts); @@ -429,20 +428,19 @@ int main(int argc, char **argv) } dev_list = psp_dev_get_dump(ys); - if (ynl_dump_empty(dev_list)) { - if (ys->err.code) - goto err_close; - fprintf(stderr, "No PSP devices\n"); - goto err_close_silent; - } + if (ynl_dump_empty(dev_list) && ys->err.code) + goto err_close; ynl_dump_foreach(dev_list, d) { - if (opts.devid) { - devid_found = true; + if (opts.ifindex) { + if (d->ifindex != opts.ifindex) + continue; + devid = d->id; ver_ena = d->psp_versions_ena; ver_cap = d->psp_versions_cap; - } else if (!first_id) { - first_id = d->id; + break; + } else if (devid < 0) { + devid = d->id; ver_ena = d->psp_versions_ena; ver_cap = d->psp_versions_cap; } else { @@ -452,23 +450,21 @@ int main(int argc, char **argv) } psp_dev_get_list_free(dev_list); - if (opts.devid && !devid_found) { - fprintf(stderr, "PSP device %d requested on cmdline, not found\n", - opts.devid); - goto err_close_silent; - } else if (!opts.devid) { - opts.devid = first_id; - } + if (opts.ifindex && devid < 0) + fprintf(stderr, + "WARN: PSP device with ifindex %d requested on cmdline, not found\n", + opts.ifindex); - if (ver_ena != ver_cap) { - ret = psp_dev_set_ena(ys, opts.devid, ver_cap); + if (devid >= 0 && ver_ena != ver_cap) { + ret = psp_dev_set_ena(ys, devid, ver_cap); if (ret) goto err_close; } ret = run_responder(ys, &opts); - if (ver_ena != ver_cap && psp_dev_set_ena(ys, opts.devid, ver_ena)) + if (devid >= 0 && ver_ena != ver_cap && + psp_dev_set_ena(ys, devid, ver_ena)) fprintf(stderr, "WARN: failed to set the PSP versions back\n"); ynl_sock_destroy(ys); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 6930fe926c58..97ad4d551d44 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -7,6 +7,7 @@ cmsg_sender epoll_busy_poll fin_ack_lat hwtstamp_config +icmp_rfc4884 io_uring_zerocopy_tx ioam6_parser ip_defrag diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 45c4ea381bc3..afdea6d95bde 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -22,6 +22,7 @@ TEST_PROGS := \ cmsg_so_mark.sh \ cmsg_so_priority.sh \ cmsg_time.sh \ + double_udp_encap.sh \ drop_monitor_tests.sh \ fcnal-ipv4.sh \ fcnal-ipv6.sh \ @@ -167,6 +168,7 @@ TEST_GEN_PROGS := \ bind_timewait \ bind_wildcard \ epoll_busy_poll \ + icmp_rfc4884 \ ipv6_fragmentation \ proc_net_pktgen \ reuseaddr_conflict \ @@ -181,7 +183,6 @@ TEST_GEN_PROGS := \ tap \ tcp_port_share \ tls \ - tun \ # end of TEST_GEN_PROGS TEST_FILES := \ @@ -193,7 +194,11 @@ TEST_FILES := \ # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller -YNL_GEN_PROGS := netlink-dumps +YNL_GEN_PROGS := \ + netlink-dumps \ + tun \ +# end of YNL_GEN_PROGS + TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_GEN_PROGS += $(YNL_GEN_PROGS) @@ -204,7 +209,14 @@ TEST_INCLUDES := forwarding/lib.sh include ../lib.mk # YNL build -YNL_GENS := netdev +YNL_GENS := \ + netdev \ + rt-addr \ + rt-link \ + rt-neigh \ + rt-route \ +# end of YNL_GENS + include ynl.mk $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index b84362b9b508..cd49b7dfe216 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -77,6 +77,7 @@ CONFIG_NET_DROP_MONITOR=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_NETFILTER_XT_MATCH_BPF=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_NETFILTER_XT_NAT=m diff --git a/tools/testing/selftests/net/double_udp_encap.sh b/tools/testing/selftests/net/double_udp_encap.sh new file mode 100755 index 000000000000..9aaf97cdf141 --- /dev/null +++ b/tools/testing/selftests/net/double_udp_encap.sh @@ -0,0 +1,393 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +# shellcheck disable=SC2155 # prefer RO variable over return value from cmd +readonly CLI="$(dirname "$(readlink -f "$0")")/../../../net/ynl/pyynl/cli.py" + +readonly SRC=1 +readonly DST=2 + +readonly NET_V4=192.168.1. +readonly NET_V6=2001:db8:: +readonly OL1_NET_V4=172.16.1. +readonly OL1_NET_V6=2001:db8:1:: +readonly OL2_NET_V4=172.16.2. +readonly OL2_NET_V6=2001:db8:2:: + +trap cleanup_all_ns EXIT + +# shellcheck disable=SC2329 # can't figure out usage trough a variable +is_ipv6() { + if [[ $1 =~ .*:.* ]]; then + return 0 + fi + return 1 +} + +# shellcheck disable=SC2329 # can't figure out usage trough a variable +create_gnv_endpoint() { + local -r netns=$1 + local -r bm_rem_addr=$2 + local -r gnv_dev=$3 + local -r gnv_id=$4 + local opts=$5 + local gnv_json + local rem + + if is_ipv6 "$bm_rem_addr"; then + rem=remote6 + else + rem=remote + fi + + # add ynl opt separator, if needed + [ -n "$opts" ] && opts=", $opts" + + gnv_json="{ \"id\": $gnv_id, \"$rem\": \"$bm_rem_addr\"$opts }" + ip netns exec "$netns" "$CLI" --family rt-link --create --excl \ + --do newlink --json "{\"ifname\": \"$gnv_dev\", + \"linkinfo\": {\"kind\":\"geneve\", + \"data\": $gnv_json } }" > /dev/null + ip -n "$netns" link set dev "$gnv_dev" up +} + +# shellcheck disable=SC2329 # can't figure out usage trough a variable +create_vxlan_endpoint() { + local -r netns=$1 + local -r bm_rem_addr=$2 + local -r vxlan_dev=$3 + local -r vxlan_id=$4 + local -r opts_str=$5 + local oldifs + local -a opts + local opt + + # convert the arguments from yaml format + oldifs=$IFS + IFS=',' + for opt in $opts_str; do + local pattern='"port":' + + [ -n "$opt" ] || continue + + opts+=("${opt/$pattern*/dstport}" "${opt/$pattern/}") + done + IFS=$oldifs + [ ${#opts[@]} -gt 0 ] || opts+=("dstport" "4789") + + ip -n "$netns" link add "$vxlan_dev" type vxlan id "$vxlan_id" \ + remote "$bm_rem_addr" "${opts[@]}" + ip -n "$netns" link set dev "$vxlan_dev" up +} + +create_ns() { + local nested_opt='"port":6082' + local create_endpoint + local options="$1" + local feature + local dev + local id + local ns + + RET=0 + + # +-------------+ +-------------+ + # | NS_SRC | | NS_NST_DST | + # | | | | + # | gnv_nst1 | | gnv_nst2 | + # | + | | + | + # | | | | | | + # | + | | + | + # | gnv1 | | gnv2 | + # | + | | + | + # | | | | | | + # | + veth1 +--------+ veth2 + | + # | | | | + # +-------------+ +-------------+ + + setup_ns NS_SRC NS_DST + + # concatenate caller provided options and default one + [ -n "$2" ] && nested_opt="$nested_opt,$2" + + ip link add name "veth$SRC" netns "$NS_SRC" type veth \ + peer name "veth$DST" netns "$NS_DST" + case "$ENCAP" in + vxlan) + create_endpoint=create_vxlan_endpoint + dev=vx + ;; + geneve) + create_endpoint=create_gnv_endpoint + dev=gnv + ;; + esac + + id=1 + for ns in "${NS_LIST[@]}"; do + ip -n "$ns" link set dev "veth$id" up + + # ensure the sender can do large write just after 3whs + ip netns exec "$ns" \ + sysctl -qw net.ipv4.tcp_wmem="4096 4194304 4194304" + + # note that 3 - $SRC == $DST and 3 - $DST == $SRC + if [ $FAMILY = "4" ]; then + ip -n "$ns" addr add dev "veth$id" "$NET_V4$id/24" + $create_endpoint "$ns" "$NET_V4$((3 - id))" \ + "$dev$id" 4 "$options" + ip -n "$ns" addr add dev "$dev$id" "$OL1_NET_V4$id/24" + + # nested tunnel devices + # pmtu can't be propagated to upper layer devices; + # need manual adjust + $create_endpoint "$ns" "$OL1_NET_V4$((3 - id))" \ + "$dev"_nst"$id" 40 "$nested_opt" + ip -n "$ns" addr add dev "$dev"_nst"$id" \ + "$OL2_NET_V4$id/24" + ip -n "$ns" link set dev "$dev"_nst"$id" mtu 1392 + else + ip -n "$ns" addr add dev "veth$id" "$NET_V6$id/64" \ + nodad + $create_endpoint "$ns" "$NET_V6$((3 - id))" \ + "$dev"6"$id" 6 "$options" + ip -n "$ns" addr add dev "$dev"6"$id" \ + "$OL1_NET_V6$id/64" nodad + + $create_endpoint "$ns" "$OL1_NET_V6$((3 - id))" \ + "$dev"6_nst"$id" 60 "$nested_opt" + ip -n "$ns" addr add dev "$dev"6_nst"$id" \ + "$OL2_NET_V6$id/64" nodad + ip -n "$ns" link set dev "$dev"6_nst"$id" mtu 1352 + fi + id=$((id+1)) + done + + # enable GRO heuristic on the veth peer and ensure UDP L4 over tunnel is + # actually segmented + for feature in tso tx-udp_tnl-segmentation; do + ip netns exec "$NS_SRC" ethtool -K "veth$SRC" \ + "$feature" off 2>/dev/null + done +} + +create_ns_gso() { + local dev + + create_ns "$@" + if [ "$ENCAP" = "geneve" ]; then + dev=gnv + else + dev=vx + fi + [ "$FAMILY" = "6" ] && dev="$dev"6 + ip netns exec "$NS_SRC" ethtool -K "$dev$SRC" \ + tx-gso-partial on \ + tx-udp_tnl-segmentation on \ + tx-udp_tnl-csum-segmentation on +} + +create_ns_gso_gro() { + create_ns_gso "$@" + ip netns exec "$NS_DST" ethtool -K "veth$DST" gro on + ip netns exec "$NS_SRC" ethtool -K "veth$SRC" tx off >/dev/null 2>&1 +} + +run_test() { + local -r dst=$NET$DST + local -r msg=$1 + local -r total_size=$2 + local -r encappkts=$3 + local inner_proto_offset=0 + local inner_maclen=14 + local rx_family="-4" + local ipt=iptables + local bpf_filter + local -a rx_args + local wire_pkts + local rcvpkts + local encl=8 + local dport + local pkts + local snd + + if [ $FAMILY = "6" ]; then + ipt=ip6tables + else + # rx program does not support '-6' and implies ipv6 usage by + # default + rx_args=("$rx_family") + fi + + # The received can only check fixed size packet + pkts=$((total_size / GSO_SIZE)) + if [ -n "$4" ]; then + wire_pkts=$4 + elif [ $((total_size % GSO_SIZE)) -eq 0 ]; then + wire_pkts=1 + rx_args+=("-l" "$GSO_SIZE") + else + wire_pkts=2 + pkts=$((pkts + 1)) + fi + + if [ "$ENCAP" = "geneve" ]; then + dport=6081 + else + dport=4789 + fi + + # Either: + # - IPv4, nested tunnel carries UDP over IPv4, with dport 6082, + # innermost is TCP over IPv4 on port 8000 + # - IPv6, nested tunnel carries UDP over IPv6, with dport 6082, + # innermost is TCP over IPv6 on port 8000 + # The nested tunnel port is 6082 and the nested encap len is 8 + # regardless of the encap type (no geneve opts). + # In inherit protocol mode there is no nested mac hdr and the nested + # l3 protocol type field belongs to the geneve hdr. + [ "$USE_HINT" = true ] && encl=16 + [ "$INHERIT" = true ] && inner_maclen=0 + [ "$INHERIT" = true ] && inner_proto_offset=-4 + local inner=$((inner_maclen+encl)) + local proto=$((inner_maclen+encl+inner_proto_offset)) + bpf_filter=$(nfbpf_compile "(ip && + ip[$((40+encl))] == 0x08 && ip[$((41+encl))] == 0x00 && + ip[$((51+encl))] == 0x11 && + ip[$((64+encl))] == 0x17 && ip[$((65+encl))] == 0xc2 && + ip[$((76+proto))] == 0x08 && ip[$((77+proto))] == 0x00 && + ip[$((87+inner))] == 0x6 && + ip[$((100+inner))] == 0x1f && ip[$((101+inner))] == 0x40) || + (ip6 && + ip6[$((60+encl))] == 0x86 && ip6[$((61+encl))] == 0xdd && + ip6[$((68+encl))] == 0x11 && + ip6[$((104+encl))] == 0x17 && ip6[$((105+encl))] == 0xc2 && + ip6[$((116+proto))] == 0x86 && ip6[$((117+proto))] == 0xdd && + ip6[$((124+inner))] == 0x6 && + ip6[$((160+inner))] == 0x1f && ip6[$((161+inner))] == 0x40)") + + # ignore shorts packet, to avoid arp/mld induced noise + ip netns exec "$NS_SRC" "$ipt" -A OUTPUT -p udp --dport "$dport" \ + -m length --length 600:65535 -m bpf --bytecode "$bpf_filter" + ip netns exec "$NS_DST" "$ipt" -A INPUT -p udp --dport "$dport" \ + -m length --length 600:65535 -m bpf --bytecode "$bpf_filter" + ip netns exec "$NS_DST" ./udpgso_bench_rx -C 2000 -t -R 100 \ + -n "$pkts" "${rx_args[@]}" & + local pid=$! + wait_local_port_listen "$NS_DST" 8000 tcp + ip netns exec "$NS_SRC" ./udpgso_bench_tx -"$FAMILY" -t -M 1 \ + -s "$total_size" -D "$dst" + local ret=$? + check_err "$ret" "client failure exit code $ret" + wait "$pid" + ret=$? + check_err "$ret" "sever failure exit code $ret" + + snd=$(ip netns exec "$NS_SRC" "$ipt"-save -c | + grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//') + + [ "$snd" = "$wire_pkts" ] + # shellcheck disable=SC2319 # known false positive + check_err $? "send $snd packets on the lowest link, expected $wire_pkts" + + rcvpkts=$(ip netns exec "$NS_DST" "$ipt"-save -c | \ + grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//') + + [ "$rcvpkts" = "$encappkts" ] + check_err $? "received $rcvpkts $ENCAP packets, expected $encappkts" + log_test "$msg" +} + +run_tests() { + for FAMILY in 4 6; do + NET=$OL2_NET_V4 + GSO_SIZE=1340 # 1392 - 20 - 32 + + if [ $FAMILY = 6 ]; then + NET=$OL2_NET_V6 + GSO_SIZE=1280 # 1352 - 40 - 32 + fi + + echo "IPv$FAMILY" + + unset USE_HINT + unset INHERIT + + # "geneve" must be last encap in list, so that later + # test cases will run on it + for ENCAP in "vxlan" "geneve"; do + create_ns + run_test "No GSO - $ENCAP" $((GSO_SIZE * 4)) 4 4 + cleanup_all_ns + + create_ns_gso + run_test "GSO without GRO - $ENCAP" $((GSO_SIZE * 4)) \ + 4 1 + cleanup_all_ns + + # IPv4 only test + [ $FAMILY = "4" ] || continue + create_ns_gso + ip netns exec "$NS_SRC" \ + sysctl -qw net.ipv4.ip_no_pmtu_disc=1 + run_test "GSO disable due to no fixedid - $ENCAP" \ + $((GSO_SIZE * 4)) 4 4 + cleanup_all_ns + done + + # GRO tests imply/require geneve encap, the only one providing + # GRO hints + create_ns_gso_gro + run_test "double tunnel GRO, no hints" $((GSO_SIZE * 4)) 4 + cleanup_all_ns + + # hint option is expected for all the following tests in the RX + # path + USE_HINT=true + create_ns_gso_gro \ + '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \ + '"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' + run_test "double tunnel GRO" $((GSO_SIZE * 4)) 1 + cleanup_all_ns + + create_ns_gso_gro '"gro-hint":1,"udp-csum":1' '"udp-csum":1' + run_test "double tunnel GRO - csum complete" $((GSO_SIZE * 4))\ + 1 + cleanup_all_ns + + create_ns_gso_gro '"gro-hint":1' \ + '"udp-csum":0,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' + run_test "double tunnel GRO - no nested csum" \ + $((GSO_SIZE * 4)) 1 + cleanup_all_ns + + create_ns_gso_gro \ + '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \ + '"udp-csum":1' + run_test "double tunnel GRO - nested csum, outer 0-csum, skip"\ + $((GSO_SIZE * 4)) 4 + cleanup_all_ns + + INHERIT=true + create_ns_gso_gro '"gro-hint":1,"udp-csum":1' \ + '"udp-csum":1,"inner-proto-inherit":1' + run_test "double tunnel GRO - nested inherit proto" \ + $((GSO_SIZE * 4)) 1 + cleanup_all_ns + unset INHERIT + + create_ns_gso_gro '"gro-hint":1' + run_test "double tunnel GRO - short last pkt" \ + $((GSO_SIZE * 4 + GSO_SIZE / 2)) 2 + cleanup_all_ns + done +} + +require_command nfbpf_compile +require_command jq + +# tcp retransmisions will break the accounting +xfail_on_slow run_tests +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index c01be076b210..e0d45292a298 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -72,7 +72,8 @@ declare -A TEST_NET4IN6IN6 TEST_NET4IN6[1]=10.1.1.254 TEST_NET4IN6[2]=10.2.1.254 -# mcast address +# mcast addresses +MCAST4=233.252.0.1 MCAST6=ff02::1 VRF=lisa @@ -260,11 +261,15 @@ valid_onlink_ipv4() run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected" run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive" + run_ip 254 ${TEST_NET4[1]}.9 ${CONGW[1]} ${NETIFS[p3]} 0 \ + "nexthop device mismatch" log_subsection "VRF ${VRF}" run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected" run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive" + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.10 ${CONGW[3]} ${NETIFS[p7]} 0 \ + "nexthop device mismatch" log_subsection "VRF device, PBR table" @@ -300,17 +305,15 @@ invalid_onlink_ipv4() { run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \ "Invalid gw - local unicast address" + run_ip 254 ${TEST_NET4[1]}.12 ${MCAST4} ${NETIFS[p1]} 2 \ + "Invalid gw - multicast address" run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \ "Invalid gw - local unicast address, VRF" + run_ip ${VRF_TABLE} ${TEST_NET4[2]}.12 ${MCAST4} ${NETIFS[p5]} 2 \ + "Invalid gw - multicast address, VRF" run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given" - - run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \ - "Gateway resolves to wrong nexthop device" - - run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \ - "Gateway resolves to wrong nexthop device - VRF" } ################################################################################ @@ -357,12 +360,16 @@ valid_onlink_ipv6() run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected" run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive" run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped" + run_ip6 254 ${TEST_NET6[1]}::a ${V6ADDRS[p1]/::*}::64 ${NETIFS[p3]} 0 \ + "nexthop device mismatch" log_subsection "VRF ${VRF}" run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected" run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive" run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped" + run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::b ${V6ADDRS[p5]/::*}::64 \ + ${NETIFS[p7]} 0 "nexthop device mismatch" log_subsection "VRF device, PBR table" @@ -428,13 +435,6 @@ invalid_onlink_ipv6() run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \ "No nexthop device given" - - # default VRF validation is done against LOCAL table - # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \ - # "Gateway resolves to wrong nexthop device" - - run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \ - "Gateway resolves to wrong nexthop device - VRF" } run_onlink_tests() diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 892895659c7e..1f2bf6e81847 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -306,39 +306,39 @@ run_test() if [ $skip_ptp = false ]; then check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ - "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type *: sync msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ - "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type *: follow up msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ - "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type *: peer delay req msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ - "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type *: sync msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ - "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type *: follow up msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ - "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type *: peer delay req msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ - "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: sync msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ - "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: follow up msg" \ true "$test_name" check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ - "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type *: peer delay req msg" \ true "$test_name" fi diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile index 4b6afc0fe9f8..31fb9326cf53 100644 --- a/tools/testing/selftests/net/hsr/Makefile +++ b/tools/testing/selftests/net/hsr/Makefile @@ -5,6 +5,8 @@ top_srcdir = ../../../../.. TEST_PROGS := \ hsr_ping.sh \ hsr_redbox.sh \ + link_faults.sh \ + prp_ping.sh \ # end of TEST_PROGS TEST_FILES += hsr_common.sh diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 5a65f4f836be..f4d685df4345 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -27,31 +27,34 @@ while getopts "$optstring" option;do esac done -do_complete_ping_test() +do_ping_tests() { - echo "INFO: Initial validation ping." - # Each node has to be able each one. - do_ping "$ns1" 100.64.0.2 - do_ping "$ns2" 100.64.0.1 - do_ping "$ns3" 100.64.0.1 - stop_if_error "Initial validation failed." - - do_ping "$ns1" 100.64.0.3 - do_ping "$ns2" 100.64.0.3 - do_ping "$ns3" 100.64.0.2 + local netid="$1" - do_ping "$ns1" dead:beef:1::2 - do_ping "$ns1" dead:beef:1::3 - do_ping "$ns2" dead:beef:1::1 - do_ping "$ns2" dead:beef:1::2 - do_ping "$ns3" dead:beef:1::1 - do_ping "$ns3" dead:beef:1::2 + echo "INFO: Running ping tests." - stop_if_error "Initial validation failed." + echo "INFO: Initial validation ping." + # Each node has to be able to reach each one. + do_ping "$ns1" "100.64.$netid.2" + do_ping "$ns1" "100.64.$netid.3" + do_ping "$ns2" "100.64.$netid.1" + do_ping "$ns2" "100.64.$netid.3" + do_ping "$ns3" "100.64.$netid.1" + do_ping "$ns3" "100.64.$netid.2" + stop_if_error "Initial validation failed on IPv4." + + do_ping "$ns1" "dead:beef:$netid::2" + do_ping "$ns1" "dead:beef:$netid::3" + do_ping "$ns2" "dead:beef:$netid::1" + do_ping "$ns2" "dead:beef:$netid::2" + do_ping "$ns3" "dead:beef:$netid::1" + do_ping "$ns3" "dead:beef:$netid::2" + stop_if_error "Initial validation failed on IPv6." # Wait until supervisor all supervision frames have been processed and the node # entries have been merged. Otherwise duplicate frames will be observed which is # valid at this stage. + echo "INFO: Wait for node table entries to be merged." WAIT=5 while [ ${WAIT} -gt 0 ] do @@ -68,62 +71,30 @@ do_complete_ping_test() sleep 1 echo "INFO: Longer ping test." - do_ping_long "$ns1" 100.64.0.2 - do_ping_long "$ns1" dead:beef:1::2 - do_ping_long "$ns1" 100.64.0.3 - do_ping_long "$ns1" dead:beef:1::3 - - stop_if_error "Longer ping test failed." - - do_ping_long "$ns2" 100.64.0.1 - do_ping_long "$ns2" dead:beef:1::1 - do_ping_long "$ns2" 100.64.0.3 - do_ping_long "$ns2" dead:beef:1::2 - stop_if_error "Longer ping test failed." - - do_ping_long "$ns3" 100.64.0.1 - do_ping_long "$ns3" dead:beef:1::1 - do_ping_long "$ns3" 100.64.0.2 - do_ping_long "$ns3" dead:beef:1::2 - stop_if_error "Longer ping test failed." - - echo "INFO: Cutting one link." - do_ping_long "$ns1" 100.64.0.3 & - - sleep 3 - ip -net "$ns3" link set ns3eth1 down - wait - - ip -net "$ns3" link set ns3eth1 up - - stop_if_error "Failed with one link down." - - echo "INFO: Delay the link and drop a few packages." - tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms - tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% - - do_ping_long "$ns1" 100.64.0.2 - do_ping_long "$ns1" 100.64.0.3 - - stop_if_error "Failed with delay and packetloss." - - do_ping_long "$ns2" 100.64.0.1 - do_ping_long "$ns2" 100.64.0.3 - - stop_if_error "Failed with delay and packetloss." - - do_ping_long "$ns3" 100.64.0.1 - do_ping_long "$ns3" 100.64.0.2 - stop_if_error "Failed with delay and packetloss." - - echo "INFO: All good." + do_ping_long "$ns1" "100.64.$netid.2" + do_ping_long "$ns1" "dead:beef:$netid::2" + do_ping_long "$ns1" "100.64.$netid.3" + do_ping_long "$ns1" "dead:beef:$netid::3" + stop_if_error "Longer ping test failed (ns1)." + + do_ping_long "$ns2" "100.64.$netid.1" + do_ping_long "$ns2" "dead:beef:$netid::1" + do_ping_long "$ns2" "100.64.$netid.3" + do_ping_long "$ns2" "dead:beef:$netid::3" + stop_if_error "Longer ping test failed (ns2)." + + do_ping_long "$ns3" "100.64.$netid.1" + do_ping_long "$ns3" "dead:beef:$netid::1" + do_ping_long "$ns3" "100.64.$netid.2" + do_ping_long "$ns3" "dead:beef:$netid::2" + stop_if_error "Longer ping test failed (ns3)." } setup_hsr_interfaces() { local HSRv="$1" - echo "INFO: preparing interfaces for HSRv${HSRv}." + echo "INFO: Preparing interfaces for HSRv${HSRv}." # Three HSR nodes. Each node has one link to each of its neighbour, two links in total. # # ns1eth1 ----- ns2eth1 @@ -140,17 +111,20 @@ setup_hsr_interfaces() ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" # HSRv0/1 - ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0 - ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0 - ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 \ + slave2 ns1eth2 supervision 45 version "$HSRv" proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 \ + slave2 ns2eth2 supervision 45 version "$HSRv" proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 \ + slave2 ns3eth2 supervision 45 version "$HSRv" proto 0 # IP for HSR ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 - ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad + ip -net "$ns1" addr add dead:beef:0::1/64 dev hsr1 nodad ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 - ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad + ip -net "$ns2" addr add dead:beef:0::2/64 dev hsr2 nodad ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 - ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + ip -net "$ns3" addr add dead:beef:0::3/64 dev hsr3 nodad ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1 ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2 @@ -177,113 +151,56 @@ setup_hsr_interfaces() setup_vlan_interfaces() { ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2 - ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3 - ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4 - ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5 - ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2 - ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3 - ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4 - ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5 - ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2 - ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3 - ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4 - ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5 ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2 - ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3 - ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4 - ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5 + ip -net "$ns1" addr add dead:beef:2::1/64 dev hsr1.2 nodad ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2 - ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3 - ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4 - ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5 + ip -net "$ns2" addr add dead:beef:2::2/64 dev hsr2.2 nodad ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2 - ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3 - ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4 - ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5 + ip -net "$ns3" addr add dead:beef:2::3/64 dev hsr3.2 nodad ip -net "$ns1" link set dev hsr1.2 up - ip -net "$ns1" link set dev hsr1.3 up - ip -net "$ns1" link set dev hsr1.4 up - ip -net "$ns1" link set dev hsr1.5 up - ip -net "$ns2" link set dev hsr2.2 up - ip -net "$ns2" link set dev hsr2.3 up - ip -net "$ns2" link set dev hsr2.4 up - ip -net "$ns2" link set dev hsr2.5 up - ip -net "$ns3" link set dev hsr3.2 up - ip -net "$ns3" link set dev hsr3.3 up - ip -net "$ns3" link set dev hsr3.4 up - ip -net "$ns3" link set dev hsr3.5 up } -hsr_vlan_ping() { - do_ping "$ns1" 100.64.2.2 - do_ping "$ns1" 100.64.3.2 - do_ping "$ns1" 100.64.4.2 - do_ping "$ns1" 100.64.5.2 - - do_ping "$ns1" 100.64.2.3 - do_ping "$ns1" 100.64.3.3 - do_ping "$ns1" 100.64.4.3 - do_ping "$ns1" 100.64.5.3 - - do_ping "$ns2" 100.64.2.1 - do_ping "$ns2" 100.64.3.1 - do_ping "$ns2" 100.64.4.1 - do_ping "$ns2" 100.64.5.1 - - do_ping "$ns2" 100.64.2.3 - do_ping "$ns2" 100.64.3.3 - do_ping "$ns2" 100.64.4.3 - do_ping "$ns2" 100.64.5.3 - - do_ping "$ns3" 100.64.2.1 - do_ping "$ns3" 100.64.3.1 - do_ping "$ns3" 100.64.4.1 - do_ping "$ns3" 100.64.5.1 - - do_ping "$ns3" 100.64.2.2 - do_ping "$ns3" 100.64.3.2 - do_ping "$ns3" 100.64.4.2 - do_ping "$ns3" 100.64.5.2 +run_ping_tests() +{ + echo "INFO: Running ping tests." + do_ping_tests 0 } -run_vlan_tests() { +run_vlan_tests() +{ vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}') vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}') vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}') if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then - echo "INFO: Running VLAN tests" + echo "INFO: Running VLAN ping tests" setup_vlan_interfaces - hsr_vlan_ping + do_ping_tests 2 else echo "INFO: Not Running VLAN tests as the device does not support VLAN" fi } check_prerequisites -setup_ns ns1 ns2 ns3 - trap cleanup_all_ns EXIT +setup_ns ns1 ns2 ns3 setup_hsr_interfaces 0 -do_complete_ping_test - +run_ping_tests run_vlan_tests setup_ns ns1 ns2 ns3 - setup_hsr_interfaces 1 -do_complete_ping_test - +run_ping_tests run_vlan_tests exit $ret diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh new file mode 100755 index 000000000000..be526281571c --- /dev/null +++ b/tools/testing/selftests/net/hsr/link_faults.sh @@ -0,0 +1,378 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# shellcheck disable=SC2329 + +source ../lib.sh + +ALL_TESTS=" + test_clean_hsrv0 + test_cut_link_hsrv0 + test_packet_loss_hsrv0 + test_high_packet_loss_hsrv0 + test_reordering_hsrv0 + + test_clean_hsrv1 + test_cut_link_hsrv1 + test_packet_loss_hsrv1 + test_high_packet_loss_hsrv1 + test_reordering_hsrv1 + + test_clean_prp + test_cut_link_prp + test_packet_loss_prp + test_high_packet_loss_prp + test_reordering_prp +" + +# The tests are running ping for 5sec with a relatively short interval in +# different scenarios with faulty links (cut links, packet loss, delay, +# reordering) that should be recoverable by HSR/PRP. The ping interval (10ms) +# is short enough that the base delay (50ms) leads to a queue in the netem +# qdiscs which is needed for reordering. + +setup_hsr_topo() +{ + # Three HSR nodes in a ring, every node has a LAN A interface connected + # to the LAN B interface of the next node. + # + # node1 node2 + # + # vethA -------- vethB + # hsr1 hsr2 + # vethB vethA + # \ / + # vethA vethB + # hsr3 + # + # node3 + + local ver="$1" + + setup_ns node1 node2 node3 + + # veth links + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node1" type veth peer name vethB netns "$node2" + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node2" type veth peer name vethB netns "$node3" + ip link add vethA netns "$node3" type veth peer name vethB netns "$node1" + + # MAC addresses (not needed for HSR operation, but helps with debugging) + ip -net "$node1" link set address 00:11:22:00:01:01 dev vethA + ip -net "$node1" link set address 00:11:22:00:01:02 dev vethB + + ip -net "$node2" link set address 00:11:22:00:02:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:02:02 dev vethB + + ip -net "$node3" link set address 00:11:22:00:03:01 dev vethA + ip -net "$node3" link set address 00:11:22:00:03:02 dev vethB + + # HSR interfaces + ip -net "$node1" link add name hsr1 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + ip -net "$node2" link add name hsr2 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + ip -net "$node3" link add name hsr3 type hsr proto 0 version "$ver" \ + slave1 vethA slave2 vethB supervision 45 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev hsr1 + ip -net "$node2" addr add 100.64.0.2/24 dev hsr2 + ip -net "$node3" addr add 100.64.0.3/24 dev hsr3 + + # Set all links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set hsr1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set hsr2 up + + ip -net "$node3" link set vethA up + ip -net "$node3" link set vethB up + ip -net "$node3" link set hsr3 up +} + +setup_prp_topo() +{ + # Two PRP nodes, connected by two links (treated as LAN A and LAN B). + # + # vethA ----- vethA + # prp1 prp2 + # vethB ----- vethB + # + # node1 node2 + + setup_ns node1 node2 + + # veth links + ip link add vethA netns "$node1" type veth peer name vethA netns "$node2" + ip link add vethB netns "$node1" type veth peer name vethB netns "$node2" + + # MAC addresses will be copied from LAN A interface + ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA + + # PRP interfaces + ip -net "$node1" link add name prp1 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + ip -net "$node2" link add name prp2 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev prp1 + ip -net "$node2" addr add 100.64.0.2/24 dev prp2 + + # All links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set prp1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set prp2 up +} + +wait_for_hsr_node_table() +{ + log_info "Wait for node table entries to be merged." + WAIT=5 + while [ "${WAIT}" -gt 0 ]; do + nts=$(cat /sys/kernel/debug/hsr/hsr*/node_table) + + # We need entries in the node tables, and they need to be merged + if (echo "$nts" | grep -qE "^([0-9a-f]{2}:){5}") && \ + ! (echo "$nts" | grep -q "00:00:00:00:00:00"); then + return + fi + + sleep 1 + ((WAIT--)) + done + check_err 1 "Failed to wait for merged node table entries" +} + +setup_topo() +{ + local proto="$1" + + if [ "$proto" = "HSRv0" ]; then + setup_hsr_topo 0 + wait_for_hsr_node_table + elif [ "$proto" = "HSRv1" ]; then + setup_hsr_topo 1 + wait_for_hsr_node_table + elif [ "$proto" = "PRP" ]; then + setup_prp_topo + else + check_err 1 "Unknown protocol (${proto})" + fi +} + +check_ping() +{ + local node="$1" + local dst="$2" + local accepted_dups="$3" + local ping_args="-q -i 0.01 -c 400" + + log_info "Running ping $node -> $dst" + # shellcheck disable=SC2086 + output=$(ip netns exec "$node" ping $ping_args "$dst" | \ + grep "packets transmitted") + log_info "$output" + + dups=0 + loss=0 + + if [[ "$output" =~ \+([0-9]+)" duplicates" ]]; then + dups="${BASH_REMATCH[1]}" + fi + if [[ "$output" =~ ([0-9\.]+\%)" packet loss" ]]; then + loss="${BASH_REMATCH[1]}" + fi + + if [ "$dups" -gt "$accepted_dups" ]; then + check_err 1 "Unexpected duplicate packets (${dups})" + fi + if [ "$loss" != "0%" ]; then + check_err 1 "Unexpected packet loss (${loss})" + fi +} + +test_clean() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + check_ping "$node1" "100.64.0.2" 0 + + log_test "${tname}" +} + +test_clean_hsrv0() +{ + test_clean "HSRv0" +} + +test_clean_hsrv1() +{ + test_clean "HSRv1" +} + +test_clean_prp() +{ + test_clean "PRP" +} + +test_cut_link() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + # Cutting link from subshell, so check_ping can run in the normal shell + # with access to global variables from the test harness. + ( + sleep 2 + log_info "Cutting link" + ip -net "$node1" link set vethB down + ) & + check_ping "$node1" "100.64.0.2" 0 + + wait + log_test "${tname}" +} + + +test_cut_link_hsrv0() +{ + test_cut_link "HSRv0" +} + +test_cut_link_hsrv1() +{ + test_cut_link "HSRv1" +} + +test_cut_link_prp() +{ + test_cut_link "PRP" +} + +test_packet_loss() +{ + local proto="$1" + local loss="$2" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}, ${loss}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + # Packet loss with lower delay makes sure the packets on the lossy link + # arrive first. + tc -net "$node1" qdisc add dev vethA root netem delay 50ms + tc -net "$node1" qdisc add dev vethB root netem delay 20ms loss "$loss" + + check_ping "$node1" "100.64.0.2" 40 + + log_test "${tname}" +} + +test_packet_loss_hsrv0() +{ + test_packet_loss "HSRv0" "20%" +} + +test_packet_loss_hsrv1() +{ + test_packet_loss "HSRv1" "20%" +} + +test_packet_loss_prp() +{ + test_packet_loss "PRP" "20%" +} + +test_high_packet_loss_hsrv0() +{ + test_packet_loss "HSRv0" "80%" +} + +test_high_packet_loss_hsrv1() +{ + test_packet_loss "HSRv1" "80%" +} + +test_high_packet_loss_prp() +{ + test_packet_loss "PRP" "80%" +} + +test_reordering() +{ + local proto="$1" + + RET=0 + tname="${FUNCNAME[0]} - ${proto}" + + setup_topo "$proto" + if ((RET != ksft_pass)); then + log_test "${tname} setup" + return + fi + + tc -net "$node1" qdisc add dev vethA root netem delay 50ms + tc -net "$node1" qdisc add dev vethB root netem delay 50ms reorder 20% + + check_ping "$node1" "100.64.0.2" 40 + + log_test "${tname}" +} + +test_reordering_hsrv0() +{ + test_reordering "HSRv0" +} + +test_reordering_hsrv1() +{ + test_reordering "HSRv1" +} + +test_reordering_prp() +{ + test_reordering "PRP" +} + +cleanup() +{ + cleanup_all_ns +} + +trap cleanup EXIT + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/hsr/prp_ping.sh b/tools/testing/selftests/net/hsr/prp_ping.sh new file mode 100755 index 000000000000..fd2ba9f05d4c --- /dev/null +++ b/tools/testing/selftests/net/hsr/prp_ping.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ipv6=true + +source ./hsr_common.sh + +optstring="h4" +usage() { + echo "Usage: $0 [OPTION]" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage "$0" + exit 0 + ;; + "4") + ipv6=false + ;; + "?") + usage "$0" + exit 1 + ;; +esac +done + +setup_prp_interfaces() +{ + echo "INFO: Preparing interfaces for PRP" +# Two PRP nodes, connected by two links (treated as LAN A and LAN B). +# +# vethA ----- vethA +# prp1 prp2 +# vethB ----- vethB +# +# node1 node2 + + # Interfaces + # shellcheck disable=SC2154 # variables assigned by setup_ns + ip link add vethA netns "$node1" type veth peer name vethA netns "$node2" + ip link add vethB netns "$node1" type veth peer name vethB netns "$node2" + + # MAC addresses will be copied from LAN A interface + ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA + ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA + + # PRP + ip -net "$node1" link add name prp1 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + ip -net "$node2" link add name prp2 type hsr \ + slave1 vethA slave2 vethB supervision 45 proto 1 + + # IP addresses + ip -net "$node1" addr add 100.64.0.1/24 dev prp1 + ip -net "$node1" addr add dead:beef:0::1/64 dev prp1 nodad + ip -net "$node2" addr add 100.64.0.2/24 dev prp2 + ip -net "$node2" addr add dead:beef:0::2/64 dev prp2 nodad + + # All links up + ip -net "$node1" link set vethA up + ip -net "$node1" link set vethB up + ip -net "$node1" link set prp1 up + + ip -net "$node2" link set vethA up + ip -net "$node2" link set vethB up + ip -net "$node2" link set prp2 up +} + +setup_vlan_interfaces() +{ + # Interfaces + ip -net "$node1" link add link prp1 name prp1.2 type vlan id 2 + ip -net "$node2" link add link prp2 name prp2.2 type vlan id 2 + + # IP addresses + ip -net "$node1" addr add 100.64.2.1/24 dev prp1.2 + ip -net "$node1" addr add dead:beef:2::1/64 dev prp1.2 nodad + + ip -net "$node2" addr add 100.64.2.2/24 dev prp2.2 + ip -net "$node2" addr add dead:beef:2::2/64 dev prp2.2 nodad + + # All links up + ip -net "$node1" link set prp1.2 up + ip -net "$node2" link set prp2.2 up +} + +do_ping_tests() +{ + local netid="$1" + + echo "INFO: Initial validation ping" + + do_ping "$node1" "100.64.$netid.2" + do_ping "$node2" "100.64.$netid.1" + stop_if_error "Initial validation failed on IPv4" + + do_ping "$node1" "dead:beef:$netid::2" + do_ping "$node2" "dead:beef:$netid::1" + stop_if_error "Initial validation failed on IPv6" + + echo "INFO: Longer ping test." + + do_ping_long "$node1" "100.64.$netid.2" + do_ping_long "$node2" "100.64.$netid.1" + stop_if_error "Longer ping test failed on IPv4." + + do_ping_long "$node1" "dead:beef:$netid::2" + do_ping_long "$node2" "dead:beef:$netid::1" + stop_if_error "Longer ping test failed on IPv6." +} + +run_ping_tests() +{ + echo "INFO: Running ping tests" + do_ping_tests 0 +} + +run_vlan_ping_tests() +{ + vlan_challenged_prp1=$(ip net exec "$node1" ethtool -k prp1 | \ + grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_prp2=$(ip net exec "$node2" ethtool -k prp2 | \ + grep "vlan-challenged" | awk '{print $2}') + + if [[ "$vlan_challenged_prp1" = "off" || \ + "$vlan_challenged_prp2" = "off" ]]; then + echo "INFO: Running VLAN ping tests" + setup_vlan_interfaces + do_ping_tests 2 + else + echo "INFO: Not Running VLAN tests as the device does not support VLAN" + fi +} + +check_prerequisites +trap cleanup_all_ns EXIT + +setup_ns node1 node2 +setup_prp_interfaces + +run_ping_tests +run_vlan_ping_tests + +exit $ret diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings index 0fbc037f2aa8..a953c96aa16e 100644 --- a/tools/testing/selftests/net/hsr/settings +++ b/tools/testing/selftests/net/hsr/settings @@ -1 +1 @@ -timeout=50 +timeout=180 diff --git a/tools/testing/selftests/net/icmp_rfc4884.c b/tools/testing/selftests/net/icmp_rfc4884.c new file mode 100644 index 000000000000..cd826b913557 --- /dev/null +++ b/tools/testing/selftests/net/icmp_rfc4884.c @@ -0,0 +1,679 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <arpa/inet.h> +#include <error.h> +#include <linux/errqueue.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <netinet/in.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "../kselftest_harness.h" + +static const unsigned short src_port = 44444; +static const unsigned short dst_port = 55555; +static const int min_orig_dgram_len = 128; +static const int min_payload_len_v4 = + min_orig_dgram_len - sizeof(struct iphdr) - sizeof(struct udphdr); +static const int min_payload_len_v6 = + min_orig_dgram_len - sizeof(struct ipv6hdr) - sizeof(struct udphdr); +static const uint8_t orig_payload_byte = 0xAA; + +struct sockaddr_inet { + union { + struct sockaddr_in6 v6; + struct sockaddr_in v4; + struct sockaddr sa; + }; + socklen_t len; +}; + +struct ip_case_info { + int domain; + int level; + int opt1; + int opt2; + int proto; + int (*build_func)(uint8_t *buf, ssize_t buflen, bool with_ext, + int payload_len, bool bad_csum, bool bad_len, + bool smaller_len); + int min_payload; +}; + +static int bringup_loopback(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int fd; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0) + goto err; + + ifr.ifr_flags = ifr.ifr_flags | IFF_UP; + + if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0) + goto err; + + close(fd); + return 0; + +err: + close(fd); + return -1; +} + +static uint16_t csum(const void *buf, size_t len) +{ + const uint8_t *data = buf; + uint32_t sum = 0; + + while (len > 1) { + sum += (data[0] << 8) | data[1]; + data += 2; + len -= 2; + } + + if (len == 1) + sum += data[0] << 8; + + while (sum >> 16) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum & 0xFFFF; +} + +static int poll_err(int fd) +{ + struct pollfd pfd; + + memset(&pfd, 0, sizeof(pfd)); + pfd.fd = fd; + + if (poll(&pfd, 1, 5000) != 1 || pfd.revents != POLLERR) + return -1; + + return 0; +} + +static void set_addr(struct sockaddr_inet *addr, int domain, + unsigned short port) +{ + memset(addr, 0, sizeof(*addr)); + + switch (domain) { + case AF_INET: + addr->v4.sin_family = AF_INET; + addr->v4.sin_port = htons(port); + addr->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr->len = sizeof(addr->v4); + break; + case AF_INET6: + addr->v6.sin6_family = AF_INET6; + addr->v6.sin6_port = htons(port); + addr->v6.sin6_addr = in6addr_loopback; + addr->len = sizeof(addr->v6); + break; + } +} + +static int bind_and_setsockopt(int fd, const struct ip_case_info *info) +{ + struct sockaddr_inet addr; + int opt = 1; + + set_addr(&addr, info->domain, src_port); + + if (setsockopt(fd, info->level, info->opt1, &opt, sizeof(opt)) < 0) + return -1; + + if (setsockopt(fd, info->level, info->opt2, &opt, sizeof(opt)) < 0) + return -1; + + return bind(fd, &addr.sa, addr.len); +} + +static int build_rfc4884_ext(uint8_t *buf, size_t buflen, bool bad_csum, + bool bad_len, bool smaller_len) +{ + struct icmp_extobj_hdr *objh; + struct icmp_ext_hdr *exthdr; + size_t obj_len, ext_len; + uint16_t sum; + + /* Use an object payload of 4 bytes */ + obj_len = sizeof(*objh) + sizeof(uint32_t); + ext_len = sizeof(*exthdr) + obj_len; + + if (ext_len > buflen) + return -EINVAL; + + exthdr = (struct icmp_ext_hdr *)buf; + objh = (struct icmp_extobj_hdr *)(buf + sizeof(*exthdr)); + + exthdr->version = 2; + /* When encoding a bad object length, either encode a length too small + * to fit the object header or too big to fit in the packet. + */ + if (bad_len) + obj_len = smaller_len ? sizeof(*objh) - 1 : obj_len * 2; + objh->length = htons(obj_len); + + sum = csum(buf, ext_len); + exthdr->checksum = htons(bad_csum ? sum - 1 : sum); + + return ext_len; +} + +static int build_orig_dgram_v4(uint8_t *buf, ssize_t buflen, int payload_len) +{ + struct udphdr *udph; + struct iphdr *iph; + size_t len = 0; + + len = sizeof(*iph) + sizeof(*udph) + payload_len; + if (len > buflen) + return -EINVAL; + + iph = (struct iphdr *)buf; + udph = (struct udphdr *)(buf + sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->protocol = IPPROTO_UDP; + iph->saddr = htonl(INADDR_LOOPBACK); + iph->daddr = htonl(INADDR_LOOPBACK); + iph->tot_len = htons(len); + iph->check = htons(csum(iph, sizeof(*iph))); + + udph->source = htons(src_port); + udph->dest = htons(dst_port); + udph->len = htons(sizeof(*udph) + payload_len); + + memset(buf + sizeof(*iph) + sizeof(*udph), orig_payload_byte, + payload_len); + + return len; +} + +static int build_orig_dgram_v6(uint8_t *buf, ssize_t buflen, int payload_len) +{ + struct udphdr *udph; + struct ipv6hdr *iph; + size_t len = 0; + + len = sizeof(*iph) + sizeof(*udph) + payload_len; + if (len > buflen) + return -EINVAL; + + iph = (struct ipv6hdr *)buf; + udph = (struct udphdr *)(buf + sizeof(*iph)); + + iph->version = 6; + iph->payload_len = htons(sizeof(*udph) + payload_len); + iph->nexthdr = IPPROTO_UDP; + iph->saddr = in6addr_loopback; + iph->daddr = in6addr_loopback; + + udph->source = htons(src_port); + udph->dest = htons(dst_port); + udph->len = htons(sizeof(*udph) + payload_len); + + memset(buf + sizeof(*iph) + sizeof(*udph), orig_payload_byte, + payload_len); + + return len; +} + +static int build_icmpv4_pkt(uint8_t *buf, ssize_t buflen, bool with_ext, + int payload_len, bool bad_csum, bool bad_len, + bool smaller_len) +{ + struct icmphdr *icmph; + int len, ret; + + len = sizeof(*icmph); + memset(buf, 0, buflen); + + icmph = (struct icmphdr *)buf; + icmph->type = ICMP_DEST_UNREACH; + icmph->code = ICMP_PORT_UNREACH; + icmph->checksum = 0; + + ret = build_orig_dgram_v4(buf + len, buflen - len, payload_len); + if (ret < 0) + return ret; + + len += ret; + + icmph->un.reserved[1] = (len - sizeof(*icmph)) / sizeof(uint32_t); + + if (with_ext) { + ret = build_rfc4884_ext(buf + len, buflen - len, + bad_csum, bad_len, smaller_len); + if (ret < 0) + return ret; + + len += ret; + } + + icmph->checksum = htons(csum(icmph, len)); + return len; +} + +static int build_icmpv6_pkt(uint8_t *buf, ssize_t buflen, bool with_ext, + int payload_len, bool bad_csum, bool bad_len, + bool smaller_len) +{ + struct icmp6hdr *icmph; + int len, ret; + + len = sizeof(*icmph); + memset(buf, 0, buflen); + + icmph = (struct icmp6hdr *)buf; + icmph->icmp6_type = ICMPV6_DEST_UNREACH; + icmph->icmp6_code = ICMPV6_PORT_UNREACH; + icmph->icmp6_cksum = 0; + + ret = build_orig_dgram_v6(buf + len, buflen - len, payload_len); + if (ret < 0) + return ret; + + len += ret; + + icmph->icmp6_datagram_len = (len - sizeof(*icmph)) / sizeof(uint64_t); + + if (with_ext) { + ret = build_rfc4884_ext(buf + len, buflen - len, + bad_csum, bad_len, smaller_len); + if (ret < 0) + return ret; + + len += ret; + } + + icmph->icmp6_cksum = htons(csum(icmph, len)); + return len; +} + +FIXTURE(rfc4884) {}; + +FIXTURE_SETUP(rfc4884) +{ + int ret; + + ret = unshare(CLONE_NEWNET); + ASSERT_EQ(ret, 0) { + TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno)); + } + + ret = bringup_loopback(); + ASSERT_EQ(ret, 0) TH_LOG("Failed to bring up loopback interface"); +} + +FIXTURE_TEARDOWN(rfc4884) +{ +} + +const struct ip_case_info ipv4_info = { + .domain = AF_INET, + .level = SOL_IP, + .opt1 = IP_RECVERR, + .opt2 = IP_RECVERR_RFC4884, + .proto = IPPROTO_ICMP, + .build_func = build_icmpv4_pkt, + .min_payload = min_payload_len_v4, +}; + +const struct ip_case_info ipv6_info = { + .domain = AF_INET6, + .level = SOL_IPV6, + .opt1 = IPV6_RECVERR, + .opt2 = IPV6_RECVERR_RFC4884, + .proto = IPPROTO_ICMPV6, + .build_func = build_icmpv6_pkt, + .min_payload = min_payload_len_v6, +}; + +FIXTURE_VARIANT(rfc4884) { + /* IPv4/v6 related information */ + struct ip_case_info info; + /* Whether to append an ICMP extension or not */ + bool with_ext; + /* UDP payload length */ + int payload_len; + /* Whether to generate a bad checksum in the ICMP extension structure */ + bool bad_csum; + /* Whether to generate a bad length in the ICMP object header */ + bool bad_len; + /* Whether it is too small to fit the object header or too big to fit + * in the packet + */ + bool smaller_len; +}; + +/* Tests that a valid ICMPv4 error message with extension and the original + * datagram is smaller than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_small_payload) { + .info = ipv4_info, + .with_ext = true, + .payload_len = 64, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message with extension and 128 bytes original + * datagram, generates an error with the expected offset, and does not raise the + * SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext) { + .info = ipv4_info, + .with_ext = true, + .payload_len = min_payload_len_v4, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message with extension and the original + * datagram is larger than 128 bytes, generates an error with the expected + * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_large_payload) { + .info = ipv4_info, + .with_ext = true, + .payload_len = 256, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message without extension and the original + * datagram is smaller than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_small_payload) { + .info = ipv4_info, + .with_ext = false, + .payload_len = 64, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message without extension and 128 bytes + * original datagram, generates an error with zero offset, and does not raise + * the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_min_payload) { + .info = ipv4_info, + .with_ext = false, + .payload_len = min_payload_len_v4, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv4 error message without extension and the original + * datagram is larger than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_large_payload) { + .info = ipv4_info, + .with_ext = false, + .payload_len = 256, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that an ICMPv4 error message with extension and an invalid checksum, + * generates an error with the expected offset, and raises the + * SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_checksum) { + .info = ipv4_info, + .with_ext = true, + .payload_len = min_payload_len_v4, + .bad_csum = true, + .bad_len = false, +}; + +/* Tests that an ICMPv4 error message with extension and an object length + * smaller than the object header, generates an error with the expected offset, + * and raises the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_small) { + .info = ipv4_info, + .with_ext = true, + .payload_len = min_payload_len_v4, + .bad_csum = false, + .bad_len = true, + .smaller_len = true, +}; + +/* Tests that an ICMPv4 error message with extension and an object length that + * is too big to fit in the packet, generates an error with the expected offset, + * and raises the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_large) { + .info = ipv4_info, + .with_ext = true, + .payload_len = min_payload_len_v4, + .bad_csum = false, + .bad_len = true, + .smaller_len = false, +}; + +/* Tests that a valid ICMPv6 error message with extension and the original + * datagram is smaller than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_small_payload) { + .info = ipv6_info, + .with_ext = true, + .payload_len = 64, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv6 error message with extension and 128 bytes original + * datagram, generates an error with the expected offset, and does not raise the + * SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext) { + .info = ipv6_info, + .with_ext = true, + .payload_len = min_payload_len_v6, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv6 error message with extension and the original + * datagram is larger than 128 bytes, generates an error with the expected + * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_large_payload) { + .info = ipv6_info, + .with_ext = true, + .payload_len = 256, + .bad_csum = false, + .bad_len = false, +}; +/* Tests that a valid ICMPv6 error message without extension and the original + * datagram is smaller than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_small_payload) { + .info = ipv6_info, + .with_ext = false, + .payload_len = 64, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv6 error message without extension and 128 bytes + * original datagram, generates an error with zero offset, and does not + * raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_min_payload) { + .info = ipv6_info, + .with_ext = false, + .payload_len = min_payload_len_v6, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that a valid ICMPv6 error message without extension and the original + * datagram is larger than 128 bytes, generates an error with zero offset, + * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_large_payload) { + .info = ipv6_info, + .with_ext = false, + .payload_len = 256, + .bad_csum = false, + .bad_len = false, +}; + +/* Tests that an ICMPv6 error message with extension and an invalid checksum, + * generates an error with the expected offset, and raises the + * SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_checksum) { + .info = ipv6_info, + .with_ext = true, + .payload_len = min_payload_len_v6, + .bad_csum = true, + .bad_len = false, +}; + +/* Tests that an ICMPv6 error message with extension and an object length + * smaller than the object header, generates an error with the expected offset, + * and raises the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_small) { + .info = ipv6_info, + .with_ext = true, + .payload_len = min_payload_len_v6, + .bad_csum = false, + .bad_len = true, + .smaller_len = true, +}; + +/* Tests that an ICMPv6 error message with extension and an object length that + * is too big to fit in the packet, generates an error with the expected offset, + * and raises the SO_EE_RFC4884_FLAG_INVALID flag. + */ +FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_large) { + .info = ipv6_info, + .with_ext = true, + .payload_len = min_payload_len_v6, + .bad_csum = false, + .bad_len = true, + .smaller_len = false, +}; + +static void +check_rfc4884_offset(struct __test_metadata *_metadata, int sock, + const FIXTURE_VARIANT(rfc4884) *v) +{ + char rxbuf[1024]; + char ctrl[1024]; + struct iovec iov = { + .iov_base = rxbuf, + .iov_len = sizeof(rxbuf) + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = ctrl, + .msg_controllen = sizeof(ctrl), + }; + struct cmsghdr *cmsg; + int recv; + + ASSERT_EQ(poll_err(sock), 0); + + recv = recvmsg(sock, &msg, MSG_ERRQUEUE); + ASSERT_GE(recv, 0) TH_LOG("recvmsg(MSG_ERRQUEUE) failed"); + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + bool is_invalid, expected_invalid; + struct sock_extended_err *ee; + int expected_off; + uint16_t off; + + if (cmsg->cmsg_level != v->info.level || + cmsg->cmsg_type != v->info.opt1) { + TH_LOG("Unrelated cmsgs were encountered in recvmsg()"); + continue; + } + + ee = (struct sock_extended_err *)CMSG_DATA(cmsg); + off = ee->ee_rfc4884.len; + is_invalid = ee->ee_rfc4884.flags & SO_EE_RFC4884_FLAG_INVALID; + + expected_invalid = v->bad_csum || v->bad_len; + ASSERT_EQ(is_invalid, expected_invalid) { + TH_LOG("Expected invalidity flag to be %d, but got %d", + expected_invalid, is_invalid); + } + + expected_off = + (v->with_ext && v->payload_len >= v->info.min_payload) ? + v->payload_len : 0; + ASSERT_EQ(off, expected_off) { + TH_LOG("Expected RFC4884 offset %u, got %u", + expected_off, off); + } + break; + } +} + +TEST_F(rfc4884, rfc4884) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + uint8_t pkt[1024]; + int dgram, raw; + int len, sent; + int err; + + dgram = socket(v->info.domain, SOCK_DGRAM, 0); + ASSERT_GE(dgram, 0) TH_LOG("Opening datagram socket failed"); + + err = bind_and_setsockopt(dgram, &v->info); + ASSERT_EQ(err, 0) TH_LOG("Bind failed"); + + raw = socket(v->info.domain, SOCK_RAW, v->info.proto); + ASSERT_GE(raw, 0) TH_LOG("Opening raw socket failed"); + + len = v->info.build_func(pkt, sizeof(pkt), v->with_ext, v->payload_len, + v->bad_csum, v->bad_len, v->smaller_len); + ASSERT_GT(len, 0) TH_LOG("Building packet failed"); + + set_addr(&addr, v->info.domain, 0); + sent = sendto(raw, pkt, len, 0, &addr.sa, addr.len); + ASSERT_EQ(len, sent) TH_LOG("Sending packet failed"); + + check_rfc4884_offset(_metadata, dgram, v); + + close(dgram); + close(raw); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 845c26dd01a9..b2b99889942f 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -273,8 +273,8 @@ setup() ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null - ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null ip -netns $ioam_node_alpha link set veth0 up &>/dev/null ip -netns $ioam_node_alpha link set lo up &>/dev/null ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \ diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 0ccf484b1d9d..f4afef51b930 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -43,6 +43,10 @@ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#ifndef offsetof +#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) +#endif + #define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ #define MAX_PAYLOAD 2048 #define XFRM_ALGO_KEY_BUF_SIZE 512 @@ -827,13 +831,16 @@ static int xfrm_fill_key(char *name, char *buf, static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz, struct xfrm_desc *desc) { - struct { + union { union { struct xfrm_algo alg; struct xfrm_algo_aead aead; struct xfrm_algo_auth auth; } u; - char buf[XFRM_ALGO_KEY_BUF_SIZE]; + struct { + unsigned char __offset_to_FAM[offsetof(struct xfrm_algo_auth, alg_key)]; + char buf[XFRM_ALGO_KEY_BUF_SIZE]; + }; } alg = {}; size_t alen, elen, clen, aelen; unsigned short type; diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c index 27437590eeb5..e28884ce3ab3 100644 --- a/tools/testing/selftests/net/lib/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -707,7 +707,7 @@ static uint32_t recv_get_packet_csum_status(struct msghdr *msg) cm->cmsg_level, cm->cmsg_type); if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata))) - error(1, 0, "cmsg: len=%lu expected=%lu", + error(1, 0, "cmsg: len=%zu expected=%zu", cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata))); aux = (void *)CMSG_DATA(cm); diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 531e7fa1b3ea..6cdfb8afccb5 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -8,7 +8,7 @@ import time import traceback from collections import namedtuple from .consts import KSFT_MAIN_NAME -from .utils import global_defer_queue +from . import utils KSFT_RESULT = None KSFT_RESULT_ALL = True @@ -32,8 +32,23 @@ class KsftTerminate(KeyboardInterrupt): def ksft_pr(*objs, **kwargs): + """ + Print logs to stdout. + + Behaves like print() but log lines will be prefixed + with # to prevent breaking the TAP output formatting. + + Extra arguments (on top of what print() supports): + line_pfx - add extra string before each line + """ + sep = kwargs.pop("sep", " ") + pfx = kwargs.pop("line_pfx", "") + pfx = "#" + (" " + pfx if pfx else "") kwargs["flush"] = True - print("#", *objs, **kwargs) + + text = sep.join(str(obj) for obj in objs) + prefixed = f"\n{pfx} ".join(text.split('\n')) + print(pfx, prefixed, **kwargs) def _fail(*args): @@ -153,21 +168,24 @@ def ktap_result(ok, cnt=1, case_name="", comment=""): print(res, flush=True) +def _ksft_defer_arm(state): + """ Allow or disallow the use of defer() """ + utils.GLOBAL_DEFER_ARMED = state + + def ksft_flush_defer(): global KSFT_RESULT i = 0 - qlen_start = len(global_defer_queue) - while global_defer_queue: + qlen_start = len(utils.GLOBAL_DEFER_QUEUE) + while utils.GLOBAL_DEFER_QUEUE: i += 1 - entry = global_defer_queue.pop() + entry = utils.GLOBAL_DEFER_QUEUE.pop() try: entry.exec_only() except Exception: ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") - tb = traceback.format_exc() - for line in tb.strip().split('\n'): - ksft_pr("Defer Exception|", line) + ksft_pr(traceback.format_exc(), line_pfx="Defer Exception|") KSFT_RESULT = False @@ -315,6 +333,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): comment = "" cnt_key = "" + _ksft_defer_arm(True) try: func(*args) except KsftSkipEx as e: @@ -325,20 +344,17 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cnt_key = 'xfail' except BaseException as e: stop |= isinstance(e, KeyboardInterrupt) - tb = traceback.format_exc() - for line in tb.strip().split('\n'): - ksft_pr("Exception|", line) + ksft_pr(traceback.format_exc(), line_pfx="Exception|") if stop: ksft_pr(f"Stopping tests due to {type(e).__name__}.") KSFT_RESULT = False cnt_key = 'fail' + _ksft_defer_arm(False) try: ksft_flush_defer() except BaseException as e: - tb = traceback.format_exc() - for line in tb.strip().split('\n'): - ksft_pr("Exception|", line) + ksft_pr(traceback.format_exc(), line_pfx="Exception|") if isinstance(e, KeyboardInterrupt): ksft_pr() ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.") diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 106ee1f2df86..85884f3e827b 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -41,7 +41,9 @@ class cmd: self.ret = None self.ksft_term_fd = None + self.host = host self.comm = comm + if host: self.proc = host.cmd(comm) else: @@ -99,6 +101,27 @@ class cmd: raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % (self.proc.args, stdout, stderr), self) + def __repr__(self): + def str_fmt(name, s): + name += ': ' + return (name + s.strip().replace('\n', '\n' + ' ' * len(name))) + + ret = "CMD" + if self.host: + ret += "[remote]" + if self.ret is None: + ret += f" (unterminated): {self.comm}\n" + elif self.ret == 0: + ret += f" (success): {self.comm}\n" + else: + ret += f": {self.comm}\n" + ret += f" EXIT: {self.ret}\n" + if self.stdout: + ret += str_fmt(" STDOUT", self.stdout) + "\n" + if self.stderr: + ret += str_fmt(" STDERR", self.stderr) + "\n" + return ret.strip() + class bkg(cmd): """ @@ -137,11 +160,12 @@ class bkg(cmd): def __exit__(self, ex_type, ex_value, ex_tb): # Force termination on exception - terminate = self.terminate or (self._exit_wait and ex_type) + terminate = self.terminate or (self._exit_wait and ex_type is not None) return self.process(terminate=terminate, fail=self.check_fail) -global_defer_queue = [] +GLOBAL_DEFER_QUEUE = [] +GLOBAL_DEFER_ARMED = False class defer: @@ -153,7 +177,9 @@ class defer: self.args = args self.kwargs = kwargs - self._queue = global_defer_queue + if not GLOBAL_DEFER_ARMED: + raise Exception("defer queue not armed, did you use defer() outside of a test case?") + self._queue = GLOBAL_DEFER_QUEUE self._queue.append(self) def __enter__(self): diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 4dd6278cd3dd..22ba0da2adb8 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -11,6 +11,7 @@ TEST_PROGS := \ mptcp_connect_checksum.sh \ mptcp_connect_mmap.sh \ mptcp_connect_sendfile.sh \ + mptcp_connect_splice.sh \ mptcp_join.sh \ mptcp_sockopt.sh \ pm_netlink.sh \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 10f6f99cfd4e..cbe573c4ab3a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -52,6 +52,7 @@ enum cfg_mode { CFG_MODE_POLL, CFG_MODE_MMAP, CFG_MODE_SENDFILE, + CFG_MODE_SPLICE, }; enum cfg_peek { @@ -124,7 +125,7 @@ static void die_usage(void) fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " "-- for MPJ tests\n"); fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); - fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); + fprintf(stderr, "\t-m [poll|mmap|sendfile|splice] -- use poll(default)/mmap+write/sendfile/splice\n"); fprintf(stderr, "\t-M mark -- set socket packet mark\n"); fprintf(stderr, "\t-o option -- test sockopt <option>\n"); fprintf(stderr, "\t-p num -- use port num\n"); @@ -258,7 +259,7 @@ static void set_transparent(int fd, int pf) } } -static void set_mptfo(int fd, int pf) +static void set_mptfo(int fd) { int qlen = 25; @@ -335,7 +336,7 @@ static int sock_listen_mptcp(const char * const listenaddr, set_transparent(sock, pf); if (cfg_sockopt_types.mptfo) - set_mptfo(sock, pf); + set_mptfo(sock); if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -406,21 +407,18 @@ static int sock_connect_mptcp(const char * const remoteaddr, *peer = a; break; /* success */ } + perror("sendto()"); } else { if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { *peer = a; break; /* success */ } - } - if (cfg_sockopt_types.mptfo) { - perror("sendto()"); - close(sock); - sock = -1; - } else { perror("connect()"); - close(sock); - sock = -1; } + + /* error */ + close(sock); + sock = -1; } freeaddrinfo(addr); @@ -935,6 +933,71 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, return err; } +static int do_splice(const int infd, const int outfd, const size_t len, + struct wstate *winfo) +{ + ssize_t in_bytes, out_bytes; + int pipefd[2]; + int err; + + err = pipe(pipefd); + if (err) { + perror("pipe"); + return 2; + } + +again: + in_bytes = splice(infd, NULL, pipefd[1], NULL, len - winfo->total_len, + SPLICE_F_MOVE | SPLICE_F_MORE); + if (in_bytes < 0) { + perror("splice in"); + err = 3; + } else if (in_bytes > 0) { + out_bytes = splice(pipefd[0], NULL, outfd, NULL, in_bytes, + SPLICE_F_MOVE | SPLICE_F_MORE); + if (out_bytes < 0) { + perror("splice out"); + err = 4; + } else if (in_bytes != out_bytes) { + fprintf(stderr, "Unexpected transfer: %zu vs %zu\n", + in_bytes, out_bytes); + err = 5; + } else { + goto again; + } + } + + close(pipefd[0]); + close(pipefd[1]); + + return err; +} + +static int copyfd_io_splice(int infd, int peerfd, int outfd, unsigned int size, + bool *in_closed_after_out, struct wstate *winfo) +{ + int err; + + if (listen_mode) { + err = do_splice(peerfd, outfd, size, winfo); + if (err) + return err; + + err = do_splice(infd, peerfd, size, winfo); + } else { + err = do_splice(infd, peerfd, size, winfo); + if (err) + return err; + + shut_wr(peerfd); + + err = do_splice(peerfd, outfd, size, winfo); + *in_closed_after_out = true; + } + + return err; +} + static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) { bool in_closed_after_out = false; @@ -967,6 +1030,14 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct &in_closed_after_out, winfo); break; + case CFG_MODE_SPLICE: + file_size = get_infd_size(infd); + if (file_size < 0) + return file_size; + ret = copyfd_io_splice(infd, peerfd, outfd, file_size, + &in_closed_after_out, winfo); + break; + default: fprintf(stderr, "Invalid mode %d\n", cfg_mode); @@ -1296,8 +1367,8 @@ void xdisconnect(int fd) int main_loop(void) { + struct addrinfo *peer = NULL; int fd = 0, ret, fd_in = 0; - struct addrinfo *peer; struct wstate winfo; if (cfg_input && cfg_sockopt_types.mptfo) { @@ -1380,12 +1451,15 @@ int parse_mode(const char *mode) return CFG_MODE_MMAP; if (!strcasecmp(mode, "sendfile")) return CFG_MODE_SENDFILE; + if (!strcasecmp(mode, "splice")) + return CFG_MODE_SPLICE; fprintf(stderr, "Unknown test mode: %s\n", mode); fprintf(stderr, "Supported modes are:\n"); fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n"); fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n"); fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n"); + fprintf(stderr, "\t\t\"splice\" - send entire input file (splice), then read response (-l will read input first)\n"); die_usage(); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh new file mode 100755 index 000000000000..241254a966c9 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m splice "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c index 8e0b1b8d84b6..5e222ba977e4 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_diag.c +++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c @@ -1,21 +1,24 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2025, Kylin Software */ -#include <linux/sock_diag.h> -#include <linux/rtnetlink.h> -#include <linux/inet_diag.h> -#include <linux/netlink.h> -#include <linux/compiler.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + #include <sys/socket.h> -#include <netinet/in.h> -#include <linux/tcp.h> + #include <arpa/inet.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <stdio.h> +#include <netinet/in.h> + +#include <linux/compiler.h> +#include <linux/inet_diag.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/sock_diag.h> +#include <linux/tcp.h> #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e70d3420954f..dc1f200aaa81 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -603,8 +603,7 @@ wait_rm_addr() local old_cnt="${2}" local cnt - local i - for i in $(seq 10); do + for _ in $(seq 10); do cnt=$(rm_addr_count ${ns}) [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 @@ -623,25 +622,22 @@ wait_rm_sf() local old_cnt="${2}" local cnt - local i - for i in $(seq 10); do + for _ in $(seq 10); do cnt=$(rm_sf_count ${ns}) [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 done } +# $1: expected MPJ ACK Rx counter in $ns1 wait_mpj() { - local ns="${1}" - local cnt old_cnt - - old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") + local exp_cnt="${1}" + local cnt - local i - for i in $(seq 10); do - cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") - [ "$cnt" = "${old_cnt}" ] || break + for _ in $(seq 10); do + cnt=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") + [ "${cnt}" = "${exp_cnt}" ] && break sleep 0.1 done } @@ -650,8 +646,7 @@ wait_ll_ready() { local ns="${1}" - local i - for i in $(seq 50); do + for _ in $(seq 50); do ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" | grep -qw "tentative" || break sleep 0.1 @@ -1407,7 +1402,7 @@ chk_join_tx_nr() count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$create" ]; then rc=${KSFT_FAIL} print_check "syn tx create socket error" @@ -1416,7 +1411,7 @@ chk_join_tx_nr() count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$bind" ]; then rc=${KSFT_FAIL} print_check "syn tx bind error" @@ -1425,7 +1420,7 @@ chk_join_tx_nr() count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$connect" ]; then rc=${KSFT_FAIL} print_check "syn tx connect error" @@ -1451,7 +1446,7 @@ chk_fallback_nr() count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$infinite_map_tx" ]; then rc=${KSFT_FAIL} print_check "$ns infinite map tx fallback" @@ -1460,7 +1455,7 @@ chk_fallback_nr() count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$dss_corruption" ]; then rc=${KSFT_FAIL} print_check "$ns dss corruption fallback" @@ -1469,7 +1464,7 @@ chk_fallback_nr() count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$simult_conn" ]; then rc=${KSFT_FAIL} print_check "$ns simult conn fallback" @@ -1478,7 +1473,7 @@ chk_fallback_nr() count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$mpc_passive" ]; then rc=${KSFT_FAIL} print_check "$ns mpc passive fallback" @@ -1487,7 +1482,7 @@ chk_fallback_nr() count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$mpc_active" ]; then rc=${KSFT_FAIL} print_check "$ns mpc active fallback" @@ -1496,7 +1491,7 @@ chk_fallback_nr() count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$mpc_data" ]; then rc=${KSFT_FAIL} print_check "$ns mpc data fallback" @@ -1505,7 +1500,7 @@ chk_fallback_nr() count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$md5_sig" ]; then rc=${KSFT_FAIL} print_check "$ns MD5 Sig fallback" @@ -1514,7 +1509,7 @@ chk_fallback_nr() count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$dss" ]; then rc=${KSFT_FAIL} print_check "$ns dss fallback" @@ -1590,7 +1585,7 @@ chk_join_nr() count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "0" ]; then rc=${KSFT_FAIL} print_check "synack HMAC" @@ -1599,7 +1594,7 @@ chk_join_nr() count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$ack_nr" ]; then rc=${KSFT_FAIL} print_check "ack rx" @@ -1608,7 +1603,7 @@ chk_join_nr() count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "0" ]; then rc=${KSFT_FAIL} print_check "ack HMAC" @@ -1617,7 +1612,7 @@ chk_join_nr() count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected") if [ -z "$count" ]; then - rc=${KSFT_SKIP} + : # ignore skip elif [ "$count" != "$syn_rej" ]; then rc=${KSFT_FAIL} print_check "syn rejected" @@ -1650,7 +1645,6 @@ chk_stale_nr() local stale_min=$2 local stale_max=$3 local stale_delta=$4 - local dump_stats local stale_nr local recover_nr @@ -1666,16 +1660,11 @@ chk_stale_nr() fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \ " expected stale in range [$stale_min..$stale_max]," \ " stale-recover delta $stale_delta" - dump_stats=1 + echo $ns stats + ip -n $ns -s link show else print_ok fi - - if [ "${dump_stats}" = 1 ]; then - echo $ns stats - ip netns exec $ns ip -s link show - ip netns exec $ns nstat -as | grep MPTcp - fi } chk_add_nr() @@ -3718,7 +3707,6 @@ userspace_pm_add_addr() tk=$(mptcp_lib_evts_get_info token "$evts") ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3 - sleep 1 } # $1: ns ; $2: id @@ -3749,7 +3737,6 @@ userspace_pm_add_sf() ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \ rip $da rport $dp token $tk - sleep 1 } # $1: ns ; $2: addr $3: event type @@ -3999,9 +3986,11 @@ userspace_tests() { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns1 + wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_addr $ns1 10.0.2.1 10 + wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1 userspace_pm_add_addr $ns1 10.0.3.1 20 + wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 2 chk_join_nr 2 2 2 chk_add_nr 2 2 chk_mptcp_info subflows 2 subflows 2 @@ -4032,8 +4021,9 @@ userspace_tests() { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_sf $ns2 10.0.3.2 20 + wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 @@ -4060,10 +4050,11 @@ userspace_tests() { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 userspace_pm_add_sf $ns2 10.0.3.2 0 + wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1 userspace_pm_chk_dump_addr "${ns2}" \ "id 0 flags subflow 10.0.3.2" "id 0 subflow" chk_join_nr 1 1 1 @@ -4081,8 +4072,9 @@ userspace_tests() { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_sf $ns2 10.0.3.2 20 + wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 @@ -4105,8 +4097,9 @@ userspace_tests() { timeout_test=120 test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns1 + wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_addr $ns1 10.0.2.1 10 + wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_mptcp_info subflows 1 subflows 1 @@ -4133,6 +4126,7 @@ userspace_tests() local tests_pid=$! wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 userspace_pm_add_sf $ns2 10.0.3.2 20 + wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1 chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 @@ -4158,7 +4152,7 @@ endpoint_tests() { # subflow_rebuild_header is needed to support the implicit flag # userspace pm type prevents add_addr - if reset "implicit EP" && + if reset_with_events "implicit EP" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 @@ -4167,7 +4161,7 @@ endpoint_tests() run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns1 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 1 flags implicit chk_mptcp_info subflows 1 subflows 1 @@ -4181,6 +4175,7 @@ endpoint_tests() pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal mptcp_lib_kill_group_wait $tests_pid + kill_events_pids fi if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && @@ -4194,7 +4189,7 @@ endpoint_tests() run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 chk_subflow_nr "before delete id 2" 2 @@ -4206,7 +4201,7 @@ endpoint_tests() chk_mptcp_info subflows 0 subflows 0 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - wait_mpj $ns2 + wait_mpj 2 chk_subflow_nr "after re-add id 2" 2 chk_mptcp_info subflows 1 subflows 1 @@ -4218,7 +4213,7 @@ endpoint_tests() ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT pm_nl_del_endpoint $ns2 3 10.0.3.2 pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - wait_mpj $ns2 + wait_mpj 3 chk_subflow_nr "after no reject" 3 chk_mptcp_info subflows 2 subflows 2 @@ -4230,7 +4225,7 @@ endpoint_tests() chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow - wait_mpj $ns2 + wait_mpj $((3 + i)) chk_subflow_nr "after re-add id 0 ($i)" 3 chk_mptcp_info subflows 3 subflows 3 done @@ -4272,7 +4267,7 @@ endpoint_tests() run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! - wait_mpj $ns2 + wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 pm_nl_check_endpoint "creation" \ $ns1 10.0.2.1 id 1 flags signal chk_subflow_nr "before delete" 2 @@ -4288,7 +4283,7 @@ endpoint_tests() pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal - wait_mpj $ns2 + wait_mpj 3 chk_subflow_nr "after re-add" 3 chk_mptcp_info subflows 2 subflows 2 chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 @@ -4300,7 +4295,7 @@ endpoint_tests() chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal - wait_mpj $ns2 + wait_mpj 4 chk_subflow_nr "after re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 @@ -4312,7 +4307,7 @@ endpoint_tests() chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal - wait_mpj $ns2 + wait_mpj 5 chk_subflow_nr "after re-re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 @@ -4361,9 +4356,9 @@ endpoint_tests() wait_rm_addr $ns2 0 ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - wait_mpj $ns2 + wait_mpj 1 pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal - wait_mpj $ns2 + wait_mpj 2 mptcp_lib_kill_group_wait $tests_pid join_syn_tx=3 join_connect_err=1 \ diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 12ce61fa15a8..979cff56e1f5 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -29,6 +29,7 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_SCTP=m CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_TUNNEL=m CONFIG_IP_VS=m CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_RR=m diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index a68bc882fa4e..7a34ef468975 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -592,16 +592,33 @@ ip -net "$nsr1" link set tun0 up ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0 ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null +ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2 +ip -net "$nsr1" link set tun6 up +ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad + ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1 ip -net "$nsr2" link set tun0 up ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0 ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null +ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1 +ip -net "$nsr2" link set tun6 up +ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad + ip -net "$nsr1" route change default via 192.168.100.2 ip -net "$nsr2" route change default via 192.168.100.1 + +# do not use "route change" and delete old default so +# socat fails to connect in case new default can't be added. +ip -6 -net "$nsr1" route delete default +ip -6 -net "$nsr1" route add default via fee1:3::2 +ip -6 -net "$nsr2" route delete default +ip -6 -net "$nsr2" route add default via fee1:3::1 ip -net "$ns2" route add default via 10.0.2.1 +ip -6 -net "$ns2" route add default via dead:2::1 ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept' +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept' ip netns exec "$nsr1" nft -a insert rule inet filter forward \ 'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept' @@ -611,28 +628,53 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then ret=1 fi +if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then + echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel" +else + echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + # Create vlan tagged devices for IPIP traffic. ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10 ip -net "$nsr1" link set veth1.10 up ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10 +ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept' -ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2 -ip -net "$nsr1" link set tun1 up -ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1 + +ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2 +ip -net "$nsr1" link set tun0.10 up +ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10 ip -net "$nsr1" route change default via 192.168.200.2 -ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null -ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept' +ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept' + +ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2 +ip -net "$nsr1" link set tun6.10 up +ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad +ip -6 -net "$nsr1" route delete default +ip -6 -net "$nsr1" route add default via fee1:5::2 +ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept' ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10 ip -net "$nsr2" link set veth0.10 up ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10 +ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null -ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1 -ip -net "$nsr2" link set tun1 up -ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1 + +ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1 +ip -net "$nsr2" link set tun0.10 up +ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10 ip -net "$nsr2" route change default via 192.168.200.1 -ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null +ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null + +ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1 +ip -net "$nsr2" link set tun6.10 up +ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad +ip -6 -net "$nsr2" route delete default +ip -6 -net "$nsr2" route add default via fee1:5::1 if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2 @@ -640,10 +682,19 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then ret=1 fi +if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then + echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan" +else + echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2 + ip netns exec "$nsr1" nft list ruleset + ret=1 +fi + # Restore the previous configuration ip -net "$nsr1" route change default via 192.168.10.2 ip -net "$nsr2" route change default via 192.168.10.1 ip -net "$ns2" route del default via 10.0.2.1 +ip -6 -net "$ns2" route del default via dead:2::1 } # Another test: diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 6136ceec45e0..139bc1211878 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -510,7 +510,7 @@ EOF udp_listener_ready() { - ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345 + ss -S -N "$1" -uln -o "sport = :$2" | grep -q "$2" } output_files_written() @@ -518,7 +518,7 @@ output_files_written() test -s "$1" && test -s "$2" } -test_udp_ct_race() +test_udp_nat_race() { ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF flush ruleset @@ -545,8 +545,8 @@ EOF ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 & local nfqpid=$! - busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" - busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345 + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345 busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12 # Send two packets, one should end up in ns1, other in ns2. @@ -557,7 +557,7 @@ EOF busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2" - kill "$nfqpid" + kill "$nfqpid" "$rpid1" "$rpid2" if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then echo "FAIL: Expected One udp conntrack entry" @@ -585,6 +585,135 @@ EOF echo "PASS: both udp receivers got one packet each" } +# Make sure UDPGRO aggregated packets don't lose +# their skb->nfct entry when nfqueue passes the +# skb to userspace with software gso segmentation on. +test_udp_gro_ct() +{ + local errprefix="FAIL: test_udp_gro_ct:" + + ip netns exec "$nsrouter" conntrack -F 2>/dev/null + + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet udpq { + # Number of packets/bytes queued to userspace + counter toqueue { } + # Number of packets/bytes reinjected from userspace with 'ct new' intact + counter fromqueue { } + # These two counters should be identical and not 0. + + chain prerouting { + type filter hook prerouting priority -300; policy accept; + + # userspace sends small packets, if < 1000, UDPGRO did + # not kick in, but test needs a 'new' conntrack with udpgro skb. + meta iifname veth0 meta l4proto udp meta length > 1000 accept + + # don't pick up non-gso packets and don't queue them to + # userspace. + notrack + } + + chain postrouting { + type filter hook postrouting priority 0; policy accept; + + # Only queue unconfirmed fraglist gro skbs to userspace. + udp dport 12346 ct status ! confirmed counter name "toqueue" mark set 1 queue num 1 + } + + chain validate { + type filter hook postrouting priority 1; policy accept; + # ... and only count those that were reinjected with the + # skb->nfct intact. + mark 1 counter name "fromqueue" + } +} +EOF + timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & + local rpid=$! + + ip netns exec "$nsrouter" ./nf_queue -G -c -q 1 -t 2 > "$TMPFILE2" & + local nfqpid=$! + + ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12346 + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1 + + local bs=512 + local count=$(((32 * 1024 * 1024) / bs)) + dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 16); do + timeout 5 ip netns exec "$ns1" \ + socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 & + done + + busywait 10000 test -s "$TMPFILE1" + + kill "$rpid" + + wait + + local p + local b + local pqueued + local bqueued + + c=$(ip netns exec "$nsrouter" nft list counter inet udpq "toqueue" | grep packets) + read p pqueued b bqueued <<EOF +$c +EOF + local preinject + local breinject + c=$(ip netns exec "$nsrouter" nft list counter inet udpq "fromqueue" | grep packets) + read p preinject b breinject <<EOF +$c +EOF + ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding off + ip netns exec "$nsrouter" ethtool -K "veth1" rx-udp-gro-forwarding off + + if [ "$pqueued" -eq 0 ];then + # happens when gro did not build at least on aggregate + echo "SKIP: No packets were queued" + return + fi + + local saw_ct_entry=0 + if ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12346 2>/dev/null | wc -l | grep -q "^1"'; then + saw_ct_entry=1 + else + echo "$errprefix Expected udp conntrack entry" + ip netns exec "$nsrouter" conntrack -L + ret=1 + fi + + if [ "$pqueued" -ge "$preinject" ] ;then + echo "$errprefix Expected software segmentation to occur, had $pqueued and $preinject" + ret=1 + return + fi + + # sw segmentation adds extra udp and ip headers. + local breinject_expect=$((preinject * (512 + 20 + 8))) + + if [ "$breinject" -eq "$breinject_expect" ]; then + if [ "$saw_ct_entry" -eq 1 ];then + echo "PASS: fraglist gro skb passed with conntrack entry" + else + echo "$errprefix fraglist gro skb passed without conntrack entry" + ret=1 + fi + else + echo "$errprefix Counter mismatch, conntrack entry dropped by nfqueue? Queued: $pqueued, $bqueued. Post-queue: $preinject, $breinject. Expected $breinject_expect" + ret=1 + fi + + if ! ip netns exec "$nsrouter" nft delete table inet udpq; then + echo "$errprefix: Could not delete udpq table" + ret=1 + fi +} + test_queue_removal() { read tainted_then < /proc/sys/kernel/tainted @@ -663,7 +792,8 @@ test_tcp_localhost_connectclose test_tcp_localhost_requeue test_sctp_forward test_sctp_output -test_udp_ct_race +test_udp_nat_race +test_udp_gro_ct # should be last, adds vrf device in ns1 and changes routes test_icmp_vrf diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt new file mode 100644 index 000000000000..07e9936e70e6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt @@ -0,0 +1,24 @@ +// 3rd ACK + 1st data segment lost, data segments with ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> +// 3rd ACK lost +// 1st data segment lost ++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 1000 e0b 1,nop,nop,nop,sack 1001:2001> ++.002 accept(3, ..., ...) = 4 + ++0.2 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1,nop> + ++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 264 ++.001 > [ect0] . 1:1(0) ack 3001 <ECN e1b 1 ceb 3000 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt new file mode 100644 index 000000000000..76b8422b34dc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt @@ -0,0 +1,30 @@ +// 3rd ACK + 1st data segment lost, 2nd data segments with ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> +// 3rd ACK lost ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 2000) = 2000 +// 1st data segment lost + 2nd gets CE ++.002 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.000 > [ect0] P.5 1005:2001(996) ack 1 <ECN e1b 1 ceb 0 e0b 1, nop> ++0.05 < [ect0] .6 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 996 e1b 1,nop,nop,nop,sack 1005:2001> + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% + ++0.002~+0.1 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.05 < [ect0] .6 1:1(0) ack 2001 win 264 <ECN e0b 1005 ceb 996 e1b 1,nop> + ++0.01 write(4, ..., 1000) = 1000 ++0~+0.002 > [ect0] P.5 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.1 < [ect0] .5 1:1001(1000) ack 3001 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++0~+0.01 > [ect0] .5 3001:3001(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt new file mode 100644 index 000000000000..84060e490589 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt @@ -0,0 +1,19 @@ +// Test 3rd ACK flags when SYN-ACK is rexmitted + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.1 < [ect0] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Our code currently sends a challenge ACK +// when it receives a SYN in ESTABLISHED state +// based on the latest SYN ++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt new file mode 100644 index 000000000000..d3fe09d0606f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt @@ -0,0 +1,18 @@ +// Third ACK CE increases r.cep + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ce] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] WAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt new file mode 100644 index 000000000000..d28722db42b1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt @@ -0,0 +1,22 @@ +// 3rd ACK lost, CE for the first data segment + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> +// 3rd ACK lost ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1 ,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt new file mode 100644 index 000000000000..a4d808116e34 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt @@ -0,0 +1,26 @@ +// Test SYN/ACK rexmit triggered 3rd ACK duplicate + CE on first data seg + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// SYN/ACK rexmitted => two 3rd ACKs in-flight ++1.0~+1.1 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// Delivered 1st 3rd ACK ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + +// Duplicate 3rd ACK delivered ++1.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> + ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop> + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt new file mode 100644 index 000000000000..410a303c6d49 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt @@ -0,0 +1,13 @@ +// Test that when accurate ECN is disabled, +// client uses RFC3168 ECN for SYN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt new file mode 100644 index 000000000000..10728114b11b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN ++0.1 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + +// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0 ++0.01 write(4, ..., 100) = 100 ++.002 > [noecn] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt new file mode 100644 index 000000000000..04d928f0d44d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt @@ -0,0 +1,18 @@ +// Test AccECN -> RFC3168 fallback when sysctl asks for RFC3168 ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt new file mode 100644 index 000000000000..788af6bea69c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt @@ -0,0 +1,34 @@ +// Client negotiates AccECN and starts sending +// AccECN option in last ACK and data segments +// Middlebox drops AccECN option and client +// reverts to ACE flags only + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +sysctl -q net.ipv4.tcp_ecn_option_beacon=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 2001,nop,nop,nop,sack 1:1001> + ++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EA. 1:1(0) ack 1001 <nop,nop,sack 1:1001> + ++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EA. 1:1(0) ack 2001 + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt new file mode 100644 index 000000000000..f5839c2e682d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt @@ -0,0 +1,38 @@ +// Client negotiates AccECN and starts sending +// AccECN option in last ACK and data segments +// Middlebox accepts AccECN option but some packets +// are lost due to congestion. Client should +// continue to send AccECN option + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.102 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.1 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1024,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + +// Send ++0.01 write(4, ..., 3000) = 3000 ++.002 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.002 > [ect0] P.5 1013:2025(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.002 > [ect0] P.5 2025:3001(976) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + +// First two segments were lost due to congestion as SACK was +// received acknowledging 3rd segment ++0.1 < [ect0] .5 1:1(0) ack 1 win 264 <ECN e1b 1 ceb 0 e0b 977,nop,nop,nop,sack 2025:3001> + +// Since data with option was SACKed, we can +// continue to use AccECN option for the rest of +// the connection. This one is a rexmt ++.02~+0.5 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.1 < [ect0] .5 1:1(0) ack 3001 win 264 <ECN e1b 1 ceb 0 e0b 3000,nop> + +// Send new data, it should contain AccECN option ++0.01 write(4, ..., 2000) = 2000 ++.002 > [ect0] .5 3001:4013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.002 > [ect0] P.5 4013:5001(988) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt new file mode 100644 index 000000000000..c00b36d6a833 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt @@ -0,0 +1,12 @@ +// AccECN sysctl server-side only, no ECN/AccECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=5 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8> ++.002 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt new file mode 100644 index 000000000000..f9c27f39f354 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt @@ -0,0 +1,25 @@ +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > [noecn] SEWA 0:0(0) <...> + +0 < [ect1] SW. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > [ect0] EW. 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > [ect0] P5. 1:1001(1000) ack 1 + +0 < [ect0] .5 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > [ect0] F5. 1001:1001(0) ack 1 + +0 < [ect0] .5 1:1(0) ack 1002 win 257 + + +0 < [ect0] F5. 1:1(0) ack 1002 win 257 + +0 > [ect0] . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt new file mode 100644 index 000000000000..6d771234124a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 ++0.05 < [ect0] .5 1:1(0) ack 1461 win 264 ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 8, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt new file mode 100644 index 000000000000..76384f52b021 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt @@ -0,0 +1,31 @@ +// Test false overflow detection with option used to rule out overflow + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + +// Stop sending option to allow easier testing ++0 `sysctl -q net.ipv4.tcp_ecn_option=0` + ++0.002 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + ++0.05 < [ect0] .5 1:1(0) ack 1460 win 264 <ECN e0b 1461 ceb 0 e1b 1,nop> ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 <ECN e0b 14601 ceb 0 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_e0_bytes == 14600, tcpi_delivered_e0_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt new file mode 100644 index 000000000000..8bce5dce35a2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt @@ -0,0 +1,24 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 ++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt new file mode 100644 index 000000000000..5f2b147214f4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (> ACE field max) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + // Fake CE ++0.05 < [ect0] .6 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt new file mode 100644 index 000000000000..fd07bdc14f37 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt @@ -0,0 +1,25 @@ +// Test a large ACK (at ACE field max delta) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 14600) = 14600 ++.002 > [ect0] P.5 1:14601(14600) ack 1 + // Fake CE ++0.05 < [ect0] .4 1:1(0) ack 14601 win 264 + ++0.01 %{ assert tcpi_delivered_ce == 7, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt new file mode 100644 index 000000000000..cb1e70ff2d26 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt @@ -0,0 +1,70 @@ +// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake CE ++0.05 < [ect0] WA. 1:1(0) ack 1001 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 1, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 1000, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake ect0 ++0.05 < [ect0] WA. 1:1(0) ack 2001 win 264 <ECN e0b 1001 ceb 1000 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 1, tcpi_delivered_ce +assert tcpi_delivered_e0_bytes == 1000, tcpi_delivered_e0_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake ce ++0.05 < [ect0] EWA. 1:1(0) ack 3001 win 264 <ECN e0b 1001 ceb 2000 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 2, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 2000, tcpi_delivered_ce_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 3001:4001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake ect1 ++0.05 < [ect0] EWA. 1:1(0) ack 4001 win 264 <ECN e0b 1001 ceb 2000 e1b 1001,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 2, tcpi_delivered_ce +assert tcpi_delivered_e1_bytes == 1000, tcpi_delivered_e1_bytes +}% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 4001:5001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake ce ++0.05 < [ect0] . 1:1(0) ack 5001 win 264 <ECN e0b 1001 ceb 3000 e1b 1001,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 3, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 3000, tcpi_delivered_ce_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt new file mode 100644 index 000000000000..6627c7bb2d26 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt @@ -0,0 +1,12 @@ +// Test that tcp_ecn=4 uses RFC3168 ECN for SYN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=4 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.05 connect(4, ..., ...) = 0 + ++.002 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt new file mode 100644 index 000000000000..51879477bb50 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt @@ -0,0 +1,35 @@ +// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] WA. 1:1(0) ack 2001 <ECN e1b 1 ceb 1000 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EWA. 1:1(0) ack 3001 <ECN e1b 1 ceb 2000 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ect1] EAP. 3001:4001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] EWA. 1:1(0) ack 4001 <ECN e1b 1001 ceb 2000 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 + ++0.05 < [ce] EAP. 4001:5001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] . 1:1(0) ack 5001 <ECN e1b 1001 ceb 3000 e0b 1001,nop> + +0 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt new file mode 100644 index 000000000000..0c72fa4a1251 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt @@ -0,0 +1,14 @@ +// Test IP flags drop +--tolerance_usecs=50000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 1.1 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.02 ~ +1.1 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt new file mode 100644 index 000000000000..171f9433e55f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt @@ -0,0 +1,16 @@ +// SYN/ACK option drop test + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.02 ~+2 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.02 ~+5 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.02 ~+8 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt new file mode 100644 index 000000000000..0f65cf56cd2b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++4~+4.4 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Received an ACK after sending 3rd retransmission, not a blackhole ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt new file mode 100644 index 000000000000..343181633980 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt @@ -0,0 +1,18 @@ +// Test that SYN with ACE flags and without +// ACE flags got dropped. Although we disable +// ECN, we shouldn't consider this as blackholed +// as these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 3.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.02~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0~+0.01 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt new file mode 100644 index 000000000000..37dabc4603c8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt @@ -0,0 +1,23 @@ +// Test AccECN flags bleach + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] . 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [noecn] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt new file mode 100644 index 000000000000..5b14892fda51 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 0,nop> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt new file mode 100644 index 000000000000..25f7cb2feb25 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt @@ -0,0 +1,26 @@ +// Test basic AccECN negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt new file mode 100644 index 000000000000..50e08c492a69 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation without option + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt new file mode 100644 index 000000000000..2904f1ba9975 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt @@ -0,0 +1,23 @@ +// Test basic AccECN negotiation, late option enable + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 1,nop> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt new file mode 100644 index 000000000000..64e0fc1c1f14 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt @@ -0,0 +1,20 @@ +// Test client behavior on receiving a non ECN SYN-ACK +// after receiving an AccECN SYN-ACK and moving to +// ESTABLISHED state + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> +// Receive an AccECN SYN-ACK and move to ESTABLISHED ++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + +// Receive a non ECN SYN-ACK and send a challenge ACK with ACE feedback ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt new file mode 100644 index 000000000000..f407c629a3f7 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt @@ -0,0 +1,27 @@ +// Test basic AccECN negotiation with option off using sysctl + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 ++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1001:2001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt new file mode 100644 index 000000000000..32454e7187f9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt @@ -0,0 +1,27 @@ +// Test no progress filtering + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop> + ++0.01 %{ +assert tcpi_delivered_ce == 0, tcpi_delivered_ce +assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt new file mode 100644 index 000000000000..6597d5f2d778 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt @@ -0,0 +1,28 @@ +// Test that SYN-ACK with ACE flags and without +// ACE flags got dropped. Although we disable ECN, +// we shouldn't consider this as blackholed as +// these are dropped due to congestion + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN ++0.1 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + +// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0 ++0.01 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt new file mode 100644 index 000000000000..0f97dfcfa82d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt @@ -0,0 +1,18 @@ +// Test RFC3168 fallback when sysctl asks for AccECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt new file mode 100644 index 000000000000..9baffdd66fe5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt @@ -0,0 +1,18 @@ +// Test RFC3168 ECN when sysctl asks for RFC3168 ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.05 < . 1:1(0) ack 1 win 320 ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt new file mode 100644 index 000000000000..3fc56f9c6a6f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt @@ -0,0 +1,28 @@ +// Test SACK space grab to fit AccECN option +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 ++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001> ++.01 < [ect0] EAP. 3001:4001(1000) ack 1 win 264 ++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1001,nop,nop,nop,sack 3001:4001 1001:2001> ++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 1001,nop,nop,nop,sack 5001:6001 3001:4001 1001:2001> +// DSACK works? ++.01 < [ect0] EAP. 5001:6001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:6001 5001:6001 3001:4001> ++.01 < [ect1] EAP. 6001:7001(1000) ack 1 win 264 ++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 2001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:7001 3001:4001 1001:2001> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt new file mode 100644 index 000000000000..1c075b5d81ae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt @@ -0,0 +1,39 @@ +// Test SACK space grab to fit AccECN option +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 100,ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + +// One SACK block should allow all 3 AccECN fields: ++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 <nop,nop,TS val 3 ecr 100> ++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 160 ecr 2,ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001> + +// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check ect1 arriving. ++.01 < [ect1] EAP. 3001:4001(1000) ack 1 win 264 <nop,nop,TS val 4 ecr 100> ++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 172 ecr 2,ECN e1b 2001 ceb 0,nop,nop,sack 3001:4001 1001:2001> + +// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check CE arriving. ++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100> ++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 184 ecr 2,ECN e1b 2001 ceb 1000,nop,nop,sack 5001:6001 3001:4001> + +// Check that DSACK works, using 2 SACK blocks in total, if we only need to use 2 AccECN fields: check ect1 arriving. ++.01 < [ect1] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100> ++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 196 ecr 2,ECN e1b 3001 ceb 1000,nop,nop,sack 5001:6001 5001:6001> + +// Check the case where the AccECN option doesn't fit, because sending ect0 +// with order 1 would rquire 3 AccECN fields, +// and TS (12 bytes) + 2 SACK blocks (20 bytes) + 3 AccECN fields (2 + 3*3 bytes) > 40 bytes. +// That's OK; Linux TCP AccECN is optimized for the ECT1 case, not ECT0. ++.01 < [ect0] EAP. 6001:7001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100> ++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 204 ecr 2,nop,nop,sack 5001:7001 3001:4001 1001:2001> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt new file mode 100644 index 000000000000..6b88ab78bfce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt @@ -0,0 +1,20 @@ +// Test against classic ECN server +// Not-ECT on SYN and server sets 1|0|1 (AE is unused for classic ECN) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SEA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700> + ++0 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700> ++0 close(4) = 0 + ++.002 > [ect0] F.5 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700> ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt new file mode 100644 index 000000000000..d24ada008ece --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt @@ -0,0 +1,20 @@ +// Test against classic ECN server +// Not-ECT on SYN and server sets 0|0|1 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SE. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700> + ++0 write(4, ..., 100) = 100 ++.002 > [ect0] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700> ++0 close(4) = 0 + ++0 > [noecn] F. 101:101(0) ack 1 <...> ++0.1 < R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt new file mode 100644 index 000000000000..a20d7e890ee1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt @@ -0,0 +1,19 @@ +// Test against broken server (1|1|1) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] SEWA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700> + ++0 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700> ++0 close(4) = 0 + ++.002 > [noecn] F. 101:101(0) ack 1 <...> ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt new file mode 100644 index 000000000000..428255bedab7 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt @@ -0,0 +1,19 @@ +// Test against Non ECN server (0|0|0) + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8> ++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700> + ++0 write(4, ..., 100) = 100 ++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700> ++0 close(4) = 0 + ++.002 > [noecn] F. 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700> ++0.1 < [noecn] R. 1:1(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt new file mode 100644 index 000000000000..e9a5a0d3677c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt @@ -0,0 +1,18 @@ +// Test AccECN with sysctl set to server-side only + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=5 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt new file mode 100644 index 000000000000..412fa903105c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt @@ -0,0 +1,18 @@ +// Test that SYN with ACE flags was Acked +// after 2nd retransmission. In this case, +// since we got SYN-ACK that supports Accurate +// ECN, we consider this as successful negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 2.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + ++0.1 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++0~+0.01 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt new file mode 100644 index 000000000000..4622754a2270 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt @@ -0,0 +1,16 @@ +// Test that SYN with ACE flags got dropped +// We retry one more time with ACE and then +// fallback to disabled ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 2.1 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0~+0.01 > [noecn] . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt new file mode 100644 index 000000000000..ee15f108cafe --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt @@ -0,0 +1,27 @@ +// Test that SYN-ACK with ACE flags was Acked +// after 2nd retransmission. In this case, +// since we got the last ACK that supports Accurate +// ECN, we consider this as successful negotiation + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Received an ACK with ACE flags, state should be set to negotiation succeeded ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt new file mode 100644 index 000000000000..ccfe353a8ee4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt @@ -0,0 +1,26 @@ +// Test that SYN-ACK with ACE flags got dropped +// We retry one more time with ACE and then +// fallback to disabled ECN + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// SYN-ACK maybe getting blackholed, disable ECN ++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// Received an ACK with no ACE flags, state should be set to blackholed ++0.1 < [noecn] . 1:1(0) ack 1 win 320 ++0 accept(3, ..., ...) = 4 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt new file mode 100644 index 000000000000..dc83f7a18180 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ce] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SWA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt new file mode 100644 index 000000000000..e63a8d018c37 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ect0] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt new file mode 100644 index 000000000000..23c0e43b3dbe --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt @@ -0,0 +1,13 @@ +// Test AccECN ECN field reflector in SYNACK + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < [ect1] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SEW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt new file mode 100644 index 000000000000..c3497738f680 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt @@ -0,0 +1,27 @@ +// Test SYNACK CE & received_ce update + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [ce] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.6 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> ++.002 > [ect0] .6 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop> + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.6 101:201(100) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop> + ++0.1 < [ect1] P.5 201:301(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> ++.002 > [ect0] .6 201:201(0) ack 101 <ECN e1b 101 ceb 0 e0b 101,nop,nop,nop,sack 201:301> + ++0.01 < [ce] .6 401:501(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> ++.002 > [ect0] .7 201:201(0) ack 101 <ECN e1b 101 ceb 100 e0b 101,nop,nop,nop,sack 401:501 201:301> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt new file mode 100644 index 000000000000..5fd77f466572 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt @@ -0,0 +1,22 @@ +// Reflected SYNACK CE mark increases delivered_ce + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_fallback=0 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + ++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> +// Fake ce for prev, ECT validator must be disabled for this to work ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt new file mode 100644 index 000000000000..f6ad1ea5c0c4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt @@ -0,0 +1,24 @@ +// Test SYN=0 reflector + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect0] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> + ++0.01 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop> ++0 read(4, ..., 100) = 100 + ++0 close(4) = 0 ++0 > F.5 101:101(0) ack 101 <...> ++0.1 < R. 101:101(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt new file mode 100644 index 000000000000..7ecfc5fb9dbb --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt @@ -0,0 +1,24 @@ +// Test SYN=0 reflector + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < [ect1] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] EW. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> ++0.05 < [ect1] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop> + ++0.01 < [ect1] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 101 ceb 0 e0b 1,nop> ++0 read(4, ..., 100) = 100 + ++0 close(4) = 0 ++0 > F5. 101:101(0) ack 101 <...> ++0.1 < R. 101:101(0) ack 102 win 4242 diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt new file mode 100644 index 000000000000..9e0959782ef5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt @@ -0,0 +1,15 @@ +// Test 3rd ACK flags when SYN-ACK is rexmitted + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++.002 ... 0.052 connect(4, ..., ...) = 0 + ++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> + ++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8> ++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt new file mode 100644 index 000000000000..a5a41633af07 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt @@ -0,0 +1,25 @@ +// Test that we retransmit SYN-ACK with ACE and without +// AccECN options after +// SYN-ACK was lost and TCP moved to TCPS_SYN_RECEIVED + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +sysctl -q net.ipv4.tcp_ecn_option=2 +` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 1) = 0 + ++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8> + +// Retransmit SYN-ACK without option ++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> ++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop> ++.002 accept(3, ..., ...) = 4 + +// We try to write with AccECN option ++0.01 write(4, ..., 100) = 100 ++.002 > [ect0] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt new file mode 100644 index 000000000000..f3fe2f098966 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt @@ -0,0 +1,26 @@ +// Test TS progress filtering +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2> + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 83> + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt new file mode 100644 index 000000000000..1446799d2481 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt @@ -0,0 +1,25 @@ +// Test TS progress filtering +--tcp_ts_tick_usecs=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=3 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8> ++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8> ++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10> ++.002 accept(3, ..., ...) = 4 + ++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }% + ++0.01 write(4, ..., 1000) = 1000 ++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2> + // Fake CE and claim no progress ++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 3 ecr 83> + ++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt new file mode 100644 index 000000000000..319f81dd717d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Minimal active open. +// First to close connection. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + + // Connect to server: active open: three-way handshake + +0...0 connect(4, ..., ...) = 0 + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 65535 <mss 1460,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + // Send data + +0 send(4, ..., 1000, 0) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(4) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt new file mode 100644 index 000000000000..e72a291b666e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Minimal passive open. +// Peer is first to close. + +`./defaults.sh` + + // Open listener socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + // Incoming connection: passive open: three-way handshake + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + // Open connection socket and close listener socket + +0 accept(3, ..., ...) = 4 + +0 close(3) = 0 + + // Peer sends data: acknowledge and receive + +0 < P. 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 + +0 recv(4, ..., 1000, 0) = 1000 + + // Peer initiates connection close + +0 < F. 1001:1001(0) ack 1 win 257 + +.04 > . 1:1(0) ack 1002 + + // Local socket also closes its side + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1002 + +0 < . 1002:1002(0) ack 2 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt new file mode 100644 index 000000000000..95a1957a2cf9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test after "tcp: tcp_tx_timestamp() must look at the rtx queue" + +// This test is about receiving the SCM_TSTAMP_ACK, +// we do not care about its SCM_TIMESTAMPING precision. +--tolerance_usecs=1000000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_min_tso_segs=70 +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.010 < S. 0:0(0) ack 1 win 65535 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [30000], 4) = 0 + + +0 write(3, ..., 9880) = 9880 + +0 > P. 1:9881(9880) ack 1 <nop,nop,TS val 200 ecr 700> ++.010 < . 1:1(0) ack 9881 win 10000 <nop,nop,TS val 701 ecr 200> + + +0 write(3, ..., 19760) = 19760 + +0 > P. 9881:29641(19760) ack 1 <nop,nop,TS val 201 ecr 701> ++.010 < . 1:1(0) ack 29641 win 10000 <nop,nop,TS val 702 ecr 201> + + +0 write(3, ..., 39520) = 39520 + +0 > P. 29641:69161(39520) ack 1 <nop,nop,TS val 202 ecr 702> ++.010 < . 1:1(0) ack 69161 win 10000 <nop,nop,TS val 703 ecr 202> + +// One more write to increase cwnd + +0 write(3, ..., 79040) = 79040 + +0 > P. 69161:108681(39520) ack 1 <nop,nop,TS val 203 ecr 703> + +0 > P. 108681:148201(39520) ack 1 <nop,nop,TS val 203 ecr 703> ++.010 < . 1:1(0) ack 148201 win 1000 <nop,nop,TS val 704 ecr 203> + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have one write filling one skb +// last byte can not be stored because of our small SO_SNDBUF + +0 write(3, ..., 65209) = 65208 + +0 > P. 148201:213409(65208) ack 1 <nop,nop,TS val 204 ecr 704> ++.010 < . 1:1(0) ack 213409 win 1000 <nop,nop,TS val 705 ecr 204> + +// SCM_TSTAMP_ACK should be received after the last ack at +// t=60ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=60000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=65207}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c index 8d82140f0f76..3b1ee2d3d417 100644 --- a/tools/testing/selftests/net/tfo.c +++ b/tools/testing/selftests/net/tfo.c @@ -82,8 +82,10 @@ static void run_server(void) error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); if (read(connfd, buf, 64) < 0) - perror("read()"); - fprintf(outfile, "%d\n", opt); + error(1, errno, "read()"); + + if (fprintf(outfile, "%d\n", opt) < 0) + error(1, errno, "fprintf()"); fclose(outfile); close(connfd); @@ -92,14 +94,17 @@ static void run_server(void) static void run_client(void) { - int fd; + int fd, ret; char *msg = "Hello, world!"; fd = socket(AF_INET6, SOCK_STREAM, 0); if (fd == -1) error(1, errno, "socket()"); - sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + ret = sendto(fd, msg, strlen(msg), MSG_FASTOPEN, + (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + if (ret < 0) + error(1, errno, "sendto()"); close(fd); } diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh index a4550511830a..f116f888b794 100755 --- a/tools/testing/selftests/net/tfo_passive.sh +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -85,12 +85,15 @@ timeout -k 1s 30s ip netns exec nssv ./tfo \ -s \ -p ${SERVER_PORT} \ -o ${out_file}& +server_pid="$!" wait_local_port_listen nssv ${SERVER_PORT} tcp ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT} +client_exit_status="$?" -wait +wait "$server_pid" +server_exit_status="$?" res=$(cat $out_file) rm $out_file @@ -101,6 +104,14 @@ if [ "$res" = "0" ]; then exit 1 fi +if [ "$client_exit_status" -ne 0 ] || [ "$server_exit_status" -ne 0 ]; then + # Note: timeout(1) exits with 124 if it timed out + echo "client exited with ${client_exit_status}" + echo "server exited with ${server_exit_status}" + cleanup_ns + exit 1 +fi + echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index a4d16a460fbe..9e2ccea13d70 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -3260,17 +3260,25 @@ TEST(data_steal) { ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); /* Spawn a child and get it into the read wait path of the underlying - * TCP socket. + * TCP socket (before kernel .recvmsg is replaced with the TLS one). */ pid = fork(); ASSERT_GE(pid, 0); if (!pid) { - EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL), - sizeof(buf) / 2); + EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2 + 1, MSG_WAITALL), + sizeof(buf) / 2 + 1); exit(!__test_passed(_metadata)); } - usleep(10000); + /* Send a sync byte and poll until it's consumed to ensure + * the child is in recv() before we proceed to install TLS. + */ + ASSERT_EQ(send(fd, buf, 1, 0), 1); + do { + usleep(500); + } while (recv(cfd, buf, 1, MSG_PEEK | MSG_DONTWAIT) == 1); + EXPECT_EQ(errno, EAGAIN); + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c index 0efc67b0357a..8a5cd5cb5472 100644 --- a/tools/testing/selftests/net/tun.c +++ b/tools/testing/selftests/net/tun.c @@ -8,14 +8,119 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <linux/if.h> #include <linux/if_tun.h> -#include <linux/netlink.h> -#include <linux/rtnetlink.h> #include <sys/ioctl.h> #include <sys/socket.h> #include "kselftest_harness.h" +#include "tuntap_helpers.h" + +static const char param_dev_geneve_name[] = "geneve1"; +static unsigned char param_hwaddr_outer_dst[] = { 0x00, 0xfe, 0x98, + 0x14, 0x22, 0x42 }; +static unsigned char param_hwaddr_outer_src[] = { 0x00, 0xfe, 0x98, + 0x94, 0xd2, 0x43 }; +static unsigned char param_hwaddr_inner_dst[] = { 0x00, 0xfe, 0x98, + 0x94, 0x22, 0xcc }; +static unsigned char param_hwaddr_inner_src[] = { 0x00, 0xfe, 0x98, + 0x94, 0xd2, 0xdd }; + +static struct in_addr param_ipaddr4_outer_dst = { + __constant_htonl(0xac100001), +}; + +static struct in_addr param_ipaddr4_outer_src = { + __constant_htonl(0xac100002), +}; + +static struct in_addr param_ipaddr4_inner_dst = { + __constant_htonl(0xac100101), +}; + +static struct in_addr param_ipaddr4_inner_src = { + __constant_htonl(0xac100102), +}; + +static struct in6_addr param_ipaddr6_outer_dst = { + { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, +}; + +static struct in6_addr param_ipaddr6_outer_src = { + { { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }, +}; + +static struct in6_addr param_ipaddr6_inner_dst = { + { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, +}; + +static struct in6_addr param_ipaddr6_inner_src = { + { { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }, +}; + +#ifndef BIT +#define BIT(nr) (1UL << (nr)) +#endif + +#define VN_ID 1 +#define VN_PORT 4789 +#define UDP_SRC_PORT 22 +#define UDP_DST_PORT 48878 +#define IPPREFIX_LEN 24 +#define IP6PREFIX_LEN 64 +#define TIMEOUT_SEC 10 +#define TIMEOUT_USEC 100000 +#define MAX_RETRIES 20 + +#define UDP_TUNNEL_GENEVE_4IN4 0x01 +#define UDP_TUNNEL_GENEVE_6IN4 0x02 +#define UDP_TUNNEL_GENEVE_4IN6 0x04 +#define UDP_TUNNEL_GENEVE_6IN6 0x08 + +#define UDP_TUNNEL_MAX_SEGMENTS BIT(7) + +#define UDP_TUNNEL_OUTER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_6IN4) +#define UDP_TUNNEL_INNER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_4IN6) + +#define UDP_TUNNEL_GENEVE_4IN4_HDRLEN \ + (ETH_HLEN + 2 * sizeof(struct iphdr) + GENEVE_HLEN + \ + 2 * sizeof(struct udphdr)) +#define UDP_TUNNEL_GENEVE_6IN6_HDRLEN \ + (ETH_HLEN + 2 * sizeof(struct ipv6hdr) + GENEVE_HLEN + \ + 2 * sizeof(struct udphdr)) +#define UDP_TUNNEL_GENEVE_4IN6_HDRLEN \ + (ETH_HLEN + sizeof(struct iphdr) + sizeof(struct ipv6hdr) + \ + GENEVE_HLEN + 2 * sizeof(struct udphdr)) +#define UDP_TUNNEL_GENEVE_6IN4_HDRLEN \ + (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct iphdr) + \ + GENEVE_HLEN + 2 * sizeof(struct udphdr)) + +#define UDP_TUNNEL_HDRLEN(type) \ + ((type) == UDP_TUNNEL_GENEVE_4IN4 ? UDP_TUNNEL_GENEVE_4IN4_HDRLEN : \ + (type) == UDP_TUNNEL_GENEVE_6IN6 ? UDP_TUNNEL_GENEVE_6IN6_HDRLEN : \ + (type) == UDP_TUNNEL_GENEVE_4IN6 ? UDP_TUNNEL_GENEVE_4IN6_HDRLEN : \ + (type) == UDP_TUNNEL_GENEVE_6IN4 ? UDP_TUNNEL_GENEVE_6IN4_HDRLEN : \ + 0) + +#define UDP_TUNNEL_MSS(type) (ETH_DATA_LEN - UDP_TUNNEL_HDRLEN(type)) +#define UDP_TUNNEL_MAX(type, is_tap) \ + (ETH_MAX_MTU - UDP_TUNNEL_HDRLEN(type) - ((is_tap) ? ETH_HLEN : 0)) + +#define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel) +#define MAX_VNET_TUNNEL_PACKET_SZ \ + (TUN_VNET_TNL_SIZE + ETH_HLEN + UDP_TUNNEL_GENEVE_6IN6_HDRLEN + \ + ETH_MAX_MTU) + +struct geneve_setup_config { + int family; + union { + struct in_addr r4; + struct in6_addr r6; + } remote; + __be32 vnid; + __be16 vnport; + unsigned char hwaddr[6]; + uint8_t csum; +}; static int tun_attach(int fd, char *dev) { @@ -25,7 +130,7 @@ static int tun_attach(int fd, char *dev) strcpy(ifr.ifr_name, dev); ifr.ifr_flags = IFF_ATTACH_QUEUE; - return ioctl(fd, TUNSETQUEUE, (void *) &ifr); + return ioctl(fd, TUNSETQUEUE, (void *)&ifr); } static int tun_detach(int fd, char *dev) @@ -36,7 +141,7 @@ static int tun_detach(int fd, char *dev) strcpy(ifr.ifr_name, dev); ifr.ifr_flags = IFF_DETACH_QUEUE; - return ioctl(fd, TUNSETQUEUE, (void *) &ifr); + return ioctl(fd, TUNSETQUEUE, (void *)&ifr); } static int tun_alloc(char *dev) @@ -54,7 +159,7 @@ static int tun_alloc(char *dev) strcpy(ifr.ifr_name, dev); ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE; - err = ioctl(fd, TUNSETIFF, (void *) &ifr); + err = ioctl(fd, TUNSETIFF, (void *)&ifr); if (err < 0) { fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno)); close(fd); @@ -66,42 +171,315 @@ static int tun_alloc(char *dev) static int tun_delete(char *dev) { - struct { - struct nlmsghdr nh; - struct ifinfomsg ifm; - unsigned char data[64]; - } req; - struct rtattr *rta; - int ret, rtnl; + return ip_link_del(dev); +} + +static int tun_open(char *dev, const int flags, const int hdrlen, + const int features, const unsigned char *mac_addr) +{ + struct ifreq ifr = { 0 }; + int fd, sk = -1; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + perror("open"); + return -1; + } + + ifr.ifr_flags = flags; + if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) { + perror("ioctl(TUNSETIFF)"); + goto err; + } + strcpy(dev, ifr.ifr_name); + + if (hdrlen > 0) { + if (ioctl(fd, TUNSETVNETHDRSZ, &hdrlen) < 0) { + perror("ioctl(TUNSETVNETHDRSZ)"); + goto err; + } + } + + if (features) { + if (ioctl(fd, TUNSETOFFLOAD, features) < 0) { + perror("ioctl(TUNSETOFFLOAD)"); + goto err; + } + } + + sk = socket(PF_INET, SOCK_DGRAM, 0); + if (sk < 0) { + perror("socket"); + goto err; + } + + if (ioctl(sk, SIOCGIFFLAGS, &ifr) < 0) { + perror("ioctl(SIOCGIFFLAGS)"); + goto err; + } + + ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); + if (ioctl(sk, SIOCSIFFLAGS, &ifr) < 0) { + perror("ioctl(SIOCSIFFLAGS)"); + goto err; + } + + if (mac_addr && flags & IFF_TAP) { + ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; + memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETH_ALEN); + + if (ioctl(sk, SIOCSIFHWADDR, &ifr) < 0) { + perror("ioctl(SIOCSIFHWADDR)"); + goto err; + } + } + +out: + if (sk >= 0) + close(sk); + return fd; + +err: + close(fd); + fd = -1; + goto out; +} + +static size_t sockaddr_len(int family) +{ + return (family == AF_INET) ? sizeof(struct sockaddr_in) : + sizeof(struct sockaddr_in6); +} + +static int geneve_fill_newlink(struct rt_link_newlink_req *req, void *data) +{ + struct geneve_setup_config *cfg = data; + +#define SET_GENEVE_REMOTE rt_link_newlink_req_set_linkinfo_data_geneve_remote +#define SET_GENEVE_REMOTE6 rt_link_newlink_req_set_linkinfo_data_geneve_remote6 + + rt_link_newlink_req_set_address(req, cfg->hwaddr, ETH_ALEN); + rt_link_newlink_req_set_linkinfo_data_geneve_id(req, cfg->vnid); + rt_link_newlink_req_set_linkinfo_data_geneve_port(req, cfg->vnport); + rt_link_newlink_req_set_linkinfo_data_geneve_udp_csum(req, cfg->csum); + + if (cfg->family == AF_INET) + SET_GENEVE_REMOTE(req, cfg->remote.r4.s_addr); + else + SET_GENEVE_REMOTE6(req, &cfg->remote.r6, + sizeof(cfg->remote.r6)); + + return 0; +} + +static int geneve_create(const char *dev, int family, void *remote, + void *hwaddr) +{ + struct geneve_setup_config geneve; + + memset(&geneve, 0, sizeof(geneve)); + geneve.vnid = VN_ID; + geneve.vnport = htons(VN_PORT); + geneve.csum = 1; + geneve.family = family; + if (family == AF_INET) + memcpy(&geneve.remote.r4, remote, sizeof(struct in_addr)); + else + memcpy(&geneve.remote.r6, remote, sizeof(struct in6_addr)); + memcpy(geneve.hwaddr, hwaddr, ETH_ALEN); + + return ip_link_add(dev, "geneve", geneve_fill_newlink, (void *)&geneve); +} + +static int set_pmtu_discover(int fd, bool is_ipv4) +{ + int level, name, val; + + if (is_ipv4) { + level = SOL_IP; + name = IP_MTU_DISCOVER; + val = IP_PMTUDISC_DO; + } else { + level = SOL_IPV6; + name = IPV6_MTU_DISCOVER; + val = IPV6_PMTUDISC_DO; + } + + return setsockopt(fd, level, name, &val, sizeof(val)); +} + +static int udp_socket_open(struct sockaddr_storage *ssa, bool do_frag, + bool do_connect, struct sockaddr_storage *dsa) +{ + struct timeval to = { .tv_sec = TIMEOUT_SEC }; + int fd, family = ssa->ss_family; + int salen = sockaddr_len(family); + + fd = socket(family, SOCK_DGRAM, 0); + if (fd < 0) + return -1; + + if (bind(fd, (struct sockaddr *)ssa, salen) < 0) { + perror("bind"); + goto err; + } + + if (do_connect && connect(fd, (struct sockaddr *)dsa, salen) < 0) { + perror("connect"); + goto err; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &to, sizeof(to)) < 0) { + perror("setsockopt(SO_RCVTIMEO)"); + goto err; + } + + if (!do_frag && set_pmtu_discover(fd, family == AF_INET) < 0) { + perror("set_pmtu_discover"); + goto err; + } + return fd; + +err: + close(fd); + return -1; +} + +static void parse_route_rsp(struct rt_route_getroute_rsp *rsp, void *rtm_type) +{ + *(uint8_t *)rtm_type = rsp->_hdr.rtm_type; +} + +static int ip_route_check(const char *intf, int family, void *addr) +{ + uint8_t rtm_type, table = RT_TABLE_LOCAL; + int retries = MAX_RETRIES; - rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); - if (rtnl < 0) { - fprintf(stderr, "can't open rtnl: %s\n", strerror(errno)); - return 1; + while (retries-- > 0) { + if (ip_route_get(intf, family, table, addr, parse_route_rsp, + &rtm_type) == 0 && + rtm_type == RTN_LOCAL) + break; + + usleep(TIMEOUT_USEC); } - memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm))); - req.nh.nlmsg_flags = NLM_F_REQUEST; - req.nh.nlmsg_type = RTM_DELLINK; + if (retries < 0) + return -1; + + return 0; +} + +static int send_gso_udp_msg(int socket, struct sockaddr_storage *addr, + uint8_t *send_buf, int send_len, int gso_size) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = { 0 }; + int alen = sockaddr_len(addr->ss_family); + struct msghdr msg = { 0 }; + struct iovec iov = { 0 }; + int ret; + + iov.iov_base = send_buf; + iov.iov_len = send_len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_name = addr; + msg.msg_namelen = alen; - req.ifm.ifi_family = AF_UNSPEC; + if (gso_size > 0) { + struct cmsghdr *cmsg; - rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); - rta->rta_type = IFLA_IFNAME; - rta->rta_len = RTA_LENGTH(IFNAMSIZ); - req.nh.nlmsg_len += rta->rta_len; - memcpy(RTA_DATA(rta), dev, IFNAMSIZ); + msg.msg_control = control; + msg.msg_controllen = sizeof(control); - ret = send(rtnl, &req, req.nh.nlmsg_len, 0); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_UDP; + cmsg->cmsg_type = UDP_SEGMENT; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t)); + *(uint16_t *)CMSG_DATA(cmsg) = gso_size; + } + + ret = sendmsg(socket, &msg, 0); if (ret < 0) - fprintf(stderr, "can't send: %s\n", strerror(errno)); - ret = (unsigned int)ret != req.nh.nlmsg_len; + perror("sendmsg"); - close(rtnl); return ret; } +static int validate_hdrlen(uint8_t **cur, int *len, int x) +{ + if (*len < x) + return -1; + *cur += x; + *len -= x; + return 0; +} + +static int parse_udp_tunnel_vnet_packet(uint8_t *buf, int len, int tunnel_type, + bool is_tap) +{ + struct ipv6hdr *iph6; + struct udphdr *udph; + struct iphdr *iph4; + uint8_t *cur = buf; + + if (validate_hdrlen(&cur, &len, TUN_VNET_TNL_SIZE)) + return -1; + + if (is_tap) { + if (validate_hdrlen(&cur, &len, ETH_HLEN)) + return -1; + } + + if (tunnel_type & UDP_TUNNEL_OUTER_IPV4) { + iph4 = (struct iphdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct iphdr))) + return -1; + if (iph4->version != 4 || iph4->protocol != IPPROTO_UDP) + return -1; + } else { + iph6 = (struct ipv6hdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct ipv6hdr))) + return -1; + if (iph6->version != 6 || iph6->nexthdr != IPPROTO_UDP) + return -1; + } + + udph = (struct udphdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct udphdr))) + return -1; + if (ntohs(udph->dest) != VN_PORT) + return -1; + + if (validate_hdrlen(&cur, &len, GENEVE_HLEN)) + return -1; + if (validate_hdrlen(&cur, &len, ETH_HLEN)) + return -1; + + if (tunnel_type & UDP_TUNNEL_INNER_IPV4) { + iph4 = (struct iphdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct iphdr))) + return -1; + if (iph4->version != 4 || iph4->protocol != IPPROTO_UDP) + return -1; + } else { + iph6 = (struct ipv6hdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct ipv6hdr))) + return -1; + if (iph6->version != 6 || iph6->nexthdr != IPPROTO_UDP) + return -1; + } + + udph = (struct udphdr *)cur; + if (validate_hdrlen(&cur, &len, sizeof(struct udphdr))) + return -1; + if (ntohs(udph->dest) != UDP_DST_PORT) + return -1; + + return len; +} + FIXTURE(tun) { char ifname[IFNAMSIZ]; @@ -127,31 +505,36 @@ FIXTURE_TEARDOWN(tun) close(self->fd2); } -TEST_F(tun, delete_detach_close) { +TEST_F(tun, delete_detach_close) +{ EXPECT_EQ(tun_delete(self->ifname), 0); EXPECT_EQ(tun_detach(self->fd, self->ifname), -1); EXPECT_EQ(errno, 22); } -TEST_F(tun, detach_delete_close) { +TEST_F(tun, detach_delete_close) +{ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); EXPECT_EQ(tun_delete(self->ifname), 0); } -TEST_F(tun, detach_close_delete) { +TEST_F(tun, detach_close_delete) +{ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); close(self->fd); self->fd = -1; EXPECT_EQ(tun_delete(self->ifname), 0); } -TEST_F(tun, reattach_delete_close) { +TEST_F(tun, reattach_delete_close) +{ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); EXPECT_EQ(tun_delete(self->ifname), 0); } -TEST_F(tun, reattach_close_delete) { +TEST_F(tun, reattach_close_delete) +{ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); close(self->fd); @@ -159,4 +542,447 @@ TEST_F(tun, reattach_close_delete) { EXPECT_EQ(tun_delete(self->ifname), 0); } +FIXTURE(tun_vnet_udptnl) +{ + char ifname[IFNAMSIZ]; + int fd, sock; +}; + +FIXTURE_VARIANT(tun_vnet_udptnl) +{ + int tunnel_type; + int gso_size; + int data_size; + int r_num_mss; + bool is_tap, no_gso; +}; + +/* clang-format off */ +#define TUN_VNET_UDPTNL_VARIANT_ADD(type, desc) \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1byte) { \ + /* no GSO: send a single byte */ \ + .tunnel_type = type, \ + .data_size = 1, \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1mss) { \ + /* no GSO: send a single MSS, fall back to no GSO */ \ + .tunnel_type = type, \ + .data_size = UDP_TUNNEL_MSS(type), \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_gtmss) { \ + /* no GSO: send a single MSS + 1B: fail */ \ + .tunnel_type = type, \ + .data_size = UDP_TUNNEL_MSS(type) + 1, \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1byte) { \ + /* GSO: send 1 byte, gso 1 byte, fall back to no GSO */ \ + .tunnel_type = type, \ + .gso_size = 1, \ + .data_size = 1, \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1mss) { \ + /* send a single MSS: fall back to no GSO */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = UDP_TUNNEL_MSS(type), \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_ltgso) { \ + /* data <= MSS < gso: will fall back to no GSO */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type) + 1, \ + .data_size = UDP_TUNNEL_MSS(type), \ + .r_num_mss = 1, \ + .is_tap = true, \ + .no_gso = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_gtgso) { \ + /* GSO: a single MSS + 1B */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = UDP_TUNNEL_MSS(type) + 1, \ + .r_num_mss = 2, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_2mss) { \ + /* no GSO: send exactly 2 MSS */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = UDP_TUNNEL_MSS(type) * 2, \ + .r_num_mss = 2, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxbytes) { \ + /* GSO: send max bytes */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = UDP_TUNNEL_MAX(type, true), \ + .r_num_mss = UDP_TUNNEL_MAX(type, true) / \ + UDP_TUNNEL_MSS(type) + 1, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_over_maxbytes) { \ + /* GSO: send oversize max bytes: fail */ \ + .tunnel_type = type, \ + .gso_size = UDP_TUNNEL_MSS(type), \ + .data_size = ETH_MAX_MTU, \ + .r_num_mss = ETH_MAX_MTU / UDP_TUNNEL_MSS(type) + 1, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxsegs) { \ + /* GSO: send max number of min sized segments */ \ + .tunnel_type = type, \ + .gso_size = 1, \ + .data_size = UDP_TUNNEL_MAX_SEGMENTS, \ + .r_num_mss = UDP_TUNNEL_MAX_SEGMENTS, \ + .is_tap = true, \ + }; \ + FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_5byte) { \ + /* GSO: send 5 bytes, gso 2 bytes */ \ + .tunnel_type = type, \ + .gso_size = 2, \ + .data_size = 5, \ + .r_num_mss = 3, \ + .is_tap = true, \ + } /* clang-format on */ + +TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN4, 4in4); +TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN4, 6in4); +TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN6, 4in6); +TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN6, 6in6); + +static void assign_ifaddr_vars(int family, int is_outer, void **srcip, + void **dstip, void **srcmac, void **dstmac) +{ + if (is_outer) { + if (family == AF_INET) { + *srcip = (void *)¶m_ipaddr4_outer_src; + *dstip = (void *)¶m_ipaddr4_outer_dst; + } else { + *srcip = (void *)¶m_ipaddr6_outer_src; + *dstip = (void *)¶m_ipaddr6_outer_dst; + } + *srcmac = param_hwaddr_outer_src; + *dstmac = param_hwaddr_outer_dst; + } else { + if (family == AF_INET) { + *srcip = (void *)¶m_ipaddr4_inner_src; + *dstip = (void *)¶m_ipaddr4_inner_dst; + } else { + *srcip = (void *)¶m_ipaddr6_inner_src; + *dstip = (void *)¶m_ipaddr6_inner_dst; + } + *srcmac = param_hwaddr_inner_src; + *dstmac = param_hwaddr_inner_dst; + } +} + +static void assign_sockaddr_vars(int family, int is_outer, + struct sockaddr_storage *src, + struct sockaddr_storage *dst) +{ + src->ss_family = family; + dst->ss_family = family; + + if (family == AF_INET) { + struct sockaddr_in *s4 = (struct sockaddr_in *)src; + struct sockaddr_in *d4 = (struct sockaddr_in *)dst; + + s4->sin_addr = is_outer ? param_ipaddr4_outer_src : + param_ipaddr4_inner_src; + d4->sin_addr = is_outer ? param_ipaddr4_outer_dst : + param_ipaddr4_inner_dst; + if (!is_outer) { + s4->sin_port = htons(UDP_SRC_PORT); + d4->sin_port = htons(UDP_DST_PORT); + } + } else { + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst; + + s6->sin6_addr = is_outer ? param_ipaddr6_outer_src : + param_ipaddr6_inner_src; + d6->sin6_addr = is_outer ? param_ipaddr6_outer_dst : + param_ipaddr6_inner_dst; + if (!is_outer) { + s6->sin6_port = htons(UDP_SRC_PORT); + d6->sin6_port = htons(UDP_DST_PORT); + } + } +} + +FIXTURE_SETUP(tun_vnet_udptnl) +{ + int ret, family, prefix, flags, features; + int tunnel_type = variant->tunnel_type; + struct sockaddr_storage ssa, dsa; + void *sip, *dip, *smac, *dmac; + + flags = (variant->is_tap ? IFF_TAP : IFF_TUN) | IFF_VNET_HDR | + IFF_MULTI_QUEUE | IFF_NO_PI; + features = TUN_F_CSUM | TUN_F_UDP_TUNNEL_GSO | + TUN_F_UDP_TUNNEL_GSO_CSUM | TUN_F_USO4 | TUN_F_USO6; + self->fd = tun_open(self->ifname, flags, TUN_VNET_TNL_SIZE, features, + param_hwaddr_outer_src); + ASSERT_GE(self->fd, 0); + + family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : AF_INET6; + prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN; + assign_ifaddr_vars(family, 1, &sip, &dip, &smac, &dmac); + + ret = ip_addr_add(self->ifname, family, sip, prefix); + ASSERT_EQ(ret, 0); + ret = ip_neigh_add(self->ifname, family, dip, dmac); + ASSERT_EQ(ret, 0); + ret = ip_route_check(self->ifname, family, sip); + ASSERT_EQ(ret, 0); + + ret = geneve_create(param_dev_geneve_name, family, dip, + param_hwaddr_inner_src); + ASSERT_EQ(ret, 0); + + family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : AF_INET6; + prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN; + assign_ifaddr_vars(family, 0, &sip, &dip, &smac, &dmac); + + ret = ip_addr_add(param_dev_geneve_name, family, sip, prefix); + ASSERT_EQ(ret, 0); + ret = ip_neigh_add(param_dev_geneve_name, family, dip, dmac); + ASSERT_EQ(ret, 0); + ret = ip_route_check(param_dev_geneve_name, family, sip); + ASSERT_EQ(ret, 0); + + assign_sockaddr_vars(family, 0, &ssa, &dsa); + self->sock = udp_socket_open(&ssa, false, true, &dsa); + ASSERT_GE(self->sock, 0); +} + +FIXTURE_TEARDOWN(tun_vnet_udptnl) +{ + int ret; + + if (self->sock != -1) + close(self->sock); + + ret = ip_link_del(param_dev_geneve_name); + EXPECT_EQ(ret, 0); + + ret = tun_delete(self->ifname); + EXPECT_EQ(ret, 0); +} + +static int build_gso_packet_into_tun(const FIXTURE_VARIANT(tun_vnet_udptnl) * + variant, + uint8_t *buf) +{ + int pktlen, hlen, proto, inner_family, outer_family; + int tunnel_type = variant->tunnel_type; + int payload_len = variant->data_size; + int gso_size = variant->gso_size; + uint8_t *outer_udph, *cur = buf; + void *sip, *dip, *smac, *dmac; + bool is_tap = variant->is_tap; + + hlen = (is_tap ? ETH_HLEN : 0) + UDP_TUNNEL_HDRLEN(tunnel_type); + inner_family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : + AF_INET6; + outer_family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : + AF_INET6; + + cur += build_virtio_net_hdr_v1_hash_tunnel(cur, is_tap, hlen, gso_size, + outer_family, inner_family); + + pktlen = hlen + payload_len; + assign_ifaddr_vars(outer_family, 1, &sip, &dip, &smac, &dmac); + + if (is_tap) { + proto = outer_family == AF_INET ? ETH_P_IP : ETH_P_IPV6; + pktlen -= ETH_HLEN; + cur += build_eth(cur, proto, dmac, smac); + } + + if (outer_family == AF_INET) { + pktlen = pktlen - sizeof(struct iphdr); + cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip); + } else { + pktlen = pktlen - sizeof(struct ipv6hdr); + cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip); + } + + outer_udph = cur; + assign_ifaddr_vars(inner_family, 0, &sip, &dip, &smac, &dmac); + + pktlen -= sizeof(struct udphdr); + proto = inner_family == AF_INET ? ETH_P_IP : ETH_P_IPV6; + cur += build_udp_header(cur, UDP_SRC_PORT, VN_PORT, pktlen); + cur += build_geneve_header(cur, VN_ID); + cur += build_eth(cur, proto, dmac, smac); + + pktlen = sizeof(struct udphdr) + payload_len; + if (inner_family == AF_INET) + cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip); + else + cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip); + + cur += build_udp_packet(cur, UDP_DST_PORT, UDP_SRC_PORT, payload_len, + inner_family, false); + + build_udp_packet_csum(outer_udph, outer_family, false); + + return cur - buf; +} + +static int +receive_gso_packet_from_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self, + const FIXTURE_VARIANT(tun_vnet_udptnl) * variant, + int *r_num_mss) +{ + uint8_t packet_buf[MAX_VNET_TUNNEL_PACKET_SZ]; + int len, total_len = 0, socket = self->sock; + int payload_len = variant->data_size; + + while (total_len < payload_len) { + len = recv(socket, packet_buf, sizeof(packet_buf), 0); + if (len <= 0) { + if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK) + perror("recv"); + break; + } + + (*r_num_mss)++; + total_len += len; + } + + return total_len; +} + +static int send_gso_packet_into_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self, + const FIXTURE_VARIANT(tun_vnet_udptnl) * + variant) +{ + int family = (variant->tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : + AF_INET6; + uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ] = { 0 }; + int payload_len = variant->data_size; + int gso_size = variant->gso_size; + struct sockaddr_storage ssa, dsa; + + assign_sockaddr_vars(family, 0, &ssa, &dsa); + return send_gso_udp_msg(self->sock, &dsa, buf, payload_len, gso_size); +} + +static int +receive_gso_packet_from_tun(FIXTURE_DATA(tun_vnet_udptnl) * self, + const FIXTURE_VARIANT(tun_vnet_udptnl) * variant, + struct virtio_net_hdr_v1_hash_tunnel *vnet_hdr) +{ + struct timeval timeout = { .tv_sec = TIMEOUT_SEC }; + uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ]; + int tunnel_type = variant->tunnel_type; + int payload_len = variant->data_size; + bool is_tap = variant->is_tap; + int ret, len, total_len = 0; + int tun_fd = self->fd; + fd_set fdset; + + while (total_len < payload_len) { + FD_ZERO(&fdset); + FD_SET(tun_fd, &fdset); + + ret = select(tun_fd + 1, &fdset, NULL, NULL, &timeout); + if (ret <= 0) { + perror("select"); + break; + } + if (!FD_ISSET(tun_fd, &fdset)) + continue; + + len = read(tun_fd, buf, sizeof(buf)); + if (len <= 0) { + if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK) + perror("read"); + break; + } + + len = parse_udp_tunnel_vnet_packet(buf, len, tunnel_type, + is_tap); + if (len < 0) + continue; + + if (total_len == 0) + memcpy(vnet_hdr, buf, TUN_VNET_TNL_SIZE); + + total_len += len; + } + + return total_len; +} + +TEST_F(tun_vnet_udptnl, send_gso_packet) +{ + uint8_t pkt[MAX_VNET_TUNNEL_PACKET_SZ]; + int r_num_mss = 0; + int ret, off; + + memset(pkt, 0, sizeof(pkt)); + off = build_gso_packet_into_tun(variant, pkt); + ret = write(self->fd, pkt, off); + ASSERT_EQ(ret, off); + + ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss); + ASSERT_EQ(ret, variant->data_size); + ASSERT_EQ(r_num_mss, variant->r_num_mss); +} + +TEST_F(tun_vnet_udptnl, recv_gso_packet) +{ + struct virtio_net_hdr_v1_hash_tunnel vnet_hdr = { 0 }; + struct virtio_net_hdr_v1 *vh = &vnet_hdr.hash_hdr.hdr; + int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; + + ret = send_gso_packet_into_tunnel(self, variant); + ASSERT_EQ(ret, variant->data_size); + + memset(&vnet_hdr, 0, sizeof(vnet_hdr)); + ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr); + ASSERT_EQ(ret, variant->data_size); + + if (!variant->no_gso) { + ASSERT_EQ(vh->gso_size, variant->gso_size); + gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? + (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) : + (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6); + ASSERT_EQ(vh->gso_type, gso_type); + } +} + +XFAIL_ADD(tun_vnet_udptnl, 4in4_nogsosz_gtmss, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in4_nogsosz_gtmss, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 4in6_nogsosz_gtmss, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in6_nogsosz_gtmss, recv_gso_packet); + +XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, send_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, send_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, send_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, send_gso_packet); + +XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, recv_gso_packet); +XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, recv_gso_packet); + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/tuntap_helpers.h b/tools/testing/selftests/net/tuntap_helpers.h new file mode 100644 index 000000000000..d6c0437136ec --- /dev/null +++ b/tools/testing/selftests/net/tuntap_helpers.h @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _TUNTAP_HELPERS_H +#define _TUNTAP_HELPERS_H + +#include <errno.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/virtio_net.h> +#include <netinet/in.h> +#include <netinet/if_ether.h> +#include <netinet/udp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ynl.h> + +#include "rt-route-user.h" +#include "rt-addr-user.h" +#include "rt-neigh-user.h" +#include "rt-link-user.h" + +#define GENEVE_HLEN 8 +#define PKT_DATA 0xCB +#define TUNTAP_DEFAULT_TTL 8 +#define TUNTAP_DEFAULT_IPID 1337 + +unsigned int if_nametoindex(const char *ifname); + +static inline int ip_addr_len(int family) +{ + return (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); +} + +static inline void fill_ifaddr_msg(struct ifaddrmsg *ifam, int family, + int prefix, int flags, const char *dev) +{ + ifam->ifa_family = family; + ifam->ifa_prefixlen = prefix; + ifam->ifa_index = if_nametoindex(dev); + ifam->ifa_flags = flags; + ifam->ifa_scope = RT_SCOPE_UNIVERSE; +} + +static inline int ip_addr_add(const char *dev, int family, void *addr, + uint8_t prefix) +{ + int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + int ifa_flags = IFA_F_PERMANENT | IFA_F_NODAD; + int ret = -1, ipalen = ip_addr_len(family); + struct rt_addr_newaddr_req *req; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_rt_addr_family, NULL); + if (!ys) + return -1; + + req = rt_addr_newaddr_req_alloc(); + if (!req) + goto err_req_alloc; + + fill_ifaddr_msg(&req->_hdr, family, prefix, ifa_flags, dev); + rt_addr_newaddr_req_set_nlflags(req, nl_flags); + rt_addr_newaddr_req_set_local(req, addr, ipalen); + + ret = rt_addr_newaddr(ys, req); + rt_addr_newaddr_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline void fill_neigh_req_header(struct ndmsg *ndm, int family, + int state, const char *dev) +{ + ndm->ndm_family = family; + ndm->ndm_ifindex = if_nametoindex(dev); + ndm->ndm_state = state; + ndm->ndm_flags = 0; + ndm->ndm_type = RTN_UNICAST; +} + +static inline int ip_neigh_add(const char *dev, int family, void *addr, + unsigned char *lladdr) +{ + int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + int ret = -1, ipalen = ip_addr_len(family); + struct rt_neigh_newneigh_req *req; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_rt_neigh_family, NULL); + if (!ys) + return -1; + + req = rt_neigh_newneigh_req_alloc(); + if (!req) + goto err_req_alloc; + + fill_neigh_req_header(&req->_hdr, family, NUD_PERMANENT, dev); + rt_neigh_newneigh_req_set_nlflags(req, nl_flags); + rt_neigh_newneigh_req_set_dst(req, addr, ipalen); + rt_neigh_newneigh_req_set_lladdr(req, lladdr, ETH_ALEN); + rt_neigh_newneigh_req_set_ifindex(req, if_nametoindex(dev)); + + ret = rt_neigh_newneigh(ys, req); + rt_neigh_newneigh_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline void fill_route_req_header(struct rtmsg *rtm, int family, + int table) +{ + rtm->rtm_family = family; + rtm->rtm_table = table; +} + +static inline int +ip_route_get(const char *dev, int family, int table, void *dst, + void (*parse_rsp)(struct rt_route_getroute_rsp *rsp, void *out), + void *out) +{ + int ret = -1, ipalen = ip_addr_len(family); + struct rt_route_getroute_req *req; + struct rt_route_getroute_rsp *rsp; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_rt_route_family, NULL); + if (!ys) + return -1; + + req = rt_route_getroute_req_alloc(); + if (!req) + goto err_req_alloc; + + fill_route_req_header(&req->_hdr, family, table); + rt_route_getroute_req_set_nlflags(req, NLM_F_REQUEST); + rt_route_getroute_req_set_dst(req, dst, ipalen); + rt_route_getroute_req_set_oif(req, if_nametoindex(dev)); + + rsp = rt_route_getroute(ys, req); + if (!rsp) + goto err_rsp_get; + + ret = 0; + if (parse_rsp) + parse_rsp(rsp, out); + + rt_route_getroute_rsp_free(rsp); +err_rsp_get: + rt_route_getroute_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline int +ip_link_add(const char *dev, char *link_type, + int (*fill_link_attr)(struct rt_link_newlink_req *req, void *data), + void *data) +{ + int nl_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + struct rt_link_newlink_req *req; + struct ynl_sock *ys; + int ret = -1; + + ys = ynl_sock_create(&ynl_rt_link_family, NULL); + if (!ys) + return -1; + + req = rt_link_newlink_req_alloc(); + if (!req) + goto err_req_alloc; + + req->_hdr.ifi_flags = IFF_UP; + rt_link_newlink_req_set_nlflags(req, nl_flags); + rt_link_newlink_req_set_ifname(req, dev); + rt_link_newlink_req_set_linkinfo_kind(req, link_type); + + if (fill_link_attr && fill_link_attr(req, data) < 0) + goto err_attr_fill; + + ret = rt_link_newlink(ys, req); +err_attr_fill: + rt_link_newlink_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline int ip_link_del(const char *dev) +{ + struct rt_link_dellink_req *req; + struct ynl_sock *ys; + int ret = -1; + + ys = ynl_sock_create(&ynl_rt_link_family, NULL); + if (!ys) + return -1; + + req = rt_link_dellink_req_alloc(); + if (!req) + goto err_req_alloc; + + rt_link_dellink_req_set_nlflags(req, NLM_F_REQUEST); + rt_link_dellink_req_set_ifname(req, dev); + + ret = rt_link_dellink(ys, req); + rt_link_dellink_req_free(req); +err_req_alloc: + ynl_sock_destroy(ys); + return ret; +} + +static inline size_t build_eth(uint8_t *buf, uint16_t proto, unsigned char *src, + unsigned char *dest) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + + eth->h_proto = htons(proto); + memcpy(eth->h_source, src, ETH_ALEN); + memcpy(eth->h_dest, dest, ETH_ALEN); + + return ETH_HLEN; +} + +static inline uint32_t add_csum(const uint8_t *buf, int len) +{ + uint16_t *sbuf = (uint16_t *)buf; + uint32_t sum = 0; + + while (len > 1) { + sum += *sbuf++; + len -= 2; + } + + if (len) + sum += *(uint8_t *)sbuf; + + return sum; +} + +static inline uint16_t finish_ip_csum(uint32_t sum) +{ + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + return ~((uint16_t)sum); +} + +static inline uint16_t build_ip_csum(const uint8_t *buf, int len, uint32_t sum) +{ + sum += add_csum(buf, len); + return finish_ip_csum(sum); +} + +static inline int build_ipv4_header(uint8_t *buf, uint8_t proto, + int payload_len, struct in_addr *src, + struct in_addr *dst) +{ + struct iphdr *iph = (struct iphdr *)buf; + + iph->ihl = 5; + iph->version = 4; + iph->ttl = TUNTAP_DEFAULT_TTL; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->id = htons(TUNTAP_DEFAULT_IPID); + iph->protocol = proto; + iph->saddr = src->s_addr; + iph->daddr = dst->s_addr; + iph->check = build_ip_csum(buf, iph->ihl << 2, 0); + + return iph->ihl << 2; +} + +static inline void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) +{ + uint16_t val, *ptr = (uint16_t *)ip6h; + + val = ntohs(*ptr); + val &= 0xF00F; + val |= ((uint16_t)dsfield) << 4; + *ptr = htons(val); +} + +static inline int build_ipv6_header(uint8_t *buf, uint8_t proto, + uint8_t dsfield, int payload_len, + struct in6_addr *src, struct in6_addr *dst) +{ + struct ipv6hdr *ip6h = (struct ipv6hdr *)buf; + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = proto; + ip6h->hop_limit = TUNTAP_DEFAULT_TTL; + ipv6_set_dsfield(ip6h, dsfield); + memcpy(&ip6h->saddr, src, sizeof(ip6h->saddr)); + memcpy(&ip6h->daddr, dst, sizeof(ip6h->daddr)); + + return sizeof(struct ipv6hdr); +} + +static inline int build_geneve_header(uint8_t *buf, uint32_t vni) +{ + uint16_t protocol = htons(ETH_P_TEB); + uint32_t geneve_vni = htonl((vni << 8) & 0xffffff00); + + memcpy(buf + 2, &protocol, 2); + memcpy(buf + 4, &geneve_vni, 4); + return GENEVE_HLEN; +} + +static inline int build_udp_header(uint8_t *buf, uint16_t sport, uint16_t dport, + int payload_len) +{ + struct udphdr *udph = (struct udphdr *)buf; + + udph->source = htons(sport); + udph->dest = htons(dport); + udph->len = htons(sizeof(*udph) + payload_len); + return sizeof(*udph); +} + +static inline void build_udp_packet_csum(uint8_t *buf, int family, + bool csum_off) +{ + struct udphdr *udph = (struct udphdr *)buf; + size_t ipalen = ip_addr_len(family); + uint32_t sum; + + /* No extension IPv4 and IPv6 headers addresses are the last fields */ + sum = add_csum(buf - 2 * ipalen, 2 * ipalen); + sum += htons(IPPROTO_UDP) + udph->len; + + if (!csum_off) + sum += add_csum(buf, udph->len); + + udph->check = finish_ip_csum(sum); +} + +static inline int build_udp_packet(uint8_t *buf, uint16_t sport, uint16_t dport, + int payload_len, int family, bool csum_off) +{ + struct udphdr *udph = (struct udphdr *)buf; + + build_udp_header(buf, sport, dport, payload_len); + memset(buf + sizeof(*udph), PKT_DATA, payload_len); + build_udp_packet_csum(buf, family, csum_off); + + return sizeof(*udph) + payload_len; +} + +static inline int build_virtio_net_hdr_v1_hash_tunnel(uint8_t *buf, bool is_tap, + int hdr_len, int gso_size, + int outer_family, + int inner_family) +{ + struct virtio_net_hdr_v1_hash_tunnel *vh_tunnel = (void *)buf; + struct virtio_net_hdr_v1 *vh = &vh_tunnel->hash_hdr.hdr; + int outer_iphlen, inner_iphlen, eth_hlen, gso_type; + + eth_hlen = is_tap ? ETH_HLEN : 0; + outer_iphlen = (outer_family == AF_INET) ? sizeof(struct iphdr) : + sizeof(struct ipv6hdr); + inner_iphlen = (inner_family == AF_INET) ? sizeof(struct iphdr) : + sizeof(struct ipv6hdr); + + vh_tunnel->outer_th_offset = eth_hlen + outer_iphlen; + vh_tunnel->inner_nh_offset = vh_tunnel->outer_th_offset + ETH_HLEN + + GENEVE_HLEN + sizeof(struct udphdr); + + vh->csum_start = vh_tunnel->inner_nh_offset + inner_iphlen; + vh->csum_offset = __builtin_offsetof(struct udphdr, check); + vh->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vh->hdr_len = hdr_len; + vh->gso_size = gso_size; + + if (gso_size) { + gso_type = outer_family == AF_INET ? + VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 : + VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6; + vh->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4 | gso_type; + } + + return sizeof(struct virtio_net_hdr_v1_hash_tunnel); +} + +#endif /* _TUNTAP_HELPERS_H */ diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index bcc14688661d..170be192f5c7 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -206,12 +206,10 @@ static void __print_timestamp(const char *name, struct timespec *cur, fprintf(stderr, "\n"); } -static void print_timestamp_usr(void) +static void record_timestamp_usr(void) { if (clock_gettime(CLOCK_REALTIME, &ts_usr)) error(1, errno, "clock_gettime"); - - __print_timestamp(" USR", &ts_usr, 0, 0); } static void print_timestamp(struct scm_timestamping *tss, int tstype, @@ -599,8 +597,6 @@ static void do_test(int family, unsigned int report_opt) fill_header_udp(buf + off, family == PF_INET); } - print_timestamp_usr(); - iov.iov_base = buf; iov.iov_len = total_len; @@ -655,10 +651,14 @@ static void do_test(int family, unsigned int report_opt) } + record_timestamp_usr(); + val = sendmsg(fd, &msg, 0); if (val != total_len) error(1, errno, "send"); + __print_timestamp(" USR", &ts_usr, 0, 0); + /* wait for all errors to be queued, else ACKs arrive OOO */ if (cfg_sleep_usec) usleep(cfg_sleep_usec); diff --git a/tools/testing/selftests/ptp/phc.sh b/tools/testing/selftests/ptp/phc.sh index ac6e5a6e1d3a..9f61c1579edf 100755 --- a/tools/testing/selftests/ptp/phc.sh +++ b/tools/testing/selftests/ptp/phc.sh @@ -8,17 +8,20 @@ ALL_TESTS=" " DEV=$1 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + ############################################################################## # Sanity checks if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" - exit 0 + exit $ksft_skip fi if [[ "$DEV" == "" ]]; then echo "SKIP: PTP device not provided" - exit 0 + exit $ksft_skip fi require_command() @@ -27,7 +30,7 @@ require_command() if [[ ! -x "$(command -v "$cmd")" ]]; then echo "SKIP: $cmd not installed" - exit 1 + exit $ksft_skip fi } @@ -37,7 +40,7 @@ phc_sanity() if [ $? != 0 ]; then echo "SKIP: unknown clock $DEV: No such device" - exit 1 + exit $ksft_skip fi } @@ -49,6 +52,7 @@ phc_sanity # Exit status to return at the end. Set in case one of the tests fails. EXIT_STATUS=0 +PASS_COUNT=0 # Per-test return value. Clear at the beginning of each test. RET=0 @@ -65,12 +69,18 @@ log_test() { local test_name=$1 + if [[ $RET -eq $ksft_skip ]]; then + printf "TEST: %-60s [SKIP]\n" "$test_name" + return 0 + fi + if [[ $RET -ne 0 ]]; then EXIT_STATUS=1 printf "TEST: %-60s [FAIL]\n" "$test_name" return 1 fi + ((PASS_COUNT++)) printf "TEST: %-60s [ OK ]\n" "$test_name" return 0 } @@ -89,34 +99,49 @@ tests_run() settime_do() { - local res + local res out - res=$(phc_ctl $DEV set 0 wait 120.5 get 2> /dev/null \ - | awk '/clock time is/{print $5}' \ - | awk -F. '{print $1}') + out=$(LC_ALL=C phc_ctl $DEV set 0 wait 120.5 get 2>&1) + if [[ $? -ne 0 ]]; then + if echo "$out" | grep -qi "Operation not supported"; then + return $ksft_skip + fi + return 1 + fi + res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}') (( res == 120 )) } adjtime_do() { - local res + local res out - res=$(phc_ctl $DEV set 0 adj 10 get 2> /dev/null \ - | awk '/clock time is/{print $5}' \ - | awk -F. '{print $1}') + out=$(LC_ALL=C phc_ctl $DEV set 0 adj 10 get 2>&1) + if [[ $? -ne 0 ]]; then + if echo "$out" | grep -qi "Operation not supported"; then + return $ksft_skip + fi + return 1 + fi + res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}') (( res == 10 )) } adjfreq_do() { - local res + local res out # Set the clock to be 1% faster - res=$(phc_ctl $DEV freq 10000000 set 0 wait 100.5 get 2> /dev/null \ - | awk '/clock time is/{print $5}' \ - | awk -F. '{print $1}') + out=$(LC_ALL=C phc_ctl $DEV freq 10000000 set 0 wait 100.5 get 2>&1) + if [[ $? -ne 0 ]]; then + if echo "$out" | grep -qi "Operation not supported"; then + return $ksft_skip + fi + return 1 + fi + res=$(echo "$out" | awk '/clock time is/{print $5}' | awk -F. '{print $1}') (( res == 101 )) } @@ -163,4 +188,7 @@ trap cleanup EXIT tests_run +if [[ $EXIT_STATUS -eq 0 && $PASS_COUNT -eq 0 ]]; then + exit $ksft_skip +fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json new file mode 100644 index 000000000000..0efe229fb86e --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake_mq.json @@ -0,0 +1,559 @@ +[ + { + "id": "684b", + "name": "Create CAKE_MQ with default setting (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1\" > /sys/bus/netdevsim/del_device || true", + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "7ee8", + "name": "Create CAKE_MQ with bandwidth limit (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq bandwidth 1000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth 1Kbit diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "1f87", + "name": "Create CAKE_MQ with rtt time (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq rtt 200", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 200us raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "e9cf", + "name": "Create CAKE_MQ with besteffort flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq besteffort", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited besteffort triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "7c05", + "name": "Create CAKE_MQ with diffserv8 flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq diffserv8", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv8 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "5a77", + "name": "Create CAKE_MQ with diffserv4 flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq diffserv4", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv4 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "8f7a", + "name": "Create CAKE_MQ with flowblind flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq flowblind", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "7ef7", + "name": "Create CAKE_MQ with dsthost and nat flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dsthost nat", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dsthost nat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "2e4d", + "name": "Create CAKE_MQ with wash flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq hosts wash", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 hosts nonat wash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "b3e6", + "name": "Create CAKE_MQ with flowblind and no-split-gso flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq flowblind no-split-gso", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter no-split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "62cd", + "name": "Create CAKE_MQ with dual-srchost and ack-filter flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dual-srchost ack-filter", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dual-srchost nonat nowash ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "0df3", + "name": "Create CAKE_MQ with dual-dsthost and ack-filter-aggressive flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq dual-dsthost ack-filter-aggressive", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 dual-dsthost nonat nowash ack-filter-aggressive split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "9a75", + "name": "Create CAKE_MQ with memlimit and ptm flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq memlimit 10000 ptm", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw ptm overhead 0 memlimit 10000b ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "cdef", + "name": "Create CAKE_MQ with fwmark and atm flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq fwmark 8 atm", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw atm overhead 0 fwmark 0x8 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "93dd", + "name": "Create CAKE_MQ with overhead 0 and mpu (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 256 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "1475", + "name": "Create CAKE_MQ with conservative and ingress flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq conservative ingress", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "7bf1", + "name": "Delete CAKE_MQ with conservative and ingress flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH handle 1: root cake_mq conservative ingress" + ], + "cmdUnderTest": "$TC qdisc del dev $ETH handle 1: root", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48 ", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "ee55", + "name": "Replace CAKE_MQ with mpu (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH handle 1: root cake_mq mpu 128", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "6df9", + "name": "Change CAKE_MQ with mpu (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH handle 1: root cake_mq overhead 128 mpu 256" + ], + "cmdUnderTest": "$TC qdisc change dev $ETH handle 1: root cake_mq mpu 128", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "67e2", + "name": "Show CAKE_MQ class (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $ETH", + "matchPattern": "class cake_mq", + "matchCount": "4", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "2de4", + "name": "Change bandwidth of CAKE_MQ (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH handle 1: root cake_mq" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH handle 1: root cake_mq bandwidth 1000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth 1Kbit diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "5f62", + "name": "Fail to create CAKE_MQ with autorate-ingress flag (4 queues)", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq autorate-ingress", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited autorate-ingress diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "038e", + "name": "Fail to change setting of sub-qdisc under CAKE_MQ", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH handle 1: root cake_mq" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH parent 1:1 cake besteffort flows", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "7bdc", + "name": "Fail to replace sub-qdisc under CAKE_MQ", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH handle 1: root cake_mq" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH parent 1:1 fq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "5", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "18e0", + "name": "Fail to install CAKE_MQ on single queue device", + "category": [ + "qdisc", + "cake_mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 1\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH handle 1: root cake_mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc (cake_mq 1: root|cake 0: parent 1:[1-4]) bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead 0 ", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + } +] diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings index 694d70710ff0..79b65bdf05db 100644 --- a/tools/testing/selftests/vsock/settings +++ b/tools/testing/selftests/vsock/settings @@ -1 +1 @@ -timeout=300 +timeout=1200 diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index c7b270dd77a9..dc8dbe74a6d0 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -7,6 +7,7 @@ # * virtme-ng # * busybox-static (used by virtme-ng) # * qemu (used by virtme-ng) +# * socat # # shellcheck disable=SC2317,SC2119 @@ -41,14 +42,119 @@ readonly KERNEL_CMDLINE="\ virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \ " readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log) -readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback) + +# Namespace tests must use the ns_ prefix. This is checked in check_netns() and +# is used to determine if a test needs namespace setup before test execution. +readonly TEST_NAMES=( + vm_server_host_client + vm_client_host_server + vm_loopback + ns_host_vsock_ns_mode_ok + ns_host_vsock_child_ns_mode_ok + ns_global_same_cid_fails + ns_local_same_cid_ok + ns_global_local_same_cid_ok + ns_local_global_same_cid_ok + ns_diff_global_host_connect_to_global_vm_ok + ns_diff_global_host_connect_to_local_vm_fails + ns_diff_global_vm_connect_to_global_host_ok + ns_diff_global_vm_connect_to_local_host_fails + ns_diff_local_host_connect_to_local_vm_fails + ns_diff_local_vm_connect_to_local_host_fails + ns_diff_global_to_local_loopback_local_fails + ns_diff_local_to_global_loopback_fails + ns_diff_local_to_local_loopback_fails + ns_diff_global_to_global_loopback_ok + ns_same_local_loopback_ok + ns_same_local_host_connect_to_local_vm_ok + ns_same_local_vm_connect_to_local_host_ok + ns_delete_vm_ok + ns_delete_host_ok + ns_delete_both_ok +) readonly TEST_DESCS=( + # vm_server_host_client "Run vsock_test in server mode on the VM and in client mode on the host." + + # vm_client_host_server "Run vsock_test in client mode on the VM and in server mode on the host." + + # vm_loopback "Run vsock_test using the loopback transport in the VM." + + # ns_host_vsock_ns_mode_ok + "Check /proc/sys/net/vsock/ns_mode strings on the host." + + # ns_host_vsock_child_ns_mode_ok + "Check /proc/sys/net/vsock/ns_mode is read-only and child_ns_mode is writable." + + # ns_global_same_cid_fails + "Check QEMU fails to start two VMs with same CID in two different global namespaces." + + # ns_local_same_cid_ok + "Check QEMU successfully starts two VMs with same CID in two different local namespaces." + + # ns_global_local_same_cid_ok + "Check QEMU successfully starts one VM in a global ns and then another VM in a local ns with the same CID." + + # ns_local_global_same_cid_ok + "Check QEMU successfully starts one VM in a local ns and then another VM in a global ns with the same CID." + + # ns_diff_global_host_connect_to_global_vm_ok + "Run vsock_test client in global ns with server in VM in another global ns." + + # ns_diff_global_host_connect_to_local_vm_fails + "Run socat to test a process in a global ns fails to connect to a VM in a local ns." + + # ns_diff_global_vm_connect_to_global_host_ok + "Run vsock_test client in VM in a global ns with server in another global ns." + + # ns_diff_global_vm_connect_to_local_host_fails + "Run socat to test a VM in a global ns fails to connect to a host process in a local ns." + + # ns_diff_local_host_connect_to_local_vm_fails + "Run socat to test a host process in a local ns fails to connect to a VM in another local ns." + + # ns_diff_local_vm_connect_to_local_host_fails + "Run socat to test a VM in a local ns fails to connect to a host process in another local ns." + + # ns_diff_global_to_local_loopback_local_fails + "Run socat to test a loopback vsock in a global ns fails to connect to a vsock in a local ns." + + # ns_diff_local_to_global_loopback_fails + "Run socat to test a loopback vsock in a local ns fails to connect to a vsock in a global ns." + + # ns_diff_local_to_local_loopback_fails + "Run socat to test a loopback vsock in a local ns fails to connect to a vsock in another local ns." + + # ns_diff_global_to_global_loopback_ok + "Run socat to test a loopback vsock in a global ns successfully connects to a vsock in another global ns." + + # ns_same_local_loopback_ok + "Run socat to test a loopback vsock in a local ns successfully connects to a vsock in the same ns." + + # ns_same_local_host_connect_to_local_vm_ok + "Run vsock_test client in a local ns with server in VM in same ns." + + # ns_same_local_vm_connect_to_local_host_ok + "Run vsock_test client in VM in a local ns with server in same ns." + + # ns_delete_vm_ok + "Check that deleting the VM's namespace does not break the socket connection" + + # ns_delete_host_ok + "Check that deleting the host's namespace does not break the socket connection" + + # ns_delete_both_ok + "Check that deleting the VM and host's namespaces does not break the socket connection" ) -readonly USE_SHARED_VM=(vm_server_host_client vm_client_host_server vm_loopback) +readonly USE_SHARED_VM=( + vm_server_host_client + vm_client_host_server + vm_loopback +) +readonly NS_MODES=("local" "global") VERBOSE=0 @@ -71,7 +177,7 @@ usage() { for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do name=${TEST_NAMES[${i}]} desc=${TEST_DESCS[${i}]} - printf "\t%-35s%-35s\n" "${name}" "${desc}" + printf "\t%-55s%-35s\n" "${name}" "${desc}" done echo @@ -103,13 +209,55 @@ check_result() { fi } +add_namespaces() { + local orig_mode + orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode) + + for mode in "${NS_MODES[@]}"; do + echo "${mode}" > /proc/sys/net/vsock/child_ns_mode + ip netns add "${mode}0" 2>/dev/null + ip netns add "${mode}1" 2>/dev/null + done + + echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode +} + +init_namespaces() { + for mode in "${NS_MODES[@]}"; do + # we need lo for qemu port forwarding + ip netns exec "${mode}0" ip link set dev lo up + ip netns exec "${mode}1" ip link set dev lo up + done +} + +del_namespaces() { + for mode in "${NS_MODES[@]}"; do + ip netns del "${mode}0" &>/dev/null + ip netns del "${mode}1" &>/dev/null + log_host "removed ns ${mode}0" + log_host "removed ns ${mode}1" + done +} + vm_ssh() { - ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@" + local ns_exec + + if [[ "${1}" == init_ns ]]; then + ns_exec="" + else + ns_exec="ip netns exec ${1}" + fi + + shift + + ${ns_exec} ssh -q -o UserKnownHostsFile=/dev/null -p "${SSH_HOST_PORT}" localhost "$@" + return $? } cleanup() { terminate_pidfiles "${!PIDFILES[@]}" + del_namespaces } check_args() { @@ -139,7 +287,7 @@ check_args() { } check_deps() { - for dep in vng ${QEMU} busybox pkill ssh; do + for dep in vng ${QEMU} busybox pkill ssh ss socat; do if [[ ! -x $(command -v "${dep}") ]]; then echo -e "skip: dependency ${dep} not found!\n" exit "${KSFT_SKIP}" @@ -153,6 +301,20 @@ check_deps() { fi } +check_netns() { + local tname=$1 + + # If the test requires NS support, check if NS support exists + # using /proc/self/ns + if [[ "${tname}" =~ ^ns_ ]] && + [[ ! -e /proc/self/ns ]]; then + log_host "No NS support detected for test ${tname}" + return 1 + fi + + return 0 +} + check_vng() { local tested_versions local version @@ -176,6 +338,20 @@ check_vng() { fi } +check_socat() { + local support_string + + support_string="$(socat -V)" + + if [[ "${support_string}" != *"WITH_VSOCK 1"* ]]; then + die "err: socat is missing vsock support" + fi + + if [[ "${support_string}" != *"WITH_UNIX 1"* ]]; then + die "err: socat is missing unix support" + fi +} + handle_build() { if [[ ! "${BUILD}" -eq 1 ]]; then return @@ -224,12 +400,22 @@ terminate_pidfiles() { done } +terminate_pids() { + local pid + + for pid in "$@"; do + kill -SIGTERM "${pid}" &>/dev/null || : + done +} + vm_start() { local pidfile=$1 + local ns=$2 local logfile=/dev/null local verbose_opt="" local kernel_opt="" local qemu_opts="" + local ns_exec="" local qemu qemu=$(command -v "${QEMU}") @@ -250,7 +436,11 @@ vm_start() { kernel_opt="${KERNEL_CHECKOUT}" fi - vng \ + if [[ "${ns}" != "init_ns" ]]; then + ns_exec="ip netns exec ${ns}" + fi + + ${ns_exec} vng \ --run \ ${kernel_opt} \ ${verbose_opt} \ @@ -265,6 +455,7 @@ vm_start() { } vm_wait_for_ssh() { + local ns=$1 local i i=0 @@ -272,7 +463,8 @@ vm_wait_for_ssh() { if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then die "Timed out waiting for guest ssh" fi - if vm_ssh -- true; then + + if vm_ssh "${ns}" -- true; then break fi i=$(( i + 1 )) @@ -286,50 +478,107 @@ wait_for_listener() local port=$1 local interval=$2 local max_intervals=$3 - local protocol=tcp - local pattern + local protocol=$4 local i - pattern=":$(printf "%04X" "${port}") " - - # for tcp protocol additionally check the socket state - [ "${protocol}" = "tcp" ] && pattern="${pattern}0A" - for i in $(seq "${max_intervals}"); do - if awk -v pattern="${pattern}" \ - 'BEGIN {rc=1} $2" "$4 ~ pattern {rc=0} END {exit rc}' \ - /proc/net/"${protocol}"*; then + case "${protocol}" in + tcp) + if ss --listening --tcp --numeric | grep -q ":${port} "; then + break + fi + ;; + vsock) + if ss --listening --vsock --numeric | grep -q ":${port} "; then + break + fi + ;; + unix) + # For unix sockets, port is actually the socket path + if ss --listening --unix | grep -q "${port}"; then + break + fi + ;; + *) + echo "Unknown protocol: ${protocol}" >&2 break - fi + ;; + esac sleep "${interval}" done } vm_wait_for_listener() { - local port=$1 + local ns=$1 + local port=$2 + local protocol=$3 - vm_ssh <<EOF + vm_ssh "${ns}" <<EOF $(declare -f wait_for_listener) -wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} +wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} ${protocol} EOF } host_wait_for_listener() { - local port=$1 + local ns=$1 + local port=$2 + local protocol=$3 + + if [[ "${ns}" == "init_ns" ]]; then + wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}" "${protocol}" + else + ip netns exec "${ns}" bash <<-EOF + $(declare -f wait_for_listener) + wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} ${protocol} + EOF + fi +} + +vm_dmesg_oops_count() { + local ns=$1 + + vm_ssh "${ns}" -- dmesg 2>/dev/null | grep -c -i 'Oops' +} + +vm_dmesg_warn_count() { + local ns=$1 + + vm_ssh "${ns}" -- dmesg --level=warn 2>/dev/null | grep -c -i 'vsock' +} + +vm_dmesg_check() { + local pidfile=$1 + local ns=$2 + local oops_before=$3 + local warn_before=$4 + local oops_after warn_after + + oops_after=$(vm_dmesg_oops_count "${ns}") + if [[ "${oops_after}" -gt "${oops_before}" ]]; then + echo "FAIL: kernel oops detected on vm in ns ${ns}" | log_host + return 1 + fi + + warn_after=$(vm_dmesg_warn_count "${ns}") + if [[ "${warn_after}" -gt "${warn_before}" ]]; then + echo "FAIL: kernel warning detected on vm in ns ${ns}" | log_host + return 1 + fi - wait_for_listener "${port}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}" + return 0 } vm_vsock_test() { - local host=$1 - local cid=$2 - local port=$3 + local ns=$1 + local host=$2 + local cid=$3 + local port=$4 local rc # log output and use pipefail to respect vsock_test errors set -o pipefail if [[ "${host}" != server ]]; then - vm_ssh -- "${VSOCK_TEST}" \ + vm_ssh "${ns}" -- "${VSOCK_TEST}" \ --mode=client \ --control-host="${host}" \ --peer-cid="${cid}" \ @@ -337,7 +586,7 @@ vm_vsock_test() { 2>&1 | log_guest rc=$? else - vm_ssh -- "${VSOCK_TEST}" \ + vm_ssh "${ns}" -- "${VSOCK_TEST}" \ --mode=server \ --peer-cid="${cid}" \ --control-port="${port}" \ @@ -349,7 +598,7 @@ vm_vsock_test() { return $rc fi - vm_wait_for_listener "${port}" + vm_wait_for_listener "${ns}" "${port}" "tcp" rc=$? fi set +o pipefail @@ -358,25 +607,35 @@ vm_vsock_test() { } host_vsock_test() { - local host=$1 - local cid=$2 - local port=$3 + local ns=$1 + local host=$2 + local cid=$3 + local port=$4 + shift 4 + local extra_args=("$@") local rc + local cmd="${VSOCK_TEST}" + if [[ "${ns}" != "init_ns" ]]; then + cmd="ip netns exec ${ns} ${cmd}" + fi + # log output and use pipefail to respect vsock_test errors set -o pipefail if [[ "${host}" != server ]]; then - ${VSOCK_TEST} \ + ${cmd} \ --mode=client \ --peer-cid="${cid}" \ --control-host="${host}" \ - --control-port="${port}" 2>&1 | log_host + --control-port="${port}" \ + "${extra_args[@]}" 2>&1 | log_host rc=$? else - ${VSOCK_TEST} \ + ${cmd} \ --mode=server \ --peer-cid="${cid}" \ - --control-port="${port}" 2>&1 | log_host & + --control-port="${port}" \ + "${extra_args[@]}" 2>&1 | log_host & rc=$? if [[ $rc -ne 0 ]]; then @@ -384,7 +643,7 @@ host_vsock_test() { return $rc fi - host_wait_for_listener "${port}" + host_wait_for_listener "${ns}" "${port}" "tcp" rc=$? fi set +o pipefail @@ -427,12 +686,584 @@ log_guest() { LOG_PREFIX=guest log "$@" } +ns_get_mode() { + local ns=$1 + + ip netns exec "${ns}" cat /proc/sys/net/vsock/ns_mode 2>/dev/null +} + +test_ns_host_vsock_ns_mode_ok() { + for mode in "${NS_MODES[@]}"; do + local actual + + actual=$(ns_get_mode "${mode}0") + if [[ "${actual}" != "${mode}" ]]; then + log_host "expected mode ${mode}, got ${actual}" + return "${KSFT_FAIL}" + fi + done + + return "${KSFT_PASS}" +} + +test_ns_diff_global_host_connect_to_global_vm_ok() { + local oops_before warn_before + local pids pid pidfile + local ns0 ns1 port + declare -a pids + local unixfile + ns0="global0" + ns1="global1" + port=1234 + local rc + + init_namespaces + + pidfile="$(create_pidfile)" + + if ! vm_start "${pidfile}" "${ns0}"; then + return "${KSFT_FAIL}" + fi + + vm_wait_for_ssh "${ns0}" + oops_before=$(vm_dmesg_oops_count "${ns0}") + warn_before=$(vm_dmesg_warn_count "${ns0}") + + unixfile=$(mktemp -u /tmp/XXXX.sock) + ip netns exec "${ns1}" \ + socat TCP-LISTEN:"${TEST_HOST_PORT}",fork \ + UNIX-CONNECT:"${unixfile}" & + pids+=($!) + host_wait_for_listener "${ns1}" "${TEST_HOST_PORT}" "tcp" + + ip netns exec "${ns0}" socat UNIX-LISTEN:"${unixfile}",fork \ + TCP-CONNECT:localhost:"${TEST_HOST_PORT}" & + pids+=($!) + host_wait_for_listener "${ns0}" "${unixfile}" "unix" + + vm_vsock_test "${ns0}" "server" 2 "${TEST_GUEST_PORT}" + vm_wait_for_listener "${ns0}" "${TEST_GUEST_PORT}" "tcp" + host_vsock_test "${ns1}" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}" + rc=$? + + vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}" + dmesg_rc=$? + + terminate_pids "${pids[@]}" + terminate_pidfiles "${pidfile}" + + if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then + return "${KSFT_FAIL}" + fi + + return "${KSFT_PASS}" +} + +test_ns_diff_global_host_connect_to_local_vm_fails() { + local oops_before warn_before + local ns0="global0" + local ns1="local0" + local port=12345 + local dmesg_rc + local pidfile + local result + local pid + + init_namespaces + + outfile=$(mktemp) + + pidfile="$(create_pidfile)" + if ! vm_start "${pidfile}" "${ns1}"; then + log_host "failed to start vm (cid=${VSOCK_CID}, ns=${ns0})" + return "${KSFT_FAIL}" + fi + + vm_wait_for_ssh "${ns1}" + oops_before=$(vm_dmesg_oops_count "${ns1}") + warn_before=$(vm_dmesg_warn_count "${ns1}") + + vm_ssh "${ns1}" -- socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" & + vm_wait_for_listener "${ns1}" "${port}" "vsock" + echo TEST | ip netns exec "${ns0}" \ + socat STDIN VSOCK-CONNECT:"${VSOCK_CID}":"${port}" 2>/dev/null + + vm_dmesg_check "${pidfile}" "${ns1}" "${oops_before}" "${warn_before}" + dmesg_rc=$? + + terminate_pidfiles "${pidfile}" + result=$(cat "${outfile}") + rm -f "${outfile}" + + if [[ "${result}" == "TEST" ]] || [[ "${dmesg_rc}" -ne 0 ]]; then + return "${KSFT_FAIL}" + fi + + return "${KSFT_PASS}" +} + +test_ns_diff_global_vm_connect_to_global_host_ok() { + local oops_before warn_before + local ns0="global0" + local ns1="global1" + local port=12345 + local unixfile + local dmesg_rc + local pidfile + local pids + local rc + + init_namespaces + + declare -a pids + + log_host "Setup socat bridge from ns ${ns0} to ns ${ns1} over port ${port}" + + unixfile=$(mktemp -u /tmp/XXXX.sock) + + ip netns exec "${ns0}" \ + socat TCP-LISTEN:"${port}" UNIX-CONNECT:"${unixfile}" & + pids+=($!) + host_wait_for_listener "${ns0}" "${port}" "tcp" + + ip netns exec "${ns1}" \ + socat UNIX-LISTEN:"${unixfile}" TCP-CONNECT:127.0.0.1:"${port}" & + pids+=($!) + host_wait_for_listener "${ns1}" "${unixfile}" "unix" + + log_host "Launching ${VSOCK_TEST} in ns ${ns1}" + host_vsock_test "${ns1}" "server" "${VSOCK_CID}" "${port}" + + pidfile="$(create_pidfile)" + if ! vm_start "${pidfile}" "${ns0}"; then + log_host "failed to start vm (cid=${cid}, ns=${ns0})" + terminate_pids "${pids[@]}" + rm -f "${unixfile}" + return "${KSFT_FAIL}" + fi + + vm_wait_for_ssh "${ns0}" + + oops_before=$(vm_dmesg_oops_count "${ns0}") + warn_before=$(vm_dmesg_warn_count "${ns0}") + + vm_vsock_test "${ns0}" "10.0.2.2" 2 "${port}" + rc=$? + + vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}" + dmesg_rc=$? + + terminate_pidfiles "${pidfile}" + terminate_pids "${pids[@]}" + rm -f "${unixfile}" + + if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then + return "${KSFT_FAIL}" + fi + + return "${KSFT_PASS}" + +} + +test_ns_diff_global_vm_connect_to_local_host_fails() { + local ns0="global0" + local ns1="local0" + local port=12345 + local oops_before warn_before + local dmesg_rc + local pidfile + local result + local pid + + init_namespaces + + log_host "Launching socat in ns ${ns1}" + outfile=$(mktemp) + + ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT &> "${outfile}" & + pid=$! + host_wait_for_listener "${ns1}" "${port}" "vsock" + + pidfile="$(create_pidfile)" + if ! vm_start "${pidfile}" "${ns0}"; then + log_host "failed to start vm (cid=${cid}, ns=${ns0})" + terminate_pids "${pid}" + rm -f "${outfile}" + return "${KSFT_FAIL}" + fi + + vm_wait_for_ssh "${ns0}" + + oops_before=$(vm_dmesg_oops_count "${ns0}") + warn_before=$(vm_dmesg_warn_count "${ns0}") + + vm_ssh "${ns0}" -- \ + bash -c "echo TEST | socat STDIN VSOCK-CONNECT:2:${port}" 2>&1 | log_guest + + vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}" + dmesg_rc=$? + + terminate_pidfiles "${pidfile}" + terminate_pids "${pid}" + + result=$(cat "${outfile}") + rm -f "${outfile}" + + if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_diff_local_host_connect_to_local_vm_fails() { + local ns0="local0" + local ns1="local1" + local port=12345 + local oops_before warn_before + local dmesg_rc + local pidfile + local result + local pid + + init_namespaces + + outfile=$(mktemp) + + pidfile="$(create_pidfile)" + if ! vm_start "${pidfile}" "${ns1}"; then + log_host "failed to start vm (cid=${cid}, ns=${ns0})" + return "${KSFT_FAIL}" + fi + + vm_wait_for_ssh "${ns1}" + oops_before=$(vm_dmesg_oops_count "${ns1}") + warn_before=$(vm_dmesg_warn_count "${ns1}") + + vm_ssh "${ns1}" -- socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" & + vm_wait_for_listener "${ns1}" "${port}" "vsock" + + echo TEST | ip netns exec "${ns0}" \ + socat STDIN VSOCK-CONNECT:"${VSOCK_CID}":"${port}" 2>/dev/null + + vm_dmesg_check "${pidfile}" "${ns1}" "${oops_before}" "${warn_before}" + dmesg_rc=$? + + terminate_pidfiles "${pidfile}" + + result=$(cat "${outfile}") + rm -f "${outfile}" + + if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_diff_local_vm_connect_to_local_host_fails() { + local oops_before warn_before + local ns0="local0" + local ns1="local1" + local port=12345 + local dmesg_rc + local pidfile + local result + local pid + + init_namespaces + + log_host "Launching socat in ns ${ns1}" + outfile=$(mktemp) + ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT &> "${outfile}" & + pid=$! + host_wait_for_listener "${ns1}" "${port}" "vsock" + + pidfile="$(create_pidfile)" + if ! vm_start "${pidfile}" "${ns0}"; then + log_host "failed to start vm (cid=${cid}, ns=${ns0})" + rm -f "${outfile}" + return "${KSFT_FAIL}" + fi + + vm_wait_for_ssh "${ns0}" + oops_before=$(vm_dmesg_oops_count "${ns0}") + warn_before=$(vm_dmesg_warn_count "${ns0}") + + vm_ssh "${ns0}" -- \ + bash -c "echo TEST | socat STDIN VSOCK-CONNECT:2:${port}" 2>&1 | log_guest + + vm_dmesg_check "${pidfile}" "${ns0}" "${oops_before}" "${warn_before}" + dmesg_rc=$? + + terminate_pidfiles "${pidfile}" + terminate_pids "${pid}" + + result=$(cat "${outfile}") + rm -f "${outfile}" + + if [[ "${result}" != TEST ]] && [[ "${dmesg_rc}" -eq 0 ]]; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +__test_loopback_two_netns() { + local ns0=$1 + local ns1=$2 + local port=12345 + local result + local pid + + modprobe vsock_loopback &> /dev/null || : + + log_host "Launching socat in ns ${ns1}" + outfile=$(mktemp) + + ip netns exec "${ns1}" socat VSOCK-LISTEN:"${port}" STDOUT > "${outfile}" 2>/dev/null & + pid=$! + host_wait_for_listener "${ns1}" "${port}" "vsock" + + log_host "Launching socat in ns ${ns0}" + echo TEST | ip netns exec "${ns0}" socat STDIN VSOCK-CONNECT:1:"${port}" 2>/dev/null + terminate_pids "${pid}" + + result=$(cat "${outfile}") + rm -f "${outfile}" + + if [[ "${result}" == TEST ]]; then + return 0 + fi + + return 1 +} + +test_ns_diff_global_to_local_loopback_local_fails() { + init_namespaces + + if ! __test_loopback_two_netns "global0" "local0"; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_diff_local_to_global_loopback_fails() { + init_namespaces + + if ! __test_loopback_two_netns "local0" "global0"; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_diff_local_to_local_loopback_fails() { + init_namespaces + + if ! __test_loopback_two_netns "local0" "local1"; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_diff_global_to_global_loopback_ok() { + init_namespaces + + if __test_loopback_two_netns "global0" "global1"; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_same_local_loopback_ok() { + init_namespaces + + if __test_loopback_two_netns "local0" "local0"; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_same_local_host_connect_to_local_vm_ok() { + local oops_before warn_before + local ns="local0" + local port=1234 + local dmesg_rc + local pidfile + local rc + + init_namespaces + + pidfile="$(create_pidfile)" + + if ! vm_start "${pidfile}" "${ns}"; then + return "${KSFT_FAIL}" + fi + + vm_wait_for_ssh "${ns}" + oops_before=$(vm_dmesg_oops_count "${ns}") + warn_before=$(vm_dmesg_warn_count "${ns}") + + vm_vsock_test "${ns}" "server" 2 "${TEST_GUEST_PORT}" + + # Skip test 29 (transport release use-after-free): This test attempts + # binding both G2H and H2G CIDs. Because virtio-vsock (G2H) doesn't + # support local namespaces the test will fail when + # transport_g2h->stream_allow() returns false. This edge case only + # happens for vsock_test in client mode on the host in a local + # namespace. This is a false positive. + host_vsock_test "${ns}" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}" --skip=29 + rc=$? + + vm_dmesg_check "${pidfile}" "${ns}" "${oops_before}" "${warn_before}" + dmesg_rc=$? + + terminate_pidfiles "${pidfile}" + + if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then + return "${KSFT_FAIL}" + fi + + return "${KSFT_PASS}" +} + +test_ns_same_local_vm_connect_to_local_host_ok() { + local oops_before warn_before + local ns="local0" + local port=1234 + local dmesg_rc + local pidfile + local rc + + init_namespaces + + pidfile="$(create_pidfile)" + + if ! vm_start "${pidfile}" "${ns}"; then + return "${KSFT_FAIL}" + fi + + vm_wait_for_ssh "${ns}" + oops_before=$(vm_dmesg_oops_count "${ns}") + warn_before=$(vm_dmesg_warn_count "${ns}") + + host_vsock_test "${ns}" "server" "${VSOCK_CID}" "${port}" + vm_vsock_test "${ns}" "10.0.2.2" 2 "${port}" + rc=$? + + vm_dmesg_check "${pidfile}" "${ns}" "${oops_before}" "${warn_before}" + dmesg_rc=$? + + terminate_pidfiles "${pidfile}" + + if [[ "${rc}" -ne 0 ]] || [[ "${dmesg_rc}" -ne 0 ]]; then + return "${KSFT_FAIL}" + fi + + return "${KSFT_PASS}" +} + +namespaces_can_boot_same_cid() { + local ns0=$1 + local ns1=$2 + local pidfile1 pidfile2 + local rc + + pidfile1="$(create_pidfile)" + + # The first VM should be able to start. If it can't then we have + # problems and need to return non-zero. + if ! vm_start "${pidfile1}" "${ns0}"; then + return 1 + fi + + pidfile2="$(create_pidfile)" + vm_start "${pidfile2}" "${ns1}" + rc=$? + terminate_pidfiles "${pidfile1}" "${pidfile2}" + + return "${rc}" +} + +test_ns_global_same_cid_fails() { + init_namespaces + + if namespaces_can_boot_same_cid "global0" "global1"; then + return "${KSFT_FAIL}" + fi + + return "${KSFT_PASS}" +} + +test_ns_local_global_same_cid_ok() { + init_namespaces + + if namespaces_can_boot_same_cid "local0" "global0"; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_global_local_same_cid_ok() { + init_namespaces + + if namespaces_can_boot_same_cid "global0" "local0"; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_local_same_cid_ok() { + init_namespaces + + if namespaces_can_boot_same_cid "local0" "local1"; then + return "${KSFT_PASS}" + fi + + return "${KSFT_FAIL}" +} + +test_ns_host_vsock_child_ns_mode_ok() { + local orig_mode + local rc + + orig_mode=$(cat /proc/sys/net/vsock/child_ns_mode) + + rc="${KSFT_PASS}" + for mode in "${NS_MODES[@]}"; do + local ns="${mode}0" + + if echo "${mode}" 2>/dev/null > /proc/sys/net/vsock/ns_mode; then + log_host "ns_mode should be read-only but write succeeded" + rc="${KSFT_FAIL}" + continue + fi + + if ! echo "${mode}" > /proc/sys/net/vsock/child_ns_mode; then + log_host "child_ns_mode should be writable to ${mode}" + rc="${KSFT_FAIL}" + continue + fi + done + + echo "${orig_mode}" > /proc/sys/net/vsock/child_ns_mode + + return "${rc}" +} + test_vm_server_host_client() { - if ! vm_vsock_test "server" 2 "${TEST_GUEST_PORT}"; then + if ! vm_vsock_test "init_ns" "server" 2 "${TEST_GUEST_PORT}"; then return "${KSFT_FAIL}" fi - if ! host_vsock_test "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then + if ! host_vsock_test "init_ns" "127.0.0.1" "${VSOCK_CID}" "${TEST_HOST_PORT}"; then return "${KSFT_FAIL}" fi @@ -440,11 +1271,11 @@ test_vm_server_host_client() { } test_vm_client_host_server() { - if ! host_vsock_test "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then + if ! host_vsock_test "init_ns" "server" "${VSOCK_CID}" "${TEST_HOST_PORT_LISTENER}"; then return "${KSFT_FAIL}" fi - if ! vm_vsock_test "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then + if ! vm_vsock_test "init_ns" "10.0.2.2" 2 "${TEST_HOST_PORT_LISTENER}"; then return "${KSFT_FAIL}" fi @@ -454,19 +1285,92 @@ test_vm_client_host_server() { test_vm_loopback() { local port=60000 # non-forwarded local port - vm_ssh -- modprobe vsock_loopback &> /dev/null || : + vm_ssh "init_ns" -- modprobe vsock_loopback &> /dev/null || : - if ! vm_vsock_test "server" 1 "${port}"; then + if ! vm_vsock_test "init_ns" "server" 1 "${port}"; then return "${KSFT_FAIL}" fi - if ! vm_vsock_test "127.0.0.1" 1 "${port}"; then + + if ! vm_vsock_test "init_ns" "127.0.0.1" 1 "${port}"; then return "${KSFT_FAIL}" fi return "${KSFT_PASS}" } +check_ns_delete_doesnt_break_connection() { + local pipefile pidfile outfile + local ns0="global0" + local ns1="global1" + local port=12345 + local pids=() + local rc=0 + + init_namespaces + + pidfile="$(create_pidfile)" + if ! vm_start "${pidfile}" "${ns0}"; then + return "${KSFT_FAIL}" + fi + vm_wait_for_ssh "${ns0}" + + outfile=$(mktemp) + vm_ssh "${ns0}" -- \ + socat VSOCK-LISTEN:"${port}",fork STDOUT > "${outfile}" 2>/dev/null & + pids+=($!) + vm_wait_for_listener "${ns0}" "${port}" "vsock" + + # We use a pipe here so that we can echo into the pipe instead of using + # socat and a unix socket file. We just need a name for the pipe (not a + # regular file) so use -u. + pipefile=$(mktemp -u /tmp/vmtest_pipe_XXXX) + ip netns exec "${ns1}" \ + socat PIPE:"${pipefile}" VSOCK-CONNECT:"${VSOCK_CID}":"${port}" & + pids+=($!) + + timeout "${WAIT_PERIOD}" \ + bash -c 'while [[ ! -e '"${pipefile}"' ]]; do sleep 1; done; exit 0' + + if [[ "$1" == "vm" ]]; then + ip netns del "${ns0}" + elif [[ "$1" == "host" ]]; then + ip netns del "${ns1}" + elif [[ "$1" == "both" ]]; then + ip netns del "${ns0}" + ip netns del "${ns1}" + fi + + echo "TEST" > "${pipefile}" + + timeout "${WAIT_PERIOD}" \ + bash -c 'while [[ ! -s '"${outfile}"' ]]; do sleep 1; done; exit 0' + + if grep -q "TEST" "${outfile}"; then + rc="${KSFT_PASS}" + else + rc="${KSFT_FAIL}" + fi + + terminate_pidfiles "${pidfile}" + terminate_pids "${pids[@]}" + rm -f "${outfile}" "${pipefile}" + + return "${rc}" +} + +test_ns_delete_vm_ok() { + check_ns_delete_doesnt_break_connection "vm" +} + +test_ns_delete_host_ok() { + check_ns_delete_doesnt_break_connection "host" +} + +test_ns_delete_both_ok() { + check_ns_delete_doesnt_break_connection "both" +} + shared_vm_test() { local tname @@ -499,6 +1403,11 @@ run_shared_vm_tests() { continue fi + if ! check_netns "${arg}"; then + check_result "${KSFT_SKIP}" "${arg}" + continue + fi + run_shared_vm_test "${arg}" check_result "$?" "${arg}" done @@ -518,8 +1427,8 @@ run_shared_vm_test() { host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock') - vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') - vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') + vm_oops_cnt_before=$(vm_dmesg_oops_count "init_ns") + vm_warn_cnt_before=$(vm_dmesg_warn_count "init_ns") name=$(echo "${1}" | awk '{ print $1 }') eval test_"${name}" @@ -537,13 +1446,13 @@ run_shared_vm_test() { rc=$KSFT_FAIL fi - vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l) + vm_oops_cnt_after=$(vm_dmesg_oops_count "init_ns") if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then echo "FAIL: kernel oops detected on vm" | log_host rc=$KSFT_FAIL fi - vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') + vm_warn_cnt_after=$(vm_dmesg_warn_count "init_ns") if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on vm" | log_host rc=$KSFT_FAIL @@ -552,6 +1461,49 @@ run_shared_vm_test() { return "${rc}" } +run_ns_tests() { + for arg in "${ARGS[@]}"; do + if shared_vm_test "${arg}"; then + continue + fi + + if ! check_netns "${arg}"; then + check_result "${KSFT_SKIP}" "${arg}" + continue + fi + + add_namespaces + + name=$(echo "${arg}" | awk '{ print $1 }') + log_host "Executing test_${name}" + + host_oops_before=$(dmesg 2>/dev/null | grep -c -i 'Oops') + host_warn_before=$(dmesg --level=warn 2>/dev/null | grep -c -i 'vsock') + eval test_"${name}" + rc=$? + + host_oops_after=$(dmesg 2>/dev/null | grep -c -i 'Oops') + if [[ "${host_oops_after}" -gt "${host_oops_before}" ]]; then + echo "FAIL: kernel oops detected on host" | log_host + check_result "${KSFT_FAIL}" "${name}" + del_namespaces + continue + fi + + host_warn_after=$(dmesg --level=warn 2>/dev/null | grep -c -i 'vsock') + if [[ "${host_warn_after}" -gt "${host_warn_before}" ]]; then + echo "FAIL: kernel warning detected on host" | log_host + check_result "${KSFT_FAIL}" "${name}" + del_namespaces + continue + fi + + check_result "${rc}" "${name}" + + del_namespaces + done +} + BUILD=0 QEMU="qemu-system-$(uname -m)" @@ -577,6 +1529,7 @@ fi check_args "${ARGS[@]}" check_deps check_vng +check_socat handle_build echo "1..${#ARGS[@]}" @@ -589,14 +1542,16 @@ cnt_total=0 if shared_vm_tests_requested "${ARGS[@]}"; then log_host "Booting up VM" pidfile="$(create_pidfile)" - vm_start "${pidfile}" - vm_wait_for_ssh + vm_start "${pidfile}" "init_ns" + vm_wait_for_ssh "init_ns" log_host "VM booted up" run_shared_vm_tests "${ARGS[@]}" terminate_pidfiles "${pidfile}" fi +run_ns_tests "${ARGS[@]}" + echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}" echo "Log: ${LOG}" |
