summaryrefslogtreecommitdiff
path: root/tools/lib/python
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lib/python')
-rwxr-xr-xtools/lib/python/jobserver.py4
-rw-r--r--tools/lib/python/kdoc/c_lex.py662
-rw-r--r--tools/lib/python/kdoc/kdoc_files.py151
-rw-r--r--tools/lib/python/kdoc/kdoc_item.py45
-rw-r--r--tools/lib/python/kdoc/kdoc_output.py324
-rw-r--r--tools/lib/python/kdoc/kdoc_parser.py292
-rw-r--r--tools/lib/python/kdoc/kdoc_re.py205
-rw-r--r--tools/lib/python/kdoc/kdoc_yaml_file.py178
-rw-r--r--tools/lib/python/kdoc/xforms_lists.py153
-rwxr-xr-xtools/lib/python/unittest_helper.py363
10 files changed, 1947 insertions, 430 deletions
diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py
index aba22c33393d..0b1ffdf9f7a3 100755
--- a/tools/lib/python/jobserver.py
+++ b/tools/lib/python/jobserver.py
@@ -8,14 +8,14 @@
"""
Interacts with the POSIX jobserver during the Kernel build time.
-A "normal" jobserver task, like the one initiated by a make subrocess would do:
+A "normal" jobserver task, like the one initiated by a make subprocess would do:
- open read/write file descriptors to communicate with the job server;
- ask for one slot by calling::
claim = os.read(reader, 1)
- - when the job finshes, call::
+ - when the job finishes, call::
os.write(writer, b"+") # os.write(writer, claim)
diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
new file mode 100644
index 000000000000..cb95f5172448
--- /dev/null
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -0,0 +1,662 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Regular expression ancillary classes.
+
+Those help caching regular expressions and do matching for kernel-doc.
+
+Please notice that the code here may raise exceptions to indicate bad
+usage inside kdoc, pointing to problems at the replace pattern.
+
+Other errors are logged via log instance.
+"""
+
+import logging
+import re
+
+from copy import copy
+
+from .kdoc_re import KernRe
+
+log = logging.getLogger(__name__)
+
+def tokenizer_set_log(logger, prefix = ""):
+ """
+ Replace the module‑level logger with a LoggerAdapter that
+ prepends *prefix* to every message.
+ """
+ global log
+
+ class PrefixAdapter(logging.LoggerAdapter):
+ """
+ Ancillary class to set prefix on all message logs.
+ """
+ def process(self, msg, kwargs):
+ return f"{prefix}{msg}", kwargs
+
+ # Wrap the provided logger in our adapter
+ log = PrefixAdapter(logger, {"prefix": prefix})
+
+class CToken():
+ """
+ Data class to define a C token.
+ """
+
+ # Tokens that can be used by the parser. Works like an C enum.
+
+ COMMENT = 0 #: A standard C or C99 comment, including delimiter.
+ STRING = 1 #: A string, including quotation marks.
+ CHAR = 2 #: A character, including apostrophes.
+ NUMBER = 3 #: A number.
+ PUNC = 4 #: A punctuation mark: ``,`` / ``.``.
+ BEGIN = 5 #: A begin character: ``{`` / ``[`` / ``(``.
+ END = 6 #: A end character: ``}`` / ``]`` / ``)``.
+ CPP = 7 #: A preprocessor macro.
+ HASH = 8 #: The hash character - useful to handle other macros.
+ OP = 9 #: A C operator (add, subtract, ...).
+ STRUCT = 10 #: A ``struct`` keyword.
+ UNION = 11 #: An ``union`` keyword.
+ ENUM = 12 #: An ``enum`` keyword.
+ TYPEDEF = 13 #: A ``typedef`` keyword.
+ NAME = 14 #: A name. Can be an ID or a type.
+ SPACE = 15 #: Any space characters, including new lines
+ ENDSTMT = 16 #: End of a statement (``;``).
+
+ BACKREF = 17 #: Not a valid C sequence, but used at sub regex patterns.
+
+ MISMATCH = 255 #: an error indicator: should never happen in practice.
+
+ # Dict to convert from an enum integer into a string.
+ _name_by_val = {v: k for k, v in dict(vars()).items() if isinstance(v, int)}
+
+ # Dict to convert from string to an enum-like integer value.
+ _name_to_val = {k: v for v, k in _name_by_val.items()}
+
+ @staticmethod
+ def to_name(val):
+ """Convert from an integer value from CToken enum into a string"""
+
+ return CToken._name_by_val.get(val, f"UNKNOWN({val})")
+
+ @staticmethod
+ def from_name(name):
+ """Convert a string into a CToken enum value"""
+ if name in CToken._name_to_val:
+ return CToken._name_to_val[name]
+
+ return CToken.MISMATCH
+
+
+ def __init__(self, kind, value=None, pos=0,
+ brace_level=0, paren_level=0, bracket_level=0):
+ self.kind = kind
+ self.value = value
+ self.pos = pos
+ self.level = (bracket_level, paren_level, brace_level)
+
+ def __repr__(self):
+ name = self.to_name(self.kind)
+ if isinstance(self.value, str):
+ value = '"' + self.value + '"'
+ else:
+ value = self.value
+
+ return f"CToken(CToken.{name}, {value}, {self.pos}, {self.level})"
+
+#: Regexes to parse C code, transforming it into tokens.
+RE_SCANNER_LIST = [
+ #
+ # Note that \s\S is different than .*, as it also catches \n
+ #
+ (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"),
+
+ (CToken.STRING, r'"(?:\\.|[^"\\])*"'),
+ (CToken.CHAR, r"'(?:\\.|[^'\\])'"),
+
+ (CToken.NUMBER, r"0[xX][\da-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|"
+ r"\d+(?:\.\d*)?(?:[eE][+-]?\d+)?[fFlL]*"),
+
+ (CToken.ENDSTMT, r"(?:\s+;|;)"),
+
+ (CToken.PUNC, r"[,\.]"),
+
+ (CToken.BEGIN, r"[\[\(\{]"),
+
+ (CToken.END, r"[\]\)\}]"),
+
+ (CToken.CPP, r"#\s*(?:define|include|ifdef|ifndef|if|else|elif|endif|undef|pragma)\b"),
+
+ (CToken.HASH, r"#"),
+
+ (CToken.OP, r"\+\+|\-\-|\->|==|\!=|<=|>=|&&|\|\||<<|>>|\+=|\-=|\*=|/=|%="
+ r"|&=|\|=|\^=|[=\+\-\*/%<>&\|\^~!\?\:]"),
+
+ (CToken.STRUCT, r"\bstruct\b"),
+ (CToken.UNION, r"\bunion\b"),
+ (CToken.ENUM, r"\benum\b"),
+ (CToken.TYPEDEF, r"\btypedef\b"),
+
+ (CToken.NAME, r"[A-Za-z_]\w*"),
+
+ (CToken.SPACE, r"\s+"),
+
+ (CToken.BACKREF, r"\\\d+"),
+
+ (CToken.MISMATCH,r"."),
+]
+
+def fill_re_scanner(token_list):
+ """Ancillary routine to convert RE_SCANNER_LIST into a finditer regex"""
+ re_tokens = []
+
+ for kind, pattern in token_list:
+ name = CToken.to_name(kind)
+ re_tokens.append(f"(?P<{name}>{pattern})")
+
+ return KernRe("|".join(re_tokens), re.MULTILINE | re.DOTALL)
+
+#: Handle C continuation lines.
+RE_CONT = KernRe(r"\\\n")
+
+RE_COMMENT_START = KernRe(r'/\*\s*')
+
+#: tokenizer regex. Will be filled at the first CTokenizer usage.
+RE_SCANNER = fill_re_scanner(RE_SCANNER_LIST)
+
+
+class CTokenizer():
+ """
+ Scan C statements and definitions and produce tokens.
+
+ When converted to string, it drops comments and handles public/private
+ values, respecting depth.
+ """
+
+ # This class is inspired and follows the basic concepts of:
+ # https://docs.python.org/3/library/re.html#writing-a-tokenizer
+
+ def __init__(self, source=None):
+ """
+ Create a regular expression to handle RE_SCANNER_LIST.
+
+ While I generally don't like using regex group naming via:
+ (?P<name>...)
+
+ in this particular case, it makes sense, as we can pick the name
+ when matching a code via RE_SCANNER.
+ """
+
+ #
+ # Store logger to allow parser classes to re-use it
+ #
+ global log
+ self.log = log
+
+ self.tokens = []
+
+ if not source:
+ return
+
+ if isinstance(source, list):
+ self.tokens = source
+ return
+
+ #
+ # While we could just use _tokenize directly via an iterator,
+ # as we'll need to use the tokenizer several times inside kernel-doc
+ # to handle macro transforms, cache the results on a list, as
+ # re-using it is cheaper than having to parse every time.
+ #
+ for tok in self._tokenize(source):
+ self.tokens.append(tok)
+
+ def _tokenize(self, source):
+ """
+ Iterator that parses ``source``, splitting it into tokens, as defined
+ at ``RE_SCANNER_LIST``.
+
+ The iterator yields CToken class objects.
+ """
+
+ # Handle continuation lines. Note that kdoc_parser already has a
+ # logic to do that. Still, let's keep it for completeness, as we might
+ # end up re-using this tokenizer outside kernel-doc some day - or we may
+ # eventually remove from there as a future cleanup.
+ source = RE_CONT.sub("", source)
+
+ brace_level = 0
+ paren_level = 0
+ bracket_level = 0
+
+ for match in RE_SCANNER.finditer(source):
+ kind = CToken.from_name(match.lastgroup)
+ pos = match.start()
+ value = match.group()
+
+ if kind == CToken.MISMATCH:
+ log.error(f"Unexpected token '{value}' on pos {pos}:\n\t'{source}'")
+ elif kind == CToken.BEGIN:
+ if value == '(':
+ paren_level += 1
+ elif value == '[':
+ bracket_level += 1
+ else: # value == '{'
+ brace_level += 1
+
+ elif kind == CToken.END:
+ if value == ')' and paren_level > 0:
+ paren_level -= 1
+ elif value == ']' and bracket_level > 0:
+ bracket_level -= 1
+ elif brace_level > 0: # value == '}'
+ brace_level -= 1
+
+ yield CToken(kind, value, pos,
+ brace_level, paren_level, bracket_level)
+
+ def __str__(self):
+ out=""
+ show_stack = [True]
+
+ for i, tok in enumerate(self.tokens):
+ if tok.kind == CToken.BEGIN:
+ show_stack.append(show_stack[-1])
+
+ elif tok.kind == CToken.END:
+ prev = show_stack[-1]
+ if len(show_stack) > 1:
+ show_stack.pop()
+
+ if not prev and show_stack[-1]:
+ #
+ # Try to preserve indent
+ #
+ out += "\t" * (len(show_stack) - 1)
+
+ out += str(tok.value)
+ continue
+
+ elif tok.kind == CToken.COMMENT:
+ comment = RE_COMMENT_START.sub("", tok.value)
+
+ if comment.startswith("private:"):
+ show_stack[-1] = False
+ show = False
+ elif comment.startswith("public:"):
+ show_stack[-1] = True
+
+ continue
+
+ if not show_stack[-1]:
+ continue
+
+ if i < len(self.tokens) - 1:
+ next_tok = self.tokens[i + 1]
+
+ # Do some cleanups before ";"
+
+ if tok.kind == CToken.SPACE and next_tok.kind == CToken.ENDSTMT:
+ continue
+
+ if tok.kind == CToken.ENDSTMT and next_tok.kind == tok.kind:
+ continue
+
+ out += str(tok.value)
+
+ return out
+
+
+class CTokenArgs:
+ """
+ Ancillary class to help using backrefs from sub matches.
+
+ If the highest backref contains a "+" at the last element,
+ the logic will be greedy, picking all other delims.
+
+ This is needed to parse struct_group macros which end with ``MEMBERS...``.
+ """
+ def __init__(self, sub_str):
+ self.sub_groups = set()
+ self.max_group = -1
+ self.greedy = None
+
+ for m in KernRe(r'\\(\d+)([+]?)').finditer(sub_str):
+ group = int(m.group(1))
+ if m.group(2) == "+":
+ if self.greedy and self.greedy != group:
+ raise ValueError("There are multiple greedy patterns!")
+ self.greedy = group
+
+ self.sub_groups.add(group)
+ self.max_group = max(self.max_group, group)
+
+ if self.greedy:
+ if self.greedy != self.max_group:
+ raise ValueError("Greedy pattern is not the last one!")
+
+ sub_str = KernRe(r'(\\\d+)[+]').sub(r"\1", sub_str)
+
+ self.sub_str = sub_str
+ self.sub_tokeninzer = CTokenizer(sub_str)
+
+ def groups(self, new_tokenizer):
+ r"""
+ Create replacement arguments for backrefs like:
+
+ ``\0``, ``\1``, ``\2``, ... ``\{number}``
+
+ It also accepts a ``+`` character to the highest backref, like
+ ``\4+``. When used, the backref will be greedy, picking all other
+ arguments afterwards.
+
+ The logic is smart enough to only go up to the maximum required
+ argument, even if there are more.
+
+ If there is a backref for an argument above the limit, it will
+ raise an exception. Please notice that, on C, square brackets
+ don't have any separator in them. Trying to use ``\1``..``\n`` for
+ brackets also raises an exception.
+ """
+
+ level = (0, 0, 0)
+
+ if self.max_group < 0:
+ return level, []
+
+ tokens = new_tokenizer.tokens
+
+ #
+ # Fill \0 with the full token contents
+ #
+ groups_list = [ [] ]
+
+ if 0 in self.sub_groups:
+ inner_level = 0
+
+ for i in range(0, len(tokens)):
+ tok = tokens[i]
+
+ if tok.kind == CToken.BEGIN:
+ inner_level += 1
+
+ #
+ # Discard first begin
+ #
+ if not groups_list[0]:
+ continue
+ elif tok.kind == CToken.END:
+ inner_level -= 1
+ if inner_level < 0:
+ break
+
+ if inner_level:
+ groups_list[0].append(tok)
+
+ if not self.max_group:
+ return level, groups_list
+
+ delim = None
+
+ #
+ # Ignore everything before BEGIN. The value of begin gives the
+ # delimiter to be used for the matches
+ #
+ for i in range(0, len(tokens)):
+ tok = tokens[i]
+ if tok.kind == CToken.BEGIN:
+ if tok.value == "{":
+ delim = ";"
+ elif tok.value == "(":
+ delim = ","
+ else:
+ self.log.error(fr"Can't handle \1..\n on {sub_str}")
+
+ level = tok.level
+ break
+
+ pos = 1
+ groups_list.append([])
+
+ inner_level = 0
+ for i in range(i + 1, len(tokens)):
+ tok = tokens[i]
+
+ if tok.kind == CToken.BEGIN:
+ inner_level += 1
+ if tok.kind == CToken.END:
+ inner_level -= 1
+ if inner_level < 0:
+ break
+
+ if tok.kind in [CToken.PUNC, CToken.ENDSTMT] and delim == tok.value:
+ pos += 1
+ if self.greedy and pos > self.max_group:
+ pos -= 1
+ else:
+ groups_list.append([])
+
+ if pos > self.max_group:
+ break
+
+ continue
+
+ groups_list[pos].append(tok)
+
+ if pos < self.max_group:
+ log.error(fr"{self.sub_str} groups are up to {pos} instead of {self.max_group}")
+
+ return level, groups_list
+
+ def tokens(self, new_tokenizer):
+ level, groups = self.groups(new_tokenizer)
+
+ new = CTokenizer()
+
+ for tok in self.sub_tokeninzer.tokens:
+ if tok.kind == CToken.BACKREF:
+ group = int(tok.value[1:])
+
+ for group_tok in groups[group]:
+ new_tok = copy(group_tok)
+
+ new_level = [0, 0, 0]
+
+ for i in range(0, len(level)):
+ new_level[i] = new_tok.level[i] + level[i]
+
+ new_tok.level = tuple(new_level)
+
+ new.tokens += [ new_tok ]
+ else:
+ new.tokens += [ tok ]
+
+ return new.tokens
+
+
+class CMatch:
+ """
+ Finding nested delimiters is hard with regular expressions. It is
+ even harder on Python with its normal re module, as there are several
+ advanced regular expressions that are missing.
+
+ This is the case of this pattern::
+
+ '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
+
+ which is used to properly match open/close parentheses of the
+ string search STRUCT_GROUP(),
+
+ Add a class that counts pairs of delimiters, using it to match and
+ replace nested expressions.
+
+ The original approach was suggested by:
+
+ https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+
+ Although I re-implemented it to make it more generic and match 3 types
+ of delimiters. The logic checks if delimiters are paired. If not, it
+ will ignore the search string.
+ """
+
+
+ def __init__(self, regex, delim="("):
+ self.regex = KernRe("^" + regex + r"\b")
+ self.start_delim = delim
+
+ def _search(self, tokenizer):
+ """
+ Finds paired blocks for a regex that ends with a delimiter.
+
+ The suggestion of using finditer to match pairs came from:
+ https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+ but I ended using a different implementation to align all three types
+ of delimiters and seek for an initial regular expression.
+
+ The algorithm seeks for open/close paired delimiters and places them
+ into a stack, yielding a start/stop position of each match when the
+ stack is zeroed.
+
+ The algorithm should work fine for properly paired lines, but will
+ silently ignore end delimiters that precede a start delimiter.
+ This should be OK for kernel-doc parser, as unaligned delimiters
+ would cause compilation errors. So, we don't need to raise exceptions
+ to cover such issues.
+ """
+
+ start = None
+ started = False
+
+ import sys
+
+ stack = []
+
+ for i, tok in enumerate(tokenizer.tokens):
+ if start is None:
+ if tok.kind == CToken.NAME and self.regex.match(tok.value):
+ start = i
+ stack.append((start, tok.level))
+ started = False
+
+ continue
+
+ if not started:
+ if tok.kind == CToken.SPACE:
+ continue
+
+ if tok.kind == CToken.BEGIN and tok.value == self.start_delim:
+ started = True
+ continue
+
+ # Name only token without BEGIN/END
+ if i > start:
+ i -= 1
+ yield start, i
+ start = None
+
+ if tok.kind == CToken.END and tok.level == stack[-1][1]:
+ start, level = stack.pop()
+
+ yield start, i
+ start = None
+
+ #
+ # If an END zeroing levels is not there, return remaining stuff
+ # This is meant to solve cases where the caller logic might be
+ # picking an incomplete block.
+ #
+ if start and stack:
+ if started:
+ s = str(tokenizer)
+ log.warning(f"can't find a final end at {s}")
+
+ yield start, len(tokenizer.tokens)
+
+ def search(self, source):
+ """
+ This is similar to re.search:
+
+ It matches a regex that is followed by a delimiter,
+ returning occurrences only if all delimiters are paired.
+ """
+
+ if isinstance(source, CTokenizer):
+ tokenizer = source
+ is_token = True
+ else:
+ tokenizer = CTokenizer(source)
+ is_token = False
+
+ for start, end in self._search(tokenizer):
+ new_tokenizer = CTokenizer(tokenizer.tokens[start:end + 1])
+
+ if is_token:
+ yield new_tokenizer
+ else:
+ yield str(new_tokenizer)
+
+ def sub(self, sub_str, source, count=0):
+ """
+ This is similar to re.sub:
+
+ It matches a regex that is followed by a delimiter,
+ replacing occurrences only if all delimiters are paired.
+
+ if the sub argument contains::
+
+ r'\0'
+
+ it will work just like re: it places there the matched paired data
+ with the delimiter stripped.
+
+ If count is different than zero, it will replace at most count
+ items.
+ """
+ if isinstance(source, CTokenizer):
+ is_token = True
+ tokenizer = source
+ else:
+ is_token = False
+ tokenizer = CTokenizer(source)
+
+ # Detect if sub_str contains sub arguments
+
+ args_match = CTokenArgs(sub_str)
+
+ new_tokenizer = CTokenizer()
+ pos = 0
+ n = 0
+
+ #
+ # NOTE: the code below doesn't consider overlays at sub.
+ # We may need to add some extra unit tests to check if those
+ # would cause problems. When replacing by "", this should not
+ # be a problem, but other transformations could be problematic
+ #
+ for start, end in self._search(tokenizer):
+ new_tokenizer.tokens += tokenizer.tokens[pos:start]
+
+ new = CTokenizer(tokenizer.tokens[start:end + 1])
+
+ new_tokenizer.tokens += args_match.tokens(new)
+
+ pos = end + 1
+
+ n += 1
+ if count and n >= count:
+ break
+
+ new_tokenizer.tokens += tokenizer.tokens[pos:]
+
+ if not is_token:
+ return str(new_tokenizer)
+
+ return new_tokenizer
+
+ def __repr__(self):
+ """
+ Returns a displayable version of the class init.
+ """
+
+ return f'CMatch("{self.regex.regex.pattern}")'
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 022487ea2cc6..ed82b6e6ab25 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -9,13 +9,14 @@ Classes for navigating through the files that kernel-doc needs to handle
to generate documentation.
"""
-import argparse
import logging
import os
import re
from kdoc.kdoc_parser import KernelDoc
+from kdoc.xforms_lists import CTransforms
from kdoc.kdoc_output import OutputFormat
+from kdoc.kdoc_yaml_file import KDocTestFile
class GlobSourceFiles:
@@ -86,11 +87,81 @@ class GlobSourceFiles:
file_not_found_cb(fname)
+class KdocConfig():
+ """
+ Stores all configuration attributes that kdoc_parser and kdoc_output
+ needs.
+ """
+ def __init__(self, verbose=False, werror=False, wreturn=False,
+ wshort_desc=False, wcontents_before_sections=False,
+ logger=None):
+
+ self.verbose = verbose
+ self.werror = werror
+ self.wreturn = wreturn
+ self.wshort_desc = wshort_desc
+ self.wcontents_before_sections = wcontents_before_sections
+
+ if logger:
+ self.log = logger
+ else:
+ self.log = logging.getLogger(__file__)
+
+ self.warning = self.log.warning
+
class KernelFiles():
"""
Parse kernel-doc tags on multiple kernel source files.
- There are two type of parsers defined here:
+ This is the main entry point to run kernel-doc. This class is initialized
+ using a series of optional arguments:
+
+ ``verbose``
+ If True, enables kernel-doc verbosity. Default: False.
+
+ ``out_style``
+ Class to be used to format output. If None (default),
+ only report errors.
+
+ ``xforms``
+ Transforms to be applied to C prototypes and data structs.
+ If not specified, defaults to xforms = CFunction()
+
+ ``werror``
+ If True, treat warnings as errors, returning an error code on warnings.
+
+ Default: False.
+
+ ``wreturn``
+ If True, warns about the lack of a return markup on functions.
+
+ Default: False.
+ ``wshort_desc``
+ If True, warns if initial short description is missing.
+
+ Default: False.
+
+ ``wcontents_before_sections``
+ If True, warn if there are contents before sections (deprecated).
+ This option is kept just for backward-compatibility, but it does
+ nothing, neither here nor at the original Perl script.
+
+ Default: False.
+
+ ``logger``
+ Optional logger class instance.
+
+ If not specified, defaults to use: ``logging.getLogger("kernel-doc")``
+
+ ``yaml_file``
+ If defined, stores the output inside a YAML file.
+
+ ``yaml_content``
+ Defines what will be inside the YAML file.
+
+ Note:
+ There are two types of parsers defined here:
+
- self.parse_file(): parses both kernel-doc markups and
``EXPORT_SYMBOL*`` macros;
- self.process_export_file(): parses only ``EXPORT_SYMBOL*`` macros.
@@ -117,7 +188,12 @@ class KernelFiles():
if fname in self.files:
return
- doc = KernelDoc(self.config, fname)
+ if self.test_file:
+ store_src = True
+ else:
+ store_src = False
+
+ doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src)
export_table, entries = doc.parse_kdoc()
self.export_table[fname] = export_table
@@ -153,16 +229,21 @@ class KernelFiles():
self.error(f"Cannot find file {fname}")
- def __init__(self, verbose=False, out_style=None,
+ def __init__(self, verbose=False, out_style=None, xforms=None,
werror=False, wreturn=False, wshort_desc=False,
wcontents_before_sections=False,
- logger=None):
+ yaml_file=None, yaml_content=None, logger=None):
"""
Initialize startup variables and parse all files.
"""
if not verbose:
- verbose = bool(os.environ.get("KBUILD_VERBOSE", 0))
+ try:
+ verbose = bool(int(os.environ.get("KBUILD_VERBOSE", 0)))
+ except ValueError:
+ # Handles an eventual case where verbosity is not a number
+ # like KBUILD_VERBOSE=""
+ verbose = False
if out_style is None:
out_style = OutputFormat()
@@ -181,29 +262,36 @@ class KernelFiles():
if kdoc_werror:
werror = kdoc_werror
+ if not logger:
+ logger = logging.getLogger("kernel-doc")
+ else:
+ logger = logger
+
# Some variables are global to the parser logic as a whole as they are
# used to send control configuration to KernelDoc class. As such,
# those variables are read-only inside the KernelDoc.
- self.config = argparse.Namespace
+ self.config = KdocConfig(verbose, werror, wreturn, wshort_desc,
+ wcontents_before_sections, logger)
- self.config.verbose = verbose
- self.config.werror = werror
- self.config.wreturn = wreturn
- self.config.wshort_desc = wshort_desc
- self.config.wcontents_before_sections = wcontents_before_sections
+ # Override log warning, as we want to count errors
+ self.config.warning = self.warning
- if not logger:
- self.config.log = logging.getLogger("kernel-doc")
+ if yaml_file:
+ self.test_file = KDocTestFile(self.config, yaml_file, yaml_content)
else:
- self.config.log = logger
+ self.test_file = None
- self.config.warning = self.warning
+ if xforms:
+ self.xforms = xforms
+ else:
+ self.xforms = CTransforms()
self.config.src_tree = os.environ.get("SRCTREE", None)
# Initialize variables that are internal to KernelFiles
self.out_style = out_style
+ self.out_style.set_config(self.config)
self.errors = 0
self.results = {}
@@ -246,8 +334,6 @@ class KernelFiles():
returning kernel-doc markups on each interaction.
"""
- self.out_style.set_config(self.config)
-
if not filenames:
filenames = sorted(self.results.keys())
@@ -267,29 +353,28 @@ class KernelFiles():
for s in symbol:
function_table.add(s)
- self.out_style.set_filter(export, internal, symbol, nosymbol,
- function_table, enable_lineno,
- no_doc_sections)
-
- msg = ""
if fname not in self.results:
self.config.log.warning("No kernel-doc for file %s", fname)
continue
symbols = self.results[fname]
- self.out_style.set_symbols(symbols)
- for arg in symbols:
- m = self.out_msg(fname, arg.name, arg)
+ if self.test_file:
+ self.test_file.set_filter(export, internal, symbol, nosymbol,
+ function_table, enable_lineno,
+ no_doc_sections)
- if m is None:
- ln = arg.get("ln", 0)
- dtype = arg.get('type', "")
+ self.test_file.output_symbols(fname, symbols)
- self.config.log.warning("%s:%d Can't handle %s",
- fname, ln, dtype)
- else:
- msg += m
+ continue
+
+ self.out_style.set_filter(export, internal, symbol, nosymbol,
+ function_table, enable_lineno,
+ no_doc_sections)
+ msg = self.out_style.output_symbols(fname, symbols)
if msg:
yield fname, msg
+
+ if self.test_file:
+ self.test_file.write()
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
index 2b8a93f79716..a7aa6e1e4c1c 100644
--- a/tools/lib/python/kdoc/kdoc_item.py
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -14,7 +14,8 @@ class KdocItem:
then pass into the output modules.
"""
- def __init__(self, name, fname, type, start_line, **other_stuff):
+ def __init__(self, name, fname, type, start_line,
+ **other_stuff):
self.name = name
self.fname = fname
self.type = type
@@ -22,15 +23,34 @@ class KdocItem:
self.sections = {}
self.sections_start_lines = {}
self.parameterlist = []
- self.parameterdesc_start_lines = []
+ self.parameterdesc_start_lines = {}
self.parameterdescs = {}
self.parametertypes = {}
+
+ self.warnings = []
+
#
# Just save everything else into our own dict so that the output
# side can grab it directly as before. As we move things into more
# structured data, this will, hopefully, fade away.
#
- self.other_stuff = other_stuff
+ known_keys = {
+ 'declaration_start_line',
+ 'sections',
+ 'sections_start_lines',
+ 'parameterlist',
+ 'parameterdesc_start_lines',
+ 'parameterdescs',
+ 'parametertypes',
+ 'warnings',
+ }
+
+ self.other_stuff = {}
+ for k, v in other_stuff.items():
+ if k in known_keys:
+ setattr(self, k, v) # real attribute
+ else:
+ self.other_stuff[k] = v
def get(self, key, default = None):
"""
@@ -41,6 +61,23 @@ class KdocItem:
def __getitem__(self, key):
return self.get(key)
+ def __repr__(self):
+ return f"KdocItem({self.name}, {self.fname}, {self.type}, {self.declaration_start_line})"
+
+ @classmethod
+ def from_dict(cls, d):
+ """Create a KdocItem from a plain dict."""
+
+ cp = d.copy()
+ name = cp.pop('name', None)
+ fname = cp.pop('fname', None)
+ type = cp.pop('type', None)
+ start_line = cp.pop('start_line', 1)
+ other_stuff = cp.pop('other_stuff', {})
+
+ # Everything that’s left goes straight to __init__
+ return cls(name, fname, type, start_line, **cp, **other_stuff)
+
#
# Tracking of section and parameter information.
#
@@ -49,7 +86,7 @@ class KdocItem:
Set sections and start lines.
"""
self.sections = sections
- self.section_start_lines = start_lines
+ self.sections_start_lines = start_lines
def set_params(self, names, descs, types, starts):
"""
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
index 4210b91dde5f..de107ab4a281 100644
--- a/tools/lib/python/kdoc/kdoc_output.py
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -222,6 +222,27 @@ class OutputFormat:
return None
+ def output_symbols(self, fname, symbols):
+ """
+ Handles a set of KdocItem symbols.
+ """
+ self.set_symbols(symbols)
+
+ msg = ""
+ for arg in symbols:
+ m = self.msg(fname, arg.name, arg)
+
+ if m is None:
+ ln = arg.get("ln", 0)
+ dtype = arg.get('type', "")
+
+ self.config.log.warning("%s:%d Can't handle %s",
+ fname, ln, dtype)
+ else:
+ msg += m
+
+ return msg
+
# Virtual methods to be overridden by inherited classes
# At the base class, those do nothing.
def set_symbols(self, symbols):
@@ -368,7 +389,7 @@ class RestFormat(OutputFormat):
else:
self.data += f'{self.lineprefix}**{section}**\n\n'
- self.print_lineno(args.section_start_lines.get(section, 0))
+ self.print_lineno(args.sections_start_lines.get(section, 0))
self.output_highlight(text)
self.data += "\n"
self.data += "\n"
@@ -492,7 +513,9 @@ class RestFormat(OutputFormat):
def out_var(self, fname, name, args):
oldprefix = self.lineprefix
ln = args.declaration_start_line
- full_proto = args.other_stuff["full_proto"]
+ full_proto = args.other_stuff.get("full_proto")
+ if not full_proto:
+ raise KeyError(f"Can't find full proto for {name} variable")
self.lineprefix = " "
@@ -580,7 +603,35 @@ class RestFormat(OutputFormat):
class ManFormat(OutputFormat):
- """Consts and functions used by man pages output."""
+ """
+ Consts and functions used by man pages output.
+
+ This class has one mandatory parameter and some optional ones, which
+ are needed to define the title header contents:
+
+ ``modulename``
+ Defines the module name to be used at the troff ``.TH`` output.
+
+ This argument is optional. If not specified, it will be filled
+ with the directory which contains the documented file.
+
+ ``section``
+ Usually a numeric value from 0 to 9, but man pages also accept
+ some strings like "p".
+
+ Defaults to ``9``
+
+ ``manual``
+ Defaults to ``Kernel API Manual``.
+
+ The above controls the output of the corresponding fields on troff
+ title headers, which will be filled like this::
+
+ .TH "{name}" {section} "{date}" "{modulename}" "{manual}"
+
+ where ``name`` will match the API symbol name, and ``date`` will be
+ either the date when the Kernel was compiled or the current date.
+ """
highlights = (
(type_constant, r"\1"),
@@ -607,7 +658,21 @@ class ManFormat(OutputFormat):
"%m %d %Y",
]
- def __init__(self, modulename):
+ def modulename(self, args):
+ if self._modulename:
+ return self._modulename
+
+ return os.path.dirname(args.fname)
+
+ def emit_th(self, name, args):
+ """Emit a title header line."""
+ title = name.strip()
+ module = self.modulename(args)
+
+ self.data += f'.TH "{title}" {self.section} "{self.date}" '
+ self.data += f'"{module}" "{self.manual}"\n'
+
+ def __init__(self, modulename=None, section="9", manual="Kernel API Manual"):
"""
Creates class variables.
@@ -616,7 +681,11 @@ class ManFormat(OutputFormat):
"""
super().__init__()
- self.modulename = modulename
+
+ self._modulename = modulename
+ self.section = section
+ self.manual = manual
+
self.symbols = []
dt = None
@@ -632,7 +701,7 @@ class ManFormat(OutputFormat):
if not dt:
dt = datetime.now()
- self.man_date = dt.strftime("%B %Y")
+ self.date = dt.strftime("%B %Y")
def arg_name(self, args, name):
"""
@@ -647,7 +716,8 @@ class ManFormat(OutputFormat):
dtype = args.type
if dtype == "doc":
- return self.modulename
+ return name
+# return os.path.basename(self.modulename(args))
if dtype in ["function", "typedef"]:
return name
@@ -697,6 +767,185 @@ class ManFormat(OutputFormat):
return self.data
+ def emit_table(self, colspec_row, rows):
+
+ if not rows:
+ return ""
+
+ out = ""
+ colspec = "\t".join(["l"] * len(rows[0]))
+
+ out += "\n.TS\n"
+ out += "box;\n"
+ out += f"{colspec}.\n"
+
+ if colspec_row:
+ out_row = []
+
+ for text in colspec_row:
+ out_row.append(f"\\fB{text}\\fP")
+
+ out += "\t".join(out_row) + "\n_\n"
+
+ for r in rows:
+ out += "\t".join(r) + "\n"
+
+ out += ".TE\n"
+
+ return out
+
+ def grid_table(self, lines, start):
+ """
+ Ancillary function to help handling a grid table inside the text.
+ """
+
+ i = start + 1
+ rows = []
+ colspec_row = None
+
+ while i < len(lines):
+ line = lines[i]
+
+ if KernRe(r"^\s*\|.*\|\s*$").match(line):
+ parts = []
+
+ for p in line.strip('|').split('|'):
+ parts.append(p.strip())
+
+ rows.append(parts)
+
+ elif KernRe(r'^\+\=[\+\=]+\+\s*$').match(line):
+ if rows and rows[0]:
+ if not colspec_row:
+ colspec_row = [""] * len(rows[0])
+
+ for j in range(0, len(rows[0])):
+ content = []
+ for row in rows:
+ content.append(row[j])
+
+ colspec_row[j] = " ".join(content)
+
+ rows = []
+
+ elif KernRe(r"^\s*\+[-+]+\+.*$").match(line):
+ pass
+
+ else:
+ break
+
+ i += 1
+
+ return i, self.emit_table(colspec_row, rows)
+
+ def simple_table(self, lines, start):
+ """
+ Ancillary function to help handling a simple table inside the text.
+ """
+
+ i = start
+ rows = []
+ colspec_row = None
+
+ pos = []
+ for m in KernRe(r'\=+').finditer(lines[i]):
+ pos.append((m.start(), m.end() - 1))
+
+ i += 1
+ while i < len(lines):
+ line = lines[i]
+
+ if KernRe(r"^\s*[\=]+[ \t\=]+$").match(line):
+ i += 1
+ break
+
+ elif KernRe(r'^[\s=]+$').match(line):
+ if rows and rows[0]:
+ if not colspec_row:
+ colspec_row = [""] * len(rows[0])
+
+ for j in range(0, len(rows[0])):
+ content = []
+ for row in rows:
+ content.append(row[j])
+
+ colspec_row[j] = " ".join(content)
+
+ rows = []
+
+ else:
+ row = [""] * len(pos)
+
+ for j in range(0, len(pos)):
+ start, end = pos[j]
+
+ row[j] = line[start:end].strip()
+
+ rows.append(row)
+
+ i += 1
+
+ return i, self.emit_table(colspec_row, rows)
+
+ def code_block(self, lines, start):
+ """
+ Ensure that code blocks won't be messed up at the output.
+
+ By default, troff join lines at the same paragraph. Disable it,
+ on code blocks.
+ """
+
+ line = lines[start]
+
+ if "code-block" in line:
+ out = "\n.nf\n"
+ elif line.startswith("..") and line.endswith("::"):
+ #
+ # Handle note, warning, error, ... markups
+ #
+ line = line[2:-1].strip().upper()
+ out = f"\n.nf\n\\fB{line}\\fP\n"
+ elif line.endswith("::"):
+ out = line[:-1]
+ out += "\n.nf\n"
+ else:
+ # Just in case. Should never happen in practice
+ out = "\n.nf\n"
+
+ i = start + 1
+ ident = None
+
+ while i < len(lines):
+ line = lines[i]
+
+ m = KernRe(r"\S").match(line)
+ if not m:
+ out += line + "\n"
+ i += 1
+ continue
+
+ pos = m.start()
+ if not ident:
+ if pos > 0:
+ ident = pos
+ else:
+ out += "\n.fi\n"
+ if i > start + 1:
+ return i - 1, out
+ else:
+ # Just in case. Should never happen in practice
+ return i, out
+
+ if pos >= ident:
+ out += line + "\n"
+ i += 1
+ continue
+
+ break
+
+ out += "\n.fi\n"
+ return i, out
+
def output_highlight(self, block):
"""
Outputs a C symbol that may require being highlighted with
@@ -708,15 +957,46 @@ class ManFormat(OutputFormat):
if isinstance(contents, list):
contents = "\n".join(contents)
- for line in contents.strip("\n").split("\n"):
- line = KernRe(r"^\s*").sub("", line)
- if not line:
- continue
+ lines = contents.strip("\n").split("\n")
+ i = 0
- if line[0] == ".":
- self.data += "\\&" + line + "\n"
+ while i < len(lines):
+ org_line = lines[i]
+
+ line = KernRe(r"^\s*").sub("", org_line)
+
+ if line:
+ if KernRe(r"^\+\-[-+]+\+.*$").match(line):
+ i, text = self.grid_table(lines, i)
+ self.data += text
+ continue
+
+ if KernRe(r"^\=+[ \t]\=[ \t\=]+$").match(line):
+ i, text = self.simple_table(lines, i)
+ self.data += text
+ continue
+
+ if line.endswith("::") or KernRe(r"\.\.\s+code-block.*::").match(line):
+ i, text = self.code_block(lines, i)
+ self.data += text
+ continue
+
+ if line[0] == ".":
+ self.data += "\\&" + line + "\n"
+ i += 1
+ continue
+
+ #
+ # Handle lists
+ #
+ line = KernRe(r'^[-*]\s+').sub(r'.IP \[bu]\n', line)
+ line = KernRe(r'^(\d+|a-z)[\.\)]\s+').sub(r'.IP \1\n', line)
else:
- self.data += line + "\n"
+ line = ".PP\n"
+
+ i += 1
+
+ self.data += line + "\n"
def out_doc(self, fname, name, args):
if not self.check_doc(name, args):
@@ -724,7 +1004,7 @@ class ManFormat(OutputFormat):
out_name = self.arg_name(args, name)
- self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+ self.emit_th(out_name, args)
for section, text in args.sections.items():
self.data += f'.SH "{section}"' + "\n"
@@ -734,7 +1014,7 @@ class ManFormat(OutputFormat):
out_name = self.arg_name(args, name)
- self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
+ self.emit_th(out_name, args)
self.data += ".SH NAME\n"
self.data += f"{name} \\- {args['purpose']}\n"
@@ -780,7 +1060,7 @@ class ManFormat(OutputFormat):
def out_enum(self, fname, name, args):
out_name = self.arg_name(args, name)
- self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+ self.emit_th(out_name, args)
self.data += ".SH NAME\n"
self.data += f"enum {name} \\- {args['purpose']}\n"
@@ -813,7 +1093,7 @@ class ManFormat(OutputFormat):
out_name = self.arg_name(args, name)
full_proto = args.other_stuff["full_proto"]
- self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+ self.emit_th(out_name, args)
self.data += ".SH NAME\n"
self.data += f"{name} \\- {args['purpose']}\n"
@@ -830,11 +1110,11 @@ class ManFormat(OutputFormat):
self.output_highlight(text)
def out_typedef(self, fname, name, args):
- module = self.modulename
+ module = self.modulename(args)
purpose = args.get('purpose')
out_name = self.arg_name(args, name)
- self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+ self.emit_th(out_name, args)
self.data += ".SH NAME\n"
self.data += f"typedef {name} \\- {purpose}\n"
@@ -844,12 +1124,12 @@ class ManFormat(OutputFormat):
self.output_highlight(text)
def out_struct(self, fname, name, args):
- module = self.modulename
+ module = self.modulename(args)
purpose = args.get('purpose')
definition = args.get('definition')
out_name = self.arg_name(args, name)
- self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+ self.emit_th(out_name, args)
self.data += ".SH NAME\n"
self.data += f"{args.type} {name} \\- {purpose}\n"
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index ca00695b47b3..74af7ae47aa4 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -13,7 +13,8 @@ import sys
import re
from pprint import pformat
-from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.c_lex import CTokenizer, tokenizer_set_log
+from kdoc.kdoc_re import KernRe
from kdoc.kdoc_item import KdocItem
#
@@ -70,140 +71,9 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * '
cache = False)
#
-# Here begins a long set of transformations to turn structure member prefixes
-# and macro invocations into something we can parse and generate kdoc for.
-#
-struct_args_pattern = r'([^,)]+)'
-
-struct_xforms = [
- # Strip attributes
- (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
- (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__packed\s*', re.S), ' '),
- (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
- (KernRe(r'\s*__private', re.S), ' '),
- (KernRe(r'\s*__rcu', re.S), ' '),
- (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
- (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
- (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
- #
- # Unwrap struct_group macros based on this definition:
- # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
- # which has variants like: struct_group(NAME, MEMBERS...)
- # Only MEMBERS arguments require documentation.
- #
- # Parsing them happens on two steps:
- #
- # 1. drop struct group arguments that aren't at MEMBERS,
- # storing them as STRUCT_GROUP(MEMBERS)
- #
- # 2. remove STRUCT_GROUP() ancillary macro.
- #
- # The original logic used to remove STRUCT_GROUP() using an
- # advanced regex:
- #
- # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
- #
- # with two patterns that are incompatible with
- # Python re module, as it has:
- #
- # - a recursive pattern: (?1)
- # - an atomic grouping: (?>...)
- #
- # I tried a simpler version: but it didn't work either:
- # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
- #
- # As it doesn't properly match the end parenthesis on some cases.
- #
- # So, a better solution was crafted: there's now a NestedMatch
- # class that ensures that delimiters after a search are properly
- # matched. So, the implementation to drop STRUCT_GROUP() will be
- # handled in separate.
- #
- (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
- (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
- (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
- (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
- #
- # Replace macros
- #
- # TODO: use NestedMatch for FOO($1, $2, ...) matches
- #
- # it is better to also move those to the NestedMatch logic,
- # to ensure that parentheses will be properly matched.
- #
- (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
- r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
- (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
- r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
- (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
- re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
- (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
- re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
- (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
- r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
- (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
- struct_args_pattern + r'\)', re.S), r'\2 *\1'),
- (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
- struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
- (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
- (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
-]
-#
-# Regexes here are guaranteed to have the end delimiter matching
-# the start delimiter. Yet, right now, only one replace group
-# is allowed.
-#
-struct_nested_prefixes = [
- (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
-]
-
-#
-# Transforms for function prototypes
-#
-function_xforms = [
- (KernRe(r"^static +"), ""),
- (KernRe(r"^extern +"), ""),
- (KernRe(r"^asmlinkage +"), ""),
- (KernRe(r"^inline +"), ""),
- (KernRe(r"^__inline__ +"), ""),
- (KernRe(r"^__inline +"), ""),
- (KernRe(r"^__always_inline +"), ""),
- (KernRe(r"^noinline +"), ""),
- (KernRe(r"^__FORTIFY_INLINE +"), ""),
- (KernRe(r"__init +"), ""),
- (KernRe(r"__init_or_module +"), ""),
- (KernRe(r"__exit +"), ""),
- (KernRe(r"__deprecated +"), ""),
- (KernRe(r"__flatten +"), ""),
- (KernRe(r"__meminit +"), ""),
- (KernRe(r"__must_check +"), ""),
- (KernRe(r"__weak +"), ""),
- (KernRe(r"__sched +"), ""),
- (KernRe(r"_noprof"), ""),
- (KernRe(r"__always_unused *"), ""),
- (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
- (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
- (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
- (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
- (KernRe(r"__attribute_const__ +"), ""),
- (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
-]
-
-#
# Ancillary functions
#
-def apply_transforms(xforms, text):
- """
- Apply a set of transforms to a block of text.
- """
- for search, subst in xforms:
- text = search.sub(subst, text)
- return text
-
multi_space = KernRe(r'\s\s+')
def trim_whitespace(s):
"""
@@ -215,15 +85,9 @@ def trim_private_members(text):
"""
Remove ``struct``/``enum`` members that have been marked "private".
"""
- # First look for a "public:" block that ends a private region, then
- # handle the "private until the end" case.
- #
- text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text)
- text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text)
- #
- # We needed the comments to do the above, but now we can take them out.
- #
- return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip()
+
+ tokens = CTokenizer(text)
+ return str(tokens)
class state:
"""
@@ -276,7 +140,7 @@ class KernelEntry:
self.parametertypes = {}
self.parameterdesc_start_lines = {}
- self.section_start_lines = {}
+ self.sections_start_lines = {}
self.sections = {}
self.anon_struct_union = False
@@ -356,7 +220,7 @@ class KernelEntry:
self.sections[name] += '\n' + contents
else:
self.sections[name] = contents
- self.section_start_lines[name] = self.new_start_line
+ self.sections_start_lines[name] = self.new_start_line
self.new_start_line = 0
# self.config.log.debug("Section: %s : %s", name, pformat(vars(self)))
@@ -382,11 +246,15 @@ class KernelDoc:
#: String to write when a parameter is not described.
undescribed = "-- undescribed --"
- def __init__(self, config, fname):
+ def __init__(self, config, fname, xforms, store_src=False):
"""Initialize internal variables"""
self.fname = fname
self.config = config
+ self.xforms = xforms
+ self.store_src = store_src
+
+ tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ")
# Initial state for the state machines
self.state = state.NORMAL
@@ -449,7 +317,7 @@ class KernelDoc:
for section in ["Description", "Return"]:
if section in sections and not sections[section].rstrip():
del sections[section]
- item.set_sections(sections, self.entry.section_start_lines)
+ item.set_sections(sections, self.entry.sections_start_lines)
item.set_params(self.entry.parameterlist, self.entry.parameterdescs,
self.entry.parametertypes,
self.entry.parameterdesc_start_lines)
@@ -849,13 +717,15 @@ class KernelDoc:
return declaration
- def dump_struct(self, ln, proto):
+ def dump_struct(self, ln, proto, source):
"""
Store an entry for a ``struct`` or ``union``
"""
#
# Do the basic parse to get the pieces of the declaration.
#
+ source = source
+ proto = trim_private_members(proto)
struct_parts = self.split_struct_proto(proto)
if not struct_parts:
self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
@@ -869,12 +739,8 @@ class KernelDoc:
#
# Go through the list of members applying all of our transformations.
#
- members = trim_private_members(members)
- members = apply_transforms(struct_xforms, members)
+ members = self.xforms.apply("struct", members)
- nested = NestedMatch()
- for search, sub in struct_nested_prefixes:
- members = nested.sub(search, sub, members)
#
# Deal with embedded struct and union members, and drop enums entirely.
#
@@ -888,10 +754,11 @@ class KernelDoc:
declaration_name)
self.check_sections(ln, declaration_name, decl_type)
self.output_declaration(decl_type, declaration_name,
+ source=source,
definition=self.format_struct_decl(declaration),
purpose=self.entry.declaration_purpose)
- def dump_enum(self, ln, proto):
+ def dump_enum(self, ln, proto, source):
"""
Store an ``enum`` inside self.entries array.
"""
@@ -899,6 +766,8 @@ class KernelDoc:
# Strip preprocessor directives. Note that this depends on the
# trailing semicolon we added in process_proto_type().
#
+ source = source
+ proto = trim_private_members(proto)
proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
#
# Parse out the name and members of the enum. Typedef form first.
@@ -906,7 +775,7 @@ class KernelDoc:
r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
if r.search(proto):
declaration_name = r.group(2)
- members = trim_private_members(r.group(1))
+ members = r.group(1)
#
# Failing that, look for a straight enum
#
@@ -914,7 +783,7 @@ class KernelDoc:
r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
if r.match(proto):
declaration_name = r.group(1)
- members = trim_private_members(r.group(2))
+ members = r.group(2)
#
# OK, this isn't going to work.
#
@@ -943,9 +812,10 @@ class KernelDoc:
member_set = set()
members = KernRe(r'\([^;)]*\)').sub('', members)
for arg in members.split(','):
- if not arg:
- continue
arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+ if not arg.strip():
+ continue
+
self.entry.parameterlist.append(arg)
if arg not in self.entry.parameterdescs:
self.entry.parameterdescs[arg] = self.undescribed
@@ -961,29 +831,23 @@ class KernelDoc:
f"Excess enum value '@{k}' description in '{declaration_name}'")
self.output_declaration('enum', declaration_name,
+ source=source,
purpose=self.entry.declaration_purpose)
- def dump_var(self, ln, proto):
+ def dump_var(self, ln, proto, source):
"""
Store variables that are part of kAPI.
"""
VAR_ATTRIBS = [
"extern",
+ "const",
]
- OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?"
-
- sub_prefixes = [
- (KernRe(r"__read_mostly"), ""),
- (KernRe(r"__ro_after_init"), ""),
- (KernRe(r"(?://.*)$"), ""),
- (KernRe(r"(?:/\*.*\*/)"), ""),
- (KernRe(r";$"), ""),
- (KernRe(r"=.*"), ""),
- ]
+ OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*"
#
# Store the full prototype before modifying it
#
+ source = source
full_proto = proto
declaration_name = None
@@ -1004,8 +868,7 @@ class KernelDoc:
# Drop comments and macros to have a pure C prototype
#
if not declaration_name:
- for r, sub in sub_prefixes:
- proto = r.sub(sub, proto)
+ proto = self.xforms.apply("var", proto)
proto = proto.rstrip()
@@ -1015,17 +878,17 @@ class KernelDoc:
default_val = None
- r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+ r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
if r.match(proto):
if not declaration_name:
declaration_name = r.group(1)
default_val = r.group(2)
else:
- r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
- if r.match(proto):
- default_val = r.group(1)
+ r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+ if r.match(proto):
+ default_val = r.group(1)
if not declaration_name:
self.emit_msg(ln,f"{proto}: can't parse variable")
return
@@ -1034,39 +897,38 @@ class KernelDoc:
default_val = default_val.lstrip("=").strip()
self.output_declaration("var", declaration_name,
+ source=source,
full_proto=full_proto,
default_val=default_val,
purpose=self.entry.declaration_purpose)
- def dump_declaration(self, ln, prototype):
+ def dump_declaration(self, ln, prototype, source):
"""
Store a data declaration inside self.entries array.
"""
if self.entry.decl_type == "enum":
- self.dump_enum(ln, prototype)
+ self.dump_enum(ln, prototype, source)
elif self.entry.decl_type == "typedef":
- self.dump_typedef(ln, prototype)
+ self.dump_typedef(ln, prototype, source)
elif self.entry.decl_type in ["union", "struct"]:
- self.dump_struct(ln, prototype)
+ self.dump_struct(ln, prototype, source)
elif self.entry.decl_type == "var":
- self.dump_var(ln, prototype)
+ self.dump_var(ln, prototype, source)
else:
# This would be a bug
self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}')
- def dump_function(self, ln, prototype):
+ def dump_function(self, ln, prototype, source):
"""
Store a function or function macro inside self.entries array.
"""
+ source = source
found = func_macro = False
return_type = ''
decl_type = 'function'
- #
- # Apply the initial transformations.
- #
- prototype = apply_transforms(function_xforms, prototype)
+
#
# If we have a macro, remove the "#define" at the front.
#
@@ -1085,6 +947,11 @@ class KernelDoc:
declaration_name = r.group(1)
func_macro = True
found = True
+ else:
+ #
+ # Apply the initial transformations.
+ #
+ prototype = self.xforms.apply("func", prototype)
# Yes, this truly is vile. We are looking for:
# 1. Return type (may be nothing if we're looking at a macro)
@@ -1150,13 +1017,14 @@ class KernelDoc:
# Store the result.
#
self.output_declaration(decl_type, declaration_name,
+ source=source,
typedef=('typedef' in return_type),
functiontype=return_type,
purpose=self.entry.declaration_purpose,
func_macro=func_macro)
- def dump_typedef(self, ln, proto):
+ def dump_typedef(self, ln, proto, source):
"""
Store a ``typedef`` inside self.entries array.
"""
@@ -1167,6 +1035,8 @@ class KernelDoc:
typedef_ident = r'\*?\s*(\w\S+)\s*'
typedef_args = r'\s*\((.*)\);'
+ source = source
+
typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args)
typedef2 = KernRe(typedef_type + typedef_ident + typedef_args)
@@ -1187,6 +1057,7 @@ class KernelDoc:
self.create_parameter_list(ln, 'function', args, ',', declaration_name)
self.output_declaration('function', declaration_name,
+ source=source,
typedef=True,
functiontype=return_type,
purpose=self.entry.declaration_purpose)
@@ -1204,6 +1075,7 @@ class KernelDoc:
return
self.output_declaration('typedef', declaration_name,
+ source=source,
purpose=self.entry.declaration_purpose)
return
@@ -1241,7 +1113,7 @@ class KernelDoc:
function_set.add(symbol)
return True
- def process_normal(self, ln, line):
+ def process_normal(self, ln, line, source):
"""
STATE_NORMAL: looking for the ``/**`` to begin everything.
"""
@@ -1255,7 +1127,7 @@ class KernelDoc:
# next line is always the function name
self.state = state.NAME
- def process_name(self, ln, line):
+ def process_name(self, ln, line, source):
"""
STATE_NAME: Looking for the "name - description" line
"""
@@ -1388,7 +1260,7 @@ class KernelDoc:
return False
- def process_decl(self, ln, line):
+ def process_decl(self, ln, line, source):
"""
STATE_DECLARATION: We've seen the beginning of a declaration.
"""
@@ -1417,7 +1289,7 @@ class KernelDoc:
self.emit_msg(ln, f"bad line: {line}")
- def process_special(self, ln, line):
+ def process_special(self, ln, line, source):
"""
STATE_SPECIAL_SECTION: a section ending with a blank line.
"""
@@ -1468,7 +1340,7 @@ class KernelDoc:
# Unknown line, ignore
self.emit_msg(ln, f"bad line: {line}")
- def process_body(self, ln, line):
+ def process_body(self, ln, line, source):
"""
STATE_BODY: the bulk of a kerneldoc comment.
"""
@@ -1482,7 +1354,7 @@ class KernelDoc:
# Unknown line, ignore
self.emit_msg(ln, f"bad line: {line}")
- def process_inline_name(self, ln, line):
+ def process_inline_name(self, ln, line, source):
"""STATE_INLINE_NAME: beginning of docbook comments within a prototype."""
if doc_inline_sect.search(line):
@@ -1495,9 +1367,15 @@ class KernelDoc:
elif doc_content.search(line):
self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
self.state = state.PROTO
+
+ #
+            # Don't let it add partial comments to the code, as that breaks
+            # the logic meant to remove comments from prototypes.
+ #
+ self.process_proto_type(ln, "/**\n" + line, source)
# else ... ??
- def process_inline_text(self, ln, line):
+ def process_inline_text(self, ln, line, source):
"""STATE_INLINE_TEXT: docbook comments within a prototype."""
if doc_inline_end.search(line):
@@ -1583,7 +1461,7 @@ class KernelDoc:
return proto
- def process_proto_function(self, ln, line):
+ def process_proto_function(self, ln, line, source):
"""Ancillary routine to process a function prototype."""
# strip C99-style comments to end of line
@@ -1625,10 +1503,10 @@ class KernelDoc:
#
# ... and we're done
#
- self.dump_function(ln, self.entry.prototype)
+ self.dump_function(ln, self.entry.prototype, source)
self.reset_state(ln)
- def process_proto_type(self, ln, line):
+ def process_proto_type(self, ln, line, source):
"""
Ancillary routine to process a type.
"""
@@ -1658,7 +1536,7 @@ class KernelDoc:
elif chunk == '}':
self.entry.brcount -= 1
elif chunk == ';' and self.entry.brcount <= 0:
- self.dump_declaration(ln, self.entry.prototype)
+ self.dump_declaration(ln, self.entry.prototype, source)
self.reset_state(ln)
return
#
@@ -1667,7 +1545,7 @@ class KernelDoc:
#
self.entry.prototype += ' '
- def process_proto(self, ln, line):
+ def process_proto(self, ln, line, source):
"""STATE_PROTO: reading a function/whatever prototype."""
if doc_inline_oneline.search(line):
@@ -1679,17 +1557,18 @@ class KernelDoc:
self.state = state.INLINE_NAME
elif self.entry.decl_type == 'function':
- self.process_proto_function(ln, line)
+ self.process_proto_function(ln, line, source)
else:
- self.process_proto_type(ln, line)
+ self.process_proto_type(ln, line, source)
- def process_docblock(self, ln, line):
+ def process_docblock(self, ln, line, source):
"""STATE_DOCBLOCK: within a ``DOC:`` block."""
if doc_end.search(line):
self.dump_section()
- self.output_declaration("doc", self.entry.identifier)
+ self.output_declaration("doc", self.entry.identifier,
+ source=source)
self.reset_state(ln)
elif doc_content.search(line):
@@ -1740,6 +1619,8 @@ class KernelDoc:
prev = ""
prev_ln = None
export_table = set()
+ self.state = state.NORMAL
+ source = ""
try:
with open(self.fname, "r", encoding="utf8",
@@ -1766,6 +1647,12 @@ class KernelDoc:
ln, state.name[self.state],
line)
+ if self.store_src:
+ if source and self.state == state.NORMAL:
+ source = ""
+ elif self.state != state.NORMAL:
+ source += line + "\n"
+
# This is an optimization over the original script.
# There, when export_file was used for the same file,
# it was read twice. Here, we use the already-existing
@@ -1773,8 +1660,11 @@ class KernelDoc:
#
if (self.state != state.NORMAL) or \
not self.process_export(export_table, line):
+ prev_state = self.state
# Hand this line to the appropriate state handler
- self.state_actions[self.state](self, ln, line)
+ self.state_actions[self.state](self, ln, line, source)
+ if prev_state == state.NORMAL and self.state != state.NORMAL:
+ source += line + "\n"
self.emit_unused_warnings()
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
index 0bf9e01cdc57..28292efe25a2 100644
--- a/tools/lib/python/kdoc/kdoc_re.py
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -52,7 +52,33 @@ class KernRe:
return self.regex.pattern
def __repr__(self):
- return f're.compile("{self.regex.pattern}")'
+ """
+ Returns a displayable version of the class init.
+ """
+
+ flag_map = {
+ re.IGNORECASE: "re.I",
+ re.MULTILINE: "re.M",
+ re.DOTALL: "re.S",
+ re.VERBOSE: "re.X",
+ }
+
+ flags = []
+ for flag, name in flag_map.items():
+ if self.regex.flags & flag:
+ flags.append(name)
+
+ flags_name = " | ".join(flags)
+
+ max_len = 60
+ pattern = ""
+ for pos in range(0, len(self.regex.pattern), max_len):
+ pattern += '"' + self.regex.pattern[pos:max_len + pos] + '" '
+
+ if flags_name:
+ return f'KernRe({pattern}, {flags_name})'
+ else:
+ return f'KernRe({pattern})'
def __add__(self, other):
"""
@@ -78,6 +104,13 @@ class KernRe:
self.last_match = self.regex.search(string)
return self.last_match
+ def finditer(self, string):
+ """
+ Alias to re.finditer.
+ """
+
+ return self.regex.finditer(string)
+
def findall(self, string):
"""
Alias to re.findall.
@@ -106,173 +139,9 @@ class KernRe:
return self.last_match.group(num)
-
-class NestedMatch:
- """
- Finding nested delimiters is hard with regular expressions. It is
- even harder on Python with its normal re module, as there are several
- advanced regular expressions that are missing.
-
- This is the case of this pattern::
-
- '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
-
- which is used to properly match open/close parentheses of the
- string search STRUCT_GROUP(),
-
- Add a class that counts pairs of delimiters, using it to match and
- replace nested expressions.
-
- The original approach was suggested by:
-
- https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
-
- Although I re-implemented it to make it more generic and match 3 types
- of delimiters. The logic checks if delimiters are paired. If not, it
- will ignore the search string.
- """
-
- # TODO: make NestedMatch handle multiple match groups
- #
- # Right now, regular expressions to match it are defined only up to
- # the start delimiter, e.g.:
- #
- # \bSTRUCT_GROUP\(
- #
- # is similar to: STRUCT_GROUP\((.*)\)
- # except that the content inside the match group is delimiter-aligned.
- #
- # The content inside parentheses is converted into a single replace
- # group (e.g. r`\1').
- #
- # It would be nice to change such definition to support multiple
- # match groups, allowing a regex equivalent to:
- #
- # FOO\((.*), (.*), (.*)\)
- #
- # it is probably easier to define it not as a regular expression, but
- # with some lexical definition like:
- #
- # FOO(arg1, arg2, arg3)
-
- DELIMITER_PAIRS = {
- '{': '}',
- '(': ')',
- '[': ']',
- }
-
- RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')
-
- def _search(self, regex, line):
- """
- Finds paired blocks for a regex that ends with a delimiter.
-
- The suggestion of using finditer to match pairs came from:
- https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
- but I ended using a different implementation to align all three types
- of delimiters and seek for an initial regular expression.
-
- The algorithm seeks for open/close paired delimiters and places them
- into a stack, yielding a start/stop position of each match when the
- stack is zeroed.
-
- The algorithm should work fine for properly paired lines, but will
- silently ignore end delimiters that precede a start delimiter.
- This should be OK for kernel-doc parser, as unaligned delimiters
- would cause compilation errors. So, we don't need to raise exceptions
- to cover such issues.
- """
-
- stack = []
-
- for match_re in regex.finditer(line):
- start = match_re.start()
- offset = match_re.end()
-
- d = line[offset - 1]
- if d not in self.DELIMITER_PAIRS:
- continue
-
- end = self.DELIMITER_PAIRS[d]
- stack.append(end)
-
- for match in self.RE_DELIM.finditer(line[offset:]):
- pos = match.start() + offset
-
- d = line[pos]
-
- if d in self.DELIMITER_PAIRS:
- end = self.DELIMITER_PAIRS[d]
-
- stack.append(end)
- continue
-
- # Does the end delimiter match what is expected?
- if stack and d == stack[-1]:
- stack.pop()
-
- if not stack:
- yield start, offset, pos + 1
- break
-
- def search(self, regex, line):
+ def groups(self):
"""
- This is similar to re.search:
-
- It matches a regex that it is followed by a delimiter,
- returning occurrences only if all delimiters are paired.
+        Returns the group results of the last match.
"""
- for t in self._search(regex, line):
-
- yield line[t[0]:t[2]]
-
- def sub(self, regex, sub, line, count=0):
- r"""
- This is similar to re.sub:
-
- It matches a regex that it is followed by a delimiter,
- replacing occurrences only if all delimiters are paired.
-
- if the sub argument contains::
-
- r'\1'
-
- it will work just like re: it places there the matched paired data
- with the delimiter stripped.
-
- If count is different than zero, it will replace at most count
- items.
- """
- out = ""
-
- cur_pos = 0
- n = 0
-
- for start, end, pos in self._search(regex, line):
- out += line[cur_pos:start]
-
- # Value, ignoring start/end delimiters
- value = line[end:pos - 1]
-
- # replaces \1 at the sub string, if \1 is used there
- new_sub = sub
- new_sub = new_sub.replace(r'\1', value)
-
- out += new_sub
-
- # Drop end ';' if any
- if line[pos] == ';':
- pos += 1
-
- cur_pos = pos
- n += 1
-
- if count and count >= n:
- break
-
- # Append the remaining string
- l = len(line)
- out += line[cur_pos:l]
-
- return out
+ return self.last_match.groups()
diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py
new file mode 100644
index 000000000000..0be020d50df0
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_yaml_file.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+import os
+
+from kdoc.kdoc_output import ManFormat, RestFormat
+
+
+class KDocTestFile():
+ """
+    Handles the logic needed to store kernel-doc output inside a YAML file.
+ Useful for unit tests and regression tests.
+ """
+
+ def __init__(self, config, yaml_file, yaml_content):
+ #
+ # Bail out early if yaml is not available
+ #
+ try:
+ import yaml
+ except ImportError:
+ sys.exit("Warning: yaml package not available. Aborting it.")
+
+ self.config = config
+ self.test_file = os.path.expanduser(yaml_file)
+ self.yaml_content = yaml_content
+ self.test_names = set()
+
+ self.tests = []
+
+ out_dir = os.path.dirname(self.test_file)
+ if out_dir and not os.path.isdir(out_dir):
+ sys.exit(f"Directory {out_dir} doesn't exist.")
+
+ self.out_style = []
+
+ if "man" in self.yaml_content:
+ out_style = ManFormat()
+ out_style.set_config(self.config)
+
+ self.out_style.append(out_style)
+
+ if "rst" in self.yaml_content:
+ out_style = RestFormat()
+ out_style.set_config(self.config)
+
+ self.out_style.append(out_style)
+
+ def set_filter(self, export, internal, symbol, nosymbol,
+ function_table, enable_lineno, no_doc_sections):
+ """
+ Set filters at the output classes.
+ """
+ for out_style in self.out_style:
+ out_style.set_filter(export, internal, symbol,
+ nosymbol, function_table,
+ enable_lineno, no_doc_sections)
+
+ @staticmethod
+ def get_kdoc_item(arg, start_line=1):
+
+ d = vars(arg)
+
+ declaration_start_line = d.get("declaration_start_line")
+ if not declaration_start_line:
+ return d
+
+ d["declaration_start_line"] = start_line
+
+ parameterdesc_start_lines = d.get("parameterdesc_start_lines")
+ if parameterdesc_start_lines:
+ for key in parameterdesc_start_lines:
+ ln = parameterdesc_start_lines[key]
+ ln += start_line - declaration_start_line
+
+ parameterdesc_start_lines[key] = ln
+
+ sections_start_lines = d.get("sections_start_lines")
+ if sections_start_lines:
+ for key in sections_start_lines:
+ ln = sections_start_lines[key]
+ ln += start_line - declaration_start_line
+
+ sections_start_lines[key] = ln
+
+ return d
+
+ def output_symbols(self, fname, symbols):
+ """
+        Store source, symbols and output strings in self.tests.
+ """
+
+ #
+ # KdocItem needs to be converted into dicts
+ #
+ kdoc_item = []
+ expected = []
+
+ #
+ # Source code didn't produce any symbol
+ #
+ if not symbols:
+ return
+
+ expected_dict = {}
+ start_line=1
+
+ for arg in symbols:
+ source = arg.get("source", "")
+
+ if arg and "KdocItem" in self.yaml_content:
+ msg = self.get_kdoc_item(arg)
+
+ other_stuff = msg.get("other_stuff", {})
+ if "source" in other_stuff:
+ del other_stuff["source"]
+
+ expected_dict["kdoc_item"] = msg
+
+ base_name = arg.name
+ if not base_name:
+ base_name = fname
+ base_name = base_name.lower().replace(".", "_").replace("/", "_")
+
+
+ # Don't add duplicated names
+ i = 0
+ name = base_name
+ while name in self.test_names:
+ i += 1
+ name = f"{base_name}_{i:03d}"
+
+ self.test_names.add(name)
+
+ for out_style in self.out_style:
+ if isinstance(out_style, ManFormat):
+ key = "man"
+ else:
+ key = "rst"
+
+ expected_dict[key]= out_style.output_symbols(fname, [arg]).strip()
+
+ test = {
+ "name": name,
+ "description": f"{fname} line {arg.declaration_start_line}",
+ "fname": fname,
+ "source": source,
+ "expected": [expected_dict]
+ }
+
+ self.tests.append(test)
+
+ expected_dict = {}
+
+ def write(self):
+ """
+ Output the content of self.tests to self.test_file.
+ """
+ import yaml
+
+ # Helper function to better handle multilines
+ def str_presenter(dumper, data):
+ if "\n" in data:
+ return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+
+ return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+ # Register the representer
+ yaml.add_representer(str, str_presenter)
+
+ data = {"tests": self.tests}
+
+ with open(self.test_file, "w", encoding="utf-8") as fp:
+ yaml.dump(data, fp,
+ sort_keys=False, width=120, indent=2,
+ default_flow_style=False, allow_unicode=True,
+ explicit_start=False, explicit_end=False)
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
new file mode 100644
index 000000000000..f6ea9efb11ae
--- /dev/null
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+import re
+
+from kdoc.kdoc_re import KernRe
+from kdoc.c_lex import CMatch, CTokenizer
+
+struct_args_pattern = r"([^,)]+)"
+
+
class CTransforms:
    """
    Data class containing a long set of transformations to turn
    structure member prefixes, and macro invocations and variables
    into something we can parse and generate kdoc for.
    """

    #
    # NOTE:
    # Due to performance reasons, place CMatch rules before KernRe,
    # as this avoids running the C parser every time.
    #

    #: Transforms for structs and unions.
    struct_xforms = [
        (CMatch("__attribute__"), ""),
        (CMatch("__aligned"), ""),
        (CMatch("__counted_by"), ""),
        (CMatch("__counted_by_(le|be)"), ""),
        (CMatch("__guarded_by"), ""),
        (CMatch("__pt_guarded_by"), ""),
        (CMatch("__packed"), ""),
        (CMatch("CRYPTO_MINALIGN_ATTR"), ""),
        (CMatch("__private"), ""),
        (CMatch("__rcu"), ""),
        (CMatch("____cacheline_aligned_in_smp"), ""),
        (CMatch("____cacheline_aligned"), ""),
        (CMatch("__cacheline_group_(?:begin|end)"), ""),
        (CMatch("__ETHTOOL_DECLARE_LINK_MODE_MASK"), r"DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)"),
        (CMatch("DECLARE_PHY_INTERFACE_MASK"), r"DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)"),
        (CMatch("DECLARE_BITMAP"), r"unsigned long \1[BITS_TO_LONGS(\2)]"),
        (CMatch("DECLARE_HASHTABLE"), r"unsigned long \1[1 << ((\2) - 1)]"),
        (CMatch("DECLARE_KFIFO"), r"\2 *\1"),
        (CMatch("DECLARE_KFIFO_PTR"), r"\2 *\1"),
        (CMatch("(?:__)?DECLARE_FLEX_ARRAY"), r"\1 \2[]"),
        (CMatch("DEFINE_DMA_UNMAP_ADDR"), r"dma_addr_t \1"),
        (CMatch("DEFINE_DMA_UNMAP_LEN"), r"__u32 \1"),
        (CMatch("VIRTIO_DECLARE_FEATURES"), r"union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }"),
        (CMatch("__cond_acquires"), ""),
        (CMatch("__cond_releases"), ""),
        (CMatch("__acquires"), ""),
        (CMatch("__releases"), ""),
        (CMatch("__must_hold"), ""),
        (CMatch("__must_not_hold"), ""),
        (CMatch("__must_hold_shared"), ""),
        (CMatch("__cond_acquires_shared"), ""),
        (CMatch("__acquires_shared"), ""),
        (CMatch("__releases_shared"), ""),

        #
        # Macro __struct_group() creates an union with an anonymous
        # and a non-anonymous struct, depending on the parameters. We only
        # need one of those at kernel-doc, as we won't be documenting the same
        # members twice.
        #
        (CMatch("struct_group"), r"struct { \2+ };"),
        (CMatch("struct_group_attr"), r"struct { \3+ };"),
        (CMatch("struct_group_tagged"), r"struct { \3+ };"),
        (CMatch("__struct_group"), r"struct { \4+ };"),
    ]

    #: Transforms for function prototypes.
    function_xforms = [
        (CMatch("static"), ""),
        (CMatch("extern"), ""),
        (CMatch("asmlinkage"), ""),
        (CMatch("inline"), ""),
        (CMatch("__inline__"), ""),
        (CMatch("__inline"), ""),
        (CMatch("__always_inline"), ""),
        (CMatch("noinline"), ""),
        (CMatch("__FORTIFY_INLINE"), ""),
        (CMatch("__init"), ""),
        (CMatch("__init_or_module"), ""),
        (CMatch("__exit"), ""),
        (CMatch("__deprecated"), ""),
        (CMatch("__flatten"), ""),
        (CMatch("__meminit"), ""),
        (CMatch("__must_check"), ""),
        (CMatch("__weak"), ""),
        (CMatch("__sched"), ""),
        (CMatch("__always_unused"), ""),
        (CMatch("__printf"), ""),
        (CMatch("__(?:re)?alloc_size"), ""),
        (CMatch("__diagnose_as"), ""),
        (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"),
        (CMatch("__no_context_analysis"), ""),
        (CMatch("__attribute_const__"), ""),
        (CMatch("__attribute__"), ""),

        #
        # HACK: this is similar to process_export() hack. It is meant to
        # drop _noprof from function name. See for instance:
        # ahash_request_alloc kernel-doc declaration at include/crypto/hash.h.
        #
        (KernRe("_noprof"), ""),
    ]

    #: Transforms for variable prototypes.
    var_xforms = [
        (CMatch("__read_mostly"), ""),
        (CMatch("__ro_after_init"), ""),
        (CMatch("__guarded_by"), ""),
        (CMatch("__pt_guarded_by"), ""),
        (CMatch("LIST_HEAD"), r"struct list_head \1"),

        (KernRe(r"(?://.*)$"), ""),
        (KernRe(r"(?:/\*.*\*/)"), ""),
        (KernRe(r";$"), ""),
    ]

    #: Transforms main dictionary used at apply_transforms().
    xforms = {
        "struct": struct_xforms,
        "func": function_xforms,
        "var": var_xforms,
    }

    def apply(self, xforms_type, source):
        """
        Apply a set of transforms to a block of source.

        As the tokenizer is used here, this function also removes
        comments at the end.

        :param xforms_type: one of the :data:`xforms` keys
            ("struct", "func" or "var"). Unknown types return
            ``source`` unmodified.
        :param source: source code, either a string or an
            already-built CTokenizer.
        :return: the transformed source as a string (or the original
            object if ``xforms_type`` is unknown).
        """
        if xforms_type not in self.xforms:
            return source

        if isinstance(source, str):
            source = CTokenizer(source)

        for search, subst in self.xforms[xforms_type]:
            #
            # KernRe only accepts strings.
            #
            if isinstance(search, KernRe):
                source = str(source)

            source = search.sub(subst, source)
        return str(source)
diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittest_helper.py
new file mode 100755
index 000000000000..f3cba5120401
--- /dev/null
+++ b/tools/lib/python/unittest_helper.py
@@ -0,0 +1,363 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025-2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0103,R0912,R0914,E1101
+
+"""
+Provides helper functions and classes to execute Python unit tests.
+
+Those helper functions provide a nice colored output summary of each
+executed test and, when a test fails, show the difference in diff
+format when running in verbose mode, like::
+
+ $ tools/unittests/nested_match.py -v
+ ...
+ Traceback (most recent call last):
+ File "/new_devel/docs/tools/unittests/nested_match.py", line 69, in test_count_limit
+ self.assertEqual(replaced, "bar(a); bar(b); foo(c)")
+ ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ AssertionError: 'bar(a) foo(b); foo(c)' != 'bar(a); bar(b); foo(c)'
+ - bar(a) foo(b); foo(c)
+ ? ^^^^
+ + bar(a); bar(b); foo(c)
+ ? ^^^^^
+ ...
+
+It also allows filtering what tests will be executed via ``-k`` parameter.
+
+Typical usage is to do::
+
+ from unittest_helper import run_unittest
+ ...
+
+ if __name__ == "__main__":
+ run_unittest(__file__)
+
+If passing arguments is needed, on a more complex scenario, it can be
+used like on this example::
+
+ from unittest_helper import TestUnits, run_unittest
+ ...
+ env = {'sudo': ""}
+ ...
+ if __name__ == "__main__":
+ runner = TestUnits()
+ base_parser = runner.parse_args()
+ base_parser.add_argument('--sudo', action='store_true',
+ help='Enable tests requiring sudo privileges')
+
+ args = base_parser.parse_args()
+
+ # Update module-level flag
+ if args.sudo:
+ env['sudo'] = "1"
+
+ # Run tests with customized arguments
+ runner.run(__file__, parser=base_parser, args=args, env=env)
+"""
+
+import argparse
+import atexit
+import os
+import re
+import unittest
+import sys
+
+from unittest.mock import patch
+
+
class Summary(unittest.TestResult):
    """
    Overrides ``unittest.TestResult`` class to provide a nice colored
    summary. When in verbose mode, displays actual/expected difference in
    unified diff format.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        #: Dictionary storing organized test results, laid out as
        #: {module_name: {class_name: [(method_name, status), ...]}}.
        self.test_results = {}

        #: max length of the test names.
        self.max_name_length = 0

    @staticmethod
    def _split_id(test):
        """
        Split a dotted test id ("module.Class.method") into its parts.

        Returns a (module_name, class_name, method_name) tuple; missing
        leading components come back as empty strings.
        """
        parts = test.id().split(".")
        module_name = parts[-3] if len(parts) >= 3 else ""
        class_name = parts[-2] if len(parts) >= 2 else ""
        return module_name, class_name, parts[-1]

    def startTest(self, test):
        """Register the test in the result hierarchy before it runs."""
        super().startTest(test)
        module_name, class_name, method_name = self._split_id(test)

        # Build the hierarchical structure
        if module_name not in self.test_results:
            self.test_results[module_name] = {}

        if class_name not in self.test_results[module_name]:
            self.test_results[module_name][class_name] = []

        # Track maximum test name length for alignment
        display_name = f"{method_name}:"

        self.max_name_length = max(len(display_name), self.max_name_length)

    def _record_test(self, test, status):
        """Append the final status of a test under its module/class."""
        module_name, class_name, method_name = self._split_id(test)
        self.test_results[module_name][class_name].append((method_name, status))

    def addSuccess(self, test):
        """Record a passing test as "OK"."""
        super().addSuccess(test)
        self._record_test(test, "OK")

    def addFailure(self, test, err):
        """Record an assertion failure as "FAIL"."""
        super().addFailure(test, err)
        self._record_test(test, "FAIL")

    def addError(self, test, err):
        """Record an unexpected exception as "ERROR"."""
        super().addError(test, err)
        self._record_test(test, "ERROR")

    def addSkip(self, test, reason):
        """Record a skipped test, keeping the skip reason."""
        super().addSkip(test, reason)
        self._record_test(test, f"SKIP ({reason})")

    def printResults(self, verbose):
        """
        Print results using colors if tty.
        """
        # Check for ANSI color support
        use_color = sys.stdout.isatty()
        COLORS = {
            "OK": "\033[32m",            # Green
            "FAIL": "\033[31m",          # Red
            "SKIP": "\033[1;33m",        # Yellow
            "PARTIAL": "\033[33m",       # Orange
            "EXPECTED_FAIL": "\033[36m", # Cyan
            "reset": "\033[0m",          # Reset to default terminal color
        }
        if not use_color:
            for c in COLORS:
                COLORS[c] = ""

        # Calculate maximum test name length
        if not self.test_results:
            return
        try:
            lengths = []
            for module in self.test_results.values():
                for tests in module.values():
                    for test_name, _ in tests:
                        lengths.append(len(test_name) + 1)  # +1 for colon
            max_length = max(lengths) + 2  # Additional padding
        except ValueError:
            # max() raises ValueError when every class has an empty list
            sys.exit("Test list is empty")

        # Print results
        for module_name, classes in self.test_results.items():
            if verbose:
                print(f"{module_name}:")
            for class_name, tests in classes.items():
                if verbose:
                    print(f"  {class_name}:")
                for test_name, status in tests:
                    # Quiet mode only reports problems
                    if not verbose and status in [ "OK", "EXPECTED_FAIL" ]:
                        continue

                    # Get base status without reason for SKIP
                    if status.startswith("SKIP"):
                        status_code = status.split()[0]
                    else:
                        status_code = status
                    color = COLORS.get(status_code, "")
                    print(
                        f"    {test_name + ':':<{max_length}}{color}{status}{COLORS['reset']}"
                    )
            if verbose:
                print()

        # Print summary
        print(f"\nRan {self.testsRun} tests", end="")
        if hasattr(self, "timeTaken"):
            print(f" in {self.timeTaken:.3f}s", end="")
        print()

        if not self.wasSuccessful():
            print(f"\n{COLORS['FAIL']}FAILED (", end="")
            failures = getattr(self, "failures", [])
            errors = getattr(self, "errors", [])
            if failures:
                print(f"failures={len(failures)}", end="")
            if errors:
                if failures:
                    print(", ", end="")
                print(f"errors={len(errors)}", end="")
            print(f"){COLORS['reset']}")
+
+
def flatten_suite(suite):
    """Flatten test suite hierarchy."""
    def _walk(node):
        # TestSuite objects iterate over their children, which may
        # themselves be suites; anything else is a concrete test.
        if isinstance(node, unittest.TestSuite):
            for child in node:
                yield from _walk(child)
        else:
            yield node

    return list(_walk(suite))
+
+
class TestUnits:
    """
    Helper class to set verbosity level.

    This class discovers test files, imports their unittest classes and
    executes the tests on them.
    """
    def parse_args(self):
        """Returns a parser for command line arguments."""
        parser = argparse.ArgumentParser(description="Test runner with regex filtering")
        parser.add_argument("-v", "--verbose", action="count", default=1)
        parser.add_argument("-q", "--quiet", action="store_true")
        parser.add_argument("-f", "--failfast", action="store_true")
        parser.add_argument("-k", "--keyword",
                            help="Regex pattern to filter test methods")
        return parser

    def run(self, caller_file=None, pattern=None,
            suite=None, parser=None, args=None, env=None):
        """
        Execute all tests from the unit test file.

        It contains several optional parameters:

        ``caller_file``:
            - name of the file that contains test.

              typical usage is to place __file__ at the caller test, e.g.::

                if __name__ == "__main__":
                    TestUnits().run(__file__)

        ``pattern``:
            - optional pattern to match multiple file names. Defaults
              to basename of ``caller_file``.

        ``suite``:
            - an unittest suite initialized by the caller using
              ``unittest.TestLoader().discover()``.

        ``parser``:
            - an argparse parser. If not defined, this helper will create
              one.

        ``args``:
            - an ``argparse.Namespace`` data filled by the caller.

        ``env``:
            - environment variables that will be passed to the test suite

        At least ``caller_file`` or ``suite`` must be used, otherwise a
        ``TypeError`` will be raised.
        """
        if not args:
            if not parser:
                parser = self.parse_args()
            args = parser.parse_args()

        if not caller_file and not suite:
            raise TypeError("Either caller_file or suite is needed at TestUnits")

        if args.quiet:
            verbose = 0
        else:
            verbose = args.verbose

        if not env:
            env = os.environ.copy()

        # Export the verbosity level so the tests themselves can read it
        env["VERBOSE"] = f"{verbose}"

        patcher = patch.dict(os.environ, env)
        patcher.start()
        # ensure it gets stopped after
        atexit.register(patcher.stop)

        # Load ONLY tests matching pattern (defaults to the calling file)
        if not suite:
            if not pattern:
                pattern = caller_file

            loader = unittest.TestLoader()
            suite = loader.discover(start_dir=os.path.dirname(caller_file),
                                    pattern=os.path.basename(pattern))

        # Flatten the suite for environment injection
        tests_to_inject = flatten_suite(suite)

        # Filter tests by method name if -k specified
        if args.keyword:
            try:
                keyword_re = re.compile(args.keyword)
                filtered_suite = unittest.TestSuite()
                for test in tests_to_inject:  # Use the pre-flattened list
                    method_name = test.id().split(".")[-1]
                    if keyword_re.search(method_name):
                        filtered_suite.addTest(test)
                suite = filtered_suite
            except re.error as e:
                sys.stderr.write(f"Invalid regex pattern: {e}\n")
                sys.exit(1)
        else:
            # Maintain original suite structure if no keyword filtering
            suite = unittest.TestSuite(tests_to_inject)

        # With -vv or higher, use the default (verbose) TextTestResult
        # instead of the colored Summary.
        if verbose >= 2:
            resultclass = None
        else:
            resultclass = Summary

        runner = unittest.TextTestRunner(verbosity=verbose,
                                         resultclass=resultclass,
                                         failfast=args.failfast)
        result = runner.run(suite)
        if resultclass:
            result.printResults(verbose)

        sys.exit(not result.wasSuccessful())
+
+
def run_unittest(fname):
    """
    Basic usage of TestUnits class.

    Use it when there's no need to pass any extra argument to the tests
    with. The recommended way is to place this at the end of each
    unittest module::

        if __name__ == "__main__":
            run_unittest(__file__)
    """
    runner = TestUnits()
    runner.run(fname)