diff options
Diffstat (limited to 'tools/lib/python/kdoc')
| -rw-r--r-- | tools/lib/python/kdoc/c_lex.py | 662 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_files.py | 151 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_item.py | 45 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_output.py | 324 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_parser.py | 292 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_re.py | 205 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/kdoc_yaml_file.py | 178 | ||||
| -rw-r--r-- | tools/lib/python/kdoc/xforms_lists.py | 153 |
8 files changed, 1582 insertions, 428 deletions
diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py new file mode 100644 index 000000000000..cb95f5172448 --- /dev/null +++ b/tools/lib/python/kdoc/c_lex.py @@ -0,0 +1,662 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>. + +""" +Regular expression ancillary classes. + +Those help caching regular expressions and do matching for kernel-doc. + +Please notice that the code here may rise exceptions to indicate bad +usage inside kdoc to indicate problems at the replace pattern. + +Other errors are logged via log instance. +""" + +import logging +import re + +from copy import copy + +from .kdoc_re import KernRe + +log = logging.getLogger(__name__) + +def tokenizer_set_log(logger, prefix = ""): + """ + Replace the module‑level logger with a LoggerAdapter that + prepends *prefix* to every message. + """ + global log + + class PrefixAdapter(logging.LoggerAdapter): + """ + Ancillary class to set prefix on all message logs. + """ + def process(self, msg, kwargs): + return f"{prefix}{msg}", kwargs + + # Wrap the provided logger in our adapter + log = PrefixAdapter(logger, {"prefix": prefix}) + +class CToken(): + """ + Data class to define a C token. + """ + + # Tokens that can be used by the parser. Works like an C enum. + + COMMENT = 0 #: A standard C or C99 comment, including delimiter. + STRING = 1 #: A string, including quotation marks. + CHAR = 2 #: A character, including apostophes. + NUMBER = 3 #: A number. + PUNC = 4 #: A puntuation mark: / ``,`` / ``.``. + BEGIN = 5 #: A begin character: ``{`` / ``[`` / ``(``. + END = 6 #: A end character: ``}`` / ``]`` / ``)``. + CPP = 7 #: A preprocessor macro. + HASH = 8 #: The hash character - useful to handle other macros. + OP = 9 #: A C operator (add, subtract, ...). + STRUCT = 10 #: A ``struct`` keyword. + UNION = 11 #: An ``union`` keyword. + ENUM = 12 #: A ``struct`` keyword. + TYPEDEF = 13 #: A ``typedef`` keyword. + NAME = 14 #: A name. Can be an ID or a type. + SPACE = 15 #: Any space characters, including new lines + ENDSTMT = 16 #: End of an statement (``;``). + + BACKREF = 17 #: Not a valid C sequence, but used at sub regex patterns. + + MISMATCH = 255 #: an error indicator: should never happen in practice. + + # Dict to convert from an enum interger into a string. + _name_by_val = {v: k for k, v in dict(vars()).items() if isinstance(v, int)} + + # Dict to convert from string to an enum-like integer value. + _name_to_val = {k: v for v, k in _name_by_val.items()} + + @staticmethod + def to_name(val): + """Convert from an integer value from CToken enum into a string""" + + return CToken._name_by_val.get(val, f"UNKNOWN({val})") + + @staticmethod + def from_name(name): + """Convert a string into a CToken enum value""" + if name in CToken._name_to_val: + return CToken._name_to_val[name] + + return CToken.MISMATCH + + + def __init__(self, kind, value=None, pos=0, + brace_level=0, paren_level=0, bracket_level=0): + self.kind = kind + self.value = value + self.pos = pos + self.level = (bracket_level, paren_level, brace_level) + + def __repr__(self): + name = self.to_name(self.kind) + if isinstance(self.value, str): + value = '"' + self.value + '"' + else: + value = self.value + + return f"CToken(CToken.{name}, {value}, {self.pos}, {self.level})" + +#: Regexes to parse C code, transforming it into tokens. +RE_SCANNER_LIST = [ + # + # Note that \s\S is different than .*, as it also catches \n + # + (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"), + + (CToken.STRING, r'"(?:\\.|[^"\\])*"'), + (CToken.CHAR, r"'(?:\\.|[^'\\])'"), + + (CToken.NUMBER, r"0[xX][\da-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|" + r"\d+(?:\.\d*)?(?:[eE][+-]?\d+)?[fFlL]*"), + + (CToken.ENDSTMT, r"(?:\s+;|;)"), + + (CToken.PUNC, r"[,\.]"), + + (CToken.BEGIN, r"[\[\(\{]"), + + (CToken.END, r"[\]\)\}]"), + + (CToken.CPP, r"#\s*(?:define|include|ifdef|ifndef|if|else|elif|endif|undef|pragma)\b"), + + (CToken.HASH, r"#"), + + (CToken.OP, r"\+\+|\-\-|\->|==|\!=|<=|>=|&&|\|\||<<|>>|\+=|\-=|\*=|/=|%=" + r"|&=|\|=|\^=|[=\+\-\*/%<>&\|\^~!\?\:]"), + + (CToken.STRUCT, r"\bstruct\b"), + (CToken.UNION, r"\bunion\b"), + (CToken.ENUM, r"\benum\b"), + (CToken.TYPEDEF, r"\btypedef\b"), + + (CToken.NAME, r"[A-Za-z_]\w*"), + + (CToken.SPACE, r"\s+"), + + (CToken.BACKREF, r"\\\d+"), + + (CToken.MISMATCH,r"."), +] + +def fill_re_scanner(token_list): + """Ancillary routine to convert RE_SCANNER_LIST into a finditer regex""" + re_tokens = [] + + for kind, pattern in token_list: + name = CToken.to_name(kind) + re_tokens.append(f"(?P<{name}>{pattern})") + + return KernRe("|".join(re_tokens), re.MULTILINE | re.DOTALL) + +#: Handle C continuation lines. +RE_CONT = KernRe(r"\\\n") + +RE_COMMENT_START = KernRe(r'/\*\s*') + +#: tokenizer regex. Will be filled at the first CTokenizer usage. +RE_SCANNER = fill_re_scanner(RE_SCANNER_LIST) + + +class CTokenizer(): + """ + Scan C statements and definitions and produce tokens. + + When converted to string, it drops comments and handle public/private + values, respecting depth. + """ + + # This class is inspired and follows the basic concepts of: + # https://docs.python.org/3/library/re.html#writing-a-tokenizer + + def __init__(self, source=None): + """ + Create a regular expression to handle RE_SCANNER_LIST. + + While I generally don't like using regex group naming via: + (?P<name>...) + + in this particular case, it makes sense, as we can pick the name + when matching a code via RE_SCANNER. + """ + + # + # Store logger to allow parser classes to re-use it + # + global log + self.log = log + + self.tokens = [] + + if not source: + return + + if isinstance(source, list): + self.tokens = source + return + + # + # While we could just use _tokenize directly via interator, + # As we'll need to use the tokenizer several times inside kernel-doc + # to handle macro transforms, cache the results on a list, as + # re-using it is cheaper than having to parse everytime. + # + for tok in self._tokenize(source): + self.tokens.append(tok) + + def _tokenize(self, source): + """ + Iterator that parses ``source``, splitting it into tokens, as defined + at ``self.RE_SCANNER_LIST``. + + The interactor returns a CToken class object. + """ + + # Handle continuation lines. Note that kdoc_parser already has a + # logic to do that. Still, let's keep it for completeness, as we might + # end re-using this tokenizer outsize kernel-doc some day - or we may + # eventually remove from there as a future cleanup. + source = RE_CONT.sub("", source) + + brace_level = 0 + paren_level = 0 + bracket_level = 0 + + for match in RE_SCANNER.finditer(source): + kind = CToken.from_name(match.lastgroup) + pos = match.start() + value = match.group() + + if kind == CToken.MISMATCH: + log.error(f"Unexpected token '{value}' on pos {pos}:\n\t'{source}'") + elif kind == CToken.BEGIN: + if value == '(': + paren_level += 1 + elif value == '[': + bracket_level += 1 + else: # value == '{' + brace_level += 1 + + elif kind == CToken.END: + if value == ')' and paren_level > 0: + paren_level -= 1 + elif value == ']' and bracket_level > 0: + bracket_level -= 1 + elif brace_level > 0: # value == '}' + brace_level -= 1 + + yield CToken(kind, value, pos, + brace_level, paren_level, bracket_level) + + def __str__(self): + out="" + show_stack = [True] + + for i, tok in enumerate(self.tokens): + if tok.kind == CToken.BEGIN: + show_stack.append(show_stack[-1]) + + elif tok.kind == CToken.END: + prev = show_stack[-1] + if len(show_stack) > 1: + show_stack.pop() + + if not prev and show_stack[-1]: + # + # Try to preserve indent + # + out += "\t" * (len(show_stack) - 1) + + out += str(tok.value) + continue + + elif tok.kind == CToken.COMMENT: + comment = RE_COMMENT_START.sub("", tok.value) + + if comment.startswith("private:"): + show_stack[-1] = False + show = False + elif comment.startswith("public:"): + show_stack[-1] = True + + continue + + if not show_stack[-1]: + continue + + if i < len(self.tokens) - 1: + next_tok = self.tokens[i + 1] + + # Do some cleanups before ";" + + if tok.kind == CToken.SPACE and next_tok.kind == CToken.ENDSTMT: + continue + + if tok.kind == CToken.ENDSTMT and next_tok.kind == tok.kind: + continue + + out += str(tok.value) + + return out + + +class CTokenArgs: + """ + Ancillary class to help using backrefs from sub matches. + + If the highest backref contain a "+" at the last element, + the logic will be greedy, picking all other delims. + + This is needed to parse struct_group macros with end with ``MEMBERS...``. + """ + def __init__(self, sub_str): + self.sub_groups = set() + self.max_group = -1 + self.greedy = None + + for m in KernRe(r'\\(\d+)([+]?)').finditer(sub_str): + group = int(m.group(1)) + if m.group(2) == "+": + if self.greedy and self.greedy != group: + raise ValueError("There are multiple greedy patterns!") + self.greedy = group + + self.sub_groups.add(group) + self.max_group = max(self.max_group, group) + + if self.greedy: + if self.greedy != self.max_group: + raise ValueError("Greedy pattern is not the last one!") + + sub_str = KernRe(r'(\\\d+)[+]').sub(r"\1", sub_str) + + self.sub_str = sub_str + self.sub_tokeninzer = CTokenizer(sub_str) + + def groups(self, new_tokenizer): + r""" + Create replacement arguments for backrefs like: + + ``\0``, ``\1``, ``\2``, ... ``\{number}`` + + It also accepts a ``+`` character to the highest backref, like + ``\4+``. When used, the backref will be greedy, picking all other + arguments afterwards. + + The logic is smart enough to only go up to the maximum required + argument, even if there are more. + + If there is a backref for an argument above the limit, it will + raise an exception. Please notice that, on C, square brackets + don't have any separator on it. Trying to use ``\1``..``\n`` for + brackets also raise an exception. + """ + + level = (0, 0, 0) + + if self.max_group < 0: + return level, [] + + tokens = new_tokenizer.tokens + + # + # Fill \0 with the full token contents + # + groups_list = [ [] ] + + if 0 in self.sub_groups: + inner_level = 0 + + for i in range(0, len(tokens)): + tok = tokens[i] + + if tok.kind == CToken.BEGIN: + inner_level += 1 + + # + # Discard first begin + # + if not groups_list[0]: + continue + elif tok.kind == CToken.END: + inner_level -= 1 + if inner_level < 0: + break + + if inner_level: + groups_list[0].append(tok) + + if not self.max_group: + return level, groups_list + + delim = None + + # + # Ignore everything before BEGIN. The value of begin gives the + # delimiter to be used for the matches + # + for i in range(0, len(tokens)): + tok = tokens[i] + if tok.kind == CToken.BEGIN: + if tok.value == "{": + delim = ";" + elif tok.value == "(": + delim = "," + else: + self.log.error(fr"Can't handle \1..\n on {sub_str}") + + level = tok.level + break + + pos = 1 + groups_list.append([]) + + inner_level = 0 + for i in range(i + 1, len(tokens)): + tok = tokens[i] + + if tok.kind == CToken.BEGIN: + inner_level += 1 + if tok.kind == CToken.END: + inner_level -= 1 + if inner_level < 0: + break + + if tok.kind in [CToken.PUNC, CToken.ENDSTMT] and delim == tok.value: + pos += 1 + if self.greedy and pos > self.max_group: + pos -= 1 + else: + groups_list.append([]) + + if pos > self.max_group: + break + + continue + + groups_list[pos].append(tok) + + if pos < self.max_group: + log.error(fr"{self.sub_str} groups are up to {pos} instead of {self.max_group}") + + return level, groups_list + + def tokens(self, new_tokenizer): + level, groups = self.groups(new_tokenizer) + + new = CTokenizer() + + for tok in self.sub_tokeninzer.tokens: + if tok.kind == CToken.BACKREF: + group = int(tok.value[1:]) + + for group_tok in groups[group]: + new_tok = copy(group_tok) + + new_level = [0, 0, 0] + + for i in range(0, len(level)): + new_level[i] = new_tok.level[i] + level[i] + + new_tok.level = tuple(new_level) + + new.tokens += [ new_tok ] + else: + new.tokens += [ tok ] + + return new.tokens + + +class CMatch: + """ + Finding nested delimiters is hard with regular expressions. It is + even harder on Python with its normal re module, as there are several + advanced regular expressions that are missing. + + This is the case of this pattern:: + + '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' + + which is used to properly match open/close parentheses of the + string search STRUCT_GROUP(), + + Add a class that counts pairs of delimiters, using it to match and + replace nested expressions. + + The original approach was suggested by: + + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + + Although I re-implemented it to make it more generic and match 3 types + of delimiters. The logic checks if delimiters are paired. If not, it + will ignore the search string. + """ + + + def __init__(self, regex, delim="("): + self.regex = KernRe("^" + regex + r"\b") + self.start_delim = delim + + def _search(self, tokenizer): + """ + Finds paired blocks for a regex that ends with a delimiter. + + The suggestion of using finditer to match pairs came from: + https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex + but I ended using a different implementation to align all three types + of delimiters and seek for an initial regular expression. + + The algorithm seeks for open/close paired delimiters and places them + into a stack, yielding a start/stop position of each match when the + stack is zeroed. + + The algorithm should work fine for properly paired lines, but will + silently ignore end delimiters that precede a start delimiter. + This should be OK for kernel-doc parser, as unaligned delimiters + would cause compilation errors. So, we don't need to raise exceptions + to cover such issues. + """ + + start = None + started = False + + import sys + + stack = [] + + for i, tok in enumerate(tokenizer.tokens): + if start is None: + if tok.kind == CToken.NAME and self.regex.match(tok.value): + start = i + stack.append((start, tok.level)) + started = False + + continue + + if not started: + if tok.kind == CToken.SPACE: + continue + + if tok.kind == CToken.BEGIN and tok.value == self.start_delim: + started = True + continue + + # Name only token without BEGIN/END + if i > start: + i -= 1 + yield start, i + start = None + + if tok.kind == CToken.END and tok.level == stack[-1][1]: + start, level = stack.pop() + + yield start, i + start = None + + # + # If an END zeroing levels is not there, return remaining stuff + # This is meant to solve cases where the caller logic might be + # picking an incomplete block. + # + if start and stack: + if started: + s = str(tokenizer) + log.warning(f"can't find a final end at {s}") + + yield start, len(tokenizer.tokens) + + def search(self, source): + """ + This is similar to re.search: + + It matches a regex that it is followed by a delimiter, + returning occurrences only if all delimiters are paired. + """ + + if isinstance(source, CTokenizer): + tokenizer = source + is_token = True + else: + tokenizer = CTokenizer(source) + is_token = False + + for start, end in self._search(tokenizer): + new_tokenizer = CTokenizer(tokenizer.tokens[start:end + 1]) + + if is_token: + yield new_tokenizer + else: + yield str(new_tokenizer) + + def sub(self, sub_str, source, count=0): + """ + This is similar to re.sub: + + It matches a regex that it is followed by a delimiter, + replacing occurrences only if all delimiters are paired. + + if the sub argument contains:: + + r'\0' + + it will work just like re: it places there the matched paired data + with the delimiter stripped. + + If count is different than zero, it will replace at most count + items. + """ + if isinstance(source, CTokenizer): + is_token = True + tokenizer = source + else: + is_token = False + tokenizer = CTokenizer(source) + + # Detect if sub_str contains sub arguments + + args_match = CTokenArgs(sub_str) + + new_tokenizer = CTokenizer() + pos = 0 + n = 0 + + # + # NOTE: the code below doesn't consider overlays at sub. + # We may need to add some extra unit tests to check if those + # would cause problems. When replacing by "", this should not + # be a problem, but other transformations could be problematic + # + for start, end in self._search(tokenizer): + new_tokenizer.tokens += tokenizer.tokens[pos:start] + + new = CTokenizer(tokenizer.tokens[start:end + 1]) + + new_tokenizer.tokens += args_match.tokens(new) + + pos = end + 1 + + n += 1 + if count and n >= count: + break + + new_tokenizer.tokens += tokenizer.tokens[pos:] + + if not is_token: + return str(new_tokenizer) + + return new_tokenizer + + def __repr__(self): + """ + Returns a displayable version of the class init. + """ + + return f'CMatch("{self.regex.regex.pattern}")' diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py index 022487ea2cc6..ed82b6e6ab25 100644 --- a/tools/lib/python/kdoc/kdoc_files.py +++ b/tools/lib/python/kdoc/kdoc_files.py @@ -9,13 +9,14 @@ Classes for navigating through the files that kernel-doc needs to handle to generate documentation. """ -import argparse import logging import os import re from kdoc.kdoc_parser import KernelDoc +from kdoc.xforms_lists import CTransforms from kdoc.kdoc_output import OutputFormat +from kdoc.kdoc_yaml_file import KDocTestFile class GlobSourceFiles: @@ -86,11 +87,81 @@ class GlobSourceFiles: file_not_found_cb(fname) +class KdocConfig(): + """ + Stores all configuration attributes that kdoc_parser and kdoc_output + needs. + """ + def __init__(self, verbose=False, werror=False, wreturn=False, + wshort_desc=False, wcontents_before_sections=False, + logger=None): + + self.verbose = verbose + self.werror = werror + self.wreturn = wreturn + self.wshort_desc = wshort_desc + self.wcontents_before_sections = wcontents_before_sections + + if logger: + self.log = logger + else: + self.log = logging.getLogger(__file__) + + self.warning = self.log.warning + class KernelFiles(): """ Parse kernel-doc tags on multiple kernel source files. - There are two type of parsers defined here: + This is the main entry point to run kernel-doc. This class is initialized + using a series of optional arguments: + + ``verbose`` + If True, enables kernel-doc verbosity. Default: False. + + ``out_style`` + Class to be used to format output. If None (default), + only report errors. + + ``xforms`` + Transforms to be applied to C prototypes and data structs. + If not specified, defaults to xforms = CFunction() + + ``werror`` + If True, treat warnings as errors, retuning an error code on warnings. + + Default: False. + + ``wreturn`` + If True, warns about the lack of a return markup on functions. + + Default: False. + ``wshort_desc`` + If True, warns if initial short description is missing. + + Default: False. + + ``wcontents_before_sections`` + If True, warn if there are contents before sections (deprecated). + This option is kept just for backward-compatibility, but it does + nothing, neither here nor at the original Perl script. + + Default: False. + + ``logger`` + Optional logger class instance. + + If not specified, defaults to use: ``logging.getLogger("kernel-doc")`` + + ``yaml_file`` + If defined, stores the output inside a YAML file. + + ``yaml_content`` + Defines what will be inside the YAML file. + + Note: + There are two type of parsers defined here: + - self.parse_file(): parses both kernel-doc markups and ``EXPORT_SYMBOL*`` macros; - self.process_export_file(): parses only ``EXPORT_SYMBOL*`` macros. @@ -117,7 +188,12 @@ class KernelFiles(): if fname in self.files: return - doc = KernelDoc(self.config, fname) + if self.test_file: + store_src = True + else: + store_src = False + + doc = KernelDoc(self.config, fname, self.xforms, store_src=store_src) export_table, entries = doc.parse_kdoc() self.export_table[fname] = export_table @@ -153,16 +229,21 @@ class KernelFiles(): self.error(f"Cannot find file {fname}") - def __init__(self, verbose=False, out_style=None, + def __init__(self, verbose=False, out_style=None, xforms=None, werror=False, wreturn=False, wshort_desc=False, wcontents_before_sections=False, - logger=None): + yaml_file=None, yaml_content=None, logger=None): """ Initialize startup variables and parse all files. """ if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) + try: + verbose = bool(int(os.environ.get("KBUILD_VERBOSE", 0))) + except ValueError: + # Handles an eventual case where verbosity is not a number + # like KBUILD_VERBOSE="" + verbose = False if out_style is None: out_style = OutputFormat() @@ -181,29 +262,36 @@ class KernelFiles(): if kdoc_werror: werror = kdoc_werror + if not logger: + logger = logging.getLogger("kernel-doc") + else: + logger = logger + # Some variables are global to the parser logic as a whole as they are # used to send control configuration to KernelDoc class. As such, # those variables are read-only inside the KernelDoc. - self.config = argparse.Namespace + self.config = KdocConfig(verbose, werror, wreturn, wshort_desc, + wcontents_before_sections, logger) - self.config.verbose = verbose - self.config.werror = werror - self.config.wreturn = wreturn - self.config.wshort_desc = wshort_desc - self.config.wcontents_before_sections = wcontents_before_sections + # Override log warning, as we want to count errors + self.config.warning = self.warning - if not logger: - self.config.log = logging.getLogger("kernel-doc") + if yaml_file: + self.test_file = KDocTestFile(self.config, yaml_file, yaml_content) else: - self.config.log = logger + self.test_file = None - self.config.warning = self.warning + if xforms: + self.xforms = xforms + else: + self.xforms = CTransforms() self.config.src_tree = os.environ.get("SRCTREE", None) # Initialize variables that are internal to KernelFiles self.out_style = out_style + self.out_style.set_config(self.config) self.errors = 0 self.results = {} @@ -246,8 +334,6 @@ class KernelFiles(): returning kernel-doc markups on each interaction. """ - self.out_style.set_config(self.config) - if not filenames: filenames = sorted(self.results.keys()) @@ -267,29 +353,28 @@ class KernelFiles(): for s in symbol: function_table.add(s) - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - - msg = "" if fname not in self.results: self.config.log.warning("No kernel-doc for file %s", fname) continue symbols = self.results[fname] - self.out_style.set_symbols(symbols) - for arg in symbols: - m = self.out_msg(fname, arg.name, arg) + if self.test_file: + self.test_file.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) - if m is None: - ln = arg.get("ln", 0) - dtype = arg.get('type', "") + self.test_file.output_symbols(fname, symbols) - self.config.log.warning("%s:%d Can't handle %s", - fname, ln, dtype) - else: - msg += m + continue + + self.out_style.set_filter(export, internal, symbol, nosymbol, + function_table, enable_lineno, + no_doc_sections) + msg = self.out_style.output_symbols(fname, symbols) if msg: yield fname, msg + + if self.test_file: + self.test_file.write() diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py index 2b8a93f79716..a7aa6e1e4c1c 100644 --- a/tools/lib/python/kdoc/kdoc_item.py +++ b/tools/lib/python/kdoc/kdoc_item.py @@ -14,7 +14,8 @@ class KdocItem: then pass into the output modules. """ - def __init__(self, name, fname, type, start_line, **other_stuff): + def __init__(self, name, fname, type, start_line, + **other_stuff): self.name = name self.fname = fname self.type = type @@ -22,15 +23,34 @@ class KdocItem: self.sections = {} self.sections_start_lines = {} self.parameterlist = [] - self.parameterdesc_start_lines = [] + self.parameterdesc_start_lines = {} self.parameterdescs = {} self.parametertypes = {} + + self.warnings = [] + # # Just save everything else into our own dict so that the output # side can grab it directly as before. As we move things into more # structured data, this will, hopefully, fade away. # - self.other_stuff = other_stuff + known_keys = { + 'declaration_start_line', + 'sections', + 'sections_start_lines', + 'parameterlist', + 'parameterdesc_start_lines', + 'parameterdescs', + 'parametertypes', + 'warnings', + } + + self.other_stuff = {} + for k, v in other_stuff.items(): + if k in known_keys: + setattr(self, k, v) # real attribute + else: + self.other_stuff[k] = v def get(self, key, default = None): """ @@ -41,6 +61,23 @@ class KdocItem: def __getitem__(self, key): return self.get(key) + def __repr__(self): + return f"KdocItem({self.name}, {self.fname}, {self.type}, {self.declaration_start_line})" + + @classmethod + def from_dict(cls, d): + """Create a KdocItem from a plain dict.""" + + cp = d.copy() + name = cp.pop('name', None) + fname = cp.pop('fname', None) + type = cp.pop('type', None) + start_line = cp.pop('start_line', 1) + other_stuff = cp.pop('other_stuff', {}) + + # Everything that’s left goes straight to __init__ + return cls(name, fname, type, start_line, **cp, **other_stuff) + # # Tracking of section and parameter information. # @@ -49,7 +86,7 @@ class KdocItem: Set sections and start lines. """ self.sections = sections - self.section_start_lines = start_lines + self.sections_start_lines = start_lines def set_params(self, names, descs, types, starts): """ diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py index 4210b91dde5f..de107ab4a281 100644 --- a/tools/lib/python/kdoc/kdoc_output.py +++ b/tools/lib/python/kdoc/kdoc_output.py @@ -222,6 +222,27 @@ class OutputFormat: return None + def output_symbols(self, fname, symbols): + """ + Handles a set of KdocItem symbols. + """ + self.set_symbols(symbols) + + msg = "" + for arg in symbols: + m = self.msg(fname, arg.name, arg) + + if m is None: + ln = arg.get("ln", 0) + dtype = arg.get('type', "") + + self.config.log.warning("%s:%d Can't handle %s", + fname, ln, dtype) + else: + msg += m + + return msg + # Virtual methods to be overridden by inherited classes # At the base class, those do nothing. def set_symbols(self, symbols): @@ -368,7 +389,7 @@ class RestFormat(OutputFormat): else: self.data += f'{self.lineprefix}**{section}**\n\n' - self.print_lineno(args.section_start_lines.get(section, 0)) + self.print_lineno(args.sections_start_lines.get(section, 0)) self.output_highlight(text) self.data += "\n" self.data += "\n" @@ -492,7 +513,9 @@ class RestFormat(OutputFormat): def out_var(self, fname, name, args): oldprefix = self.lineprefix ln = args.declaration_start_line - full_proto = args.other_stuff["full_proto"] + full_proto = args.other_stuff.get("full_proto") + if not full_proto: + raise KeyError(f"Can't find full proto for {name} variable") self.lineprefix = " " @@ -580,7 +603,35 @@ class RestFormat(OutputFormat): class ManFormat(OutputFormat): - """Consts and functions used by man pages output.""" + """ + Consts and functions used by man pages output. + + This class has one mandatory parameter and some optional ones, which + are needed to define the title header contents: + + ``modulename`` + Defines the module name to be used at the troff ``.TH`` output. + + This argument is optional. If not specified, it will be filled + with the directory which contains the documented file. + + ``section`` + Usually a numeric value from 0 to 9, but man pages also accept + some strings like "p". + + Defauls to ``9`` + + ``manual`` + Defaults to ``Kernel API Manual``. + + The above controls the output of teh corresponding fields on troff + title headers, which will be filled like this:: + + .TH "{name}" {section} "{date}" "{modulename}" "{manual}" + + where ``name``` will match the API symbol name, and ``date`` will be + either the date where the Kernel was compiled or the current date + """ highlights = ( (type_constant, r"\1"), @@ -607,7 +658,21 @@ class ManFormat(OutputFormat): "%m %d %Y", ] - def __init__(self, modulename): + def modulename(self, args): + if self._modulename: + return self._modulename + + return os.path.dirname(args.fname) + + def emit_th(self, name, args): + """Emit a title header line.""" + title = name.strip() + module = self.modulename(args) + + self.data += f'.TH "{title}" {self.section} "{self.date}" ' + self.data += f'"{module}" "{self.manual}"\n' + + def __init__(self, modulename=None, section="9", manual="Kernel API Manual"): """ Creates class variables. @@ -616,7 +681,11 @@ class ManFormat(OutputFormat): """ super().__init__() - self.modulename = modulename + + self._modulename = modulename + self.section = section + self.manual = manual + self.symbols = [] dt = None @@ -632,7 +701,7 @@ class ManFormat(OutputFormat): if not dt: dt = datetime.now() - self.man_date = dt.strftime("%B %Y") + self.date = dt.strftime("%B %Y") def arg_name(self, args, name): """ @@ -647,7 +716,8 @@ class ManFormat(OutputFormat): dtype = args.type if dtype == "doc": - return self.modulename + return name +# return os.path.basename(self.modulename(args)) if dtype in ["function", "typedef"]: return name @@ -697,6 +767,185 @@ class ManFormat(OutputFormat): return self.data + def emit_table(self, colspec_row, rows): + + if not rows: + return "" + + out = "" + colspec = "\t".join(["l"] * len(rows[0])) + + out += "\n.TS\n" + out += "box;\n" + out += f"{colspec}.\n" + + if colspec_row: + out_row = [] + + for text in colspec_row: + out_row.append(f"\\fB{text}\\fP") + + out += "\t".join(out_row) + "\n_\n" + + for r in rows: + out += "\t".join(r) + "\n" + + out += ".TE\n" + + return out + + def grid_table(self, lines, start): + """ + Ancillary function to help handling a grid table inside the text. + """ + + i = start + 1 + rows = [] + colspec_row = None + + while i < len(lines): + line = lines[i] + + if KernRe(r"^\s*\|.*\|\s*$").match(line): + parts = [] + + for p in line.strip('|').split('|'): + parts.append(p.strip()) + + rows.append(parts) + + elif KernRe(r'^\+\=[\+\=]+\+\s*$').match(line): + if rows and rows[0]: + if not colspec_row: + colspec_row = [""] * len(rows[0]) + + for j in range(0, len(rows[0])): + content = [] + for row in rows: + content.append(row[j]) + + colspec_row[j] = " ".join(content) + + rows = [] + + elif KernRe(r"^\s*\+[-+]+\+.*$").match(line): + pass + + else: + break + + i += 1 + + return i, self.emit_table(colspec_row, rows) + + def simple_table(self, lines, start): + """ + Ancillary function to help handling a simple table inside the text. + """ + + i = start + rows = [] + colspec_row = None + + pos = [] + for m in KernRe(r'\=+').finditer(lines[i]): + pos.append((m.start(), m.end() - 1)) + + i += 1 + while i < len(lines): + line = lines[i] + + if KernRe(r"^\s*[\=]+[ \t\=]+$").match(line): + i += 1 + break + + elif KernRe(r'^[\s=]+$').match(line): + if rows and rows[0]: + if not colspec_row: + colspec_row = [""] * len(rows[0]) + + for j in range(0, len(rows[0])): + content = [] + for row in rows: + content.append(row[j]) + + colspec_row[j] = " ".join(content) + + rows = [] + + else: + row = [""] * len(pos) + + for j in range(0, len(pos)): + start, end = pos[j] + + row[j] = line[start:end].strip() + + rows.append(row) + + i += 1 + + return i, self.emit_table(colspec_row, rows) + + def code_block(self, lines, start): + """ + Ensure that code blocks won't be messed up at the output. + + By default, troff join lines at the same paragraph. Disable it, + on code blocks. + """ + + line = lines[start] + + if "code-block" in line: + out = "\n.nf\n" + elif line.startswith("..") and line.endswith("::"): + # + # Handle note, warning, error, ... markups + # + line = line[2:-1].strip().upper() + out = f"\n.nf\n\\fB{line}\\fP\n" + elif line.endswith("::"): + out = line[:-1] + out += "\n.nf\n" + else: + # Just in case. Should never happen in practice + out = "\n.nf\n" + + i = start + 1 + ident = None + + while i < len(lines): + line = lines[i] + + m = KernRe(r"\S").match(line) + if not m: + out += line + "\n" + i += 1 + continue + + pos = m.start() + if not ident: + if pos > 0: + ident = pos + else: + out += "\n.fi\n" + if i > start + 1: + return i - 1, out + else: + # Just in case. Should never happen in practice + return i, out + + if pos >= ident: + out += line + "\n" + i += 1 + continue + + break + + out += "\n.fi\n" + return i, out + def output_highlight(self, block): """ Outputs a C symbol that may require being highlighted with @@ -708,15 +957,46 @@ class ManFormat(OutputFormat): if isinstance(contents, list): contents = "\n".join(contents) - for line in contents.strip("\n").split("\n"): - line = KernRe(r"^\s*").sub("", line) - if not line: - continue + lines = contents.strip("\n").split("\n") + i = 0 - if line[0] == ".": - self.data += "\\&" + line + "\n" + while i < len(lines): + org_line = lines[i] + + line = KernRe(r"^\s*").sub("", org_line) + + if line: + if KernRe(r"^\+\-[-+]+\+.*$").match(line): + i, text = self.grid_table(lines, i) + self.data += text + continue + + if KernRe(r"^\=+[ \t]\=[ \t\=]+$").match(line): + i, text = self.simple_table(lines, i) + self.data += text + continue + + if line.endswith("::") or KernRe(r"\.\.\s+code-block.*::").match(line): + i, text = self.code_block(lines, i) + self.data += text + continue + + if line[0] == ".": + self.data += "\\&" + line + "\n" + i += 1 + continue + + # + # Handle lists + # + line = KernRe(r'^[-*]\s+').sub(r'.IP \[bu]\n', line) + line = KernRe(r'^(\d+|a-z)[\.\)]\s+').sub(r'.IP \1\n', line) else: - self.data += line + "\n" + line = ".PP\n" + + i += 1 + + self.data += line + "\n" def out_doc(self, fname, name, args): if not self.check_doc(name, args): @@ -724,7 +1004,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) for section, text in args.sections.items(): self.data += f'.SH "{section}"' + "\n" @@ -734,7 +1014,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) - self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -780,7 +1060,7 @@ class ManFormat(OutputFormat): def out_enum(self, fname, name, args): out_name = self.arg_name(args, name) - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"enum {name} \\- {args['purpose']}\n" @@ -813,7 +1093,7 @@ class ManFormat(OutputFormat): out_name = self.arg_name(args, name) full_proto = args.other_stuff["full_proto"] - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -830,11 +1110,11 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_typedef(self, fname, name, args): - module = self.modulename + module = self.modulename(args) purpose = args.get('purpose') out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"typedef {name} \\- {purpose}\n" @@ -844,12 +1124,12 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_struct(self, fname, name, args): - module = self.modulename + module = self.modulename(args) purpose = args.get('purpose') definition = args.get('definition') out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.emit_th(out_name, args) self.data += ".SH NAME\n" self.data += f"{args.type} {name} \\- {purpose}\n" diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py index ca00695b47b3..74af7ae47aa4 100644 --- a/tools/lib/python/kdoc/kdoc_parser.py +++ b/tools/lib/python/kdoc/kdoc_parser.py @@ -13,7 +13,8 @@ import sys import re from pprint import pformat -from kdoc.kdoc_re import NestedMatch, KernRe +from kdoc.c_lex import CTokenizer, tokenizer_set_log +from kdoc.kdoc_re import KernRe from kdoc.kdoc_item import KdocItem # @@ -70,140 +71,9 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * ' cache = False) # -# Here begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. -# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*__private', re.S), ' '), - (KernRe(r'\s*__rcu', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. - # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parentheses will be properly matched. - # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), -] -# -# Regexes here are guaranteed to have the end delimiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. -# -struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), -] - -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__exit +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__always_unused *"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# # Ancillary functions # -def apply_transforms(xforms, text): - """ - Apply a set of transforms to a block of text. - """ - for search, subst in xforms: - text = search.sub(subst, text) - return text - multi_space = KernRe(r'\s\s+') def trim_whitespace(s): """ @@ -215,15 +85,9 @@ def trim_private_members(text): """ Remove ``struct``/``enum`` members that have been marked "private". """ - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. - # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() + + tokens = CTokenizer(text) + return str(tokens) class state: """ @@ -276,7 +140,7 @@ class KernelEntry: self.parametertypes = {} self.parameterdesc_start_lines = {} - self.section_start_lines = {} + self.sections_start_lines = {} self.sections = {} self.anon_struct_union = False @@ -356,7 +220,7 @@ class KernelEntry: self.sections[name] += '\n' + contents else: self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line + self.sections_start_lines[name] = self.new_start_line self.new_start_line = 0 # self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) @@ -382,11 +246,15 @@ class KernelDoc: #: String to write when a parameter is not described. undescribed = "-- undescribed --" - def __init__(self, config, fname): + def __init__(self, config, fname, xforms, store_src=False): """Initialize internal variables""" self.fname = fname self.config = config + self.xforms = xforms + self.store_src = store_src + + tokenizer_set_log(self.config.log, f"{self.fname}: CMatch: ") # Initial state for the state machines self.state = state.NORMAL @@ -449,7 +317,7 @@ class KernelDoc: for section in ["Description", "Return"]: if section in sections and not sections[section].rstrip(): del sections[section] - item.set_sections(sections, self.entry.section_start_lines) + item.set_sections(sections, self.entry.sections_start_lines) item.set_params(self.entry.parameterlist, self.entry.parameterdescs, self.entry.parametertypes, self.entry.parameterdesc_start_lines) @@ -849,13 +717,15 @@ class KernelDoc: return declaration - def dump_struct(self, ln, proto): + def dump_struct(self, ln, proto, source): """ Store an entry for a ``struct`` or ``union`` """ # # Do the basic parse to get the pieces of the declaration. # + source = source + proto = trim_private_members(proto) struct_parts = self.split_struct_proto(proto) if not struct_parts: self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") @@ -869,12 +739,8 @@ class KernelDoc: # # Go through the list of members applying all of our transformations. # - members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) + members = self.xforms.apply("struct", members) - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) # # Deal with embedded struct and union members, and drop enums entirely. # @@ -888,10 +754,11 @@ class KernelDoc: declaration_name) self.check_sections(ln, declaration_name, decl_type) self.output_declaration(decl_type, declaration_name, + source=source, definition=self.format_struct_decl(declaration), purpose=self.entry.declaration_purpose) - def dump_enum(self, ln, proto): + def dump_enum(self, ln, proto, source): """ Store an ``enum`` inside self.entries array. """ @@ -899,6 +766,8 @@ class KernelDoc: # Strip preprocessor directives. Note that this depends on the # trailing semicolon we added in process_proto_type(). # + source = source + proto = trim_private_members(proto) proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) # # Parse out the name and members of the enum. Typedef form first. @@ -906,7 +775,7 @@ class KernelDoc: r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') if r.search(proto): declaration_name = r.group(2) - members = trim_private_members(r.group(1)) + members = r.group(1) # # Failing that, look for a straight enum # @@ -914,7 +783,7 @@ class KernelDoc: r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') if r.match(proto): declaration_name = r.group(1) - members = trim_private_members(r.group(2)) + members = r.group(2) # # OK, this isn't going to work. # @@ -943,9 +812,10 @@ class KernelDoc: member_set = set() members = KernRe(r'\([^;)]*\)').sub('', members) for arg in members.split(','): - if not arg: - continue arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) + if not arg.strip(): + continue + self.entry.parameterlist.append(arg) if arg not in self.entry.parameterdescs: self.entry.parameterdescs[arg] = self.undescribed @@ -961,29 +831,23 @@ class KernelDoc: f"Excess enum value '@{k}' description in '{declaration_name}'") self.output_declaration('enum', declaration_name, + source=source, purpose=self.entry.declaration_purpose) - def dump_var(self, ln, proto): + def dump_var(self, ln, proto, source): """ Store variables that are part of kAPI. """ VAR_ATTRIBS = [ "extern", + "const", ] - OPTIONAL_VAR_ATTR = "^(?:" + "|".join(VAR_ATTRIBS) + ")?" - - sub_prefixes = [ - (KernRe(r"__read_mostly"), ""), - (KernRe(r"__ro_after_init"), ""), - (KernRe(r"(?://.*)$"), ""), - (KernRe(r"(?:/\*.*\*/)"), ""), - (KernRe(r";$"), ""), - (KernRe(r"=.*"), ""), - ] + OPTIONAL_VAR_ATTR = r"^(?:\b(?:" +"|".join(VAR_ATTRIBS) +r")\b\s*)*" # # Store the full prototype before modifying it # + source = source full_proto = proto declaration_name = None @@ -1004,8 +868,7 @@ class KernelDoc: # Drop comments and macros to have a pure C prototype # if not declaration_name: - for r, sub in sub_prefixes: - proto = r.sub(sub, proto) + proto = self.xforms.apply("var", proto) proto = proto.rstrip() @@ -1015,17 +878,17 @@ class KernelDoc: default_val = None - r= KernRe(OPTIONAL_VAR_ATTR + r"\w.*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") + r= KernRe(OPTIONAL_VAR_ATTR + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?") if r.match(proto): if not declaration_name: declaration_name = r.group(1) default_val = r.group(2) else: - r= KernRe(OPTIONAL_VAR_ATTR + r"(?:\w.*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") - if r.match(proto): - default_val = r.group(1) + r= KernRe(OPTIONAL_VAR_ATTR + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?") + if r.match(proto): + default_val = r.group(1) if not declaration_name: self.emit_msg(ln,f"{proto}: can't parse variable") return @@ -1034,39 +897,38 @@ class KernelDoc: default_val = default_val.lstrip("=").strip() self.output_declaration("var", declaration_name, + source=source, full_proto=full_proto, default_val=default_val, purpose=self.entry.declaration_purpose) - def dump_declaration(self, ln, prototype): + def dump_declaration(self, ln, prototype, source): """ Store a data declaration inside self.entries array. """ if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) + self.dump_enum(ln, prototype, source) elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) + self.dump_typedef(ln, prototype, source) elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) + self.dump_struct(ln, prototype, source) elif self.entry.decl_type == "var": - self.dump_var(ln, prototype) + self.dump_var(ln, prototype, source) else: # This would be a bug self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - def dump_function(self, ln, prototype): + def dump_function(self, ln, prototype, source): """ Store a function or function macro inside self.entries array. """ + source = source found = func_macro = False return_type = '' decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) + # # If we have a macro, remove the "#define" at the front. # @@ -1085,6 +947,11 @@ class KernelDoc: declaration_name = r.group(1) func_macro = True found = True + else: + # + # Apply the initial transformations. + # + prototype = self.xforms.apply("func", prototype) # Yes, this truly is vile. We are looking for: # 1. Return type (may be nothing if we're looking at a macro) @@ -1150,13 +1017,14 @@ class KernelDoc: # Store the result. # self.output_declaration(decl_type, declaration_name, + source=source, typedef=('typedef' in return_type), functiontype=return_type, purpose=self.entry.declaration_purpose, func_macro=func_macro) - def dump_typedef(self, ln, proto): + def dump_typedef(self, ln, proto, source): """ Store a ``typedef`` inside self.entries array. """ @@ -1167,6 +1035,8 @@ class KernelDoc: typedef_ident = r'\*?\s*(\w\S+)\s*' typedef_args = r'\s*\((.*)\);' + source = source + typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) @@ -1187,6 +1057,7 @@ class KernelDoc: self.create_parameter_list(ln, 'function', args, ',', declaration_name) self.output_declaration('function', declaration_name, + source=source, typedef=True, functiontype=return_type, purpose=self.entry.declaration_purpose) @@ -1204,6 +1075,7 @@ class KernelDoc: return self.output_declaration('typedef', declaration_name, + source=source, purpose=self.entry.declaration_purpose) return @@ -1241,7 +1113,7 @@ class KernelDoc: function_set.add(symbol) return True - def process_normal(self, ln, line): + def process_normal(self, ln, line, source): """ STATE_NORMAL: looking for the ``/**`` to begin everything. """ @@ -1255,7 +1127,7 @@ class KernelDoc: # next line is always the function name self.state = state.NAME - def process_name(self, ln, line): + def process_name(self, ln, line, source): """ STATE_NAME: Looking for the "name - description" line """ @@ -1388,7 +1260,7 @@ class KernelDoc: return False - def process_decl(self, ln, line): + def process_decl(self, ln, line, source): """ STATE_DECLARATION: We've seen the beginning of a declaration. """ @@ -1417,7 +1289,7 @@ class KernelDoc: self.emit_msg(ln, f"bad line: {line}") - def process_special(self, ln, line): + def process_special(self, ln, line, source): """ STATE_SPECIAL_SECTION: a section ending with a blank line. """ @@ -1468,7 +1340,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_body(self, ln, line): + def process_body(self, ln, line, source): """ STATE_BODY: the bulk of a kerneldoc comment. """ @@ -1482,7 +1354,7 @@ class KernelDoc: # Unknown line, ignore self.emit_msg(ln, f"bad line: {line}") - def process_inline_name(self, ln, line): + def process_inline_name(self, ln, line, source): """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" if doc_inline_sect.search(line): @@ -1495,9 +1367,15 @@ class KernelDoc: elif doc_content.search(line): self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") self.state = state.PROTO + + # + # Don't let it add partial comments at the code, as breaks the + # logic meant to remove comments from prototypes. + # + self.process_proto_type(ln, "/**\n" + line, source) # else ... ?? - def process_inline_text(self, ln, line): + def process_inline_text(self, ln, line, source): """STATE_INLINE_TEXT: docbook comments within a prototype.""" if doc_inline_end.search(line): @@ -1583,7 +1461,7 @@ class KernelDoc: return proto - def process_proto_function(self, ln, line): + def process_proto_function(self, ln, line, source): """Ancillary routine to process a function prototype.""" # strip C99-style comments to end of line @@ -1625,10 +1503,10 @@ class KernelDoc: # # ... and we're done # - self.dump_function(ln, self.entry.prototype) + self.dump_function(ln, self.entry.prototype, source) self.reset_state(ln) - def process_proto_type(self, ln, line): + def process_proto_type(self, ln, line, source): """ Ancillary routine to process a type. """ @@ -1658,7 +1536,7 @@ class KernelDoc: elif chunk == '}': self.entry.brcount -= 1 elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) + self.dump_declaration(ln, self.entry.prototype, source) self.reset_state(ln) return # @@ -1667,7 +1545,7 @@ class KernelDoc: # self.entry.prototype += ' ' - def process_proto(self, ln, line): + def process_proto(self, ln, line, source): """STATE_PROTO: reading a function/whatever prototype.""" if doc_inline_oneline.search(line): @@ -1679,17 +1557,18 @@ class KernelDoc: self.state = state.INLINE_NAME elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) + self.process_proto_function(ln, line, source) else: - self.process_proto_type(ln, line) + self.process_proto_type(ln, line, source) - def process_docblock(self, ln, line): + def process_docblock(self, ln, line, source): """STATE_DOCBLOCK: within a ``DOC:`` block.""" if doc_end.search(line): self.dump_section() - self.output_declaration("doc", self.entry.identifier) + self.output_declaration("doc", self.entry.identifier, + source=source) self.reset_state(ln) elif doc_content.search(line): @@ -1740,6 +1619,8 @@ class KernelDoc: prev = "" prev_ln = None export_table = set() + self.state = state.NORMAL + source = "" try: with open(self.fname, "r", encoding="utf8", @@ -1766,6 +1647,12 @@ class KernelDoc: ln, state.name[self.state], line) + if self.store_src: + if source and self.state == state.NORMAL: + source = "" + elif self.state != state.NORMAL: + source += line + "\n" + # This is an optimization over the original script. # There, when export_file was used for the same file, # it was read twice. Here, we use the already-existing @@ -1773,8 +1660,11 @@ class KernelDoc: # if (self.state != state.NORMAL) or \ not self.process_export(export_table, line): + prev_state = self.state # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) + self.state_actions[self.state](self, ln, line, source) + if prev_state == state.NORMAL and self.state != state.NORMAL: + source += line + "\n" self.emit_unused_warnings() diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py index 0bf9e01cdc57..28292efe25a2 100644 --- a/tools/lib/python/kdoc/kdoc_re.py +++ b/tools/lib/python/kdoc/kdoc_re.py @@ -52,7 +52,33 @@ class KernRe: return self.regex.pattern def __repr__(self): - return f're.compile("{self.regex.pattern}")' + """ + Returns a displayable version of the class init. + """ + + flag_map = { + re.IGNORECASE: "re.I", + re.MULTILINE: "re.M", + re.DOTALL: "re.S", + re.VERBOSE: "re.X", + } + + flags = [] + for flag, name in flag_map.items(): + if self.regex.flags & flag: + flags.append(name) + + flags_name = " | ".join(flags) + + max_len = 60 + pattern = "" + for pos in range(0, len(self.regex.pattern), max_len): + pattern += '"' + self.regex.pattern[pos:max_len + pos] + '" ' + + if flags_name: + return f'KernRe({pattern}, {flags_name})' + else: + return f'KernRe({pattern})' def __add__(self, other): """ @@ -78,6 +104,13 @@ class KernRe: self.last_match = self.regex.search(string) return self.last_match + def finditer(self, string): + """ + Alias to re.finditer. + """ + + return self.regex.finditer(string) + def findall(self, string): """ Alias to re.findall. @@ -106,173 +139,9 @@ class KernRe: return self.last_match.group(num) - -class NestedMatch: - """ - Finding nested delimiters is hard with regular expressions. It is - even harder on Python with its normal re module, as there are several - advanced regular expressions that are missing. - - This is the case of this pattern:: - - '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' - - which is used to properly match open/close parentheses of the - string search STRUCT_GROUP(), - - Add a class that counts pairs of delimiters, using it to match and - replace nested expressions. - - The original approach was suggested by: - - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - - Although I re-implemented it to make it more generic and match 3 types - of delimiters. The logic checks if delimiters are paired. If not, it - will ignore the search string. - """ - - # TODO: make NestedMatch handle multiple match groups - # - # Right now, regular expressions to match it are defined only up to - # the start delimiter, e.g.: - # - # \bSTRUCT_GROUP\( - # - # is similar to: STRUCT_GROUP\((.*)\) - # except that the content inside the match group is delimiter-aligned. - # - # The content inside parentheses is converted into a single replace - # group (e.g. r`\1'). - # - # It would be nice to change such definition to support multiple - # match groups, allowing a regex equivalent to: - # - # FOO\((.*), (.*), (.*)\) - # - # it is probably easier to define it not as a regular expression, but - # with some lexical definition like: - # - # FOO(arg1, arg2, arg3) - - DELIMITER_PAIRS = { - '{': '}', - '(': ')', - '[': ']', - } - - RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') - - def _search(self, regex, line): - """ - Finds paired blocks for a regex that ends with a delimiter. - - The suggestion of using finditer to match pairs came from: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - but I ended using a different implementation to align all three types - of delimiters and seek for an initial regular expression. - - The algorithm seeks for open/close paired delimiters and places them - into a stack, yielding a start/stop position of each match when the - stack is zeroed. - - The algorithm should work fine for properly paired lines, but will - silently ignore end delimiters that precede a start delimiter. - This should be OK for kernel-doc parser, as unaligned delimiters - would cause compilation errors. So, we don't need to raise exceptions - to cover such issues. - """ - - stack = [] - - for match_re in regex.finditer(line): - start = match_re.start() - offset = match_re.end() - - d = line[offset - 1] - if d not in self.DELIMITER_PAIRS: - continue - - end = self.DELIMITER_PAIRS[d] - stack.append(end) - - for match in self.RE_DELIM.finditer(line[offset:]): - pos = match.start() + offset - - d = line[pos] - - if d in self.DELIMITER_PAIRS: - end = self.DELIMITER_PAIRS[d] - - stack.append(end) - continue - - # Does the end delimiter match what is expected? - if stack and d == stack[-1]: - stack.pop() - - if not stack: - yield start, offset, pos + 1 - break - - def search(self, regex, line): + def groups(self): """ - This is similar to re.search: - - It matches a regex that it is followed by a delimiter, - returning occurrences only if all delimiters are paired. + Returns the group results of the last match """ - for t in self._search(regex, line): - - yield line[t[0]:t[2]] - - def sub(self, regex, sub, line, count=0): - r""" - This is similar to re.sub: - - It matches a regex that it is followed by a delimiter, - replacing occurrences only if all delimiters are paired. - - if the sub argument contains:: - - r'\1' - - it will work just like re: it places there the matched paired data - with the delimiter stripped. - - If count is different than zero, it will replace at most count - items. - """ - out = "" - - cur_pos = 0 - n = 0 - - for start, end, pos in self._search(regex, line): - out += line[cur_pos:start] - - # Value, ignoring start/end delimiters - value = line[end:pos - 1] - - # replaces \1 at the sub string, if \1 is used there - new_sub = sub - new_sub = new_sub.replace(r'\1', value) - - out += new_sub - - # Drop end ';' if any - if line[pos] == ';': - pos += 1 - - cur_pos = pos - n += 1 - - if count and count >= n: - break - - # Append the remaining string - l = len(line) - out += line[cur_pos:l] - - return out + return self.last_match.groups() diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py new file mode 100644 index 000000000000..0be020d50df0 --- /dev/null +++ b/tools/lib/python/kdoc/kdoc_yaml_file.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>. + +import os + +from kdoc.kdoc_output import ManFormat, RestFormat + + +class KDocTestFile(): + """ + Handles the logic needed to store kernel‑doc output inside a YAML file. + Useful for unit tests and regression tests. + """ + + def __init__(self, config, yaml_file, yaml_content): + # + # Bail out early if yaml is not available + # + try: + import yaml + except ImportError: + sys.exit("Warning: yaml package not available. Aborting it.") + + self.config = config + self.test_file = os.path.expanduser(yaml_file) + self.yaml_content = yaml_content + self.test_names = set() + + self.tests = [] + + out_dir = os.path.dirname(self.test_file) + if out_dir and not os.path.isdir(out_dir): + sys.exit(f"Directory {out_dir} doesn't exist.") + + self.out_style = [] + + if "man" in self.yaml_content: + out_style = ManFormat() + out_style.set_config(self.config) + + self.out_style.append(out_style) + + if "rst" in self.yaml_content: + out_style = RestFormat() + out_style.set_config(self.config) + + self.out_style.append(out_style) + + def set_filter(self, export, internal, symbol, nosymbol, + function_table, enable_lineno, no_doc_sections): + """ + Set filters at the output classes. + """ + for out_style in self.out_style: + out_style.set_filter(export, internal, symbol, + nosymbol, function_table, + enable_lineno, no_doc_sections) + + @staticmethod + def get_kdoc_item(arg, start_line=1): + + d = vars(arg) + + declaration_start_line = d.get("declaration_start_line") + if not declaration_start_line: + return d + + d["declaration_start_line"] = start_line + + parameterdesc_start_lines = d.get("parameterdesc_start_lines") + if parameterdesc_start_lines: + for key in parameterdesc_start_lines: + ln = parameterdesc_start_lines[key] + ln += start_line - declaration_start_line + + parameterdesc_start_lines[key] = ln + + sections_start_lines = d.get("sections_start_lines") + if sections_start_lines: + for key in sections_start_lines: + ln = sections_start_lines[key] + ln += start_line - declaration_start_line + + sections_start_lines[key] = ln + + return d + + def output_symbols(self, fname, symbols): + """ + Store source, symbols and output strings at self.tests. + """ + + # + # KdocItem needs to be converted into dicts + # + kdoc_item = [] + expected = [] + + # + # Source code didn't produce any symbol + # + if not symbols: + return + + expected_dict = {} + start_line=1 + + for arg in symbols: + source = arg.get("source", "") + + if arg and "KdocItem" in self.yaml_content: + msg = self.get_kdoc_item(arg) + + other_stuff = msg.get("other_stuff", {}) + if "source" in other_stuff: + del other_stuff["source"] + + expected_dict["kdoc_item"] = msg + + base_name = arg.name + if not base_name: + base_name = fname + base_name = base_name.lower().replace(".", "_").replace("/", "_") + + + # Don't add duplicated names + i = 0 + name = base_name + while name in self.test_names: + i += 1 + name = f"{base_name}_{i:03d}" + + self.test_names.add(name) + + for out_style in self.out_style: + if isinstance(out_style, ManFormat): + key = "man" + else: + key = "rst" + + expected_dict[key]= out_style.output_symbols(fname, [arg]).strip() + + test = { + "name": name, + "description": f"{fname} line {arg.declaration_start_line}", + "fname": fname, + "source": source, + "expected": [expected_dict] + } + + self.tests.append(test) + + expected_dict = {} + + def write(self): + """ + Output the content of self.tests to self.test_file. + """ + import yaml + + # Helper function to better handle multilines + def str_presenter(dumper, data): + if "\n" in data: + return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + # Register the representer + yaml.add_representer(str, str_presenter) + + data = {"tests": self.tests} + + with open(self.test_file, "w", encoding="utf-8") as fp: + yaml.dump(data, fp, + sort_keys=False, width=120, indent=2, + default_flow_style=False, allow_unicode=True, + explicit_start=False, explicit_end=False) diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py new file mode 100644 index 000000000000..f6ea9efb11ae --- /dev/null +++ b/tools/lib/python/kdoc/xforms_lists.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>. + +import re + +from kdoc.kdoc_re import KernRe +from kdoc.c_lex import CMatch, CTokenizer + +struct_args_pattern = r"([^,)]+)" + + +class CTransforms: + """ + Data class containing a long set of transformations to turn + structure member prefixes, and macro invocations and variables + into something we can parse and generate kdoc for. + """ + + # + # NOTE: + # Due to performance reasons, place CMatch rules before KernRe, + # as this avoids running the C parser every time. + # + + #: Transforms for structs and unions. + struct_xforms = [ + (CMatch("__attribute__"), ""), + (CMatch("__aligned"), ""), + (CMatch("__counted_by"), ""), + (CMatch("__counted_by_(le|be)"), ""), + (CMatch("__guarded_by"), ""), + (CMatch("__pt_guarded_by"), ""), + (CMatch("__packed"), ""), + (CMatch("CRYPTO_MINALIGN_ATTR"), ""), + (CMatch("__private"), ""), + (CMatch("__rcu"), ""), + (CMatch("____cacheline_aligned_in_smp"), ""), + (CMatch("____cacheline_aligned"), ""), + (CMatch("__cacheline_group_(?:begin|end)"), ""), + (CMatch("__ETHTOOL_DECLARE_LINK_MODE_MASK"), r"DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)"), + (CMatch("DECLARE_PHY_INTERFACE_MASK",),r"DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)"), + (CMatch("DECLARE_BITMAP"), r"unsigned long \1[BITS_TO_LONGS(\2)]"), + (CMatch("DECLARE_HASHTABLE"), r"unsigned long \1[1 << ((\2) - 1)]"), + (CMatch("DECLARE_KFIFO"), r"\2 *\1"), + (CMatch("DECLARE_KFIFO_PTR"), r"\2 *\1"), + (CMatch("(?:__)?DECLARE_FLEX_ARRAY"), r"\1 \2[]"), + (CMatch("DEFINE_DMA_UNMAP_ADDR"), r"dma_addr_t \1"), + (CMatch("DEFINE_DMA_UNMAP_LEN"), r"__u32 \1"), + (CMatch("VIRTIO_DECLARE_FEATURES"), r"union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }"), + (CMatch("__cond_acquires"), ""), + (CMatch("__cond_releases"), ""), + (CMatch("__acquires"), ""), + (CMatch("__releases"), ""), + (CMatch("__must_hold"), ""), + (CMatch("__must_not_hold"), ""), + (CMatch("__must_hold_shared"), ""), + (CMatch("__cond_acquires_shared"), ""), + (CMatch("__acquires_shared"), ""), + (CMatch("__releases_shared"), ""), + (CMatch("__attribute__"), ""), + + # + # Macro __struct_group() creates an union with an anonymous + # and a non-anonymous struct, depending on the parameters. We only + # need one of those at kernel-doc, as we won't be documenting the same + # members twice. + # + (CMatch("struct_group"), r"struct { \2+ };"), + (CMatch("struct_group_attr"), r"struct { \3+ };"), + (CMatch("struct_group_tagged"), r"struct { \3+ };"), + (CMatch("__struct_group"), r"struct { \4+ };"), + ] + + #: Transforms for function prototypes. + function_xforms = [ + (CMatch("static"), ""), + (CMatch("extern"), ""), + (CMatch("asmlinkage"), ""), + (CMatch("inline"), ""), + (CMatch("__inline__"), ""), + (CMatch("__inline"), ""), + (CMatch("__always_inline"), ""), + (CMatch("noinline"), ""), + (CMatch("__FORTIFY_INLINE"), ""), + (CMatch("__init"), ""), + (CMatch("__init_or_module"), ""), + (CMatch("__exit"), ""), + (CMatch("__deprecated"), ""), + (CMatch("__flatten"), ""), + (CMatch("__meminit"), ""), + (CMatch("__must_check"), ""), + (CMatch("__weak"), ""), + (CMatch("__sched"), ""), + (CMatch("__always_unused"), ""), + (CMatch("__printf"), ""), + (CMatch("__(?:re)?alloc_size"), ""), + (CMatch("__diagnose_as"), ""), + (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"), + (CMatch("__no_context_analysis"), ""), + (CMatch("__attribute_const__"), ""), + (CMatch("__attribute__"), ""), + + # + # HACK: this is similar to process_export() hack. It is meant to + # drop _noproof from function name. See for instance: + # ahash_request_alloc kernel-doc declaration at include/crypto/hash.h. + # + (KernRe("_noprof"), ""), + ] + + #: Transforms for variable prototypes. + var_xforms = [ + (CMatch("__read_mostly"), ""), + (CMatch("__ro_after_init"), ""), + (CMatch("__guarded_by"), ""), + (CMatch("__pt_guarded_by"), ""), + (CMatch("LIST_HEAD"), r"struct list_head \1"), + + (KernRe(r"(?://.*)$"), ""), + (KernRe(r"(?:/\*.*\*/)"), ""), + (KernRe(r";$"), ""), + ] + + #: Transforms main dictionary used at apply_transforms(). + xforms = { + "struct": struct_xforms, + "func": function_xforms, + "var": var_xforms, + } + + def apply(self, xforms_type, source): + """ + Apply a set of transforms to a block of source. + + As tokenizer is used here, this function also remove comments + at the end. + """ + if xforms_type not in self.xforms: + return source + + if isinstance(source, str): + source = CTokenizer(source) + + for search, subst in self.xforms[xforms_type]: + # + # KernRe only accept strings. + # + if isinstance(search, KernRe): + source = str(source) + + source = search.sub(subst, source) + return str(source) |
