From 2a14f021210fcbc271591d4c592eb4adca6bf127 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:35 +0200 Subject: scripts/jobserver-exec: move the code to a class Convert the code inside jobserver-exec to a class and properly document it. Using a class allows reusing the jobserver logic in other scripts. While the main code remains unchanged, being compatible with Python 2.6 and 3.0+, its coding style now follows a more modern standard, having tabs replaced by a 4-space indent, passing autopep8, black and pylint. The class can also be used as a context manager, the Pythonic way to acquire and release a resource, e.g. it now supports: with JobserverExec() as jobserver: jobserver.run(sys.argv[1:]) With the new code, the __exit__() function should ensure that the jobserver slot will be closed at the end, even if something bad happens somewhere. Signed-off-by: Mauro Carvalho Chehab Message-ID: <4749921b75d4e0bd85a25d4d94aa2c940fad084e.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/jobserver-exec | 218 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 151 insertions(+), 67 deletions(-) (limited to 'scripts') diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 7eca035472d3..897c0cca9e6e 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -1,77 +1,161 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0+ # +# pylint: disable=C0103,C0209 +# # This determines how many parallel tasks "make" is expecting, as it is # not exposed via an special variables, reserves them all, runs a subprocess # with PARALLELISM environment variable set, and releases the jobs back again. # # https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -from __future__ import print_function -import os, sys, errno + +""" +Interacts with the POSIX jobserver during the Kernel build time. 
+ +A "normal" jobserver task, like the one initiated by a make subrocess would do: + + - open read/write file descriptors to communicate with the job server; + - ask for one slot by calling: + claim = os.read(reader, 1) + - when the job finshes, call: + os.write(writer, b"+") # os.write(writer, claim) + +Here, the goal is different: This script aims to get the remaining number +of slots available, using all of them to run a command which handle tasks in +parallel. To to that, it has a loop that ends only after there are no +slots left. It then increments the number by one, in order to allow a +call equivalent to make -j$((claim+1)), e.g. having a parent make creating +$claim child to do the actual work. + +The end goal here is to keep the total number of build tasks under the +limit established by the initial make -j$n_proc call. +""" + +import errno +import os import subprocess +import sys + + +class JobserverExec: + """ + Claim all slots from make using POSIX Jobserver. + + The main methods here are: + - open(): reserves all slots; + - close(): method returns all used slots back to make; + - run(): executes a command setting PARALLELISM= + """ + + def __init__(self): + """Initialize internal vars""" + self.claim = 0 + self.jobs = b"" + self.reader = None + self.writer = None + self.is_open = False + + def open(self): + """Reserve all available slots to be claimed later on""" + + if self.is_open: + return + + try: + # Fetch the make environment options. + flags = os.environ["MAKEFLAGS"] + # Look for "--jobserver=R,W" + # Note that GNU Make has used --jobserver-fds and --jobserver-auth + # so this handles all of them. + opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] + + # Parse out R,W file descriptor numbers and set them nonblocking. + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. 
+ fds = opts[-1].split("=", 1)[1] + + # Starting with GNU Make 4.4, named pipes are used for reader + # and writer. + # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 + _, _, path = fds.partition("fifo:") + + if path: + self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) + self.writer = os.open(path, os.O_WRONLY) + else: + self.reader, self.writer = [int(x) for x in fds.split(",", 1)] + # Open a private copy of reader to avoid setting nonblocking + # on an unexpecting process with the same reader fd. + self.reader = os.open("/proc/self/fd/%d" % (self.reader), + os.O_RDONLY | os.O_NONBLOCK) + + # Read out as many jobserver slots as possible + while True: + try: + slot = os.read(self.reader, 8) + self.jobs += slot + except (OSError, IOError) as e: + if e.errno == errno.EWOULDBLOCK: + # Stop at the end of the jobserver queue. + break + # If something went wrong, give back the jobs. + if self.jobs: + os.write(self.writer, self.jobs) + raise e + + # Add a bump for our caller's reserveration, since we're just going + # to sit here blocked on our child. + self.claim = len(self.jobs) + 1 + + except (KeyError, IndexError, ValueError, OSError, IOError): + # Any missing environment strings or bad fds should result in just + # not being parallel. + self.claim = None + + self.is_open = True + + def close(self): + """Return all reserved slots to Jobserver""" + + if not self.is_open: + return + + # Return all the reserved slots. + if len(self.jobs): + os.write(self.writer, self.jobs) + + self.is_open = False + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + def run(self, cmd): + """ + Run a command setting PARALLELISM env variable to the number of + available job slots (claim) + 1, e.g. it will reserve claim slots + to do the actual build work, plus one to monitor its children. 
+ """ + self.open() # Ensure that self.claim is set + + # We can only claim parallelism if there was a jobserver (i.e. a + # top-level "-jN" argument) and there were no other failures. Otherwise + # leave out the environment variable and let the child figure out what + # is best. + if self.claim: + os.environ["PARALLELISM"] = str(self.claim) + + return subprocess.call(cmd) + + +def main(): + """Main program""" + with JobserverExec() as jobserver: + jobserver.run(sys.argv[1:]) + -# Extract and prepare jobserver file descriptors from environment. -claim = 0 -jobs = b"" -try: - # Fetch the make environment options. - flags = os.environ['MAKEFLAGS'] - - # Look for "--jobserver=R,W" - # Note that GNU Make has used --jobserver-fds and --jobserver-auth - # so this handles all of them. - opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] - - # Parse out R,W file descriptor numbers and set them nonblocking. - # If the MAKEFLAGS variable contains multiple instances of the - # --jobserver-auth= option, the last one is relevant. - fds = opts[-1].split("=", 1)[1] - - # Starting with GNU Make 4.4, named pipes are used for reader and writer. - # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 - _, _, path = fds.partition('fifo:') - - if path: - reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) - writer = os.open(path, os.O_WRONLY) - else: - reader, writer = [int(x) for x in fds.split(",", 1)] - # Open a private copy of reader to avoid setting nonblocking - # on an unexpecting process with the same reader fd. - reader = os.open("/proc/self/fd/%d" % (reader), - os.O_RDONLY | os.O_NONBLOCK) - - # Read out as many jobserver slots as possible. - while True: - try: - slot = os.read(reader, 8) - jobs += slot - except (OSError, IOError) as e: - if e.errno == errno.EWOULDBLOCK: - # Stop at the end of the jobserver queue. - break - # If something went wrong, give back the jobs. 
- if len(jobs): - os.write(writer, jobs) - raise e - # Add a bump for our caller's reserveration, since we're just going - # to sit here blocked on our child. - claim = len(jobs) + 1 -except (KeyError, IndexError, ValueError, OSError, IOError) as e: - # Any missing environment strings or bad fds should result in just - # not being parallel. - pass - -# We can only claim parallelism if there was a jobserver (i.e. a top-level -# "-jN" argument) and there were no other failures. Otherwise leave out the -# environment variable and let the child figure out what is best. -if claim > 0: - os.environ['PARALLELISM'] = '%d' % (claim) - -rc = subprocess.call(sys.argv[1:]) - -# Return all the reserved slots. -if len(jobs): - os.write(writer, jobs) - -sys.exit(rc) +if __name__ == "__main__": + main() -- cgit v1.2.3 From fce6df7e7384ba82ea718b14974f33c1b697cf18 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:36 +0200 Subject: scripts/jobserver-exec: move its class to the lib directory To make it easier to be re-used, move the JobserverExec class to the library directory. 
Signed-off-by: Mauro Carvalho Chehab Message-ID: <6be7b161b6c005a9807162ebfd239af6a4e6fa47.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/jobserver-exec | 152 ++++------------------------------------------- scripts/lib/jobserver.py | 149 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+), 141 deletions(-) create mode 100755 scripts/lib/jobserver.py (limited to 'scripts') diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 897c0cca9e6e..40a0f0058733 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -1,155 +1,25 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0+ -# -# pylint: disable=C0103,C0209 -# -# This determines how many parallel tasks "make" is expecting, as it is -# not exposed via an special variables, reserves them all, runs a subprocess -# with PARALLELISM environment variable set, and releases the jobs back again. -# -# https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -""" -Interacts with the POSIX jobserver during the Kernel build time. - -A "normal" jobserver task, like the one initiated by a make subrocess would do: - - - open read/write file descriptors to communicate with the job server; - - ask for one slot by calling: - claim = os.read(reader, 1) - - when the job finshes, call: - os.write(writer, b"+") # os.write(writer, claim) - -Here, the goal is different: This script aims to get the remaining number -of slots available, using all of them to run a command which handle tasks in -parallel. To to that, it has a loop that ends only after there are no -slots left. It then increments the number by one, in order to allow a -call equivalent to make -j$((claim+1)), e.g. having a parent make creating -$claim child to do the actual work. - -The end goal here is to keep the total number of build tasks under the -limit established by the initial make -j$n_proc call. 
-""" - -import errno import os -import subprocess import sys +LIB_DIR = "lib" +SRC_DIR = os.path.dirname(os.path.realpath(__file__)) -class JobserverExec: - """ - Claim all slots from make using POSIX Jobserver. - - The main methods here are: - - open(): reserves all slots; - - close(): method returns all used slots back to make; - - run(): executes a command setting PARALLELISM= - """ - - def __init__(self): - """Initialize internal vars""" - self.claim = 0 - self.jobs = b"" - self.reader = None - self.writer = None - self.is_open = False - - def open(self): - """Reserve all available slots to be claimed later on""" - - if self.is_open: - return - - try: - # Fetch the make environment options. - flags = os.environ["MAKEFLAGS"] - # Look for "--jobserver=R,W" - # Note that GNU Make has used --jobserver-fds and --jobserver-auth - # so this handles all of them. - opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] - - # Parse out R,W file descriptor numbers and set them nonblocking. - # If the MAKEFLAGS variable contains multiple instances of the - # --jobserver-auth= option, the last one is relevant. - fds = opts[-1].split("=", 1)[1] - - # Starting with GNU Make 4.4, named pipes are used for reader - # and writer. - # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 - _, _, path = fds.partition("fifo:") +sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) - if path: - self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) - self.writer = os.open(path, os.O_WRONLY) - else: - self.reader, self.writer = [int(x) for x in fds.split(",", 1)] - # Open a private copy of reader to avoid setting nonblocking - # on an unexpecting process with the same reader fd. 
- self.reader = os.open("/proc/self/fd/%d" % (self.reader), - os.O_RDONLY | os.O_NONBLOCK) +from jobserver import JobserverExec # pylint: disable=C0415 - # Read out as many jobserver slots as possible - while True: - try: - slot = os.read(self.reader, 8) - self.jobs += slot - except (OSError, IOError) as e: - if e.errno == errno.EWOULDBLOCK: - # Stop at the end of the jobserver queue. - break - # If something went wrong, give back the jobs. - if self.jobs: - os.write(self.writer, self.jobs) - raise e - # Add a bump for our caller's reserveration, since we're just going - # to sit here blocked on our child. - self.claim = len(self.jobs) + 1 - - except (KeyError, IndexError, ValueError, OSError, IOError): - # Any missing environment strings or bad fds should result in just - # not being parallel. - self.claim = None - - self.is_open = True - - def close(self): - """Return all reserved slots to Jobserver""" - - if not self.is_open: - return - - # Return all the reserved slots. - if len(self.jobs): - os.write(self.writer, self.jobs) - - self.is_open = False - - def __enter__(self): - self.open() - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - self.close() - - def run(self, cmd): - """ - Run a command setting PARALLELISM env variable to the number of - available job slots (claim) + 1, e.g. it will reserve claim slots - to do the actual build work, plus one to monitor its children. - """ - self.open() # Ensure that self.claim is set - - # We can only claim parallelism if there was a jobserver (i.e. a - # top-level "-jN" argument) and there were no other failures. Otherwise - # leave out the environment variable and let the child figure out what - # is best. 
- if self.claim: - os.environ["PARALLELISM"] = str(self.claim) - - return subprocess.call(cmd) +""" +Determines how many parallel tasks "make" is expecting, as it is +not exposed via an special variables, reserves them all, runs a subprocess +with PARALLELISM environment variable set, and releases the jobs back again. +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" def main(): """Main program""" diff --git a/scripts/lib/jobserver.py b/scripts/lib/jobserver.py new file mode 100755 index 000000000000..a24f30ef4fa8 --- /dev/null +++ b/scripts/lib/jobserver.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0+ +# +# pylint: disable=C0103,C0209 +# +# + +""" +Interacts with the POSIX jobserver during the Kernel build time. + +A "normal" jobserver task, like the one initiated by a make subrocess would do: + + - open read/write file descriptors to communicate with the job server; + - ask for one slot by calling: + claim = os.read(reader, 1) + - when the job finshes, call: + os.write(writer, b"+") # os.write(writer, claim) + +Here, the goal is different: This script aims to get the remaining number +of slots available, using all of them to run a command which handle tasks in +parallel. To to that, it has a loop that ends only after there are no +slots left. It then increments the number by one, in order to allow a +call equivalent to make -j$((claim+1)), e.g. having a parent make creating +$claim child to do the actual work. + +The end goal here is to keep the total number of build tasks under the +limit established by the initial make -j$n_proc call. + +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" + +import errno +import os +import subprocess +import sys + +class JobserverExec: + """ + Claim all slots from make using POSIX Jobserver. 
+ + The main methods here are: + - open(): reserves all slots; + - close(): method returns all used slots back to make; + - run(): executes a command setting PARALLELISM= + """ + + def __init__(self): + """Initialize internal vars""" + self.claim = 0 + self.jobs = b"" + self.reader = None + self.writer = None + self.is_open = False + + def open(self): + """Reserve all available slots to be claimed later on""" + + if self.is_open: + return + + try: + # Fetch the make environment options. + flags = os.environ["MAKEFLAGS"] + # Look for "--jobserver=R,W" + # Note that GNU Make has used --jobserver-fds and --jobserver-auth + # so this handles all of them. + opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] + + # Parse out R,W file descriptor numbers and set them nonblocking. + # If the MAKEFLAGS variable contains multiple instances of the + # --jobserver-auth= option, the last one is relevant. + fds = opts[-1].split("=", 1)[1] + + # Starting with GNU Make 4.4, named pipes are used for reader + # and writer. + # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 + _, _, path = fds.partition("fifo:") + + if path: + self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) + self.writer = os.open(path, os.O_WRONLY) + else: + self.reader, self.writer = [int(x) for x in fds.split(",", 1)] + # Open a private copy of reader to avoid setting nonblocking + # on an unexpecting process with the same reader fd. + self.reader = os.open("/proc/self/fd/%d" % (self.reader), + os.O_RDONLY | os.O_NONBLOCK) + + # Read out as many jobserver slots as possible + while True: + try: + slot = os.read(self.reader, 8) + self.jobs += slot + except (OSError, IOError) as e: + if e.errno == errno.EWOULDBLOCK: + # Stop at the end of the jobserver queue. + break + # If something went wrong, give back the jobs. + if self.jobs: + os.write(self.writer, self.jobs) + raise e + + # Add a bump for our caller's reserveration, since we're just going + # to sit here blocked on our child. 
+ self.claim = len(self.jobs) + 1 + + except (KeyError, IndexError, ValueError, OSError, IOError): + # Any missing environment strings or bad fds should result in just + # not being parallel. + self.claim = None + + self.is_open = True + + def close(self): + """Return all reserved slots to Jobserver""" + + if not self.is_open: + return + + # Return all the reserved slots. + if len(self.jobs): + os.write(self.writer, self.jobs) + + self.is_open = False + + def __enter__(self): + self.open() + return self + + def __exit__(self, exc_type, exc_value, exc_traceback): + self.close() + + def run(self, cmd, *args, **pwargs): + """ + Run a command setting PARALLELISM env variable to the number of + available job slots (claim) + 1, e.g. it will reserve claim slots + to do the actual build work, plus one to monitor its children. + """ + self.open() # Ensure that self.claim is set + + # We can only claim parallelism if there was a jobserver (i.e. a + # top-level "-jN" argument) and there were no other failures. Otherwise + # leave out the environment variable and let the child figure out what + # is best. + if self.claim: + os.environ["PARALLELISM"] = str(self.claim) + + return subprocess.call(cmd, *args, **pwargs) -- cgit v1.2.3 From a84a5d0b5a184551eeded75b8df6440bd81e84f4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:37 +0200 Subject: scripts/jobserver-exec: add a help message Currently, calling it without an argument shows an ugly error message. Instead, print a message using pythondoc as description. 
Signed-off-by: Mauro Carvalho Chehab Message-ID: <64b0339eac54ac0f2b3de3667a7f4f5becb1c6ae.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/jobserver-exec | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'scripts') diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index 40a0f0058733..ae23afd344ec 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -1,6 +1,15 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0+ +""" +Determines how many parallel tasks "make" is expecting, as it is +not exposed via any special variables, reserves them all, runs a subprocess +with PARALLELISM environment variable set, and releases the jobs back again. + +See: + https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver +""" + import os import sys @@ -12,17 +21,12 @@ sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) from jobserver import JobserverExec # pylint: disable=C0415 -""" -Determines how many parallel tasks "make" is expecting, as it is -not exposed via an special variables, reserves them all, runs a subprocess -with PARALLELISM environment variable set, and releases the jobs back again. - -See: - https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -""" - def main(): """Main program""" + if len(sys.argv) < 2: + name = os.path.basename(__file__) + sys.exit("usage: " + name +" command [args ...]\n" + __doc__) + with JobserverExec() as jobserver: jobserver.run(sys.argv[1:]) -- cgit v1.2.3 From 75539bec27ddf4ac206b74d307ba9e92dbaaece7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:38 +0200 Subject: scripts: check-variable-fonts.sh: convert to Python This script handles errors when trying to build translations with make pdfdocs. 
As part of our cleanup work to remove hacks from docs Makefile, convert this to python, preparing it to be part of a library to be called by sphinx-build-wrapper. Signed-off-by: Mauro Carvalho Chehab Message-ID: Signed-off-by: Jonathan Corbet --- scripts/check-variable-fonts.py | 165 ++++++++++++++++++++++++++++++++++++++++ scripts/check-variable-fonts.sh | 115 ---------------------------- 2 files changed, 165 insertions(+), 115 deletions(-) create mode 100755 scripts/check-variable-fonts.py delete mode 100755 scripts/check-variable-fonts.sh (limited to 'scripts') diff --git a/scripts/check-variable-fonts.py b/scripts/check-variable-fonts.py new file mode 100755 index 000000000000..8be1c0f39588 --- /dev/null +++ b/scripts/check-variable-fonts.py @@ -0,0 +1,165 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) Akira Yokosawa, 2024 +# +# Ported to Python by (c) Mauro Carvalho Chehab, 2025 +# +# For "make pdfdocs", reports of build errors of translations.pdf started +# arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE +# tumbleweed have started deploying variable-font [3] format of "Noto CJK" +# fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK +# (Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which +# does not (and likely never will) understand variable fonts for historical +# reasons. +# +# The build error happens even when both of variable- and non-variable-format +# fonts are found on the build system. To make matters worse, Fedora enlists +# variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, +# -zh_TW, etc. Hence developers who have interest in CJK pages are more +# likely to encounter the build errors. +# +# This script is invoked from the error path of "make pdfdocs" and emits +# suggestions if variable-font files of "Noto CJK" fonts are in the list of +# fonts accessible from XeTeX. 
+# +# References: +# [1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ +# [2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ +# [3]: https://en.wikipedia.org/wiki/Variable_font +# [4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts +# [5]: https://build.opensuse.org/request/show/1157217 +# +#=========================================================================== +# Workarounds for building translations.pdf +#=========================================================================== +# +# * Denylist "variable font" Noto CJK fonts. +# - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with +# tweaks if necessary. Remove leading "# ". +# - Path of fontconfig/fonts.conf can be overridden by setting an env +# variable FONTS_CONF_DENY_VF. +# +# * Template: +# ----------------------------------------------------------------- +# +# +# +# +# +# +# +# /usr/share/fonts/google-noto-*-cjk-vf-fonts +# +# /usr/share/fonts/truetype/Noto*CJK*-VF.otf +# +# +# +# ----------------------------------------------------------------- +# +# The denylisting is activated for "make pdfdocs". +# +# * For skipping CJK pages in PDF +# - Uninstall texlive-xecjk. +# Denylisting is not needed in this case. +# +# * For printing CJK pages in PDF +# - Need non-variable "Noto CJK" fonts. +# * Fedora +# - google-noto-sans-cjk-fonts +# - google-noto-serif-cjk-fonts +# * openSUSE tumbleweed +# - Non-variable "Noto CJK" fonts are not available as distro packages +# as of April, 2024. Fetch a set of font files from upstream Noto +# CJK Font released at: +# https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc +# and at: +# https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc +# , then uncompress and deploy them. +# - Remember to update fontconfig cache by running fc-cache. +# +# !!! Caution !!! +# Uninstalling "variable font" packages can be dangerous. +# They might be depended upon by other packages important for your work. 
+# Denylisting should be less invasive, as it is effective only while +# XeLaTeX runs in "make pdfdocs". + +import os +import re +import subprocess +import sys +import textwrap + +class LatexFontChecker: + """ + Detect problems with CJK variable fonts that affect PDF builds for + translations. + """ + + def __init__(self): + deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") + + self.environ = os.environ.copy() + self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) + + self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") + + def get_noto_cjk_vf_fonts(self): + """Get Noto CJK fonts""" + + cjk_fonts = set() + cmd = ["fc-list", ":", "file", "family", "variable"] + try: + result = subprocess.run(cmd,stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + env=self.environ, + check=True) + + except subprocess.CalledProcessError as exc: + sys.exit(f"Error running fc-list: {repr(exc)}") + + for line in result.stdout.splitlines(): + if 'variable=True' not in line: + continue + + match = self.re_cjk.search(line) + if match: + cjk_fonts.add(match.group(1)) + + return sorted(cjk_fonts) + + def check(self): + """Check for problems with CJK fonts""" + + fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") + if not fonts: + return None + + rel_file = os.path.relpath(__file__, os.getcwd()) + + msg = "=" * 77 + "\n" + msg += 'XeTeX is confused by "variable font" files listed below:\n' + msg += fonts + "\n" + msg += textwrap.dedent(f""" + For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. + Or, CJK pages can be skipped by uninstalling texlive-xecjk. + + For more info on denylisting, other options, and variable font, see header + comments of {rel_file}. 
+ """) + msg += "=" * 77 + + return msg + +if __name__ == "__main__": + msg = LatexFontChecker().check() + if msg: + print(msg) + + sys.exit(1) diff --git a/scripts/check-variable-fonts.sh b/scripts/check-variable-fonts.sh deleted file mode 100755 index ce63f0acea5f..000000000000 --- a/scripts/check-variable-fonts.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) Akira Yokosawa, 2024 -# -# For "make pdfdocs", reports of build errors of translations.pdf started -# arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE -# tumbleweed have started deploying variable-font [3] format of "Noto CJK" -# fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK -# (Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which -# does not (and likely never will) understand variable fonts for historical -# reasons. -# -# The build error happens even when both of variable- and non-variable-format -# fonts are found on the build system. To make matters worse, Fedora enlists -# variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, -# -zh_TW, etc. Hence developers who have interest in CJK pages are more -# likely to encounter the build errors. -# -# This script is invoked from the error path of "make pdfdocs" and emits -# suggestions if variable-font files of "Noto CJK" fonts are in the list of -# fonts accessible from XeTeX. 
-# -# References: -# [1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ -# [2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ -# [3]: https://en.wikipedia.org/wiki/Variable_font -# [4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts -# [5]: https://build.opensuse.org/request/show/1157217 -# -#=========================================================================== -# Workarounds for building translations.pdf -#=========================================================================== -# -# * Denylist "variable font" Noto CJK fonts. -# - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with -# tweaks if necessary. Remove leading "# ". -# - Path of fontconfig/fonts.conf can be overridden by setting an env -# variable FONTS_CONF_DENY_VF. -# -# * Template: -# ----------------------------------------------------------------- -# -# -# -# -# -# -# -# /usr/share/fonts/google-noto-*-cjk-vf-fonts -# -# /usr/share/fonts/truetype/Noto*CJK*-VF.otf -# -# -# -# ----------------------------------------------------------------- -# -# The denylisting is activated for "make pdfdocs". -# -# * For skipping CJK pages in PDF -# - Uninstall texlive-xecjk. -# Denylisting is not needed in this case. -# -# * For printing CJK pages in PDF -# - Need non-variable "Noto CJK" fonts. -# * Fedora -# - google-noto-sans-cjk-fonts -# - google-noto-serif-cjk-fonts -# * openSUSE tumbleweed -# - Non-variable "Noto CJK" fonts are not available as distro packages -# as of April, 2024. Fetch a set of font files from upstream Noto -# CJK Font released at: -# https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc -# and at: -# https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc -# , then uncompress and deploy them. -# - Remember to update fontconfig cache by running fc-cache. -# -# !!! Caution !!! -# Uninstalling "variable font" packages can be dangerous. -# They might be depended upon by other packages important for your work. 
-# Denylisting should be less invasive, as it is effective only while -# XeLaTeX runs in "make pdfdocs". - -# Default per-user fontconfig path (overridden by env variable) -: ${FONTS_CONF_DENY_VF:=$HOME/deny-vf} - -export XDG_CONFIG_HOME=${FONTS_CONF_DENY_VF} - -notocjkvffonts=`fc-list : file family variable | \ - grep 'variable=True' | \ - grep -E -e 'Noto (Sans|Sans Mono|Serif) CJK' | \ - sed -e 's/^/ /' -e 's/: Noto S.*$//' | sort | uniq` - -if [ "x$notocjkvffonts" != "x" ] ; then - echo '=============================================================================' - echo 'XeTeX is confused by "variable font" files listed below:' - echo "$notocjkvffonts" - echo - echo 'For CJK pages in PDF, they need to be hidden from XeTeX by denylisting.' - echo 'Or, CJK pages can be skipped by uninstalling texlive-xecjk.' - echo - echo 'For more info on denylisting, other options, and variable font, see header' - echo 'comments of scripts/check-variable-fonts.sh.' - echo '=============================================================================' -fi - -# As this script is invoked from Makefile's error path, always error exit -# regardless of whether any variable font is discovered or not. -exit 1 -- cgit v1.2.3 From 4515ffdf3cbc384cb7bbb699bcd1db5705862cfa Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:39 +0200 Subject: tools/docs: check-variable-fonts.py: split into a lib and an exec file As we'll be using the actual code inside sphinx-build-wrapper, split the library from the executable, placing the exec at the new place we've been using: tools/docs No functional changes. 
Signed-off-by: Mauro Carvalho Chehab Message-ID: <8adbc22df1d43b1c5a673799d2333cc429ffe9fc.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/check-variable-fonts.py | 165 ---------------------------------------- 1 file changed, 165 deletions(-) delete mode 100755 scripts/check-variable-fonts.py (limited to 'scripts') diff --git a/scripts/check-variable-fonts.py b/scripts/check-variable-fonts.py deleted file mode 100755 index 8be1c0f39588..000000000000 --- a/scripts/check-variable-fonts.py +++ /dev/null @@ -1,165 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) Akira Yokosawa, 2024 -# -# Ported to Python by (c) Mauro Carvalho Chehab, 2025 -# -# For "make pdfdocs", reports of build errors of translations.pdf started -# arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE -# tumbleweed have started deploying variable-font [3] format of "Noto CJK" -# fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK -# (Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which -# does not (and likely never will) understand variable fonts for historical -# reasons. -# -# The build error happens even when both of variable- and non-variable-format -# fonts are found on the build system. To make matters worse, Fedora enlists -# variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN, -# -zh_TW, etc. Hence developers who have interest in CJK pages are more -# likely to encounter the build errors. -# -# This script is invoked from the error path of "make pdfdocs" and emits -# suggestions if variable-font files of "Noto CJK" fonts are in the list of -# fonts accessible from XeTeX. 
-# -# References: -# [1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/ -# [2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/ -# [3]: https://en.wikipedia.org/wiki/Variable_font -# [4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts -# [5]: https://build.opensuse.org/request/show/1157217 -# -#=========================================================================== -# Workarounds for building translations.pdf -#=========================================================================== -# -# * Denylist "variable font" Noto CJK fonts. -# - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with -# tweaks if necessary. Remove leading "# ". -# - Path of fontconfig/fonts.conf can be overridden by setting an env -# variable FONTS_CONF_DENY_VF. -# -# * Template: -# ----------------------------------------------------------------- -# -# -# -# -# -# -# -# /usr/share/fonts/google-noto-*-cjk-vf-fonts -# -# /usr/share/fonts/truetype/Noto*CJK*-VF.otf -# -# -# -# ----------------------------------------------------------------- -# -# The denylisting is activated for "make pdfdocs". -# -# * For skipping CJK pages in PDF -# - Uninstall texlive-xecjk. -# Denylisting is not needed in this case. -# -# * For printing CJK pages in PDF -# - Need non-variable "Noto CJK" fonts. -# * Fedora -# - google-noto-sans-cjk-fonts -# - google-noto-serif-cjk-fonts -# * openSUSE tumbleweed -# - Non-variable "Noto CJK" fonts are not available as distro packages -# as of April, 2024. Fetch a set of font files from upstream Noto -# CJK Font released at: -# https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc -# and at: -# https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc -# , then uncompress and deploy them. -# - Remember to update fontconfig cache by running fc-cache. -# -# !!! Caution !!! -# Uninstalling "variable font" packages can be dangerous. -# They might be depended upon by other packages important for your work. 
-# Denylisting should be less invasive, as it is effective only while -# XeLaTeX runs in "make pdfdocs". - -import os -import re -import subprocess -import sys -import textwrap - -class LatexFontChecker: - """ - Detect problems with CJK variable fonts that affect PDF builds for - translations. - """ - - def __init__(self): - deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf") - - self.environ = os.environ.copy() - self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf) - - self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK") - - def get_noto_cjk_vf_fonts(self): - """Get Noto CJK fonts""" - - cjk_fonts = set() - cmd = ["fc-list", ":", "file", "family", "variable"] - try: - result = subprocess.run(cmd,stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - env=self.environ, - check=True) - - except subprocess.CalledProcessError as exc: - sys.exit(f"Error running fc-list: {repr(exc)}") - - for line in result.stdout.splitlines(): - if 'variable=True' not in line: - continue - - match = self.re_cjk.search(line) - if match: - cjk_fonts.add(match.group(1)) - - return sorted(cjk_fonts) - - def check(self): - """Check for problems with CJK fonts""" - - fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ") - if not fonts: - return None - - rel_file = os.path.relpath(__file__, os.getcwd()) - - msg = "=" * 77 + "\n" - msg += 'XeTeX is confused by "variable font" files listed below:\n' - msg += fonts + "\n" - msg += textwrap.dedent(f""" - For CJK pages in PDF, they need to be hidden from XeTeX by denylisting. - Or, CJK pages can be skipped by uninstalling texlive-xecjk. - - For more info on denylisting, other options, and variable font, see header - comments of {rel_file}. 
- """) - msg += "=" * 77 - - return msg - -if __name__ == "__main__": - msg = LatexFontChecker().check() - if msg: - print(msg) - - sys.exit(1) -- cgit v1.2.3 From abd61d1ff8f0ea4cb099a1f3d5015dea7c8471cf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:41 +0200 Subject: scripts: sphinx-pre-install: move it to tools/docs As we're reorganizing the place where doc scripts are located, move this one to tools/docs. No functional changes. Signed-off-by: Mauro Carvalho Chehab Message-ID: <5e2c40d3aebfd67b7ac7817f548bd1fa4ff661a8.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/sphinx-pre-install | 1621 -------------------------------------------- 1 file changed, 1621 deletions(-) delete mode 100755 scripts/sphinx-pre-install (limited to 'scripts') diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install deleted file mode 100755 index 954ed3dc0645..000000000000 --- a/scripts/sphinx-pre-install +++ /dev/null @@ -1,1621 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-or-later -# Copyright (c) 2017-2025 Mauro Carvalho Chehab -# -# pylint: disable=C0103,C0114,C0115,C0116,C0301,C0302 -# pylint: disable=R0902,R0904,R0911,R0912,R0914,R0915,R1705,R1710,E1121 - -# Note: this script requires at least Python 3.6 to run. -# Don't add changes not compatible with it, it is meant to report -# incompatible python versions. - -""" -Dependency checker for Sphinx documentation Kernel build. - -This module provides tools to check for all required dependencies needed to -build documentation using Sphinx, including system packages, Python modules -and LaTeX packages for PDF generation. - -It detect packages for a subset of Linux distributions used by Kernel -maintainers, showing hints and missing dependencies. - -The main class SphinxDependencyChecker handles the dependency checking logic -and provides recommendations for installing missing packages. 
It supports both -system package installations and Python virtual environments. By default, -system pacage install is recommended. -""" - -import argparse -import os -import re -import subprocess -import sys -from glob import glob - - -def parse_version(version): - """Convert a major.minor.patch version into a tuple""" - return tuple(int(x) for x in version.split(".")) - - -def ver_str(version): - """Returns a version tuple as major.minor.patch""" - - return ".".join([str(x) for x in version]) - - -RECOMMENDED_VERSION = parse_version("3.4.3") -MIN_PYTHON_VERSION = parse_version("3.7") - - -class DepManager: - """ - Manage package dependencies. There are three types of dependencies: - - - System: dependencies required for docs build; - - Python: python dependencies for a native distro Sphinx install; - - PDF: dependencies needed by PDF builds. - - Each dependency can be mandatory or optional. Not installing an optional - dependency won't break the build, but will cause degradation at the - docs output. - """ - - # Internal types of dependencies. Don't use them outside DepManager class. - _SYS_TYPE = 0 - _PHY_TYPE = 1 - _PDF_TYPE = 2 - - # Dependencies visible outside the class. - # The keys are tuple with: (type, is_mandatory flag). - # - # Currently we're not using all optional dep types. Yet, we'll keep all - # possible combinations here. They're not many, and that makes easier - # if later needed and for the name() method below - - SYSTEM_MANDATORY = (_SYS_TYPE, True) - PYTHON_MANDATORY = (_PHY_TYPE, True) - PDF_MANDATORY = (_PDF_TYPE, True) - - SYSTEM_OPTIONAL = (_SYS_TYPE, False) - PYTHON_OPTIONAL = (_PHY_TYPE, False) - PDF_OPTIONAL = (_PDF_TYPE, True) - - def __init__(self, pdf): - """ - Initialize internal vars: - - - missing: missing dependencies list, containing a distro-independent - name for a missing dependency and its type. - - missing_pkg: ancillary dict containing missing dependencies in - distro namespace, organized by type. 
- - need: total number of needed dependencies. Never cleaned. - - optional: total number of optional dependencies. Never cleaned. - - pdf: Is PDF support enabled? - """ - self.missing = {} - self.missing_pkg = {} - self.need = 0 - self.optional = 0 - self.pdf = pdf - - @staticmethod - def name(dtype): - """ - Ancillary routine to output a warn/error message reporting - missing dependencies. - """ - if dtype[0] == DepManager._SYS_TYPE: - msg = "build" - elif dtype[0] == DepManager._PHY_TYPE: - msg = "Python" - else: - msg = "PDF" - - if dtype[1]: - return f"ERROR: {msg} mandatory deps missing" - else: - return f"Warning: {msg} optional deps missing" - - @staticmethod - def is_optional(dtype): - """Ancillary routine to report if a dependency is optional""" - return not dtype[1] - - @staticmethod - def is_pdf(dtype): - """Ancillary routine to report if a dependency is for PDF generation""" - if dtype[0] == DepManager._PDF_TYPE: - return True - - return False - - def add_package(self, package, dtype): - """ - Add a package at the self.missing() dictionary. - Doesn't update missing_pkg. - """ - is_optional = DepManager.is_optional(dtype) - self.missing[package] = dtype - if is_optional: - self.optional += 1 - else: - self.need += 1 - - def del_package(self, package): - """ - Remove a package at the self.missing() dictionary. - Doesn't update missing_pkg. - """ - if package in self.missing: - del self.missing[package] - - def clear_deps(self): - """ - Clear dependencies without changing needed/optional. - - This is an ackward way to have a separate section to recommend - a package after system main dependencies. - - TODO: rework the logic to prevent needing it. - """ - - self.missing = {} - self.missing_pkg = {} - - def check_missing(self, progs): - """ - Update self.missing_pkg, using progs dict to convert from the - agnostic package name to distro-specific one. - - Returns an string with the packages to be installed, sorted and - with eventual duplicates removed. 
- """ - - self.missing_pkg = {} - - for prog, dtype in sorted(self.missing.items()): - # At least on some LTS distros like CentOS 7, texlive doesn't - # provide all packages we need. When such distros are - # detected, we have to disable PDF output. - # - # So, we need to ignore the packages that distros would - # need for LaTeX to work - if DepManager.is_pdf(dtype) and not self.pdf: - self.optional -= 1 - continue - - if not dtype in self.missing_pkg: - self.missing_pkg[dtype] = [] - - self.missing_pkg[dtype].append(progs.get(prog, prog)) - - install = [] - for dtype, pkgs in self.missing_pkg.items(): - install += pkgs - - return " ".join(sorted(set(install))) - - def warn_install(self): - """ - Emit warnings/errors related to missing packages. - """ - - output_msg = "" - - for dtype in sorted(self.missing_pkg.keys()): - progs = " ".join(sorted(set(self.missing_pkg[dtype]))) - - try: - name = DepManager.name(dtype) - output_msg += f'{name}:\t{progs}\n' - except KeyError: - raise KeyError(f"ERROR!!!: invalid dtype for {progs}: {dtype}") - - if output_msg: - print(f"\n{output_msg}") - -class AncillaryMethods: - """ - Ancillary methods that checks for missing dependencies for different - types of types, like binaries, python modules, rpm deps, etc. - """ - - @staticmethod - def which(prog): - """ - Our own implementation of which(). We could instead use - shutil.which(), but this function is simple enough. - Probably faster to use this implementation than to import shutil. - """ - for path in os.environ.get("PATH", "").split(":"): - full_path = os.path.join(path, prog) - if os.access(full_path, os.X_OK): - return full_path - - return None - - @staticmethod - def get_python_version(cmd): - """ - Get python version from a Python binary. As we need to detect if - are out there newer python binaries, we can't rely on sys.release here. 
- """ - - result = SphinxDependencyChecker.run([cmd, "--version"], - capture_output=True, text=True) - version = result.stdout.strip() - - match = re.search(r"(\d+\.\d+\.\d+)", version) - if match: - return parse_version(match.group(1)) - - print(f"Can't parse version {version}") - return (0, 0, 0) - - @staticmethod - def find_python(): - """ - Detect if are out there any python 3.xy version newer than the - current one. - - Note: this routine is limited to up to 2 digits for python3. We - may need to update it one day, hopefully on a distant future. - """ - patterns = [ - "python3.[0-9]", - "python3.[0-9][0-9]", - ] - - # Seek for a python binary newer than MIN_PYTHON_VERSION - for path in os.getenv("PATH", "").split(":"): - for pattern in patterns: - for cmd in glob(os.path.join(path, pattern)): - if os.path.isfile(cmd) and os.access(cmd, os.X_OK): - version = SphinxDependencyChecker.get_python_version(cmd) - if version >= MIN_PYTHON_VERSION: - return cmd - - @staticmethod - def check_python(): - """ - Check if the current python binary satisfies our minimal requirement - for Sphinx build. If not, re-run with a newer version if found. - """ - cur_ver = sys.version_info[:3] - if cur_ver >= MIN_PYTHON_VERSION: - ver = ver_str(cur_ver) - print(f"Python version: {ver}") - - # This could be useful for debugging purposes - if SphinxDependencyChecker.which("docutils"): - result = SphinxDependencyChecker.run(["docutils", "--version"], - capture_output=True, text=True) - ver = result.stdout.strip() - match = re.search(r"(\d+\.\d+\.\d+)", ver) - if match: - ver = match.group(1) - - print(f"Docutils version: {ver}") - - return - - python_ver = ver_str(cur_ver) - - new_python_cmd = SphinxDependencyChecker.find_python() - if not new_python_cmd: - print(f"ERROR: Python version {python_ver} is not spported anymore\n") - print(" Can't find a new version. 
This script may fail") - return - - # Restart script using the newer version - script_path = os.path.abspath(sys.argv[0]) - args = [new_python_cmd, script_path] + sys.argv[1:] - - print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") - - try: - os.execv(new_python_cmd, args) - except OSError as e: - sys.exit(f"Failed to restart with {new_python_cmd}: {e}") - - @staticmethod - def run(*args, **kwargs): - """ - Excecute a command, hiding its output by default. - Preserve comatibility with older Python versions. - """ - - capture_output = kwargs.pop('capture_output', False) - - if capture_output: - if 'stdout' not in kwargs: - kwargs['stdout'] = subprocess.PIPE - if 'stderr' not in kwargs: - kwargs['stderr'] = subprocess.PIPE - else: - if 'stdout' not in kwargs: - kwargs['stdout'] = subprocess.DEVNULL - if 'stderr' not in kwargs: - kwargs['stderr'] = subprocess.DEVNULL - - # Don't break with older Python versions - if 'text' in kwargs and sys.version_info < (3, 7): - kwargs['universal_newlines'] = kwargs.pop('text') - - return subprocess.run(*args, **kwargs) - -class MissingCheckers(AncillaryMethods): - """ - Contains some ancillary checkers for different types of binaries and - package managers. - """ - - def __init__(self, args, texlive): - """ - Initialize its internal variables - """ - self.pdf = args.pdf - self.virtualenv = args.virtualenv - self.version_check = args.version_check - self.texlive = texlive - - self.min_version = (0, 0, 0) - self.cur_version = (0, 0, 0) - - self.deps = DepManager(self.pdf) - - self.need_symlink = 0 - self.need_sphinx = 0 - - self.verbose_warn_install = 1 - - self.virtenv_dir = "" - self.install = "" - self.python_cmd = "" - - self.virtenv_prefix = ["sphinx_", "Sphinx_" ] - - def check_missing_file(self, files, package, dtype): - """ - Does the file exists? If not, add it to missing dependencies. 
- """ - for f in files: - if os.path.exists(f): - return - self.deps.add_package(package, dtype) - - def check_program(self, prog, dtype): - """ - Does the program exists and it is at the PATH? - If not, add it to missing dependencies. - """ - found = self.which(prog) - if found: - return found - - self.deps.add_package(prog, dtype) - - return None - - def check_perl_module(self, prog, dtype): - """ - Does perl have a dependency? Is it available? - If not, add it to missing dependencies. - - Right now, we still need Perl for doc build, as it is required - by some tools called at docs or kernel build time, like: - - scripts/documentation-file-ref-check - - Also, checkpatch is on Perl. - """ - - # While testing with lxc download template, one of the - # distros (Oracle) didn't have perl - nor even an option to install - # before installing oraclelinux-release-el9 package. - # - # Check it before running an error. If perl is not there, - # add it as a mandatory package, as some parts of the doc builder - # needs it. - if not self.which("perl"): - self.deps.add_package("perl", DepManager.SYSTEM_MANDATORY) - self.deps.add_package(prog, dtype) - return - - try: - self.run(["perl", f"-M{prog}", "-e", "1"], check=True) - except subprocess.CalledProcessError: - self.deps.add_package(prog, dtype) - - def check_python_module(self, module, is_optional=False): - """ - Does a python module exists outside venv? If not, add it to missing - dependencies. - """ - if is_optional: - dtype = DepManager.PYTHON_OPTIONAL - else: - dtype = DepManager.PYTHON_MANDATORY - - try: - self.run([self.python_cmd, "-c", f"import {module}"], check=True) - except subprocess.CalledProcessError: - self.deps.add_package(module, dtype) - - def check_rpm_missing(self, pkgs, dtype): - """ - Does a rpm package exists? If not, add it to missing dependencies. 
- """ - for prog in pkgs: - try: - self.run(["rpm", "-q", prog], check=True) - except subprocess.CalledProcessError: - self.deps.add_package(prog, dtype) - - def check_pacman_missing(self, pkgs, dtype): - """ - Does a pacman package exists? If not, add it to missing dependencies. - """ - for prog in pkgs: - try: - self.run(["pacman", "-Q", prog], check=True) - except subprocess.CalledProcessError: - self.deps.add_package(prog, dtype) - - def check_missing_tex(self, is_optional=False): - """ - Does a LaTeX package exists? If not, add it to missing dependencies. - """ - if is_optional: - dtype = DepManager.PDF_OPTIONAL - else: - dtype = DepManager.PDF_MANDATORY - - kpsewhich = self.which("kpsewhich") - for prog, package in self.texlive.items(): - - # If kpsewhich is not there, just add it to deps - if not kpsewhich: - self.deps.add_package(package, dtype) - continue - - # Check if the package is needed - try: - result = self.run( - [kpsewhich, prog], stdout=subprocess.PIPE, text=True, check=True - ) - - # Didn't find. Add it - if not result.stdout.strip(): - self.deps.add_package(package, dtype) - - except subprocess.CalledProcessError: - # kpsewhich returned an error. Add it, just in case - self.deps.add_package(package, dtype) - - def get_sphinx_fname(self): - """ - Gets the binary filename for sphinx-build. - """ - if "SPHINXBUILD" in os.environ: - return os.environ["SPHINXBUILD"] - - fname = "sphinx-build" - if self.which(fname): - return fname - - fname = "sphinx-build-3" - if self.which(fname): - self.need_symlink = 1 - return fname - - return "" - - def get_sphinx_version(self, cmd): - """ - Gets sphinx-build version. 
- """ - try: - result = self.run([cmd, "--version"], - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, check=True) - except (subprocess.CalledProcessError, FileNotFoundError): - return None - - for line in result.stdout.split("\n"): - match = re.match(r"^sphinx-build\s+([\d\.]+)(?:\+(?:/[\da-f]+)|b\d+)?\s*$", line) - if match: - return parse_version(match.group(1)) - - match = re.match(r"^Sphinx.*\s+([\d\.]+)\s*$", line) - if match: - return parse_version(match.group(1)) - - def check_sphinx(self, conf): - """ - Checks Sphinx minimal requirements - """ - try: - with open(conf, "r", encoding="utf-8") as f: - for line in f: - match = re.match(r"^\s*needs_sphinx\s*=\s*[\'\"]([\d\.]+)[\'\"]", line) - if match: - self.min_version = parse_version(match.group(1)) - break - except IOError: - sys.exit(f"Can't open {conf}") - - if not self.min_version: - sys.exit(f"Can't get needs_sphinx version from {conf}") - - self.virtenv_dir = self.virtenv_prefix[0] + "latest" - - sphinx = self.get_sphinx_fname() - if not sphinx: - self.need_sphinx = 1 - return - - self.cur_version = self.get_sphinx_version(sphinx) - if not self.cur_version: - sys.exit(f"{sphinx} didn't return its version") - - if self.cur_version < self.min_version: - curver = ver_str(self.cur_version) - minver = ver_str(self.min_version) - - print(f"ERROR: Sphinx version is {curver}. It should be >= {minver}") - self.need_sphinx = 1 - return - - # On version check mode, just assume Sphinx has all mandatory deps - if self.version_check and self.cur_version >= RECOMMENDED_VERSION: - sys.exit(0) - - def catcheck(self, filename): - """ - Reads a file if it exists, returning as string. - If not found, returns an empty string. - """ - if os.path.exists(filename): - with open(filename, "r", encoding="utf-8") as f: - return f.read().strip() - return "" - - def get_system_release(self): - """ - Determine the system type. There's no unique way that would work - with all distros with a minimal package install. 
So, several - methods are used here. - - By default, it will use lsb_release function. If not available, it will - fail back to reading the known different places where the distro name - is stored. - - Several modern distros now have /etc/os-release, which usually have - a decent coverage. - """ - - system_release = "" - - if self.which("lsb_release"): - result = self.run(["lsb_release", "-d"], capture_output=True, text=True) - system_release = result.stdout.replace("Description:", "").strip() - - release_files = [ - "/etc/system-release", - "/etc/redhat-release", - "/etc/lsb-release", - "/etc/gentoo-release", - ] - - if not system_release: - for f in release_files: - system_release = self.catcheck(f) - if system_release: - break - - # This seems more common than LSB these days - if not system_release: - os_var = {} - try: - with open("/etc/os-release", "r", encoding="utf-8") as f: - for line in f: - match = re.match(r"^([\w\d\_]+)=\"?([^\"]*)\"?\n", line) - if match: - os_var[match.group(1)] = match.group(2) - - system_release = os_var.get("NAME", "") - if "VERSION_ID" in os_var: - system_release += " " + os_var["VERSION_ID"] - elif "VERSION" in os_var: - system_release += " " + os_var["VERSION"] - except IOError: - pass - - if not system_release: - system_release = self.catcheck("/etc/issue") - - system_release = system_release.strip() - - return system_release - -class SphinxDependencyChecker(MissingCheckers): - """ - Main class for checking Sphinx documentation build dependencies. - - - Check for missing system packages; - - Check for missing Python modules; - - Check for missing LaTeX packages needed by PDF generation; - - Propose Sphinx install via Python Virtual environment; - - Propose Sphinx install via distro-specific package install. 
- """ - def __init__(self, args): - """Initialize checker variables""" - - # List of required texlive packages on Fedora and OpenSuse - texlive = { - "amsfonts.sty": "texlive-amsfonts", - "amsmath.sty": "texlive-amsmath", - "amssymb.sty": "texlive-amsfonts", - "amsthm.sty": "texlive-amscls", - "anyfontsize.sty": "texlive-anyfontsize", - "atbegshi.sty": "texlive-oberdiek", - "bm.sty": "texlive-tools", - "capt-of.sty": "texlive-capt-of", - "cmap.sty": "texlive-cmap", - "ctexhook.sty": "texlive-ctex", - "ecrm1000.tfm": "texlive-ec", - "eqparbox.sty": "texlive-eqparbox", - "eu1enc.def": "texlive-euenc", - "fancybox.sty": "texlive-fancybox", - "fancyvrb.sty": "texlive-fancyvrb", - "float.sty": "texlive-float", - "fncychap.sty": "texlive-fncychap", - "footnote.sty": "texlive-mdwtools", - "framed.sty": "texlive-framed", - "luatex85.sty": "texlive-luatex85", - "multirow.sty": "texlive-multirow", - "needspace.sty": "texlive-needspace", - "palatino.sty": "texlive-psnfss", - "parskip.sty": "texlive-parskip", - "polyglossia.sty": "texlive-polyglossia", - "tabulary.sty": "texlive-tabulary", - "threeparttable.sty": "texlive-threeparttable", - "titlesec.sty": "texlive-titlesec", - "ucs.sty": "texlive-ucs", - "upquote.sty": "texlive-upquote", - "wrapfig.sty": "texlive-wrapfig", - } - - super().__init__(args, texlive) - - self.need_pip = False - self.rec_sphinx_upgrade = 0 - - self.system_release = self.get_system_release() - self.activate_cmd = "" - - # Some distros may not have a Sphinx shipped package compatible with - # our minimal requirements - self.package_supported = True - - # Recommend a new python version - self.recommend_python = None - - # Certain hints are meant to be shown only once - self.distro_msg = None - - self.latest_avail_ver = (0, 0, 0) - self.venv_ver = (0, 0, 0) - - prefix = os.environ.get("srctree", ".") + "/" - - self.conf = prefix + "Documentation/conf.py" - self.requirement_file = prefix + "Documentation/sphinx/requirements.txt" - - def 
get_install_progs(self, progs, cmd, extra=None): - """ - Check for missing dependencies using the provided program mapping. - - The actual distro-specific programs are mapped via progs argument. - """ - install = self.deps.check_missing(progs) - - if self.verbose_warn_install: - self.deps.warn_install() - - if not install: - return - - if cmd: - if self.verbose_warn_install: - msg = "You should run:" - else: - msg = "" - - if extra: - msg += "\n\t" + extra.replace("\n", "\n\t") - - return(msg + "\n\tsudo " + cmd + " " + install) - - return None - - # - # Distro-specific hints methods - # - - def give_debian_hints(self): - """ - Provide package installation hints for Debian-based distros. - """ - progs = { - "Pod::Usage": "perl-modules", - "convert": "imagemagick", - "dot": "graphviz", - "ensurepip": "python3-venv", - "python-sphinx": "python3-sphinx", - "rsvg-convert": "librsvg2-bin", - "virtualenv": "virtualenv", - "xelatex": "texlive-xetex", - "yaml": "python3-yaml", - } - - if self.pdf: - pdf_pkgs = { - "fonts-dejavu": [ - "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", - ], - "fonts-noto-cjk": [ - "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc", - ], - "tex-gyre": [ - "/usr/share/texmf/tex/latex/tex-gyre/tgtermes.sty" - ], - "texlive-fonts-recommended": [ - "/usr/share/texlive/texmf-dist/fonts/tfm/adobe/zapfding/pzdr.tfm", - ], - "texlive-lang-chinese": [ - "/usr/share/texlive/texmf-dist/tex/latex/ctex/ctexhook.sty", - ], - } - - for package, files in pdf_pkgs.items(): - self.check_missing_file(files, package, DepManager.PDF_MANDATORY) - - self.check_program("dvipng", DepManager.PDF_MANDATORY) - - if not self.distro_msg: - self.distro_msg = \ - "Note: ImageMagick is broken on some distros, affecting PDF output. 
For more details:\n" \ - "\thttps://askubuntu.com/questions/1158894/imagemagick-still-broken-using-with-usr-bin-convert" - - return self.get_install_progs(progs, "apt-get install") - - def give_redhat_hints(self): - """ - Provide package installation hints for RedHat-based distros - (Fedora, RHEL and RHEL-based variants). - """ - progs = { - "Pod::Usage": "perl-Pod-Usage", - "convert": "ImageMagick", - "dot": "graphviz", - "python-sphinx": "python3-sphinx", - "rsvg-convert": "librsvg2-tools", - "virtualenv": "python3-virtualenv", - "xelatex": "texlive-xetex-bin", - "yaml": "python3-pyyaml", - } - - fedora_tex_pkgs = [ - "dejavu-sans-fonts", - "dejavu-sans-mono-fonts", - "dejavu-serif-fonts", - "texlive-collection-fontsrecommended", - "texlive-collection-latex", - "texlive-xecjk", - ] - - fedora = False - rel = None - - match = re.search(r"(release|Linux)\s+(\d+)", self.system_release) - if match: - rel = int(match.group(2)) - - if not rel: - print("Couldn't identify release number") - noto_sans_redhat = None - self.pdf = False - elif re.search("Fedora", self.system_release): - # Fedora 38 and upper use this CJK font - - noto_sans_redhat = "google-noto-sans-cjk-fonts" - fedora = True - else: - # Almalinux, CentOS, RHEL, ... - - # at least up to version 9 (and Fedora < 38), that's the CJK font - noto_sans_redhat = "google-noto-sans-cjk-ttc-fonts" - - progs["virtualenv"] = "python-virtualenv" - - if not rel or rel < 8: - print("ERROR: Distro not supported. Too old?") - return - - # RHEL 8 uses Python 3.6, which is not compatible with - # the build system anymore. Suggest Python 3.11 - if rel == 8: - self.check_program("python3.9", DepManager.SYSTEM_MANDATORY) - progs["python3.9"] = "python39" - progs["yaml"] = "python39-pyyaml" - - self.recommend_python = True - - # There's no python39-sphinx package. 
Only pip is supported - self.package_supported = False - - if not self.distro_msg: - self.distro_msg = \ - "Note: RHEL-based distros typically require extra repositories.\n" \ - "For most, enabling epel and crb are enough:\n" \ - "\tsudo dnf install -y epel-release\n" \ - "\tsudo dnf config-manager --set-enabled crb\n" \ - "Yet, some may have other required repositories. Those commands could be useful:\n" \ - "\tsudo dnf repolist all\n" \ - "\tsudo dnf repoquery --available --info \n" \ - "\tsudo dnf config-manager --set-enabled '*' # enable all - probably not what you want" - - if self.pdf: - pdf_pkgs = [ - "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/google-noto-sans-cjk-fonts/NotoSansCJK-Regular.ttc", - ] - - self.check_missing_file(pdf_pkgs, noto_sans_redhat, DepManager.PDF_MANDATORY) - - self.check_rpm_missing(fedora_tex_pkgs, DepManager.PDF_MANDATORY) - - self.check_missing_tex(DepManager.PDF_MANDATORY) - - # There's no texlive-ctex on RHEL 8 repositories. This will - # likely affect CJK pdf build only. - if not fedora and rel == 8: - self.deps.del_package("texlive-ctex") - - return self.get_install_progs(progs, "dnf install") - - def give_opensuse_hints(self): - """ - Provide package installation hints for openSUSE-based distros - (Leap and Tumbleweed). 
- """ - progs = { - "Pod::Usage": "perl-Pod-Usage", - "convert": "ImageMagick", - "dot": "graphviz", - "python-sphinx": "python3-sphinx", - "virtualenv": "python3-virtualenv", - "xelatex": "texlive-xetex-bin texlive-dejavu", - "yaml": "python3-pyyaml", - } - - suse_tex_pkgs = [ - "texlive-babel-english", - "texlive-caption", - "texlive-colortbl", - "texlive-courier", - "texlive-dvips", - "texlive-helvetic", - "texlive-makeindex", - "texlive-metafont", - "texlive-metapost", - "texlive-palatino", - "texlive-preview", - "texlive-times", - "texlive-zapfchan", - "texlive-zapfding", - ] - - progs["latexmk"] = "texlive-latexmk-bin" - - match = re.search(r"(Leap)\s+(\d+).(\d)", self.system_release) - if match: - rel = int(match.group(2)) - - # Leap 15.x uses Python 3.6, which is not compatible with - # the build system anymore. Suggest Python 3.11 - if rel == 15: - if not self.which(self.python_cmd): - self.check_program("python3.11", DepManager.SYSTEM_MANDATORY) - progs["python3.11"] = "python311" - self.recommend_python = True - - progs.update({ - "python-sphinx": "python311-Sphinx python311-Sphinx-latex", - "virtualenv": "python311-virtualenv", - "yaml": "python311-PyYAML", - }) - else: - # Tumbleweed defaults to Python 3.11 - - progs.update({ - "python-sphinx": "python313-Sphinx python313-Sphinx-latex", - "virtualenv": "python313-virtualenv", - "yaml": "python313-PyYAML", - }) - - # FIXME: add support for installing CJK fonts - # - # I tried hard, but was unable to find a way to install - # "Noto Sans CJK SC" on openSUSE - - if self.pdf: - self.check_rpm_missing(suse_tex_pkgs, DepManager.PDF_MANDATORY) - if self.pdf: - self.check_missing_tex() - - return self.get_install_progs(progs, "zypper install --no-recommends") - - def give_mageia_hints(self): - """ - Provide package installation hints for Mageia and OpenMandriva. 
- """ - progs = { - "Pod::Usage": "perl-Pod-Usage", - "convert": "ImageMagick", - "dot": "graphviz", - "python-sphinx": "python3-sphinx", - "rsvg-convert": "librsvg2", - "virtualenv": "python3-virtualenv", - "xelatex": "texlive", - "yaml": "python3-yaml", - } - - tex_pkgs = [ - "texlive-fontsextra", - "texlive-fonts-asian", - "fonts-ttf-dejavu", - ] - - if re.search(r"OpenMandriva", self.system_release): - packager_cmd = "dnf install" - noto_sans = "noto-sans-cjk-fonts" - tex_pkgs = [ - "texlive-collection-basic", - "texlive-collection-langcjk", - "texlive-collection-fontsextra", - "texlive-collection-fontsrecommended" - ] - - # Tested on OpenMandriva Lx 4.3 - progs["convert"] = "imagemagick" - progs["yaml"] = "python-pyyaml" - progs["python-virtualenv"] = "python-virtualenv" - progs["python-sphinx"] = "python-sphinx" - progs["xelatex"] = "texlive" - - self.check_program("python-virtualenv", DepManager.PYTHON_MANDATORY) - - # On my tests with openMandriva LX 4.0 docker image, upgraded - # to 4.3, python-virtualenv package is broken: it is missing - # ensurepip. Without it, the alternative would be to run: - # python3 -m venv --without-pip ~/sphinx_latest, but running - # pip there won't install sphinx at venv. - # - # Add a note about that. - - if not self.distro_msg: - self.distro_msg = \ - "Notes:\n"\ - "1. for venv, ensurepip could be broken, preventing its install method.\n" \ - "2. 
at least on OpenMandriva LX 4.3, texlive packages seem broken" - - else: - packager_cmd = "urpmi" - noto_sans = "google-noto-sans-cjk-ttc-fonts" - - progs["latexmk"] = "texlive-collection-basic" - - if self.pdf: - pdf_pkgs = [ - "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc", - "/usr/share/fonts/TTF/NotoSans-Regular.ttf", - ] - - self.check_missing_file(pdf_pkgs, noto_sans, DepManager.PDF_MANDATORY) - self.check_rpm_missing(tex_pkgs, DepManager.PDF_MANDATORY) - - return self.get_install_progs(progs, packager_cmd) - - def give_arch_linux_hints(self): - """ - Provide package installation hints for ArchLinux. - """ - progs = { - "convert": "imagemagick", - "dot": "graphviz", - "latexmk": "texlive-core", - "rsvg-convert": "extra/librsvg", - "virtualenv": "python-virtualenv", - "xelatex": "texlive-xetex", - "yaml": "python-yaml", - } - - archlinux_tex_pkgs = [ - "texlive-basic", - "texlive-binextra", - "texlive-core", - "texlive-fontsrecommended", - "texlive-langchinese", - "texlive-langcjk", - "texlive-latexextra", - "ttf-dejavu", - ] - - if self.pdf: - self.check_pacman_missing(archlinux_tex_pkgs, - DepManager.PDF_MANDATORY) - - self.check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc"], - "noto-fonts-cjk", - DepManager.PDF_MANDATORY) - - - return self.get_install_progs(progs, "pacman -S") - - def give_gentoo_hints(self): - """ - Provide package installation hints for Gentoo. 
- """ - texlive_deps = [ - "dev-texlive/texlive-fontsrecommended", - "dev-texlive/texlive-latexextra", - "dev-texlive/texlive-xetex", - "media-fonts/dejavu", - ] - - progs = { - "convert": "media-gfx/imagemagick", - "dot": "media-gfx/graphviz", - "rsvg-convert": "gnome-base/librsvg", - "virtualenv": "dev-python/virtualenv", - "xelatex": " ".join(texlive_deps), - "yaml": "dev-python/pyyaml", - "python-sphinx": "dev-python/sphinx", - } - - if self.pdf: - pdf_pkgs = { - "media-fonts/dejavu": [ - "/usr/share/fonts/dejavu/DejaVuSans.ttf", - ], - "media-fonts/noto-cjk": [ - "/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf", - "/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc", - ], - } - for package, files in pdf_pkgs.items(): - self.check_missing_file(files, package, DepManager.PDF_MANDATORY) - - # Handling dependencies is a nightmare, as Gentoo refuses to emerge - # some packages if there's no package.use file describing them. - # To make it worse, compilation flags shall also be present there - # for some packages. If USE is not perfect, error/warning messages - # like those are shown: - # - # !!! The following binary packages have been ignored due to non matching USE: - # - # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_13 qt6 svg - # =media-gfx/graphviz-12.2.1-r1 X pdf python_single_target_python3_12 -python_single_target_python3_13 qt6 svg - # =media-gfx/graphviz-12.2.1-r1 X pdf qt6 svg - # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 qt6 svg - # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 python_single_target_python3_12 -python_single_target_python3_13 qt6 svg - # =media-fonts/noto-cjk-20190416 X - # =app-text/texlive-core-2024-r1 X cjk -xetex - # =app-text/texlive-core-2024-r1 X -xetex - # =app-text/texlive-core-2024-r1 -xetex - # =dev-libs/zziplib-0.13.79-r1 sdl - # - # And will ignore such packages, installing the remaining ones. 
That - # affects mostly the image extension and PDF generation. - - # Package dependencies and the minimal needed args: - portages = { - "graphviz": "media-gfx/graphviz", - "imagemagick": "media-gfx/imagemagick", - "media-libs": "media-libs/harfbuzz icu", - "media-fonts": "media-fonts/noto-cjk", - "texlive": "app-text/texlive-core xetex", - "zziblib": "dev-libs/zziplib sdl", - } - - extra_cmds = "" - if not self.distro_msg: - self.distro_msg = "Note: Gentoo requires package.use to be adjusted before emerging packages" - - use_base = "/etc/portage/package.use" - files = glob(f"{use_base}/*") - - for fname, portage in portages.items(): - install = False - - while install is False: - if not files: - # No files under package.usage. Install all - install = True - break - - args = portage.split(" ") - - name = args.pop(0) - - cmd = ["grep", "-l", "-E", rf"^{name}\b" ] + files - result = self.run(cmd, stdout=subprocess.PIPE, text=True) - if result.returncode or not result.stdout.strip(): - # File containing portage name not found - install = True - break - - # Ensure that needed USE flags are present - if args: - match_fname = result.stdout.strip() - with open(match_fname, 'r', encoding='utf8', - errors='backslashreplace') as fp: - for line in fp: - for arg in args: - if arg.startswith("-"): - continue - - if not re.search(rf"\s*{arg}\b", line): - # Needed file argument not found - install = True - break - - # Everything looks ok, don't install - break - - # emit a code to setup missing USE - if install: - extra_cmds += (f"sudo su -c 'echo \"{portage}\" > {use_base}/{fname}'\n") - - # Now, we can use emerge and let it respect USE - return self.get_install_progs(progs, - "emerge --ask --changed-use --binpkg-respect-use=y", - extra_cmds) - - def get_install(self): - """ - OS-specific hints logic. Seeks for a hinter. If found, use it to - provide package-manager specific install commands. - - Otherwise, outputs install instructions for the meta-packages. 
- - Returns a string with the command to be executed to install the - the needed packages, if distro found. Otherwise, return just a - list of packages that require installation. - """ - os_hints = { - re.compile("Red Hat Enterprise Linux"): self.give_redhat_hints, - re.compile("Fedora"): self.give_redhat_hints, - re.compile("AlmaLinux"): self.give_redhat_hints, - re.compile("Amazon Linux"): self.give_redhat_hints, - re.compile("CentOS"): self.give_redhat_hints, - re.compile("openEuler"): self.give_redhat_hints, - re.compile("Oracle Linux Server"): self.give_redhat_hints, - re.compile("Rocky Linux"): self.give_redhat_hints, - re.compile("Springdale Open Enterprise"): self.give_redhat_hints, - - re.compile("Ubuntu"): self.give_debian_hints, - re.compile("Debian"): self.give_debian_hints, - re.compile("Devuan"): self.give_debian_hints, - re.compile("Kali"): self.give_debian_hints, - re.compile("Mint"): self.give_debian_hints, - - re.compile("openSUSE"): self.give_opensuse_hints, - - re.compile("Mageia"): self.give_mageia_hints, - re.compile("OpenMandriva"): self.give_mageia_hints, - - re.compile("Arch Linux"): self.give_arch_linux_hints, - re.compile("Gentoo"): self.give_gentoo_hints, - } - - # If the OS is detected, use per-OS hint logic - for regex, os_hint in os_hints.items(): - if regex.search(self.system_release): - return os_hint() - - # - # Fall-back to generic hint code for other distros - # That's far from ideal, specially for LaTeX dependencies. - # - progs = {"sphinx-build": "sphinx"} - if self.pdf: - self.check_missing_tex() - - self.distro_msg = \ - f"I don't know distro {self.system_release}.\n" \ - "So, I can't provide you a hint with the install procedure.\n" \ - "There are likely missing dependencies." - - return self.get_install_progs(progs, None) - - # - # Common dependencies - # - def deactivate_help(self): - """ - Print a helper message to disable a virtual environment. 
- """ - - print("\n If you want to exit the virtualenv, you can use:") - print("\tdeactivate") - - def get_virtenv(self): - """ - Give a hint about how to activate an already-existing virtual - environment containing sphinx-build. - - Returns a tuble with (activate_cmd_path, sphinx_version) with - the newest available virtual env. - """ - - cwd = os.getcwd() - - activates = [] - - # Add all sphinx prefixes with possible version numbers - for p in self.virtenv_prefix: - activates += glob(f"{cwd}/{p}[0-9]*/bin/activate") - - activates.sort(reverse=True, key=str.lower) - - # Place sphinx_latest first, if it exists - for p in self.virtenv_prefix: - activates = glob(f"{cwd}/{p}*latest/bin/activate") + activates - - ver = (0, 0, 0) - for f in activates: - # Discard too old Sphinx virtual environments - match = re.search(r"(\d+)\.(\d+)\.(\d+)", f) - if match: - ver = (int(match.group(1)), int(match.group(2)), int(match.group(3))) - - if ver < self.min_version: - continue - - sphinx_cmd = f.replace("activate", "sphinx-build") - if not os.path.isfile(sphinx_cmd): - continue - - ver = self.get_sphinx_version(sphinx_cmd) - - if not ver: - venv_dir = f.replace("/bin/activate", "") - print(f"Warning: virtual environment {venv_dir} is not working.\n" \ - "Python version upgrade? Remove it with:\n\n" \ - "\trm -rf {venv_dir}\n\n") - else: - if self.need_sphinx and ver >= self.min_version: - return (f, ver) - elif parse_version(ver) > self.cur_version: - return (f, ver) - - return ("", ver) - - def recommend_sphinx_upgrade(self): - """ - Check if Sphinx needs to be upgraded. - - Returns a tuple with the higest available Sphinx version if found. - Otherwise, returns None to indicate either that no upgrade is needed - or no venv was found. 
- """ - - # Avoid running sphinx-builds from venv if cur_version is good - if self.cur_version and self.cur_version >= RECOMMENDED_VERSION: - self.latest_avail_ver = self.cur_version - return None - - # Get the highest version from sphinx_*/bin/sphinx-build and the - # corresponding command to activate the venv/virtenv - self.activate_cmd, self.venv_ver = self.get_virtenv() - - # Store the highest version from Sphinx existing virtualenvs - if self.activate_cmd and self.venv_ver > self.cur_version: - self.latest_avail_ver = self.venv_ver - else: - if self.cur_version: - self.latest_avail_ver = self.cur_version - else: - self.latest_avail_ver = (0, 0, 0) - - # As we don't know package version of Sphinx, and there's no - # virtual environments, don't check if upgrades are needed - if not self.virtualenv: - if not self.latest_avail_ver: - return None - - return self.latest_avail_ver - - # Either there are already a virtual env or a new one should be created - self.need_pip = True - - if not self.latest_avail_ver: - return None - - # Return if the reason is due to an upgrade or not - if self.latest_avail_ver != (0, 0, 0): - if self.latest_avail_ver < RECOMMENDED_VERSION: - self.rec_sphinx_upgrade = 1 - - return self.latest_avail_ver - - def recommend_package(self): - """ - Recommend installing Sphinx as a distro-specific package. - """ - - print("\n2) As a package with:") - - old_need = self.deps.need - old_optional = self.deps.optional - - self.pdf = False - self.deps.optional = 0 - old_verbose = self.verbose_warn_install - self.verbose_warn_install = 0 - - self.deps.clear_deps() - - self.deps.add_package("python-sphinx", DepManager.PYTHON_MANDATORY) - - cmd = self.get_install() - if cmd: - print(cmd) - - self.deps.need = old_need - self.deps.optional = old_optional - self.verbose_warn_install = old_verbose - - def recommend_sphinx_version(self, virtualenv_cmd): - """ - Provide recommendations for installing or upgrading Sphinx based - on current version. 
- - The logic here is complex, as it have to deal with different versions: - - - minimal supported version; - - minimal PDF version; - - recommended version. - - It also needs to work fine with both distro's package and - venv/virtualenv - """ - - if self.recommend_python: - cur_ver = sys.version_info[:3] - if cur_ver < MIN_PYTHON_VERSION: - print(f"\nPython version {cur_ver} is incompatible with doc build.\n" \ - "Please upgrade it and re-run.\n") - return - - # Version is OK. Nothing to do. - if self.cur_version != (0, 0, 0) and self.cur_version >= RECOMMENDED_VERSION: - return - - if self.latest_avail_ver: - latest_avail_ver = ver_str(self.latest_avail_ver) - - if not self.need_sphinx: - # sphinx-build is present and its version is >= $min_version - - # only recommend enabling a newer virtenv version if makes sense. - if self.latest_avail_ver and self.latest_avail_ver > self.cur_version: - print(f"\nYou may also use the newer Sphinx version {latest_avail_ver} with:") - if f"{self.virtenv_prefix}" in os.getcwd(): - print("\tdeactivate") - print(f"\t. {self.activate_cmd}") - self.deactivate_help() - return - - if self.latest_avail_ver and self.latest_avail_ver >= RECOMMENDED_VERSION: - return - - if not self.virtualenv: - # No sphinx either via package or via virtenv. As we can't - # Compare the versions here, just return, recommending the - # user to install it from the package distro. - if not self.latest_avail_ver or self.latest_avail_ver == (0, 0, 0): - return - - # User doesn't want a virtenv recommendation, but he already - # installed one via virtenv with a newer version. - # So, print commands to enable it - if self.latest_avail_ver > self.cur_version: - print(f"\nYou may also use the Sphinx virtualenv version {latest_avail_ver} with:") - if f"{self.virtenv_prefix}" in os.getcwd(): - print("\tdeactivate") - print(f"\t. 
{self.activate_cmd}") - self.deactivate_help() - return - print("\n") - else: - if self.need_sphinx: - self.deps.need += 1 - - # Suggest newer versions if current ones are too old - if self.latest_avail_ver and self.latest_avail_ver >= self.min_version: - if self.latest_avail_ver >= RECOMMENDED_VERSION: - print(f"\nNeed to activate Sphinx (version {latest_avail_ver}) on virtualenv with:") - print(f"\t. {self.activate_cmd}") - self.deactivate_help() - return - - # Version is above the minimal required one, but may be - # below the recommended one. So, print warnings/notes - if self.latest_avail_ver < RECOMMENDED_VERSION: - print(f"Warning: It is recommended at least Sphinx version {RECOMMENDED_VERSION}.") - - # At this point, either it needs Sphinx or upgrade is recommended, - # both via pip - - if self.rec_sphinx_upgrade: - if not self.virtualenv: - print("Instead of install/upgrade Python Sphinx pkg, you could use pip/pypi with:\n\n") - else: - print("To upgrade Sphinx, use:\n\n") - else: - print("\nSphinx needs to be installed either:\n1) via pip/pypi with:\n") - - if not virtualenv_cmd: - print(" Currently not possible.\n") - print(" Please upgrade Python to a newer version and run this script again") - else: - print(f"\t{virtualenv_cmd} {self.virtenv_dir}") - print(f"\t. {self.virtenv_dir}/bin/activate") - print(f"\tpip install -r {self.requirement_file}") - self.deactivate_help() - - if self.package_supported: - self.recommend_package() - - print("\n" \ - " Please note that Sphinx currentlys produce false-positive\n" \ - " warnings when the same name is used for more than one type (functions,\n" \ - " structs, enums,...). This is known Sphinx bug. For more details, see:\n" \ - "\thttps://github.com/sphinx-doc/sphinx/pull/8313") - - def check_needs(self): - """ - Main method that checks needed dependencies and provides - recommendations. 
- """ - self.python_cmd = sys.executable - - # Check if Sphinx is already accessible from current environment - self.check_sphinx(self.conf) - - if self.system_release: - print(f"Detected OS: {self.system_release}.") - else: - print("Unknown OS") - if self.cur_version != (0, 0, 0): - ver = ver_str(self.cur_version) - print(f"Sphinx version: {ver}\n") - - # Check the type of virtual env, depending on Python version - virtualenv_cmd = None - - if sys.version_info < MIN_PYTHON_VERSION: - min_ver = ver_str(MIN_PYTHON_VERSION) - print(f"ERROR: at least python {min_ver} is required to build the kernel docs") - self.need_sphinx = 1 - - self.venv_ver = self.recommend_sphinx_upgrade() - - if self.need_pip: - if sys.version_info < MIN_PYTHON_VERSION: - self.need_pip = False - print("Warning: python version is not supported.") - else: - virtualenv_cmd = f"{self.python_cmd} -m venv" - self.check_python_module("ensurepip") - - # Check for needed programs/tools - self.check_perl_module("Pod::Usage", DepManager.SYSTEM_MANDATORY) - - self.check_program("make", DepManager.SYSTEM_MANDATORY) - self.check_program("which", DepManager.SYSTEM_MANDATORY) - - self.check_program("dot", DepManager.SYSTEM_OPTIONAL) - self.check_program("convert", DepManager.SYSTEM_OPTIONAL) - - self.check_python_module("yaml") - - if self.pdf: - self.check_program("xelatex", DepManager.PDF_MANDATORY) - self.check_program("rsvg-convert", DepManager.PDF_MANDATORY) - self.check_program("latexmk", DepManager.PDF_MANDATORY) - - # Do distro-specific checks and output distro-install commands - cmd = self.get_install() - if cmd: - print(cmd) - - # If distro requires some special instructions, print here. - # Please notice that get_install() needs to be called first. 
- if self.distro_msg: - print("\n" + self.distro_msg) - - if not self.python_cmd: - if self.need == 1: - sys.exit("Can't build as 1 mandatory dependency is missing") - elif self.need: - sys.exit(f"Can't build as {self.need} mandatory dependencies are missing") - - # Check if sphinx-build is called sphinx-build-3 - if self.need_symlink: - sphinx_path = self.which("sphinx-build-3") - if sphinx_path: - print(f"\tsudo ln -sf {sphinx_path} /usr/bin/sphinx-build\n") - - self.recommend_sphinx_version(virtualenv_cmd) - print("") - - if not self.deps.optional: - print("All optional dependencies are met.") - - if self.deps.need == 1: - sys.exit("Can't build as 1 mandatory dependency is missing") - elif self.deps.need: - sys.exit(f"Can't build as {self.deps.need} mandatory dependencies are missing") - - print("Needed package dependencies are met.") - -DESCRIPTION = """ -Process some flags related to Sphinx installation and documentation build. -""" - - -def main(): - """Main function""" - parser = argparse.ArgumentParser(description=DESCRIPTION) - - parser.add_argument( - "--no-virtualenv", - action="store_false", - dest="virtualenv", - help="Recommend installing Sphinx instead of using a virtualenv", - ) - - parser.add_argument( - "--no-pdf", - action="store_false", - dest="pdf", - help="Don't check for dependencies required to build PDF docs", - ) - - parser.add_argument( - "--version-check", - action="store_true", - dest="version_check", - help="If version is compatible, don't check for missing dependencies", - ) - - args = parser.parse_args() - - checker = SphinxDependencyChecker(args) - - checker.check_python() - checker.check_needs() - -# Call main if not used as module -if __name__ == "__main__": - main() -- cgit v1.2.3 From 7e8a8143ecc3940dbc3664b24b132ec7420d1053 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:53 +0200 Subject: docs: add support to build manpages from kerneldoc output Generating man files currently requires running a 
separate script. The target also doesn't appear at the docs Makefile. Add support for mandocs at the Makefile, adding the build logic inside sphinx-build-wrapper, updating documentation and dropping the ancillary script. Signed-off-by: Mauro Carvalho Chehab Message-ID: <3d248d724e7f3154f6e3a227e5923d7360201de9.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/split-man.pl | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100755 scripts/split-man.pl (limited to 'scripts') diff --git a/scripts/split-man.pl b/scripts/split-man.pl deleted file mode 100755 index 96bd99dc977a..000000000000 --- a/scripts/split-man.pl +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 -# -# Author: Mauro Carvalho Chehab -# -# Produce manpages from kernel-doc. -# See Documentation/doc-guide/kernel-doc.rst for instructions - -if ($#ARGV < 0) { - die "where do I put the results?\n"; -} - -mkdir $ARGV[0],0777; -$state = 0; -while () { - if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) { - if ($state == 1) { close OUT } - $state = 1; - $fn = "$ARGV[0]/$1.9"; - print STDERR "Creating $fn\n"; - open OUT, ">$fn" or die "can't open $fn: $!\n"; - print OUT $_; - } elsif ($state != 0) { - print OUT $_; - } -} - -close OUT; -- cgit v1.2.3 From 104e0a682e12e6f52267a945c9ed9cf92a46fa56 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:54 +0200 Subject: tools: kernel-doc: add a see also section at man pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While cross-references are complex, as related ones can be on different files, we can at least correlate the ones that belong to the same file, adding a SEE ALSO section for them. The result is not bad. 
See for instance: $ tools/docs/sphinx-build-wrapper --sphinxdirs driver-api/media -- mandocs $ man Documentation/output/driver-api/man/edac_pci_add_device.9 edac_pci_add_device(9) Kernel Hacker's Manual edac_pci_add_device(9) NAME edac_pci_add_device - Insert the 'edac_dev' structure into the edac_pci global list and create sysfs entries associated with edac_pci structure. SYNOPSIS int edac_pci_add_device (struct edac_pci_ctl_info *pci , int edac_idx ); ARGUMENTS pci pointer to the edac_device structure to be added to the list edac_idx A unique numeric identifier to be assigned to the RETURN 0 on Success, or an error code on failure SEE ALSO edac_pci_alloc_ctl_info(9), edac_pci_free_ctl_info(9), edac_pci_alloc_index(9), edac_pci_del_device(9), edac_pci_cre‐ ate_generic_ctl(9), edac_pci_release_generic_ctl(9), edac_pci_create_sysfs(9), edac_pci_remove_sysfs(9) August 2025 edac_pci_add_device edac_pci_add_device(9) Signed-off-by: Mauro Carvalho Chehab Message-ID: Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_files.py | 5 ++- scripts/lib/kdoc/kdoc_output.py | 84 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 83 insertions(+), 6 deletions(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py index 9e09b45b02fa..061c033f32da 100644 --- a/scripts/lib/kdoc/kdoc_files.py +++ b/scripts/lib/kdoc/kdoc_files.py @@ -275,7 +275,10 @@ class KernelFiles(): self.config.log.warning("No kernel-doc for file %s", fname) continue - for arg in self.results[fname]: + symbols = self.results[fname] + self.out_style.set_symbols(symbols) + + for arg in symbols: m = self.out_msg(fname, arg.name, arg) if m is None: diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index ea8914537ba0..1eca9a918558 100644 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -215,6 +215,9 @@ class OutputFormat: # Virtual methods to be overridden by inherited classes # At the base class, 
those do nothing. + def set_symbols(self, symbols): + """Get a list of all symbols from kernel_doc""" + def out_doc(self, fname, name, args): """Outputs a DOC block""" @@ -577,6 +580,7 @@ class ManFormat(OutputFormat): super().__init__() self.modulename = modulename + self.symbols = [] dt = None tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") @@ -593,6 +597,68 @@ class ManFormat(OutputFormat): self.man_date = dt.strftime("%B %Y") + def arg_name(self, args, name): + """ + Return the name that will be used for the man page. + + As we may have the same name on different namespaces, + prepend the data type for all types except functions and typedefs. + + The doc section is special: it uses the modulename. + """ + + dtype = args.type + + if dtype == "doc": + return self.modulename + + if dtype in ["function", "typedef"]: + return name + + return f"{dtype} {name}" + + def set_symbols(self, symbols): + """ + Get a list of all symbols from kernel_doc. + + Man pages will uses it to add a SEE ALSO section with other + symbols at the same file. + """ + self.symbols = symbols + + def out_tail(self, fname, name, args): + """Adds a tail for all man pages""" + + # SEE ALSO section + if len(self.symbols) >= 2: + cur_name = self.arg_name(args, name) + + self.data += f'.SH "SEE ALSO"' + "\n.PP\n" + related = [] + for arg in self.symbols: + out_name = self.arg_name(arg, arg.name) + + if cur_name == out_name: + continue + + related.append(f"\\fB{out_name}\\fR(9)") + + self.data += ",\n".join(related) + "\n" + + # TODO: does it make sense to add other sections? Maybe + # REPORTING ISSUES? LICENSE? + + def msg(self, fname, name, args): + """ + Handles a single entry from kernel-doc parser. + + Add a tail at the end of man pages output. 
+ """ + super().msg(fname, name, args) + self.out_tail(fname, name, args) + + return self.data + def output_highlight(self, block): """ Outputs a C symbol that may require being highlighted with @@ -618,7 +684,9 @@ class ManFormat(OutputFormat): if not self.check_doc(name, args): return - self.data += f'.TH "{self.modulename}" 9 "{self.modulename}" "{self.man_date}" "API Manual" LINUX' + "\n" + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" for section, text in args.sections.items(): self.data += f'.SH "{section}"' + "\n" @@ -627,7 +695,9 @@ class ManFormat(OutputFormat): def out_function(self, fname, name, args): """output function in man""" - self.data += f'.TH "{name}" 9 "{name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" + out_name = self.arg_name(args, name) + + self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"{name} \\- {args['purpose']}\n" @@ -671,7 +741,9 @@ class ManFormat(OutputFormat): self.output_highlight(text) def out_enum(self, fname, name, args): - self.data += f'.TH "{self.modulename}" 9 "enum {name}" "{self.man_date}" "API Manual" LINUX' + "\n" + out_name = self.arg_name(args, name) + + self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"enum {name} \\- {args['purpose']}\n" @@ -703,8 +775,9 @@ class ManFormat(OutputFormat): def out_typedef(self, fname, name, args): module = self.modulename purpose = args.get('purpose') + out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"typedef {name} \\- {purpose}\n" @@ -717,8 +790,9 @@ class ManFormat(OutputFormat): module = 
self.modulename purpose = args.get('purpose') definition = args.get('definition') + out_name = self.arg_name(args, name) - self.data += f'.TH "{module}" 9 "{args.type} {name}" "{self.man_date}" "API Manual" LINUX' + "\n" + self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" self.data += ".SH NAME\n" self.data += f"{args.type} {name} \\- {purpose}\n" -- cgit v1.2.3 From ade9b9576e2f000fb2ef0ac3bcd26e1167fd813b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 18 Sep 2025 13:54:55 +0200 Subject: scripts: kdoc_parser.py: warn about Python version only once When running kernel-doc over multiple documents, it emits one error message per file, which is not what we want: $ python3.6 scripts/kernel-doc.py . --none ... Warning: ./include/trace/events/swiotlb.h:0 Python 3.7 or later is required for correct results Warning: ./include/trace/events/iommu.h:0 Python 3.7 or later is required for correct results Warning: ./include/trace/events/sock.h:0 Python 3.7 or later is required for correct results ... Change the logic to warn only once at the library: $ python3.6 scripts/kernel-doc.py . --none Warning: Python 3.7 or later is required for correct results Warning: ./include/cxl/features.h:0 Python 3.7 or later is required for correct results When running from command line, it warns twice, but that sounds ok.
Signed-off-by: Mauro Carvalho Chehab Message-ID: <68e54cf8b1201d1f683aad9bc710a99421910356.1758196090.git.mchehab+huawei@kernel.org> Signed-off-by: Jonathan Corbet --- scripts/lib/kdoc/kdoc_parser.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 2376f180b1fa..89d920e0b65c 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -350,6 +350,7 @@ class KernelEntry: self.section = SECTION_DEFAULT self._contents = [] +python_warning = False class KernelDoc: """ @@ -383,9 +384,13 @@ class KernelDoc: # We need Python 3.7 for its "dicts remember the insertion # order" guarantee # - if sys.version_info.major == 3 and sys.version_info.minor < 7: + global python_warning + if (not python_warning and + sys.version_info.major == 3 and sys.version_info.minor < 7): + self.emit_msg(0, 'Python 3.7 or later is required for correct results') + python_warning = True def emit_msg(self, ln, msg, warning=True): """Emit a message""" -- cgit v1.2.3 From c2381e8a6105dcc3eeee93766ba628cf656057f3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 20 Sep 2025 11:40:25 +0200 Subject: scripts: remove sphinx-build-wrapper from scripts/ Commit 8a298579cdfc ("scripts: sphinx-build-wrapper: get rid of uapi/media Makefile") accidentally added scripts/sphinx-build-wrapper, probably due to some rebase issues. The file was added on a separate patch series, at tools/docs, and has other patches on the top of it, so drop this extra version. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- scripts/sphinx-build-wrapper | 719 ------------------------------------------- 1 file changed, 719 deletions(-) delete mode 100755 scripts/sphinx-build-wrapper (limited to 'scripts') diff --git a/scripts/sphinx-build-wrapper b/scripts/sphinx-build-wrapper deleted file mode 100755 index abe8c26ae137..000000000000 --- a/scripts/sphinx-build-wrapper +++ /dev/null @@ -1,719 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright (C) 2025 Mauro Carvalho Chehab -# -# pylint: disable=R0902, R0912, R0913, R0914, R0915, R0917, C0103 -# -# Converted from docs Makefile and parallel-wrapper.sh, both under -# GPLv2, copyrighted since 2008 by the following authors: -# -# Akira Yokosawa -# Arnd Bergmann -# Breno Leitao -# Carlos Bilbao -# Dave Young -# Donald Hunter -# Geert Uytterhoeven -# Jani Nikula -# Jan Stancek -# Jonathan Corbet -# Joshua Clayton -# Kees Cook -# Linus Torvalds -# Magnus Damm -# Masahiro Yamada -# Mauro Carvalho Chehab -# Maxim Cournoyer -# Peter Foley -# Randy Dunlap -# Rob Herring -# Shuah Khan -# Thorsten Blum -# Tomas Winkler - - -""" -Sphinx build wrapper that handles Kernel-specific business rules: - -- it gets the Kernel build environment vars; -- it determines what's the best parallelism; -- it handles SPHINXDIRS - -This tool ensures that MIN_PYTHON_VERSION is satisfied. If version is -below that, it seeks for a new Python version. If found, it re-runs using -the newer version. 
-""" - -import argparse -import locale -import os -import re -import shlex -import shutil -import subprocess -import sys - -from concurrent import futures -from glob import glob - -LIB_DIR = "lib" -SRC_DIR = os.path.dirname(os.path.realpath(__file__)) - -sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) - -from jobserver import JobserverExec # pylint: disable=C0413 - - -def parse_version(version): - """Convert a major.minor.patch version into a tuple""" - return tuple(int(x) for x in version.split(".")) - -def ver_str(version): - """Returns a version tuple as major.minor.patch""" - - return ".".join([str(x) for x in version]) - -# Minimal supported Python version needed by Sphinx and its extensions -MIN_PYTHON_VERSION = parse_version("3.7") - -# Default value for --venv parameter -VENV_DEFAULT = "sphinx_latest" - -# List of make targets and its corresponding builder and output directory -TARGETS = { - "cleandocs": { - "builder": "clean", - }, - "htmldocs": { - "builder": "html", - }, - "epubdocs": { - "builder": "epub", - "out_dir": "epub", - }, - "texinfodocs": { - "builder": "texinfo", - "out_dir": "texinfo", - }, - "infodocs": { - "builder": "texinfo", - "out_dir": "texinfo", - }, - "latexdocs": { - "builder": "latex", - "out_dir": "latex", - }, - "pdfdocs": { - "builder": "latex", - "out_dir": "latex", - }, - "xmldocs": { - "builder": "xml", - "out_dir": "xml", - }, - "linkcheckdocs": { - "builder": "linkcheck" - }, -} - -# Paper sizes. An empty value will pick the default -PAPER = ["", "a4", "letter"] - -class SphinxBuilder: - """ - Handles a sphinx-build target, adding needed arguments to build - with the Kernel. 
- """ - - def is_rust_enabled(self): - """Check if rust is enabled at .config""" - config_path = os.path.join(self.srctree, ".config") - if os.path.isfile(config_path): - with open(config_path, "r", encoding="utf-8") as f: - return "CONFIG_RUST=y" in f.read() - return False - - def get_path(self, path, abs_path=False): - """ - Ancillary routine to handle patches the right way, as shell does. - - It first expands "~" and "~user". Then, if patch is not absolute, - join self.srctree. Finally, if requested, convert to abspath. - """ - - path = os.path.expanduser(path) - if not path.startswith("/"): - path = os.path.join(self.srctree, path) - - if abs_path: - return os.path.abspath(path) - - return path - - def __init__(self, venv=None, verbose=False, n_jobs=None, interactive=None): - """Initialize internal variables""" - self.venv = venv - self.verbose = None - - # Normal variables passed from Kernel's makefile - self.kernelversion = os.environ.get("KERNELVERSION", "unknown") - self.kernelrelease = os.environ.get("KERNELRELEASE", "unknown") - self.pdflatex = os.environ.get("PDFLATEX", "xelatex") - - if not interactive: - self.latexopts = os.environ.get("LATEXOPTS", "-interaction=batchmode -no-shell-escape") - else: - self.latexopts = os.environ.get("LATEXOPTS", "") - - if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", "") != "") - - # Handle SPHINXOPTS evironment - sphinxopts = shlex.split(os.environ.get("SPHINXOPTS", "")) - - # As we handle number of jobs and quiet in separate, we need to pick - # it the same way as sphinx-build would pick, so let's use argparse - # do to the right argument expansion - parser = argparse.ArgumentParser() - parser.add_argument('-j', '--jobs', type=int) - parser.add_argument('-q', '--quiet', type=int) - - # Other sphinx-build arguments go as-is, so place them - # at self.sphinxopts - sphinx_args, self.sphinxopts = parser.parse_known_args(sphinxopts) - if sphinx_args.quiet == True: - self.verbose = False - - if 
sphinx_args.jobs: - self.n_jobs = sphinx_args.jobs - - # Command line arguments was passed, override SPHINXOPTS - if verbose is not None: - self.verbose = verbose - - self.n_jobs = n_jobs - - # Source tree directory. This needs to be at os.environ, as - # Sphinx extensions and media uAPI makefile needs it - self.srctree = os.environ.get("srctree") - if not self.srctree: - self.srctree = "." - os.environ["srctree"] = self.srctree - - # Now that we can expand srctree, get other directories as well - self.sphinxbuild = os.environ.get("SPHINXBUILD", "sphinx-build") - self.kerneldoc = self.get_path(os.environ.get("KERNELDOC", - "scripts/kernel-doc.py")) - self.obj = os.environ.get("obj", "Documentation") - self.builddir = self.get_path(os.path.join(self.obj, "output"), - abs_path=True) - - # Media uAPI needs it - os.environ["BUILDDIR"] = self.builddir - - # Detect if rust is enabled - self.config_rust = self.is_rust_enabled() - - # Get directory locations for LaTeX build toolchain - self.pdflatex_cmd = shutil.which(self.pdflatex) - self.latexmk_cmd = shutil.which("latexmk") - - self.env = os.environ.copy() - - # If venv parameter is specified, run Sphinx from venv - if venv: - bin_dir = os.path.join(venv, "bin") - if os.path.isfile(os.path.join(bin_dir, "activate")): - # "activate" virtual env - self.env["PATH"] = bin_dir + ":" + self.env["PATH"] - self.env["VIRTUAL_ENV"] = venv - if "PYTHONHOME" in self.env: - del self.env["PYTHONHOME"] - print(f"Setting venv to {venv}") - else: - sys.exit(f"Venv {venv} not found.") - - def run_sphinx(self, sphinx_build, build_args, *args, **pwargs): - """ - Executes sphinx-build using current python3 command and setting - -j parameter if possible to run the build in parallel. 
- """ - - with JobserverExec() as jobserver: - if jobserver.claim: - n_jobs = str(jobserver.claim) - else: - n_jobs = "auto" # Supported since Sphinx 1.7 - - cmd = [] - - if self.venv: - cmd.append("python") - else: - cmd.append(sys.executable) - - cmd.append(sphinx_build) - - # if present, SPHINXOPTS or command line --jobs overrides default - if self.n_jobs: - n_jobs = str(self.n_jobs) - - if n_jobs: - cmd += [f"-j{n_jobs}"] - - if not self.verbose: - cmd.append("-q") - - cmd += self.sphinxopts - - cmd += build_args - - if self.verbose: - print(" ".join(cmd)) - - rc = subprocess.call(cmd, *args, **pwargs) - - def handle_html(self, css, output_dir): - """ - Extra steps for HTML and epub output. - - For such targets, we need to ensure that CSS will be properly - copied to the output _static directory - """ - - if not css: - return - - css = os.path.expanduser(css) - if not css.startswith("/"): - css = os.path.join(self.srctree, css) - - static_dir = os.path.join(output_dir, "_static") - os.makedirs(static_dir, exist_ok=True) - - try: - shutil.copy2(css, static_dir) - except (OSError, IOError) as e: - print(f"Warning: Failed to copy CSS: {e}", file=sys.stderr) - - def build_pdf_file(self, latex_cmd, from_dir, path): - """Builds a single pdf file using latex_cmd""" - try: - subprocess.run(latex_cmd + [path], - cwd=from_dir, check=True) - - return True - except subprocess.CalledProcessError: - # LaTeX PDF error code is almost useless: it returns - # error codes even when build succeeds but has warnings. 
- # So, we'll ignore the results - return False - - def pdf_parallel_build(self, tex_suffix, latex_cmd, tex_files, n_jobs): - """Build PDF files in parallel if possible""" - builds = {} - build_failed = False - max_len = 0 - has_tex = False - - # Process files in parallel - with futures.ThreadPoolExecutor(max_workers=n_jobs) as executor: - jobs = {} - - for from_dir, pdf_dir, entry in tex_files: - name = entry.name - - if not name.endswith(tex_suffix): - continue - - name = name[:-len(tex_suffix)] - - max_len = max(max_len, len(name)) - - has_tex = True - - future = executor.submit(self.build_pdf_file, latex_cmd, - from_dir, entry.path) - jobs[future] = (from_dir, name, entry.path) - - for future in futures.as_completed(jobs): - from_dir, name, path = jobs[future] - - pdf_name = name + ".pdf" - pdf_from = os.path.join(from_dir, pdf_name) - - try: - success = future.result() - - if success and os.path.exists(pdf_from): - pdf_to = os.path.join(pdf_dir, pdf_name) - - os.rename(pdf_from, pdf_to) - builds[name] = os.path.relpath(pdf_to, self.builddir) - else: - builds[name] = "FAILED" - build_failed = True - except Exception as e: - builds[name] = f"FAILED ({str(e)})" - build_failed = True - - # Handle case where no .tex files were found - if not has_tex: - name = "Sphinx LaTeX builder" - max_len = max(max_len, len(name)) - builds[name] = "FAILED (no .tex file was generated)" - build_failed = True - - return builds, build_failed, max_len - - def handle_pdf(self, output_dirs): - """ - Extra steps for PDF output. - - As PDF is handled via a LaTeX output, after building the .tex file, - a new build is needed to create the PDF output from the latex - directory. 
- """ - builds = {} - max_len = 0 - tex_suffix = ".tex" - - # Get all tex files that will be used for PDF build - tex_files = [] - for from_dir in output_dirs: - pdf_dir = os.path.join(from_dir, "../pdf") - os.makedirs(pdf_dir, exist_ok=True) - - if self.latexmk_cmd: - latex_cmd = [self.latexmk_cmd, f"-{self.pdflatex}"] - else: - latex_cmd = [self.pdflatex] - - latex_cmd.extend(shlex.split(self.latexopts)) - - # Get a list of tex files to process - with os.scandir(from_dir) as it: - for entry in it: - if entry.name.endswith(tex_suffix): - tex_files.append((from_dir, pdf_dir, entry)) - - # When using make, this won't be used, as the number of jobs comes - # from POSIX jobserver. So, this covers the case where build comes - # from command line. On such case, serialize by default, except if - # the user explicitly sets the number of jobs. - n_jobs = 1 - - # n_jobs is either an integer or "auto". Only use it if it is a number - if self.n_jobs: - try: - n_jobs = int(self.n_jobs) - except ValueError: - pass - - # When using make, jobserver.claim is the number of jobs that were - # used with "-j" and that aren't used by other make targets - with JobserverExec() as jobserver: - n_jobs = 1 - - # Handle the case when a parameter is passed via command line, - # using it as default, if jobserver doesn't claim anything - if self.n_jobs: - try: - n_jobs = int(self.n_jobs) - except ValueError: - pass - - if jobserver.claim: - n_jobs = jobserver.claim - - # Build files in parallel - builds, build_failed, max_len = self.pdf_parallel_build(tex_suffix, - latex_cmd, - tex_files, - n_jobs) - - msg = "Summary" - msg += "\n" + "=" * len(msg) - print() - print(msg) - - for pdf_name, pdf_file in builds.items(): - print(f"{pdf_name:<{max_len}}: {pdf_file}") - - print() - - # return an error if a PDF file is missing - - if build_failed: - sys.exit(f"PDF build failed: not all PDF files were created.") - else: - print("All PDF files were built.") - - def handle_info(self, output_dirs): - """ - 
Extra steps for Info output. - - For texinfo generation, an additional make is needed from the - texinfo directory. - """ - - for output_dir in output_dirs: - try: - subprocess.run(["make", "info"], cwd=output_dir, check=True) - except subprocess.CalledProcessError as e: - sys.exit(f"Error generating info docs: {e}") - - def cleandocs(self, builder): - - shutil.rmtree(self.builddir, ignore_errors=True) - - def build(self, target, sphinxdirs=None, conf="conf.py", - theme=None, css=None, paper=None): - """ - Build documentation using Sphinx. This is the core function of this - module. It prepares all arguments required by sphinx-build. - """ - - builder = TARGETS[target]["builder"] - out_dir = TARGETS[target].get("out_dir", "") - - # Cleandocs doesn't require sphinx-build - if target == "cleandocs": - self.cleandocs(builder) - return - - # Other targets require sphinx-build - sphinxbuild = shutil.which(self.sphinxbuild, path=self.env["PATH"]) - if not sphinxbuild: - sys.exit(f"Error: {self.sphinxbuild} not found in PATH.\n") - - if builder == "latex": - if not self.pdflatex_cmd and not self.latexmk_cmd: - sys.exit("Error: pdflatex or latexmk required for PDF generation") - - docs_dir = os.path.abspath(os.path.join(self.srctree, "Documentation")) - - # Prepare base arguments for Sphinx build - kerneldoc = self.kerneldoc - if kerneldoc.startswith(self.srctree): - kerneldoc = os.path.relpath(kerneldoc, self.srctree) - - # Prepare common Sphinx options - args = [ - "-b", builder, - "-c", docs_dir, - ] - - if builder == "latex": - if not paper: - paper = PAPER[1] - - args.extend(["-D", f"latex_elements.papersize={paper}paper"]) - - if self.config_rust: - args.extend(["-t", "rustdoc"]) - - if conf: - self.env["SPHINX_CONF"] = self.get_path(conf, abs_path=True) - - if not sphinxdirs: - sphinxdirs = os.environ.get("SPHINXDIRS", ".") - - # The sphinx-build tool has a bug: internally, it tries to set - # locale with locale.setlocale(locale.LC_ALL, ''). 
This causes a - # crash if language is not set. Detect and fix it. - try: - locale.setlocale(locale.LC_ALL, '') - except Exception: - self.env["LC_ALL"] = "C" - self.env["LANG"] = "C" - - # sphinxdirs can be a list or a whitespace-separated string - sphinxdirs_list = [] - for sphinxdir in sphinxdirs: - if isinstance(sphinxdir, list): - sphinxdirs_list += sphinxdir - else: - for name in sphinxdir.split(" "): - sphinxdirs_list.append(name) - - # Build each directory - output_dirs = [] - for sphinxdir in sphinxdirs_list: - src_dir = os.path.join(docs_dir, sphinxdir) - doctree_dir = os.path.join(self.builddir, ".doctrees") - output_dir = os.path.join(self.builddir, sphinxdir, out_dir) - - # Make directory names canonical - src_dir = os.path.normpath(src_dir) - doctree_dir = os.path.normpath(doctree_dir) - output_dir = os.path.normpath(output_dir) - - os.makedirs(doctree_dir, exist_ok=True) - os.makedirs(output_dir, exist_ok=True) - - output_dirs.append(output_dir) - - build_args = args + [ - "-d", doctree_dir, - "-D", f"kerneldoc_bin={kerneldoc}", - "-D", f"version={self.kernelversion}", - "-D", f"release={self.kernelrelease}", - "-D", f"kerneldoc_srctree={self.srctree}", - src_dir, - output_dir, - ] - - # Execute sphinx-build - try: - self.run_sphinx(sphinxbuild, build_args, env=self.env) - except Exception as e: - sys.exit(f"Build failed: {e}") - - # Ensure that html/epub will have needed static files - if target in ["htmldocs", "epubdocs"]: - self.handle_html(css, output_dir) - - # PDF and Info require a second build step - if target == "pdfdocs": - self.handle_pdf(output_dirs) - elif target == "infodocs": - self.handle_info(output_dirs) - - @staticmethod - def get_python_version(cmd): - """ - Get python version from a Python binary. As we need to detect if - are out there newer python binaries, we can't rely on sys.release here. 
- """ - - result = subprocess.run([cmd, "--version"], check=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - universal_newlines=True) - version = result.stdout.strip() - - match = re.search(r"(\d+\.\d+\.\d+)", version) - if match: - return parse_version(match.group(1)) - - print(f"Can't parse version {version}") - return (0, 0, 0) - - @staticmethod - def find_python(): - """ - Detect if are out there any python 3.xy version newer than the - current one. - - Note: this routine is limited to up to 2 digits for python3. We - may need to update it one day, hopefully on a distant future. - """ - patterns = [ - "python3.[0-9]", - "python3.[0-9][0-9]", - ] - - # Seek for a python binary newer than MIN_PYTHON_VERSION - for path in os.getenv("PATH", "").split(":"): - for pattern in patterns: - for cmd in glob(os.path.join(path, pattern)): - if os.path.isfile(cmd) and os.access(cmd, os.X_OK): - version = SphinxBuilder.get_python_version(cmd) - if version >= MIN_PYTHON_VERSION: - return cmd - - return None - - @staticmethod - def check_python(): - """ - Check if the current python binary satisfies our minimal requirement - for Sphinx build. If not, re-run with a newer version if found. - """ - cur_ver = sys.version_info[:3] - if cur_ver >= MIN_PYTHON_VERSION: - return - - python_ver = ver_str(cur_ver) - - new_python_cmd = SphinxBuilder.find_python() - if not new_python_cmd: - sys.exit(f"Python version {python_ver} is not supported anymore.") - - # Restart script using the newer version - script_path = os.path.abspath(sys.argv[0]) - args = [new_python_cmd, script_path] + sys.argv[1:] - - print(f"Python {python_ver} not supported. Changing to {new_python_cmd}") - - try: - os.execv(new_python_cmd, args) - except OSError as e: - sys.exit(f"Failed to restart with {new_python_cmd}: {e}") - -def jobs_type(value): - """ - Handle valid values for -j. Accepts Sphinx "-jauto", plus a number - equal or bigger than one. 
- """ - if value is None: - return None - - if value.lower() == 'auto': - return value.lower() - - try: - if int(value) >= 1: - return value - - raise argparse.ArgumentTypeError(f"Minimum jobs is 1, got {value}") - except ValueError: - raise argparse.ArgumentTypeError(f"Must be 'auto' or positive integer, got {value}") - -def main(): - """ - Main function. The only mandatory argument is the target. If not - specified, the other arguments will use default values if not - specified at os.environ. - """ - parser = argparse.ArgumentParser(description="Kernel documentation builder") - - parser.add_argument("target", choices=list(TARGETS.keys()), - help="Documentation target to build") - parser.add_argument("--sphinxdirs", nargs="+", - help="Specific directories to build") - parser.add_argument("--conf", default="conf.py", - help="Sphinx configuration file") - - parser.add_argument("--theme", help="Sphinx theme to use") - - parser.add_argument("--css", help="Custom CSS file for HTML/EPUB") - - parser.add_argument("--paper", choices=PAPER, default=PAPER[0], - help="Paper size for LaTeX/PDF output") - - parser.add_argument("-v", "--verbose", action='store_true', - help="place build in verbose mode") - - parser.add_argument('-j', '--jobs', type=jobs_type, - help="Sets number of jobs to use with sphinx-build") - - parser.add_argument('-i', '--interactive', action='store_true', - help="Change latex default to run in interactive mode") - - parser.add_argument("-V", "--venv", nargs='?', const=f'{VENV_DEFAULT}', - default=None, - help=f'If used, run Sphinx from a venv dir (default dir: {VENV_DEFAULT})') - - args = parser.parse_args() - - SphinxBuilder.check_python() - - builder = SphinxBuilder(venv=args.venv, verbose=args.verbose, - n_jobs=args.jobs, interactive=args.interactive) - - builder.build(args.target, sphinxdirs=args.sphinxdirs, conf=args.conf, - theme=args.theme, css=args.css, paper=args.paper) - -if __name__ == "__main__": - main() -- cgit v1.2.3 From 
2bd22194b26ffbbd9fbb71ffbb608b61e024b563 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 1 Oct 2025 16:13:59 +0200 Subject: kernel-doc: output source file name at SEE ALSO for man pages, it is helpful to know from where the man page were generated. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jonathan Corbet Message-ID: --- scripts/lib/kdoc/kdoc_item.py | 3 ++- scripts/lib/kdoc/kdoc_output.py | 3 ++- scripts/lib/kdoc/kdoc_parser.py | 8 +++++--- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py index b3b225764550..19805301cb2c 100644 --- a/scripts/lib/kdoc/kdoc_item.py +++ b/scripts/lib/kdoc/kdoc_item.py @@ -5,8 +5,9 @@ # class KdocItem: - def __init__(self, name, type, start_line, **other_stuff): + def __init__(self, name, fname, type, start_line, **other_stuff): self.name = name + self.fname = fname self.type = type self.declaration_start_line = start_line self.sections = {} diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py index 1eca9a918558..58f115059e93 100644 --- a/scripts/lib/kdoc/kdoc_output.py +++ b/scripts/lib/kdoc/kdoc_output.py @@ -630,10 +630,11 @@ class ManFormat(OutputFormat): """Adds a tail for all man pages""" # SEE ALSO section + self.data += f'.SH "SEE ALSO"' + "\n.PP\n" + self.data += (f"Kernel file \\fB{args.fname}\\fR\n") if len(self.symbols) >= 2: cur_name = self.arg_name(args, name) - self.data += f'.SH "SEE ALSO"' + "\n.PP\n" related = [] for arg in self.symbols: out_name = self.arg_name(arg, arg.name) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 89d920e0b65c..6e5c115cbdf3 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -254,8 +254,9 @@ SECTION_DEFAULT = "Description" # default section class KernelEntry: - def __init__(self, config, ln): + def __init__(self, config, fname, ln): self.config = config + self.fname = fname 
self._contents = [] self.prototype = "" @@ -422,7 +423,8 @@ class KernelDoc: The actual output and output filters will be handled elsewhere """ - item = KdocItem(name, dtype, self.entry.declaration_start_line, **args) + item = KdocItem(name, self.fname, dtype, + self.entry.declaration_start_line, **args) item.warnings = self.entry.warnings # Drop empty sections @@ -445,7 +447,7 @@ class KernelDoc: variables used by the state machine. """ - self.entry = KernelEntry(self.config, ln) + self.entry = KernelEntry(self.config, self.fname, ln) # State flags self.state = state.NORMAL -- cgit v1.2.3 From 567f9c428f99560fe14e647def9f42f5344ebde9 Mon Sep 17 00:00:00 2001 From: John Wang Date: Fri, 28 Mar 2025 15:38:02 +0800 Subject: scripts/faddr2line: Set LANG=C to enforce ASCII output Force tools like readelf to use the POSIX/C locale by exporting LANG=C This ensures ASCII-only output and avoids locale-specific characters(e.g., UTF-8 symbols or translated strings), which could break text processing utilities like sed in the script Signed-off-by: John Wang Signed-off-by: Josh Poimboeuf --- scripts/faddr2line | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'scripts') diff --git a/scripts/faddr2line b/scripts/faddr2line index 1fa6beef9f97..1f364fbb0cd8 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -76,6 +76,10 @@ ADDR2LINE="${UTIL_PREFIX}addr2line${UTIL_SUFFIX}" AWK="awk" GREP="grep" +# Enforce ASCII-only output from tools like readelf +# ensuring sed processes strings correctly. 
+export LANG=C + command -v ${AWK} >/dev/null 2>&1 || die "${AWK} isn't installed" command -v ${READELF} >/dev/null 2>&1 || die "${READELF} isn't installed" command -v ${ADDR2LINE} >/dev/null 2>&1 || die "${ADDR2LINE} isn't installed" -- cgit v1.2.3 From 6b4679fcbfdf6f27f8455f9c7050ab6c46c6c5e0 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Sun, 21 Sep 2025 12:03:57 +0200 Subject: scripts/faddr2line: Use /usr/bin/env bash for portability The shebang `#!/bin/bash` assumes a fixed path for the bash interpreter. This path does not exist on some systems, such as NixOS, causing the script to fail. Replace `/bin/bash` with the more portable `#!/usr/bin/env bash`. Signed-off-by: Pankaj Raghav Signed-off-by: Josh Poimboeuf --- scripts/faddr2line | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/faddr2line b/scripts/faddr2line index 1f364fbb0cd8..7746d4ad0bfa 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # SPDX-License-Identifier: GPL-2.0 # # Translate stack dump function offsets. -- cgit v1.2.3 From ff5c0466486ba8d07ab2700380e8fd6d5344b4e9 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Sun, 21 Sep 2025 12:03:58 +0200 Subject: scripts/faddr2line: Fix "Argument list too long" error The run_readelf() function reads the entire output of readelf into a single shell variable. For large object files with extensive debug information, the size of this variable can exceed the system's command-line argument length limit. When this variable is subsequently passed to sed via `echo "${out}"`, it triggers an "Argument list too long" error, causing the script to fail. Fix this by redirecting the output of readelf to a temporary file instead of a variable. The sed commands are then modified to read from this file, avoiding the argument length limitation entirely. 
Signed-off-by: Pankaj Raghav Signed-off-by: Josh Poimboeuf --- scripts/faddr2line | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'scripts') diff --git a/scripts/faddr2line b/scripts/faddr2line index 7746d4ad0bfa..622875396bcf 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -111,14 +111,19 @@ find_dir_prefix() { run_readelf() { local objfile=$1 - local out=$(${READELF} --file-header --section-headers --symbols --wide $objfile) + local tmpfile + tmpfile=$(mktemp) + + ${READELF} --file-header --section-headers --symbols --wide "$objfile" > "$tmpfile" # This assumes that readelf first prints the file header, then the section headers, then the symbols. # Note: It seems that GNU readelf does not prefix section headers with the "There are X section headers" # line when multiple options are given, so let's also match with the "Section Headers:" line. - ELF_FILEHEADER=$(echo "${out}" | sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/q;p') - ELF_SECHEADERS=$(echo "${out}" | sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/,$p' | sed -n '/Symbol table .* contains [0-9]* entries:/q;p') - ELF_SYMS=$(echo "${out}" | sed -n '/Symbol table .* contains [0-9]* entries:/,$p') + ELF_FILEHEADER=$(sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/q;p' "$tmpfile") + ELF_SECHEADERS=$(sed -n '/There are [0-9]* section headers, starting at offset\|Section Headers:/,$p' "$tmpfile" | sed -n '/Symbol table .* contains [0-9]* entries:/q;p') + ELF_SYMS=$(sed -n '/Symbol table .* contains [0-9]* entries:/,$p' "$tmpfile") + + rm -f -- "$tmpfile" } check_vmlinux() { -- cgit v1.2.3 From 1ba9f8979426590367406c70c1c821f5b943f993 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:10 -0700 Subject: vmlinux.lds: Unify TEXT_MAIN, DATA_MAIN, and related macros TEXT_MAIN, DATA_MAIN and friends are defined differently depending on whether certain 
config options enable -ffunction-sections and/or -fdata-sections. There's no technical reason for that beyond voodoo coding. Keeping the separate implementations adds unnecessary complexity, fragments the logic, and increases the risk of subtle bugs. Unify the macros by using the same input section patterns across all configs. This is a prerequisite for the upcoming livepatch klp-build tooling which will manually enable -ffunction-sections and -fdata-sections via KCFLAGS. Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/module.lds.S | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/module.lds.S b/scripts/module.lds.S index ee79c41059f3..2632c6cb8ebe 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -38,12 +38,10 @@ SECTIONS { __kcfi_traps : { KEEP(*(.kcfi_traps)) } #endif -#ifdef CONFIG_LTO_CLANG - /* - * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and - * -ffunction-sections, which increases the size of the final module. - * Merge the split sections in the final binary. - */ + .text : { + *(.text .text.[0-9a-zA-Z_]*) + } + .bss : { *(.bss .bss.[0-9a-zA-Z_]*) *(.bss..L*) @@ -58,7 +56,7 @@ SECTIONS { *(.rodata .rodata.[0-9a-zA-Z_]*) *(.rodata..L*) } -#endif + MOD_SEPARATE_CODETAG_SECTIONS() } -- cgit v1.2.3 From 6717e8f91db71641cb52855ed14c7900972ed0bc Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:16 -0700 Subject: kbuild: Remove 'kmod_' prefix from __KBUILD_MODNAME In preparation for the objtool klp diff subcommand, remove the arbitrary 'kmod_' prefix from __KBUILD_MODNAME and instead add it explicitly in the __initcall_id() macro. This change supports the standardization of "unique" symbol naming by ensuring the non-unique portion of the name comes before the unique part. That will enable objtool to properly correlate symbols across builds. 
Cc: Masahiro Yamada Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/Makefile.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 1d581ba5df66..b95560266124 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -20,7 +20,7 @@ name-fix-token = $(subst $(comma),_,$(subst -,_,$1)) name-fix = $(call stringify,$(call name-fix-token,$1)) basename_flags = -DKBUILD_BASENAME=$(call name-fix,$(basetarget)) modname_flags = -DKBUILD_MODNAME=$(call name-fix,$(modname)) \ - -D__KBUILD_MODNAME=kmod_$(call name-fix-token,$(modname)) + -D__KBUILD_MODNAME=$(call name-fix-token,$(modname)) modfile_flags = -DKBUILD_MODFILE=$(call stringify,$(modfile)) _c_flags = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \ -- cgit v1.2.3 From 4109043bff31f95d3da9ace33eb3c1925fd62cbd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:17 -0700 Subject: modpost: Ignore unresolved section bounds symbols In preparation for klp-build livepatch module creation tooling, suppress warnings for unresolved references to linker-generated __start_* and __stop_* section bounds symbols. These symbols are expected to be undefined when modpost runs, as they're created later by the linker. 
Cc: Masahiro Yamada Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/mod/modpost.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'scripts') diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 47c8aa2a6939..755b842f1f9b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -606,6 +606,11 @@ static int ignore_undef_symbol(struct elf_info *info, const char *symname) strstarts(symname, "_savevr_") || strcmp(symname, ".TOC.") == 0) return 1; + + /* ignore linker-created section bounds variables */ + if (strstarts(symname, "__start_") || strstarts(symname, "__stop_")) + return 1; + /* Do not ignore this symbol */ return 0; } -- cgit v1.2.3 From 56754f0f46f6a36ba66e8c1b2878f7a4f1edfe3b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:42 -0700 Subject: objtool: Rename --Werror to --werror The objtool --Werror option name is stylistically inconsistent: halfway between GCC's single-dash capitalized -Werror and objtool's double-dash --lowercase convention, making it unnecessarily hard to remember. Make the 'W' lower case (--werror) for consistency with objtool's other options. 
Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/Makefile.lib | 2 +- scripts/Makefile.vmlinux_o | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index b95560266124..15fee73e9289 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -191,7 +191,7 @@ objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE) += --static-call objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION) += --uaccess objtool-args-$(or $(CONFIG_GCOV_KERNEL),$(CONFIG_KCOV)) += --no-unreachable objtool-args-$(CONFIG_PREFIX_SYMBOLS) += --prefix=$(CONFIG_FUNCTION_PADDING_BYTES) -objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror +objtool-args-$(CONFIG_OBJTOOL_WERROR) += --werror objtool-args = $(objtool-args-y) \ $(if $(delay-objtool), --link) \ diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 23c8751285d7..20533cc0b1ee 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -41,7 +41,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION)) ifeq ($(delay-objtool),y) vmlinux-objtool-args-y += $(objtool-args-y) else -vmlinux-objtool-args-$(CONFIG_OBJTOOL_WERROR) += --Werror +vmlinux-objtool-args-$(CONFIG_OBJTOOL_WERROR) += --werror endif vmlinux-objtool-args-$(CONFIG_NOINSTR_VALIDATION) += --noinstr \ -- cgit v1.2.3 From dd590d4d57ebeeb826823c288741f2ed20f452af Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:03:59 -0700 Subject: objtool/klp: Introduce klp diff subcommand for diffing object files Add a new klp diff subcommand which performs a binary diff between two object files and extracts changed functions into a new object which can then be linked into a livepatch module. This builds on concepts from the longstanding out-of-tree kpatch [1] project which began in 2012 and has been used for many years to generate livepatch modules for production kernels. 
However, this is a complete rewrite which incorporates hard-earned lessons from 12+ years of maintaining kpatch. Key improvements compared to kpatch-build: - Integrated with objtool: Leverages objtool's existing control-flow graph analysis to help detect changed functions. - Works on vmlinux.o: Supports late-linked objects, making it compatible with LTO, IBT, and similar. - Simplified code base: ~3k fewer lines of code. - Upstream: No more out-of-tree #ifdef hacks, far less cruft. - Cleaner internals: Vastly simplified logic for symbol/section/reloc inclusion and special section extraction. - Robust __LINE__ macro handling: Avoids false positive binary diffs caused by the __LINE__ macro by introducing a fix-patch-lines script (coming in a later patch) which injects #line directives into the source .patch to preserve the original line numbers at compile time. Note the end result of this subcommand is not yet functionally complete. Livepatch needs some ELF magic which linkers don't like: - Two relocation sections (.rela*, .klp.rela*) for the same text section. - Use of SHN_LIVEPATCH to mark livepatch symbols. Unfortunately linkers tend to mangle such things. To work around that, klp diff generates a linker-compliant intermediate binary which encodes the relevant KLP section/reloc/symbol metadata. After module linking, a klp post-link step (coming soon) will clean up the mess and convert the linked .ko into a fully compliant livepatch module. Note this subcommand requires the diffed binaries to have been compiled with -ffunction-sections and -fdata-sections, and processed with 'objtool --checksum'. Those constraints will be handled by a klp-build script introduced in a later patch. Without '-ffunction-sections -fdata-sections', reliable object diffing would be infeasible due to toolchain limitations: - For intra-file+intra-section references, the compiler might occasionally generate hard-coded instruction offsets instead of relocations.
- Section-symbol-based references can be ambiguous: - Overlapping or zero-length symbols create ambiguity as to which symbol is being referenced. - A reference to the end of a symbol (e.g., checking array bounds) can be misinterpreted as a reference to the next symbol, or vice versa. A potential future alternative to '-ffunction-sections -fdata-sections' would be to introduce a toolchain option that forces symbol-based (non-section) relocations. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/module.lds.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 2632c6cb8ebe..3037d5e5527c 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -34,8 +34,16 @@ SECTIONS { __patchable_function_entries : { *(__patchable_function_entries) } + __klp_funcs 0: ALIGN(8) { KEEP(*(__klp_funcs)) } + + __klp_objects 0: ALIGN(8) { + __start_klp_objects = .; + KEEP(*(__klp_objects)) + __stop_klp_objects = .; + } + #ifdef CONFIG_ARCH_USES_CFI_TRAPS - __kcfi_traps : { KEEP(*(.kcfi_traps)) } + __kcfi_traps : { KEEP(*(.kcfi_traps)) } #endif .text : { -- cgit v1.2.3 From f2c356d1d0f048e88c281a4178c8b2db138d3ac1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:05 -0700 Subject: kbuild,objtool: Defer objtool validation step for CONFIG_KLP_BUILD In preparation for klp-build, defer objtool validation for CONFIG_KLP_BUILD kernels until the final pre-link archive (e.g., vmlinux.o, module-foo.o) is built. This will simplify the process of generating livepatch modules. Delayed objtool is generally preferred anyway, and is already standard for IBT and LTO. Eventually the per-translation-unit mode will be phased out. 
Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/Makefile.lib | 2 +- scripts/link-vmlinux.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 15fee73e9289..28a1c08e3b22 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -197,7 +197,7 @@ objtool-args = $(objtool-args-y) \ $(if $(delay-objtool), --link) \ $(if $(part-of-module), --module) -delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT)) +delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT),$(CONFIG_KLP_BUILD)) cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@) cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 433849ff7529..2df714ba51a9 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -60,7 +60,8 @@ vmlinux_link() # skip output file argument shift - if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then + if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT || + is_enabled CONFIG_KLP_BUILD; then # Use vmlinux.o instead of performing the slow LTO link again. objs=vmlinux.o libs= -- cgit v1.2.3 From abaf1f42ddd070662fb419aed29c985ea209bd88 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:06 -0700 Subject: livepatch/klp-build: Introduce fix-patch-lines script to avoid __LINE__ diff noise The __LINE__ macro creates challenges for binary diffing. When a .patch file adds or removes lines, it shifts the line numbers for all code below it. This can cause the code generation of functions using __LINE__ to change due to the line number constant being embedded in a MOV instruction, despite there being no semantic difference. 
Avoid such false positives by adding a fix-patch-lines script which can be used to insert a #line directive in each patch hunk affecting the line numbering. This script will be used by klp-build, which will be introduced in a subsequent patch. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/livepatch/fix-patch-lines | 79 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100755 scripts/livepatch/fix-patch-lines (limited to 'scripts') diff --git a/scripts/livepatch/fix-patch-lines b/scripts/livepatch/fix-patch-lines new file mode 100755 index 000000000000..73c5e3dea46e --- /dev/null +++ b/scripts/livepatch/fix-patch-lines @@ -0,0 +1,79 @@ +#!/usr/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# +# Use #line directives to preserve original __LINE__ numbers across patches to +# avoid unwanted compilation changes. + +BEGIN { + in_hunk = 0 + skip = 0 +} + +/^--- / { + skip = $2 !~ /\.(c|h)$/ + print + next +} + +/^@@/ { + if (skip) { + print + next + } + + in_hunk = 1 + + # for @@ -1,3 +1,4 @@: + # 1: line number in old file + # 3: how many lines the hunk covers in old file + # 1: line number in new file + # 4: how many lines the hunk covers in new file + + match($0, /^@@ -([0-9]+)(,([0-9]+))? \+([0-9]+)(,([0-9]+))? @@/, m) + + # Set 'cur' to the old file's line number at the start of the hunk. It + # gets incremented for every context line and every line removal, so + # that it always represents the old file's current line number. + cur = m[1] + + # last = last line number of current hunk + last = cur + (m[3] ? 
m[3] : 1) - 1 + + need_line_directive = 0 + + print + next +} + +{ + if (skip || !in_hunk || $0 ~ /^\\ No newline at end of file/) { + print + next + } + + # change line + if ($0 ~ /^[+-]/) { + # inject #line after this group of changes + need_line_directive = 1 + + if ($0 ~ /^-/) + cur++ + + print + next + } + + # If this is the first context line after a group of changes, inject + # the #line directive to force the compiler to correct the line + # numbering to match the original file. + if (need_line_directive) { + print "+#line " cur + need_line_directive = 0 + } + + if (cur == last) + in_hunk = 0 + + cur++ + print +} -- cgit v1.2.3 From 59adee07b568fb78e2bf07df1f22f3fe45b7240a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:07 -0700 Subject: livepatch/klp-build: Add stub init code for livepatch modules Add a module initialization stub which can be linked with binary diff objects to produce a livepatch module. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/livepatch/init.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 scripts/livepatch/init.c (limited to 'scripts') diff --git a/scripts/livepatch/init.c b/scripts/livepatch/init.c new file mode 100644 index 000000000000..2274d8f5a482 --- /dev/null +++ b/scripts/livepatch/init.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Init code for a livepatch kernel module + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +extern struct klp_object_ext __start_klp_objects[]; +extern struct klp_object_ext __stop_klp_objects[]; + +static struct klp_patch *patch; + +static int __init livepatch_mod_init(void) +{ + struct klp_object *objs; + unsigned int nr_objs; + int ret; + + nr_objs = __stop_klp_objects - __start_klp_objects; + + if (!nr_objs) { + pr_err("nothing to patch!\n"); + ret = -EINVAL; + goto err; + } + + patch = kzalloc(sizeof(*patch), 
GFP_KERNEL); + if (!patch) { + ret = -ENOMEM; + goto err; + } + + objs = kzalloc(sizeof(struct klp_object) * (nr_objs + 1), GFP_KERNEL); + if (!objs) { + ret = -ENOMEM; + goto err_free_patch; + } + + for (int i = 0; i < nr_objs; i++) { + struct klp_object_ext *obj_ext = __start_klp_objects + i; + struct klp_func_ext *funcs_ext = obj_ext->funcs; + unsigned int nr_funcs = obj_ext->nr_funcs; + struct klp_func *funcs = objs[i].funcs; + struct klp_object *obj = objs + i; + + funcs = kzalloc(sizeof(struct klp_func) * (nr_funcs + 1), GFP_KERNEL); + if (!funcs) { + ret = -ENOMEM; + for (int j = 0; j < i; j++) + kfree(objs[j].funcs); + goto err_free_objs; + } + + for (int j = 0; j < nr_funcs; j++) { + funcs[j].old_name = funcs_ext[j].old_name; + funcs[j].new_func = funcs_ext[j].new_func; + funcs[j].old_sympos = funcs_ext[j].sympos; + } + + obj->name = obj_ext->name; + obj->funcs = funcs; + + memcpy(&obj->callbacks, &obj_ext->callbacks, sizeof(struct klp_callbacks)); + } + + patch->mod = THIS_MODULE; + patch->objs = objs; + + /* TODO patch->states */ + +#ifdef KLP_NO_REPLACE + patch->replace = false; +#else + patch->replace = true; +#endif + + return klp_enable_patch(patch); + +err_free_objs: + kfree(objs); +err_free_patch: + kfree(patch); +err: + return ret; +} + +static void __exit livepatch_mod_exit(void) +{ + unsigned int nr_objs; + + nr_objs = __stop_klp_objects - __start_klp_objects; + + for (int i = 0; i < nr_objs; i++) + kfree(patch->objs[i].funcs); + + kfree(patch->objs); + kfree(patch); +} + +module_init(livepatch_mod_init); +module_exit(livepatch_mod_exit); +MODULE_LICENSE("GPL"); +MODULE_INFO(livepatch, "Y"); +MODULE_DESCRIPTION("Livepatch module"); -- cgit v1.2.3 From 24ebfcd65a871df4555b98c49c9ed9a92f146113 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:08 -0700 Subject: livepatch/klp-build: Introduce klp-build script for generating livepatch modules Add a klp-build script which automates the generation of a livepatch module from a
source .patch file by performing the following steps: - Builds an original kernel with -ffunction-sections and -fdata-sections, plus objtool function checksumming. - Applies the .patch file and rebuilds the kernel using the same options. - Runs 'objtool klp diff' to detect changed functions and generate intermediate binary diff objects. - Builds a kernel module which links the diff objects with some livepatch module init code (scripts/livepatch/init.c). - Finalizes the livepatch module (aka work around linker wreckage) using 'objtool klp post-link'. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/Makefile.lib | 1 + scripts/livepatch/fix-patch-lines | 2 +- scripts/livepatch/klp-build | 743 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 745 insertions(+), 1 deletion(-) create mode 100755 scripts/livepatch/klp-build (limited to 'scripts') diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 28a1c08e3b22..f4b33919ec37 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -173,6 +173,7 @@ ifdef CONFIG_OBJTOOL objtool := $(objtree)/tools/objtool/objtool +objtool-args-$(CONFIG_KLP_BUILD) += --checksum objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr objtool-args-$(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) += --hacks=skylake diff --git a/scripts/livepatch/fix-patch-lines b/scripts/livepatch/fix-patch-lines index 73c5e3dea46e..fa7d4f6592e6 100755 --- a/scripts/livepatch/fix-patch-lines +++ b/scripts/livepatch/fix-patch-lines @@ -23,7 +23,7 @@ BEGIN { in_hunk = 1 - # for @@ -1,3 +1,4 @@: + # @@ -1,3 +1,4 @@: # 1: line number in old file # 3: how many lines the hunk covers in old file # 1: line number in new file diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build new file mode 100755 index 000000000000..01ed0b66bfaf --- /dev/null +++ b/scripts/livepatch/klp-build @@ -0,0 +1,743 @@ +#!/bin/bash +#
SPDX-License-Identifier: GPL-2.0 +# +# Build a livepatch module + +# shellcheck disable=SC1090,SC2155 + +if (( BASH_VERSINFO[0] < 4 || \ + (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] < 4) )); then + echo "error: this script requires bash 4.4+" >&2 + exit 1 +fi + +set -o errexit +set -o errtrace +set -o pipefail +set -o nounset + +# Allow doing 'cmd | mapfile -t array' instead of 'mapfile -t array < <(cmd)'. +# This helps keep execution in pipes so pipefail+errexit can catch errors. +shopt -s lastpipe + +unset SKIP_CLEANUP XTRACE + +REPLACE=1 +SHORT_CIRCUIT=0 +JOBS="$(getconf _NPROCESSORS_ONLN)" +VERBOSE="-s" +shopt -o xtrace | grep -q 'on' && XTRACE=1 + +# Avoid removing the previous $TMP_DIR until args have been fully processed. +KEEP_TMP=1 + +SCRIPT="$(basename "$0")" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +FIX_PATCH_LINES="$SCRIPT_DIR/fix-patch-lines" + +SRC="$(pwd)" +OBJ="$(pwd)" + +CONFIG="$OBJ/.config" +TMP_DIR="$OBJ/klp-tmp" + +ORIG_DIR="$TMP_DIR/orig" +PATCHED_DIR="$TMP_DIR/patched" +DIFF_DIR="$TMP_DIR/diff" +KMOD_DIR="$TMP_DIR/kmod" + +STASH_DIR="$TMP_DIR/stash" +TIMESTAMP="$TMP_DIR/timestamp" +PATCH_TMP_DIR="$TMP_DIR/tmp" + +KLP_DIFF_LOG="$DIFF_DIR/diff.log" + +grep0() { + command grep "$@" || true +} + +status() { + echo "$*" +} + +warn() { + echo "error: $SCRIPT: $*" >&2 +} + +die() { + warn "$@" + exit 1 +} + +declare -a STASHED_FILES + +stash_file() { + local file="$1" + local rel_file="${file#"$SRC"/}" + + [[ ! 
-e "$file" ]] && die "no file to stash: $file" + + mkdir -p "$STASH_DIR/$(dirname "$rel_file")" + cp -f "$file" "$STASH_DIR/$rel_file" + + STASHED_FILES+=("$rel_file") +} + +restore_files() { + local file + + for file in "${STASHED_FILES[@]}"; do + mv -f "$STASH_DIR/$file" "$SRC/$file" || warn "can't restore file: $file" + done + + STASHED_FILES=() +} + +cleanup() { + set +o nounset + revert_patches "--recount" + restore_files + [[ "$KEEP_TMP" -eq 0 ]] && rm -rf "$TMP_DIR" + return 0 +} + +trap_err() { + warn "line ${BASH_LINENO[0]}: '$BASH_COMMAND'" +} + +trap cleanup EXIT INT TERM HUP +trap trap_err ERR + +__usage() { + cat < Build jobs to run simultaneously [default: $JOBS] + -o, --output= Output file [default: livepatch-.ko] + --no-replace Disable livepatch atomic replace + -v, --verbose Pass V=1 to kernel/module builds + +Advanced Options: + -S, --short-circuit=STEP Start at build step (requires prior --keep-tmp) + 1|orig Build original kernel (default) + 2|patched Build patched kernel + 3|diff Diff objects + 4|kmod Build patch module + -T, --keep-tmp Preserve tmp dir on exit + +EOF +} + +usage() { + __usage >&2 +} + +process_args() { + local keep_tmp=0 + local short + local long + local args + + short="hj:o:vS:T" + long="help,jobs:,output:,no-replace,verbose,short-circuit:,keep-tmp" + + args=$(getopt --options "$short" --longoptions "$long" -- "$@") || { + echo; usage; exit + } + eval set -- "$args" + + while true; do + case "$1" in + -h | --help) + usage + exit 0 + ;; + -j | --jobs) + JOBS="$2" + shift 2 + ;; + -o | --output) + [[ "$2" != *.ko ]] && die "output filename should end with .ko" + OUTFILE="$2" + NAME="$(basename "$OUTFILE")" + NAME="${NAME%.ko}" + NAME="$(module_name_string "$NAME")" + shift 2 + ;; + --no-replace) + REPLACE=0 + shift + ;; + -v | --verbose) + VERBOSE="V=1" + shift + ;; + -S | --short-circuit) + [[ ! 
-d "$TMP_DIR" ]] && die "--short-circuit requires preserved klp-tmp dir" + keep_tmp=1 + case "$2" in + 1 | orig) SHORT_CIRCUIT=1; ;; + 2 | patched) SHORT_CIRCUIT=2; ;; + 3 | diff) SHORT_CIRCUIT=3; ;; + 4 | kmod | mod) SHORT_CIRCUIT=4; ;; + *) die "invalid short-circuit step '$2'" ;; + esac + shift 2 + ;; + -T | --keep-tmp) + keep_tmp=1 + shift + ;; + --) + shift + break + ;; + *) + usage + exit 1 + ;; + esac + done + + if [[ $# -eq 0 ]]; then + usage + exit 1 + fi + + KEEP_TMP="$keep_tmp" + PATCHES=("$@") +} + +# temporarily disable xtrace for especially verbose code +xtrace_save() { + [[ -v XTRACE ]] && set +x + return 0 +} + +xtrace_restore() { + [[ -v XTRACE ]] && set -x + return 0 +} + +validate_config() { + xtrace_save "reading .config" + source "$CONFIG" || die "no .config file in $(dirname "$CONFIG")" + xtrace_restore + + [[ -v CONFIG_LIVEPATCH ]] || \ + die "CONFIG_LIVEPATCH not enabled" + + [[ -v CONFIG_KLP_BUILD ]] || \ + die "CONFIG_KLP_BUILD not enabled" + + [[ -v CONFIG_GCC_PLUGIN_LATENT_ENTROPY ]] && \ + die "kernel option 'CONFIG_GCC_PLUGIN_LATENT_ENTROPY' not supported" + + [[ -v CONFIG_GCC_PLUGIN_RANDSTRUCT ]] && \ + die "kernel option 'CONFIG_GCC_PLUGIN_RANDSTRUCT' not supported" + + return 0 +} + +# Only allow alphanumerics and '_' and '-' in the module name. Everything else +# is replaced with '-'. Also truncate to 55 chars so the full name + NUL +# terminator fits in the kernel's 56-byte module name array. +module_name_string() { + echo "${1//[^a-zA-Z0-9_-]/-}" | cut -c 1-55 +} + +# If the module name wasn't specified on the cmdline with --output, give it a +# name based on the patch name.
+set_module_name() { + [[ -v NAME ]] && return 0 + + if [[ "${#PATCHES[@]}" -eq 1 ]]; then + NAME="$(basename "${PATCHES[0]}")" + NAME="${NAME%.*}" + else + NAME="patch" + fi + + NAME="livepatch-$NAME" + NAME="$(module_name_string "$NAME")" + + OUTFILE="$NAME.ko" +} + +# Hardcode the value printed by the localversion script to prevent patch +# application from appending it with '+' due to a dirty git working tree. +set_kernelversion() { + local file="$SRC/scripts/setlocalversion" + local localversion + + stash_file "$file" + + localversion="$(cd "$SRC" && make --no-print-directory kernelversion)" + localversion="$(cd "$SRC" && KERNELVERSION="$localversion" ./scripts/setlocalversion)" + [[ -z "$localversion" ]] && die "setlocalversion failed" + + sed -i "2i echo $localversion; exit 0" scripts/setlocalversion +} + +get_patch_files() { + local patch="$1" + + grep0 -E '^(--- |\+\+\+ )' "$patch" \ + | gawk '{print $2}' \ + | sed 's|^[^/]*/||' \ + | sort -u +} + +# Make sure git re-stats the changed files +git_refresh() { + local patch="$1" + local files=() + + [[ ! -e "$SRC/.git" ]] && return + + get_patch_files "$patch" | mapfile -t files + + ( + cd "$SRC" + git update-index -q --refresh -- "${files[@]}" + ) +} + +check_unsupported_patches() { + local patch + + for patch in "${PATCHES[@]}"; do + local files=() + + get_patch_files "$patch" | mapfile -t files + + for file in "${files[@]}"; do + case "$file" in + lib/*|*.S) + die "unsupported patch to $file" + ;; + esac + done + done +} + +apply_patch() { + local patch="$1" + shift + local extra_args=("$@") + + [[ ! -f "$patch" ]] && die "$patch doesn't exist" + + ( + cd "$SRC" + + # The sed strips the version signature from 'git format-patch', + # otherwise 'git apply --recount' warns. 
+ sed -n '/^-- /q;p' "$patch" | + git apply "${extra_args[@]}" + ) + + APPLIED_PATCHES+=("$patch") +} + +revert_patch() { + local patch="$1" + shift + local extra_args=("$@") + local tmp=() + + ( + cd "$SRC" + + sed -n '/^-- /q;p' "$patch" | + git apply --reverse "${extra_args[@]}" + ) + git_refresh "$patch" + + for p in "${APPLIED_PATCHES[@]}"; do + [[ "$p" == "$patch" ]] && continue + tmp+=("$p") + done + + APPLIED_PATCHES=("${tmp[@]}") +} + +apply_patches() { + local patch + + for patch in "${PATCHES[@]}"; do + apply_patch "$patch" + done +} + +revert_patches() { + local extra_args=("$@") + local patches=("${APPLIED_PATCHES[@]}") + + for (( i=${#patches[@]}-1 ; i>=0 ; i-- )) ; do + revert_patch "${patches[$i]}" "${extra_args[@]}" + done + + APPLIED_PATCHES=() +} + +validate_patches() { + check_unsupported_patches + apply_patches + revert_patches +} + +do_init() { + # We're not yet smart enough to handle anything other than in-tree + # builds in pwd. + [[ ! "$SRC" -ef "$SCRIPT_DIR/../.." ]] && die "please run from the kernel root directory" + [[ ! "$OBJ" -ef "$SCRIPT_DIR/../.." ]] && die "please run from the kernel root directory" + + (( SHORT_CIRCUIT <= 1 )) && rm -rf "$TMP_DIR" + mkdir -p "$TMP_DIR" + + APPLIED_PATCHES=() + + [[ -x "$FIX_PATCH_LINES" ]] || die "can't find fix-patch-lines" + + validate_config + set_module_name + set_kernelversion +} + +# Refresh the patch hunk headers, specifically the line numbers and counts. 
+refresh_patch() { + local patch="$1" + local tmpdir="$PATCH_TMP_DIR" + local files=() + + rm -rf "$tmpdir" + mkdir -p "$tmpdir/a" + mkdir -p "$tmpdir/b" + + # Get all source files affected by the patch + get_patch_files "$patch" | mapfile -t files + + # Copy orig source files to 'a' + ( cd "$SRC" && echo "${files[@]}" | xargs cp --parents --target-directory="$tmpdir/a" ) + + # Copy patched source files to 'b' + apply_patch "$patch" --recount + ( cd "$SRC" && echo "${files[@]}" | xargs cp --parents --target-directory="$tmpdir/b" ) + revert_patch "$patch" --recount + + # Diff 'a' and 'b' to make a clean patch + ( cd "$tmpdir" && git diff --no-index --no-prefix a b > "$patch" ) || true +} + +# Copy the patches to a temporary directory, fix their lines so as not to +# affect the __LINE__ macro for otherwise unchanged functions further down the +# file, and update $PATCHES to point to the fixed patches. +fix_patches() { + local idx + local i + + rm -f "$TMP_DIR"/*.patch + + idx=0001 + for i in "${!PATCHES[@]}"; do + local old_patch="${PATCHES[$i]}" + local tmp_patch="$TMP_DIR/tmp.patch" + local patch="${PATCHES[$i]}" + local new_patch + + new_patch="$TMP_DIR/$idx-fixed-$(basename "$patch")" + + cp -f "$old_patch" "$tmp_patch" + refresh_patch "$tmp_patch" + "$FIX_PATCH_LINES" "$tmp_patch" > "$new_patch" + refresh_patch "$new_patch" + + PATCHES[i]="$new_patch" + + rm -f "$tmp_patch" + idx=$(printf "%04d" $(( 10#$idx + 1 ))) + done +} + +clean_kernel() { + local cmd=() + + cmd=("make") + cmd+=("--silent") + cmd+=("-j$JOBS") + cmd+=("clean") + + ( + cd "$SRC" + "${cmd[@]}" + ) +} + +build_kernel() { + local log="$TMP_DIR/build.log" + local cmd=() + + cmd=("make") + + # When a patch to a kernel module references a newly created unexported + # symbol which lives in vmlinux or another kernel module, the patched + # kernel build fails with the following error: + # + # ERROR: modpost: "klp_string" [fs/xfs/xfs.ko] undefined! 
+ # + # The undefined symbols are working as designed in that case. They get + # resolved later when the livepatch module build link pulls all the + # disparate objects together into the same kernel module. + # + # It would be good to have a way to tell modpost to skip checking for + # undefined symbols altogether. For now, just convert the error to a + # warning with KBUILD_MODPOST_WARN, and grep out the warning to avoid + # confusing the user. + # + cmd+=("KBUILD_MODPOST_WARN=1") + + cmd+=("$VERBOSE") + cmd+=("-j$JOBS") + cmd+=("KCFLAGS=-ffunction-sections -fdata-sections") + cmd+=("vmlinux") + cmd+=("modules") + + ( + cd "$SRC" + "${cmd[@]}" \ + 1> >(tee -a "$log") \ + 2> >(tee -a "$log" | grep0 -v "modpost.*undefined!" >&2) + ) +} + +find_objects() { + local opts=("$@") + + # Find root-level vmlinux.o and non-root-level .ko files, + # excluding klp-tmp/ and .git/ + find "$OBJ" \( -path "$TMP_DIR" -o -path "$OBJ/.git" -o -regex "$OBJ/[^/][^/]*\.ko" \) -prune -o \ + -type f "${opts[@]}" \ + \( -name "*.ko" -o -path "$OBJ/vmlinux.o" \) \ + -printf '%P\n' +} + +# Copy all .o archives to $ORIG_DIR +copy_orig_objects() { + local files=() + + rm -rf "$ORIG_DIR" + mkdir -p "$ORIG_DIR" + + find_objects | mapfile -t files + + xtrace_save "copying orig objects" + for _file in "${files[@]}"; do + local rel_file="${_file/.ko/.o}" + local file="$OBJ/$rel_file" + local file_dir="$(dirname "$file")" + local orig_file="$ORIG_DIR/$rel_file" + local orig_dir="$(dirname "$orig_file")" + local cmd_file="$file_dir/.$(basename "$file").cmd" + + [[ ! 
-f "$file" ]] && die "missing $(basename "$file") for $_file" + + mkdir -p "$orig_dir" + cp -f "$file" "$orig_dir" + [[ -e "$cmd_file" ]] && cp -f "$cmd_file" "$orig_dir" + done + xtrace_restore + + mv -f "$TMP_DIR/build.log" "$ORIG_DIR" + touch "$TIMESTAMP" +} + +# Copy all changed objects to $PATCHED_DIR +copy_patched_objects() { + local files=() + local opts=() + local found=0 + + rm -rf "$PATCHED_DIR" + mkdir -p "$PATCHED_DIR" + + # Note this doesn't work with some configs, thus the 'cmp' below. + opts=("-newer") + opts+=("$TIMESTAMP") + + find_objects "${opts[@]}" | mapfile -t files + + xtrace_save "copying changed objects" + for _file in "${files[@]}"; do + local rel_file="${_file/.ko/.o}" + local file="$OBJ/$rel_file" + local orig_file="$ORIG_DIR/$rel_file" + local patched_file="$PATCHED_DIR/$rel_file" + local patched_dir="$(dirname "$patched_file")" + + [[ ! -f "$file" ]] && die "missing $(basename "$file") for $_file" + + cmp -s "$orig_file" "$file" && continue + + mkdir -p "$patched_dir" + cp -f "$file" "$patched_dir" + found=1 + done + xtrace_restore + + (( found == 0 )) && die "no changes detected" + + mv -f "$TMP_DIR/build.log" "$PATCHED_DIR" +} + +# Diff changed objects, writing output object to $DIFF_DIR +diff_objects() { + local log="$KLP_DIFF_LOG" + local files=() + + rm -rf "$DIFF_DIR" + mkdir -p "$DIFF_DIR" + + find "$PATCHED_DIR" -type f -name "*.o" | mapfile -t files + [[ ${#files[@]} -eq 0 ]] && die "no changes detected" + + # Diff all changed objects + for file in "${files[@]}"; do + local rel_file="${file#"$PATCHED_DIR"/}" + local orig_file="$rel_file" + local patched_file="$PATCHED_DIR/$rel_file" + local out_file="$DIFF_DIR/$rel_file" + local cmd=() + + mkdir -p "$(dirname "$out_file")" + + cmd=("$SRC/tools/objtool/objtool") + cmd+=("klp") + cmd+=("diff") + cmd+=("$orig_file") + cmd+=("$patched_file") + cmd+=("$out_file") + + ( + cd "$ORIG_DIR" + "${cmd[@]}" \ + 1> >(tee -a "$log") \ + 2> >(tee -a "$log" >&2) || \ + die "objtool klp diff 
failed" + ) + done +} + +# Build and post-process livepatch module in $KMOD_DIR +build_patch_module() { + local makefile="$KMOD_DIR/Kbuild" + local log="$KMOD_DIR/build.log" + local kmod_file + local cflags=() + local files=() + local cmd=() + + rm -rf "$KMOD_DIR" + mkdir -p "$KMOD_DIR" + + cp -f "$SRC/scripts/livepatch/init.c" "$KMOD_DIR" + + echo "obj-m := $NAME.o" > "$makefile" + echo -n "$NAME-y := init.o" >> "$makefile" + + find "$DIFF_DIR" -type f -name "*.o" | mapfile -t files + [[ ${#files[@]} -eq 0 ]] && die "no changes detected" + + for file in "${files[@]}"; do + local rel_file="${file#"$DIFF_DIR"/}" + local orig_file="$ORIG_DIR/$rel_file" + local orig_dir="$(dirname "$orig_file")" + local kmod_file="$KMOD_DIR/$rel_file" + local kmod_dir="$(dirname "$kmod_file")" + local cmd_file="$orig_dir/.$(basename "$file").cmd" + + mkdir -p "$kmod_dir" + cp -f "$file" "$kmod_dir" + [[ -e "$cmd_file" ]] && cp -f "$cmd_file" "$kmod_dir" + + # Tell kbuild this is a prebuilt object + cp -f "$file" "${kmod_file}_shipped" + + echo -n " $rel_file" >> "$makefile" + done + + echo >> "$makefile" + + cflags=("-ffunction-sections") + cflags+=("-fdata-sections") + [[ $REPLACE -eq 0 ]] && cflags+=("-DKLP_NO_REPLACE") + + cmd=("make") + cmd+=("$VERBOSE") + cmd+=("-j$JOBS") + cmd+=("--directory=.") + cmd+=("M=$KMOD_DIR") + cmd+=("KCFLAGS=${cflags[*]}") + + # Build a "normal" kernel module with init.c and the diffed objects + ( + cd "$SRC" + "${cmd[@]}" \ + 1> >(tee -a "$log") \ + 2> >(tee -a "$log" >&2) + ) + + kmod_file="$KMOD_DIR/$NAME.ko" + + # Save off the intermediate binary for debugging + cp -f "$kmod_file" "$kmod_file.orig" + + # Work around issue where slight .config change makes corrupt BTF + objcopy --remove-section=.BTF "$kmod_file" + + # Fix (and work around) linker wreckage for klp syms / relocs + "$SRC/tools/objtool/objtool" klp post-link "$kmod_file" || die "objtool klp post-link failed" + + cp -f "$kmod_file" "$OUTFILE" +} + + 
+################################################################################ + +process_args "$@" +do_init + +if (( SHORT_CIRCUIT <= 1 )); then + status "Validating patch(es)" + validate_patches + status "Building original kernel" + clean_kernel + build_kernel + status "Copying original object files" + copy_orig_objects +fi + +if (( SHORT_CIRCUIT <= 2 )); then + status "Fixing patch(es)" + fix_patches + apply_patches + status "Building patched kernel" + build_kernel + revert_patches + status "Copying patched object files" + copy_patched_objects +fi + +if (( SHORT_CIRCUIT <= 3 )); then + status "Diffing objects" + diff_objects +fi + +if (( SHORT_CIRCUIT <= 4 )); then + status "Building patch module: $OUTFILE" + build_patch_module +fi + +status "SUCCESS" -- cgit v1.2.3 From 2c2f0b8626917c48e4b12827d296a3c654612b90 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:09 -0700 Subject: livepatch/klp-build: Add --debug option to show cloning decisions Add a --debug option which gets passed to "objtool klp diff" to enable debug output related to cloning decisions. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/livepatch/klp-build | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 01ed0b66bfaf..28ee259ce5f6 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build @@ -20,7 +20,7 @@ set -o nounset # This helps keep execution in pipes so pipefail+errexit can catch errors. 
shopt -s lastpipe -unset SKIP_CLEANUP XTRACE +unset DEBUG_CLONE SKIP_CLEANUP XTRACE REPLACE=1 SHORT_CIRCUIT=0 @@ -120,6 +120,7 @@ Options: -v, --verbose Pass V=1 to kernel/module builds Advanced Options: + -d, --debug Show symbol/reloc cloning decisions -S, --short-circuit=STEP Start at build step (requires prior --keep-tmp) 1|orig Build original kernel (default) 2|patched Build patched kernel @@ -140,8 +141,8 @@ process_args() { local long local args - short="hj:o:vS:T" - long="help,jobs:,output:,no-replace,verbose,short-circuit:,keep-tmp" + short="hj:o:vdS:T" + long="help,jobs:,output:,no-replace,verbose,debug,short-circuit:,keep-tmp" args=$(getopt --options "$short" --longoptions "$long" -- "$@") || { echo; usage; exit @@ -174,6 +175,11 @@ process_args() { VERBOSE="V=1" shift ;; + -d | --debug) + DEBUG_CLONE=1 + keep_tmp=1 + shift + ;; -S | --short-circuit) [[ ! -d "$TMP_DIR" ]] && die "--short-circuit requires preserved klp-tmp dir" keep_tmp=1 @@ -596,6 +602,7 @@ copy_patched_objects() { diff_objects() { local log="$KLP_DIFF_LOG" local files=() + local opts=() rm -rf "$DIFF_DIR" mkdir -p "$DIFF_DIR" @@ -603,6 +610,8 @@ diff_objects() { find "$PATCHED_DIR" -type f -name "*.o" | mapfile -t files [[ ${#files[@]} -eq 0 ]] && die "no changes detected" + [[ -v DEBUG_CLONE ]] && opts=("--debug") + # Diff all changed objects for file in "${files[@]}"; do local rel_file="${file#"$PATCHED_DIR"/}" @@ -616,6 +625,7 @@ diff_objects() { cmd=("$SRC/tools/objtool/objtool") cmd+=("klp") cmd+=("diff") + (( ${#opts[@]} > 0 )) && cmd+=("${opts[@]}") cmd+=("$orig_file") cmd+=("$patched_file") cmd+=("$out_file") -- cgit v1.2.3 From 78be9facfb5e711e5284ef1856401ea909eceeb2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 17 Sep 2025 09:04:10 -0700 Subject: livepatch/klp-build: Add --show-first-changed option to show function divergence Add a --show-first-changed option to identify where changed functions begin to diverge: - Parse 'objtool klp diff' output to find changed 
functions. - Run objtool again on each object with --debug-checksum=. - Diff the per-instruction checksum debug output to locate the first differing instruction. This can be useful for quickly determining where and why a function changed. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- scripts/livepatch/klp-build | 82 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 4 deletions(-) (limited to 'scripts') diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 28ee259ce5f6..881e052e7fae 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build @@ -20,7 +20,7 @@ set -o nounset # This helps keep execution in pipes so pipefail+errexit can catch errors. shopt -s lastpipe -unset DEBUG_CLONE SKIP_CLEANUP XTRACE +unset DEBUG_CLONE DIFF_CHECKSUM SKIP_CLEANUP XTRACE REPLACE=1 SHORT_CIRCUIT=0 @@ -114,6 +114,7 @@ Usage: $SCRIPT [OPTIONS] PATCH_FILE(s) Generate a livepatch module. Options: + -f, --show-first-changed Show address of first changed instruction -j, --jobs= Build jobs to run simultaneously [default: $JOBS] -o, --output= Output file [default: livepatch-.ko] --no-replace Disable livepatch atomic replace @@ -141,8 +142,8 @@ process_args() { local long local args - short="hj:o:vdS:T" - long="help,jobs:,output:,no-replace,verbose,debug,short-circuit:,keep-tmp" + short="hfj:o:vdS:T" + long="help,show-first-changed,jobs:,output:,no-replace,verbose,debug,short-circuit:,keep-tmp" args=$(getopt --options "$short" --longoptions "$long" -- "$@") || { echo; usage; exit @@ -155,6 +156,10 @@ process_args() { usage exit 0 ;; + -f | --show-first-changed) + DIFF_CHECKSUM=1 + shift + ;; -j | --jobs) JOBS="$2" shift 2 @@ -618,6 +623,7 @@ diff_objects() { local orig_file="$rel_file" local patched_file="$PATCHED_DIR/$rel_file" local out_file="$DIFF_DIR/$rel_file" + local filter=() local cmd=() mkdir -p "$(dirname "$out_file")" @@ -630,16 +636,80 @@ diff_objects() { cmd+=("$patched_file") 
cmd+=("$out_file") + if [[ -v DIFF_CHECKSUM ]]; then + filter=("grep0") + filter+=("-Ev") + filter+=("DEBUG: .*checksum: ") + else + filter=("cat") + fi + ( cd "$ORIG_DIR" "${cmd[@]}" \ 1> >(tee -a "$log") \ - 2> >(tee -a "$log" >&2) || \ + 2> >(tee -a "$log" | "${filter[@]}" >&2) || \ die "objtool klp diff failed" ) done } +# For each changed object, run objtool with --debug-checksum to get the +# per-instruction checksums, and then diff those to find the first changed +# instruction for each function. +diff_checksums() { + local orig_log="$ORIG_DIR/checksum.log" + local patched_log="$PATCHED_DIR/checksum.log" + local -A funcs + local cmd=() + local line + local file + local func + + gawk '/\.o: changed function: / { + sub(/:$/, "", $1) + print $1, $NF + }' "$KLP_DIFF_LOG" | mapfile -t lines + + for line in "${lines[@]}"; do + read -r file func <<< "$line" + if [[ ! -v funcs["$file"] ]]; then + funcs["$file"]="$func" + else + funcs["$file"]+=" $func" + fi + done + + cmd=("$SRC/tools/objtool/objtool") + cmd+=("--checksum") + cmd+=("--link") + cmd+=("--dry-run") + + for file in "${!funcs[@]}"; do + local opt="--debug-checksum=${funcs[$file]// /,}" + + ( + cd "$ORIG_DIR" + "${cmd[@]}" "$opt" "$file" &> "$orig_log" || \ + ( cat "$orig_log" >&2; die "objtool --debug-checksum failed" ) + + cd "$PATCHED_DIR" + "${cmd[@]}" "$opt" "$file" &> "$patched_log" || \ + ( cat "$patched_log" >&2; die "objtool --debug-checksum failed" ) + ) + + for func in ${funcs[$file]}; do + diff <( grep0 -E "^DEBUG: .*checksum: $func " "$orig_log" | sed "s|$ORIG_DIR/||") \ + <( grep0 -E "^DEBUG: .*checksum: $func " "$patched_log" | sed "s|$PATCHED_DIR/||") \ + | gawk '/^< DEBUG: / { + gsub(/:/, "") + printf "%s: %s: %s\n", $3, $5, $6 + exit + }' || true + done + done +} + # Build and post-process livepatch module in $KMOD_DIR build_patch_module() { local makefile="$KMOD_DIR/Kbuild" @@ -743,6 +813,10 @@ fi if (( SHORT_CIRCUIT <= 3 )); then status "Diffing objects" diff_objects + if [[ -v 
DIFF_CHECKSUM ]]; then + status "Finding first changed instructions" + diff_checksums + fi fi if (( SHORT_CIRCUIT <= 4 )); then -- cgit v1.2.3 From 44472d1b83127e579c798ff92a07ae86d98b61b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Oct 2025 13:07:32 +0200 Subject: atomic: Skip alignment check for try_cmpxchg() old arg The 'old' argument in atomic_try_cmpxchg() and related functions is a pointer to a normal non-atomic integer number, which does not require to be naturally aligned, unlike the atomic_t/atomic64_t types themselves. In order to add an alignment check with CONFIG_DEBUG_ATOMIC into the normal instrument_atomic_read_write() helper, change this check to use the non-atomic instrument_read_write(), the same way that was done earlier for try_cmpxchg() in commit ec570320b09f ("locking/atomic: Correct (cmp)xchg() instrumentation"). This prevents warnings on m68k calling the 32-bit atomic_try_cmpxchg() with 16-bit aligned arguments as well as several more architectures including x86-32 when calling atomic64_try_cmpxchg() with 32-bit aligned u64 arguments. Reported-by: Finn Thain Signed-off-by: Arnd Bergmann Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/cover.1757810729.git.fthain@linux-m68k.org/ --- scripts/atomic/gen-atomic-instrumented.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'scripts') diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh index 592f3ec89b5f..9c1d53f81eb2 100755 --- a/scripts/atomic/gen-atomic-instrumented.sh +++ b/scripts/atomic/gen-atomic-instrumented.sh @@ -12,7 +12,7 @@ gen_param_check() local arg="$1"; shift local type="${arg%%:*}" local name="$(gen_param_name "${arg}")" - local rw="write" + local rw="atomic_write" case "${type#c}" in i) return;; @@ -20,14 +20,17 @@ gen_param_check() if [ ${type#c} != ${type} ]; then # We don't write to constant parameters. 
- rw="read" + rw="atomic_read" + elif [ "${type}" = "p" ] ; then + # The "old" argument in try_cmpxchg() gets accessed non-atomically + rw="read_write" elif [ "${meta}" != "s" ]; then # An atomic RMW: if this parameter is not a constant, and this atomic is # not just a 's'tore, this parameter is both read from and written to. - rw="read_write" + rw="atomic_read_write" fi - printf "\tinstrument_atomic_${rw}(${name}, sizeof(*${name}));\n" + printf "\tinstrument_${rw}(${name}, sizeof(*${name}));\n" } #gen_params_checks(meta, arg...) -- cgit v1.2.3 From b055f4c431e3d0e0508b7541d7c3fa2f9cd2e0ab Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Oct 2025 20:43:39 -0400 Subject: sorttable: Move ELF parsing into scripts/elf-parse.[ch] In order to share the elf parsing that is in sorttable.c so that other programs could use the same code, move it into elf-parse.c and elf-parse.h. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Linus Torvalds Cc: Randy Dunlap Cc: Stephen Rothwell Link: https://lore.kernel.org/20251022004452.752298788@kernel.org Signed-off-by: Steven Rostedt (Google) --- scripts/Makefile | 3 + scripts/elf-parse.c | 198 ++++++++++++++++++++++ scripts/elf-parse.h | 305 +++++++++++++++++++++++++++++++++ scripts/sorttable.c | 477 ++++------------------------------------------------ 4 files changed, 540 insertions(+), 443 deletions(-) create mode 100644 scripts/elf-parse.c create mode 100644 scripts/elf-parse.h (limited to 'scripts') diff --git a/scripts/Makefile b/scripts/Makefile index 46f860529df5..f19624b3ed92 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -12,6 +12,8 @@ hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen 
+sorttable-objs := sorttable.o elf-parse.o + ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) always-$(CONFIG_RUST) += target.json filechk_rust_target = $< < include/config/auto.conf @@ -25,6 +27,7 @@ generate_rust_target-rust := y rustdoc_test_builder-rust := y rustdoc_test_gen-rust := y +HOSTCFLAGS_elf-parse.o = -I$(srctree)/tools/include HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTLDLIBS_sorttable = -lpthread HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include diff --git a/scripts/elf-parse.c b/scripts/elf-parse.c new file mode 100644 index 000000000000..99869ff91a8c --- /dev/null +++ b/scripts/elf-parse.c @@ -0,0 +1,198 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "elf-parse.h" + +struct elf_funcs elf_parser; + +/* + * Get the whole file as a programming convenience in order to avoid + * malloc+lseek+read+free of many pieces. If successful, then mmap + * avoids copying unused pieces; else just read the whole file. + * Open for both read and write. 
+ */ +static void *map_file(char const *fname, size_t *size) +{ + int fd; + struct stat sb; + void *addr = NULL; + + fd = open(fname, O_RDWR); + if (fd < 0) { + perror(fname); + return NULL; + } + if (fstat(fd, &sb) < 0) { + perror(fname); + goto out; + } + if (!S_ISREG(sb.st_mode)) { + fprintf(stderr, "not a regular file: %s\n", fname); + goto out; + } + + addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + fprintf(stderr, "Could not mmap file: %s\n", fname); + goto out; + } + + *size = sb.st_size; + +out: + close(fd); + return addr; +} + +static int elf_parse(const char *fname, void *addr, uint32_t types) +{ + Elf_Ehdr *ehdr = addr; + uint16_t type; + + switch (ehdr->e32.e_ident[EI_DATA]) { + case ELFDATA2LSB: + elf_parser.r = rle; + elf_parser.r2 = r2le; + elf_parser.r8 = r8le; + elf_parser.w = wle; + elf_parser.w8 = w8le; + break; + case ELFDATA2MSB: + elf_parser.r = rbe; + elf_parser.r2 = r2be; + elf_parser.r8 = r8be; + elf_parser.w = wbe; + elf_parser.w8 = w8be; + break; + default: + fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", + ehdr->e32.e_ident[EI_DATA], fname); + return -1; + } + + if (memcmp(ELFMAG, ehdr->e32.e_ident, SELFMAG) != 0 || + ehdr->e32.e_ident[EI_VERSION] != EV_CURRENT) { + fprintf(stderr, "unrecognized ELF file %s\n", fname); + return -1; + } + + type = elf_parser.r2(&ehdr->e32.e_type); + if (!((1 << type) & types)) { + fprintf(stderr, "Invalid ELF type file %s\n", fname); + return -1; + } + + switch (ehdr->e32.e_ident[EI_CLASS]) { + case ELFCLASS32: { + elf_parser.ehdr_shoff = ehdr32_shoff; + elf_parser.ehdr_shentsize = ehdr32_shentsize; + elf_parser.ehdr_shstrndx = ehdr32_shstrndx; + elf_parser.ehdr_shnum = ehdr32_shnum; + elf_parser.shdr_addr = shdr32_addr; + elf_parser.shdr_offset = shdr32_offset; + elf_parser.shdr_link = shdr32_link; + elf_parser.shdr_size = shdr32_size; + elf_parser.shdr_name = shdr32_name; + elf_parser.shdr_type = shdr32_type; + elf_parser.shdr_entsize = 
shdr32_entsize; + elf_parser.sym_type = sym32_type; + elf_parser.sym_name = sym32_name; + elf_parser.sym_value = sym32_value; + elf_parser.sym_shndx = sym32_shndx; + elf_parser.rela_offset = rela32_offset; + elf_parser.rela_info = rela32_info; + elf_parser.rela_addend = rela32_addend; + elf_parser.rela_write_addend = rela32_write_addend; + + if (elf_parser.r2(&ehdr->e32.e_ehsize) != sizeof(Elf32_Ehdr) || + elf_parser.r2(&ehdr->e32.e_shentsize) != sizeof(Elf32_Shdr)) { + fprintf(stderr, + "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); + return -1; + } + + } + break; + case ELFCLASS64: { + elf_parser.ehdr_shoff = ehdr64_shoff; + elf_parser.ehdr_shentsize = ehdr64_shentsize; + elf_parser.ehdr_shstrndx = ehdr64_shstrndx; + elf_parser.ehdr_shnum = ehdr64_shnum; + elf_parser.shdr_addr = shdr64_addr; + elf_parser.shdr_offset = shdr64_offset; + elf_parser.shdr_link = shdr64_link; + elf_parser.shdr_size = shdr64_size; + elf_parser.shdr_name = shdr64_name; + elf_parser.shdr_type = shdr64_type; + elf_parser.shdr_entsize = shdr64_entsize; + elf_parser.sym_type = sym64_type; + elf_parser.sym_name = sym64_name; + elf_parser.sym_value = sym64_value; + elf_parser.sym_shndx = sym64_shndx; + elf_parser.rela_offset = rela64_offset; + elf_parser.rela_info = rela64_info; + elf_parser.rela_addend = rela64_addend; + elf_parser.rela_write_addend = rela64_write_addend; + + if (elf_parser.r2(&ehdr->e64.e_ehsize) != sizeof(Elf64_Ehdr) || + elf_parser.r2(&ehdr->e64.e_shentsize) != sizeof(Elf64_Shdr)) { + fprintf(stderr, + "unrecognized ET_EXEC/ET_DYN file: %s\n", + fname); + return -1; + } + + } + break; + default: + fprintf(stderr, "unrecognized ELF class %d %s\n", + ehdr->e32.e_ident[EI_CLASS], fname); + return -1; + } + return 0; +} + +int elf_map_machine(void *addr) +{ + Elf_Ehdr *ehdr = addr; + + return elf_parser.r2(&ehdr->e32.e_machine); +} + +int elf_map_long_size(void *addr) +{ + Elf_Ehdr *ehdr = addr; + + return ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32 ? 
4 : 8; +} + +void *elf_map(char const *fname, size_t *size, uint32_t types) +{ + void *addr; + int ret; + + addr = map_file(fname, size); + if (!addr) + return NULL; + + ret = elf_parse(fname, addr, types); + if (ret < 0) { + elf_unmap(addr, *size); + return NULL; + } + + return addr; +} + +void elf_unmap(void *addr, size_t size) +{ + munmap(addr, size); +} diff --git a/scripts/elf-parse.h b/scripts/elf-parse.h new file mode 100644 index 000000000000..f4411e03069d --- /dev/null +++ b/scripts/elf-parse.h @@ -0,0 +1,305 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _SCRIPTS_ELF_PARSE_H +#define _SCRIPTS_ELF_PARSE_H + +#include + +#include +#include + +typedef union { + Elf32_Ehdr e32; + Elf64_Ehdr e64; +} Elf_Ehdr; + +typedef union { + Elf32_Shdr e32; + Elf64_Shdr e64; +} Elf_Shdr; + +typedef union { + Elf32_Sym e32; + Elf64_Sym e64; +} Elf_Sym; + +typedef union { + Elf32_Rela e32; + Elf64_Rela e64; +} Elf_Rela; + +struct elf_funcs { + int (*compare_extable)(const void *a, const void *b); + uint64_t (*ehdr_shoff)(Elf_Ehdr *ehdr); + uint16_t (*ehdr_shstrndx)(Elf_Ehdr *ehdr); + uint16_t (*ehdr_shentsize)(Elf_Ehdr *ehdr); + uint16_t (*ehdr_shnum)(Elf_Ehdr *ehdr); + uint64_t (*shdr_addr)(Elf_Shdr *shdr); + uint64_t (*shdr_offset)(Elf_Shdr *shdr); + uint64_t (*shdr_size)(Elf_Shdr *shdr); + uint64_t (*shdr_entsize)(Elf_Shdr *shdr); + uint32_t (*shdr_link)(Elf_Shdr *shdr); + uint32_t (*shdr_name)(Elf_Shdr *shdr); + uint32_t (*shdr_type)(Elf_Shdr *shdr); + uint8_t (*sym_type)(Elf_Sym *sym); + uint32_t (*sym_name)(Elf_Sym *sym); + uint64_t (*sym_value)(Elf_Sym *sym); + uint16_t (*sym_shndx)(Elf_Sym *sym); + uint64_t (*rela_offset)(Elf_Rela *rela); + uint64_t (*rela_info)(Elf_Rela *rela); + uint64_t (*rela_addend)(Elf_Rela *rela); + void (*rela_write_addend)(Elf_Rela *rela, uint64_t val); + uint32_t (*r)(const uint32_t *); + uint16_t (*r2)(const uint16_t *); + uint64_t (*r8)(const uint64_t *); + void (*w)(uint32_t, uint32_t *); + void (*w8)(uint64_t, uint64_t *); 
+}; + +extern struct elf_funcs elf_parser; + +static inline uint64_t ehdr64_shoff(Elf_Ehdr *ehdr) +{ + return elf_parser.r8(&ehdr->e64.e_shoff); +} + +static inline uint64_t ehdr32_shoff(Elf_Ehdr *ehdr) +{ + return elf_parser.r(&ehdr->e32.e_shoff); +} + +static inline uint64_t ehdr_shoff(Elf_Ehdr *ehdr) +{ + return elf_parser.ehdr_shoff(ehdr); +} + +#define EHDR_HALF(fn_name) \ +static inline uint16_t ehdr64_##fn_name(Elf_Ehdr *ehdr) \ +{ \ + return elf_parser.r2(&ehdr->e64.e_##fn_name); \ +} \ + \ +static inline uint16_t ehdr32_##fn_name(Elf_Ehdr *ehdr) \ +{ \ + return elf_parser.r2(&ehdr->e32.e_##fn_name); \ +} \ + \ +static inline uint16_t ehdr_##fn_name(Elf_Ehdr *ehdr) \ +{ \ + return elf_parser.ehdr_##fn_name(ehdr); \ +} + +EHDR_HALF(shentsize) +EHDR_HALF(shstrndx) +EHDR_HALF(shnum) + +#define SHDR_WORD(fn_name) \ +static inline uint32_t shdr64_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e64.sh_##fn_name); \ +} \ + \ +static inline uint32_t shdr32_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e32.sh_##fn_name); \ +} \ + \ +static inline uint32_t shdr_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.shdr_##fn_name(shdr); \ +} + +#define SHDR_ADDR(fn_name) \ +static inline uint64_t shdr64_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r8(&shdr->e64.sh_##fn_name); \ +} \ + \ +static inline uint64_t shdr32_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e32.sh_##fn_name); \ +} \ + \ +static inline uint64_t shdr_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.shdr_##fn_name(shdr); \ +} + +#define SHDR_WORD(fn_name) \ +static inline uint32_t shdr64_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e64.sh_##fn_name); \ +} \ + \ +static inline uint32_t shdr32_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.r(&shdr->e32.sh_##fn_name); \ +} \ +static inline uint32_t shdr_##fn_name(Elf_Shdr *shdr) \ +{ \ + return elf_parser.shdr_##fn_name(shdr); \ +} + +SHDR_ADDR(addr) +SHDR_ADDR(offset) 
+SHDR_ADDR(size) +SHDR_ADDR(entsize) + +SHDR_WORD(link) +SHDR_WORD(name) +SHDR_WORD(type) + +#define SYM_ADDR(fn_name) \ +static inline uint64_t sym64_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r8(&sym->e64.st_##fn_name); \ +} \ + \ +static inline uint64_t sym32_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r(&sym->e32.st_##fn_name); \ +} \ + \ +static inline uint64_t sym_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.sym_##fn_name(sym); \ +} + +#define SYM_WORD(fn_name) \ +static inline uint32_t sym64_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r(&sym->e64.st_##fn_name); \ +} \ + \ +static inline uint32_t sym32_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r(&sym->e32.st_##fn_name); \ +} \ + \ +static inline uint32_t sym_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.sym_##fn_name(sym); \ +} + +#define SYM_HALF(fn_name) \ +static inline uint16_t sym64_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r2(&sym->e64.st_##fn_name); \ +} \ + \ +static inline uint16_t sym32_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.r2(&sym->e32.st_##fn_name); \ +} \ + \ +static inline uint16_t sym_##fn_name(Elf_Sym *sym) \ +{ \ + return elf_parser.sym_##fn_name(sym); \ +} + +static inline uint8_t sym64_type(Elf_Sym *sym) +{ + return ELF64_ST_TYPE(sym->e64.st_info); +} + +static inline uint8_t sym32_type(Elf_Sym *sym) +{ + return ELF32_ST_TYPE(sym->e32.st_info); +} + +static inline uint8_t sym_type(Elf_Sym *sym) +{ + return elf_parser.sym_type(sym); +} + +SYM_ADDR(value) +SYM_WORD(name) +SYM_HALF(shndx) + +#define __maybe_unused __attribute__((__unused__)) + +#define RELA_ADDR(fn_name) \ +static inline uint64_t rela64_##fn_name(Elf_Rela *rela) \ +{ \ + return elf_parser.r8((uint64_t *)&rela->e64.r_##fn_name); \ +} \ + \ +static inline uint64_t rela32_##fn_name(Elf_Rela *rela) \ +{ \ + return elf_parser.r((uint32_t *)&rela->e32.r_##fn_name); \ +} \ + \ +static inline uint64_t __maybe_unused rela_##fn_name(Elf_Rela *rela) \ +{ \ + return 
elf_parser.rela_##fn_name(rela); \ +} + +RELA_ADDR(offset) +RELA_ADDR(info) +RELA_ADDR(addend) + +static inline void rela64_write_addend(Elf_Rela *rela, uint64_t val) +{ + elf_parser.w8(val, (uint64_t *)&rela->e64.r_addend); +} + +static inline void rela32_write_addend(Elf_Rela *rela, uint64_t val) +{ + elf_parser.w(val, (uint32_t *)&rela->e32.r_addend); +} + +static inline uint32_t rbe(const uint32_t *x) +{ + return get_unaligned_be32(x); +} + +static inline uint16_t r2be(const uint16_t *x) +{ + return get_unaligned_be16(x); +} + +static inline uint64_t r8be(const uint64_t *x) +{ + return get_unaligned_be64(x); +} + +static inline uint32_t rle(const uint32_t *x) +{ + return get_unaligned_le32(x); +} + +static inline uint16_t r2le(const uint16_t *x) +{ + return get_unaligned_le16(x); +} + +static inline uint64_t r8le(const uint64_t *x) +{ + return get_unaligned_le64(x); +} + +static inline void wbe(uint32_t val, uint32_t *x) +{ + put_unaligned_be32(val, x); +} + +static inline void wle(uint32_t val, uint32_t *x) +{ + put_unaligned_le32(val, x); +} + +static inline void w8be(uint64_t val, uint64_t *x) +{ + put_unaligned_be64(val, x); +} + +static inline void w8le(uint64_t val, uint64_t *x) +{ + put_unaligned_le64(val, x); +} + +void *elf_map(char const *fname, size_t *size, uint32_t types); +void elf_unmap(void *addr, size_t size); +int elf_map_machine(void *addr); +int elf_map_long_size(void *addr); + +#endif /* _SCRIPTS_ELF_PARSE_H */ diff --git a/scripts/sorttable.c b/scripts/sorttable.c index deed676bfe38..e8ed11c680c6 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -21,10 +21,8 @@ */ #include -#include #include #include -#include #include #include #include @@ -34,8 +32,7 @@ #include #include -#include -#include +#include "elf-parse.h" #ifndef EM_ARCOMPACT #define EM_ARCOMPACT 93 @@ -65,335 +62,8 @@ #define EM_LOONGARCH 258 #endif -typedef union { - Elf32_Ehdr e32; - Elf64_Ehdr e64; -} Elf_Ehdr; - -typedef union { - Elf32_Shdr e32; - Elf64_Shdr 
e64; -} Elf_Shdr; - -typedef union { - Elf32_Sym e32; - Elf64_Sym e64; -} Elf_Sym; - -typedef union { - Elf32_Rela e32; - Elf64_Rela e64; -} Elf_Rela; - -static uint32_t (*r)(const uint32_t *); -static uint16_t (*r2)(const uint16_t *); -static uint64_t (*r8)(const uint64_t *); -static void (*w)(uint32_t, uint32_t *); -static void (*w8)(uint64_t, uint64_t *); typedef void (*table_sort_t)(char *, int); -static struct elf_funcs { - int (*compare_extable)(const void *a, const void *b); - uint64_t (*ehdr_shoff)(Elf_Ehdr *ehdr); - uint16_t (*ehdr_shstrndx)(Elf_Ehdr *ehdr); - uint16_t (*ehdr_shentsize)(Elf_Ehdr *ehdr); - uint16_t (*ehdr_shnum)(Elf_Ehdr *ehdr); - uint64_t (*shdr_addr)(Elf_Shdr *shdr); - uint64_t (*shdr_offset)(Elf_Shdr *shdr); - uint64_t (*shdr_size)(Elf_Shdr *shdr); - uint64_t (*shdr_entsize)(Elf_Shdr *shdr); - uint32_t (*shdr_link)(Elf_Shdr *shdr); - uint32_t (*shdr_name)(Elf_Shdr *shdr); - uint32_t (*shdr_type)(Elf_Shdr *shdr); - uint8_t (*sym_type)(Elf_Sym *sym); - uint32_t (*sym_name)(Elf_Sym *sym); - uint64_t (*sym_value)(Elf_Sym *sym); - uint16_t (*sym_shndx)(Elf_Sym *sym); - uint64_t (*rela_offset)(Elf_Rela *rela); - uint64_t (*rela_info)(Elf_Rela *rela); - uint64_t (*rela_addend)(Elf_Rela *rela); - void (*rela_write_addend)(Elf_Rela *rela, uint64_t val); -} e; - -static uint64_t ehdr64_shoff(Elf_Ehdr *ehdr) -{ - return r8(&ehdr->e64.e_shoff); -} - -static uint64_t ehdr32_shoff(Elf_Ehdr *ehdr) -{ - return r(&ehdr->e32.e_shoff); -} - -static uint64_t ehdr_shoff(Elf_Ehdr *ehdr) -{ - return e.ehdr_shoff(ehdr); -} - -#define EHDR_HALF(fn_name) \ -static uint16_t ehdr64_##fn_name(Elf_Ehdr *ehdr) \ -{ \ - return r2(&ehdr->e64.e_##fn_name); \ -} \ - \ -static uint16_t ehdr32_##fn_name(Elf_Ehdr *ehdr) \ -{ \ - return r2(&ehdr->e32.e_##fn_name); \ -} \ - \ -static uint16_t ehdr_##fn_name(Elf_Ehdr *ehdr) \ -{ \ - return e.ehdr_##fn_name(ehdr); \ -} - -EHDR_HALF(shentsize) -EHDR_HALF(shstrndx) -EHDR_HALF(shnum) - -#define SHDR_WORD(fn_name) \ -static uint32_t 
shdr64_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e64.sh_##fn_name); \ -} \ - \ -static uint32_t shdr32_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e32.sh_##fn_name); \ -} \ - \ -static uint32_t shdr_##fn_name(Elf_Shdr *shdr) \ -{ \ - return e.shdr_##fn_name(shdr); \ -} - -#define SHDR_ADDR(fn_name) \ -static uint64_t shdr64_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r8(&shdr->e64.sh_##fn_name); \ -} \ - \ -static uint64_t shdr32_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e32.sh_##fn_name); \ -} \ - \ -static uint64_t shdr_##fn_name(Elf_Shdr *shdr) \ -{ \ - return e.shdr_##fn_name(shdr); \ -} - -#define SHDR_WORD(fn_name) \ -static uint32_t shdr64_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e64.sh_##fn_name); \ -} \ - \ -static uint32_t shdr32_##fn_name(Elf_Shdr *shdr) \ -{ \ - return r(&shdr->e32.sh_##fn_name); \ -} \ -static uint32_t shdr_##fn_name(Elf_Shdr *shdr) \ -{ \ - return e.shdr_##fn_name(shdr); \ -} - -SHDR_ADDR(addr) -SHDR_ADDR(offset) -SHDR_ADDR(size) -SHDR_ADDR(entsize) - -SHDR_WORD(link) -SHDR_WORD(name) -SHDR_WORD(type) - -#define SYM_ADDR(fn_name) \ -static uint64_t sym64_##fn_name(Elf_Sym *sym) \ -{ \ - return r8(&sym->e64.st_##fn_name); \ -} \ - \ -static uint64_t sym32_##fn_name(Elf_Sym *sym) \ -{ \ - return r(&sym->e32.st_##fn_name); \ -} \ - \ -static uint64_t sym_##fn_name(Elf_Sym *sym) \ -{ \ - return e.sym_##fn_name(sym); \ -} - -#define SYM_WORD(fn_name) \ -static uint32_t sym64_##fn_name(Elf_Sym *sym) \ -{ \ - return r(&sym->e64.st_##fn_name); \ -} \ - \ -static uint32_t sym32_##fn_name(Elf_Sym *sym) \ -{ \ - return r(&sym->e32.st_##fn_name); \ -} \ - \ -static uint32_t sym_##fn_name(Elf_Sym *sym) \ -{ \ - return e.sym_##fn_name(sym); \ -} - -#define SYM_HALF(fn_name) \ -static uint16_t sym64_##fn_name(Elf_Sym *sym) \ -{ \ - return r2(&sym->e64.st_##fn_name); \ -} \ - \ -static uint16_t sym32_##fn_name(Elf_Sym *sym) \ -{ \ - return r2(&sym->e32.st_##fn_name); \ -} \ - \ -static uint16_t sym_##fn_name(Elf_Sym 
*sym) \ -{ \ - return e.sym_##fn_name(sym); \ -} - -static uint8_t sym64_type(Elf_Sym *sym) -{ - return ELF64_ST_TYPE(sym->e64.st_info); -} - -static uint8_t sym32_type(Elf_Sym *sym) -{ - return ELF32_ST_TYPE(sym->e32.st_info); -} - -static uint8_t sym_type(Elf_Sym *sym) -{ - return e.sym_type(sym); -} - -SYM_ADDR(value) -SYM_WORD(name) -SYM_HALF(shndx) - -#define __maybe_unused __attribute__((__unused__)) - -#define RELA_ADDR(fn_name) \ -static uint64_t rela64_##fn_name(Elf_Rela *rela) \ -{ \ - return r8((uint64_t *)&rela->e64.r_##fn_name); \ -} \ - \ -static uint64_t rela32_##fn_name(Elf_Rela *rela) \ -{ \ - return r((uint32_t *)&rela->e32.r_##fn_name); \ -} \ - \ -static uint64_t __maybe_unused rela_##fn_name(Elf_Rela *rela) \ -{ \ - return e.rela_##fn_name(rela); \ -} - -RELA_ADDR(offset) -RELA_ADDR(info) -RELA_ADDR(addend) - -static void rela64_write_addend(Elf_Rela *rela, uint64_t val) -{ - w8(val, (uint64_t *)&rela->e64.r_addend); -} - -static void rela32_write_addend(Elf_Rela *rela, uint64_t val) -{ - w(val, (uint32_t *)&rela->e32.r_addend); -} - -/* - * Get the whole file as a programming convenience in order to avoid - * malloc+lseek+read+free of many pieces. If successful, then mmap - * avoids copying unused pieces; else just read the whole file. - * Open for both read and write. 
- */ -static void *mmap_file(char const *fname, size_t *size) -{ - int fd; - struct stat sb; - void *addr = NULL; - - fd = open(fname, O_RDWR); - if (fd < 0) { - perror(fname); - return NULL; - } - if (fstat(fd, &sb) < 0) { - perror(fname); - goto out; - } - if (!S_ISREG(sb.st_mode)) { - fprintf(stderr, "not a regular file: %s\n", fname); - goto out; - } - - addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); - if (addr == MAP_FAILED) { - fprintf(stderr, "Could not mmap file: %s\n", fname); - goto out; - } - - *size = sb.st_size; - -out: - close(fd); - return addr; -} - -static uint32_t rbe(const uint32_t *x) -{ - return get_unaligned_be32(x); -} - -static uint16_t r2be(const uint16_t *x) -{ - return get_unaligned_be16(x); -} - -static uint64_t r8be(const uint64_t *x) -{ - return get_unaligned_be64(x); -} - -static uint32_t rle(const uint32_t *x) -{ - return get_unaligned_le32(x); -} - -static uint16_t r2le(const uint16_t *x) -{ - return get_unaligned_le16(x); -} - -static uint64_t r8le(const uint64_t *x) -{ - return get_unaligned_le64(x); -} - -static void wbe(uint32_t val, uint32_t *x) -{ - put_unaligned_be32(val, x); -} - -static void wle(uint32_t val, uint32_t *x) -{ - put_unaligned_le32(val, x); -} - -static void w8be(uint64_t val, uint64_t *x) -{ - put_unaligned_be64(val, x); -} - -static void w8le(uint64_t val, uint64_t *x) -{ - put_unaligned_le64(val, x); -} - /* * Move reserved section indices SHN_LORESERVE..SHN_HIRESERVE out of * the way to -256..-1, to avoid conflicting with real section @@ -415,13 +85,13 @@ static inline unsigned int get_secindex(unsigned int shndx, return SPECIAL(shndx); if (shndx != SHN_XINDEX) return shndx; - return r(&symtab_shndx_start[sym_offs]); + return elf_parser.r(&symtab_shndx_start[sym_offs]); } static int compare_extable_32(const void *a, const void *b) { - Elf32_Addr av = r(a); - Elf32_Addr bv = r(b); + Elf32_Addr av = elf_parser.r(a); + Elf32_Addr bv = elf_parser.r(b); if (av < bv) return -1; @@ -430,18 
+100,15 @@ static int compare_extable_32(const void *a, const void *b) static int compare_extable_64(const void *a, const void *b) { - Elf64_Addr av = r8(a); - Elf64_Addr bv = r8(b); + Elf64_Addr av = elf_parser.r8(a); + Elf64_Addr bv = elf_parser.r8(b); if (av < bv) return -1; return av > bv; } -static int compare_extable(const void *a, const void *b) -{ - return e.compare_extable(a, b); -} +static int (*compare_extable)(const void *a, const void *b); static inline void *get_index(void *start, int entsize, int index) { @@ -577,7 +244,7 @@ static int (*compare_values)(const void *a, const void *b); /* Only used for sorting mcount table */ static void rela_write_addend(Elf_Rela *rela, uint64_t val) { - e.rela_write_addend(rela, val); + elf_parser.rela_write_addend(rela, val); } struct func_info { @@ -792,9 +459,9 @@ static int fill_addrs(void *ptr, uint64_t size, void *addrs) for (; ptr < end; ptr += long_size, addrs += long_size, count++) { if (long_size == 4) - *(uint32_t *)ptr = r(addrs); + *(uint32_t *)ptr = elf_parser.r(addrs); else - *(uint64_t *)ptr = r8(addrs); + *(uint64_t *)ptr = elf_parser.r8(addrs); } return count; } @@ -805,9 +472,9 @@ static void replace_addrs(void *ptr, uint64_t size, void *addrs) for (; ptr < end; ptr += long_size, addrs += long_size) { if (long_size == 4) - w(*(uint32_t *)ptr, addrs); + elf_parser.w(*(uint32_t *)ptr, addrs); else - w8(*(uint64_t *)ptr, addrs); + elf_parser.w8(*(uint64_t *)ptr, addrs); } } @@ -1111,7 +778,7 @@ static int do_sort(Elf_Ehdr *ehdr, sym_value(sort_needed_sym) - shdr_addr(sort_needed_sec); /* extable has been sorted, clear the flag */ - w(0, sort_needed_loc); + elf_parser.w(0, sort_needed_loc); rc = 0; out: @@ -1155,8 +822,8 @@ out: static int compare_relative_table(const void *a, const void *b) { - int32_t av = (int32_t)r(a); - int32_t bv = (int32_t)r(b); + int32_t av = (int32_t)elf_parser.r(a); + int32_t bv = (int32_t)elf_parser.r(b); if (av < bv) return -1; @@ -1175,7 +842,7 @@ static void 
sort_relative_table(char *extab_image, int image_size) */ while (i < image_size) { uint32_t *loc = (uint32_t *)(extab_image + i); - w(r(loc) + i, loc); + elf_parser.w(elf_parser.r(loc) + i, loc); i += 4; } @@ -1185,7 +852,7 @@ static void sort_relative_table(char *extab_image, int image_size) i = 0; while (i < image_size) { uint32_t *loc = (uint32_t *)(extab_image + i); - w(r(loc) - i, loc); + elf_parser.w(elf_parser.r(loc) - i, loc); i += 4; } } @@ -1197,8 +864,8 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) while (i < image_size) { uint32_t *loc = (uint32_t *)(extab_image + i); - w(r(loc) + i, loc); - w(r(loc + 1) + i + 4, loc + 1); + elf_parser.w(elf_parser.r(loc) + i, loc); + elf_parser.w(elf_parser.r(loc + 1) + i + 4, loc + 1); /* Don't touch the fixup type or data */ i += sizeof(uint32_t) * 3; @@ -1210,8 +877,8 @@ static void sort_relative_table_with_data(char *extab_image, int image_size) while (i < image_size) { uint32_t *loc = (uint32_t *)(extab_image + i); - w(r(loc) - i, loc); - w(r(loc + 1) - (i + 4), loc + 1); + elf_parser.w(elf_parser.r(loc) - i, loc); + elf_parser.w(elf_parser.r(loc + 1) - (i + 4), loc + 1); /* Don't touch the fixup type or data */ i += sizeof(uint32_t) * 3; @@ -1223,35 +890,7 @@ static int do_file(char const *const fname, void *addr) Elf_Ehdr *ehdr = addr; table_sort_t custom_sort = NULL; - switch (ehdr->e32.e_ident[EI_DATA]) { - case ELFDATA2LSB: - r = rle; - r2 = r2le; - r8 = r8le; - w = wle; - w8 = w8le; - break; - case ELFDATA2MSB: - r = rbe; - r2 = r2be; - r8 = r8be; - w = wbe; - w8 = w8be; - break; - default: - fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", - ehdr->e32.e_ident[EI_DATA], fname); - return -1; - } - - if (memcmp(ELFMAG, ehdr->e32.e_ident, SELFMAG) != 0 || - (r2(&ehdr->e32.e_type) != ET_EXEC && r2(&ehdr->e32.e_type) != ET_DYN) || - ehdr->e32.e_ident[EI_VERSION] != EV_CURRENT) { - fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file %s\n", fname); - return -1; - } - - 
switch (r2(&ehdr->e32.e_machine)) { + switch (elf_map_machine(ehdr)) { case EM_AARCH64: #ifdef MCOUNT_SORT_ENABLED sort_reloc = true; @@ -1281,85 +920,37 @@ static int do_file(char const *const fname, void *addr) break; default: fprintf(stderr, "unrecognized e_machine %d %s\n", - r2(&ehdr->e32.e_machine), fname); + elf_parser.r2(&ehdr->e32.e_machine), fname); return -1; } - switch (ehdr->e32.e_ident[EI_CLASS]) { - case ELFCLASS32: { - struct elf_funcs efuncs = { - .compare_extable = compare_extable_32, - .ehdr_shoff = ehdr32_shoff, - .ehdr_shentsize = ehdr32_shentsize, - .ehdr_shstrndx = ehdr32_shstrndx, - .ehdr_shnum = ehdr32_shnum, - .shdr_addr = shdr32_addr, - .shdr_offset = shdr32_offset, - .shdr_link = shdr32_link, - .shdr_size = shdr32_size, - .shdr_name = shdr32_name, - .shdr_type = shdr32_type, - .shdr_entsize = shdr32_entsize, - .sym_type = sym32_type, - .sym_name = sym32_name, - .sym_value = sym32_value, - .sym_shndx = sym32_shndx, - .rela_offset = rela32_offset, - .rela_info = rela32_info, - .rela_addend = rela32_addend, - .rela_write_addend = rela32_write_addend, - }; - - e = efuncs; + switch (elf_map_long_size(addr)) { + case 4: + compare_extable = compare_extable_32, long_size = 4; extable_ent_size = 8; - if (r2(&ehdr->e32.e_ehsize) != sizeof(Elf32_Ehdr) || - r2(&ehdr->e32.e_shentsize) != sizeof(Elf32_Shdr)) { + if (elf_parser.r2(&ehdr->e32.e_ehsize) != sizeof(Elf32_Ehdr) || + elf_parser.r2(&ehdr->e32.e_shentsize) != sizeof(Elf32_Shdr)) { fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); return -1; } - } break; - case ELFCLASS64: { - struct elf_funcs efuncs = { - .compare_extable = compare_extable_64, - .ehdr_shoff = ehdr64_shoff, - .ehdr_shentsize = ehdr64_shentsize, - .ehdr_shstrndx = ehdr64_shstrndx, - .ehdr_shnum = ehdr64_shnum, - .shdr_addr = shdr64_addr, - .shdr_offset = shdr64_offset, - .shdr_link = shdr64_link, - .shdr_size = shdr64_size, - .shdr_name = shdr64_name, - .shdr_type = shdr64_type, - .shdr_entsize = shdr64_entsize, 
- .sym_type = sym64_type, - .sym_name = sym64_name, - .sym_value = sym64_value, - .sym_shndx = sym64_shndx, - .rela_offset = rela64_offset, - .rela_info = rela64_info, - .rela_addend = rela64_addend, - .rela_write_addend = rela64_write_addend, - }; - - e = efuncs; + case 8: + compare_extable = compare_extable_64, long_size = 8; extable_ent_size = 16; - if (r2(&ehdr->e64.e_ehsize) != sizeof(Elf64_Ehdr) || - r2(&ehdr->e64.e_shentsize) != sizeof(Elf64_Shdr)) { + if (elf_parser.r2(&ehdr->e64.e_ehsize) != sizeof(Elf64_Ehdr) || + elf_parser.r2(&ehdr->e64.e_shentsize) != sizeof(Elf64_Shdr)) { fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file: %s\n", fname); return -1; } - } break; default: fprintf(stderr, "unrecognized ELF class %d %s\n", @@ -1398,7 +989,7 @@ int main(int argc, char *argv[]) /* Process each file in turn, allowing deep failure. */ for (i = optind; i < argc; i++) { - addr = mmap_file(argv[i], &size); + addr = elf_map(argv[i], &size, (1 << ET_EXEC) | (1 << ET_DYN)); if (!addr) { ++n_error; continue; @@ -1407,7 +998,7 @@ int main(int argc, char *argv[]) if (do_file(argv[i], addr)) ++n_error; - munmap(addr, size); + elf_unmap(addr, size); } return !!n_error; -- cgit v1.2.3 From e30f8e61e2518a837837daa26cda3c8cc30f3226 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Oct 2025 20:43:40 -0400 Subject: tracing: Add a tracepoint verification check at build time If a tracepoint is defined via DECLARE_TRACE() or TRACE_EVENT() but never called (via the trace_() function), its metadata is still around in memory and not discarded. When created via TRACE_EVENT() the situation is worse because the TRACE_EVENT() creates metadata that can be around 5k per trace event. Having unused trace events causes several thousand of wasted bytes. Add a verifier that injects a string of the name of the tracepoint it calls that is added to the discarded section "__tracepoint_check". 
For every builtin tracepoint, its name (which is saved in the in-memory section "__tracepoints_strings") will also appear in the "__tracepoint_check" section if it is used. Add a new program called tracepoint-update that is run during the build. This is executed on the vmlinux.o before the __tracepoint_check section is discarded (the section is discarded before vmlinux is created). This program will create an array of each string in the __tracepoint_check section and then sort it. Then it will walk the strings in the __tracepoints_strings section and do a binary search to check whether each name is in the __tracepoint_check section. If it is not, then it is unused and a warning is printed. Note, this currently only handles tracepoints that are builtin and not in modules. Enabling this currently with a given config produces: warning: tracepoint 'sched_move_numa' is unused. warning: tracepoint 'sched_stick_numa' is unused. warning: tracepoint 'sched_swap_numa' is unused. warning: tracepoint 'pelt_hw_tp' is unused. warning: tracepoint 'pelt_irq_tp' is unused. warning: tracepoint 'rcu_preempt_task' is unused. warning: tracepoint 'rcu_unlock_preempted_task' is unused. warning: tracepoint 'xdp_bulk_tx' is unused. warning: tracepoint 'xdp_redirect_map' is unused. warning: tracepoint 'xdp_redirect_map_err' is unused. warning: tracepoint 'vma_mas_szero' is unused. warning: tracepoint 'vma_store' is unused. warning: tracepoint 'hugepage_set_pmd' is unused. warning: tracepoint 'hugepage_set_pud' is unused. warning: tracepoint 'hugepage_update_pmd' is unused. warning: tracepoint 'hugepage_update_pud' is unused. warning: tracepoint 'block_rq_remap' is unused. warning: tracepoint 'xhci_dbc_handle_event' is unused. warning: tracepoint 'xhci_dbc_handle_transfer' is unused. warning: tracepoint 'xhci_dbc_gadget_ep_queue' is unused. warning: tracepoint 'xhci_dbc_alloc_request' is unused. warning: tracepoint 'xhci_dbc_free_request' is unused.
warning: tracepoint 'xhci_dbc_queue_request' is unused. warning: tracepoint 'xhci_dbc_giveback_request' is unused. warning: tracepoint 'tcp_ao_wrong_maclen' is unused. warning: tracepoint 'tcp_ao_mismatch' is unused. warning: tracepoint 'tcp_ao_key_not_found' is unused. warning: tracepoint 'tcp_ao_rnext_request' is unused. warning: tracepoint 'tcp_ao_synack_no_key' is unused. warning: tracepoint 'tcp_ao_snd_sne_update' is unused. warning: tracepoint 'tcp_ao_rcv_sne_update' is unused. Some of the above are totally unused, but others are not used due to their "trace_" functions being inside configs, in which case the defined tracepoints should also be inside those same configs. Others are architecture specific but defined in generic code, where they should either be moved to the architecture or be surrounded by #ifdef for the architectures they are for. This tool could be updated to process modules in the future. I'd like to thank Mathieu Desnoyers for suggesting using strings instead of pointers, as using pointers in vmlinux.o required handling relocations, which in turn required implementing almost a full-featured linker. To enable this check, run the build with: make UT=1 Note, when all the existing unused tracepoints are removed from the build, the "UT=1" will be removed and this will always be enabled when tracepoints are configured to warn on any new tracepoints. The reason this isn't always enabled now is that it will introduce a lot of warnings for the current unused tracepoints, and all bisects would end at this commit for those warnings.
Link: https://lore.kernel.org/all/20250528114549.4d8a5e03@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Linus Torvalds Cc: Randy Dunlap Cc: Stephen Rothwell Link: https://lore.kernel.org/20251022004452.920728129@kernel.org Suggested-by: Mathieu Desnoyers # for using strings instead of pointers Signed-off-by: Steven Rostedt (Google) --- scripts/Makefile | 3 + scripts/link-vmlinux.sh | 7 ++ scripts/tracepoint-update.c | 232 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 100644 scripts/tracepoint-update.c (limited to 'scripts') diff --git a/scripts/Makefile b/scripts/Makefile index f19624b3ed92..0941e5ce7b57 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -11,8 +11,10 @@ hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_builder hostprogs-always-$(CONFIG_RUST_KERNEL_DOCTESTS) += rustdoc_test_gen +hostprogs-always-$(CONFIG_TRACEPOINTS) += tracepoint-update sorttable-objs := sorttable.o elf-parse.o +tracepoint-update-objs := tracepoint-update.o elf-parse.o ifneq ($(or $(CONFIG_X86_64),$(CONFIG_X86_32)),) always-$(CONFIG_RUST) += target.json @@ -27,6 +29,7 @@ generate_rust_target-rust := y rustdoc_test_builder-rust := y rustdoc_test_gen-rust := y +HOSTCFLAGS_tracepoint-update.o = -I$(srctree)/tools/include HOSTCFLAGS_elf-parse.o = -I$(srctree)/tools/include HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTLDLIBS_sorttable = -lpthread diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 433849ff7529..d304029fa6da 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -208,6 +208,13 @@ kallsymso= strip_debug= generate_map= +# Use "make UT=1" to trigger warnings on unused 
tracepoints +case "${WARN_ON_UNUSED_TRACEPOINTS}" in +*1*) + ${objtree}/scripts/tracepoint-update vmlinux.o + ;; +esac + if is_enabled CONFIG_KALLSYMS; then true > .tmp_vmlinux0.syms kallsyms .tmp_vmlinux0.syms .tmp_vmlinux0.kallsyms diff --git a/scripts/tracepoint-update.c b/scripts/tracepoint-update.c new file mode 100644 index 000000000000..6ec30f39d0ad --- /dev/null +++ b/scripts/tracepoint-update.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "elf-parse.h" + +static Elf_Shdr *check_data_sec; +static Elf_Shdr *tracepoint_data_sec; + +static inline void *get_index(void *start, int entsize, int index) +{ + return start + (entsize * index); +} + +static int compare_strings(const void *a, const void *b) +{ + const char *av = *(const char **)a; + const char *bv = *(const char **)b; + + return strcmp(av, bv); +} + +struct elf_tracepoint { + Elf_Ehdr *ehdr; + const char **array; + int count; +}; + +#define REALLOC_SIZE (1 << 10) +#define REALLOC_MASK (REALLOC_SIZE - 1) + +static int add_string(const char *str, const char ***vals, int *count) +{ + const char **array = *vals; + + if (!(*count & REALLOC_MASK)) { + int size = (*count) + REALLOC_SIZE; + + array = realloc(array, sizeof(char *) * size); + if (!array) { + fprintf(stderr, "Failed memory allocation\n"); + return -1; + } + *vals = array; + } + + array[(*count)++] = str; + return 0; +} + +/** + * for_each_shdr_str - iterator that reads strings that are in an ELF section. + * @len: "int" to hold the length of the current string + * @ehdr: A pointer to the ehdr of the ELF file + * @sec: The section that has the strings to iterate on + * + * This is a for loop that iterates over all the nul terminated strings + * that are in a given ELF section. 
The variable "str" will hold + * the current string for each iteration and the passed in @len will + * contain the strlen() of that string. + */ +#define for_each_shdr_str(len, ehdr, sec) \ + for (const char *str = (void *)(ehdr) + shdr_offset(sec), \ + *end = str + shdr_size(sec); \ + len = strlen(str), str < end; \ + str += (len) + 1) + + +static void make_trace_array(struct elf_tracepoint *etrace) +{ + Elf_Ehdr *ehdr = etrace->ehdr; + const char **vals = NULL; + int count = 0; + int len; + + etrace->array = NULL; + + /* + * The __tracepoint_check section is filled with strings of the + * names of tracepoints (in tracepoint_strings). Create an array + * that points to each string and then sort the array. + */ + for_each_shdr_str(len, ehdr, check_data_sec) { + if (!len) + continue; + if (add_string(str, &vals, &count) < 0) + return; + } + + /* If CONFIG_TRACEPOINT_VERIFY_USED is not set, there's nothing to do */ + if (!count) + return; + + qsort(vals, count, sizeof(char *), compare_strings); + + etrace->array = vals; + etrace->count = count; +} + +static int find_event(const char *str, void *array, size_t size) +{ + return bsearch(&str, array, size, sizeof(char *), compare_strings) != NULL; +} + +static void check_tracepoints(struct elf_tracepoint *etrace) +{ + Elf_Ehdr *ehdr = etrace->ehdr; + int len; + + if (!etrace->array) + return; + + /* + * The __tracepoints_strings section holds all the names of the + * defined tracepoints. If any of them are not in the + * __tracepoint_check_section it means they are not used. 
+ */ + for_each_shdr_str(len, ehdr, tracepoint_data_sec) { + if (!len) + continue; + if (!find_event(str, etrace->array, etrace->count)) { + fprintf(stderr, "warning: tracepoint '%s' is unused.\n", str); + } + } + + free(etrace->array); +} + +static void *tracepoint_check(struct elf_tracepoint *etrace) +{ + make_trace_array(etrace); + check_tracepoints(etrace); + + return NULL; +} + +static int process_tracepoints(void *addr, char const *const fname) +{ + struct elf_tracepoint etrace = {0}; + Elf_Ehdr *ehdr = addr; + Elf_Shdr *shdr_start; + Elf_Shdr *string_sec; + const char *secstrings; + unsigned int shnum; + unsigned int shstrndx; + int shentsize; + int idx; + int done = 2; + + shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr)); + shentsize = ehdr_shentsize(ehdr); + + shstrndx = ehdr_shstrndx(ehdr); + if (shstrndx == SHN_XINDEX) + shstrndx = shdr_link(shdr_start); + string_sec = get_index(shdr_start, shentsize, shstrndx); + secstrings = (const char *)ehdr + shdr_offset(string_sec); + + shnum = ehdr_shnum(ehdr); + if (shnum == SHN_UNDEF) + shnum = shdr_size(shdr_start); + + for (int i = 0; done && i < shnum; i++) { + Elf_Shdr *shdr = get_index(shdr_start, shentsize, i); + + idx = shdr_name(shdr); + + /* locate the __tracepoint_check in vmlinux */ + if (!strcmp(secstrings + idx, "__tracepoint_check")) { + check_data_sec = shdr; + done--; + } + + /* locate the __tracepoints_ptrs section in vmlinux */ + if (!strcmp(secstrings + idx, "__tracepoints_strings")) { + tracepoint_data_sec = shdr; + done--; + } + } + + if (!check_data_sec) { + fprintf(stderr, "no __tracepoint_check in file: %s\n", fname); + return -1; + } + + if (!tracepoint_data_sec) { + fprintf(stderr, "no __tracepoint_strings in file: %s\n", fname); + return -1; + } + + etrace.ehdr = ehdr; + tracepoint_check(&etrace); + return 0; +} + +int main(int argc, char *argv[]) +{ + int n_error = 0; + size_t size = 0; + void *addr = NULL; + + if (argc < 2) { + fprintf(stderr, "usage: tracepoint-update 
vmlinux...\n"); + return 0; + } + + /* Process each file in turn, allowing deep failure. */ + for (int i = 1; i < argc; i++) { + addr = elf_map(argv[i], &size, 1 << ET_REL); + if (!addr) { + ++n_error; + continue; + } + + if (process_tracepoints(addr, argv[i])) + ++n_error; + + elf_unmap(addr, size); + } + + return !!n_error; +} -- cgit v1.2.3 From eec3516b25069d8cb51b78375e337c69a3f9e789 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Oct 2025 20:43:42 -0400 Subject: tracing: Allow tracepoint-update.c to work with modules In order for tracepoint-update.c to work with modules, it cannot error out if both "__tracepoint_check" and "__tracepoints_strings" are not found. When enabled, the vmlinux.o may be required to have both, but modules only have these sections if they have tracepoints. Modules without tracepoints will not have either. They should not fail to build because of that. If one section exists the other one should too. Note, if a module defines a tracepoint but doesn't use any, it can cause this to fail. Add a new "--module" parameter to tracepoint-update to be used when running on module code. It will not error out if this is set and both sections are missing. If this is set, and only the "__tracepoint_check" section is missing, it means the module has defined tracepoints but none of them are used. In that case, it prints a warning that the module has only unused tracepoints and exits normally to not fail the build. If the "__tracepoint_check" section exists but not the "__tracepoint_strings", then that is an error and should fail the build. 
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Linus Torvalds Cc: Randy Dunlap Cc: Stephen Rothwell Link: https://lore.kernel.org/20251022004453.255696445@kernel.org Signed-off-by: Steven Rostedt (Google) --- scripts/tracepoint-update.c | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) (limited to 'scripts') diff --git a/scripts/tracepoint-update.c b/scripts/tracepoint-update.c index 6ec30f39d0ad..7f7d90df14ce 100644 --- a/scripts/tracepoint-update.c +++ b/scripts/tracepoint-update.c @@ -112,7 +112,7 @@ static int find_event(const char *str, void *array, size_t size) return bsearch(&str, array, size, sizeof(char *), compare_strings) != NULL; } -static void check_tracepoints(struct elf_tracepoint *etrace) +static void check_tracepoints(struct elf_tracepoint *etrace, const char *fname) { Elf_Ehdr *ehdr = etrace->ehdr; int len; @@ -129,22 +129,26 @@ static void check_tracepoints(struct elf_tracepoint *etrace) if (!len) continue; if (!find_event(str, etrace->array, etrace->count)) { - fprintf(stderr, "warning: tracepoint '%s' is unused.\n", str); + fprintf(stderr, "warning: tracepoint '%s' is unused", str); + if (fname) + fprintf(stderr, " in module %s\n", fname); + else + fprintf(stderr, "\n"); } } free(etrace->array); } -static void *tracepoint_check(struct elf_tracepoint *etrace) +static void *tracepoint_check(struct elf_tracepoint *etrace, const char *fname) { make_trace_array(etrace); - check_tracepoints(etrace); + check_tracepoints(etrace, fname); return NULL; } -static int process_tracepoints(void *addr, char const *const fname) +static int process_tracepoints(bool mod, void *addr, const char *fname) { struct elf_tracepoint etrace = {0}; Elf_Ehdr *ehdr = addr; @@ -188,7 +192,19 @@ static int process_tracepoints(void *addr, char const *const fname) } } + /* + * 
Modules may not have either section. But if it has one section, + * it should have both of them. + */ + if (mod && !check_data_sec && !tracepoint_data_sec) + return 0; + if (!check_data_sec) { + if (mod) { + fprintf(stderr, "warning: Module %s has only unused tracepoints\n", fname); + /* Do not fail build */ + return 0; + } fprintf(stderr, "no __tracepoint_check in file: %s\n", fname); return -1; } @@ -198,8 +214,11 @@ static int process_tracepoints(void *addr, char const *const fname) return -1; } + if (!mod) + fname = NULL; + etrace.ehdr = ehdr; - tracepoint_check(&etrace); + tracepoint_check(&etrace, fname); return 0; } @@ -208,9 +227,19 @@ int main(int argc, char *argv[]) int n_error = 0; size_t size = 0; void *addr = NULL; + bool mod = false; + + if (argc > 1 && strcmp(argv[1], "--module") == 0) { + mod = true; + argc--; + argv++; + } if (argc < 2) { - fprintf(stderr, "usage: tracepoint-update vmlinux...\n"); + if (mod) + fprintf(stderr, "usage: tracepoint-update --module module...\n"); + else + fprintf(stderr, "usage: tracepoint-update vmlinux...\n"); return 0; } @@ -222,7 +251,7 @@ int main(int argc, char *argv[]) continue; } - if (process_tracepoints(addr, argv[i])) + if (process_tracepoints(mod, addr, argv[i])) ++n_error; elf_unmap(addr, size); -- cgit v1.2.3 From 01ecf7af00b86daf7ac441b9f94d4873d2b8fc74 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Oct 2025 20:43:43 -0400 Subject: tracing: Add warnings for unused tracepoints for modules If a modules has TRACE_EVENT() but does not use it, add a warning about it at build time. Currently, the build must be made by adding "UT=1" to the make command line in order for this to trigger. 
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Arnd Bergmann Cc: Masahiro Yamada Cc: Nathan Chancellor Cc: Nicolas Schier Cc: Nick Desaulniers Cc: Catalin Marinas Cc: Linus Torvalds Cc: Randy Dunlap Cc: Stephen Rothwell Link: https://lore.kernel.org/20251022004453.422000794@kernel.org Signed-off-by: Steven Rostedt (Google) --- scripts/Makefile.modfinal | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'scripts') diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 542ba462ed3e..149e12ff5700 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -28,6 +28,10 @@ ccflags-remove-y := $(CC_FLAGS_CFI) .module-common.o: $(srctree)/scripts/module-common.c FORCE $(call if_changed_rule,cc_o_c) +ifneq ($(WARN_ON_UNUSED_TRACEPOINTS),) +cmd_check_tracepoint = $(objtree)/scripts/tracepoint-update --module $<; +endif + quiet_cmd_ld_ko_o = LD [M] $@ cmd_ld_ko_o = \ $(LD) -r $(KBUILD_LDFLAGS) \ @@ -57,6 +61,7 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \ ifdef CONFIG_DEBUG_INFO_BTF_MODULES +$(if $(newer-prereqs),$(call cmd,btf_ko)) endif + +$(call cmd,check_tracepoint) targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o) .module-common.o -- cgit v1.2.3 From 909597fa01f28025d601090b12a028eac71af946 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 10:50:20 -0600 Subject: docs: Move the "features" tools to tools/docs The scripts for managing the features docs are found in three different directories; unite them all under tools/docs and update references as needed. 
Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- scripts/get_feat.pl | 641 ---------------------------------------------------- 1 file changed, 641 deletions(-) delete mode 100755 scripts/get_feat.pl (limited to 'scripts') diff --git a/scripts/get_feat.pl b/scripts/get_feat.pl deleted file mode 100755 index 40fb28c8424e..000000000000 --- a/scripts/get_feat.pl +++ /dev/null @@ -1,641 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 - -use strict; -use Pod::Usage; -use Getopt::Long; -use File::Find; -use Fcntl ':mode'; -use Cwd 'abs_path'; - -my $help; -my $man; -my $debug; -my $arch; -my $feat; -my $enable_fname; - -my $basename = abs_path($0); -$basename =~ s,/[^/]+$,/,; - -my $prefix=$basename . "../Documentation/features"; - -# Used only at for full features output. The script will auto-adjust -# such values for the minimal possible values -my $status_size = 1; -my $description_size = 1; - -GetOptions( - "debug|d+" => \$debug, - "dir=s" => \$prefix, - 'help|?' => \$help, - 'arch=s' => \$arch, - 'feat=s' => \$feat, - 'feature=s' => \$feat, - "enable-fname" => \$enable_fname, - man => \$man -) or pod2usage(2); - -pod2usage(1) if $help; -pod2usage(-exitstatus => 0, -verbose => 2) if $man; - -pod2usage(1) if (scalar @ARGV < 1 || @ARGV > 2); - -my ($cmd, $arg) = @ARGV; - -pod2usage(2) if ($cmd ne "current" && $cmd ne "rest" && $cmd ne "validate" - && $cmd ne "ls" && $cmd ne "list"); - -require Data::Dumper if ($debug); - -my %data; -my %archs; - -# -# Displays an error message, printing file name and line -# -sub parse_error($$$$) { - my ($file, $ln, $msg, $data) = @_; - - $data =~ s/\s+$/\n/; - - print STDERR "Warning: file $file#$ln:\n\t$msg"; - - if ($data ne "") { - print STDERR ". 
Line\n\t\t$data"; - } else { - print STDERR "\n"; - } -} - -# -# Parse a features file, storing its contents at %data -# - -my $h_name = "Feature"; -my $h_kconfig = "Kconfig"; -my $h_description = "Description"; -my $h_subsys = "Subsystem"; -my $h_status = "Status"; -my $h_arch = "Architecture"; - -my $max_size_name = length($h_name); -my $max_size_kconfig = length($h_kconfig); -my $max_size_description = length($h_description); -my $max_size_subsys = length($h_subsys); -my $max_size_status = length($h_status); - -my $max_size_arch = 0; -my $max_size_arch_with_header; -my $max_description_word = 0; - -sub parse_feat { - my $file = $File::Find::name; - - my $mode = (stat($file))[2]; - return if ($mode & S_IFDIR); - return if ($file =~ m,($prefix)/arch-support.txt,); - return if (!($file =~ m,arch-support.txt$,)); - - if ($enable_fname) { - printf ".. FILE %s\n", abs_path($file); - } - - my $subsys = ""; - $subsys = $2 if ( m,.*($prefix)/([^/]+).*,); - - if (length($subsys) > $max_size_subsys) { - $max_size_subsys = length($subsys); - } - - my $name; - my $kconfig; - my $description; - my $comments = ""; - my $last_status; - my $ln; - my %arch_table; - - print STDERR "Opening $file\n" if ($debug > 1); - open IN, $file; - - while() { - $ln++; - - if (m/^\#\s+Feature\s+name:\s*(.*\S)/) { - $name = $1; - if (length($name) > $max_size_name) { - $max_size_name = length($name); - } - next; - } - if (m/^\#\s+Kconfig:\s*(.*\S)/) { - $kconfig = $1; - if (length($kconfig) > $max_size_kconfig) { - $max_size_kconfig = length($kconfig); - } - next; - } - if (m/^\#\s+description:\s*(.*\S)/) { - $description = $1; - if (length($description) > $max_size_description) { - $max_size_description = length($description); - } - - foreach my $word (split /\s+/, $description) { - if (length($word) > $max_description_word) { - $max_description_word = length($word); - } - } - - next; - } - next if (m/^\\s*$/); - next if (m/^\s*\-+\s*$/); - next if (m/^\s*\|\s*arch\s*\|\s*status\s*\|\s*$/); - - 
if (m/^\#\s*(.*)/) { - $comments .= "$1\n"; - next; - } - if (m/^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$/) { - my $a = $1; - my $status = $2; - - if (length($status) > $max_size_status) { - $max_size_status = length($status); - } - if (length($a) > $max_size_arch) { - $max_size_arch = length($a); - } - - $status = "---" if ($status =~ m/^\.\.$/); - - $archs{$a} = 1; - $arch_table{$a} = $status; - next; - } - - #Everything else is an error - parse_error($file, $ln, "line is invalid", $_); - } - close IN; - - if (!$name) { - parse_error($file, $ln, "Feature name not found", ""); - return; - } - - parse_error($file, $ln, "Subsystem not found", "") if (!$subsys); - parse_error($file, $ln, "Kconfig not found", "") if (!$kconfig); - parse_error($file, $ln, "Description not found", "") if (!$description); - - if (!%arch_table) { - parse_error($file, $ln, "Architecture table not found", ""); - return; - } - - $data{$name}->{where} = $file; - $data{$name}->{subsys} = $subsys; - $data{$name}->{kconfig} = $kconfig; - $data{$name}->{description} = $description; - $data{$name}->{comments} = $comments; - $data{$name}->{table} = \%arch_table; - - $max_size_arch_with_header = $max_size_arch + length($h_arch); -} - -# -# Output feature(s) for a given architecture -# -sub output_arch_table { - my $title = "Feature status on $arch architecture"; - - print "=" x length($title) . "\n"; - print "$title\n"; - print "=" x length($title) . 
"\n\n"; - - print "=" x $max_size_subsys; - print " "; - print "=" x $max_size_name; - print " "; - print "=" x $max_size_kconfig; - print " "; - print "=" x $max_size_status; - print " "; - print "=" x $max_size_description; - print "\n"; - printf "%-${max_size_subsys}s ", $h_subsys; - printf "%-${max_size_name}s ", $h_name; - printf "%-${max_size_kconfig}s ", $h_kconfig; - printf "%-${max_size_status}s ", $h_status; - printf "%-${max_size_description}s\n", $h_description; - print "=" x $max_size_subsys; - print " "; - print "=" x $max_size_name; - print " "; - print "=" x $max_size_kconfig; - print " "; - print "=" x $max_size_status; - print " "; - print "=" x $max_size_description; - print "\n"; - - foreach my $name (sort { - ($data{$a}->{subsys} cmp $data{$b}->{subsys}) || - ("\L$a" cmp "\L$b") - } keys %data) { - next if ($feat && $name ne $feat); - - my %arch_table = %{$data{$name}->{table}}; - printf "%-${max_size_subsys}s ", $data{$name}->{subsys}; - printf "%-${max_size_name}s ", $name; - printf "%-${max_size_kconfig}s ", $data{$name}->{kconfig}; - printf "%-${max_size_status}s ", $arch_table{$arch}; - printf "%-s\n", $data{$name}->{description}; - } - - print "=" x $max_size_subsys; - print " "; - print "=" x $max_size_name; - print " "; - print "=" x $max_size_kconfig; - print " "; - print "=" x $max_size_status; - print " "; - print "=" x $max_size_description; - print "\n"; -} - -# -# list feature(s) for a given architecture -# -sub list_arch_features { - print "#\n# Kernel feature support matrix of the '$arch' architecture:\n#\n"; - - foreach my $name (sort { - ($data{$a}->{subsys} cmp $data{$b}->{subsys}) || - ("\L$a" cmp "\L$b") - } keys %data) { - next if ($feat && $name ne $feat); - - my %arch_table = %{$data{$name}->{table}}; - - my $status = $arch_table{$arch}; - $status = " " x ((4 - length($status)) / 2) . 
$status; - - printf " %${max_size_subsys}s/ ", $data{$name}->{subsys}; - printf "%-${max_size_name}s: ", $name; - printf "%-5s| ", $status; - printf "%${max_size_kconfig}s # ", $data{$name}->{kconfig}; - printf " %s\n", $data{$name}->{description}; - } -} - -# -# Output a feature on all architectures -# -sub output_feature { - my $title = "Feature $feat"; - - print "=" x length($title) . "\n"; - print "$title\n"; - print "=" x length($title) . "\n\n"; - - print ":Subsystem: $data{$feat}->{subsys} \n" if ($data{$feat}->{subsys}); - print ":Kconfig: $data{$feat}->{kconfig} \n" if ($data{$feat}->{kconfig}); - - my $desc = $data{$feat}->{description}; - $desc =~ s/^([a-z])/\U$1/; - $desc =~ s/\.?\s*//; - print "\n$desc.\n\n"; - - my $com = $data{$feat}->{comments}; - $com =~ s/^\s+//; - $com =~ s/\s+$//; - if ($com) { - print "Comments\n"; - print "--------\n\n"; - print "$com\n\n"; - } - - print "=" x $max_size_arch_with_header; - print " "; - print "=" x $max_size_status; - print "\n"; - - printf "%-${max_size_arch}s ", $h_arch; - printf "%-${max_size_status}s", $h_status . 
"\n"; - - print "=" x $max_size_arch_with_header; - print " "; - print "=" x $max_size_status; - print "\n"; - - my %arch_table = %{$data{$feat}->{table}}; - foreach my $arch (sort keys %arch_table) { - printf "%-${max_size_arch}s ", $arch; - printf "%-${max_size_status}s\n", $arch_table{$arch}; - } - - print "=" x $max_size_arch_with_header; - print " "; - print "=" x $max_size_status; - print "\n"; -} - -# -# Output all features for all architectures -# - -sub matrix_lines($$$) { - my $desc_size = shift; - my $status_size = shift; - my $header = shift; - my $fill; - my $ln_marker; - - if ($header) { - $ln_marker = "="; - } else { - $ln_marker = "-"; - } - - $fill = $ln_marker; - - print "+"; - print $fill x $max_size_name; - print "+"; - print $fill x $desc_size; - print "+"; - print $ln_marker x $status_size; - print "+\n"; -} - -sub output_matrix { - my $title = "Feature status on all architectures"; - my $notcompat = "Not compatible"; - - print "=" x length($title) . "\n"; - print "$title\n"; - print "=" x length($title) . 
"\n\n"; - - my $desc_title = "$h_kconfig / $h_description"; - - my $desc_size = $max_size_kconfig + 4; - if (!$description_size) { - $desc_size = $max_size_description if ($max_size_description > $desc_size); - } else { - $desc_size = $description_size if ($description_size > $desc_size); - } - $desc_size = $max_description_word if ($max_description_word > $desc_size); - - $desc_size = length($desc_title) if (length($desc_title) > $desc_size); - - $max_size_status = length($notcompat) if (length($notcompat) > $max_size_status); - - # Ensure that the status will fit - my $min_status_size = $max_size_status + $max_size_arch + 6; - $status_size = $min_status_size if ($status_size < $min_status_size); - - - my $cur_subsys = ""; - foreach my $name (sort { - ($data{$a}->{subsys} cmp $data{$b}->{subsys}) or - ("\L$a" cmp "\L$b") - } keys %data) { - - if ($cur_subsys ne $data{$name}->{subsys}) { - if ($cur_subsys ne "") { - printf "\n"; - } - - $cur_subsys = $data{$name}->{subsys}; - - my $title = "Subsystem: $cur_subsys"; - print "$title\n"; - print "=" x length($title) . "\n\n"; - - - matrix_lines($desc_size, $status_size, 0); - - printf "|%-${max_size_name}s", $h_name; - printf "|%-${desc_size}s", $desc_title; - - printf "|%-${status_size}s|\n", "Status per architecture"; - matrix_lines($desc_size, $status_size, 1); - } - - my %arch_table = %{$data{$name}->{table}}; - my $cur_status = ""; - - my (@lines, @descs); - my $line = ""; - foreach my $arch (sort { - ($arch_table{$b} cmp $arch_table{$a}) or - ("\L$a" cmp "\L$b") - } keys %arch_table) { - - my $status = $arch_table{$arch}; - - if ($status eq "---") { - $status = $notcompat; - } - - if ($status ne $cur_status) { - if ($line ne "") { - push @lines, $line; - $line = ""; - } - $line = "- **" . $status . "**: " . $arch; - } elsif (length($line) + length ($arch) + 2 < $status_size) { - $line .= ", " . $arch; - } else { - push @lines, $line; - $line = " " . 
$arch; - } - $cur_status = $status; - } - push @lines, $line if ($line ne ""); - - my $description = $data{$name}->{description}; - while (length($description) > $desc_size) { - my $d = substr $description, 0, $desc_size; - - # Ensure that it will end on a space - # if it can't, it means that the size is too small - # Instead of aborting it, let's print what we have - if (!($d =~ s/^(.*)\s+.*/$1/)) { - $d = substr $d, 0, -1; - push @descs, "$d\\"; - $description =~ s/^\Q$d\E//; - } else { - push @descs, $d; - $description =~ s/^\Q$d\E\s+//; - } - } - push @descs, $description; - - # Ensure that the full description will be printed - push @lines, "" while (scalar(@lines) < 2 + scalar(@descs)); - - my $ln = 0; - for my $line(@lines) { - if (!$ln) { - printf "|%-${max_size_name}s", $name; - printf "|%-${desc_size}s", "``" . $data{$name}->{kconfig} . "``"; - } elsif ($ln >= 2 && scalar(@descs)) { - printf "|%-${max_size_name}s", ""; - printf "|%-${desc_size}s", shift @descs; - } else { - printf "|%-${max_size_name}s", ""; - printf "|%-${desc_size}s", ""; - } - - printf "|%-${status_size}s|\n", $line; - - $ln++; - } - matrix_lines($desc_size, $status_size, 0); - } -} - - -# -# Parses all feature files located at $prefix dir -# -find({wanted =>\&parse_feat, no_chdir => 1}, $prefix); - -print STDERR Data::Dumper->Dump([\%data], [qw(*data)]) if ($debug); - -# -# Handles the command -# -if ($cmd eq "current") { - $arch = qx(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/'); - $arch =~s/\s+$//; -} - -if ($cmd eq "ls" or $cmd eq "list") { - if (!$arch) { - $arch = qx(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/'); - $arch =~s/\s+$//; - } - - list_arch_features; - - exit; -} - -if ($cmd ne "validate") { - if ($arch) { - output_arch_table; - } elsif ($feat) { - output_feature; - } else { - output_matrix; - } -} - -__END__ - -=head1 NAME - -get_feat.pl - parse the Linux Feature files and produce a ReST book. 
- -=head1 SYNOPSIS - -B [--debug] [--man] [--help] [--dir=] [--arch=] - [--feature=|--feat=] [] - -Where can be: - -=over 8 - -B - output table in ReST compatible ASCII format - with features for this machine's architecture - -B - output table(s) in ReST compatible ASCII format - with features in ReST markup language. The output - is affected by --arch or --feat/--feature flags. - -B - validate the contents of the files under - Documentation/features. - -B or B - list features for this machine's architecture, - using an easier to parse format. - The output is affected by --arch flag. - -=back - -=head1 OPTIONS - -=over 8 - -=item B<--arch> - -Output features for an specific architecture, optionally filtering for -a single specific feature. - -=item B<--feat> or B<--feature> - -Output features for a single specific feature. - -=item B<--dir> - -Changes the location of the Feature files. By default, it uses -the Documentation/features directory. - -=item B<--enable-fname> - -Prints the file name of the feature files. This can be used in order to -track dependencies during documentation build. - -=item B<--debug> - -Put the script in verbose mode, useful for debugging. Can be called multiple -times, to increase verbosity. - -=item B<--help> - -Prints a brief help message and exits. - -=item B<--man> - -Prints the manual page and exits. - -=back - -=head1 DESCRIPTION - -Parse the Linux feature files from Documentation/features (by default), -optionally producing results at ReST format. - -It supports output data per architecture, per feature or a -feature x arch matrix. - -When used with B command, it will use either one of the tree formats: - -If neither B<--arch> or B<--feature> arguments are used, it will output a -matrix with features per architecture. - -If B<--arch> argument is used, it will output the features availability for -a given architecture. - -If B<--feat> argument is used, it will output the content of the feature -file using ReStructured Text markup. 
- -=head1 BUGS - -Report bugs to Mauro Carvalho Chehab - -=head1 COPYRIGHT - -Copyright (c) 2019 by Mauro Carvalho Chehab . - -License GPLv2: GNU GPL version 2 . - -This is free software: you are free to change and redistribute it. -There is NO WARRANTY, to the extent permitted by law. - -=cut -- cgit v1.2.3 From d37366cac4ccfb71c77e9620f63e3a6fcdf3816c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 11:03:02 -0600 Subject: docs: move checktransupdate.py to tools/docs The checktranslate.py tool currently languishes in scripts/; move it to tools/docs and update references accordingly. Cc: Alex Shi Cc: Yanteng Si Cc: Dongliang Mu Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- scripts/checktransupdate.py | 307 -------------------------------------------- 1 file changed, 307 deletions(-) delete mode 100755 scripts/checktransupdate.py (limited to 'scripts') diff --git a/scripts/checktransupdate.py b/scripts/checktransupdate.py deleted file mode 100755 index e39529e46c3d..000000000000 --- a/scripts/checktransupdate.py +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 - -""" -This script helps track the translation status of the documentation -in different locales, e.g., zh_CN. More specially, it uses `git log` -commit to find the latest english commit from the translation commit -(order by author date) and the latest english commits from HEAD. If -differences occur, report the file and commits that need to be updated. - -The usage is as follows: -- ./scripts/checktransupdate.py -l zh_CN -This will print all the files that need to be updated or translated in the zh_CN locale. -- ./scripts/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst -This will only print the status of the specified file. 
- -The output is something like: -Documentation/dev-tools/kfence.rst -No translation in the locale of zh_CN - -Documentation/translations/zh_CN/dev-tools/testing-overview.rst -commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs") -1 commits needs resolving in total -""" - -import os -import re -import time -import logging -from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction -from datetime import datetime - - -def get_origin_path(file_path): - """Get the origin path from the translation path""" - paths = file_path.split("/") - tidx = paths.index("translations") - opaths = paths[:tidx] - opaths += paths[tidx + 2 :] - return "/".join(opaths) - - -def get_latest_commit_from(file_path, commit): - """Get the latest commit from the specified commit for the specified file""" - command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}" - logging.debug(command) - pipe = os.popen(command) - result = pipe.read() - result = result.split("\n") - if len(result) <= 1: - return None - - logging.debug("Result: %s", result[0]) - - return { - "hash": result[0], - "author_date": datetime.strptime(result[1], "%a, %d %b %Y %H:%M:%S %z"), - "commit_date": datetime.strptime(result[2], "%a, %d %b %Y %H:%M:%S %z"), - "message": result[4:], - } - - -def get_origin_from_trans(origin_path, t_from_head): - """Get the latest origin commit from the translation commit""" - o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"]) - while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]: - o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^") - if o_from_t is not None: - logging.debug("tracked origin commit id: %s", o_from_t["hash"]) - return o_from_t - - -def get_origin_from_trans_smartly(origin_path, t_from_head): - """Get the latest origin commit from the formatted translation commit: - (1) update to commit HASH (TITLE) - (2) Update the translation through commit HASH (TITLE) 
- """ - # catch flag for 12-bit commit hash - HASH = r'([0-9a-f]{12})' - # pattern 1: contains "update to commit HASH" - pat_update_to = re.compile(rf'update to commit {HASH}') - # pattern 2: contains "Update the translation through commit HASH" - pat_update_translation = re.compile(rf'Update the translation through commit {HASH}') - - origin_commit_hash = None - for line in t_from_head["message"]: - # check if the line matches the first pattern - match = pat_update_to.search(line) - if match: - origin_commit_hash = match.group(1) - break - # check if the line matches the second pattern - match = pat_update_translation.search(line) - if match: - origin_commit_hash = match.group(1) - break - if origin_commit_hash is None: - return None - o_from_t = get_latest_commit_from(origin_path, origin_commit_hash) - if o_from_t is not None: - logging.debug("tracked origin commit id: %s", o_from_t["hash"]) - return o_from_t - - -def get_commits_count_between(opath, commit1, commit2): - """Get the commits count between two commits for the specified file""" - command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}" - logging.debug(command) - pipe = os.popen(command) - result = pipe.read().split("\n") - # filter out empty lines - result = list(filter(lambda x: x != "", result)) - return result - - -def pretty_output(commit): - """Pretty print the commit message""" - command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}" - logging.debug(command) - pipe = os.popen(command) - return pipe.read() - - -def valid_commit(commit): - """Check if the commit is valid or not""" - msg = pretty_output(commit) - return "Merge tag" not in msg - -def check_per_file(file_path): - """Check the translation status for the specified file""" - opath = get_origin_path(file_path) - - if not os.path.isfile(opath): - logging.error("Cannot find the origin path for {file_path}") - return - - o_from_head = get_latest_commit_from(opath, "HEAD") - t_from_head = 
get_latest_commit_from(file_path, "HEAD") - - if o_from_head is None or t_from_head is None: - logging.error("Cannot find the latest commit for %s", file_path) - return - - o_from_t = get_origin_from_trans_smartly(opath, t_from_head) - # notice, o_from_t from get_*_smartly() is always more accurate than from get_*() - if o_from_t is None: - o_from_t = get_origin_from_trans(opath, t_from_head) - - if o_from_t is None: - logging.error("Error: Cannot find the latest origin commit for %s", file_path) - return - - if o_from_head["hash"] == o_from_t["hash"]: - logging.debug("No update needed for %s", file_path) - else: - logging.info(file_path) - commits = get_commits_count_between( - opath, o_from_t["hash"], o_from_head["hash"] - ) - count = 0 - for commit in commits: - if valid_commit(commit): - logging.info("commit %s", pretty_output(commit)) - count += 1 - logging.info("%d commits needs resolving in total\n", count) - - -def valid_locales(locale): - """Check if the locale is valid or not""" - script_path = os.path.dirname(os.path.abspath(__file__)) - linux_path = os.path.join(script_path, "..") - if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"): - raise ArgumentTypeError("Invalid locale: {locale}") - return locale - - -def list_files_with_excluding_folders(folder, exclude_folders, include_suffix): - """List all files with the specified suffix in the folder and its subfolders""" - files = [] - stack = [folder] - - while stack: - pwd = stack.pop() - # filter out the exclude folders - if os.path.basename(pwd) in exclude_folders: - continue - # list all files and folders - for item in os.listdir(pwd): - ab_item = os.path.join(pwd, item) - if os.path.isdir(ab_item): - stack.append(ab_item) - else: - if ab_item.endswith(include_suffix): - files.append(ab_item) - - return files - - -class DmesgFormatter(logging.Formatter): - """Custom dmesg logging formatter""" - def format(self, record): - timestamp = time.time() - formatted_time = 
f"[{timestamp:>10.6f}]" - log_message = f"{formatted_time} {record.getMessage()}" - return log_message - - -def config_logging(log_level, log_file="checktransupdate.log"): - """configure logging based on the log level""" - # set up the root logger - logger = logging.getLogger() - logger.setLevel(log_level) - - # Create console handler - console_handler = logging.StreamHandler() - console_handler.setLevel(log_level) - - # Create file handler - file_handler = logging.FileHandler(log_file) - file_handler.setLevel(log_level) - - # Create formatter and add it to the handlers - formatter = DmesgFormatter() - console_handler.setFormatter(formatter) - file_handler.setFormatter(formatter) - - # Add the handler to the logger - logger.addHandler(console_handler) - logger.addHandler(file_handler) - - -def main(): - """Main function of the script""" - script_path = os.path.dirname(os.path.abspath(__file__)) - linux_path = os.path.join(script_path, "..") - - parser = ArgumentParser(description="Check the translation update") - parser.add_argument( - "-l", - "--locale", - default="zh_CN", - type=valid_locales, - help="Locale to check when files are not specified", - ) - - parser.add_argument( - "--print-missing-translations", - action=BooleanOptionalAction, - default=True, - help="Print files that do not have translations", - ) - - parser.add_argument( - '--log', - default='INFO', - choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], - help='Set the logging level') - - parser.add_argument( - '--logfile', - default='checktransupdate.log', - help='Set the logging file (default: checktransupdate.log)') - - parser.add_argument( - "files", nargs="*", help="Files to check, if not specified, check all files" - ) - args = parser.parse_args() - - # Configure logging based on the --log argument - log_level = getattr(logging, args.log.upper(), logging.INFO) - config_logging(log_level) - - # Get files related to linux path - files = args.files - if len(files) == 0: - offical_files = 
list_files_with_excluding_folders( - os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst" - ) - - for file in offical_files: - # split the path into parts - path_parts = file.split(os.sep) - # find the index of the "Documentation" directory - kindex = path_parts.index("Documentation") - # insert the translations and locale after the Documentation directory - new_path_parts = path_parts[:kindex + 1] + ["translations", args.locale] \ - + path_parts[kindex + 1 :] - # join the path parts back together - new_file = os.sep.join(new_path_parts) - if os.path.isfile(new_file): - files.append(new_file) - else: - if args.print_missing_translations: - logging.info(os.path.relpath(os.path.abspath(file), linux_path)) - logging.info("No translation in the locale of %s\n", args.locale) - - files = list(map(lambda x: os.path.relpath(os.path.abspath(x), linux_path), files)) - - # cd to linux root directory - os.chdir(linux_path) - - for file in files: - check_per_file(file) - - -if __name__ == "__main__": - main() -- cgit v1.2.3 From eaae0ad9720428cd9e2bf9a40fedf137db95184f Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 11:16:28 -0600 Subject: docs: move scripts/documentation-file-ref-check to tools/docs Add this script to the growing collection of documentation tools. Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- scripts/documentation-file-ref-check | 245 ----------------------------------- 1 file changed, 245 deletions(-) delete mode 100755 scripts/documentation-file-ref-check (limited to 'scripts') diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check deleted file mode 100755 index 408b1dbe7884..000000000000 --- a/scripts/documentation-file-ref-check +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 -# -# Treewide grep for references to files under Documentation, and report -# non-existing files in stderr. 
- -use warnings; -use strict; -use Getopt::Long qw(:config no_auto_abbrev); - -# NOTE: only add things here when the file was gone, but the text wants -# to mention a past documentation file, for example, to give credits for -# the original work. -my %false_positives = ( - "Documentation/scsi/scsi_mid_low_api.rst" => "Documentation/Configure.help", - "drivers/vhost/vhost.c" => "Documentation/virtual/lguest/lguest.c", -); - -my $scriptname = $0; -$scriptname =~ s,.*/([^/]+/),$1,; - -# Parse arguments -my $help = 0; -my $fix = 0; -my $warn = 0; - -if (! -e ".git") { - printf "Warning: can't check if file exists, as this is not a git tree\n"; - exit 0; -} - -GetOptions( - 'fix' => \$fix, - 'warn' => \$warn, - 'h|help|usage' => \$help, -); - -if ($help != 0) { - print "$scriptname [--help] [--fix]\n"; - exit -1; -} - -# Step 1: find broken references -print "Finding broken references. This may take a while... " if ($fix); - -my %broken_ref; - -my $doc_fix = 0; - -open IN, "git grep ':doc:\`' Documentation/|" - or die "Failed to run git grep"; -while () { - next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,); - next if (m,sphinx/,); - - my $file = $1; - my $d = $1; - my $doc_ref = $2; - - my $f = $doc_ref; - - $d =~ s,(.*/).*,$1,; - $f =~ s,.*\<([^\>]+)\>,$1,; - - if ($f =~ m,^/,) { - $f = "$f.rst"; - $f =~ s,^/,Documentation/,; - } else { - $f = "$d$f.rst"; - } - - next if (grep -e, glob("$f")); - - if ($fix && !$doc_fix) { - print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n"; - } - $doc_fix++; - - print STDERR "$file: :doc:`$doc_ref`\n"; -} -close IN; - -open IN, "git grep 'Documentation/'|" - or die "Failed to run git grep"; -while () { - next if (!m/^([^:]+):(.*)/); - - my $f = $1; - my $ln = $2; - - # On linux-next, discard the Next/ directory - next if ($f =~ m,^Next/,); - - # Makefiles and scripts contain nasty expressions to parse docs - next if ($f =~ m/Makefile/ || $f =~ m/\.(sh|py|pl|~|rej|org|orig)$/); - - # It doesn't make sense to parse hidden 
files - next if ($f =~ m#/\.#); - - # Skip this script - next if ($f eq $scriptname); - - # Ignore the dir where documentation will be built - next if ($ln =~ m,\b(\S*)Documentation/output,); - - if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) { - my $prefix = $1; - my $ref = $2; - my $base = $2; - my $extra = $3; - - # some file references are like: - # /usr/src/linux/Documentation/DMA-{API,mapping}.txt - # For now, ignore them - next if ($extra =~ m/^{/); - - # Remove footnotes at the end like: - # Documentation/devicetree/dt-object-internal.txt[1] - $ref =~ s/(txt|rst)\[\d+]$/$1/; - - # Remove ending ']' without any '[' - $ref =~ s/\].*// if (!($ref =~ m/\[/)); - - # Remove puntuation marks at the end - $ref =~ s/[\,\.]+$//; - - my $fulref = "$prefix$ref"; - - $fulref =~ s/^(\ 1) { - print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n"; - foreach my $j (@find) { - $j =~ s,^./,,; - print STDERR " $j\n"; - } - } else { - $f = $find[0]; - $f =~ s,^./,,; - print "INFO: Replacing $ref to $f\n"; - foreach my $j (qx(git grep -l $ref)) { - qx(sed "s\@$ref\@$f\@g" -i $j); - } - } -} -- cgit v1.2.3 From a5dd93016f20912ec141d569b897e1fc2d94977d Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 11:33:09 -0600 Subject: docs: move get_abi.py to tools/docs Move this tool out of scripts/ to join the other documentation tools; fix up a couple of erroneous references in the process. It's worth noting that this script will fail badly unless one has a PYTHONPATH referencing scripts/lib/abi. 
Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- scripts/get_abi.py | 214 ----------------------------------------------------- 1 file changed, 214 deletions(-) delete mode 100755 scripts/get_abi.py (limited to 'scripts') diff --git a/scripts/get_abi.py b/scripts/get_abi.py deleted file mode 100755 index 7ce4748a46d2..000000000000 --- a/scripts/get_abi.py +++ /dev/null @@ -1,214 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0903 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. -""" - -import argparse -import logging -import os -import sys - -# Import Python modules - -LIB_DIR = "lib/abi" -SRC_DIR = os.path.dirname(os.path.realpath(__file__)) - -sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) - -from abi_parser import AbiParser # pylint: disable=C0413 -from abi_regex import AbiRegex # pylint: disable=C0413 -from helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413 -from system_symbols import SystemSymbols # pylint: disable=C0413 - -# Command line classes - - -REST_DESC = """ -Produce output in ReST format. - -The output is done on two sections: - -- Symbols: show all parsed symbols in alphabetic order; -- Files: cross reference the content of each file with the symbols on it. -""" - -class AbiRest: - """Initialize an argparse subparser for rest output""" - - def __init__(self, subparsers): - """Initialize argparse subparsers""" - - parser = subparsers.add_parser("rest", - formatter_class=argparse.RawTextHelpFormatter, - description=REST_DESC) - - parser.add_argument("--enable-lineno", action="store_true", - help="enable lineno") - parser.add_argument("--raw", action="store_true", - help="output text as contained in the ABI files. 
" - "It not used, output will contain dynamically" - " generated cross references when possible.") - parser.add_argument("--no-file", action="store_true", - help="Don't the files section") - parser.add_argument("--show-hints", help="Show-hints") - - parser.set_defaults(func=self.run) - - def run(self, args): - """Run subparser""" - - parser = AbiParser(args.dir, debug=args.debug) - parser.parse_abi() - parser.check_issues() - - for t in parser.doc(args.raw, not args.no_file): - if args.enable_lineno: - print (f".. LINENO {t[1]}#{t[2]}\n\n") - - print(t[0]) - -class AbiValidate: - """Initialize an argparse subparser for ABI validation""" - - def __init__(self, subparsers): - """Initialize argparse subparsers""" - - parser = subparsers.add_parser("validate", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="list events") - - parser.set_defaults(func=self.run) - - def run(self, args): - """Run subparser""" - - parser = AbiParser(args.dir, debug=args.debug) - parser.parse_abi() - parser.check_issues() - - -class AbiSearch: - """Initialize an argparse subparser for ABI search""" - - def __init__(self, subparsers): - """Initialize argparse subparsers""" - - parser = subparsers.add_parser("search", - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="Search ABI using a regular expression") - - parser.add_argument("expression", - help="Case-insensitive search pattern for the ABI symbol") - - parser.set_defaults(func=self.run) - - def run(self, args): - """Run subparser""" - - parser = AbiParser(args.dir, debug=args.debug) - parser.parse_abi() - parser.search_symbols(args.expression) - -UNDEFINED_DESC=""" -Check undefined ABIs on local machine. - -Read sysfs devnodes and check if the devnodes there are defined inside -ABI documentation. - -The search logic tries to minimize the number of regular expressions to -search per each symbol. 
- -By default, it runs on a single CPU, as Python support for CPU threads -is still experimental, and multi-process runs on Python is very slow. - -On experimental tests, if the number of ABI symbols to search per devnode -is contained on a limit of ~150 regular expressions, using a single CPU -is a lot faster than using multiple processes. However, if the number of -regular expressions to check is at the order of ~30000, using multiple -CPUs speeds up the check. -""" - -class AbiUndefined: - """ - Initialize an argparse subparser for logic to check undefined ABI at - the current machine's sysfs - """ - - def __init__(self, subparsers): - """Initialize argparse subparsers""" - - parser = subparsers.add_parser("undefined", - formatter_class=argparse.RawTextHelpFormatter, - description=UNDEFINED_DESC) - - parser.add_argument("-S", "--sysfs-dir", default="/sys", - help="directory where sysfs is mounted") - parser.add_argument("-s", "--search-string", - help="search string regular expression to limit symbol search") - parser.add_argument("-H", "--show-hints", action="store_true", - help="Hints about definitions for missing ABI symbols.") - parser.add_argument("-j", "--jobs", "--max-workers", type=int, default=1, - help="If bigger than one, enables multiprocessing.") - parser.add_argument("-c", "--max-chunk-size", type=int, default=50, - help="Maximum number of chunk size") - parser.add_argument("-f", "--found", action="store_true", - help="Also show found items. " - "Helpful to debug the parser."), - parser.add_argument("-d", "--dry-run", action="store_true", - help="Don't actually search for undefined. 
" - "Helpful to debug the parser."), - - parser.set_defaults(func=self.run) - - def run(self, args): - """Run subparser""" - - abi = AbiRegex(args.dir, debug=args.debug, - search_string=args.search_string) - - abi_symbols = SystemSymbols(abi=abi, hints=args.show_hints, - sysfs=args.sysfs_dir) - - abi_symbols.check_undefined_symbols(dry_run=args.dry_run, - found=args.found, - max_workers=args.jobs, - chunk_size=args.max_chunk_size) - - -def main(): - """Main program""" - - parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter) - - parser.add_argument("-d", "--debug", type=int, default=0, help="debug level") - parser.add_argument("-D", "--dir", default=ABI_DIR, help=DEBUG_HELP) - - subparsers = parser.add_subparsers() - - AbiRest(subparsers) - AbiValidate(subparsers) - AbiSearch(subparsers) - AbiUndefined(subparsers) - - args = parser.parse_args() - - if args.debug: - level = logging.DEBUG - else: - level = logging.INFO - - logging.basicConfig(level=level, format="[%(levelname)s] %(message)s") - - if "func" in args: - args.func(args) - else: - sys.exit(f"Please specify a valid command for {sys.argv[0]}") - - -# Call main method -if __name__ == "__main__": - main() -- cgit v1.2.3 From f1c2db1f145b5c609ae651d229713e3c7422785a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 12:21:50 -0600 Subject: docs: move test_doc_build.py to tools/docs Add this tool to tools/docs. 
Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- scripts/test_doc_build.py | 513 ---------------------------------------------- 1 file changed, 513 deletions(-) delete mode 100755 scripts/test_doc_build.py (limited to 'scripts') diff --git a/scripts/test_doc_build.py b/scripts/test_doc_build.py deleted file mode 100755 index 47b4606569f9..000000000000 --- a/scripts/test_doc_build.py +++ /dev/null @@ -1,513 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab -# -# pylint: disable=R0903,R0912,R0913,R0914,R0917,C0301 - -""" -Install minimal supported requirements for different Sphinx versions -and optionally test the build. -""" - -import argparse -import asyncio -import os.path -import shutil -import sys -import time -import subprocess - -# Minimal python version supported by the building system. - -PYTHON = os.path.basename(sys.executable) - -min_python_bin = None - -for i in range(9, 13): - p = f"python3.{i}" - if shutil.which(p): - min_python_bin = p - break - -if not min_python_bin: - min_python_bin = PYTHON - -# Starting from 8.0, Python 3.9 is not supported anymore. 
-PYTHON_VER_CHANGES = {(8, 0, 0): PYTHON} - -DEFAULT_VERSIONS_TO_TEST = [ - (3, 4, 3), # Minimal supported version - (5, 3, 0), # CentOS Stream 9 / AlmaLinux 9 - (6, 1, 1), # Debian 12 - (7, 2, 1), # openSUSE Leap 15.6 - (7, 2, 6), # Ubuntu 24.04 LTS - (7, 4, 7), # Ubuntu 24.10 - (7, 3, 0), # openSUSE Tumbleweed - (8, 1, 3), # Fedora 42 - (8, 2, 3) # Latest version - covers rolling distros -] - -# Sphinx versions to be installed and their incremental requirements -SPHINX_REQUIREMENTS = { - # Oldest versions we support for each package required by Sphinx 3.4.3 - (3, 4, 3): { - "docutils": "0.16", - "alabaster": "0.7.12", - "babel": "2.8.0", - "certifi": "2020.6.20", - "docutils": "0.16", - "idna": "2.10", - "imagesize": "1.2.0", - "Jinja2": "2.11.2", - "MarkupSafe": "1.1.1", - "packaging": "20.4", - "Pygments": "2.6.1", - "PyYAML": "5.1", - "requests": "2.24.0", - "snowballstemmer": "2.0.0", - "sphinxcontrib-applehelp": "1.0.2", - "sphinxcontrib-devhelp": "1.0.2", - "sphinxcontrib-htmlhelp": "1.0.3", - "sphinxcontrib-jsmath": "1.0.1", - "sphinxcontrib-qthelp": "1.0.3", - "sphinxcontrib-serializinghtml": "1.1.4", - "urllib3": "1.25.9", - }, - - # Update package dependencies to a more modern base. The goal here - # is to avoid to many incremental changes for the next entries - (3, 5, 0): { - "alabaster": "0.7.13", - "babel": "2.17.0", - "certifi": "2025.6.15", - "idna": "3.10", - "imagesize": "1.4.1", - "packaging": "25.0", - "Pygments": "2.8.1", - "requests": "2.32.4", - "snowballstemmer": "3.0.1", - "sphinxcontrib-applehelp": "1.0.4", - "sphinxcontrib-htmlhelp": "2.0.1", - "sphinxcontrib-serializinghtml": "1.1.5", - "urllib3": "2.0.0", - }, - - # Starting from here, ensure all docutils versions are covered with - # supported Sphinx versions. 
Other packages are upgraded only when - # required by pip - (4, 0, 0): { - "PyYAML": "5.1", - }, - (4, 1, 0): { - "docutils": "0.17", - "Pygments": "2.19.1", - "Jinja2": "3.0.3", - "MarkupSafe": "2.0", - }, - (4, 3, 0): {}, - (4, 4, 0): {}, - (4, 5, 0): { - "docutils": "0.17.1", - }, - (5, 0, 0): {}, - (5, 1, 0): {}, - (5, 2, 0): { - "docutils": "0.18", - "Jinja2": "3.1.2", - "MarkupSafe": "2.0", - "PyYAML": "5.3.1", - }, - (5, 3, 0): { - "docutils": "0.18.1", - }, - (6, 0, 0): {}, - (6, 1, 0): {}, - (6, 2, 0): { - "PyYAML": "5.4.1", - }, - (7, 0, 0): {}, - (7, 1, 0): {}, - (7, 2, 0): { - "docutils": "0.19", - "PyYAML": "6.0.1", - "sphinxcontrib-serializinghtml": "1.1.9", - }, - (7, 2, 6): { - "docutils": "0.20", - }, - (7, 3, 0): { - "alabaster": "0.7.14", - "PyYAML": "6.0.1", - "tomli": "2.0.1", - }, - (7, 4, 0): { - "docutils": "0.20.1", - "PyYAML": "6.0.1", - }, - (8, 0, 0): { - "docutils": "0.21", - }, - (8, 1, 0): { - "docutils": "0.21.1", - "PyYAML": "6.0.1", - "sphinxcontrib-applehelp": "1.0.7", - "sphinxcontrib-devhelp": "1.0.6", - "sphinxcontrib-htmlhelp": "2.0.6", - "sphinxcontrib-qthelp": "1.0.6", - }, - (8, 2, 0): { - "docutils": "0.21.2", - "PyYAML": "6.0.1", - "sphinxcontrib-serializinghtml": "1.1.9", - }, -} - - -class AsyncCommands: - """Excecute command synchronously""" - - def __init__(self, fp=None): - - self.stdout = None - self.stderr = None - self.output = None - self.fp = fp - - def log(self, out, verbose, is_info=True): - out = out.removesuffix('\n') - - if verbose: - if is_info: - print(out) - else: - print(out, file=sys.stderr) - - if self.fp: - self.fp.write(out + "\n") - - async def _read(self, stream, verbose, is_info): - """Ancillary routine to capture while displaying""" - - while stream is not None: - line = await stream.readline() - if line: - out = line.decode("utf-8", errors="backslashreplace") - self.log(out, verbose, is_info) - if is_info: - self.stdout += out - else: - self.stderr += out - else: - break - - async def run(self, 
cmd, capture_output=False, check=False, - env=None, verbose=True): - - """ - Execute an arbitrary command, handling errors. - - Please notice that this class is not thread safe - """ - - self.stdout = "" - self.stderr = "" - - self.log("$ " + " ".join(cmd), verbose) - - proc = await asyncio.create_subprocess_exec(cmd[0], - *cmd[1:], - env=env, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE) - - # Handle input and output in realtime - await asyncio.gather( - self._read(proc.stdout, verbose, True), - self._read(proc.stderr, verbose, False), - ) - - await proc.wait() - - if check and proc.returncode > 0: - raise subprocess.CalledProcessError(returncode=proc.returncode, - cmd=" ".join(cmd), - output=self.stdout, - stderr=self.stderr) - - if capture_output: - if proc.returncode > 0: - self.log(f"Error {proc.returncode}", verbose=True, is_info=False) - return "" - - return self.output - - ret = subprocess.CompletedProcess(args=cmd, - returncode=proc.returncode, - stdout=self.stdout, - stderr=self.stderr) - - return ret - - -class SphinxVenv: - """ - Installs Sphinx on one virtual env per Sphinx version with a minimal - set of dependencies, adjusting them to each specific version. 
- """ - - def __init__(self): - """Initialize instance variables""" - - self.built_time = {} - self.first_run = True - - async def _handle_version(self, args, fp, - cur_ver, cur_requirements, python_bin): - """Handle a single Sphinx version""" - - cmd = AsyncCommands(fp) - - ver = ".".join(map(str, cur_ver)) - - if not self.first_run and args.wait_input and args.build: - ret = input("Press Enter to continue or 'a' to abort: ").strip().lower() - if ret == "a": - print("Aborted.") - sys.exit() - else: - self.first_run = False - - venv_dir = f"Sphinx_{ver}" - req_file = f"requirements_{ver}.txt" - - cmd.log(f"\nSphinx {ver} with {python_bin}", verbose=True) - - # Create venv - await cmd.run([python_bin, "-m", "venv", venv_dir], - verbose=args.verbose, check=True) - pip = os.path.join(venv_dir, "bin/pip") - - # Create install list - reqs = [] - for pkg, verstr in cur_requirements.items(): - reqs.append(f"{pkg}=={verstr}") - - reqs.append(f"Sphinx=={ver}") - - await cmd.run([pip, "install"] + reqs, check=True, verbose=args.verbose) - - # Freeze environment - result = await cmd.run([pip, "freeze"], verbose=False, check=True) - - # Pip install succeeded. Write requirements file - if args.req_file: - with open(req_file, "w", encoding="utf-8") as fp: - fp.write(result.stdout) - - if args.build: - start_time = time.time() - - # Prepare a venv environment - env = os.environ.copy() - bin_dir = os.path.join(venv_dir, "bin") - env["PATH"] = bin_dir + ":" + env["PATH"] - env["VIRTUAL_ENV"] = venv_dir - if "PYTHONHOME" in env: - del env["PYTHONHOME"] - - # Test doc build - await cmd.run(["make", "cleandocs"], env=env, check=True) - make = ["make"] - - if args.output: - sphinx_build = os.path.realpath(f"{bin_dir}/sphinx-build") - make += [f"O={args.output}", f"SPHINXBUILD={sphinx_build}"] - - if args.make_args: - make += args.make_args - - make += args.targets - - if args.verbose: - cmd.log(f". 
{bin_dir}/activate", verbose=True) - await cmd.run(make, env=env, check=True, verbose=True) - if args.verbose: - cmd.log("deactivate", verbose=True) - - end_time = time.time() - elapsed_time = end_time - start_time - hours, minutes = divmod(elapsed_time, 3600) - minutes, seconds = divmod(minutes, 60) - - hours = int(hours) - minutes = int(minutes) - seconds = int(seconds) - - self.built_time[ver] = f"{hours:02d}:{minutes:02d}:{seconds:02d}" - - cmd.log(f"Finished doc build for Sphinx {ver}. Elapsed time: {self.built_time[ver]}", verbose=True) - - async def run(self, args): - """ - Navigate though multiple Sphinx versions, handling each of them - on a loop. - """ - - if args.log: - fp = open(args.log, "w", encoding="utf-8") - if not args.verbose: - args.verbose = False - else: - fp = None - if not args.verbose: - args.verbose = True - - cur_requirements = {} - python_bin = min_python_bin - - vers = set(SPHINX_REQUIREMENTS.keys()) | set(args.versions) - - for cur_ver in sorted(vers): - if cur_ver in SPHINX_REQUIREMENTS: - new_reqs = SPHINX_REQUIREMENTS[cur_ver] - cur_requirements.update(new_reqs) - - if cur_ver in PYTHON_VER_CHANGES: # pylint: disable=R1715 - python_bin = PYTHON_VER_CHANGES[cur_ver] - - if cur_ver not in args.versions: - continue - - if args.min_version: - if cur_ver < args.min_version: - continue - - if args.max_version: - if cur_ver > args.max_version: - break - - await self._handle_version(args, fp, cur_ver, cur_requirements, - python_bin) - - if args.build: - cmd = AsyncCommands(fp) - cmd.log("\nSummary:", verbose=True) - for ver, elapsed_time in sorted(self.built_time.items()): - cmd.log(f"\tSphinx {ver} elapsed time: {elapsed_time}", - verbose=True) - - if fp: - fp.close() - -def parse_version(ver_str): - """Convert a version string into a tuple.""" - - return tuple(map(int, ver_str.split("."))) - - -DEFAULT_VERS = " - " -DEFAULT_VERS += "\n - ".join(map(lambda v: f"{v[0]}.{v[1]}.{v[2]}", - DEFAULT_VERSIONS_TO_TEST)) - -SCRIPT = 
os.path.relpath(__file__) - -DESCRIPTION = f""" -This tool allows creating Python virtual environments for different -Sphinx versions that are supported by the Linux Kernel build system. - -Besides creating the virtual environment, it can also test building -the documentation using "make htmldocs" (and/or other doc targets). - -If called without "--versions" argument, it covers the versions shipped -on major distros, plus the lowest supported version: - -{DEFAULT_VERS} - -A typical usage is to run: - - {SCRIPT} -m -l sphinx_builds.log - -This will create one virtual env for the default version set and run -"make htmldocs" for each version, creating a log file with the -excecuted commands on it. - -NOTE: The build time can be very long, specially on old versions. Also, there -is a known bug with Sphinx version 6.0.x: each subprocess uses a lot of -memory. That, together with "-jauto" may cause OOM killer to cause -failures at the doc generation. To minimize the risk, you may use the -"-a" command line parameter to constrain the built directories and/or -reduce the number of threads from "-jauto" to, for instance, "-j4": - - {SCRIPT} -m -V 6.0.1 -a "SPHINXDIRS=process" "SPHINXOPTS='-j4'" - -""" - -MAKE_TARGETS = [ - "htmldocs", - "texinfodocs", - "infodocs", - "latexdocs", - "pdfdocs", - "epubdocs", - "xmldocs", -] - -async def main(): - """Main program""" - - parser = argparse.ArgumentParser(description=DESCRIPTION, - formatter_class=argparse.RawDescriptionHelpFormatter) - - ver_group = parser.add_argument_group("Version range options") - - ver_group.add_argument('-V', '--versions', nargs="*", - default=DEFAULT_VERSIONS_TO_TEST,type=parse_version, - help='Sphinx versions to test') - ver_group.add_argument('--min-version', "--min", type=parse_version, - help='Sphinx minimal version') - ver_group.add_argument('--max-version', "--max", type=parse_version, - help='Sphinx maximum version') - ver_group.add_argument('-f', '--full', action='store_true', - help='Add all 
Sphinx (major,minor) supported versions to the version range') - - build_group = parser.add_argument_group("Build options") - - build_group.add_argument('-b', '--build', action='store_true', - help='Build documentation') - build_group.add_argument('-a', '--make-args', nargs="*", - help='extra arguments for make, like SPHINXDIRS=netlink/specs', - ) - build_group.add_argument('-t', '--targets', nargs="+", choices=MAKE_TARGETS, - default=[MAKE_TARGETS[0]], - help="make build targets. Default: htmldocs.") - build_group.add_argument("-o", '--output', - help="output directory for the make O=OUTPUT") - - other_group = parser.add_argument_group("Other options") - - other_group.add_argument('-r', '--req-file', action='store_true', - help='write a requirements.txt file') - other_group.add_argument('-l', '--log', - help='Log command output on a file') - other_group.add_argument('-v', '--verbose', action='store_true', - help='Verbose all commands') - other_group.add_argument('-i', '--wait-input', action='store_true', - help='Wait for an enter before going to the next version') - - args = parser.parse_args() - - if not args.make_args: - args.make_args = [] - - sphinx_versions = sorted(list(SPHINX_REQUIREMENTS.keys())) - - if args.full: - args.versions += list(SPHINX_REQUIREMENTS.keys()) - - venv = SphinxVenv() - await venv.run(args) - - -# Call main method -if __name__ == "__main__": - asyncio.run(main()) -- cgit v1.2.3 From 184414c6a6cac78ad6c46037a8afad5c9f04fba5 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 13:03:08 -0600 Subject: docs: move find-unused-docs.sh to tools/docs ...and update references accordingly. 
Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- scripts/find-unused-docs.sh | 62 --------------------------------------------- 1 file changed, 62 deletions(-) delete mode 100755 scripts/find-unused-docs.sh (limited to 'scripts') diff --git a/scripts/find-unused-docs.sh b/scripts/find-unused-docs.sh deleted file mode 100755 index d6d397fbf917..000000000000 --- a/scripts/find-unused-docs.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/bash -# (c) 2017, Jonathan Corbet -# sayli karnik -# -# This script detects files with kernel-doc comments for exported functions -# that are not included in documentation. -# -# usage: Run 'scripts/find-unused-docs.sh directory' from top level of kernel -# tree. -# -# example: $scripts/find-unused-docs.sh drivers/scsi -# -# Licensed under the terms of the GNU GPL License - -if ! [ -d "Documentation" ]; then - echo "Run from top level of kernel tree" - exit 1 -fi - -if [ "$#" -ne 1 ]; then - echo "Usage: scripts/find-unused-docs.sh directory" - exit 1 -fi - -if ! [ -d "$1" ]; then - echo "Directory $1 doesn't exist" - exit 1 -fi - -cd "$( dirname "${BASH_SOURCE[0]}" )" -cd .. - -cd Documentation/ - -echo "The following files contain kerneldoc comments for exported functions \ -that are not used in the formatted documentation" - -# FILES INCLUDED - -files_included=($(grep -rHR ".. kernel-doc" --include \*.rst | cut -d " " -f 3)) - -declare -A FILES_INCLUDED - -for each in "${files_included[@]}"; do - FILES_INCLUDED[$each]="$each" - done - -cd .. 
- -# FILES NOT INCLUDED - -for file in `find $1 -name '*.c'`; do - - if [[ ${FILES_INCLUDED[$file]+_} ]]; then - continue; - fi - str=$(PYTHONDONTWRITEBYTECODE=1 scripts/kernel-doc -export "$file" 2>/dev/null) - if [[ -n "$str" ]]; then - echo "$file" - fi - done - -- cgit v1.2.3 From 683e8cbaba7f0baf94a774ee17a1c0ddf3b243b4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 13 Aug 2025 13:08:22 -0600 Subject: docs: remove kernel-doc.pl We've been using the Python version and nobody has missed this one. All credit goes to Mauro Carvalho Chehab for creating the replacement. Reviewed-by: Mauro Carvalho Chehab Acked-by: Jani Nikula Signed-off-by: Jonathan Corbet --- scripts/kernel-doc.pl | 2439 ------------------------------------------------- 1 file changed, 2439 deletions(-) delete mode 100755 scripts/kernel-doc.pl (limited to 'scripts') diff --git a/scripts/kernel-doc.pl b/scripts/kernel-doc.pl deleted file mode 100755 index 5db23cbf4eb2..000000000000 --- a/scripts/kernel-doc.pl +++ /dev/null @@ -1,2439 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 -# vim: softtabstop=4 - -use warnings; -use strict; - -## Copyright (c) 1998 Michael Zucchi, All Rights Reserved ## -## Copyright (C) 2000, 1 Tim Waugh ## -## Copyright (C) 2001 Simon Huggins ## -## Copyright (C) 2005-2012 Randy Dunlap ## -## Copyright (C) 2012 Dan Luedtke ## -## ## -## #define enhancements by Armin Kuster ## -## Copyright (c) 2000 MontaVista Software, Inc. ## -# -# Copyright (C) 2022 Tomasz Warniełło (POD) - -use Pod::Usage qw/pod2usage/; - -=head1 NAME - -kernel-doc - Print formatted kernel documentation to stdout - -=head1 SYNOPSIS - - kernel-doc [-h] [-v] [-Werror] [-Wall] [-Wreturn] [-Wshort-desc[ription]] [-Wcontents-before-sections] - [ -man | - -rst [-enable-lineno] | - -none - ] - [ - -export | - -internal | - [-function NAME] ... | - [-nosymbol NAME] ... - ] - [-no-doc-sections] - [-export-file FILE] ... - FILE ... - -Run `kernel-doc -h` for details. 
- -=head1 DESCRIPTION - -Read C language source or header FILEs, extract embedded documentation comments, -and print formatted documentation to standard output. - -The documentation comments are identified by the "/**" opening comment mark. - -See Documentation/doc-guide/kernel-doc.rst for the documentation comment syntax. - -=cut - -# more perldoc at the end of the file - -## init lots of data - -my $errors = 0; -my $warnings = 0; -my $anon_struct_union = 0; - -# match expressions used to find embedded type information -my $type_constant = '\b``([^\`]+)``\b'; -my $type_constant2 = '\%([-_*\w]+)'; -my $type_func = '(\w+)\(\)'; -my $type_param = '\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; -my $type_param_ref = '([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)'; -my $type_fp_param = '\@(\w+)\(\)'; # Special RST handling for func ptr params -my $type_fp_param2 = '\@(\w+->\S+)\(\)'; # Special RST handling for structs with func ptr params -my $type_env = '(\$\w+)'; -my $type_enum = '\&(enum\s*([_\w]+))'; -my $type_struct = '\&(struct\s*([_\w]+))'; -my $type_typedef = '\&(typedef\s*([_\w]+))'; -my $type_union = '\&(union\s*([_\w]+))'; -my $type_member = '\&([_\w]+)(\.|->)([_\w]+)'; -my $type_fallback = '\&([_\w]+)'; -my $type_member_func = $type_member . '\(\)'; - -# Output conversion substitutions. 
-# One for each output format - -# these are pretty rough -my @highlights_man = ( - [$type_constant, "\$1"], - [$type_constant2, "\$1"], - [$type_func, "\\\\fB\$1\\\\fP"], - [$type_enum, "\\\\fI\$1\\\\fP"], - [$type_struct, "\\\\fI\$1\\\\fP"], - [$type_typedef, "\\\\fI\$1\\\\fP"], - [$type_union, "\\\\fI\$1\\\\fP"], - [$type_param, "\\\\fI\$1\\\\fP"], - [$type_param_ref, "\\\\fI\$1\$2\\\\fP"], - [$type_member, "\\\\fI\$1\$2\$3\\\\fP"], - [$type_fallback, "\\\\fI\$1\\\\fP"] - ); -my $blankline_man = ""; - -# rst-mode -my @highlights_rst = ( - [$type_constant, "``\$1``"], - [$type_constant2, "``\$1``"], - - # Note: need to escape () to avoid func matching later - [$type_member_func, "\\:c\\:type\\:`\$1\$2\$3\\\\(\\\\) <\$1>`"], - [$type_member, "\\:c\\:type\\:`\$1\$2\$3 <\$1>`"], - [$type_fp_param, "**\$1\\\\(\\\\)**"], - [$type_fp_param2, "**\$1\\\\(\\\\)**"], - [$type_func, "\$1()"], - [$type_enum, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_struct, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_typedef, "\\:c\\:type\\:`\$1 <\$2>`"], - [$type_union, "\\:c\\:type\\:`\$1 <\$2>`"], - - # in rst this can refer to any type - [$type_fallback, "\\:c\\:type\\:`\$1`"], - [$type_param_ref, "**\$1\$2**"] - ); -my $blankline_rst = "\n"; - -# read arguments -if ($#ARGV == -1) { - pod2usage( - -message => "No arguments!\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); -} - -my $kernelversion; - -my $dohighlight = ""; - -my $verbose = 0; -my $Werror = 0; -my $Wreturn = 0; -my $Wshort_desc = 0; -my $output_mode = "rst"; -my $output_preformatted = 0; -my $no_doc_sections = 0; -my $enable_lineno = 0; -my @highlights = @highlights_rst; -my $blankline = $blankline_rst; -my $modulename = "Kernel API"; - -use constant { - OUTPUT_ALL => 0, # output all symbols and doc sections - OUTPUT_INCLUDE => 1, # output only specified symbols - OUTPUT_EXPORTED => 2, # output exported symbols - OUTPUT_INTERNAL => 3, # output non-exported symbols -}; -my 
$output_selection = OUTPUT_ALL; -my $show_not_found = 0; # No longer used - -my @export_file_list; - -my @build_time; -if (defined($ENV{'KBUILD_BUILD_TIMESTAMP'}) && - (my $seconds = `date -d "${ENV{'KBUILD_BUILD_TIMESTAMP'}}" +%s`) ne '') { - @build_time = gmtime($seconds); -} else { - @build_time = localtime; -} - -my $man_date = ('January', 'February', 'March', 'April', 'May', 'June', - 'July', 'August', 'September', 'October', - 'November', 'December')[$build_time[4]] . - " " . ($build_time[5]+1900); - -# Essentially these are globals. -# They probably want to be tidied up, made more localised or something. -# CAVEAT EMPTOR! Some of the others I localised may not want to be, which -# could cause "use of undefined value" or other bugs. -my ($function, %function_table, %parametertypes, $declaration_purpose); -my %nosymbol_table = (); -my $declaration_start_line; -my ($type, $declaration_name, $return_type); -my ($newsection, $newcontents, $prototype, $brcount); - -if (defined($ENV{'KBUILD_VERBOSE'}) && $ENV{'KBUILD_VERBOSE'} =~ '1') { - $verbose = 1; -} - -if (defined($ENV{'KCFLAGS'})) { - my $kcflags = "$ENV{'KCFLAGS'}"; - - if ($kcflags =~ /(\s|^)-Werror(\s|$)/) { - $Werror = 1; - } -} - -# reading this variable is for backwards compat just in case -# someone was calling it with the variable from outside the -# kernel's build system -if (defined($ENV{'KDOC_WERROR'})) { - $Werror = "$ENV{'KDOC_WERROR'}"; -} -# other environment variables are converted to command-line -# arguments in cmd_checkdoc in the build system - -# Generated docbook code is inserted in a template at a point where -# docbook v3.1 requires a non-zero sequence of RefEntry's; see: -# https://www.oasis-open.org/docbook/documentation/reference/html/refentry.html -# We keep track of number of generated entries and generate a dummy -# if needs be to ensure the expanded template can be postprocessed -# into html. 
-my $section_counter = 0; - -my $lineprefix=""; - -# Parser states -use constant { - STATE_NORMAL => 0, # normal code - STATE_NAME => 1, # looking for function name - STATE_BODY_MAYBE => 2, # body - or maybe more description - STATE_BODY => 3, # the body of the comment - STATE_BODY_WITH_BLANK_LINE => 4, # the body, which has a blank line - STATE_PROTO => 5, # scanning prototype - STATE_DOCBLOCK => 6, # documentation block - STATE_INLINE => 7, # gathering doc outside main block -}; -my $state; -my $leading_space; - -# Inline documentation state -use constant { - STATE_INLINE_NA => 0, # not applicable ($state != STATE_INLINE) - STATE_INLINE_NAME => 1, # looking for member name (@foo:) - STATE_INLINE_TEXT => 2, # looking for member documentation - STATE_INLINE_END => 3, # done - STATE_INLINE_ERROR => 4, # error - Comment without header was found. - # Spit a warning as it's not - # proper kernel-doc and ignore the rest. -}; -my $inline_doc_state; - -#declaration types: can be -# 'function', 'struct', 'union', 'enum', 'typedef' -my $decl_type; - -# Name of the kernel-doc identifier for non-DOC markups -my $identifier; - -my $doc_start = '^/\*\*\s*$'; # Allow whitespace at end of comment start. -my $doc_end = '\*/'; -my $doc_com = '\s*\*\s*'; -my $doc_com_body = '\s*\* ?'; -my $doc_decl = $doc_com . '(\w+)'; -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -my $doc_sect = $doc_com . - '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:([^:].*)?$'; -my $doc_content = $doc_com_body . '(.*)'; -my $doc_block = $doc_com . 
'DOC:\s*(.*)?'; -my $doc_inline_start = '^\s*/\*\*\s*$'; -my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)'; -my $doc_inline_end = '^\s*\*/\s*$'; -my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$'; -my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;'; -my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*;'; -my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)}; -my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i; - -my %parameterdescs; -my %parameterdesc_start_lines; -my @parameterlist; -my %sections; -my @sectionlist; -my %section_start_lines; -my $sectcheck; -my $struct_actual; - -my $contents = ""; -my $new_start_line = 0; - -# the canonical section names. see also $doc_sect above. -my $section_default = "Description"; # default section -my $section_intro = "Introduction"; -my $section = $section_default; -my $section_context = "Context"; -my $section_return = "Return"; - -my $undescribed = "-- undescribed --"; - -reset_state(); - -while ($ARGV[0] =~ m/^--?(.*)/) { - my $cmd = $1; - shift @ARGV; - if ($cmd eq "man") { - $output_mode = "man"; - @highlights = @highlights_man; - $blankline = $blankline_man; - } elsif ($cmd eq "rst") { - $output_mode = "rst"; - @highlights = @highlights_rst; - $blankline = $blankline_rst; - } elsif ($cmd eq "none") { - $output_mode = "none"; - } elsif ($cmd eq "module") { # not needed for XML, inherits from calling document - $modulename = shift @ARGV; - } elsif ($cmd eq "function") { # to only output specific functions - $output_selection = OUTPUT_INCLUDE; - $function = shift @ARGV; - $function_table{$function} = 1; - } elsif ($cmd eq "nosymbol") { # Exclude specific symbols - my $symbol = shift @ARGV; - $nosymbol_table{$symbol} = 1; - } elsif ($cmd eq "export") { # only exported symbols - $output_selection = OUTPUT_EXPORTED; - %function_table = (); - } elsif ($cmd eq "internal") { # only non-exported symbols - $output_selection = 
OUTPUT_INTERNAL; - %function_table = (); - } elsif ($cmd eq "export-file") { - my $file = shift @ARGV; - push(@export_file_list, $file); - } elsif ($cmd eq "v") { - $verbose = 1; - } elsif ($cmd eq "Werror") { - $Werror = 1; - } elsif ($cmd eq "Wreturn") { - $Wreturn = 1; - } elsif ($cmd eq "Wshort-desc" or $cmd eq "Wshort-description") { - $Wshort_desc = 1; - } elsif ($cmd eq "Wall") { - $Wreturn = 1; - $Wshort_desc = 1; - } elsif (($cmd eq "h") || ($cmd eq "help")) { - pod2usage(-exitval => 0, -verbose => 2); - } elsif ($cmd eq 'no-doc-sections') { - $no_doc_sections = 1; - } elsif ($cmd eq 'enable-lineno') { - $enable_lineno = 1; - } elsif ($cmd eq 'show-not-found') { - $show_not_found = 1; # A no-op but don't fail - } else { - # Unknown argument - pod2usage( - -message => "Argument unknown!\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); - } - if ($#ARGV < 0){ - pod2usage( - -message => "FILE argument missing\n", - -exitval => 1, - -verbose => 99, - -sections => 'SYNOPSIS', - -output => \*STDERR, - ); - } -} - -# continue execution near EOF; - -sub findprog($) -{ - foreach(split(/:/, $ENV{PATH})) { - return "$_/$_[0]" if(-x "$_/$_[0]"); - } -} - -# get kernel version from env -sub get_kernel_version() { - my $version = 'unknown kernel version'; - - if (defined($ENV{'KERNELVERSION'})) { - $version = $ENV{'KERNELVERSION'}; - } - return $version; -} - -# -sub print_lineno { - my $lineno = shift; - if ($enable_lineno && defined($lineno)) { - print ".. LINENO " . $lineno . "\n"; - } -} - -sub emit_warning { - my $location = shift; - my $msg = shift; - print STDERR "$location: warning: $msg"; - ++$warnings; -} -## -# dumps section contents to arrays/hashes intended for that purpose. -# -sub dump_section { - my $file = shift; - my $name = shift; - my $contents = join "\n", @_; - - if ($name =~ m/$type_param/) { - $name = $1; - $parameterdescs{$name} = $contents; - $sectcheck = $sectcheck . $name . 
" "; - $parameterdesc_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } elsif ($name eq "@\.\.\.") { - $name = "..."; - $parameterdescs{$name} = $contents; - $sectcheck = $sectcheck . $name . " "; - $parameterdesc_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } else { - if (defined($sections{$name}) && ($sections{$name} ne "")) { - # Only warn on user specified duplicate section names. - if ($name ne $section_default) { - emit_warning("${file}:$.", "duplicate section name '$name'\n"); - } - $sections{$name} .= $contents; - } else { - $sections{$name} = $contents; - push @sectionlist, $name; - $section_start_lines{$name} = $new_start_line; - $new_start_line = 0; - } - } -} - -## -# dump DOC: section after checking that it should go out -# -sub dump_doc_section { - my $file = shift; - my $name = shift; - my $contents = join "\n", @_; - - if ($no_doc_sections) { - return; - } - - return if (defined($nosymbol_table{$name})); - - if (($output_selection == OUTPUT_ALL) || - (($output_selection == OUTPUT_INCLUDE) && - defined($function_table{$name}))) - { - dump_section($file, $name, $contents); - output_blockhead({'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'module' => $modulename, - 'content-only' => ($output_selection != OUTPUT_ALL), }); - } -} - -## -# output function -# -# parameterdescs, a hash. -# function => "function name" -# parameterlist => @list of parameters -# parameterdescs => %parameter descriptions -# sectionlist => @list of sections -# sections => %section descriptions -# - -sub output_highlight { - my $contents = join "\n",@_; - my $line; - -# DEBUG -# if (!defined $contents) { -# use Carp; -# confess "output_highlight got called with no args?\n"; -# } - -# print STDERR "contents b4:$contents\n"; - eval $dohighlight; - die $@ if $@; -# print STDERR "contents af:$contents\n"; - - foreach $line (split "\n", $contents) { - if (! $output_preformatted) { - $line =~ s/^\s*//; - } - if ($line eq ""){ - if (! 
$output_preformatted) { - print $lineprefix, $blankline; - } - } else { - if ($output_mode eq "man" && substr($line, 0, 1) eq ".") { - print "\\&$line"; - } else { - print $lineprefix, $line; - } - } - print "\n"; - } -} - -## -# output function in man -sub output_function_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - my $func_macro = $args{'func_macro'}; - my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - - print ".TH \"$args{'function'}\" 9 \"$args{'function'}\" \"$man_date\" \"Kernel Hacker's Manual\" LINUX\n"; - - print ".SH NAME\n"; - print $args{'function'} . " \\- " . $args{'purpose'} . "\n"; - - print ".SH SYNOPSIS\n"; - if ($args{'functiontype'} ne "") { - print ".B \"" . $args{'functiontype'} . "\" " . $args{'function'} . "\n"; - } else { - print ".B \"" . $args{'function'} . "\n"; - } - $count = 0; - my $parenth = "("; - my $post = ","; - foreach my $parameter (@{$args{'parameterlist'}}) { - if ($count == $#{$args{'parameterlist'}}) { - $post = ");"; - } - $type = $args{'parametertypes'}{$parameter}; - if ($type =~ m/$function_pointer/) { - # pointer-to-function - print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n"; - } else { - $type =~ s/([^\*])$/$1 /; - print ".BI \"" . $parenth . $type . "\" " . " \"" . $post . "\"\n"; - } - $count++; - $parenth = ""; - } - - $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - if ($paramcount >= 0) { - print ".SH ARGUMENTS\n"; - } - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - print ".IP \"" . $parameter . 
"\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"", uc $section, "\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output enum in man -sub output_enum_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - - print ".TH \"$args{'module'}\" 9 \"enum $args{'enum'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print "enum " . $args{'enum'} . " \\- " . $args{'purpose'} . "\n"; - - print ".SH SYNOPSIS\n"; - print "enum " . $args{'enum'} . " {\n"; - $count = 0; - foreach my $parameter (@{$args{'parameterlist'}}) { - print ".br\n.BI \" $parameter\"\n"; - if ($count == $#{$args{'parameterlist'}}) { - print "\n};\n"; - last; - } else { - print ", \n.br\n"; - } - $count++; - } - - print ".SH Constants\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - print ".IP \"" . $parameter . "\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output struct in man -sub output_struct_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - print ".TH \"$args{'module'}\" 9 \"" . $args{'type'} . " " . $args{'struct'} . "\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print $args{'type'} . " " . $args{'struct'} . " \\- " . $args{'purpose'} . "\n"; - - my $declaration = $args{'definition'}; - $declaration =~ s/\t/ /g; - $declaration =~ s/\n/"\n.br\n.BI \"/g; - print ".SH SYNOPSIS\n"; - print $args{'type'} . " " . $args{'struct'} . 
" {\n.br\n"; - print ".BI \"$declaration\n};\n.br\n\n"; - - print ".SH Members\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - ($parameter =~ /^#/) && next; - - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; - print ".IP \"" . $parameter . "\" 12\n"; - output_highlight($args{'parameterdescs'}{$parameter_name}); - } - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output typedef in man -sub output_typedef_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - print ".TH \"$args{'module'}\" 9 \"$args{'typedef'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - print ".SH NAME\n"; - print "typedef " . $args{'typedef'} . " \\- " . $args{'purpose'} . "\n"; - - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -sub output_blockhead_man(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $count; - - print ".TH \"$args{'module'}\" 9 \"$args{'module'}\" \"$man_date\" \"API Manual\" LINUX\n"; - - foreach $section (@{$args{'sectionlist'}}) { - print ".SH \"$section\"\n"; - output_highlight($args{'sections'}{$section}); - } -} - -## -# output in restructured text -# - -# -# This could use some work; it's used to output the DOC: sections, and -# starts by putting out the name of the doc section itself, but that tends -# to duplicate a header already in the template file. -# -sub output_blockhead_rst(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - - foreach $section (@{$args{'sectionlist'}}) { - next if (defined($nosymbol_table{$section})); - - if ($output_selection != OUTPUT_INCLUDE) { - print ".. 
_$section:\n\n"; - print "**$section**\n\n"; - } - print_lineno($section_start_lines{$section}); - output_highlight_rst($args{'sections'}{$section}); - print "\n"; - } -} - -# -# Apply the RST highlights to a sub-block of text. -# -sub highlight_block($) { - # The dohighlight kludge requires the text be called $contents - my $contents = shift; - eval $dohighlight; - die $@ if $@; - return $contents; -} - -# -# Regexes used only here. -# -my $sphinx_literal = '^[^.].*::$'; -my $sphinx_cblock = '^\.\.\ +code-block::'; - -sub output_highlight_rst { - my $input = join "\n",@_; - my $output = ""; - my $line; - my $in_literal = 0; - my $litprefix; - my $block = ""; - - foreach $line (split "\n",$input) { - # - # If we're in a literal block, see if we should drop out - # of it. Otherwise pass the line straight through unmunged. - # - if ($in_literal) { - if (! ($line =~ /^\s*$/)) { - # - # If this is the first non-blank line in a literal - # block we need to figure out what the proper indent is. - # - if ($litprefix eq "") { - $line =~ /^(\s*)/; - $litprefix = '^' . $1; - $output .= $line . "\n"; - } elsif (! ($line =~ /$litprefix/)) { - $in_literal = 0; - } else { - $output .= $line . "\n"; - } - } else { - $output .= $line . "\n"; - } - } - # - # Not in a literal block (or just dropped out) - # - if (! $in_literal) { - $block .= $line . "\n"; - if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) { - $in_literal = 1; - $litprefix = ""; - $output .= highlight_block($block); - $block = "" - } - } - } - - if ($block) { - $output .= highlight_block($block); - } - - $output =~ s/^\n+//g; - $output =~ s/\n+$//g; - - foreach $line (split "\n", $output) { - print $lineprefix . $line . 
"\n"; - } -} - -sub output_function_rst(%) { - my %args = %{$_[0]}; - my ($parameter, $section); - my $oldprefix = $lineprefix; - - my $signature = ""; - my $func_macro = $args{'func_macro'}; - my $paramcount = $#{$args{'parameterlist'}}; # -1 is empty - - if ($func_macro) { - $signature = $args{'function'}; - } else { - if ($args{'functiontype'}) { - $signature = $args{'functiontype'} . " "; - } - $signature .= $args{'function'} . " ("; - } - - my $count = 0; - foreach my $parameter (@{$args{'parameterlist'}}) { - if ($count ne 0) { - $signature .= ", "; - } - $count++; - $type = $args{'parametertypes'}{$parameter}; - - if ($type =~ m/$function_pointer/) { - # pointer-to-function - $signature .= $1 . $parameter . ") (" . $2 . ")"; - } else { - $signature .= $type; - } - } - - if (!$func_macro) { - $signature .= ")"; - } - - if ($args{'typedef'} || $args{'functiontype'} eq "") { - print ".. c:macro:: ". $args{'function'} . "\n\n"; - - if ($args{'typedef'}) { - print_lineno($declaration_start_line); - print " **Typedef**: "; - $lineprefix = ""; - output_highlight_rst($args{'purpose'}); - print "\n\n**Syntax**\n\n"; - print " ``$signature``\n\n"; - } else { - print "``$signature``\n\n"; - } - } else { - print ".. c:function:: $signature\n\n"; - } - - if (!$args{'typedef'}) { - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - } - - # - # Put our descriptive text into a container (thus an HTML
) to help - # set the function prototypes apart. - # - $lineprefix = " "; - if ($paramcount >= 0) { - print ".. container:: kernelindent\n\n"; - print $lineprefix . "**Parameters**\n\n"; - } - foreach $parameter (@{$args{'parameterlist'}}) { - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - $type = $args{'parametertypes'}{$parameter}; - - if ($type ne "") { - print $lineprefix . "``$type``\n"; - } else { - print $lineprefix . "``$parameter``\n"; - } - - print_lineno($parameterdesc_start_lines{$parameter_name}); - - $lineprefix = " "; - if (defined($args{'parameterdescs'}{$parameter_name}) && - $args{'parameterdescs'}{$parameter_name} ne $undescribed) { - output_highlight_rst($args{'parameterdescs'}{$parameter_name}); - } else { - print $lineprefix . "*undescribed*\n"; - } - $lineprefix = " "; - print "\n"; - } - - output_section_rst(@_); - $lineprefix = $oldprefix; -} - -sub output_section_rst(%) { - my %args = %{$_[0]}; - my $section; - my $oldprefix = $lineprefix; - - foreach $section (@{$args{'sectionlist'}}) { - print $lineprefix . "**$section**\n\n"; - print_lineno($section_start_lines{$section}); - output_highlight_rst($args{'sections'}{$section}); - print "\n"; - } - print "\n"; -} - -sub output_enum_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - my $count; - my $outer; - - my $name = $args{'enum'}; - print "\n\n.. c:enum:: " . $name . "\n\n"; - - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - print ".. container:: kernelindent\n\n"; - $outer = $lineprefix . " "; - $lineprefix = $outer . " "; - print $outer . "**Constants**\n\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - print $outer . "``$parameter``\n"; - - if ($args{'parameterdescs'}{$parameter} ne $undescribed) { - output_highlight_rst($args{'parameterdescs'}{$parameter}); - } else { - print $lineprefix . 
"*undescribed*\n"; - } - print "\n"; - } - print "\n"; - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -sub output_typedef_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - my $name; - - $name = $args{'typedef'}; - - print "\n\n.. c:type:: " . $name . "\n\n"; - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -sub output_struct_rst(%) { - my %args = %{$_[0]}; - my ($parameter); - my $oldprefix = $lineprefix; - - my $name = $args{'struct'}; - if ($args{'type'} eq 'union') { - print "\n\n.. c:union:: " . $name . "\n\n"; - } else { - print "\n\n.. c:struct:: " . $name . "\n\n"; - } - - print_lineno($declaration_start_line); - $lineprefix = " "; - output_highlight_rst($args{'purpose'}); - print "\n"; - - print ".. container:: kernelindent\n\n"; - print $lineprefix . "**Definition**::\n\n"; - my $declaration = $args{'definition'}; - $lineprefix = $lineprefix . " "; - $declaration =~ s/\t/$lineprefix/g; - print $lineprefix . $args{'type'} . " " . $args{'struct'} . " {\n$declaration" . $lineprefix . "};\n\n"; - - $lineprefix = " "; - print $lineprefix . "**Members**\n\n"; - foreach $parameter (@{$args{'parameterlist'}}) { - ($parameter =~ /^#/) && next; - - my $parameter_name = $parameter; - $parameter_name =~ s/\[.*//; - - ($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next; - $type = $args{'parametertypes'}{$parameter}; - print_lineno($parameterdesc_start_lines{$parameter_name}); - print $lineprefix . "``" . $parameter . 
"``\n"; - $lineprefix = " "; - output_highlight_rst($args{'parameterdescs'}{$parameter_name}); - $lineprefix = " "; - print "\n"; - } - print "\n"; - - $lineprefix = $oldprefix; - output_section_rst(@_); -} - -## none mode output functions - -sub output_function_none(%) { -} - -sub output_enum_none(%) { -} - -sub output_typedef_none(%) { -} - -sub output_struct_none(%) { -} - -sub output_blockhead_none(%) { -} - -## -# generic output function for all types (function, struct/union, typedef, enum); -# calls the generated, variable output_ function name based on -# functype and output_mode -sub output_declaration { - no strict 'refs'; - my $name = shift; - my $functype = shift; - my $func = "output_${functype}_$output_mode"; - - return if (defined($nosymbol_table{$name})); - - if (($output_selection == OUTPUT_ALL) || - (($output_selection == OUTPUT_INCLUDE || - $output_selection == OUTPUT_EXPORTED) && - defined($function_table{$name})) || - ($output_selection == OUTPUT_INTERNAL && - !($functype eq "function" && defined($function_table{$name})))) - { - &$func(@_); - $section_counter++; - } -} - -## -# generic output function - calls the right one based on current output mode. -sub output_blockhead { - no strict 'refs'; - my $func = "output_blockhead_" . $output_mode; - &$func(@_); - $section_counter++; -} - -## -# takes a declaration (struct, union, enum, typedef) and -# invokes the right handler. NOT called for functions. -sub dump_declaration($$) { - no strict 'refs'; - my ($prototype, $file) = @_; - my $func = "dump_" . $decl_type; - &$func(@_); -} - -sub dump_union($$) { - dump_struct(@_); -} - -sub dump_struct($$) { - my $x = shift; - my $file = shift; - my $decl_type; - my $members; - my $type = qr{struct|union}; - # For capturing struct/union definition body, i.e. 
"{members*}qualifiers*" - my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned}; - my $definition_body = qr{\{(.*)\}\s*$qualifiers*}; - my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;}; - - if ($x =~ /($type)\s+(\w+)\s*$definition_body/) { - $decl_type = $1; - $declaration_name = $2; - $members = $3; - } elsif ($x =~ /typedef\s+($type)\s*$definition_body\s*(\w+)\s*;/) { - $decl_type = $1; - $declaration_name = $3; - $members = $2; - } - - if ($members) { - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for $decl_type $identifier. Prototype was for $decl_type $declaration_name instead\n"); - return; - } - - # ignore members marked private: - $members =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; - $members =~ s/\/\*\s*private:.*//gosi; - # strip comments: - $members =~ s/\/\*.*?\*\///gos; - # strip attributes - $members =~ s/\s*$attribute/ /gi; - $members =~ s/\s*__aligned\s*\([^;]*\)/ /gos; - $members =~ s/\s*__counted_by\s*\([^;]*\)/ /gos; - $members =~ s/\s*__counted_by_(le|be)\s*\([^;]*\)/ /gos; - $members =~ s/\s*__packed\s*/ /gos; - $members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos; - $members =~ s/\s*____cacheline_aligned_in_smp/ /gos; - $members =~ s/\s*____cacheline_aligned/ /gos; - # unwrap struct_group(): - # - first eat non-declaration parameters and rewrite for final match - # - then remove macro, outer parens, and trailing semicolon - $members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos; - $members =~ s/\bstruct_group_attr\s*\(([^,]*,){2}/STRUCT_GROUP(/gos; - $members =~ s/\bstruct_group_tagged\s*\(([^,]*),([^,]*),/struct $1 $2; STRUCT_GROUP(/gos; - $members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos; - $members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos; - - my $args = qr{([^,)]+)}; - # replace DECLARE_BITMAP - $members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, 
__ETHTOOL_LINK_MODE_MASK_NBITS)/gos; - $members =~ s/DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, PHY_INTERFACE_MODE_MAX)/gos; - $members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos; - # replace DECLARE_HASHTABLE - $members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos; - # replace DECLARE_KFIFO - $members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos; - # replace DECLARE_KFIFO_PTR - $members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos; - # replace DECLARE_FLEX_ARRAY - $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos; - #replace DEFINE_DMA_UNMAP_ADDR - $members =~ s/DEFINE_DMA_UNMAP_ADDR\s*\($args\)/dma_addr_t $1/gos; - #replace DEFINE_DMA_UNMAP_LEN - $members =~ s/DEFINE_DMA_UNMAP_LEN\s*\($args\)/__u32 $1/gos; - my $declaration = $members; - - # Split nested struct/union elements as newer ones - while ($members =~ m/$struct_members/) { - my $newmember; - my $maintype = $1; - my $ids = $4; - my $content = $3; - foreach my $id(split /,/, $ids) { - $newmember .= "$maintype $id; "; - - $id =~ s/[:\[].*//; - $id =~ s/^\s*\**(\S+)\s*/$1/; - foreach my $arg (split /;/, $content) { - next if ($arg =~ m/^\s*$/); - if ($arg =~ m/^([^\(]+\(\*?\s*)([\w\.]*)(\s*\).*)/) { - # pointer-to-function - my $type = $1; - my $name = $2; - my $extra = $3; - next if (!$name); - if ($id =~ m/^\s*$/) { - # anonymous struct/union - $newmember .= "$type$name$extra; "; - } else { - $newmember .= "$type$id.$name$extra; "; - } - } else { - my $type; - my $names; - $arg =~ s/^\s+//; - $arg =~ s/\s+$//; - # Handle bitmaps - $arg =~ s/:\s*\d+\s*//g; - # Handle arrays - $arg =~ s/\[.*\]//g; - # The type may have multiple words, - # and multiple IDs can be defined, like: - # const struct foo, *bar, foobar - # So, we remove spaces when parsing the - # names, in order to match just names - # and commas for the names - $arg =~ s/\s*,\s*/,/g; - if ($arg =~ 
m/(.*)\s+([\S+,]+)/) { - $type = $1; - $names = $2; - } else { - $newmember .= "$arg; "; - next; - } - foreach my $name (split /,/, $names) { - $name =~ s/^\s*\**(\S+)\s*/$1/; - next if (($name =~ m/^\s*$/)); - if ($id =~ m/^\s*$/) { - # anonymous struct/union - $newmember .= "$type $name; "; - } else { - $newmember .= "$type $id.$name; "; - } - } - } - } - } - $members =~ s/$struct_members/$newmember/; - } - - # Ignore other nested elements, like enums - $members =~ s/(\{[^\{\}]*\})//g; - - create_parameterlist($members, ';', $file, $declaration_name); - check_sections($file, $declaration_name, $decl_type, $sectcheck, $struct_actual); - - # Adjust declaration for better display - $declaration =~ s/([\{;])/$1\n/g; - $declaration =~ s/\}\s+;/};/g; - # Better handle inlined enums - do {} while ($declaration =~ s/(enum\s+\{[^\}]+),([^\n])/$1,\n$2/); - - my @def_args = split /\n/, $declaration; - my $level = 1; - $declaration = ""; - foreach my $clause (@def_args) { - $clause =~ s/^\s+//; - $clause =~ s/\s+$//; - $clause =~ s/\s+/ /; - next if (!$clause); - $level-- if ($clause =~ m/(\})/ && $level > 1); - if (!($clause =~ m/^\s*#/)) { - $declaration .= "\t" x $level; - } - $declaration .= "\t" . $clause . 
"\n"; - $level++ if ($clause =~ m/(\{)/ && !($clause =~m/\}/)); - } - output_declaration($declaration_name, - 'struct', - {'struct' => $declaration_name, - 'module' => $modulename, - 'definition' => $declaration, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'type' => $decl_type - }); - } else { - print STDERR "${file}:$.: error: Cannot parse struct or union!\n"; - ++$errors; - } -} - - -sub show_warnings($$) { - my $functype = shift; - my $name = shift; - - return 0 if (defined($nosymbol_table{$name})); - - return 1 if ($output_selection == OUTPUT_ALL); - - if ($output_selection == OUTPUT_EXPORTED) { - if (defined($function_table{$name})) { - return 1; - } else { - return 0; - } - } - if ($output_selection == OUTPUT_INTERNAL) { - if (!($functype eq "function" && defined($function_table{$name}))) { - return 1; - } else { - return 0; - } - } - if ($output_selection == OUTPUT_INCLUDE) { - if (defined($function_table{$name})) { - return 1; - } else { - return 0; - } - } - die("Please add the new output type at show_warnings()"); -} - -sub dump_enum($$) { - my $x = shift; - my $file = shift; - my $members; - - # ignore members marked private: - $x =~ s/\/\*\s*private:.*?\/\*\s*public:.*?\*\///gosi; - $x =~ s/\/\*\s*private:.*}/}/gosi; - - $x =~ s@/\*.*?\*/@@gos; # strip comments. - # strip #define macros inside enums - $x =~ s@#\s*((define|ifdef|if)\s+|endif)[^;]*;@@gos; - - if ($x =~ /typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;/) { - $declaration_name = $2; - $members = $1; - } elsif ($x =~ /enum\s+(\w*)\s*\{(.*)\}/) { - $declaration_name = $1; - $members = $2; - } - - if ($members) { - if ($identifier ne $declaration_name) { - if ($identifier eq "") { - emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n"); - } else { - emit_warning("${file}:$.", "expecting prototype for enum $identifier. 
Prototype was for enum $declaration_name instead\n"); - } - return; - } - $declaration_name = "(anonymous)" if ($declaration_name eq ""); - - my %_members; - - $members =~ s/\s+$//; - $members =~ s/\([^;]*?[\)]//g; - - foreach my $arg (split ',', $members) { - $arg =~ s/^\s*(\w+).*/$1/; - push @parameterlist, $arg; - if (!$parameterdescs{$arg}) { - $parameterdescs{$arg} = $undescribed; - if (show_warnings("enum", $declaration_name)) { - emit_warning("${file}:$.", "Enum value '$arg' not described in enum '$declaration_name'\n"); - } - } - $_members{$arg} = 1; - } - - while (my ($k, $v) = each %parameterdescs) { - if (!exists($_members{$k})) { - if (show_warnings("enum", $declaration_name)) { - emit_warning("${file}:$.", "Excess enum value '$k' description in '$declaration_name'\n"); - } - } - } - - output_declaration($declaration_name, - 'enum', - {'enum' => $declaration_name, - 'module' => $modulename, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - } else { - print STDERR "${file}:$.: error: Cannot parse enum!\n"; - ++$errors; - } -} - -my $typedef_type = qr { ((?:\s+[\w\*]+\b){0,7}\s+(?:\w+\b|\*+))\s* }x; -my $typedef_ident = qr { \*?\s*(\w\S+)\s* }x; -my $typedef_args = qr { \s*\((.*)\); }x; - -my $typedef1 = qr { typedef$typedef_type\($typedef_ident\)$typedef_args }x; -my $typedef2 = qr { typedef$typedef_type$typedef_ident$typedef_args }x; - -sub dump_typedef($$) { - my $x = shift; - my $file = shift; - - $x =~ s@/\*.*?\*/@@gos; # strip comments. - - # Parse function typedef prototypes - if ($x =~ $typedef1 || $x =~ $typedef2) { - $return_type = $1; - $declaration_name = $2; - my $args = $3; - $return_type =~ s/^\s+//; - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for typedef $identifier. 
Prototype was for typedef $declaration_name instead\n"); - return; - } - - create_parameterlist($args, ',', $file, $declaration_name); - - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'typedef' => 1, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - return; - } - - while (($x =~ /\(*.\)\s*;$/) || ($x =~ /\[*.\]\s*;$/)) { - $x =~ s/\(*.\)\s*;$/;/; - $x =~ s/\[*.\]\s*;$/;/; - } - - if ($x =~ /typedef.*\s+(\w+)\s*;/) { - $declaration_name = $1; - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for typedef $identifier. Prototype was for typedef $declaration_name instead\n"); - return; - } - - output_declaration($declaration_name, - 'typedef', - {'typedef' => $declaration_name, - 'module' => $modulename, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose - }); - } else { - print STDERR "${file}:$.: error: Cannot parse typedef!\n"; - ++$errors; - } -} - -sub save_struct_actual($) { - my $actual = shift; - - # strip all spaces from the actual param so that it looks like one string item - $actual =~ s/\s*//g; - $struct_actual = $struct_actual . $actual . 
" "; -} - -sub create_parameterlist($$$$) { - my $args = shift; - my $splitter = shift; - my $file = shift; - my $declaration_name = shift; - my $type; - my $param; - - # temporarily replace commas inside function pointer definition - my $arg_expr = qr{\([^\),]+}; - while ($args =~ /$arg_expr,/) { - $args =~ s/($arg_expr),/$1#/g; - } - - foreach my $arg (split($splitter, $args)) { - # strip comments - $arg =~ s/\/\*.*\*\///; - # ignore argument attributes - $arg =~ s/\sPOS0?\s/ /; - # strip leading/trailing spaces - $arg =~ s/^\s*//; - $arg =~ s/\s*$//; - $arg =~ s/\s+/ /; - - if ($arg =~ /^#/) { - # Treat preprocessor directive as a typeless variable just to fill - # corresponding data structures "correctly". Catch it later in - # output_* subs. - push_parameter($arg, "", "", $file); - } elsif ($arg =~ m/\(.+\)\s*\(/) { - # pointer-to-function - $arg =~ tr/#/,/; - $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/; - $param = $1; - $type = $arg; - $type =~ s/([^\(]+\(\*?)\s*$param/$1/; - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } elsif ($arg =~ m/\(.+\)\s*\[/) { - # array-of-pointers - $arg =~ tr/#/,/; - $arg =~ m/[^\(]+\(\s*\*\s*([\w\[\]\.]*?)\s*(\s*\[\s*[\w]+\s*\]\s*)*\)/; - $param = $1; - $type = $arg; - $type =~ s/([^\(]+\(\*?)\s*$param/$1/; - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } elsif ($arg) { - $arg =~ s/\s*:\s*/:/g; - $arg =~ s/\s*\[/\[/g; - - my @args = split('\s*,\s*', $arg); - if ($args[0] =~ m/\*/) { - $args[0] =~ s/(\*+)\s*/ $1/; - } - - my @first_arg; - if ($args[0] =~ /^(.*\s+)(.*?\[.*\].*)$/) { - shift @args; - push(@first_arg, split('\s+', $1)); - push(@first_arg, $2); - } else { - @first_arg = split('\s+', shift @args); - } - - unshift(@args, pop @first_arg); - $type = join " ", @first_arg; - - foreach $param (@args) { - if ($param =~ m/^(\*+)\s*(.*)/) { - save_struct_actual($2); - - push_parameter($2, "$type $1", $arg, $file, 
$declaration_name); - } elsif ($param =~ m/(.*?):(\w+)/) { - if ($type ne "") { # skip unnamed bit-fields - save_struct_actual($1); - push_parameter($1, "$type:$2", $arg, $file, $declaration_name) - } - } else { - save_struct_actual($param); - push_parameter($param, $type, $arg, $file, $declaration_name); - } - } - } - } -} - -sub push_parameter($$$$$) { - my $param = shift; - my $type = shift; - my $org_arg = shift; - my $file = shift; - my $declaration_name = shift; - - if (($anon_struct_union == 1) && ($type eq "") && - ($param eq "}")) { - return; # ignore the ending }; from anon. struct/union - } - - $anon_struct_union = 0; - $param =~ s/[\[\)].*//; - - if ($type eq "" && $param =~ /\.\.\.$/) - { - if (!$param =~ /\w\.\.\.$/) { - # handles unnamed variable parameters - $param = "..."; - } elsif ($param =~ /\w\.\.\.$/) { - # for named variable parameters of the form `x...`, remove the dots - $param =~ s/\.\.\.$//; - } - if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") { - $parameterdescs{$param} = "variable arguments"; - } - } - elsif ($type eq "" && ($param eq "" or $param eq "void")) - { - $param="void"; - $parameterdescs{void} = "no arguments"; - } - elsif ($type eq "" && ($param eq "struct" or $param eq "union")) - # handle unnamed (anonymous) union or struct: - { - $type = $param; - $param = "{unnamed_" . $param . 
"}"; - $parameterdescs{$param} = "anonymous\n"; - $anon_struct_union = 1; - } - elsif ($param =~ "__cacheline_group" ) - # handle cache group enforcing variables: they do not need be described in header files - { - return; # ignore __cacheline_group_begin and __cacheline_group_end - } - - # warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements); - # Note: It will also ignore void params and unnamed structs/unions - if (!defined $parameterdescs{$param} && $param !~ /^#/) { - $parameterdescs{$param} = $undescribed; - - if (show_warnings($type, $declaration_name) && $param !~ /\./) { - emit_warning("${file}:$.", "Function parameter or struct member '$param' not described in '$declaration_name'\n"); - } - } - - # strip spaces from $param so that it is one continuous string - # on @parameterlist; - # this fixes a problem where check_sections() cannot find - # a parameter like "addr[6 + 2]" because it actually appears - # as "addr[6", "+", "2]" on the parameter list; - # but it's better to maintain the param string unchanged for output, - # so just weaken the string compare in check_sections() to ignore - # "[blah" in a parameter string; - ###$param =~ s/\s*//g; - push @parameterlist, $param; - $org_arg =~ s/\s\s+/ /g; - $parametertypes{$param} = $org_arg; -} - -sub check_sections($$$$$) { - my ($file, $decl_name, $decl_type, $sectcheck, $prmscheck) = @_; - my @sects = split ' ', $sectcheck; - my @prms = split ' ', $prmscheck; - my $err; - my ($px, $sx); - my $prm_clean; # strip trailing "[array size]" and/or beginning "*" - - foreach $sx (0 .. $#sects) { - $err = 1; - foreach $px (0 .. 
$#prms) { - $prm_clean = $prms[$px]; - $prm_clean =~ s/\[.*\]//; - $prm_clean =~ s/$attribute//i; - # ignore array size in a parameter string; - # however, the original param string may contain - # spaces, e.g.: addr[6 + 2] - # and this appears in @prms as "addr[6" since the - # parameter list is split at spaces; - # hence just ignore "[..." for the sections check; - $prm_clean =~ s/\[.*//; - - ##$prm_clean =~ s/^\**//; - if ($prm_clean eq $sects[$sx]) { - $err = 0; - last; - } - } - if ($err) { - if ($decl_type eq "function") { - emit_warning("${file}:$.", - "Excess function parameter " . - "'$sects[$sx]' " . - "description in '$decl_name'\n"); - } elsif (($decl_type eq "struct") or - ($decl_type eq "union")) { - emit_warning("${file}:$.", - "Excess $decl_type member " . - "'$sects[$sx]' " . - "description in '$decl_name'\n"); - } - } - } -} - -## -# Checks the section describing the return value of a function. -sub check_return_section { - my $file = shift; - my $declaration_name = shift; - my $return_type = shift; - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type. (But don't ignore "void *") - if (($return_type eq "") || ($return_type =~ /void\s*\w*\s*$/)) { - return; - } - - if (!defined($sections{$section_return}) || - $sections{$section_return} eq "") - { - emit_warning("${file}:$.", - "No description found for return value of " . - "'$declaration_name'\n"); - } -} - -## -# takes a function prototype and the name of the current file being -# processed and spits out all the details stored in the global -# arrays/hashes. 
-sub dump_function($$) { - my $prototype = shift; - my $file = shift; - my $func_macro = 0; - - print_lineno($new_start_line); - - $prototype =~ s/^static +//; - $prototype =~ s/^extern +//; - $prototype =~ s/^asmlinkage +//; - $prototype =~ s/^inline +//; - $prototype =~ s/^__inline__ +//; - $prototype =~ s/^__inline +//; - $prototype =~ s/^__always_inline +//; - $prototype =~ s/^noinline +//; - $prototype =~ s/^__FORTIFY_INLINE +//; - $prototype =~ s/__init +//; - $prototype =~ s/__init_or_module +//; - $prototype =~ s/__deprecated +//; - $prototype =~ s/__flatten +//; - $prototype =~ s/__meminit +//; - $prototype =~ s/__must_check +//; - $prototype =~ s/__weak +//; - $prototype =~ s/__sched +//; - $prototype =~ s/_noprof//; - $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//; - $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//; - $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//; - $prototype =~ s/DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)/$1, $2/; - my $define = $prototype =~ s/^#\s*define\s+//; #ak added - $prototype =~ s/__attribute_const__ +//; - $prototype =~ s/__attribute__\s*\(\( - (?: - [\w\s]++ # attribute name - (?:\([^)]*+\))? # attribute arguments - \s*+,? # optional comma at the end - )+ - \)\)\s+//x; - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2. Function name - # 3. Function parameters. - # - # All the while we have to watch out for function pointer parameters - # (which IIRC is what the two sections are for), C types (these - # regexps don't even start to express all the possibilities), and - # so on. 
- # - # If you mess with these regexps, it's a good idea to check that - # the following functions' documentation still comes out right: - # - parport_register_device (function pointer parameters) - # - atomic_set (macro) - # - pci_match_device, __copy_to_user (long return type) - my $name = qr{[a-zA-Z0-9_~:]+}; - my $prototype_end1 = qr{[^\(]*}; - my $prototype_end2 = qr{[^\{]*}; - my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)}; - my $type1 = qr{[\w\s]+}; - my $type2 = qr{$type1\*+}; - - if ($define && $prototype =~ m/^()($name)\s+/) { - # This is an object-like macro, it has no return type and no parameter - # list. - # Function-like macros are not allowed to have spaces between - # declaration_name and opening parenthesis (notice the \s+). - $return_type = $1; - $declaration_name = $2; - $func_macro = 1; - } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ || - $prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ || - $prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/) { - $return_type = $1; - $declaration_name = $2; - my $args = $3; - - create_parameterlist($args, ',', $file, $declaration_name); - } else { - emit_warning("${file}:$.", "cannot understand function prototype: '$prototype'\n"); - return; - } - - if ($identifier ne $declaration_name) { - emit_warning("${file}:$.", "expecting prototype for $identifier(). Prototype was for $declaration_name() instead\n"); - return; - } - - my $prms = join " ", @parameterlist; - check_sections($file, $declaration_name, "function", $sectcheck, $prms); - - # This check emits a lot of warnings at the moment, because many - # functions don't have a 'Return' doc section. So until the number - # of warnings goes sufficiently down, the check is only performed in - # -Wreturn mode. - # TODO: always perform the check. - if ($Wreturn && !$func_macro) { - check_return_section($file, $declaration_name, $return_type); - } - - # The function parser can be called with a typedef parameter. - # Handle it. 
- if ($return_type =~ /typedef/) { - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'typedef' => 1, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'func_macro' => $func_macro - }); - } else { - output_declaration($declaration_name, - 'function', - {'function' => $declaration_name, - 'module' => $modulename, - 'functiontype' => $return_type, - 'parameterlist' => \@parameterlist, - 'parameterdescs' => \%parameterdescs, - 'parametertypes' => \%parametertypes, - 'sectionlist' => \@sectionlist, - 'sections' => \%sections, - 'purpose' => $declaration_purpose, - 'func_macro' => $func_macro - }); - } -} - -sub reset_state { - $function = ""; - %parameterdescs = (); - %parametertypes = (); - @parameterlist = (); - %sections = (); - @sectionlist = (); - $sectcheck = ""; - $struct_actual = ""; - $prototype = ""; - - $state = STATE_NORMAL; - $inline_doc_state = STATE_INLINE_NA; -} - -sub tracepoint_munge($) { - my $file = shift; - my $tracepointname = 0; - my $tracepointargs = 0; - - if ($prototype =~ m/TRACE_EVENT\((.*?),/) { - $tracepointname = $1; - } - if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) { - $tracepointname = $1; - } - if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) { - $tracepointname = $2; - } - $tracepointname =~ s/^\s+//; #strip leading whitespace - if ($prototype =~ m/TP_PROTO\((.*?)\)/) { - $tracepointargs = $1; - } - if (($tracepointname eq 0) || ($tracepointargs eq 0)) { - emit_warning("${file}:$.", "Unrecognized tracepoint format: \n". 
- "$prototype\n"); - } else { - $prototype = "static inline void trace_$tracepointname($tracepointargs)"; - $identifier = "trace_$identifier"; - } -} - -sub syscall_munge() { - my $void = 0; - - $prototype =~ s@[\r\n]+@ @gos; # strip newlines/CR's -## if ($prototype =~ m/SYSCALL_DEFINE0\s*\(\s*(a-zA-Z0-9_)*\s*\)/) { - if ($prototype =~ m/SYSCALL_DEFINE0/) { - $void = 1; -## $prototype = "long sys_$1(void)"; - } - - $prototype =~ s/SYSCALL_DEFINE.*\(/long sys_/; # fix return type & func name - if ($prototype =~ m/long (sys_.*?),/) { - $prototype =~ s/,/\(/; - } elsif ($void) { - $prototype =~ s/\)/\(void\)/; - } - - # now delete all of the odd-number commas in $prototype - # so that arg types & arg names don't have a comma between them - my $count = 0; - my $len = length($prototype); - if ($void) { - $len = 0; # skip the for-loop - } - for (my $ix = 0; $ix < $len; $ix++) { - if (substr($prototype, $ix, 1) eq ',') { - $count++; - if ($count % 2 == 1) { - substr($prototype, $ix, 1) = ' '; - } - } - } -} - -sub process_proto_function($$) { - my $x = shift; - my $file = shift; - - $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line - - if ($x =~ /^#/ && $x !~ /^#\s*define/) { - # do nothing - } elsif ($x =~ /([^\{]*)/) { - $prototype .= $1; - } - - if (($x =~ /\{/) || ($x =~ /\#\s*define/) || ($x =~ /;/)) { - $prototype =~ s@/\*.*?\*/@@gos; # strip comments. - $prototype =~ s@[\r\n]+@ @gos; # strip newlines/cr's. 
- $prototype =~ s@^\s+@@gos; # strip leading spaces - - # Handle prototypes for function pointers like: - # int (*pcs_config)(struct foo) - $prototype =~ s@^(\S+\s+)\(\s*\*(\S+)\)@$1$2@gos; - - if ($prototype =~ /SYSCALL_DEFINE/) { - syscall_munge(); - } - if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ || - $prototype =~ /DEFINE_SINGLE_EVENT/) - { - tracepoint_munge($file); - } - dump_function($prototype, $file); - reset_state(); - } -} - -sub process_proto_type($$) { - my $x = shift; - my $file = shift; - - $x =~ s@[\r\n]+@ @gos; # strip newlines/cr's. - $x =~ s@^\s+@@gos; # strip leading spaces - $x =~ s@\s+$@@gos; # strip trailing spaces - $x =~ s@\/\/.*$@@gos; # strip C99-style comments to end of line - - if ($x =~ /^#/) { - # To distinguish preprocessor directive from regular declaration later. - $x .= ";"; - } - - while (1) { - if ( $x =~ /([^\{\};]*)([\{\};])(.*)/ ) { - if( length $prototype ) { - $prototype .= " " - } - $prototype .= $1 . $2; - ($2 eq '{') && $brcount++; - ($2 eq '}') && $brcount--; - if (($2 eq ';') && ($brcount == 0)) { - dump_declaration($prototype, $file); - reset_state(); - last; - } - $x = $3; - } else { - $prototype .= $x; - last; - } - } -} - - -sub map_filename($) { - my $file; - my ($orig_file) = @_; - - if (defined($ENV{'SRCTREE'})) { - $file = "$ENV{'SRCTREE'}" . "/" . $orig_file; - } else { - $file = $orig_file; - } - - return $file; -} - -sub process_export_file($) { - my ($orig_file) = @_; - my $file = map_filename($orig_file); - - if (!open(IN,"<$file")) { - print STDERR "Error: Cannot open file $file\n"; - ++$errors; - return; - } - - while () { - if (/$export_symbol/) { - next if (defined($nosymbol_table{$2})); - $function_table{$2} = 1; - } - if (/$export_symbol_ns/) { - next if (defined($nosymbol_table{$2})); - $function_table{$2} = 1; - } - } - - close(IN); -} - -# -# Parsers for the various processing states. -# -# STATE_NORMAL: looking for the /** to begin everything. 
-# -sub process_normal() { - if (/$doc_start/o) { - $state = STATE_NAME; # next line is always the function name - $declaration_start_line = $. + 1; - } -} - -# -# STATE_NAME: Looking for the "name - description" line -# -sub process_name($$) { - my $file = shift; - my $descr; - - if (/$doc_block/o) { - $state = STATE_DOCBLOCK; - $contents = ""; - $new_start_line = $.; - - if ( $1 eq "" ) { - $section = $section_intro; - } else { - $section = $1; - } - } elsif (/$doc_decl/o) { - $identifier = $1; - my $is_kernel_comment = 0; - my $decl_start = qr{$doc_com}; - # test for pointer declaration type, foo * bar() - desc - my $fn_type = qr{\w+\s*\*\s*}; - my $parenthesis = qr{\(\w*\)}; - my $decl_end = qr{[-:].*}; - if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) { - $identifier = $1; - } - if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) { - $decl_type = $1; - $identifier = $2; - $is_kernel_comment = 1; - } - # Look for foo() or static void foo() - description; or misspelt - # identifier - elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ || - /^$decl_start$fn_type?(\w+[^-:]*)$parenthesis?\s*$decl_end$/) { - $identifier = $1; - $decl_type = 'function'; - $identifier =~ s/^define\s+//; - $is_kernel_comment = 1; - } - $identifier =~ s/\s+$//; - - $state = STATE_BODY; - # if there's no @param blocks need to set up default section - # here - $contents = ""; - $section = $section_default; - $new_start_line = $. + 1; - if (/[-:](.*)/) { - # strip leading/trailing/multiple spaces - $descr= $1; - $descr =~ s/^\s*//; - $descr =~ s/\s*$//; - $descr =~ s/\s+/ /g; - $declaration_purpose = $descr; - $state = STATE_BODY_MAYBE; - } else { - $declaration_purpose = ""; - } - - if (!$is_kernel_comment) { - emit_warning("${file}:$.", "This comment starts with '/**', but isn't a kernel-doc comment. 
Refer Documentation/doc-guide/kernel-doc.rst\n$_"); - $state = STATE_NORMAL; - } - - if (($declaration_purpose eq "") && $Wshort_desc) { - emit_warning("${file}:$.", "missing initial short description on line:\n$_"); - } - - if ($identifier eq "" && $decl_type ne "enum") { - emit_warning("${file}:$.", "wrong kernel-doc identifier on line:\n$_"); - $state = STATE_NORMAL; - } - - if ($verbose) { - print STDERR "${file}:$.: info: Scanning doc for $decl_type $identifier\n"; - } - } else { - emit_warning("${file}:$.", "Cannot understand $_ on line $. - I thought it was a doc line\n"); - $state = STATE_NORMAL; - } -} - - -# -# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment. -# -sub process_body($$) { - my $file = shift; - - if ($state == STATE_BODY_WITH_BLANK_LINE && /^\s*\*\s?\S/) { - dump_section($file, $section, $contents); - $section = $section_default; - $new_start_line = $.; - $contents = ""; - } - - if (/$doc_sect/i) { # case insensitive for supported section names - $newsection = $1; - $newcontents = $2; - - # map the supported section names to the canonical names - if ($newsection =~ m/^description$/i) { - $newsection = $section_default; - } elsif ($newsection =~ m/^context$/i) { - $newsection = $section_context; - } elsif ($newsection =~ m/^returns?$/i) { - $newsection = $section_return; - } elsif ($newsection =~ m/^\@return$/) { - # special: @return is a section, not a param description - $newsection = $section_return; - } - - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = $section_default; - } - - $state = STATE_BODY; - $contents = $newcontents; - $new_start_line = $.; - while (substr($contents, 0, 1) eq " ") { - $contents = substr($contents, 1); - } - if ($contents ne "") { - $contents .= "\n"; - } - $section = $newsection; - $leading_space = undef; - } elsif (/$doc_end/) { - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = 
$section_default; - $contents = ""; - } - # look for doc_com + + doc_end: - if ($_ =~ m'\s*\*\s*[a-zA-Z_0-9:\.]+\*/') { - emit_warning("${file}:$.", "suspicious ending line: $_"); - } - - $prototype = ""; - $state = STATE_PROTO; - $brcount = 0; - $new_start_line = $. + 1; - } elsif (/$doc_content/) { - if ($1 eq "") { - if ($section eq $section_context) { - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - $new_start_line = $.; - $state = STATE_BODY; - } else { - if ($section ne $section_default) { - $state = STATE_BODY_WITH_BLANK_LINE; - } else { - $state = STATE_BODY; - } - $contents .= "\n"; - } - } elsif ($state == STATE_BODY_MAYBE) { - # Continued declaration purpose - chomp($declaration_purpose); - $declaration_purpose .= " " . $1; - $declaration_purpose =~ s/\s+/ /g; - } else { - my $cont = $1; - if ($section =~ m/^@/ || $section eq $section_context) { - if (!defined $leading_space) { - if ($cont =~ m/^(\s+)/) { - $leading_space = $1; - } else { - $leading_space = ""; - } - } - $cont =~ s/^$leading_space//; - } - $contents .= $cont . "\n"; - } - } else { - # i dont know - bad line? ignore. - emit_warning("${file}:$.", "bad line: $_"); - } -} - - -# -# STATE_PROTO: reading a function/whatever prototype. -# -sub process_proto($$) { - my $file = shift; - - if (/$doc_inline_oneline/) { - $section = $1; - $contents = $2; - if ($contents ne "") { - $contents .= "\n"; - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - } - } elsif (/$doc_inline_start/) { - $state = STATE_INLINE; - $inline_doc_state = STATE_INLINE_NAME; - } elsif ($decl_type eq 'function') { - process_proto_function($_, $file); - } else { - process_proto_type($_, $file); - } -} - -# -# STATE_DOCBLOCK: within a DOC: block. 
-# -sub process_docblock($$) { - my $file = shift; - - if (/$doc_end/) { - dump_doc_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - $function = ""; - %parameterdescs = (); - %parametertypes = (); - @parameterlist = (); - %sections = (); - @sectionlist = (); - $prototype = ""; - $state = STATE_NORMAL; - } elsif (/$doc_content/) { - if ( $1 eq "" ) { - $contents .= $blankline; - } else { - $contents .= $1 . "\n"; - } - } -} - -# -# STATE_INLINE: docbook comments within a prototype. -# -sub process_inline($$) { - my $file = shift; - - # First line (state 1) needs to be a @parameter - if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) { - $section = $1; - $contents = $2; - $new_start_line = $.; - if ($contents ne "") { - while (substr($contents, 0, 1) eq " ") { - $contents = substr($contents, 1); - } - $contents .= "\n"; - } - $inline_doc_state = STATE_INLINE_TEXT; - # Documentation block end */ - } elsif (/$doc_inline_end/) { - if (($contents ne "") && ($contents ne "\n")) { - dump_section($file, $section, $contents); - $section = $section_default; - $contents = ""; - } - $state = STATE_PROTO; - $inline_doc_state = STATE_INLINE_NA; - # Regular text - } elsif (/$doc_content/) { - if ($inline_doc_state == STATE_INLINE_TEXT) { - $contents .= $1 . "\n"; - # nuke leading blank lines - if ($contents =~ /^\s*$/) { - $contents = ""; - } - } elsif ($inline_doc_state == STATE_INLINE_NAME) { - $inline_doc_state = STATE_INLINE_ERROR; - emit_warning("${file}:$.", "Incorrect use of kernel-doc format: $_"); - } - } -} - - -sub process_file($) { - my $file; - my ($orig_file) = @_; - - $file = map_filename($orig_file); - - if (!open(IN_FILE,"<$file")) { - print STDERR "Error: Cannot open file $file\n"; - ++$errors; - return; - } - - $. 
= 1; - - $section_counter = 0; - while () { - while (!/^ \*/ && s/\\\s*$//) { - $_ .= ; - } - # Replace tabs by spaces - while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {}; - # Hand this line to the appropriate state handler - if ($state == STATE_NORMAL) { - process_normal(); - } elsif ($state == STATE_NAME) { - process_name($file, $_); - } elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE || - $state == STATE_BODY_WITH_BLANK_LINE) { - process_body($file, $_); - } elsif ($state == STATE_INLINE) { # scanning for inline parameters - process_inline($file, $_); - } elsif ($state == STATE_PROTO) { - process_proto($file, $_); - } elsif ($state == STATE_DOCBLOCK) { - process_docblock($file, $_); - } - } - - # Make sure we got something interesting. - if (!$section_counter && $output_mode ne "none") { - if ($output_selection == OUTPUT_INCLUDE) { - emit_warning("${file}:1", "'$_' not found\n") - for keys %function_table; - } else { - emit_warning("${file}:1", "no structured comments found\n"); - } - } - close IN_FILE; -} - -$kernelversion = get_kernel_version(); - -# generate a sequence of code that will splice in highlighting information -# using the s// operator. -for (my $k = 0; $k < @highlights; $k++) { - my $pattern = $highlights[$k][0]; - my $result = $highlights[$k][1]; -# print STDERR "scanning pattern:$pattern, highlight:($result)\n"; - $dohighlight .= "\$contents =~ s:$pattern:$result:gs;\n"; -} - -if ($output_selection == OUTPUT_EXPORTED || - $output_selection == OUTPUT_INTERNAL) { - - push(@export_file_list, @ARGV); - - foreach (@export_file_list) { - chomp; - process_export_file($_); - } -} - -foreach (@ARGV) { - chomp; - process_file($_); -} -if ($verbose && $errors) { - print STDERR "$errors errors\n"; -} -if ($verbose && $warnings) { - print STDERR "$warnings warnings\n"; -} - -if ($Werror && $warnings) { - print STDERR "$warnings warnings as Errors\n"; - exit($warnings); -} else { - exit($output_mode eq "none" ? 
0 : $errors) -} - -__END__ - -=head1 OPTIONS - -=head2 Output format selection (mutually exclusive): - -=over 8 - -=item -man - -Output troff manual page format. - -=item -rst - -Output reStructuredText format. This is the default. - -=item -none - -Do not output documentation, only warnings. - -=back - -=head2 Output format modifiers - -=head3 reStructuredText only - -=head2 Output selection (mutually exclusive): - -=over 8 - -=item -export - -Only output documentation for the symbols that have been exported using -EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. - -=item -internal - -Only output documentation for the symbols that have NOT been exported using -EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE. - -=item -function NAME - -Only output documentation for the given function or DOC: section title. -All other functions and DOC: sections are ignored. - -May be specified multiple times. - -=item -nosymbol NAME - -Exclude the specified symbol from the output documentation. - -May be specified multiple times. - -=back - -=head2 Output selection modifiers: - -=over 8 - -=item -no-doc-sections - -Do not output DOC: sections. - -=item -export-file FILE - -Specify an additional FILE in which to look for EXPORT_SYMBOL information. - -To be used with -export or -internal. - -May be specified multiple times. - -=back - -=head3 reStructuredText only - -=over 8 - -=item -enable-lineno - -Enable output of .. LINENO lines. - -=back - -=head2 Other parameters: - -=over 8 - -=item -h, -help - -Print this help. - -=item -v - -Verbose output, more warnings and other information. - -=item -Werror - -Treat warnings as errors. 
- -=back - -=cut -- cgit v1.2.3 From b21f90e2e4503847ffeb00a9ef4d6d390291f902 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 29 Oct 2025 13:07:09 +0100 Subject: scripts: add tracepoint-update to the list of ignores files The new program for removing unused tracepoints is not ignored as it should. Add it to the local .gitignore. Cc: Vladimir Oltean Cc: Jacob Keller Cc: Jakub Kicinski Link: https://lore.kernel.org/20251029120709.24669-1-brgl@bgdev.pl Fixes: e30f8e61e251 ("tracing: Add a tracepoint verification check at build time") Signed-off-by: Bartosz Golaszewski Signed-off-by: Steven Rostedt (Google) --- scripts/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/.gitignore b/scripts/.gitignore index c2ef68848da5..4215c2208f7e 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -11,4 +11,5 @@ /sign-file /sorttable /target.json +/tracepoint-update /unifdef -- cgit v1.2.3 From c4781dc3d1cf0e017e1f290607ddc56cfe187afc Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 20 Oct 2025 16:22:27 +0200 Subject: Kbuild: enable -fms-extensions Once in a while, it turns out that enabling -fms-extensions could allow some slightly prettier code. But every time it has come up, the code that had to be used instead has been deemed "not too awful" and not worth introducing another compiler flag for. That's probably true for each individual case, but then it's somewhat of a chicken/egg situation. If we just "bite the bullet" as Linus says and enable it once and for all, it is available whenever a use case turns up, and no individual case has to justify it. 
A lore.kernel.org search provides these examples: - https://lore.kernel.org/lkml/200706301813.58435.agruen@suse.de/ - https://lore.kernel.org/lkml/20180419152817.GD25406@bombadil.infradead.org/ - https://lore.kernel.org/lkml/170622208395.21664.2510213291504081000@noble.neil.brown.name/ - https://lore.kernel.org/lkml/87h6475w9q.fsf@prevas.dk/ - https://lore.kernel.org/lkml/CAHk-=wjeZwww6Zswn6F_iZTpUihTSNKYppLqj36iQDDhfntuEw@mail.gmail.com/ Undoubtedly, there are more places in the code where this could also be used but where -fms-extensions just didn't come up in any discussion. Signed-off-by: Rasmus Villemoes Acked-by: David Sterba Link: https://patch.msgid.link/20251020142228.1819871-2-linux@rasmusvillemoes.dk [nathan: Move disabled clang warning to scripts/Makefile.extrawarn and adjust comment] Signed-off-by: Nathan Chancellor --- scripts/Makefile.extrawarn | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6af392f9cd02..68e6fafcb80c 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -28,8 +28,10 @@ endif KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds ifdef CONFIG_CC_IS_CLANG -# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable. +# The kernel builds with '-std=gnu11' and '-fms-extensions' so use of GNU and +# Microsoft extensions is acceptable. KBUILD_CFLAGS += -Wno-gnu +KBUILD_CFLAGS += -Wno-microsoft-anon-tag # Clang checks for overflow/truncation with '%p', while GCC does not: # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 -- cgit v1.2.3 From 04cadb4fe0341304741ef60a297366b553f0ce36 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Oct 2025 17:10:47 -0700 Subject: lib/crypto: Add FIPS self-tests for SHA-1 and SHA-2 Add FIPS cryptographic algorithm self-tests for all SHA-1 and SHA-2 algorithms. 
Following the "Implementation Guidance for FIPS 140-3" document, to achieve this it's sufficient to just test a single test vector for each of HMAC-SHA1, HMAC-SHA256, and HMAC-SHA512. Just run these tests in the initcalls, following the example of e.g. crypto/kdf_sp800108.c. Note that this should meet the FIPS self-test requirement even in the built-in case, given that the initcalls run before userspace, storage, network, etc. are accessible. This does not fix a regression, seeing as lib/ has had SHA-1 support since 2005 and SHA-256 support since 2018. Neither ever had FIPS self-tests. Moreover, fips=1 support has always been an unfinished feature upstream. However, with lib/ now being used more widely, it's now seeing more scrutiny and people seem to want these now [1][2]. [1] https://lore.kernel.org/r/3226361.1758126043@warthog.procyon.org.uk/ [2] https://lore.kernel.org/r/f31dbb22-0add-481c-aee0-e337a7731f8e@oracle.com/ Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251011001047.51886-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- scripts/crypto/gen-fips-testvecs.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100755 scripts/crypto/gen-fips-testvecs.py (limited to 'scripts') diff --git a/scripts/crypto/gen-fips-testvecs.py b/scripts/crypto/gen-fips-testvecs.py new file mode 100755 index 000000000000..2956f88b764a --- /dev/null +++ b/scripts/crypto/gen-fips-testvecs.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-or-later +# +# Script that generates lib/crypto/fips.h +# +# Copyright 2025 Google LLC + +import hmac + +fips_test_data = b"fips test data\0\0" +fips_test_key = b"fips test key\0\0\0" + +def print_static_u8_array_definition(name, value): + print('') + print(f'static const u8 {name}[] __initconst __maybe_unused = {{') + for i in range(0, len(value), 8): + line = '\t' + ''.join(f'0x{b:02x}, ' for b in value[i:i+8]) + print(f'{line.rstrip()}') + print('};') + 
+print('/* SPDX-License-Identifier: GPL-2.0-or-later */') +print(f'/* This file was generated by: gen-fips-testvecs.py */') +print() +print('#include ') + +print_static_u8_array_definition("fips_test_data", fips_test_data) +print_static_u8_array_definition("fips_test_key", fips_test_key) + +for alg in 'sha1', 'sha256', 'sha512': + ctx = hmac.new(fips_test_key, digestmod=alg) + ctx.update(fips_test_data) + print_static_u8_array_definition(f'fips_test_hmac_{alg}_value', ctx.digest()) -- cgit v1.2.3 From e5e7ca66a7fc6b8073c30a048e1157b88d427980 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 30 Oct 2025 12:58:32 -0700 Subject: docs: kdoc: fix duplicate section warning message The python version of the kernel-doc parser emits some strange warnings with just a line number in certain cases: $ ./scripts/kernel-doc -Wall -none 'include/linux/virtio_config.h' Warning: 174 Warning: 184 Warning: 190 Warning: include/linux/virtio_config.h:226 No description found for return value of '__virtio_test_bit' Warning: include/linux/virtio_config.h:259 No description found for return value of 'virtio_has_feature' Warning: include/linux/virtio_config.h:283 No description found for return value of 'virtio_has_dma_quirk' Warning: include/linux/virtio_config.h:392 No description found for return value of 'virtqueue_set_affinity' I eventually tracked this down to the lone call of emit_msg() in the KernelEntry class, which looks like: self.emit_msg(self.new_start_line, f"duplicate section name '{name}'\n") This looks like all the other emit_msg calls. Unfortunately, the definition within the KernelEntry class takes only a message parameter and not a line number. The intended message is passed as the warning! Pass the filename to the KernelEntry class, and use this to build the log message in the same way as the KernelDoc class does. To avoid future errors, mark the warning parameter for both emit_msg definitions as a keyword-only argument. 
This will prevent accidentally passing a string as the warning parameter in the future. Also fix the call in dump_section to avoid an unnecessary additional newline. Fixes: e3b42e94cf10 ("scripts/lib/kdoc/kdoc_parser.py: move kernel entry to a class") Signed-off-by: Jacob Keller Signed-off-by: Jonathan Corbet Message-ID: <20251030-jk-fix-kernel-doc-duplicate-return-warning-v2-1-ec4b5c662881@intel.com> --- scripts/lib/kdoc/kdoc_parser.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index 6e5c115cbdf3..ee1a4ea6e725 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -275,6 +275,8 @@ class KernelEntry: self.leading_space = None + self.fname = fname + # State flags self.brcount = 0 self.declaration_start_line = ln + 1 @@ -289,9 +291,11 @@ class KernelEntry: return '\n'.join(self._contents) + '\n' # TODO: rename to emit_message after removal of kernel-doc.pl - def emit_msg(self, log_msg, warning=True): + def emit_msg(self, ln, msg, *, warning=True): """Emit a message""" + log_msg = f"{self.fname}:{ln} {msg}" + if not warning: self.config.log.info(log_msg) return @@ -337,7 +341,7 @@ class KernelEntry: # Only warn on user-specified duplicate section names if name != SECTION_DEFAULT: self.emit_msg(self.new_start_line, - f"duplicate section name '{name}'\n") + f"duplicate section name '{name}'") # Treat as a new paragraph - add a blank line self.sections[name] += '\n' + contents else: @@ -393,15 +397,15 @@ class KernelDoc: 'Python 3.7 or later is required for correct results') python_warning = True - def emit_msg(self, ln, msg, warning=True): + def emit_msg(self, ln, msg, *, warning=True): """Emit a message""" - log_msg = f"{self.fname}:{ln} {msg}" - if self.entry: - self.entry.emit_msg(log_msg, warning) + self.entry.emit_msg(ln, msg, warning=warning) return + log_msg = f"{self.fname}:{ln} {msg}" + if warning: 
self.config.log.warning(log_msg) else: -- cgit v1.2.3 From b36d4b6aa88ef039647228b98c59a875e92f8c8e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Oct 2025 13:20:33 +0100 Subject: arch: hookup listns() system call Add the listns() system call to all architectures. Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-20-2e6f823ebdc0@kernel.org Tested-by: syzbot@syzkaller.appspotmail.com Reviewed-by: Arnd Bergmann Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- scripts/syscall.tbl | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index d1ae5e92c615..e74868be513c 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -410,3 +410,4 @@ 467 common open_tree_attr sys_open_tree_attr 468 common file_getattr sys_file_getattr 469 common file_setattr sys_file_setattr +470 common listns sys_listns -- cgit v1.2.3 From 469c1c9eb6c9243e4b59ef93518d4e0acb1b2b3e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Nov 2025 22:55:02 +0100 Subject: kernel-doc: Issue warnings that were silently discarded When kernel-doc parses the sections for the documentation some errors may occur. In many cases the warning is simply stored to the current "entry" object. However, in the most of such cases this object gets discarded and there is no way for the output engine to even know about that. To avoid that, check if the "entry" is going to be discarded and if there warnings have been collected, issue them to the current logger as is and then flush the "entry". This fixes the problem that original Perl implementation doesn't have. As of Linux kernel v6.18-rc4 the reproducer can be: $ scripts/kernel-doc -v -none -Wall include/linux/util_macros.h ... Info: include/linux/util_macros.h:138 Scanning doc for function to_user_ptr ... while with the proposed change applied it gives one more line: $ scripts/kernel-doc -v -none -Wall include/linux/util_macros.h ... 
Info: include/linux/util_macros.h:138 Scanning doc for function to_user_ptr Warning: include/linux/util_macros.h:144 expecting prototype for to_user_ptr(). Prototype was for u64_to_user_ptr() instead ... And with the original Perl script: $ scripts/kernel-doc.pl -v -none -Wall include/linux/util_macros.h ... include/linux/util_macros.h:139: info: Scanning doc for function to_user_ptr include/linux/util_macros.h:149: warning: expecting prototype for to_user_ptr(). Prototype was for u64_to_user_ptr() instead ... Fixes: 9cbc2d3b137b ("scripts/kernel-doc.py: postpone warnings to the output plugin") Signed-off-by: Andy Shevchenko Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Jonathan Corbet Message-ID: <20251104215502.1049817-1-andriy.shevchenko@linux.intel.com> --- scripts/lib/kdoc/kdoc_parser.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py index ee1a4ea6e725..f7dbb0868367 100644 --- a/scripts/lib/kdoc/kdoc_parser.py +++ b/scripts/lib/kdoc/kdoc_parser.py @@ -451,6 +451,13 @@ class KernelDoc: variables used by the state machine. """ + # + # Flush the warnings out before we proceed further + # + if self.entry and self.entry not in self.entries: + for log_msg in self.entry.warnings: + self.config.log.warning(log_msg) + self.entry = KernelEntry(self.config, self.fname, ln) # State flags -- cgit v1.2.3 From 6fa873641c0bdfa849130a81aa7339ccfd42b52a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 25 Oct 2025 22:50:24 -0700 Subject: lib/crypto: sha3: Add FIPS cryptographic algorithm self-test Since the SHA-3 algorithms are FIPS-approved, add the boot-time self-test which is apparently required. This closely follows the corresponding SHA-1, SHA-256, and SHA-512 tests. 
Tested-by: Harald Freudenberger Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251026055032.1413733-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- scripts/crypto/gen-fips-testvecs.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'scripts') diff --git a/scripts/crypto/gen-fips-testvecs.py b/scripts/crypto/gen-fips-testvecs.py index 2956f88b764a..db873f88619a 100755 --- a/scripts/crypto/gen-fips-testvecs.py +++ b/scripts/crypto/gen-fips-testvecs.py @@ -5,6 +5,7 @@ # # Copyright 2025 Google LLC +import hashlib import hmac fips_test_data = b"fips test data\0\0" @@ -30,3 +31,6 @@ for alg in 'sha1', 'sha256', 'sha512': ctx = hmac.new(fips_test_key, digestmod=alg) ctx.update(fips_test_data) print_static_u8_array_definition(f'fips_test_hmac_{alg}_value', ctx.digest()) + +print_static_u8_array_definition(f'fips_test_sha3_256_value', + hashlib.sha3_256(fips_test_data).digest()) -- cgit v1.2.3 From af61da281f52aba0c5b090bafb3a31c5739850ff Mon Sep 17 00:00:00 2001 From: Mikhail Malyshev Date: Wed, 15 Oct 2025 16:34:52 +0000 Subject: kbuild: Use objtree for module signing key path When building out-of-tree modules with CONFIG_MODULE_SIG_FORCE=y, module signing fails because the private key path uses $(srctree) while the public key path uses $(objtree). Since signing keys are generated in the build directory during kernel compilation, both paths should use $(objtree) for consistency. This causes SSL errors like: SSL error:02001002:system library:fopen:No such file or directory sign-file: /kernel-src/certs/signing_key.pem The issue occurs because: - sig-key uses: $(srctree)/certs/signing_key.pem (source tree) - cmd_sign uses: $(objtree)/certs/signing_key.x509 (build tree) But both keys are generated in $(objtree) during the build. This complements commit 25ff08aa43e37 ("kbuild: Fix signing issue for external modules") which fixed the scripts path and public key path, but missed the private key path inconsistency. 
Fixes out-of-tree module signing for configurations with separate source and build directories (e.g., O=/kernel-out). Signed-off-by: Mikhail Malyshev Reviewed-by: Nathan Chancellor Tested-by: Nicolas Schier Link: https://patch.msgid.link/20251015163452.3754286-1-mike.malyshev@gmail.com Signed-off-by: Nicolas Schier --- scripts/Makefile.modinst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 1628198f3e83..9ba45e5b32b1 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -100,7 +100,7 @@ endif # Don't stop modules_install even if we can't sign external modules. # ifeq ($(filter pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),) -sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(srctree)/)$(CONFIG_MODULE_SIG_KEY) +sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(objtree)/)$(CONFIG_MODULE_SIG_KEY) else sig-key := $(CONFIG_MODULE_SIG_KEY) endif -- cgit v1.2.3 From 7319256dda306b867506899b6438e6eb96a1ead0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 23 Oct 2025 22:01:36 +0200 Subject: kbuild: Rename Makefile.extrawarn to Makefile.warn Since commit e88ca24319e4 ("kbuild: consolidate warning flags in scripts/Makefile.extrawarn"), scripts/Makefile.extrawarn contains all warnings for the main kernel build, not just warnings enabled by the values for W=. Rename it to scripts/Makefile.warn to make it clearer that this Makefile is where all Kbuild warning handling should exist. 
Signed-off-by: Nathan Chancellor Acked-by: Arnd Bergmann Link: https://patch.msgid.link/20251023-rename-scripts-makefile-extrawarn-v1-1-8f7531542169@kernel.org Signed-off-by: Nicolas Schier --- scripts/Makefile.extrawarn | 235 --------------------------------------------- scripts/Makefile.warn | 235 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 235 insertions(+), 235 deletions(-) delete mode 100644 scripts/Makefile.extrawarn create mode 100644 scripts/Makefile.warn (limited to 'scripts') diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn deleted file mode 100644 index 68e6fafcb80c..000000000000 --- a/scripts/Makefile.extrawarn +++ /dev/null @@ -1,235 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# ========================================================================== -# make W=... settings -# -# There are four warning groups enabled by W=1, W=2, W=3, and W=e -# They are independent, and can be combined like W=12 or W=123e. -# ========================================================================== - -# Default set of warnings, always enabled -KBUILD_CFLAGS += -Wall -KBUILD_CFLAGS += -Wextra -KBUILD_CFLAGS += -Wundef -KBUILD_CFLAGS += -Werror=implicit-function-declaration -KBUILD_CFLAGS += -Werror=implicit-int -KBUILD_CFLAGS += -Werror=return-type -KBUILD_CFLAGS += -Werror=strict-prototypes -KBUILD_CFLAGS += -Wno-format-security -KBUILD_CFLAGS += -Wno-trigraphs -KBUILD_CFLAGS += $(call cc-option, -Wno-frame-address) -KBUILD_CFLAGS += $(call cc-option, -Wno-address-of-packed-member) -KBUILD_CFLAGS += -Wmissing-declarations -KBUILD_CFLAGS += -Wmissing-prototypes - -ifneq ($(CONFIG_FRAME_WARN),0) -KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN) -endif - -KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds - -ifdef CONFIG_CC_IS_CLANG -# The kernel builds with '-std=gnu11' and '-fms-extensions' so use of GNU and -# Microsoft extensions is acceptable. 
-KBUILD_CFLAGS += -Wno-gnu -KBUILD_CFLAGS += -Wno-microsoft-anon-tag - -# Clang checks for overflow/truncation with '%p', while GCC does not: -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 -KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow-non-kprintf) -KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation-non-kprintf) - -# Clang may emit a warning when a const variable, such as the dummy variables -# in typecheck(), or const member of an aggregate type are not initialized, -# which can result in unexpected behavior. However, in many audited cases of -# the "field" variant of the warning, this is intentional because the field is -# never used within a particular call path, the field is within a union with -# other non-const members, or the containing object is not const so the field -# can be modified via memcpy() / memset(). While the variable warning also gets -# disabled with this same switch, there should not be too much coverage lost -# because -Wuninitialized will still flag when an uninitialized const variable -# is used. -KBUILD_CFLAGS += $(call cc-option, -Wno-default-const-init-unsafe) -else - -# gcc inanely warns about local variables called 'main' -KBUILD_CFLAGS += -Wno-main -endif - -# These result in bogus false positives -KBUILD_CFLAGS += $(call cc-option, -Wno-dangling-pointer) - -# Stack Variable Length Arrays (VLAs) must not be used in the kernel. -# Function array parameters should, however, be usable, but -Wvla will -# warn for those. Clang has no way yet to distinguish between the VLA -# types, so depend on GCC for now to keep stack VLAs out of the tree. 
-# https://github.com/llvm/llvm-project/issues/57098 -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98217 -KBUILD_CFLAGS += $(call cc-option,-Wvla-larger-than=1) - -# disable pointer signed / unsigned warnings in gcc 4.0 -KBUILD_CFLAGS += -Wno-pointer-sign - -# In order to make sure new function cast mismatches are not introduced -# in the kernel (to avoid tripping CFI checking), the kernel should be -# globally built with -Wcast-function-type. -KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type) - -# Currently, disable -Wstringop-overflow for GCC 11, globally. -KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) -KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) - -# Currently, disable -Wunterminated-string-initialization as broken -KBUILD_CFLAGS += $(call cc-option, -Wno-unterminated-string-initialization) - -# The allocators already balk at large sizes, so silence the compiler -# warnings for bounds checks involving those possible values. While -# -Wno-alloc-size-larger-than would normally be used here, earlier versions -# of gcc (<9.1) weirdly don't handle the option correctly when _other_ -# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX -# doesn't work (as it is documented to), silently resolving to "0" prior to -# version 9.1 (and producing an error more recently). Numeric values larger -# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently -# ignored, continuing to default to PTRDIFF_MAX. So, left with no other -# choice, we must perform a versioned check to disable this warning. 
-# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au -KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than -KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) - -# Prohibit date/time macros, which would make the build non-deterministic -KBUILD_CFLAGS += -Werror=date-time - -# enforce correct pointer usage -KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) - -# Require designated initializers for all marked structures -KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) - -# Warn if there is an enum types mismatch -KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) - -KBUILD_CFLAGS += -Wunused - -# -# W=1 - warnings which may be relevant and do not occur too often -# -ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) - -KBUILD_CFLAGS += -Wmissing-format-attribute -KBUILD_CFLAGS += -Wmissing-include-dirs -KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) - -KBUILD_CPPFLAGS += -Wundef -KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 - -else - -# Some diagnostics enabled by default are noisy. -# Suppress them by using -Wno... except for W=1. -KBUILD_CFLAGS += $(call cc-option, -Wno-unused-but-set-variable) -KBUILD_CFLAGS += $(call cc-option, -Wno-unused-const-variable) -KBUILD_CFLAGS += $(call cc-option, -Wno-packed-not-aligned) -KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow) -ifdef CONFIG_CC_IS_GCC -KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation) -endif -KBUILD_CFLAGS += $(call cc-option, -Wno-stringop-truncation) - -KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang - -ifdef CONFIG_CC_IS_CLANG -# Clang before clang-16 would warn on default argument promotions. -ifneq ($(call clang-min-version, 160000),y) -# Disable -Wformat -KBUILD_CFLAGS += -Wno-format -# Then re-enable flags that were part of the -Wformat group that aren't -# problematic. 
-KBUILD_CFLAGS += -Wformat-extra-args -Wformat-invalid-specifier -KBUILD_CFLAGS += -Wformat-zero-length -Wnonnull -# Requires clang-12+. -ifeq ($(call clang-min-version, 120000),y) -KBUILD_CFLAGS += -Wformat-insufficient-args -endif -endif -KBUILD_CFLAGS += $(call cc-option, -Wno-pointer-to-enum-cast) -KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare -KBUILD_CFLAGS += $(call cc-option, -Wno-unaligned-access) -KBUILD_CFLAGS += -Wno-enum-compare-conditional -endif - -endif - -# -# W=2 - warnings which occur quite often but may still be relevant -# -ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) - -KBUILD_CFLAGS += -Wdisabled-optimization -KBUILD_CFLAGS += -Wshadow -KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) -KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) - -KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 - -else - -# The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-missing-field-initializers -KBUILD_CFLAGS += -Wno-type-limits -KBUILD_CFLAGS += -Wno-shift-negative-value - -ifdef CONFIG_CC_IS_CLANG -KBUILD_CFLAGS += -Wno-enum-enum-conversion -endif - -ifdef CONFIG_CC_IS_GCC -KBUILD_CFLAGS += -Wno-maybe-uninitialized -endif - -endif - -# -# W=3 - more obscure warnings, can most likely be ignored -# -ifneq ($(findstring 3, $(KBUILD_EXTRA_WARN)),) - -KBUILD_CFLAGS += -Wbad-function-cast -KBUILD_CFLAGS += -Wcast-align -KBUILD_CFLAGS += -Wcast-qual -KBUILD_CFLAGS += -Wconversion -KBUILD_CFLAGS += -Wpacked -KBUILD_CFLAGS += -Wpadded -KBUILD_CFLAGS += -Wpointer-arith -KBUILD_CFLAGS += -Wredundant-decls -KBUILD_CFLAGS += -Wsign-compare -KBUILD_CFLAGS += -Wswitch-default - -KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN3 - -else - -# The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-unused-parameter - -endif - -# -# W=e and CONFIG_WERROR - error out on warnings -# -ifneq ($(findstring e, $(KBUILD_EXTRA_WARN))$(CONFIG_WERROR),) - -KBUILD_CPPFLAGS += -Werror -KBUILD_AFLAGS += 
-Wa,--fatal-warnings -KBUILD_LDFLAGS += --fatal-warnings -KBUILD_USERCFLAGS += -Werror -KBUILD_USERLDFLAGS += -Wl,--fatal-warnings -KBUILD_RUSTFLAGS += -Dwarnings - -# While hostprog flags are used during build bootstrapping (thus should not -# depend on CONFIG_ symbols), -Werror is disruptive and should be opted into. -# Only apply -Werror to hostprogs built after the initial Kconfig stage. -KBUILD_HOSTCFLAGS += -Werror -KBUILD_HOSTLDFLAGS += -Wl,--fatal-warnings -KBUILD_HOSTRUSTFLAGS += -Dwarnings - -endif diff --git a/scripts/Makefile.warn b/scripts/Makefile.warn new file mode 100644 index 000000000000..68e6fafcb80c --- /dev/null +++ b/scripts/Makefile.warn @@ -0,0 +1,235 @@ +# SPDX-License-Identifier: GPL-2.0 +# ========================================================================== +# make W=... settings +# +# There are four warning groups enabled by W=1, W=2, W=3, and W=e +# They are independent, and can be combined like W=12 or W=123e. +# ========================================================================== + +# Default set of warnings, always enabled +KBUILD_CFLAGS += -Wall +KBUILD_CFLAGS += -Wextra +KBUILD_CFLAGS += -Wundef +KBUILD_CFLAGS += -Werror=implicit-function-declaration +KBUILD_CFLAGS += -Werror=implicit-int +KBUILD_CFLAGS += -Werror=return-type +KBUILD_CFLAGS += -Werror=strict-prototypes +KBUILD_CFLAGS += -Wno-format-security +KBUILD_CFLAGS += -Wno-trigraphs +KBUILD_CFLAGS += $(call cc-option, -Wno-frame-address) +KBUILD_CFLAGS += $(call cc-option, -Wno-address-of-packed-member) +KBUILD_CFLAGS += -Wmissing-declarations +KBUILD_CFLAGS += -Wmissing-prototypes + +ifneq ($(CONFIG_FRAME_WARN),0) +KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN) +endif + +KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds + +ifdef CONFIG_CC_IS_CLANG +# The kernel builds with '-std=gnu11' and '-fms-extensions' so use of GNU and +# Microsoft extensions is acceptable. 
+KBUILD_CFLAGS += -Wno-gnu +KBUILD_CFLAGS += -Wno-microsoft-anon-tag + +# Clang checks for overflow/truncation with '%p', while GCC does not: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 +KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow-non-kprintf) +KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation-non-kprintf) + +# Clang may emit a warning when a const variable, such as the dummy variables +# in typecheck(), or const member of an aggregate type are not initialized, +# which can result in unexpected behavior. However, in many audited cases of +# the "field" variant of the warning, this is intentional because the field is +# never used within a particular call path, the field is within a union with +# other non-const members, or the containing object is not const so the field +# can be modified via memcpy() / memset(). While the variable warning also gets +# disabled with this same switch, there should not be too much coverage lost +# because -Wuninitialized will still flag when an uninitialized const variable +# is used. +KBUILD_CFLAGS += $(call cc-option, -Wno-default-const-init-unsafe) +else + +# gcc inanely warns about local variables called 'main' +KBUILD_CFLAGS += -Wno-main +endif + +# These result in bogus false positives +KBUILD_CFLAGS += $(call cc-option, -Wno-dangling-pointer) + +# Stack Variable Length Arrays (VLAs) must not be used in the kernel. +# Function array parameters should, however, be usable, but -Wvla will +# warn for those. Clang has no way yet to distinguish between the VLA +# types, so depend on GCC for now to keep stack VLAs out of the tree. 
+# https://github.com/llvm/llvm-project/issues/57098 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98217 +KBUILD_CFLAGS += $(call cc-option,-Wvla-larger-than=1) + +# disable pointer signed / unsigned warnings in gcc 4.0 +KBUILD_CFLAGS += -Wno-pointer-sign + +# In order to make sure new function cast mismatches are not introduced +# in the kernel (to avoid tripping CFI checking), the kernel should be +# globally built with -Wcast-function-type. +KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type) + +# Currently, disable -Wstringop-overflow for GCC 11, globally. +KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) +KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) + +# Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-option, -Wno-unterminated-string-initialization) + +# The allocators already balk at large sizes, so silence the compiler +# warnings for bounds checks involving those possible values. While +# -Wno-alloc-size-larger-than would normally be used here, earlier versions +# of gcc (<9.1) weirdly don't handle the option correctly when _other_ +# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX +# doesn't work (as it is documented to), silently resolving to "0" prior to +# version 9.1 (and producing an error more recently). Numeric values larger +# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently +# ignored, continuing to default to PTRDIFF_MAX. So, left with no other +# choice, we must perform a versioned check to disable this warning. 
+# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au +KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than +KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) + +# Prohibit date/time macros, which would make the build non-deterministic +KBUILD_CFLAGS += -Werror=date-time + +# enforce correct pointer usage +KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) + +# Require designated initializers for all marked structures +KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) + +# Warn if there is an enum types mismatch +KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion) + +KBUILD_CFLAGS += -Wunused + +# +# W=1 - warnings which may be relevant and do not occur too often +# +ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),) + +KBUILD_CFLAGS += -Wmissing-format-attribute +KBUILD_CFLAGS += -Wmissing-include-dirs +KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) + +KBUILD_CPPFLAGS += -Wundef +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 + +else + +# Some diagnostics enabled by default are noisy. +# Suppress them by using -Wno... except for W=1. +KBUILD_CFLAGS += $(call cc-option, -Wno-unused-but-set-variable) +KBUILD_CFLAGS += $(call cc-option, -Wno-unused-const-variable) +KBUILD_CFLAGS += $(call cc-option, -Wno-packed-not-aligned) +KBUILD_CFLAGS += $(call cc-option, -Wno-format-overflow) +ifdef CONFIG_CC_IS_GCC +KBUILD_CFLAGS += $(call cc-option, -Wno-format-truncation) +endif +KBUILD_CFLAGS += $(call cc-option, -Wno-stringop-truncation) + +KBUILD_CFLAGS += -Wno-override-init # alias for -Wno-initializer-overrides in clang + +ifdef CONFIG_CC_IS_CLANG +# Clang before clang-16 would warn on default argument promotions. +ifneq ($(call clang-min-version, 160000),y) +# Disable -Wformat +KBUILD_CFLAGS += -Wno-format +# Then re-enable flags that were part of the -Wformat group that aren't +# problematic. 
+KBUILD_CFLAGS += -Wformat-extra-args -Wformat-invalid-specifier +KBUILD_CFLAGS += -Wformat-zero-length -Wnonnull +# Requires clang-12+. +ifeq ($(call clang-min-version, 120000),y) +KBUILD_CFLAGS += -Wformat-insufficient-args +endif +endif +KBUILD_CFLAGS += $(call cc-option, -Wno-pointer-to-enum-cast) +KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare +KBUILD_CFLAGS += $(call cc-option, -Wno-unaligned-access) +KBUILD_CFLAGS += -Wno-enum-compare-conditional +endif + +endif + +# +# W=2 - warnings which occur quite often but may still be relevant +# +ifneq ($(findstring 2, $(KBUILD_EXTRA_WARN)),) + +KBUILD_CFLAGS += -Wdisabled-optimization +KBUILD_CFLAGS += -Wshadow +KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) +KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 + +else + +# The following turn off the warnings enabled by -Wextra +KBUILD_CFLAGS += -Wno-missing-field-initializers +KBUILD_CFLAGS += -Wno-type-limits +KBUILD_CFLAGS += -Wno-shift-negative-value + +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-enum-enum-conversion +endif + +ifdef CONFIG_CC_IS_GCC +KBUILD_CFLAGS += -Wno-maybe-uninitialized +endif + +endif + +# +# W=3 - more obscure warnings, can most likely be ignored +# +ifneq ($(findstring 3, $(KBUILD_EXTRA_WARN)),) + +KBUILD_CFLAGS += -Wbad-function-cast +KBUILD_CFLAGS += -Wcast-align +KBUILD_CFLAGS += -Wcast-qual +KBUILD_CFLAGS += -Wconversion +KBUILD_CFLAGS += -Wpacked +KBUILD_CFLAGS += -Wpadded +KBUILD_CFLAGS += -Wpointer-arith +KBUILD_CFLAGS += -Wredundant-decls +KBUILD_CFLAGS += -Wsign-compare +KBUILD_CFLAGS += -Wswitch-default + +KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN3 + +else + +# The following turn off the warnings enabled by -Wextra +KBUILD_CFLAGS += -Wno-sign-compare +KBUILD_CFLAGS += -Wno-unused-parameter + +endif + +# +# W=e and CONFIG_WERROR - error out on warnings +# +ifneq ($(findstring e, $(KBUILD_EXTRA_WARN))$(CONFIG_WERROR),) + +KBUILD_CPPFLAGS += -Werror +KBUILD_AFLAGS += 
-Wa,--fatal-warnings +KBUILD_LDFLAGS += --fatal-warnings +KBUILD_USERCFLAGS += -Werror +KBUILD_USERLDFLAGS += -Wl,--fatal-warnings +KBUILD_RUSTFLAGS += -Dwarnings + +# While hostprog flags are used during build bootstrapping (thus should not +# depend on CONFIG_ symbols), -Werror is disruptive and should be opted into. +# Only apply -Werror to hostprogs built after the initial Kconfig stage. +KBUILD_HOSTCFLAGS += -Werror +KBUILD_HOSTLDFLAGS += -Wl,--fatal-warnings +KBUILD_HOSTRUSTFLAGS += -Dwarnings + +endif -- cgit v1.2.3 From 9362d34acf91a706c543d919ade3e651b9bd2d6f Mon Sep 17 00:00:00 2001 From: Pat Somaru Date: Tue, 7 Oct 2025 20:45:28 -0400 Subject: scripts/clang-tools: Handle included .c files in gen_compile_commands The gen_compile_commands.py script currently only creates entries for the primary source files found in .cmd files, but some kernel source files text-include others (i.e. kernel/sched/build_policy.c). This prevents tools like clangd from working properly on text-included c files, such as kernel/sched/ext.c because the generated compile_commands.json does not have entries for them. Extend process_line() to detect when a source file includes .c files, and generate additional compile_commands.json entries for them. For included c files, use the same compile flags as their parent and add their parents headers. 
This enables lsp tools like clangd to work properly on files like kernel/sched/ext.c Signed-off-by: Pat Somaru Reviewed-by: Nathan Chancellor Tested-by: Justin Stitt Tested-by: Eduard Zingerman Link: https://patch.msgid.link/20251008004615.2690081-1-patso@likewhatevs.io Signed-off-by: Nicolas Schier --- scripts/clang-tools/gen_compile_commands.py | 135 ++++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 96e6e46ad1a7..6f4afa92a466 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -21,6 +21,12 @@ _DEFAULT_LOG_LEVEL = 'WARNING' _FILENAME_PATTERN = r'^\..*\.cmd$' _LINE_PATTERN = r'^(saved)?cmd_[^ ]*\.o := (?P.* )(?P[^ ]*\.[cS]) *(;|$)' _VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] + +# Pre-compiled regexes for better performance +_INCLUDE_PATTERN = re.compile(r'^\s*#\s*include\s*[<"]([^>"]*)[>"]') +_C_INCLUDE_PATTERN = re.compile(r'^\s*#\s*include\s*"([^"]*\.c)"\s*$') +_FILENAME_MATCHER = re.compile(_FILENAME_PATTERN) + # The tools/ directory adopts a different build system, and produces .cmd # files in a different format. Do not support it. _EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools'] @@ -82,7 +88,6 @@ def cmdfiles_in_dir(directory): The path to a .cmd file. 
""" - filename_matcher = re.compile(_FILENAME_PATTERN) exclude_dirs = [ os.path.join(directory, d) for d in _EXCLUDE_DIRS ] for dirpath, dirnames, filenames in os.walk(directory, topdown=True): @@ -92,7 +97,7 @@ def cmdfiles_in_dir(directory): continue for filename in filenames: - if filename_matcher.match(filename): + if _FILENAME_MATCHER.match(filename): yield os.path.join(dirpath, filename) @@ -149,8 +154,87 @@ def cmdfiles_for_modorder(modorder): yield to_cmdfile(mod_line.rstrip()) +def extract_includes_from_file(source_file, root_directory): + """Extract #include statements from a C file. + + Args: + source_file: Path to the source .c file to analyze + root_directory: Root directory for resolving relative paths + + Returns: + List of header files that should be included (without quotes/brackets) + """ + includes = [] + if not os.path.exists(source_file): + return includes + + try: + with open(source_file, 'r') as f: + for line in f: + line = line.strip() + # Look for #include statements. + # Match both #include "header.h" and #include . + match = _INCLUDE_PATTERN.match(line) + if match: + header = match.group(1) + # Skip including other .c files to avoid circular includes. + if not header.endswith('.c'): + # For relative includes (quoted), resolve path relative to source file. + if '"' in line: + src_dir = os.path.dirname(source_file) + header_path = os.path.join(src_dir, header) + if os.path.exists(header_path): + rel_header = os.path.relpath(header_path, root_directory) + includes.append(rel_header) + else: + includes.append(header) + else: + # System include like . + includes.append(header) + except IOError: + pass + + return includes + + +def find_included_c_files(source_file, root_directory): + """Find .c files that are included by the given source file. 
+ + Args: + source_file: Path to the source .c file + root_directory: Root directory for resolving relative paths + + Yields: + Full paths to included .c files + """ + if not os.path.exists(source_file): + return + + try: + with open(source_file, 'r') as f: + for line in f: + line = line.strip() + # Look for #include "*.c" patterns. + match = _C_INCLUDE_PATTERN.match(line) + if match: + included_file = match.group(1) + # Handle relative paths. + if not os.path.isabs(included_file): + src_dir = os.path.dirname(source_file) + included_file = os.path.join(src_dir, included_file) + + # Normalize the path. + included_file = os.path.normpath(included_file) + + # Check if the file exists. + if os.path.exists(included_file): + yield included_file + except IOError: + pass + + def process_line(root_directory, command_prefix, file_path): - """Extracts information from a .cmd line and creates an entry from it. + """Extracts information from a .cmd line and creates entries from it. Args: root_directory: The directory that was searched for .cmd files. Usually @@ -160,7 +244,8 @@ def process_line(root_directory, command_prefix, file_path): Usually relative to root_directory, but sometimes absolute. Returns: - An entry to append to compile_commands. + A list of entries to append to compile_commands (may include multiple + entries if the source file includes other .c files). Raises: ValueError: Could not find the extracted file based on file_path and @@ -176,11 +261,47 @@ def process_line(root_directory, command_prefix, file_path): abs_path = os.path.realpath(os.path.join(root_directory, file_path)) if not os.path.exists(abs_path): raise ValueError('File %s not found' % abs_path) - return { + + entries = [] + + # Create entry for the main source file. + main_entry = { 'directory': root_directory, 'file': abs_path, 'command': prefix + file_path, } + entries.append(main_entry) + + # Find and create entries for included .c files. 
+ for included_c_file in find_included_c_files(abs_path, root_directory): + # For included .c files, create a compilation command that: + # 1. Uses the same compilation flags as the parent file + # 2. But compiles the included file directly (not the parent) + # 3. Includes necessary headers from the parent file for proper macro resolution + + # Convert absolute path to relative for the command. + rel_path = os.path.relpath(included_c_file, root_directory) + + # Extract includes from the parent file to provide proper compilation context. + extra_includes = '' + try: + parent_includes = extract_includes_from_file(abs_path, root_directory) + if parent_includes: + extra_includes = ' ' + ' '.join('-include ' + inc for inc in parent_includes) + except IOError: + pass + + included_entry = { + 'directory': root_directory, + 'file': included_c_file, + # Use the same compilation prefix but target the included file directly. + # Add extra headers for proper macro resolution. + 'command': prefix + extra_includes + ' ' + rel_path, + } + entries.append(included_entry) + logging.debug('Added entry for included file: %s', included_c_file) + + return entries def main(): @@ -213,9 +334,9 @@ def main(): result = line_matcher.match(f.readline()) if result: try: - entry = process_line(directory, result.group('command_prefix'), + entries = process_line(directory, result.group('command_prefix'), result.group('file_path')) - compile_commands.append(entry) + compile_commands.extend(entries) except ValueError as err: logging.info('Could not add line from %s: %s', cmdfile, err) -- cgit v1.2.3 From 7bade3f7e91969985149a66c98ef0d1d842ff464 Mon Sep 17 00:00:00 2001 From: Nicolas Schier Date: Wed, 5 Nov 2025 21:26:02 +0100 Subject: scripts: headers_install.sh: Remove two outdated config leak ignore entries Remove config leak ignore entries for arch/arc/include/uapi/asm/page.h as they have been removed in commit d3e5bab923d3 ("arch: simplify architecture specific page size configuration"). 
Reviewed-by: Nathan Chancellor Link: https://patch.msgid.link/20251105-update-headers-install-config-leak-ignore-list-v1-1-40be3eed68cb@kernel.org Signed-off-by: Nicolas Schier --- scripts/headers_install.sh | 2 -- 1 file changed, 2 deletions(-) (limited to 'scripts') diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh index 4c20c62c4faf..0e4e939efc94 100755 --- a/scripts/headers_install.sh +++ b/scripts/headers_install.sh @@ -70,8 +70,6 @@ configs=$(sed -e ' # # The format is : in each line. config_leak_ignores=" -arch/arc/include/uapi/asm/page.h:CONFIG_ARC_PAGE_SIZE_16K -arch/arc/include/uapi/asm/page.h:CONFIG_ARC_PAGE_SIZE_4K arch/arc/include/uapi/asm/swab.h:CONFIG_ARC_HAS_SWAPE arch/arm/include/uapi/asm/ptrace.h:CONFIG_CPU_ENDIAN_BE8 arch/nios2/include/uapi/asm/swab.h:CONFIG_NIOS2_CI_SWAB_NO -- cgit v1.2.3 From 6401fd334ddf5e2035a0dca27cd761974d568fcd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 17 Oct 2025 21:31:04 -0700 Subject: lib/crypto: tests: Add KUnit tests for BLAKE2b Add a KUnit test suite for the BLAKE2b library API, mirroring the BLAKE2s test suite very closely. As with the BLAKE2s test suite, a benchmark is included. 
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251018043106.375964-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- scripts/crypto/gen-hash-testvecs.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'scripts') diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py index fc063f2ee95f..c5b7985fe728 100755 --- a/scripts/crypto/gen-hash-testvecs.py +++ b/scripts/crypto/gen-hash-testvecs.py @@ -85,8 +85,8 @@ def print_c_struct_u8_array_field(name, value): print('\t\t},') def alg_digest_size_const(alg): - if alg == 'blake2s': - return 'BLAKE2S_HASH_SIZE' + if alg.startswith('blake2'): + return f'{alg.upper()}_HASH_SIZE' return f'{alg.upper()}_DIGEST_SIZE' def gen_unkeyed_testvecs(alg): @@ -124,19 +124,22 @@ def gen_hmac_testvecs(alg): f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]', ctx.digest()) -BLAKE2S_KEY_SIZE = 32 -BLAKE2S_HASH_SIZE = 32 - -def gen_additional_blake2s_testvecs(): +def gen_additional_blake2_testvecs(alg): + if alg == 'blake2s': + (max_key_size, max_hash_size) = (32, 32) + elif alg == 'blake2b': + (max_key_size, max_hash_size) = (64, 64) + else: + raise ValueError(f'Unsupported alg: {alg}') hashes = b'' - for key_len in range(BLAKE2S_KEY_SIZE + 1): - for out_len in range(1, BLAKE2S_HASH_SIZE + 1): - h = hashlib.blake2s(digest_size=out_len, key=rand_bytes(key_len)) + for key_len in range(max_key_size + 1): + for out_len in range(1, max_hash_size + 1): + h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len)) h.update(rand_bytes(100)) hashes += h.digest() print_static_u8_array_definition( - 'blake2s_keyed_testvec_consolidated[BLAKE2S_HASH_SIZE]', - compute_hash('blake2s', hashes)) + f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]', + compute_hash(alg, hashes)) def gen_additional_poly1305_testvecs(): key = b'\xff' * POLY1305_KEY_SIZE @@ -160,8 +163,8 @@ alg = sys.argv[1] print('/* SPDX-License-Identifier: 
GPL-2.0-or-later */') print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */') gen_unkeyed_testvecs(alg) -if alg == 'blake2s': - gen_additional_blake2s_testvecs() +if alg.startswith('blake2'): + gen_additional_blake2_testvecs(alg) elif alg == 'poly1305': gen_additional_poly1305_testvecs() else: -- cgit v1.2.3 From 15c64c47e48472875c2b85838581843f05057787 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 25 Oct 2025 22:50:22 -0700 Subject: lib/crypto: tests: Add SHA3 kunit tests Add a SHA3 kunit test suite, providing the following: (*) A simple test of each of SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128 and SHAKE256. (*) NIST 0- and 1600-bit test vectors for SHAKE128 and SHAKE256. (*) Output tiling (multiple squeezing) tests for SHAKE256. (*) Standard hash template test for SHA3-256. To make this possible, gen-hash-testvecs.py is modified to support sha3-256. (*) Standard benchmark test for SHA3-256. [EB: dropped some unnecessary changes to gen-hash-testvecs.py, moved addition of Testing section in doc file into this commit, and other small cleanups] Signed-off-by: David Howells Reviewed-by: Ard Biesheuvel Tested-by: Harald Freudenberger Link: https://lore.kernel.org/r/20251026055032.1413733-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- scripts/crypto/gen-hash-testvecs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py index c5b7985fe728..47f79602e290 100755 --- a/scripts/crypto/gen-hash-testvecs.py +++ b/scripts/crypto/gen-hash-testvecs.py @@ -87,7 +87,7 @@ def print_c_struct_u8_array_field(name, value): def alg_digest_size_const(alg): if alg.startswith('blake2'): return f'{alg.upper()}_HASH_SIZE' - return f'{alg.upper()}_DIGEST_SIZE' + return f'{alg.upper().replace('-', '_')}_DIGEST_SIZE' def gen_unkeyed_testvecs(alg): print('') @@ -167,5 +167,7 @@ if alg.startswith('blake2'): 
gen_additional_blake2_testvecs(alg) elif alg == 'poly1305': gen_additional_poly1305_testvecs() +elif alg.startswith('sha3-'): + pass # no HMAC else: gen_hmac_testvecs(alg) -- cgit v1.2.3 From b2210f35161d6202fcca4244800a1d54c80e8bc1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 25 Oct 2025 22:50:23 -0700 Subject: lib/crypto: tests: Add additional SHAKE tests Add the following test cases to cover gaps in the SHAKE testing: - test_shake_all_lens_up_to_4096() - test_shake_multiple_squeezes() - test_shake_with_guarded_bufs() Remove test_shake256_tiling() and test_shake256_tiling2() since they are superseded by test_shake_multiple_squeezes(). It provides better test coverage by using randomized testing. E.g., it's able to generate a zero-length squeeze followed by a nonzero-length squeeze, which the first 7 versions of the SHA-3 patchset handled incorrectly. Tested-by: Harald Freudenberger Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251026055032.1413733-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- scripts/crypto/gen-hash-testvecs.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'scripts') diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py index 47f79602e290..ae2682882cd1 100755 --- a/scripts/crypto/gen-hash-testvecs.py +++ b/scripts/crypto/gen-hash-testvecs.py @@ -111,6 +111,18 @@ def gen_unkeyed_testvecs(alg): f'hash_testvec_consolidated[{alg_digest_size_const(alg)}]', hash_final(ctx)) +def gen_additional_sha3_testvecs(): + max_len = 4096 + in_data = rand_bytes(max_len) + for alg in ['shake128', 'shake256']: + ctx = hashlib.new('sha3-256') + for in_len in range(max_len + 1): + out_len = (in_len * 293) % (max_len + 1) + out = hashlib.new(alg, data=in_data[:in_len]).digest(out_len) + ctx.update(out) + print_static_u8_array_definition(f'{alg}_testvec_consolidated[SHA3_256_DIGEST_SIZE]', + ctx.digest()) + def gen_hmac_testvecs(alg): ctx = 
hmac.new(rand_bytes(32), digestmod=alg) data = rand_bytes(4096) @@ -155,19 +167,26 @@ def gen_additional_poly1305_testvecs(): if len(sys.argv) != 2: sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n') - sys.stderr.write('ALGORITHM may be any supported by Python hashlib, or poly1305.\n') + sys.stderr.write('ALGORITHM may be any supported by Python hashlib, or poly1305 or sha3.\n') sys.stderr.write('Example: gen-hash-testvecs.py sha512\n') sys.exit(1) alg = sys.argv[1] print('/* SPDX-License-Identifier: GPL-2.0-or-later */') print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */') -gen_unkeyed_testvecs(alg) if alg.startswith('blake2'): + gen_unkeyed_testvecs(alg) gen_additional_blake2_testvecs(alg) elif alg == 'poly1305': + gen_unkeyed_testvecs(alg) gen_additional_poly1305_testvecs() -elif alg.startswith('sha3-'): - pass # no HMAC +elif alg == 'sha3': + print() + print('/* SHA3-256 test vectors */') + gen_unkeyed_testvecs('sha3-256') + print() + print('/* SHAKE test vectors */') + gen_additional_sha3_testvecs() else: + gen_unkeyed_testvecs(alg) gen_hmac_testvecs(alg) -- cgit v1.2.3 From b3aed551b3fca753469520c95b6f4c61ada028d3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 9 Nov 2025 15:47:18 -0800 Subject: lib/crypto: tests: Add KUnit tests for POLYVAL Add a test suite for the POLYVAL library, including: - All the standard tests and the benchmark from hash-test-template.h - Comparison with a test vector from the RFC - Test with key and message containing all one bits - Additional tests related to the key struct Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251109234726.638437-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- scripts/crypto/gen-hash-testvecs.py | 47 +++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py index ae2682882cd1..c1d0517140bd 100755 --- 
a/scripts/crypto/gen-hash-testvecs.py +++ b/scripts/crypto/gen-hash-testvecs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-or-later # -# Script that generates test vectors for the given cryptographic hash function. +# Script that generates test vectors for the given hash function. # # Copyright 2025 Google LLC @@ -50,11 +50,42 @@ class Poly1305: m = (self.h + self.s) % 2**128 return m.to_bytes(16, byteorder='little') +POLYVAL_POLY = sum((1 << i) for i in [128, 127, 126, 121, 0]) +POLYVAL_BLOCK_SIZE = 16 + +# A straightforward, unoptimized implementation of POLYVAL. +# Reference: https://datatracker.ietf.org/doc/html/rfc8452 +class Polyval: + def __init__(self, key): + assert len(key) == 16 + self.h = int.from_bytes(key, byteorder='little') + self.acc = 0 + + # Note: this supports partial blocks only at the end. + def update(self, data): + for i in range(0, len(data), 16): + # acc += block + self.acc ^= int.from_bytes(data[i:i+16], byteorder='little') + # acc = (acc * h * x^-128) mod POLYVAL_POLY + product = 0 + for j in range(128): + if (self.h & (1 << j)) != 0: + product ^= self.acc << j + if (product & (1 << j)) != 0: + product ^= POLYVAL_POLY << j + self.acc = product >> 128 + return self + + def digest(self): + return self.acc.to_bytes(16, byteorder='little') + def hash_init(alg): if alg == 'poly1305': # Use a fixed random key here, to present Poly1305 as an unkeyed hash. # This allows all the test cases for unkeyed hashes to work on Poly1305. 
return Poly1305(rand_bytes(POLY1305_KEY_SIZE)) + if alg == 'polyval': + return Polyval(rand_bytes(POLYVAL_BLOCK_SIZE)) return hashlib.new(alg) def hash_update(ctx, data): @@ -165,9 +196,18 @@ def gen_additional_poly1305_testvecs(): 'poly1305_allones_macofmacs[POLY1305_DIGEST_SIZE]', Poly1305(key).update(data).digest()) +def gen_additional_polyval_testvecs(): + key = b'\xff' * POLYVAL_BLOCK_SIZE + hashes = b'' + for data_len in range(0, 4097, 16): + hashes += Polyval(key).update(b'\xff' * data_len).digest() + print_static_u8_array_definition( + 'polyval_allones_hashofhashes[POLYVAL_DIGEST_SIZE]', + Polyval(key).update(hashes).digest()) + if len(sys.argv) != 2: sys.stderr.write('Usage: gen-hash-testvecs.py ALGORITHM\n') - sys.stderr.write('ALGORITHM may be any supported by Python hashlib, or poly1305 or sha3.\n') + sys.stderr.write('ALGORITHM may be any supported by Python hashlib; or poly1305, polyval, or sha3.\n') sys.stderr.write('Example: gen-hash-testvecs.py sha512\n') sys.exit(1) @@ -180,6 +220,9 @@ if alg.startswith('blake2'): elif alg == 'poly1305': gen_unkeyed_testvecs(alg) gen_additional_poly1305_testvecs() +elif alg == 'polyval': + gen_unkeyed_testvecs(alg) + gen_additional_polyval_testvecs() elif alg == 'sha3': print() print('/* SHA3-256 test vectors */') -- cgit v1.2.3 From fc387a0704cc86cf47de0e64236577af3e148ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=96zkan?= Date: Wed, 17 Sep 2025 20:37:24 +0300 Subject: checkpatch: detect unhandled placeholders in cover letters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new check PLACEHOLDER_USE to detect unhandled placeholders. This prevents sending patch series with incomplete patches (mostly in cover letters) containing auto generated subject or blurb lines. These placeholders can be seen on mailing lists. With this change, checkpatch will emit an error when such text is found. 
Link: https://lkml.kernel.org/r/20250917173725.22547-2-work@onurozkan.dev Signed-off-by: Onur Özkan Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Jonathan Corbet Cc: Lukas Bulwahn Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'scripts') diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 92669904eecc..6729f18e5654 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3345,6 +3345,13 @@ sub process { } } +# Check for auto-generated unhandled placeholder text (mostly for cover letters) + if (($in_commit_log || $in_header_lines) && + $rawline =~ /(?:SUBJECT|BLURB) HERE/) { + ERROR("PLACEHOLDER_USE", + "Placeholder text detected\n" . $herecurr); + } + # Check for git id commit length and improperly formed commit descriptions # A correctly formed commit description is: # commit ("Complete commit subject") -- cgit v1.2.3 From 4ab2ee307983548b29ddaab0ecaef82d526cf4c9 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 11 Nov 2025 08:43:51 +0200 Subject: kbuild: install-extmod-build: Properly fix CC expansion when ccache is used Currently, when cross-compiling and ccache is used, the expanding of CC turns out to be without any quotes, leading to the following error: make[4]: *** No rule to make target 'aarch64-linux-gnu-gcc'. Stop. make[3]: *** [Makefile:2164: run-command] Error 2 And it makes sense, because after expansion it ends up like this: make run-command KBUILD_RUN_COMMAND=+$(MAKE) \ HOSTCC=ccache aarch64-linux-gnu-gcc VPATH= srcroot=. $(build)= ... So add another set of double quotes to surround whatever CC expands to to make sure the aarch64-linux-gnu-gcc isn't expanded to something that looks like an entirely separate target. 
Fixes: 140332b6ed72 ("kbuild: fix linux-headers package build when $(CC) cannot link userspace") Signed-off-by: Abel Vesa Reviewed-by: Nicolas Schier Link: https://patch.msgid.link/20251111-kbuild-install-extmod-build-fix-cc-expand-third-try-v2-1-15ba1b37e71a@linaro.org Signed-off-by: Nathan Chancellor --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index 054fdf45cc37..2576cf7902db 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-to=. "${destdir}")"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="'"${CC}"'" VPATH= srcroot=. $(build)='"$(realpath --relative-to=. "${destdir}")"/scripts rm -f "${destdir}/scripts/Kbuild" fi -- cgit v1.2.3 From f64c7e113dc937e0987c83ef51a3cd52a2c277c7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 13 Nov 2025 10:53:43 +0100 Subject: scripts: docs: kdoc_files.py: don't consider symlinks as directories As reported by Randy, currently kdoc_files can go into endless loops when symlinks are used: $ ln -s . Documentation/peci/foo $ ./scripts/kernel-doc Documentation/peci/ ... File "/new_devel/docs/scripts/lib/kdoc/kdoc_files.py", line 52, in _parse_dir if entry.is_dir(): ~~~~~~~~~~~~^^ OSError: [Errno 40] Too many levels of symbolic links: 'Documentation/peci/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo' Prevent that by not considering symlinks as directories. 
Reported-by: Randy Dunlap Closes: https://lore.kernel.org/linux-doc/80701524-09fd-4d68-8715-331f47c969f2@infradead.org/ Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Jonathan Corbet Message-ID: <73c3450f34e2a4b42ef2ef279d7487c47d22e3bd.1763027622.git.mchehab+huawei@kernel.org> --- scripts/lib/kdoc/kdoc_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py index 061c033f32da..1fd8d17edb32 100644 --- a/scripts/lib/kdoc/kdoc_files.py +++ b/scripts/lib/kdoc/kdoc_files.py @@ -49,7 +49,7 @@ class GlobSourceFiles: for entry in obj: name = os.path.join(dirname, entry.name) - if entry.is_dir(): + if entry.is_dir(follow_symlinks=False): yield from self._parse_dir(name) if not entry.is_file(): -- cgit v1.2.3 From d81d9d389b9b73acd68f300c8889c7fa1acd4977 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 14 Nov 2025 14:43:56 +0100 Subject: kbuild: don't enable CC_CAN_LINK if the dummy program generates warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible that the kernel toolchain generates warnings when used together with the system toolchain. This happens for example when the older kernel toolchain does not handle new versions of sframe debug information. While these warnings were ignored during the evaluation of CC_CAN_LINK, together with CONFIG_WERROR the actual userprog build will later fail. Example warning: .../x86_64-linux/13.2.0/../../../../x86_64-linux/bin/ld: error in /lib/../lib64/crt1.o(.sframe); no .sframe will be created collect2: error: ld returned 1 exit status Make sure that the very simple example program does not generate warnings already to avoid breaking the userprog compilations. 
Fixes: ec4a3992bc0b ("kbuild: respect CONFIG_WERROR for linker and assembler") Fixes: 3f0ff4cc6ffb ("kbuild: respect CONFIG_WERROR for userprogs") Signed-off-by: Thomas Weißschuh Reviewed-by: Nicolas Schier Reviewed-by: Nathan Chancellor Link: https://patch.msgid.link/20251114-kbuild-userprogs-bits-v3-1-4dee0d74d439@linutronix.de Signed-off-by: Nicolas Schier --- scripts/cc-can-link.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/cc-can-link.sh b/scripts/cc-can-link.sh index 6efcead31989..e67fd8d7b684 100755 --- a/scripts/cc-can-link.sh +++ b/scripts/cc-can-link.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -cat << "END" | $@ -x c - -o /dev/null >/dev/null 2>&1 +cat << "END" | $@ -Werror -Wl,--fatal-warnings -x c - -o /dev/null >/dev/null 2>&1 #include int main(void) { -- cgit v1.2.3 From 80623f2c83d7de5c289d4240b8f4cef4103c51fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 14 Nov 2025 14:43:57 +0100 Subject: init: deduplicate cc-can-link.sh invocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The command to invoke scripts/cc-can-link.sh is very long and new usages are about to be added. Add a helper variable to make the code easier to read and maintain. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Nicolas Schier Reviewed-by: Nathan Chancellor Link: https://patch.msgid.link/20251114-kbuild-userprogs-bits-v3-2-4dee0d74d439@linutronix.de Signed-off-by: Nicolas Schier --- scripts/Kconfig.include | 3 +++ 1 file changed, 3 insertions(+) (limited to 'scripts') diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index 33193ca6e803..d42042b6c9e2 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -65,6 +65,9 @@ cc-option-bit = $(if-success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null,$ m32-flag := $(cc-option-bit,-m32) m64-flag := $(cc-option-bit,-m64) +# Test whether the compiler can link userspace applications +cc_can_link_user = $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(1)) + rustc-version := $(shell,$(srctree)/scripts/rustc-version.sh $(RUSTC)) rustc-llvm-version := $(shell,$(srctree)/scripts/rustc-llvm-version.sh $(RUSTC)) -- cgit v1.2.3 From ab844cf32058d6ba9bd98a921afa2256085e21fc Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 10 Nov 2025 12:35:27 +0100 Subject: rust: allow `unreachable_pub` for doctests Examples (i.e. doctests) may want to show public items such as structs, thus the `unreachable_pub` warning is not very helpful. Thus allow it for all doctests. In addition, remove it from the existing `expect`s we have in a couple doctests. 
Suggested-by: Alice Ryhl Link: https://lore.kernel.org/rust-for-linux/aRG9VjsaCjsvAwUn@google.com/ Reviewed-by: David Gow Acked-by: Benno Lossin Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20251110113528.1658238-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/rustdoc_test_gen.rs | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index c8f9dc2ab976..0e6a0542d1bd 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -208,6 +208,7 @@ pub extern "C" fn {kunit_name}(__kunit_test: *mut ::kernel::bindings::kunit) {{ #[allow(unused)] static __DOCTEST_ANCHOR: i32 = ::core::line!() as i32 + {body_offset} + 1; {{ + #![allow(unreachable_pub)] {body} main(); }} -- cgit v1.2.3 From d8c8a575f5aa7afc7cc7718b779c29ae5f7abc58 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 14 Nov 2025 16:25:37 -0600 Subject: kbuild: Ensure .dtbo targets are applied to a base .dtb It is a requirement that DT overlays in the kernel are applied at build time to a base DTB in order to validate they can be applied and to validate them against the DT schemas. DT overlays on their own may be incomplete and can't be validated. Add a kbuild check so this doesn't have to be checked and fixed periodically. 
Signed-off-by: Rob Herring (Arm) --- scripts/Makefile.dtbs | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'scripts') diff --git a/scripts/Makefile.dtbs b/scripts/Makefile.dtbs index 2d321b813600..e092b460d5a1 100644 --- a/scripts/Makefile.dtbs +++ b/scripts/Makefile.dtbs @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only +all-dtb := $(dtb-y) $(dtb-) + # If CONFIG_OF_ALL_DTBS is enabled, all DT blobs are built dtb-$(CONFIG_OF_ALL_DTBS) += $(dtb-) @@ -10,6 +12,13 @@ real-dtb-y := $(call real-search, $(dtb-y), .dtb, -dtbs) # Base DTB that overlay is applied onto base-dtb-y := $(filter %.dtb, $(call real-search, $(multi-dtb-y), .dtb, -dtbs)) +# Ensure that any .dtbo is applied to at least one base .dtb. Otherwise, it +# does not get validated. +applied-dtbo := $(filter %.dtbo, \ + $(call real-search, $(call multi-search, $(all-dtb), .dtb, -dtbs), .dtb, -dtbs)) +unapplied-dtbo := $(filter-out $(applied-dtbo),$(filter %.dtbo, $(dtb-y))) +$(if $(unapplied-dtbo), $(warning .dtbo is not applied to any base: $(unapplied-dtbo))) + dtb-y := $(addprefix $(obj)/, $(dtb-y)) multi-dtb-y := $(addprefix $(obj)/, $(multi-dtb-y)) real-dtb-y := $(addprefix $(obj)/, $(real-dtb-y)) -- cgit v1.2.3 From e5d330e13f67d574f683c052c9a342814fd8fa39 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 17 Nov 2025 09:07:13 +0100 Subject: rust: allow `clippy::disallowed_names` for doctests Examples (i.e. doctests) may want to use names such as `foo`, thus the `clippy::disallowed_names` lint [1] gets in the way. Thus allow it for all doctests. In addition, remove it from the existing `expect`s we have in a few doctests. This does not mean that we should stop trying to find good names for our examples, though. 
Link: https://rust-lang.github.io/rust-clippy/stable/index.html#disallowed_names [1] Suggested-by: Alice Ryhl Link: https://lore.kernel.org/rust-for-linux/aRHSLChi5HYXW4-9@google.com/ Reviewed-by: Alice Ryhl Acked-by: Benno Lossin Link: https://patch.msgid.link/20251117080714.876978-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/rustdoc_test_gen.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/rustdoc_test_gen.rs b/scripts/rustdoc_test_gen.rs index 0e6a0542d1bd..be0561049660 100644 --- a/scripts/rustdoc_test_gen.rs +++ b/scripts/rustdoc_test_gen.rs @@ -208,7 +208,7 @@ pub extern "C" fn {kunit_name}(__kunit_test: *mut ::kernel::bindings::kunit) {{ #[allow(unused)] static __DOCTEST_ANCHOR: i32 = ::core::line!() as i32 + {body_offset} + 1; {{ - #![allow(unreachable_pub)] + #![allow(unreachable_pub, clippy::disallowed_names)] {body} main(); }} -- cgit v1.2.3 From 2092007aa32f8dd968c38751bd1b7cac9b1f738d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 12 Nov 2025 15:32:34 -0800 Subject: objtool/klp: Only enable --checksum when needed With CONFIG_KLP_BUILD enabled, checksums are only needed during a klp-build run. There's no need to enable them for normal kernel builds. This also has the benefit of softening the xxhash dependency. 
Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley Link: https://patch.msgid.link/edbb1ca215e4926e02edb493b68b9d6d063e902f.1762990139.git.jpoimboe@kernel.org --- scripts/Makefile.lib | 1 - scripts/livepatch/klp-build | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index f4b33919ec37..28a1c08e3b22 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -173,7 +173,6 @@ ifdef CONFIG_OBJTOOL objtool := $(objtree)/tools/objtool/objtool -objtool-args-$(CONFIG_KLP_BUILD) += --checksum objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK) += --hacks=jump_label objtool-args-$(CONFIG_HAVE_NOINSTR_HACK) += --hacks=noinstr objtool-args-$(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) += --hacks=skylake diff --git a/scripts/livepatch/klp-build b/scripts/livepatch/klp-build index 881e052e7fae..882272120c9e 100755 --- a/scripts/livepatch/klp-build +++ b/scripts/livepatch/klp-build @@ -489,8 +489,11 @@ clean_kernel() { build_kernel() { local log="$TMP_DIR/build.log" + local objtool_args=() local cmd=() + objtool_args=("--checksum") + cmd=("make") # When a patch to a kernel module references a newly created unexported @@ -513,6 +516,7 @@ build_kernel() { cmd+=("$VERBOSE") cmd+=("-j$JOBS") cmd+=("KCFLAGS=-ffunction-sections -fdata-sections") + cmd+=("OBJTOOL_ARGS=${objtool_args[*]}") cmd+=("vmlinux") cmd+=("modules") -- cgit v1.2.3 From 778b8ebe5192e7a7f00563a7456517dfa63e1d90 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 10 Nov 2025 15:04:29 -0700 Subject: docs: Move the python libraries to tools/lib/python "scripts/lib" was always a bit of an awkward place for Python modules. We already have tools/lib; create a tools/lib/python, move the libraries there, and update the users accordingly. While at it, move the contents of tools/docs/lib. Rather than make another directory, just put these documentation-oriented modules under "kdoc". 
Signed-off-by: Jonathan Corbet Message-ID: <20251110220430.726665-2-corbet@lwn.net> --- scripts/jobserver-exec | 2 +- scripts/kernel-doc.py | 2 +- scripts/lib/abi/abi_parser.py | 628 -------------- scripts/lib/abi/abi_regex.py | 234 ------ scripts/lib/abi/helpers.py | 38 - scripts/lib/abi/system_symbols.py | 378 --------- scripts/lib/jobserver.py | 149 ---- scripts/lib/kdoc/kdoc_files.py | 294 ------- scripts/lib/kdoc/kdoc_item.py | 43 - scripts/lib/kdoc/kdoc_output.py | 824 ------------------ scripts/lib/kdoc/kdoc_parser.py | 1667 ------------------------------------- scripts/lib/kdoc/kdoc_re.py | 270 ------ 12 files changed, 2 insertions(+), 4527 deletions(-) delete mode 100644 scripts/lib/abi/abi_parser.py delete mode 100644 scripts/lib/abi/abi_regex.py delete mode 100644 scripts/lib/abi/helpers.py delete mode 100644 scripts/lib/abi/system_symbols.py delete mode 100755 scripts/lib/jobserver.py delete mode 100644 scripts/lib/kdoc/kdoc_files.py delete mode 100644 scripts/lib/kdoc/kdoc_item.py delete mode 100644 scripts/lib/kdoc/kdoc_output.py delete mode 100644 scripts/lib/kdoc/kdoc_parser.py delete mode 100644 scripts/lib/kdoc/kdoc_re.py (limited to 'scripts') diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec index ae23afd344ec..758e947a6fb9 100755 --- a/scripts/jobserver-exec +++ b/scripts/jobserver-exec @@ -13,7 +13,7 @@ See: import os import sys -LIB_DIR = "lib" +LIB_DIR = "../tools/lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index d9fe2bcbd39c..bb24bbf73167 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -111,7 +111,7 @@ import sys # Import Python modules -LIB_DIR = "lib/kdoc" +LIB_DIR = "../tools/lib/python/kdoc" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) diff --git a/scripts/lib/abi/abi_parser.py b/scripts/lib/abi/abi_parser.py deleted 
file mode 100644 index 66a738013ce1..000000000000 --- a/scripts/lib/abi/abi_parser.py +++ /dev/null @@ -1,628 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. -""" - -from argparse import Namespace -import logging -import os -import re - -from pprint import pformat -from random import randrange, seed - -# Import Python modules - -from helpers import AbiDebug, ABI_DIR - - -class AbiParser: - """Main class to parse ABI files""" - - TAGS = r"(what|where|date|kernelversion|contact|description|users)" - XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)" - - def __init__(self, directory, logger=None, - enable_lineno=False, show_warnings=True, debug=0): - """Stores arguments for the class and initialize class vars""" - - self.directory = directory - self.enable_lineno = enable_lineno - self.show_warnings = show_warnings - self.debug = debug - - if not logger: - self.log = logging.getLogger("get_abi") - else: - self.log = logger - - self.data = {} - self.what_symbols = {} - self.file_refs = {} - self.what_refs = {} - - # Ignore files that contain such suffixes - self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~") - - # Regular expressions used on parser - self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR) - self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I) - self.re_valid = re.compile(self.TAGS) - self.re_start_spc = re.compile(r"(\s*)(\S.*)") - self.re_whitespace = re.compile(r"^\s+") - - # Regular used on print - self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})") - self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])") - self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)") - self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n") - self.re_doc = 
re.compile(r"Documentation/(?!devicetree)(\S+)\.rst") - self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)") - self.re_xref_node = re.compile(self.XREF) - - def warn(self, fdata, msg, extra=None): - """Displays a parse error if warning is enabled""" - - if not self.show_warnings: - return - - msg = f"{fdata.fname}:{fdata.ln}: {msg}" - if extra: - msg += "\n\t\t" + extra - - self.log.warning(msg) - - def add_symbol(self, what, fname, ln=None, xref=None): - """Create a reference table describing where each 'what' is located""" - - if what not in self.what_symbols: - self.what_symbols[what] = {"file": {}} - - if fname not in self.what_symbols[what]["file"]: - self.what_symbols[what]["file"][fname] = [] - - if ln and ln not in self.what_symbols[what]["file"][fname]: - self.what_symbols[what]["file"][fname].append(ln) - - if xref: - self.what_symbols[what]["xref"] = xref - - def _parse_line(self, fdata, line): - """Parse a single line of an ABI file""" - - new_what = False - new_tag = False - content = None - - match = self.re_tag.match(line) - if match: - new = match.group(1).lower() - sep = match.group(2) - content = match.group(3) - - match = self.re_valid.search(new) - if match: - new_tag = match.group(1) - else: - if fdata.tag == "description": - # New "tag" is actually part of description. - # Don't consider it a tag - new_tag = False - elif fdata.tag != "": - self.warn(fdata, f"tag '{fdata.tag}' is invalid", line) - - if new_tag: - # "where" is Invalid, but was a common mistake. Warn if found - if new_tag == "where": - self.warn(fdata, "tag 'Where' is invalid. 
Should be 'What:' instead") - new_tag = "what" - - if new_tag == "what": - fdata.space = None - - if content not in self.what_symbols: - self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln) - - if fdata.tag == "what": - fdata.what.append(content.strip("\n")) - else: - if fdata.key: - if "description" not in self.data.get(fdata.key, {}): - self.warn(fdata, f"{fdata.key} doesn't have a description") - - for w in fdata.what: - self.add_symbol(what=w, fname=fdata.fname, - ln=fdata.what_ln, xref=fdata.key) - - fdata.label = content - new_what = True - - key = "abi_" + content.lower() - fdata.key = self.re_unprintable.sub("_", key).strip("_") - - # Avoid duplicated keys but using a defined seed, to make - # the namespace identical if there aren't changes at the - # ABI symbols - seed(42) - - while fdata.key in self.data: - char = randrange(0, 51) + ord("A") - if char > ord("Z"): - char += ord("a") - ord("Z") - 1 - - fdata.key += chr(char) - - if fdata.key and fdata.key not in self.data: - self.data[fdata.key] = { - "what": [content], - "file": [fdata.file_ref], - "path": fdata.ftype, - "line_no": fdata.ln, - } - - fdata.what = self.data[fdata.key]["what"] - - self.what_refs[content] = fdata.key - fdata.tag = new_tag - fdata.what_ln = fdata.ln - - if fdata.nametag["what"]: - t = (content, fdata.key) - if t not in fdata.nametag["symbols"]: - fdata.nametag["symbols"].append(t) - - return - - if fdata.tag and new_tag: - fdata.tag = new_tag - - if new_what: - fdata.label = "" - - if "description" in self.data[fdata.key]: - self.data[fdata.key]["description"] += "\n\n" - - if fdata.file_ref not in self.data[fdata.key]["file"]: - self.data[fdata.key]["file"].append(fdata.file_ref) - - if self.debug == AbiDebug.WHAT_PARSING: - self.log.debug("what: %s", fdata.what) - - if not fdata.what: - self.warn(fdata, "'What:' should come first:", line) - return - - if new_tag == "description": - fdata.space = None - - if content: - sep = sep.replace(":", " ") - - c = " " * 
len(new_tag) + sep + content - c = c.expandtabs() - - match = self.re_start_spc.match(c) - if match: - # Preserve initial spaces for the first line - fdata.space = match.group(1) - content = match.group(2) + "\n" - - self.data[fdata.key][fdata.tag] = content - - return - - # Store any contents before tags at the database - if not fdata.tag and "what" in fdata.nametag: - fdata.nametag["description"] += line - return - - if fdata.tag == "description": - content = line.expandtabs() - - if self.re_whitespace.sub("", content) == "": - self.data[fdata.key][fdata.tag] += "\n" - return - - if fdata.space is None: - match = self.re_start_spc.match(content) - if match: - # Preserve initial spaces for the first line - fdata.space = match.group(1) - - content = match.group(2) + "\n" - else: - if content.startswith(fdata.space): - content = content[len(fdata.space):] - - else: - fdata.space = "" - - if fdata.tag == "what": - w = content.strip("\n") - if w: - self.data[fdata.key][fdata.tag].append(w) - else: - self.data[fdata.key][fdata.tag] += content - return - - content = line.strip() - if fdata.tag: - if fdata.tag == "what": - w = content.strip("\n") - if w: - self.data[fdata.key][fdata.tag].append(w) - else: - self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n") - return - - # Everything else is error - if content: - self.warn(fdata, "Unexpected content", line) - - def parse_readme(self, nametag, fname): - """Parse ABI README file""" - - nametag["what"] = ["Introduction"] - nametag["path"] = "README" - with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: - for line in fp: - match = self.re_tag.match(line) - if match: - new = match.group(1).lower() - - match = self.re_valid.search(new) - if match: - nametag["description"] += "\n:" + line - continue - - nametag["description"] += line - - def parse_file(self, fname, path, basename): - """Parse a single file""" - - ref = f"abi_file_{path}_{basename}" - ref = self.re_unprintable.sub("_", 
ref).strip("_") - - # Store per-file state into a namespace variable. This will be used - # by the per-line parser state machine and by the warning function. - fdata = Namespace - - fdata.fname = fname - fdata.name = basename - - pos = fname.find(ABI_DIR) - if pos > 0: - f = fname[pos:] - else: - f = fname - - fdata.file_ref = (f, ref) - self.file_refs[f] = ref - - fdata.ln = 0 - fdata.what_ln = 0 - fdata.tag = "" - fdata.label = "" - fdata.what = [] - fdata.key = None - fdata.xrefs = None - fdata.space = None - fdata.ftype = path.split("/")[0] - - fdata.nametag = {} - fdata.nametag["what"] = [f"ABI file {path}/{basename}"] - fdata.nametag["type"] = "File" - fdata.nametag["path"] = fdata.ftype - fdata.nametag["file"] = [fdata.file_ref] - fdata.nametag["line_no"] = 1 - fdata.nametag["description"] = "" - fdata.nametag["symbols"] = [] - - self.data[ref] = fdata.nametag - - if self.debug & AbiDebug.WHAT_OPEN: - self.log.debug("Opening file %s", fname) - - if basename == "README": - self.parse_readme(fdata.nametag, fname) - return - - with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp: - for line in fp: - fdata.ln += 1 - - self._parse_line(fdata, line) - - if "description" in fdata.nametag: - fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n") - - if fdata.key: - if "description" not in self.data.get(fdata.key, {}): - self.warn(fdata, f"{fdata.key} doesn't have a description") - - for w in fdata.what: - self.add_symbol(what=w, fname=fname, xref=fdata.key) - - def _parse_abi(self, root=None): - """Internal function to parse documentation ABI recursively""" - - if not root: - root = self.directory - - with os.scandir(root) as obj: - for entry in obj: - name = os.path.join(root, entry.name) - - if entry.is_dir(): - self._parse_abi(name) - continue - - if not entry.is_file(): - continue - - basename = os.path.basename(name) - - if basename.startswith("."): - continue - - if basename.endswith(self.ignore_suffixes): - continue - - 
path = self.re_abi_dir.sub("", os.path.dirname(name)) - - self.parse_file(name, path, basename) - - def parse_abi(self, root=None): - """Parse documentation ABI""" - - self._parse_abi(root) - - if self.debug & AbiDebug.DUMP_ABI_STRUCTS: - self.log.debug(pformat(self.data)) - - def desc_txt(self, desc): - """Print description as found inside ABI files""" - - desc = desc.strip(" \t\n") - - return desc + "\n\n" - - def xref(self, fname): - """ - Converts a Documentation/ABI + basename into a ReST cross-reference - """ - - xref = self.file_refs.get(fname) - if not xref: - return None - else: - return xref - - def desc_rst(self, desc): - """Enrich ReST output by creating cross-references""" - - # Remove title markups from the description - # Having titles inside ABI files will only work if extra - # care would be taken in order to strictly follow the same - # level order for each markup. - desc = self.re_title_mark.sub("\n\n", "\n" + desc) - desc = desc.rstrip(" \t\n").lstrip("\n") - - # Python's regex performance for non-compiled expressions is a lot - # than Perl, as Perl automatically caches them at their - # first usage. Here, we'll need to do the same, as otherwise the - # performance penalty is be high - - new_desc = "" - for d in desc.split("\n"): - if d == "": - new_desc += "\n" - continue - - # Use cross-references for doc files where needed - d = self.re_doc.sub(r":doc:`/\1`", d) - - # Use cross-references for ABI generated docs where needed - matches = self.re_abi.findall(d) - for m in matches: - abi = m[0] + m[1] - - xref = self.file_refs.get(abi) - if not xref: - # This may happen if ABI is on a separate directory, - # like parsing ABI testing and symbol is at stable. 
- # The proper solution is to move this part of the code - # for it to be inside sphinx/kernel_abi.py - self.log.info("Didn't find ABI reference for '%s'", abi) - else: - new = self.re_escape.sub(r"\\\1", m[1]) - d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d) - - # Seek for cross reference symbols like /sys/... - # Need to be careful to avoid doing it on a code block - if d[0] not in [" ", "\t"]: - matches = self.re_xref_node.findall(d) - for m in matches: - # Finding ABI here is more complex due to wildcards - xref = self.what_refs.get(m) - if xref: - new = self.re_escape.sub(r"\\\1", m) - d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d) - - new_desc += d + "\n" - - return new_desc + "\n\n" - - def doc(self, output_in_txt=False, show_symbols=True, show_file=True, - filter_path=None): - """Print ABI at stdout""" - - part = None - for key, v in sorted(self.data.items(), - key=lambda x: (x[1].get("type", ""), - x[1].get("what"))): - - wtype = v.get("type", "Symbol") - file_ref = v.get("file") - names = v.get("what", [""]) - - if wtype == "File": - if not show_file: - continue - else: - if not show_symbols: - continue - - if filter_path: - if v.get("path") != filter_path: - continue - - msg = "" - - if wtype != "File": - cur_part = names[0] - if cur_part.find("/") >= 0: - match = self.re_what.match(cur_part) - if match: - symbol = match.group(1).rstrip("/") - cur_part = "Symbols under " + symbol - - if cur_part and cur_part != part: - part = cur_part - msg += part + "\n"+ "-" * len(part) +"\n\n" - - msg += f".. _{key}:\n\n" - - max_len = 0 - for i in range(0, len(names)): # pylint: disable=C0200 - names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**" - - max_len = max(max_len, len(names[i])) - - msg += "+-" + "-" * max_len + "-+\n" - for name in names: - msg += f"| {name}" + " " * (max_len - len(name)) + " |\n" - msg += "+-" + "-" * max_len + "-+\n" - msg += "\n" - - for ref in file_ref: - if wtype == "File": - msg += f".. 
_{ref[1]}:\n\n" - else: - base = os.path.basename(ref[0]) - msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n" - - if wtype == "File": - msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n" - - desc = v.get("description") - if not desc and wtype != "File": - msg += f"DESCRIPTION MISSING for {names[0]}\n\n" - - if desc: - if output_in_txt: - msg += self.desc_txt(desc) - else: - msg += self.desc_rst(desc) - - symbols = v.get("symbols") - if symbols: - msg += "Has the following ABI:\n\n" - - for w, label in symbols: - # Escape special chars from content - content = self.re_escape.sub(r"\\\1", w) - - msg += f"- :ref:`{content} <{label}>`\n\n" - - users = v.get("users") - if users and users.strip(" \t\n"): - users = users.strip("\n").replace('\n', '\n\t') - msg += f"Users:\n\t{users}\n\n" - - ln = v.get("line_no", 1) - - yield (msg, file_ref[0][0], ln) - - def check_issues(self): - """Warn about duplicated ABI entries""" - - for what, v in self.what_symbols.items(): - files = v.get("file") - if not files: - # Should never happen if the parser works properly - self.log.warning("%s doesn't have a file associated", what) - continue - - if len(files) == 1: - continue - - f = [] - for fname, lines in sorted(files.items()): - if not lines: - f.append(f"{fname}") - elif len(lines) == 1: - f.append(f"{fname}:{lines[0]}") - else: - m = fname + "lines " - m += ", ".join(str(x) for x in lines) - f.append(m) - - self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f)) - - def search_symbols(self, expr): - """ Searches for ABI symbols """ - - regex = re.compile(expr, re.I) - - found_keys = 0 - for t in sorted(self.data.items(), key=lambda x: [0]): - v = t[1] - - wtype = v.get("type", "") - if wtype == "File": - continue - - for what in v.get("what", [""]): - if regex.search(what): - found_keys += 1 - - kernelversion = v.get("kernelversion", "").strip(" \t\n") - date = v.get("date", "").strip(" \t\n") - contact = v.get("contact", "").strip(" \t\n") - users = 
v.get("users", "").strip(" \t\n") - desc = v.get("description", "").strip(" \t\n") - - files = [] - for f in v.get("file", ()): - files.append(f[0]) - - what = str(found_keys) + ". " + what - title_tag = "-" * len(what) - - print(f"\n{what}\n{title_tag}\n") - - if kernelversion: - print(f"Kernel version:\t\t{kernelversion}") - - if date: - print(f"Date:\t\t\t{date}") - - if contact: - print(f"Contact:\t\t{contact}") - - if users: - print(f"Users:\t\t\t{users}") - - print("Defined on file(s):\t" + ", ".join(files)) - - if desc: - desc = desc.strip("\n") - print(f"\n{desc}\n") - - if not found_keys: - print(f"Regular expression /{expr}/ not found.") diff --git a/scripts/lib/abi/abi_regex.py b/scripts/lib/abi/abi_regex.py deleted file mode 100644 index 8a57846cbc69..000000000000 --- a/scripts/lib/abi/abi_regex.py +++ /dev/null @@ -1,234 +0,0 @@ -#!/usr/bin/env python3 -# xxpylint: disable=R0903 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Convert ABI what into regular expressions -""" - -import re -import sys - -from pprint import pformat - -from abi_parser import AbiParser -from helpers import AbiDebug - -class AbiRegex(AbiParser): - """Extends AbiParser to search ABI nodes with regular expressions""" - - # Escape only ASCII visible characters - escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])" - leave_others = "others" - - # Tuples with regular expressions to be compiled and replacement data - re_whats = [ - # Drop escape characters that might exist - (re.compile("\\\\"), ""), - - # Temporarily escape dot characters - (re.compile(r"\."), "\xf6"), - - # Temporarily change [0-9]+ type of patterns - (re.compile(r"\[0\-9\]\+"), "\xff"), - - # Temporarily change [\d+-\d+] type of patterns - (re.compile(r"\[0\-\d+\]"), "\xff"), - (re.compile(r"\[0:\d+\]"), "\xff"), - (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"), - - # Temporarily change [0-9] type of patterns - (re.compile(r"\[(\d)\-(\d)\]"), 
"\xf4\1-\2\xf5"), - - # Handle multiple option patterns - (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"), - - # Handle wildcards - (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"), - (re.compile(r"/\*/"), "/.*/"), - (re.compile(r"/\xf6\xf6\xf6"), "/.*"), - (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"), - (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"), - (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"), - - (re.compile(r"XX+"), "\\\\w\xf7"), - (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"), - (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"), - (re.compile(r"_[AB]_"), "_\\\\w\xf7_"), - - # Recover [0-9] type of patterns - (re.compile(r"\xf4"), "["), - (re.compile(r"\xf5"), "]"), - - # Remove duplicated spaces - (re.compile(r"\s+"), r" "), - - # Special case: drop comparison as in: - # What: foo = - # (this happens on a few IIO definitions) - (re.compile(r"\s*\=.*$"), ""), - - # Escape all other symbols - (re.compile(escape_symbols), r"\\\1"), - (re.compile(r"\\\\"), r"\\"), - (re.compile(r"\\([\[\]\(\)\|])"), r"\1"), - (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"), - - (re.compile(r"\xff"), r"\\d+"), - - # Special case: IIO ABI which a parenthesis. - (re.compile(r"sqrt(.*)"), r"sqrt(.*)"), - - # Simplify regexes with multiple .* - (re.compile(r"(?:\.\*){2,}"), ""), - - # Recover dot characters - (re.compile(r"\xf6"), "\\."), - # Recover plus characters - (re.compile(r"\xf7"), "+"), - ] - re_has_num = re.compile(r"\\d") - - # Symbol name after escape_chars that are considered a devnode basename - re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$") - - # List of popular group names to be skipped to minimize regex group size - # Use AbiDebug.SUBGROUP_SIZE to detect those - skip_names = set(["devices", "hwmon"]) - - def regex_append(self, what, new): - """ - Get a search group for a subset of regular expressions. - - As ABI may have thousands of symbols, using a for to search all - regular expressions is at least O(n^2). 
When there are wildcards, - the complexity increases substantially, eventually becoming exponential. - - To avoid spending too much time on them, use a logic to split - them into groups. The smaller the group, the better, as it would - mean that searches will be confined to a small number of regular - expressions. - - The conversion to a regex subset is tricky, as we need something - that can be easily obtained from the sysfs symbol and from the - regular expression. So, we need to discard nodes that have - wildcards. - - If it can't obtain a subgroup, place the regular expression inside - a special group (self.leave_others). - """ - - search_group = None - - for search_group in reversed(new.split("/")): - if not search_group or search_group in self.skip_names: - continue - if self.re_symbol_name.match(search_group): - break - - if not search_group: - search_group = self.leave_others - - if self.debug & AbiDebug.SUBGROUP_MAP: - self.log.debug("%s: mapped as %s", what, search_group) - - try: - if search_group not in self.regex_group: - self.regex_group[search_group] = [] - - self.regex_group[search_group].append(re.compile(new)) - if self.search_string: - if what.find(self.search_string) >= 0: - print(f"What: {what}") - except re.PatternError: - self.log.warning("Ignoring '%s' as it produced an invalid regex:\n" - " '%s'", what, new) - - def get_regexes(self, what): - """ - Given an ABI devnode, return a list of all regular expressions that - may match it, based on the sub-groups created by regex_append() - """ - - re_list = [] - - patches = what.split("/") - patches.reverse() - patches.append(self.leave_others) - - for search_group in patches: - if search_group in self.regex_group: - re_list += self.regex_group[search_group] - - return re_list - - def __init__(self, *args, **kwargs): - """ - Override init method to get verbose argument - """ - - self.regex_group = None - self.search_string = None - self.re_string = None - - if "search_string" in kwargs: - 
self.search_string = kwargs.get("search_string") - del kwargs["search_string"] - - if self.search_string: - - try: - self.re_string = re.compile(self.search_string) - except re.PatternError as e: - msg = f"{self.search_string} is not a valid regular expression" - raise ValueError(msg) from e - - super().__init__(*args, **kwargs) - - def parse_abi(self, *args, **kwargs): - - super().parse_abi(*args, **kwargs) - - self.regex_group = {} - - print("Converting ABI What fields into regexes...", file=sys.stderr) - - for t in sorted(self.data.items(), key=lambda x: x[0]): - v = t[1] - if v.get("type") == "File": - continue - - v["regex"] = [] - - for what in v.get("what", []): - if not what.startswith("/sys"): - continue - - new = what - for r, s in self.re_whats: - try: - new = r.sub(s, new) - except re.PatternError as e: - # Help debugging troubles with new regexes - raise re.PatternError(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e - - v["regex"].append(new) - - if self.debug & AbiDebug.REGEX: - self.log.debug("%-90s <== %s", new, what) - - # Store regex into a subgroup to speedup searches - self.regex_append(what, new) - - if self.debug & AbiDebug.SUBGROUP_DICT: - self.log.debug("%s", pformat(self.regex_group)) - - if self.debug & AbiDebug.SUBGROUP_SIZE: - biggestd_keys = sorted(self.regex_group.keys(), - key= lambda k: len(self.regex_group[k]), - reverse=True) - - print("Top regex subgroups:", file=sys.stderr) - for k in biggestd_keys[:10]: - print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr) diff --git a/scripts/lib/abi/helpers.py b/scripts/lib/abi/helpers.py deleted file mode 100644 index 639b23e4ca33..000000000000 --- a/scripts/lib/abi/helpers.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -# Copyright(c) 2025: Mauro Carvalho Chehab . 
-# pylint: disable=R0903 -# SPDX-License-Identifier: GPL-2.0 - -""" -Helper classes for ABI parser -""" - -ABI_DIR = "Documentation/ABI/" - - -class AbiDebug: - """Debug levels""" - - WHAT_PARSING = 1 - WHAT_OPEN = 2 - DUMP_ABI_STRUCTS = 4 - UNDEFINED = 8 - REGEX = 16 - SUBGROUP_MAP = 32 - SUBGROUP_DICT = 64 - SUBGROUP_SIZE = 128 - GRAPH = 256 - - -DEBUG_HELP = """ -1 - enable debug parsing logic -2 - enable debug messages on file open -4 - enable debug for ABI parse data -8 - enable extra debug information to identify troubles - with ABI symbols found at the local machine that - weren't found on ABI documentation (used only for - undefined subcommand) -16 - enable debug for what to regex conversion -32 - enable debug for symbol regex subgroups -64 - enable debug for sysfs graph tree variable -""" diff --git a/scripts/lib/abi/system_symbols.py b/scripts/lib/abi/system_symbols.py deleted file mode 100644 index f15c94a6e33c..000000000000 --- a/scripts/lib/abi/system_symbols.py +++ /dev/null @@ -1,378 +0,0 @@ -#!/usr/bin/env python3 -# pylint: disable=R0902,R0912,R0914,R0915,R1702 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# SPDX-License-Identifier: GPL-2.0 - -""" -Parse ABI documentation and produce results from it. 
-""" - -import os -import re -import sys - -from concurrent import futures -from datetime import datetime -from random import shuffle - -from helpers import AbiDebug - -class SystemSymbols: - """Stores arguments for the class and initialize class vars""" - - def graph_add_file(self, path, link=None): - """ - add a file path to the sysfs graph stored at self.root - """ - - if path in self.files: - return - - name = "" - ref = self.root - for edge in path.split("/"): - name += edge + "/" - if edge not in ref: - ref[edge] = {"__name": [name.rstrip("/")]} - - ref = ref[edge] - - if link and link not in ref["__name"]: - ref["__name"].append(link.rstrip("/")) - - self.files.add(path) - - def print_graph(self, root_prefix="", root=None, level=0): - """Prints a reference tree graph using UTF-8 characters""" - - if not root: - root = self.root - level = 0 - - # Prevent endless traverse - if level > 5: - return - - if level > 0: - prefix = "├──" - last_prefix = "└──" - else: - prefix = "" - last_prefix = "" - - items = list(root.items()) - - names = root.get("__name", []) - for k, edge in items: - if k == "__name": - continue - - if not k: - k = "/" - - if len(names) > 1: - k += " links: " + ",".join(names[1:]) - - if edge == items[-1][1]: - print(root_prefix + last_prefix + k) - p = root_prefix - if level > 0: - p += " " - self.print_graph(p, edge, level + 1) - else: - print(root_prefix + prefix + k) - p = root_prefix + "│ " - self.print_graph(p, edge, level + 1) - - def _walk(self, root): - """ - Walk through sysfs to get all devnodes that aren't ignored. - - By default, uses /sys as sysfs mounting point. If another - directory is used, it replaces them to /sys at the patches. 
- """ - - with os.scandir(root) as obj: - for entry in obj: - path = os.path.join(root, entry.name) - if self.sysfs: - p = path.replace(self.sysfs, "/sys", count=1) - else: - p = path - - if self.re_ignore.search(p): - return - - # Handle link first to avoid directory recursion - if entry.is_symlink(): - real = os.path.realpath(path) - if not self.sysfs: - self.aliases[path] = real - else: - real = real.replace(self.sysfs, "/sys", count=1) - - # Add absfile location to graph if it doesn't exist - if not self.re_ignore.search(real): - # Add link to the graph - self.graph_add_file(real, p) - - elif entry.is_file(): - self.graph_add_file(p) - - elif entry.is_dir(): - self._walk(path) - - def __init__(self, abi, sysfs="/sys", hints=False): - """ - Initialize internal variables and get a list of all files inside - sysfs that can currently be parsed. - - Please notice that there are several entries on sysfs that aren't - documented as ABI. Ignore those. - - The real paths will be stored under self.files. Aliases will be - stored in separate, as self.aliases. 
- """ - - self.abi = abi - self.log = abi.log - - if sysfs != "/sys": - self.sysfs = sysfs.rstrip("/") - else: - self.sysfs = None - - self.hints = hints - - self.root = {} - self.aliases = {} - self.files = set() - - dont_walk = [ - # Those require root access and aren't documented at ABI - f"^{sysfs}/kernel/debug", - f"^{sysfs}/kernel/tracing", - f"^{sysfs}/fs/pstore", - f"^{sysfs}/fs/bpf", - f"^{sysfs}/fs/fuse", - - # This is not documented at ABI - f"^{sysfs}/module", - - f"^{sysfs}/fs/cgroup", # this is big and has zero docs under ABI - f"^{sysfs}/firmware", # documented elsewhere: ACPI, DT bindings - "sections|notes", # aren't actually part of ABI - - # kernel-parameters.txt - not easy to parse - "parameters", - ] - - self.re_ignore = re.compile("|".join(dont_walk)) - - print(f"Reading {sysfs} directory contents...", file=sys.stderr) - self._walk(sysfs) - - def check_file(self, refs, found): - """Check missing ABI symbols for a given sysfs file""" - - res_list = [] - - try: - for names in refs: - fname = names[0] - - res = { - "found": False, - "fname": fname, - "msg": "", - } - res_list.append(res) - - re_what = self.abi.get_regexes(fname) - if not re_what: - self.abi.log.warning(f"missing rules for {fname}") - continue - - for name in names: - for r in re_what: - if self.abi.debug & AbiDebug.UNDEFINED: - self.log.debug("check if %s matches '%s'", name, r.pattern) - if r.match(name): - res["found"] = True - if found: - res["msg"] += f" {fname}: regex:\n\t" - continue - - if self.hints and not res["found"]: - res["msg"] += f" {fname} not found. 
Tested regexes:\n" - for r in re_what: - res["msg"] += " " + r.pattern + "\n" - - except KeyboardInterrupt: - pass - - return res_list - - def _ref_interactor(self, root): - """Recursive function to interact over the sysfs tree""" - - for k, v in root.items(): - if isinstance(v, dict): - yield from self._ref_interactor(v) - - if root == self.root or k == "__name": - continue - - if self.abi.re_string: - fname = v["__name"][0] - if self.abi.re_string.search(fname): - yield v - else: - yield v - - - def get_fileref(self, all_refs, chunk_size): - """Interactor to group refs into chunks""" - - n = 0 - refs = [] - - for ref in all_refs: - refs.append(ref) - - n += 1 - if n >= chunk_size: - yield refs - n = 0 - refs = [] - - yield refs - - def check_undefined_symbols(self, max_workers=None, chunk_size=50, - found=None, dry_run=None): - """Seach ABI for sysfs symbols missing documentation""" - - self.abi.parse_abi() - - if self.abi.debug & AbiDebug.GRAPH: - self.print_graph() - - all_refs = [] - for ref in self._ref_interactor(self.root): - all_refs.append(ref["__name"]) - - if dry_run: - print("Would check", file=sys.stderr) - for ref in all_refs: - print(", ".join(ref)) - - return - - print("Starting to search symbols (it may take several minutes):", - file=sys.stderr) - start = datetime.now() - old_elapsed = None - - # Python doesn't support multithreading due to limitations on its - # global lock (GIL). While Python 3.13 finally made GIL optional, - # there are still issues related to it. Also, we want to have - # backward compatibility with older versions of Python. - # - # So, use instead multiprocess. However, Python is very slow passing - # data from/to multiple processes. Also, it may consume lots of memory - # if the data to be shared is not small. So, we need to group workload - # in chunks that are big enough to generate performance gains while - # not being so big that would cause out-of-memory. 
- - num_refs = len(all_refs) - print(f"Number of references to parse: {num_refs}", file=sys.stderr) - - if not max_workers: - max_workers = os.cpu_count() - elif max_workers > os.cpu_count(): - max_workers = os.cpu_count() - - max_workers = max(max_workers, 1) - - max_chunk_size = int((num_refs + max_workers - 1) / max_workers) - chunk_size = min(chunk_size, max_chunk_size) - chunk_size = max(1, chunk_size) - - if max_workers > 1: - executor = futures.ProcessPoolExecutor - - # Place references in a random order. This may help improving - # performance, by mixing complex/simple expressions when creating - # chunks - shuffle(all_refs) - else: - # Python has a high overhead with processes. When there's just - # one worker, it is faster to not create a new process. - # Yet, User still deserves to have a progress print. So, use - # python's "thread", which is actually a single process, using - # an internal schedule to switch between tasks. No performance - # gains for non-IO tasks, but still it can be quickly interrupted - # from time to time to display progress. 
- executor = futures.ThreadPoolExecutor - - not_found = [] - f_list = [] - with executor(max_workers=max_workers) as exe: - for refs in self.get_fileref(all_refs, chunk_size): - if refs: - try: - f_list.append(exe.submit(self.check_file, refs, found)) - - except KeyboardInterrupt: - return - - total = len(f_list) - - if not total: - if self.abi.re_string: - print(f"No ABI symbol matches {self.abi.search_string}") - else: - self.abi.log.warning("No ABI symbols found") - return - - print(f"{len(f_list):6d} jobs queued on {max_workers} workers", - file=sys.stderr) - - while f_list: - try: - t = futures.wait(f_list, timeout=1, - return_when=futures.FIRST_COMPLETED) - - done = t[0] - - for fut in done: - res_list = fut.result() - - for res in res_list: - if not res["found"]: - not_found.append(res["fname"]) - if res["msg"]: - print(res["msg"]) - - f_list.remove(fut) - except KeyboardInterrupt: - return - - except RuntimeError as e: - self.abi.log.warning(f"Future: {e}") - break - - if sys.stderr.isatty(): - elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] - if len(f_list) < total: - elapsed += f" ({total - len(f_list)}/{total} jobs completed). " - if elapsed != old_elapsed: - print(elapsed + "\r", end="", flush=True, - file=sys.stderr) - old_elapsed = elapsed - - elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0] - print(elapsed, file=sys.stderr) - - for f in sorted(not_found): - print(f"{f} not found.") diff --git a/scripts/lib/jobserver.py b/scripts/lib/jobserver.py deleted file mode 100755 index a24f30ef4fa8..000000000000 --- a/scripts/lib/jobserver.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0+ -# -# pylint: disable=C0103,C0209 -# -# - -""" -Interacts with the POSIX jobserver during the Kernel build time. 
- -A "normal" jobserver task, like the one initiated by a make subrocess would do: - - - open read/write file descriptors to communicate with the job server; - - ask for one slot by calling: - claim = os.read(reader, 1) - - when the job finshes, call: - os.write(writer, b"+") # os.write(writer, claim) - -Here, the goal is different: This script aims to get the remaining number -of slots available, using all of them to run a command which handle tasks in -parallel. To to that, it has a loop that ends only after there are no -slots left. It then increments the number by one, in order to allow a -call equivalent to make -j$((claim+1)), e.g. having a parent make creating -$claim child to do the actual work. - -The end goal here is to keep the total number of build tasks under the -limit established by the initial make -j$n_proc call. - -See: - https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver -""" - -import errno -import os -import subprocess -import sys - -class JobserverExec: - """ - Claim all slots from make using POSIX Jobserver. - - The main methods here are: - - open(): reserves all slots; - - close(): method returns all used slots back to make; - - run(): executes a command setting PARALLELISM= - """ - - def __init__(self): - """Initialize internal vars""" - self.claim = 0 - self.jobs = b"" - self.reader = None - self.writer = None - self.is_open = False - - def open(self): - """Reserve all available slots to be claimed later on""" - - if self.is_open: - return - - try: - # Fetch the make environment options. - flags = os.environ["MAKEFLAGS"] - # Look for "--jobserver=R,W" - # Note that GNU Make has used --jobserver-fds and --jobserver-auth - # so this handles all of them. - opts = [x for x in flags.split(" ") if x.startswith("--jobserver")] - - # Parse out R,W file descriptor numbers and set them nonblocking. 
- # If the MAKEFLAGS variable contains multiple instances of the - # --jobserver-auth= option, the last one is relevant. - fds = opts[-1].split("=", 1)[1] - - # Starting with GNU Make 4.4, named pipes are used for reader - # and writer. - # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134 - _, _, path = fds.partition("fifo:") - - if path: - self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK) - self.writer = os.open(path, os.O_WRONLY) - else: - self.reader, self.writer = [int(x) for x in fds.split(",", 1)] - # Open a private copy of reader to avoid setting nonblocking - # on an unexpecting process with the same reader fd. - self.reader = os.open("/proc/self/fd/%d" % (self.reader), - os.O_RDONLY | os.O_NONBLOCK) - - # Read out as many jobserver slots as possible - while True: - try: - slot = os.read(self.reader, 8) - self.jobs += slot - except (OSError, IOError) as e: - if e.errno == errno.EWOULDBLOCK: - # Stop at the end of the jobserver queue. - break - # If something went wrong, give back the jobs. - if self.jobs: - os.write(self.writer, self.jobs) - raise e - - # Add a bump for our caller's reserveration, since we're just going - # to sit here blocked on our child. - self.claim = len(self.jobs) + 1 - - except (KeyError, IndexError, ValueError, OSError, IOError): - # Any missing environment strings or bad fds should result in just - # not being parallel. - self.claim = None - - self.is_open = True - - def close(self): - """Return all reserved slots to Jobserver""" - - if not self.is_open: - return - - # Return all the reserved slots. - if len(self.jobs): - os.write(self.writer, self.jobs) - - self.is_open = False - - def __enter__(self): - self.open() - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - self.close() - - def run(self, cmd, *args, **pwargs): - """ - Run a command setting PARALLELISM env variable to the number of - available job slots (claim) + 1, e.g. 
it will reserve claim slots - to do the actual build work, plus one to monitor its children. - """ - self.open() # Ensure that self.claim is set - - # We can only claim parallelism if there was a jobserver (i.e. a - # top-level "-jN" argument) and there were no other failures. Otherwise - # leave out the environment variable and let the child figure out what - # is best. - if self.claim: - os.environ["PARALLELISM"] = str(self.claim) - - return subprocess.call(cmd, *args, **pwargs) diff --git a/scripts/lib/kdoc/kdoc_files.py b/scripts/lib/kdoc/kdoc_files.py deleted file mode 100644 index 1fd8d17edb32..000000000000 --- a/scripts/lib/kdoc/kdoc_files.py +++ /dev/null @@ -1,294 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=R0903,R0913,R0914,R0917 - -""" -Parse lernel-doc tags on multiple kernel source files. -""" - -import argparse -import logging -import os -import re - -from kdoc_parser import KernelDoc -from kdoc_output import OutputFormat - - -class GlobSourceFiles: - """ - Parse C source code file names and directories via an Interactor. - """ - - def __init__(self, srctree=None, valid_extensions=None): - """ - Initialize valid extensions with a tuple. - - If not defined, assume default C extensions (.c and .h) - - It would be possible to use python's glob function, but it is - very slow, and it is not interactive. So, it would wait to read all - directories before actually do something. - - So, let's use our own implementation. 
- """ - - if not valid_extensions: - self.extensions = (".c", ".h") - else: - self.extensions = valid_extensions - - self.srctree = srctree - - def _parse_dir(self, dirname): - """Internal function to parse files recursively""" - - with os.scandir(dirname) as obj: - for entry in obj: - name = os.path.join(dirname, entry.name) - - if entry.is_dir(follow_symlinks=False): - yield from self._parse_dir(name) - - if not entry.is_file(): - continue - - basename = os.path.basename(name) - - if not basename.endswith(self.extensions): - continue - - yield name - - def parse_files(self, file_list, file_not_found_cb): - """ - Define an interator to parse all source files from file_list, - handling directories if any - """ - - if not file_list: - return - - for fname in file_list: - if self.srctree: - f = os.path.join(self.srctree, fname) - else: - f = fname - - if os.path.isdir(f): - yield from self._parse_dir(f) - elif os.path.isfile(f): - yield f - elif file_not_found_cb: - file_not_found_cb(fname) - - -class KernelFiles(): - """ - Parse kernel-doc tags on multiple kernel source files. - - There are two type of parsers defined here: - - self.parse_file(): parses both kernel-doc markups and - EXPORT_SYMBOL* macros; - - self.process_export_file(): parses only EXPORT_SYMBOL* macros. - """ - - def warning(self, msg): - """Ancillary routine to output a warning and increment error count""" - - self.config.log.warning(msg) - self.errors += 1 - - def error(self, msg): - """Ancillary routine to output an error and increment error count""" - - self.config.log.error(msg) - self.errors += 1 - - def parse_file(self, fname): - """ - Parse a single Kernel source. 
- """ - - # Prevent parsing the same file twice if results are cached - if fname in self.files: - return - - doc = KernelDoc(self.config, fname) - export_table, entries = doc.parse_kdoc() - - self.export_table[fname] = export_table - - self.files.add(fname) - self.export_files.add(fname) # parse_kdoc() already check exports - - self.results[fname] = entries - - def process_export_file(self, fname): - """ - Parses EXPORT_SYMBOL* macros from a single Kernel source file. - """ - - # Prevent parsing the same file twice if results are cached - if fname in self.export_files: - return - - doc = KernelDoc(self.config, fname) - export_table = doc.parse_export() - - if not export_table: - self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}") - export_table = set() - - self.export_table[fname] = export_table - self.export_files.add(fname) - - def file_not_found_cb(self, fname): - """ - Callback to warn if a file was not found. - """ - - self.error(f"Cannot find file {fname}") - - def __init__(self, verbose=False, out_style=None, - werror=False, wreturn=False, wshort_desc=False, - wcontents_before_sections=False, - logger=None): - """ - Initialize startup variables and parse all files - """ - - if not verbose: - verbose = bool(os.environ.get("KBUILD_VERBOSE", 0)) - - if out_style is None: - out_style = OutputFormat() - - if not werror: - kcflags = os.environ.get("KCFLAGS", None) - if kcflags: - match = re.search(r"(\s|^)-Werror(\s|$)/", kcflags) - if match: - werror = True - - # reading this variable is for backwards compat just in case - # someone was calling it with the variable from outside the - # kernel's build system - kdoc_werror = os.environ.get("KDOC_WERROR", None) - if kdoc_werror: - werror = kdoc_werror - - # Some variables are global to the parser logic as a whole as they are - # used to send control configuration to KernelDoc class. As such, - # those variables are read-only inside the KernelDoc. 
- self.config = argparse.Namespace - - self.config.verbose = verbose - self.config.werror = werror - self.config.wreturn = wreturn - self.config.wshort_desc = wshort_desc - self.config.wcontents_before_sections = wcontents_before_sections - - if not logger: - self.config.log = logging.getLogger("kernel-doc") - else: - self.config.log = logger - - self.config.warning = self.warning - - self.config.src_tree = os.environ.get("SRCTREE", None) - - # Initialize variables that are internal to KernelFiles - - self.out_style = out_style - - self.errors = 0 - self.results = {} - - self.files = set() - self.export_files = set() - self.export_table = {} - - def parse(self, file_list, export_file=None): - """ - Parse all files - """ - - glob = GlobSourceFiles(srctree=self.config.src_tree) - - for fname in glob.parse_files(file_list, self.file_not_found_cb): - self.parse_file(fname) - - for fname in glob.parse_files(export_file, self.file_not_found_cb): - self.process_export_file(fname) - - def out_msg(self, fname, name, arg): - """ - Return output messages from a file name using the output style - filtering. - - If output type was not handled by the syler, return None. - """ - - # NOTE: we can add rules here to filter out unwanted parts, - # although OutputFormat.msg already does that. 
- - return self.out_style.msg(fname, name, arg) - - def msg(self, enable_lineno=False, export=False, internal=False, - symbol=None, nosymbol=None, no_doc_sections=False, - filenames=None, export_file=None): - """ - Interacts over the kernel-doc results and output messages, - returning kernel-doc markups on each interaction - """ - - self.out_style.set_config(self.config) - - if not filenames: - filenames = sorted(self.results.keys()) - - glob = GlobSourceFiles(srctree=self.config.src_tree) - - for fname in filenames: - function_table = set() - - if internal or export: - if not export_file: - export_file = [fname] - - for f in glob.parse_files(export_file, self.file_not_found_cb): - function_table |= self.export_table[f] - - if symbol: - for s in symbol: - function_table.add(s) - - self.out_style.set_filter(export, internal, symbol, nosymbol, - function_table, enable_lineno, - no_doc_sections) - - msg = "" - if fname not in self.results: - self.config.log.warning("No kernel-doc for file %s", fname) - continue - - symbols = self.results[fname] - self.out_style.set_symbols(symbols) - - for arg in symbols: - m = self.out_msg(fname, arg.name, arg) - - if m is None: - ln = arg.get("ln", 0) - dtype = arg.get('type', "") - - self.config.log.warning("%s:%d Can't handle %s", - fname, ln, dtype) - else: - msg += m - - if msg: - yield fname, msg diff --git a/scripts/lib/kdoc/kdoc_item.py b/scripts/lib/kdoc/kdoc_item.py deleted file mode 100644 index 19805301cb2c..000000000000 --- a/scripts/lib/kdoc/kdoc_item.py +++ /dev/null @@ -1,43 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# A class that will, eventually, encapsulate all of the parsed data that we -# then pass into the output modules. 
-# - -class KdocItem: - def __init__(self, name, fname, type, start_line, **other_stuff): - self.name = name - self.fname = fname - self.type = type - self.declaration_start_line = start_line - self.sections = {} - self.sections_start_lines = {} - self.parameterlist = [] - self.parameterdesc_start_lines = [] - self.parameterdescs = {} - self.parametertypes = {} - # - # Just save everything else into our own dict so that the output - # side can grab it directly as before. As we move things into more - # structured data, this will, hopefully, fade away. - # - self.other_stuff = other_stuff - - def get(self, key, default = None): - return self.other_stuff.get(key, default) - - def __getitem__(self, key): - return self.get(key) - - # - # Tracking of section and parameter information. - # - def set_sections(self, sections, start_lines): - self.sections = sections - self.section_start_lines = start_lines - - def set_params(self, names, descs, types, starts): - self.parameterlist = names - self.parameterdescs = descs - self.parametertypes = types - self.parameterdesc_start_lines = starts diff --git a/scripts/lib/kdoc/kdoc_output.py b/scripts/lib/kdoc/kdoc_output.py deleted file mode 100644 index 58f115059e93..000000000000 --- a/scripts/lib/kdoc/kdoc_output.py +++ /dev/null @@ -1,824 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917 - -""" -Implement output filters to print kernel-doc documentation. - -The implementation uses a virtual base class (OutputFormat) which -contains a dispatches to virtual methods, and some code to filter -out output messages. - -The actual implementation is done on one separate class per each type -of output. Currently, there are output classes for ReST and man/troff. 
-""" - -import os -import re -from datetime import datetime - -from kdoc_parser import KernelDoc, type_param -from kdoc_re import KernRe - - -function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False) - -# match expressions used to find embedded type information -type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False) -type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False) -type_func = KernRe(r"(\w+)\(\)", cache=False) -type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -# Special RST handling for func ptr params -type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False) - -# Special RST handling for structs with func ptr params -type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False) - -type_env = KernRe(r"(\$\w+)", cache=False) -type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False) -type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False) -type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False) -type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False) -type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False) -type_fallback = KernRe(r"\&([_\w]+)", cache=False) -type_member_func = type_member + KernRe(r"\(\)", cache=False) - - -class OutputFormat: - """ - Base class for OutputFormat. If used as-is, it means that only - warnings will be displayed. - """ - - # output mode. 
- OUTPUT_ALL = 0 # output all symbols and doc sections - OUTPUT_INCLUDE = 1 # output only specified symbols - OUTPUT_EXPORTED = 2 # output exported symbols - OUTPUT_INTERNAL = 3 # output non-exported symbols - - # Virtual member to be overriden at the inherited classes - highlights = [] - - def __init__(self): - """Declare internal vars and set mode to OUTPUT_ALL""" - - self.out_mode = self.OUTPUT_ALL - self.enable_lineno = None - self.nosymbol = {} - self.symbol = None - self.function_table = None - self.config = None - self.no_doc_sections = False - - self.data = "" - - def set_config(self, config): - """ - Setup global config variables used by both parser and output. - """ - - self.config = config - - def set_filter(self, export, internal, symbol, nosymbol, function_table, - enable_lineno, no_doc_sections): - """ - Initialize filter variables according with the requested mode. - - Only one choice is valid between export, internal and symbol. - - The nosymbol filter can be used on all modes. - """ - - self.enable_lineno = enable_lineno - self.no_doc_sections = no_doc_sections - self.function_table = function_table - - if symbol: - self.out_mode = self.OUTPUT_INCLUDE - elif export: - self.out_mode = self.OUTPUT_EXPORTED - elif internal: - self.out_mode = self.OUTPUT_INTERNAL - else: - self.out_mode = self.OUTPUT_ALL - - if nosymbol: - self.nosymbol = set(nosymbol) - - - def highlight_block(self, block): - """ - Apply the RST highlights to a sub-block of text. - """ - - for r, sub in self.highlights: - block = r.sub(sub, block) - - return block - - def out_warnings(self, args): - """ - Output warnings for identifiers that will be displayed. 
- """ - - for log_msg in args.warnings: - self.config.warning(log_msg) - - def check_doc(self, name, args): - """Check if DOC should be output""" - - if self.no_doc_sections: - return False - - if name in self.nosymbol: - return False - - if self.out_mode == self.OUTPUT_ALL: - self.out_warnings(args) - return True - - if self.out_mode == self.OUTPUT_INCLUDE: - if name in self.function_table: - self.out_warnings(args) - return True - - return False - - def check_declaration(self, dtype, name, args): - """ - Checks if a declaration should be output or not based on the - filtering criteria. - """ - - if name in self.nosymbol: - return False - - if self.out_mode == self.OUTPUT_ALL: - self.out_warnings(args) - return True - - if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]: - if name in self.function_table: - return True - - if self.out_mode == self.OUTPUT_INTERNAL: - if dtype != "function": - self.out_warnings(args) - return True - - if name not in self.function_table: - self.out_warnings(args) - return True - - return False - - def msg(self, fname, name, args): - """ - Handles a single entry from kernel-doc parser - """ - - self.data = "" - - dtype = args.type - - if dtype == "doc": - self.out_doc(fname, name, args) - return self.data - - if not self.check_declaration(dtype, name, args): - return self.data - - if dtype == "function": - self.out_function(fname, name, args) - return self.data - - if dtype == "enum": - self.out_enum(fname, name, args) - return self.data - - if dtype == "typedef": - self.out_typedef(fname, name, args) - return self.data - - if dtype in ["struct", "union"]: - self.out_struct(fname, name, args) - return self.data - - # Warn if some type requires an output logic - self.config.log.warning("doesn't now how to output '%s' block", - dtype) - - return None - - # Virtual methods to be overridden by inherited classes - # At the base class, those do nothing. 
- def set_symbols(self, symbols): - """Get a list of all symbols from kernel_doc""" - - def out_doc(self, fname, name, args): - """Outputs a DOC block""" - - def out_function(self, fname, name, args): - """Outputs a function""" - - def out_enum(self, fname, name, args): - """Outputs an enum""" - - def out_typedef(self, fname, name, args): - """Outputs a typedef""" - - def out_struct(self, fname, name, args): - """Outputs a struct""" - - -class RestFormat(OutputFormat): - """Consts and functions used by ReST output""" - - highlights = [ - (type_constant, r"``\1``"), - (type_constant2, r"``\1``"), - - # Note: need to escape () to avoid func matching later - (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"), - (type_member, r":c:type:`\1\2\3 <\1>`"), - (type_fp_param, r"**\1\\(\\)**"), - (type_fp_param2, r"**\1\\(\\)**"), - (type_func, r"\1()"), - (type_enum, r":c:type:`\1 <\2>`"), - (type_struct, r":c:type:`\1 <\2>`"), - (type_typedef, r":c:type:`\1 <\2>`"), - (type_union, r":c:type:`\1 <\2>`"), - - # in rst this can refer to any type - (type_fallback, r":c:type:`\1`"), - (type_param_ref, r"**\1\2**") - ] - blankline = "\n" - - sphinx_literal = KernRe(r'^[^.].*::$', cache=False) - sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False) - - def __init__(self): - """ - Creates class variables. - - Not really mandatory, but it is a good coding style and makes - pylint happy. - """ - - super().__init__() - self.lineprefix = "" - - def print_lineno(self, ln): - """Outputs a line number""" - - if self.enable_lineno and ln is not None: - ln += 1 - self.data += f".. LINENO {ln}\n" - - def output_highlight(self, args): - """ - Outputs a C symbol that may require being converted to ReST using - the self.highlights variable - """ - - input_text = args - output = "" - in_literal = False - litprefix = "" - block = "" - - for line in input_text.strip("\n").split("\n"): - - # If we're in a literal block, see if we should drop out of it. 
- # Otherwise, pass the line straight through unmunged. - if in_literal: - if line.strip(): # If the line is not blank - # If this is the first non-blank line in a literal block, - # figure out the proper indent. - if not litprefix: - r = KernRe(r'^(\s*)') - if r.match(line): - litprefix = '^' + r.group(1) - else: - litprefix = "" - - output += line + "\n" - elif not KernRe(litprefix).match(line): - in_literal = False - else: - output += line + "\n" - else: - output += line + "\n" - - # Not in a literal block (or just dropped out) - if not in_literal: - block += line + "\n" - if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line): - in_literal = True - litprefix = "" - output += self.highlight_block(block) - block = "" - - # Handle any remaining block - if block: - output += self.highlight_block(block) - - # Print the output with the line prefix - for line in output.strip("\n").split("\n"): - self.data += self.lineprefix + line + "\n" - - def out_section(self, args, out_docblock=False): - """ - Outputs a block section. - - This could use some work; it's used to output the DOC: sections, and - starts by putting out the name of the doc section itself, but that - tends to duplicate a header already in the template file. - """ - for section, text in args.sections.items(): - # Skip sections that are in the nosymbol_table - if section in self.nosymbol: - continue - - if out_docblock: - if not self.out_mode == self.OUTPUT_INCLUDE: - self.data += f".. 
_{section}:\n\n" - self.data += f'{self.lineprefix}**{section}**\n\n' - else: - self.data += f'{self.lineprefix}**{section}**\n\n' - - self.print_lineno(args.section_start_lines.get(section, 0)) - self.output_highlight(text) - self.data += "\n" - self.data += "\n" - - def out_doc(self, fname, name, args): - if not self.check_doc(name, args): - return - self.out_section(args, out_docblock=True) - - def out_function(self, fname, name, args): - - oldprefix = self.lineprefix - signature = "" - - func_macro = args.get('func_macro', False) - if func_macro: - signature = name - else: - if args.get('functiontype'): - signature = args['functiontype'] + " " - signature += name + " (" - - ln = args.declaration_start_line - count = 0 - for parameter in args.parameterlist: - if count != 0: - signature += ", " - count += 1 - dtype = args.parametertypes.get(parameter, "") - - if function_pointer.search(dtype): - signature += function_pointer.group(1) + parameter + function_pointer.group(3) - else: - signature += dtype - - if not func_macro: - signature += ")" - - self.print_lineno(ln) - if args.get('typedef') or not args.get('functiontype'): - self.data += f".. c:macro:: {name}\n\n" - - if args.get('typedef'): - self.data += " **Typedef**: " - self.lineprefix = "" - self.output_highlight(args.get('purpose', "")) - self.data += "\n\n**Syntax**\n\n" - self.data += f" ``{signature}``\n\n" - else: - self.data += f"``{signature}``\n\n" - else: - self.data += f".. c:function:: {signature}\n\n" - - if not args.get('typedef'): - self.print_lineno(ln) - self.lineprefix = " " - self.output_highlight(args.get('purpose', "")) - self.data += "\n" - - # Put descriptive text into a container (HTML
) to help set - # function prototypes apart - self.lineprefix = " " - - if args.parameterlist: - self.data += ".. container:: kernelindent\n\n" - self.data += f"{self.lineprefix}**Parameters**\n\n" - - for parameter in args.parameterlist: - parameter_name = KernRe(r'\[.*').sub('', parameter) - dtype = args.parametertypes.get(parameter, "") - - if dtype: - self.data += f"{self.lineprefix}``{dtype}``\n" - else: - self.data += f"{self.lineprefix}``{parameter}``\n" - - self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) - - self.lineprefix = " " - if parameter_name in args.parameterdescs and \ - args.parameterdescs[parameter_name] != KernelDoc.undescribed: - - self.output_highlight(args.parameterdescs[parameter_name]) - self.data += "\n" - else: - self.data += f"{self.lineprefix}*undescribed*\n\n" - self.lineprefix = " " - - self.out_section(args) - self.lineprefix = oldprefix - - def out_enum(self, fname, name, args): - - oldprefix = self.lineprefix - ln = args.declaration_start_line - - self.data += f"\n\n.. c:enum:: {name}\n\n" - - self.print_lineno(ln) - self.lineprefix = " " - self.output_highlight(args.get('purpose', '')) - self.data += "\n" - - self.data += ".. container:: kernelindent\n\n" - outer = self.lineprefix + " " - self.lineprefix = outer + " " - self.data += f"{outer}**Constants**\n\n" - - for parameter in args.parameterlist: - self.data += f"{outer}``{parameter}``\n" - - if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed: - self.output_highlight(args.parameterdescs[parameter]) - else: - self.data += f"{self.lineprefix}*undescribed*\n\n" - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - def out_typedef(self, fname, name, args): - - oldprefix = self.lineprefix - ln = args.declaration_start_line - - self.data += f"\n\n.. 
c:type:: {name}\n\n" - - self.print_lineno(ln) - self.lineprefix = " " - - self.output_highlight(args.get('purpose', '')) - - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - def out_struct(self, fname, name, args): - - purpose = args.get('purpose', "") - declaration = args.get('definition', "") - dtype = args.type - ln = args.declaration_start_line - - self.data += f"\n\n.. c:{dtype}:: {name}\n\n" - - self.print_lineno(ln) - - oldprefix = self.lineprefix - self.lineprefix += " " - - self.output_highlight(purpose) - self.data += "\n" - - self.data += ".. container:: kernelindent\n\n" - self.data += f"{self.lineprefix}**Definition**::\n\n" - - self.lineprefix = self.lineprefix + " " - - declaration = declaration.replace("\t", self.lineprefix) - - self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n" - self.data += f"{declaration}{self.lineprefix}" + "};\n\n" - - self.lineprefix = " " - self.data += f"{self.lineprefix}**Members**\n\n" - for parameter in args.parameterlist: - if not parameter or parameter.startswith("#"): - continue - - parameter_name = parameter.split("[", maxsplit=1)[0] - - if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: - continue - - self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0)) - - self.data += f"{self.lineprefix}``{parameter}``\n" - - self.lineprefix = " " - self.output_highlight(args.parameterdescs[parameter_name]) - self.lineprefix = " " - - self.data += "\n" - - self.data += "\n" - - self.lineprefix = oldprefix - self.out_section(args) - - -class ManFormat(OutputFormat): - """Consts and functions used by man pages output""" - - highlights = ( - (type_constant, r"\1"), - (type_constant2, r"\1"), - (type_func, r"\\fB\1\\fP"), - (type_enum, r"\\fI\1\\fP"), - (type_struct, r"\\fI\1\\fP"), - (type_typedef, r"\\fI\1\\fP"), - (type_union, r"\\fI\1\\fP"), - (type_param, r"\\fI\1\\fP"), - (type_param_ref, r"\\fI\1\2\\fP"), - (type_member, r"\\fI\1\2\3\\fP"), - 
(type_fallback, r"\\fI\1\\fP") - ) - blankline = "" - - date_formats = [ - "%a %b %d %H:%M:%S %Z %Y", - "%a %b %d %H:%M:%S %Y", - "%Y-%m-%d", - "%b %d %Y", - "%B %d %Y", - "%m %d %Y", - ] - - def __init__(self, modulename): - """ - Creates class variables. - - Not really mandatory, but it is a good coding style and makes - pylint happy. - """ - - super().__init__() - self.modulename = modulename - self.symbols = [] - - dt = None - tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP") - if tstamp: - for fmt in self.date_formats: - try: - dt = datetime.strptime(tstamp, fmt) - break - except ValueError: - pass - - if not dt: - dt = datetime.now() - - self.man_date = dt.strftime("%B %Y") - - def arg_name(self, args, name): - """ - Return the name that will be used for the man page. - - As we may have the same name on different namespaces, - prepend the data type for all types except functions and typedefs. - - The doc section is special: it uses the modulename. - """ - - dtype = args.type - - if dtype == "doc": - return self.modulename - - if dtype in ["function", "typedef"]: - return name - - return f"{dtype} {name}" - - def set_symbols(self, symbols): - """ - Get a list of all symbols from kernel_doc. - - Man pages will uses it to add a SEE ALSO section with other - symbols at the same file. - """ - self.symbols = symbols - - def out_tail(self, fname, name, args): - """Adds a tail for all man pages""" - - # SEE ALSO section - self.data += f'.SH "SEE ALSO"' + "\n.PP\n" - self.data += (f"Kernel file \\fB{args.fname}\\fR\n") - if len(self.symbols) >= 2: - cur_name = self.arg_name(args, name) - - related = [] - for arg in self.symbols: - out_name = self.arg_name(arg, arg.name) - - if cur_name == out_name: - continue - - related.append(f"\\fB{out_name}\\fR(9)") - - self.data += ",\n".join(related) + "\n" - - # TODO: does it make sense to add other sections? Maybe - # REPORTING ISSUES? LICENSE? 
- - def msg(self, fname, name, args): - """ - Handles a single entry from kernel-doc parser. - - Add a tail at the end of man pages output. - """ - super().msg(fname, name, args) - self.out_tail(fname, name, args) - - return self.data - - def output_highlight(self, block): - """ - Outputs a C symbol that may require being highlighted with - self.highlights variable using troff syntax - """ - - contents = self.highlight_block(block) - - if isinstance(contents, list): - contents = "\n".join(contents) - - for line in contents.strip("\n").split("\n"): - line = KernRe(r"^\s*").sub("", line) - if not line: - continue - - if line[0] == ".": - self.data += "\\&" + line + "\n" - else: - self.data += line + "\n" - - def out_doc(self, fname, name, args): - if not self.check_doc(name, args): - return - - out_name = self.arg_name(args, name) - - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_function(self, fname, name, args): - """output function in man""" - - out_name = self.arg_name(args, name) - - self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"{name} \\- {args['purpose']}\n" - - self.data += ".SH SYNOPSIS\n" - if args.get('functiontype', ''): - self.data += f'.B "{args["functiontype"]}" {name}' + "\n" - else: - self.data += f'.B "{name}' + "\n" - - count = 0 - parenth = "(" - post = "," - - for parameter in args.parameterlist: - if count == len(args.parameterlist) - 1: - post = ");" - - dtype = args.parametertypes.get(parameter, "") - if function_pointer.match(dtype): - # Pointer-to-function - self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n" - else: - dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype) - - self.data += f'.BI 
"{parenth}{dtype}" "{post}"' + "\n" - count += 1 - parenth = "" - - if args.parameterlist: - self.data += ".SH ARGUMENTS\n" - - for parameter in args.parameterlist: - parameter_name = re.sub(r'\[.*', '', parameter) - - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name, "")) - - for section, text in args.sections.items(): - self.data += f'.SH "{section.upper()}"' + "\n" - self.output_highlight(text) - - def out_enum(self, fname, name, args): - out_name = self.arg_name(args, name) - - self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"enum {name} \\- {args['purpose']}\n" - - self.data += ".SH SYNOPSIS\n" - self.data += f"enum {name}" + " {\n" - - count = 0 - for parameter in args.parameterlist: - self.data += f'.br\n.BI " {parameter}"' + "\n" - if count == len(args.parameterlist) - 1: - self.data += "\n};\n" - else: - self.data += ", \n.br\n" - - count += 1 - - self.data += ".SH Constants\n" - - for parameter in args.parameterlist: - parameter_name = KernRe(r'\[.*').sub('', parameter) - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name, "")) - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_typedef(self, fname, name, args): - module = self.modulename - purpose = args.get('purpose') - out_name = self.arg_name(args, name) - - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"typedef {name} \\- {purpose}\n" - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) - - def out_struct(self, fname, name, args): - module = self.modulename - purpose = args.get('purpose') - definition = args.get('definition') - out_name = self.arg_name(args, 
name) - - self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n" - - self.data += ".SH NAME\n" - self.data += f"{args.type} {name} \\- {purpose}\n" - - # Replace tabs with two spaces and handle newlines - declaration = definition.replace("\t", " ") - declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration) - - self.data += ".SH SYNOPSIS\n" - self.data += f"{args.type} {name} " + "{" + "\n.br\n" - self.data += f'.BI "{declaration}\n' + "};\n.br\n\n" - - self.data += ".SH Members\n" - for parameter in args.parameterlist: - if parameter.startswith("#"): - continue - - parameter_name = re.sub(r"\[.*", "", parameter) - - if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed: - continue - - self.data += f'.IP "{parameter}" 12' + "\n" - self.output_highlight(args.parameterdescs.get(parameter_name)) - - for section, text in args.sections.items(): - self.data += f'.SH "{section}"' + "\n" - self.output_highlight(text) diff --git a/scripts/lib/kdoc/kdoc_parser.py b/scripts/lib/kdoc/kdoc_parser.py deleted file mode 100644 index f7dbb0868367..000000000000 --- a/scripts/lib/kdoc/kdoc_parser.py +++ /dev/null @@ -1,1667 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . -# -# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702 - -""" -kdoc_parser -=========== - -Read a C language source or header FILE and extract embedded -documentation comments -""" - -import sys -import re -from pprint import pformat - -from kdoc_re import NestedMatch, KernRe -from kdoc_item import KdocItem - -# -# Regular expressions used to parse kernel-doc markups at KernelDoc class. -# -# Let's declare them in lowercase outside any class to make easier to -# convert from the python script. -# -# As those are evaluated at the beginning, no need to cache them -# - -# Allow whitespace at end of comment start. 
-doc_start = KernRe(r'^/\*\*\s*$', cache=False) - -doc_end = KernRe(r'\*/', cache=False) -doc_com = KernRe(r'\s*\*\s*', cache=False) -doc_com_body = KernRe(r'\s*\* ?', cache=False) -doc_decl = doc_com + KernRe(r'(\w+)', cache=False) - -# @params and a strictly limited set of supported section names -# Specifically: -# Match @word: -# @...: -# @{section-name}: -# while trying to not match literal block starts like "example::" -# -known_section_names = 'description|context|returns?|notes?|examples?' -known_sections = KernRe(known_section_names, flags = re.I) -doc_sect = doc_com + \ - KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$', - flags=re.I, cache=False) - -doc_content = doc_com_body + KernRe(r'(.*)', cache=False) -doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False) -doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False) -doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False) -doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False) - -export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False) -export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False) - -type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False) - -# -# Tests for the beginning of a kerneldoc block in its various forms. -# -doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False) -doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False) -doc_begin_func = KernRe(str(doc_com) + # initial " * ' - r"(?:\w+\s*\*\s*)?" + # type (not captured) - r'(?:define\s+)?' + # possible "define" (not captured) - r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)" - r'(?:[-:].*)?$', # description (not captured) - cache = False) - -# -# Here begins a long set of transformations to turn structure member prefixes -# and macro invocations into something we can parse and generate kdoc for. 
-# -struct_args_pattern = r'([^,)]+)' - -struct_xforms = [ - # Strip attributes - (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '), - (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '), - (KernRe(r'\s*__packed\s*', re.S), ' '), - (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '), - (KernRe(r'\s*____cacheline_aligned', re.S), ' '), - (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''), - # - # Unwrap struct_group macros based on this definition: - # __struct_group(TAG, NAME, ATTRS, MEMBERS...) - # which has variants like: struct_group(NAME, MEMBERS...) - # Only MEMBERS arguments require documentation. - # - # Parsing them happens on two steps: - # - # 1. drop struct group arguments that aren't at MEMBERS, - # storing them as STRUCT_GROUP(MEMBERS) - # - # 2. remove STRUCT_GROUP() ancillary macro. - # - # The original logic used to remove STRUCT_GROUP() using an - # advanced regex: - # - # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*; - # - # with two patterns that are incompatible with - # Python re module, as it has: - # - # - a recursive pattern: (?1) - # - an atomic grouping: (?>...) - # - # I tried a simpler version: but it didn't work either: - # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*; - # - # As it doesn't properly match the end parenthesis on some cases. - # - # So, a better solution was crafted: there's now a NestedMatch - # class that ensures that delimiters after a search are properly - # matched. So, the implementation to drop STRUCT_GROUP() will be - # handled in separate. 
- # - (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('), - (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('), - (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('), - # - # Replace macros - # - # TODO: use NestedMatch for FOO($1, $2, ...) matches - # - # it is better to also move those to the NestedMatch logic, - # to ensure that parenthesis will be properly matched. - # - (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'), - (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S), - r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'), - (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'), - (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)', - re.S), r'unsigned long \1[1 << ((\2) - 1)]'), - (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + - r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\2 *\1'), - (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' + - struct_args_pattern + r'\)', re.S), r'\1 \2[]'), - (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'), - (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'), -] -# -# Regexes here are guaranteed to have the end limiter matching -# the start delimiter. Yet, right now, only one replace group -# is allowed. 
-# -struct_nested_prefixes = [ - (re.compile(r'\bSTRUCT_GROUP\('), r'\1'), -] - -# -# Transforms for function prototypes -# -function_xforms = [ - (KernRe(r"^static +"), ""), - (KernRe(r"^extern +"), ""), - (KernRe(r"^asmlinkage +"), ""), - (KernRe(r"^inline +"), ""), - (KernRe(r"^__inline__ +"), ""), - (KernRe(r"^__inline +"), ""), - (KernRe(r"^__always_inline +"), ""), - (KernRe(r"^noinline +"), ""), - (KernRe(r"^__FORTIFY_INLINE +"), ""), - (KernRe(r"__init +"), ""), - (KernRe(r"__init_or_module +"), ""), - (KernRe(r"__deprecated +"), ""), - (KernRe(r"__flatten +"), ""), - (KernRe(r"__meminit +"), ""), - (KernRe(r"__must_check +"), ""), - (KernRe(r"__weak +"), ""), - (KernRe(r"__sched +"), ""), - (KernRe(r"_noprof"), ""), - (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""), - (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""), - (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""), - (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"), - (KernRe(r"__attribute_const__ +"), ""), - (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""), -] - -# -# Apply a set of transforms to a block of text. -# -def apply_transforms(xforms, text): - for search, subst in xforms: - text = search.sub(subst, text) - return text - -# -# A little helper to get rid of excess white space -# -multi_space = KernRe(r'\s\s+') -def trim_whitespace(s): - return multi_space.sub(' ', s.strip()) - -# -# Remove struct/enum members that have been marked "private". -# -def trim_private_members(text): - # - # First look for a "public:" block that ends a private region, then - # handle the "private until the end" case. - # - text = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S).sub('', text) - text = KernRe(r'/\*\s*private:.*', flags=re.S).sub('', text) - # - # We needed the comments to do the above, but now we can take them out. 
- # - return KernRe(r'\s*/\*.*?\*/\s*', flags=re.S).sub('', text).strip() - -class state: - """ - State machine enums - """ - - # Parser states - NORMAL = 0 # normal code - NAME = 1 # looking for function name - DECLARATION = 2 # We have seen a declaration which might not be done - BODY = 3 # the body of the comment - SPECIAL_SECTION = 4 # doc section ending with a blank line - PROTO = 5 # scanning prototype - DOCBLOCK = 6 # documentation block - INLINE_NAME = 7 # gathering doc outside main block - INLINE_TEXT = 8 # reading the body of inline docs - - name = [ - "NORMAL", - "NAME", - "DECLARATION", - "BODY", - "SPECIAL_SECTION", - "PROTO", - "DOCBLOCK", - "INLINE_NAME", - "INLINE_TEXT", - ] - - -SECTION_DEFAULT = "Description" # default section - -class KernelEntry: - - def __init__(self, config, fname, ln): - self.config = config - self.fname = fname - - self._contents = [] - self.prototype = "" - - self.warnings = [] - - self.parameterlist = [] - self.parameterdescs = {} - self.parametertypes = {} - self.parameterdesc_start_lines = {} - - self.section_start_lines = {} - self.sections = {} - - self.anon_struct_union = False - - self.leading_space = None - - self.fname = fname - - # State flags - self.brcount = 0 - self.declaration_start_line = ln + 1 - - # - # Management of section contents - # - def add_text(self, text): - self._contents.append(text) - - def contents(self): - return '\n'.join(self._contents) + '\n' - - # TODO: rename to emit_message after removal of kernel-doc.pl - def emit_msg(self, ln, msg, *, warning=True): - """Emit a message""" - - log_msg = f"{self.fname}:{ln} {msg}" - - if not warning: - self.config.log.info(log_msg) - return - - # Delegate warning output to output logic, as this way it - # will report warnings/info only for symbols that are output - - self.warnings.append(log_msg) - return - - # - # Begin a new section. 
- # - def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False): - if dump: - self.dump_section(start_new = True) - self.section = title - self.new_start_line = line_no - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - # - # If we have accumulated no contents in the default ("description") - # section, don't bother. - # - if self.section == SECTION_DEFAULT and not self._contents: - return - name = self.section - contents = self.contents() - - if type_param.match(name): - name = type_param.group(1) - - self.parameterdescs[name] = contents - self.parameterdesc_start_lines[name] = self.new_start_line - - self.new_start_line = 0 - - else: - if name in self.sections and self.sections[name] != "": - # Only warn on user-specified duplicate section names - if name != SECTION_DEFAULT: - self.emit_msg(self.new_start_line, - f"duplicate section name '{name}'") - # Treat as a new paragraph - add a blank line - self.sections[name] += '\n' + contents - else: - self.sections[name] = contents - self.section_start_lines[name] = self.new_start_line - self.new_start_line = 0 - -# self.config.log.debug("Section: %s : %s", name, pformat(vars(self))) - - if start_new: - self.section = SECTION_DEFAULT - self._contents = [] - -python_warning = False - -class KernelDoc: - """ - Read a C language source or header FILE and extract embedded - documentation comments. 
- """ - - # Section names - - section_context = "Context" - section_return = "Return" - - undescribed = "-- undescribed --" - - def __init__(self, config, fname): - """Initialize internal variables""" - - self.fname = fname - self.config = config - - # Initial state for the state machines - self.state = state.NORMAL - - # Store entry currently being processed - self.entry = None - - # Place all potential outputs into an array - self.entries = [] - - # - # We need Python 3.7 for its "dicts remember the insertion - # order" guarantee - # - global python_warning - if (not python_warning and - sys.version_info.major == 3 and sys.version_info.minor < 7): - - self.emit_msg(0, - 'Python 3.7 or later is required for correct results') - python_warning = True - - def emit_msg(self, ln, msg, *, warning=True): - """Emit a message""" - - if self.entry: - self.entry.emit_msg(ln, msg, warning=warning) - return - - log_msg = f"{self.fname}:{ln} {msg}" - - if warning: - self.config.log.warning(log_msg) - else: - self.config.log.info(log_msg) - - def dump_section(self, start_new=True): - """ - Dumps section contents to arrays/hashes intended for that purpose. - """ - - if self.entry: - self.entry.dump_section(start_new) - - # TODO: rename it to store_declaration after removal of kernel-doc.pl - def output_declaration(self, dtype, name, **args): - """ - Stores the entry into an entry array. 
- - The actual output and output filters will be handled elsewhere - """ - - item = KdocItem(name, self.fname, dtype, - self.entry.declaration_start_line, **args) - item.warnings = self.entry.warnings - - # Drop empty sections - # TODO: improve empty sections logic to emit warnings - sections = self.entry.sections - for section in ["Description", "Return"]: - if section in sections and not sections[section].rstrip(): - del sections[section] - item.set_sections(sections, self.entry.section_start_lines) - item.set_params(self.entry.parameterlist, self.entry.parameterdescs, - self.entry.parametertypes, - self.entry.parameterdesc_start_lines) - self.entries.append(item) - - self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args)) - - def reset_state(self, ln): - """ - Ancillary routine to create a new entry. It initializes all - variables used by the state machine. - """ - - # - # Flush the warnings out before we proceed further - # - if self.entry and self.entry not in self.entries: - for log_msg in self.entry.warnings: - self.config.log.warning(log_msg) - - self.entry = KernelEntry(self.config, self.fname, ln) - - # State flags - self.state = state.NORMAL - - def push_parameter(self, ln, decl_type, param, dtype, - org_arg, declaration_name): - """ - Store parameters and their descriptions at self.entry. - """ - - if self.entry.anon_struct_union and dtype == "" and param == "}": - return # Ignore the ending }; from anonymous struct/union - - self.entry.anon_struct_union = False - - param = KernRe(r'[\[\)].*').sub('', param, count=1) - - # - # Look at various "anonymous type" cases. 
- # - if dtype == '': - if param.endswith("..."): - if len(param) > 3: # there is a name provided, use that - param = param[:-3] - if not self.entry.parameterdescs.get(param): - self.entry.parameterdescs[param] = "variable arguments" - - elif (not param) or param == "void": - param = "void" - self.entry.parameterdescs[param] = "no arguments" - - elif param in ["struct", "union"]: - # Handle unnamed (anonymous) union or struct - dtype = param - param = "{unnamed_" + param + "}" - self.entry.parameterdescs[param] = "anonymous\n" - self.entry.anon_struct_union = True - - # Warn if parameter has no description - # (but ignore ones starting with # as these are not parameters - # but inline preprocessor statements) - if param not in self.entry.parameterdescs and not param.startswith("#"): - self.entry.parameterdescs[param] = self.undescribed - - if "." not in param: - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - - self.emit_msg(ln, - f"{dname} '{param}' not described in '{declaration_name}'") - - # Strip spaces from param so that it is one continuous string on - # parameterlist. This fixes a problem where check_sections() - # cannot find a parameter like "addr[6 + 2]" because it actually - # appears as "addr[6", "+", "2]" on the parameter list. - # However, it's better to maintain the param string unchanged for - # output, so just weaken the string compare in check_sections() - # to ignore "[blah" in a parameter string. - - self.entry.parameterlist.append(param) - org_arg = KernRe(r'\s\s+').sub(' ', org_arg) - self.entry.parametertypes[param] = org_arg - - - def create_parameter_list(self, ln, decl_type, args, - splitter, declaration_name): - """ - Creates a list of parameters, storing them at self.entry. 
- """ - - # temporarily replace all commas inside function pointer definition - arg_expr = KernRe(r'(\([^\),]+),') - while arg_expr.search(args): - args = arg_expr.sub(r"\1#", args) - - for arg in args.split(splitter): - # Ignore argument attributes - arg = KernRe(r'\sPOS0?\s').sub(' ', arg) - - # Strip leading/trailing spaces - arg = arg.strip() - arg = KernRe(r'\s+').sub(' ', arg, count=1) - - if arg.startswith('#'): - # Treat preprocessor directive as a typeless variable just to fill - # corresponding data structures "correctly". Catch it later in - # output_* subs. - - # Treat preprocessor directive as a typeless variable - self.push_parameter(ln, decl_type, arg, "", - "", declaration_name) - # - # The pointer-to-function case. - # - elif KernRe(r'\(.+\)\s*\(').search(arg): - arg = arg.replace('#', ',') - r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*" - r'([\w\[\].]*)' # Capture the name and possible [array] - r'\s*\)') # Make sure the trailing ")" is there - if r.match(arg): - param = r.group(1) - else: - self.emit_msg(ln, f"Invalid param: {arg}") - param = arg - dtype = arg.replace(param, '') - self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - # - # The array-of-pointers case. Dig the parameter name out from the middle - # of the declaration. - # - elif KernRe(r'\(.+\)\s*\[').search(arg): - r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*" - r'([\w.]*?)' # The actual pointer name - r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion] - if r.match(arg): - param = r.group(1) - else: - self.emit_msg(ln, f"Invalid param: {arg}") - param = arg - dtype = arg.replace(param, '') - self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name) - elif arg: - # - # Clean up extraneous spaces and split the string at commas; the first - # element of the resulting list will also include the type information. 
- # - arg = KernRe(r'\s*:\s*').sub(":", arg) - arg = KernRe(r'\s*\[').sub('[', arg) - args = KernRe(r'\s*,\s*').split(arg) - args[0] = re.sub(r'(\*+)\s*', r' \1', args[0]) - # - # args[0] has a string of "type a". If "a" includes an [array] - # declaration, we want to not be fooled by any white space inside - # the brackets, so detect and handle that case specially. - # - r = KernRe(r'^([^[\]]*\s+)(.*)$') - if r.match(args[0]): - args[0] = r.group(2) - dtype = r.group(1) - else: - # No space in args[0]; this seems wrong but preserves previous behavior - dtype = '' - - bitfield_re = KernRe(r'(.*?):(\w+)') - for param in args: - # - # For pointers, shift the star(s) from the variable name to the - # type declaration. - # - r = KernRe(r'^(\*+)\s*(.*)') - if r.match(param): - self.push_parameter(ln, decl_type, r.group(2), - f"{dtype} {r.group(1)}", - arg, declaration_name) - # - # Perform a similar shift for bitfields. - # - elif bitfield_re.search(param): - if dtype != "": # Skip unnamed bit-fields - self.push_parameter(ln, decl_type, bitfield_re.group(1), - f"{dtype}:{bitfield_re.group(2)}", - arg, declaration_name) - else: - self.push_parameter(ln, decl_type, param, dtype, - arg, declaration_name) - - def check_sections(self, ln, decl_name, decl_type): - """ - Check for errors inside sections, emitting warnings if not found - parameters are described. - """ - for section in self.entry.sections: - if section not in self.entry.parameterlist and \ - not known_sections.search(section): - if decl_type == 'function': - dname = f"{decl_type} parameter" - else: - dname = f"{decl_type} member" - self.emit_msg(ln, - f"Excess {dname} '{section}' description in '{decl_name}'") - - def check_return_section(self, ln, declaration_name, return_type): - """ - If the function doesn't return void, warns about the lack of a - return description. 
- """ - - if not self.config.wreturn: - return - - # Ignore an empty return type (It's a macro) - # Ignore functions with a "void" return type (but not "void *") - if not return_type or KernRe(r'void\s*\w*\s*$').search(return_type): - return - - if not self.entry.sections.get("Return", None): - self.emit_msg(ln, - f"No description found for return value of '{declaration_name}'") - - # - # Split apart a structure prototype; returns (struct|union, name, members) or None - # - def split_struct_proto(self, proto): - type_pattern = r'(struct|union)' - qualifiers = [ - "__attribute__", - "__packed", - "__aligned", - "____cacheline_aligned_in_smp", - "____cacheline_aligned", - ] - definition_body = r'\{(.*)\}\s*' + "(?:" + '|'.join(qualifiers) + ")?" - - r = KernRe(type_pattern + r'\s+(\w+)\s*' + definition_body) - if r.search(proto): - return (r.group(1), r.group(2), r.group(3)) - else: - r = KernRe(r'typedef\s+' + type_pattern + r'\s*' + definition_body + r'\s*(\w+)\s*;') - if r.search(proto): - return (r.group(1), r.group(3), r.group(2)) - return None - # - # Rewrite the members of a structure or union for easier formatting later on. - # Among other things, this function will turn a member like: - # - # struct { inner_members; } foo; - # - # into: - # - # struct foo; inner_members; - # - def rewrite_struct_members(self, members): - # - # Process struct/union members from the most deeply nested outward. The - # trick is in the ^{ below - it prevents a match of an outer struct/union - # until the inner one has been munged (removing the "{" in the process). 
- # - struct_members = KernRe(r'(struct|union)' # 0: declaration type - r'([^\{\};]+)' # 1: possible name - r'(\{)' - r'([^\{\}]*)' # 3: Contents of declaration - r'(\})' - r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration - tuples = struct_members.findall(members) - while tuples: - for t in tuples: - newmember = "" - oldmember = "".join(t) # Reconstruct the original formatting - dtype, name, lbr, content, rbr, rest, semi = t - # - # Pass through each field name, normalizing the form and formatting. - # - for s_id in rest.split(','): - s_id = s_id.strip() - newmember += f"{dtype} {s_id}; " - # - # Remove bitfield/array/pointer info, getting the bare name. - # - s_id = KernRe(r'[:\[].*').sub('', s_id) - s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id) - # - # Pass through the members of this inner structure/union. - # - for arg in content.split(';'): - arg = arg.strip() - # - # Look for (type)(*name)(args) - pointer to function - # - r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)') - if r.match(arg): - dtype, name, extra = r.group(1), r.group(2), r.group(3) - # Pointer-to-function - if not s_id: - # Anonymous struct/union - newmember += f"{dtype}{name}{extra}; " - else: - newmember += f"{dtype}{s_id}.{name}{extra}; " - # - # Otherwise a non-function member. - # - else: - # - # Remove bitmap and array portions and spaces around commas - # - arg = KernRe(r':\s*\d+\s*').sub('', arg) - arg = KernRe(r'\[.*\]').sub('', arg) - arg = KernRe(r'\s*,\s*').sub(',', arg) - # - # Look for a normal decl - "type name[,name...]" - # - r = KernRe(r'(.*)\s+([\S+,]+)') - if r.search(arg): - for name in r.group(2).split(','): - name = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', name) - if not s_id: - # Anonymous struct/union - newmember += f"{r.group(1)} {name}; " - else: - newmember += f"{r.group(1)} {s_id}.{name}; " - else: - newmember += f"{arg}; " - # - # At the end of the s_id loop, replace the original declaration with - # the munged version. 
- # - members = members.replace(oldmember, newmember) - # - # End of the tuple loop - search again and see if there are outer members - # that now turn up. - # - tuples = struct_members.findall(members) - return members - - # - # Format the struct declaration into a standard form for inclusion in the - # resulting docs. - # - def format_struct_decl(self, declaration): - # - # Insert newlines, get rid of extra spaces. - # - declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration) - declaration = KernRe(r'\}\s+;').sub('};', declaration) - # - # Format inline enums with each member on its own line. - # - r = KernRe(r'(enum\s+\{[^\}]+),([^\n])') - while r.search(declaration): - declaration = r.sub(r'\1,\n\2', declaration) - # - # Now go through and supply the right number of tabs - # for each line. - # - def_args = declaration.split('\n') - level = 1 - declaration = "" - for clause in def_args: - clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1) - if clause: - if '}' in clause and level > 1: - level -= 1 - if not clause.startswith('#'): - declaration += "\t" * level - declaration += "\t" + clause + "\n" - if "{" in clause and "}" not in clause: - level += 1 - return declaration - - - def dump_struct(self, ln, proto): - """ - Store an entry for an struct or union - """ - # - # Do the basic parse to get the pieces of the declaration. - # - struct_parts = self.split_struct_proto(proto) - if not struct_parts: - self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!") - return - decl_type, declaration_name, members = struct_parts - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {decl_type} {self.entry.identifier}. " - f"Prototype was for {decl_type} {declaration_name} instead\n") - return - # - # Go through the list of members applying all of our transformations. 
- # - members = trim_private_members(members) - members = apply_transforms(struct_xforms, members) - - nested = NestedMatch() - for search, sub in struct_nested_prefixes: - members = nested.sub(search, sub, members) - # - # Deal with embedded struct and union members, and drop enums entirely. - # - declaration = members - members = self.rewrite_struct_members(members) - members = re.sub(r'(\{[^\{\}]*\})', '', members) - # - # Output the result and we are done. - # - self.create_parameter_list(ln, decl_type, members, ';', - declaration_name) - self.check_sections(ln, declaration_name, decl_type) - self.output_declaration(decl_type, declaration_name, - definition=self.format_struct_decl(declaration), - purpose=self.entry.declaration_purpose) - - def dump_enum(self, ln, proto): - """ - Stores an enum inside self.entries array. - """ - # - # Strip preprocessor directives. Note that this depends on the - # trailing semicolon we added in process_proto_type(). - # - proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto) - # - # Parse out the name and members of the enum. Typedef form first. - # - r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;') - if r.search(proto): - declaration_name = r.group(2) - members = trim_private_members(r.group(1)) - # - # Failing that, look for a straight enum - # - else: - r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}') - if r.match(proto): - declaration_name = r.group(1) - members = trim_private_members(r.group(2)) - # - # OK, this isn't going to work. - # - else: - self.emit_msg(ln, f"{proto}: error: Cannot parse enum!") - return - # - # Make sure we found what we were expecting. - # - if self.entry.identifier != declaration_name: - if self.entry.identifier == "": - self.emit_msg(ln, - f"{proto}: wrong kernel-doc identifier on prototype") - else: - self.emit_msg(ln, - f"expecting prototype for enum {self.entry.identifier}. 
" - f"Prototype was for enum {declaration_name} instead") - return - - if not declaration_name: - declaration_name = "(anonymous)" - # - # Parse out the name of each enum member, and verify that we - # have a description for it. - # - member_set = set() - members = KernRe(r'\([^;)]*\)').sub('', members) - for arg in members.split(','): - if not arg: - continue - arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg) - self.entry.parameterlist.append(arg) - if arg not in self.entry.parameterdescs: - self.entry.parameterdescs[arg] = self.undescribed - self.emit_msg(ln, - f"Enum value '{arg}' not described in enum '{declaration_name}'") - member_set.add(arg) - # - # Ensure that every described member actually exists in the enum. - # - for k in self.entry.parameterdescs: - if k not in member_set: - self.emit_msg(ln, - f"Excess enum value '%{k}' description in '{declaration_name}'") - - self.output_declaration('enum', declaration_name, - purpose=self.entry.declaration_purpose) - - def dump_declaration(self, ln, prototype): - """ - Stores a data declaration inside self.entries array. - """ - - if self.entry.decl_type == "enum": - self.dump_enum(ln, prototype) - elif self.entry.decl_type == "typedef": - self.dump_typedef(ln, prototype) - elif self.entry.decl_type in ["union", "struct"]: - self.dump_struct(ln, prototype) - else: - # This would be a bug - self.emit_message(ln, f'Unknown declaration type: {self.entry.decl_type}') - - def dump_function(self, ln, prototype): - """ - Stores a function of function macro inside self.entries array. - """ - - found = func_macro = False - return_type = '' - decl_type = 'function' - # - # Apply the initial transformations. - # - prototype = apply_transforms(function_xforms, prototype) - # - # If we have a macro, remove the "#define" at the front. 
- # - new_proto = KernRe(r"^#\s*define\s+").sub("", prototype) - if new_proto != prototype: - prototype = new_proto - # - # Dispense with the simple "#define A B" case here; the key - # is the space after the name of the symbol being defined. - # NOTE that the seemingly misnamed "func_macro" indicates a - # macro *without* arguments. - # - r = KernRe(r'^(\w+)\s+') - if r.search(prototype): - return_type = '' - declaration_name = r.group(1) - func_macro = True - found = True - - # Yes, this truly is vile. We are looking for: - # 1. Return type (may be nothing if we're looking at a macro) - # 2. Function name - # 3. Function parameters. - # - # All the while we have to watch out for function pointer parameters - # (which IIRC is what the two sections are for), C types (these - # regexps don't even start to express all the possibilities), and - # so on. - # - # If you mess with these regexps, it's a good idea to check that - # the following functions' documentation still comes out right: - # - parport_register_device (function pointer parameters) - # - atomic_set (macro) - # - pci_match_device, __copy_to_user (long return type) - - name = r'\w+' - type1 = r'(?:[\w\s]+)?' - type2 = r'(?:[\w\s]+\*+)+' - # - # Attempt to match first on (args) with no internal parentheses; this - # lets us easily filter out __acquires() and other post-args stuff. If - # that fails, just grab the rest of the line to the last closing - # parenthesis. - # - proto_args = r'\(([^\(]*|.*)\)' - # - # (Except for the simple macro case) attempt to split up the prototype - # in the various ways we understand. 
- # - if not found: - patterns = [ - rf'^()({name})\s*{proto_args}', - rf'^({type1})\s+({name})\s*{proto_args}', - rf'^({type2})\s*({name})\s*{proto_args}', - ] - - for p in patterns: - r = KernRe(p) - if r.match(prototype): - return_type = r.group(1) - declaration_name = r.group(2) - args = r.group(3) - self.create_parameter_list(ln, decl_type, args, ',', - declaration_name) - found = True - break - # - # Parsing done; make sure that things are as we expect. - # - if not found: - self.emit_msg(ln, - f"cannot understand function prototype: '{prototype}'") - return - if self.entry.identifier != declaration_name: - self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). " - f"Prototype was for {declaration_name}() instead") - return - self.check_sections(ln, declaration_name, "function") - self.check_return_section(ln, declaration_name, return_type) - # - # Store the result. - # - self.output_declaration(decl_type, declaration_name, - typedef=('typedef' in return_type), - functiontype=return_type, - purpose=self.entry.declaration_purpose, - func_macro=func_macro) - - - def dump_typedef(self, ln, proto): - """ - Stores a typedef inside self.entries array. - """ - # - # We start by looking for function typedefs. - # - typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*' - typedef_ident = r'\*?\s*(\w\S+)\s*' - typedef_args = r'\s*\((.*)\);' - - typedef1 = KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args) - typedef2 = KernRe(typedef_type + typedef_ident + typedef_args) - - # Parse function typedef prototypes - for r in [typedef1, typedef2]: - if not r.match(proto): - continue - - return_type = r.group(1).strip() - declaration_name = r.group(2) - args = r.group(3) - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for typedef {self.entry.identifier}. 
Prototype was for typedef {declaration_name} instead\n") - return - - self.create_parameter_list(ln, 'function', args, ',', declaration_name) - - self.output_declaration('function', declaration_name, - typedef=True, - functiontype=return_type, - purpose=self.entry.declaration_purpose) - return - # - # Not a function, try to parse a simple typedef. - # - r = KernRe(r'typedef.*\s+(\w+)\s*;') - if r.match(proto): - declaration_name = r.group(1) - - if self.entry.identifier != declaration_name: - self.emit_msg(ln, - f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n") - return - - self.output_declaration('typedef', declaration_name, - purpose=self.entry.declaration_purpose) - return - - self.emit_msg(ln, "error: Cannot parse typedef!") - - @staticmethod - def process_export(function_set, line): - """ - process EXPORT_SYMBOL* tags - - This method doesn't use any variable from the class, so declare it - with a staticmethod decorator. - """ - - # We support documenting some exported symbols with different - # names. A horrible hack. - suffixes = [ '_noprof' ] - - # Note: it accepts only one EXPORT_SYMBOL* per line, as having - # multiple export lines would violate Kernel coding style. - - if export_symbol.search(line): - symbol = export_symbol.group(2) - elif export_symbol_ns.search(line): - symbol = export_symbol_ns.group(2) - else: - return False - # - # Found an export, trim out any special suffixes - # - for suffix in suffixes: - # Be backward compatible with Python < 3.9 - if symbol.endswith(suffix): - symbol = symbol[:-len(suffix)] - function_set.add(symbol) - return True - - def process_normal(self, ln, line): - """ - STATE_NORMAL: looking for the /** to begin everything. 
- """ - - if not doc_start.match(line): - return - - # start a new entry - self.reset_state(ln) - - # next line is always the function name - self.state = state.NAME - - def process_name(self, ln, line): - """ - STATE_NAME: Looking for the "name - description" line - """ - # - # Check for a DOC: block and handle them specially. - # - if doc_block.search(line): - - if not doc_block.group(1): - self.entry.begin_section(ln, "Introduction") - else: - self.entry.begin_section(ln, doc_block.group(1)) - - self.entry.identifier = self.entry.section - self.state = state.DOCBLOCK - # - # Otherwise we're looking for a normal kerneldoc declaration line. - # - elif doc_decl.search(line): - self.entry.identifier = doc_decl.group(1) - - # Test for data declaration - if doc_begin_data.search(line): - self.entry.decl_type = doc_begin_data.group(1) - self.entry.identifier = doc_begin_data.group(2) - # - # Look for a function description - # - elif doc_begin_func.search(line): - self.entry.identifier = doc_begin_func.group(1) - self.entry.decl_type = "function" - # - # We struck out. - # - else: - self.emit_msg(ln, - f"This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst\n{line}") - self.state = state.NORMAL - return - # - # OK, set up for a new kerneldoc entry. - # - self.state = state.BODY - self.entry.identifier = self.entry.identifier.strip(" ") - # if there's no @param blocks need to set up default section here - self.entry.begin_section(ln + 1) - # - # Find the description portion, which *should* be there but - # isn't always. 
- # (We should be able to capture this from the previous parsing - someday) - # - r = KernRe("[-:](.*)") - if r.search(line): - self.entry.declaration_purpose = trim_whitespace(r.group(1)) - self.state = state.DECLARATION - else: - self.entry.declaration_purpose = "" - - if not self.entry.declaration_purpose and self.config.wshort_desc: - self.emit_msg(ln, - f"missing initial short description on line:\n{line}") - - if not self.entry.identifier and self.entry.decl_type != "enum": - self.emit_msg(ln, - f"wrong kernel-doc identifier on line:\n{line}") - self.state = state.NORMAL - - if self.config.verbose: - self.emit_msg(ln, - f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}", - warning=False) - # - # Failed to find an identifier. Emit a warning - # - else: - self.emit_msg(ln, f"Cannot find identifier on line:\n{line}") - - # - # Helper function to determine if a new section is being started. - # - def is_new_section(self, ln, line): - if doc_sect.search(line): - self.state = state.BODY - # - # Pick out the name of our new section, tweaking it if need be. - # - newsection = doc_sect.group(1) - if newsection.lower() == 'description': - newsection = 'Description' - elif newsection.lower() == 'context': - newsection = 'Context' - self.state = state.SPECIAL_SECTION - elif newsection.lower() in ["@return", "@returns", - "return", "returns"]: - newsection = "Return" - self.state = state.SPECIAL_SECTION - elif newsection[0] == '@': - self.state = state.SPECIAL_SECTION - # - # Initialize the contents, and get the new section going. - # - newcontents = doc_sect.group(2) - if not newcontents: - newcontents = "" - self.dump_section() - self.entry.begin_section(ln, newsection) - self.entry.leading_space = None - - self.entry.add_text(newcontents.lstrip()) - return True - return False - - # - # Helper function to detect (and effect) the end of a kerneldoc comment. 
- # - def is_comment_end(self, ln, line): - if doc_end.search(line): - self.dump_section() - - # Look for doc_com + + doc_end: - r = KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/') - if r.match(line): - self.emit_msg(ln, f"suspicious ending line: {line}") - - self.entry.prototype = "" - self.entry.new_start_line = ln + 1 - - self.state = state.PROTO - return True - return False - - - def process_decl(self, ln, line): - """ - STATE_DECLARATION: We've seen the beginning of a declaration - """ - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - # - # Look for anything with the " * " line beginning. - # - if doc_content.search(line): - cont = doc_content.group(1) - # - # A blank line means that we have moved out of the declaration - # part of the comment (without any "special section" parameter - # descriptions). - # - if cont == "": - self.state = state.BODY - # - # Otherwise we have more of the declaration section to soak up. - # - else: - self.entry.declaration_purpose = \ - trim_whitespace(self.entry.declaration_purpose + ' ' + cont) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - - def process_special(self, ln, line): - """ - STATE_SPECIAL_SECTION: a section ending with a blank line - """ - # - # If we have hit a blank line (only the " * " marker), then this - # section is done. - # - if KernRe(r"\s*\*\s*$").match(line): - self.entry.begin_section(ln, dump = True) - self.state = state.BODY - return - # - # Not a blank line, look for the other ways to end the section. - # - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - # - # OK, we should have a continuation of the text for this section. - # - if doc_content.search(line): - cont = doc_content.group(1) - # - # If the lines of text after the first in a special section have - # leading white space, we need to trim it out or Sphinx will get - # confused. For the second line (the None case), see what we - # find there and remember it. 
- # - if self.entry.leading_space is None: - r = KernRe(r'^(\s+)') - if r.match(cont): - self.entry.leading_space = len(r.group(1)) - else: - self.entry.leading_space = 0 - # - # Otherwise, before trimming any leading chars, be *sure* - # that they are white space. We should maybe warn if this - # isn't the case. - # - for i in range(0, self.entry.leading_space): - if cont[i] != " ": - self.entry.leading_space = i - break - # - # Add the trimmed result to the section and we're done. - # - self.entry.add_text(cont[self.entry.leading_space:]) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - def process_body(self, ln, line): - """ - STATE_BODY: the bulk of a kerneldoc comment. - """ - if self.is_new_section(ln, line) or self.is_comment_end(ln, line): - return - - if doc_content.search(line): - cont = doc_content.group(1) - self.entry.add_text(cont) - else: - # Unknown line, ignore - self.emit_msg(ln, f"bad line: {line}") - - def process_inline_name(self, ln, line): - """STATE_INLINE_NAME: beginning of docbook comments within a prototype.""" - - if doc_inline_sect.search(line): - self.entry.begin_section(ln, doc_inline_sect.group(1)) - self.entry.add_text(doc_inline_sect.group(2).lstrip()) - self.state = state.INLINE_TEXT - elif doc_inline_end.search(line): - self.dump_section() - self.state = state.PROTO - elif doc_content.search(line): - self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}") - self.state = state.PROTO - # else ... ?? - - def process_inline_text(self, ln, line): - """STATE_INLINE_TEXT: docbook comments within a prototype.""" - - if doc_inline_end.search(line): - self.dump_section() - self.state = state.PROTO - elif doc_content.search(line): - self.entry.add_text(doc_content.group(1)) - # else ... ?? 
- - def syscall_munge(self, ln, proto): # pylint: disable=W0613 - """ - Handle syscall definitions - """ - - is_void = False - - # Strip newlines/CR's - proto = re.sub(r'[\r\n]+', ' ', proto) - - # Check if it's a SYSCALL_DEFINE0 - if 'SYSCALL_DEFINE0' in proto: - is_void = True - - # Replace SYSCALL_DEFINE with correct return type & function name - proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto) - - r = KernRe(r'long\s+(sys_.*?),') - if r.search(proto): - proto = KernRe(',').sub('(', proto, count=1) - elif is_void: - proto = KernRe(r'\)').sub('(void)', proto, count=1) - - # Now delete all of the odd-numbered commas in the proto - # so that argument types & names don't have a comma between them - count = 0 - length = len(proto) - - if is_void: - length = 0 # skip the loop if is_void - - for ix in range(length): - if proto[ix] == ',': - count += 1 - if count % 2 == 1: - proto = proto[:ix] + ' ' + proto[ix + 1:] - - return proto - - def tracepoint_munge(self, ln, proto): - """ - Handle tracepoint definitions - """ - - tracepointname = None - tracepointargs = None - - # Match tracepoint name based on different patterns - r = KernRe(r'TRACE_EVENT\((.*?),') - if r.search(proto): - tracepointname = r.group(1) - - r = KernRe(r'DEFINE_SINGLE_EVENT\((.*?),') - if r.search(proto): - tracepointname = r.group(1) - - r = KernRe(r'DEFINE_EVENT\((.*?),(.*?),') - if r.search(proto): - tracepointname = r.group(2) - - if tracepointname: - tracepointname = tracepointname.lstrip() - - r = KernRe(r'TP_PROTO\((.*?)\)') - if r.search(proto): - tracepointargs = r.group(1) - - if not tracepointname or not tracepointargs: - self.emit_msg(ln, - f"Unrecognized tracepoint format:\n{proto}\n") - else: - proto = f"static inline void trace_{tracepointname}({tracepointargs})" - self.entry.identifier = f"trace_{self.entry.identifier}" - - return proto - - def process_proto_function(self, ln, line): - """Ancillary routine to process a function prototype""" - - # strip C99-style 
comments to end of line - line = KernRe(r"//.*$", re.S).sub('', line) - # - # Soak up the line's worth of prototype text, stopping at { or ; if present. - # - if KernRe(r'\s*#\s*define').match(line): - self.entry.prototype = line - elif not line.startswith('#'): # skip other preprocessor stuff - r = KernRe(r'([^\{]*)') - if r.match(line): - self.entry.prototype += r.group(1) + " " - # - # If we now have the whole prototype, clean it up and declare victory. - # - if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line): - # strip comments and surrounding spaces - self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip() - # - # Handle self.entry.prototypes for function pointers like: - # int (*pcs_config)(struct foo) - # by turning it into - # int pcs_config(struct foo) - # - r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)') - self.entry.prototype = r.sub(r'\1\2', self.entry.prototype) - # - # Handle special declaration syntaxes - # - if 'SYSCALL_DEFINE' in self.entry.prototype: - self.entry.prototype = self.syscall_munge(ln, - self.entry.prototype) - else: - r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT') - if r.search(self.entry.prototype): - self.entry.prototype = self.tracepoint_munge(ln, - self.entry.prototype) - # - # ... and we're done - # - self.dump_function(ln, self.entry.prototype) - self.reset_state(ln) - - def process_proto_type(self, ln, line): - """Ancillary routine to process a type""" - - # Strip C99-style comments and surrounding whitespace - line = KernRe(r"//.*$", re.S).sub('', line).strip() - if not line: - return # nothing to see here - - # To distinguish preprocessor directive from regular declaration later. 
- if line.startswith('#'): - line += ";" - # - # Split the declaration on any of { } or ;, and accumulate pieces - # until we hit a semicolon while not inside {brackets} - # - r = KernRe(r'(.*?)([{};])') - for chunk in r.split(line): - if chunk: # Ignore empty matches - self.entry.prototype += chunk - # - # This cries out for a match statement ... someday after we can - # drop Python 3.9 ... - # - if chunk == '{': - self.entry.brcount += 1 - elif chunk == '}': - self.entry.brcount -= 1 - elif chunk == ';' and self.entry.brcount <= 0: - self.dump_declaration(ln, self.entry.prototype) - self.reset_state(ln) - return - # - # We hit the end of the line while still in the declaration; put - # in a space to represent the newline. - # - self.entry.prototype += ' ' - - def process_proto(self, ln, line): - """STATE_PROTO: reading a function/whatever prototype.""" - - if doc_inline_oneline.search(line): - self.entry.begin_section(ln, doc_inline_oneline.group(1)) - self.entry.add_text(doc_inline_oneline.group(2)) - self.dump_section() - - elif doc_inline_start.search(line): - self.state = state.INLINE_NAME - - elif self.entry.decl_type == 'function': - self.process_proto_function(ln, line) - - else: - self.process_proto_type(ln, line) - - def process_docblock(self, ln, line): - """STATE_DOCBLOCK: within a DOC: block.""" - - if doc_end.search(line): - self.dump_section() - self.output_declaration("doc", self.entry.identifier) - self.reset_state(ln) - - elif doc_content.search(line): - self.entry.add_text(doc_content.group(1)) - - def parse_export(self): - """ - Parses EXPORT_SYMBOL* macros from a single Kernel source file. - """ - - export_table = set() - - try: - with open(self.fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - - for line in fp: - self.process_export(export_table, line) - - except IOError: - return None - - return export_table - - # - # The state/action table telling us which function to invoke in - # each state. 
- # - state_actions = { - state.NORMAL: process_normal, - state.NAME: process_name, - state.BODY: process_body, - state.DECLARATION: process_decl, - state.SPECIAL_SECTION: process_special, - state.INLINE_NAME: process_inline_name, - state.INLINE_TEXT: process_inline_text, - state.PROTO: process_proto, - state.DOCBLOCK: process_docblock, - } - - def parse_kdoc(self): - """ - Open and process each line of a C source file. - The parsing is controlled via a state machine, and the line is passed - to a different process function depending on the state. The process - function may update the state as needed. - - Besides parsing kernel-doc tags, it also parses export symbols. - """ - - prev = "" - prev_ln = None - export_table = set() - - try: - with open(self.fname, "r", encoding="utf8", - errors="backslashreplace") as fp: - for ln, line in enumerate(fp): - - line = line.expandtabs().strip("\n") - - # Group continuation lines on prototypes - if self.state == state.PROTO: - if line.endswith("\\"): - prev += line.rstrip("\\") - if not prev_ln: - prev_ln = ln - continue - - if prev: - ln = prev_ln - line = prev + line - prev = "" - prev_ln = None - - self.config.log.debug("%d %s: %s", - ln, state.name[self.state], - line) - - # This is an optimization over the original script. - # There, when export_file was used for the same file, - # it was read twice. Here, we use the already-existing - # loop to parse exported symbols as well. 
- # - if (self.state != state.NORMAL) or \ - not self.process_export(export_table, line): - # Hand this line to the appropriate state handler - self.state_actions[self.state](self, ln, line) - - except OSError: - self.config.log.error(f"Error: Cannot open file {self.fname}") - - return export_table, self.entries diff --git a/scripts/lib/kdoc/kdoc_re.py b/scripts/lib/kdoc/kdoc_re.py deleted file mode 100644 index 612223e1e723..000000000000 --- a/scripts/lib/kdoc/kdoc_re.py +++ /dev/null @@ -1,270 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 2025: Mauro Carvalho Chehab . - -""" -Regular expression ancillary classes. - -Those help caching regular expressions and do matching for kernel-doc. -""" - -import re - -# Local cache for regular expressions -re_cache = {} - - -class KernRe: - """ - Helper class to simplify regex declaration and usage, - - It calls re.compile for a given pattern. It also allows adding - regular expressions and define sub at class init time. - - Regular expressions can be cached via an argument, helping to speedup - searches. - """ - - def _add_regex(self, string, flags): - """ - Adds a new regex or re-use it from the cache. - """ - self.regex = re_cache.get(string, None) - if not self.regex: - self.regex = re.compile(string, flags=flags) - if self.cache: - re_cache[string] = self.regex - - def __init__(self, string, cache=True, flags=0): - """ - Compile a regular expression and initialize internal vars. - """ - - self.cache = cache - self.last_match = None - - self._add_regex(string, flags) - - def __str__(self): - """ - Return the regular expression pattern. - """ - return self.regex.pattern - - def __add__(self, other): - """ - Allows adding two regular expressions into one. 
- """ - - return KernRe(str(self) + str(other), cache=self.cache or other.cache, - flags=self.regex.flags | other.regex.flags) - - def match(self, string): - """ - Handles a re.match storing its results - """ - - self.last_match = self.regex.match(string) - return self.last_match - - def search(self, string): - """ - Handles a re.search storing its results - """ - - self.last_match = self.regex.search(string) - return self.last_match - - def findall(self, string): - """ - Alias to re.findall - """ - - return self.regex.findall(string) - - def split(self, string): - """ - Alias to re.split - """ - - return self.regex.split(string) - - def sub(self, sub, string, count=0): - """ - Alias to re.sub - """ - - return self.regex.sub(sub, string, count=count) - - def group(self, num): - """ - Returns the group results of the last match - """ - - return self.last_match.group(num) - - -class NestedMatch: - """ - Finding nested delimiters is hard with regular expressions. It is - even harder on Python with its normal re module, as there are several - advanced regular expressions that are missing. - - This is the case of this pattern: - - '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;' - - which is used to properly match open/close parenthesis of the - string search STRUCT_GROUP(), - - Add a class that counts pairs of delimiters, using it to match and - replace nested expressions. - - The original approach was suggested by: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - - Although I re-implemented it to make it more generic and match 3 types - of delimiters. The logic checks if delimiters are paired. If not, it - will ignore the search string. 
- """ - - # TODO: make NestedMatch handle multiple match groups - # - # Right now, regular expressions to match it are defined only up to - # the start delimiter, e.g.: - # - # \bSTRUCT_GROUP\( - # - # is similar to: STRUCT_GROUP\((.*)\) - # except that the content inside the match group is delimiter's aligned. - # - # The content inside parenthesis are converted into a single replace - # group (e.g. r`\1'). - # - # It would be nice to change such definition to support multiple - # match groups, allowing a regex equivalent to. - # - # FOO\((.*), (.*), (.*)\) - # - # it is probably easier to define it not as a regular expression, but - # with some lexical definition like: - # - # FOO(arg1, arg2, arg3) - - DELIMITER_PAIRS = { - '{': '}', - '(': ')', - '[': ']', - } - - RE_DELIM = re.compile(r'[\{\}\[\]\(\)]') - - def _search(self, regex, line): - """ - Finds paired blocks for a regex that ends with a delimiter. - - The suggestion of using finditer to match pairs came from: - https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex - but I ended using a different implementation to align all three types - of delimiters and seek for an initial regular expression. - - The algorithm seeks for open/close paired delimiters and place them - into a stack, yielding a start/stop position of each match when the - stack is zeroed. - - The algorithm shoud work fine for properly paired lines, but will - silently ignore end delimiters that preceeds an start delimiter. - This should be OK for kernel-doc parser, as unaligned delimiters - would cause compilation errors. So, we don't need to rise exceptions - to cover such issues. 
- """ - - stack = [] - - for match_re in regex.finditer(line): - start = match_re.start() - offset = match_re.end() - - d = line[offset - 1] - if d not in self.DELIMITER_PAIRS: - continue - - end = self.DELIMITER_PAIRS[d] - stack.append(end) - - for match in self.RE_DELIM.finditer(line[offset:]): - pos = match.start() + offset - - d = line[pos] - - if d in self.DELIMITER_PAIRS: - end = self.DELIMITER_PAIRS[d] - - stack.append(end) - continue - - # Does the end delimiter match what it is expected? - if stack and d == stack[-1]: - stack.pop() - - if not stack: - yield start, offset, pos + 1 - break - - def search(self, regex, line): - """ - This is similar to re.search: - - It matches a regex that it is followed by a delimiter, - returning occurrences only if all delimiters are paired. - """ - - for t in self._search(regex, line): - - yield line[t[0]:t[2]] - - def sub(self, regex, sub, line, count=0): - """ - This is similar to re.sub: - - It matches a regex that it is followed by a delimiter, - replacing occurrences only if all delimiters are paired. - - if r'\1' is used, it works just like re: it places there the - matched paired data with the delimiter stripped. - - If count is different than zero, it will replace at most count - items. 
- """ - out = "" - - cur_pos = 0 - n = 0 - - for start, end, pos in self._search(regex, line): - out += line[cur_pos:start] - - # Value, ignoring start/end delimiters - value = line[end:pos - 1] - - # replaces \1 at the sub string, if \1 is used there - new_sub = sub - new_sub = new_sub.replace(r'\1', value) - - out += new_sub - - # Drop end ';' if any - if line[pos] == ';': - pos += 1 - - cur_pos = pos - n += 1 - - if count and count >= n: - break - - # Append the remaining string - l = len(line) - out += line[cur_pos:l] - - return out -- cgit v1.2.3 From 992a9df41ad7173588bf90e15b33d45db2811aea Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Mon, 10 Nov 2025 15:04:30 -0700 Subject: docs: bring some order to our Python module hierarchy Now that we have tools/lib/python for our Python modules, turn them into proper packages with a single namespace so that everything can just use tools/lib/python in sys.path. No functional change. Signed-off-by: Jonathan Corbet Message-ID: <20251110220430.726665-3-corbet@lwn.net> --- scripts/kernel-doc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/kernel-doc.py b/scripts/kernel-doc.py index bb24bbf73167..7a1eaf986bcd 100755 --- a/scripts/kernel-doc.py +++ b/scripts/kernel-doc.py @@ -111,7 +111,7 @@ import sys # Import Python modules -LIB_DIR = "../tools/lib/python/kdoc" +LIB_DIR = "../tools/lib/python" SRC_DIR = os.path.dirname(os.path.realpath(__file__)) sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR)) @@ -292,8 +292,8 @@ def main(): logger.warning("Python 3.7 or later is required for correct results") # Import kernel-doc libraries only after checking Python version - from kdoc_files import KernelFiles # pylint: disable=C0415 - from kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 + from kdoc.kdoc_files import KernelFiles # pylint: disable=C0415 + from kdoc.kdoc_output import RestFormat, ManFormat # pylint: disable=C0415 if args.man: out_style = 
ManFormat(modulename=args.modulename) -- cgit v1.2.3 From 464c7ea5c3ffa333a2c1a8dfd68b157ced1edc5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20L=C3=B3pez?= Date: Fri, 31 Oct 2025 12:19:09 +0100 Subject: checkpatch: add IDR to the deprecated list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As of commit 85656ec193e9, the IDR interface is marked as deprecated in the documentation, but no checks are made in that regard for new code. Add the existing IDR initialization APIs to the deprecated list in checkpatch, so that if new code is introduced using these APIs, a warning is emitted. Link: https://lkml.kernel.org/r/20251031111908.2266077-2-clopez@suse.de Signed-off-by: Carlos López Suggested-by: Dan Williams Acked-by: Dan Williams Acked-by: Joe Perches Cc: Andy Whitcroft Cc: Dwaipayan Ray Cc: Lukas Bulwahn Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'scripts') diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6729f18e5654..d58ca9655ab7 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -860,6 +860,10 @@ our %deprecated_apis = ( "kunmap" => "kunmap_local", "kmap_atomic" => "kmap_local_page", "kunmap_atomic" => "kunmap_local", + #These should be enough to drive away new IDR users + "DEFINE_IDR" => "DEFINE_XARRAY", + "idr_init" => "xa_init", + "idr_init_base" => "xa_init_flags" ); #Create a search pattern for all these strings to speed up a loop below -- cgit v1.2.3 From caa71919a622e3f7d290d4f17ae538b15f5cb6d3 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 6 Nov 2025 13:43:41 +0100 Subject: scripts/gdb/radix-tree: add lx-radix-tree-command Patch series "scripts/gdb/symbols: make BPF debug info available to GDB", v2. This series greatly simplifies debugging BPF progs when using QEMU gdbstub by providing symbol names, sizes, and line numbers to GDB. 
Patch 1 adds radix tree iteration, which is necessary for parsing prog_idr. Patch 2 is the actual implementation; its description contains some details on how to use this. This patch (of 2): Add a function and a command to iterate over radix tree contents. Duplicate the C implementation in Python, but drop support for tagging. Link: https://lkml.kernel.org/r/20251106124600.86736-1-iii@linux.ibm.com Link: https://lkml.kernel.org/r/20251106124600.86736-2-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Cc: Alexander Gordeev Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkman Cc: Heiko Carstens Cc: Jan Kiszka Cc: Kieran Bingham Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- scripts/gdb/linux/radixtree.py | 139 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 132 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/gdb/linux/radixtree.py b/scripts/gdb/linux/radixtree.py index 074543ac763d..bc2954e45c32 100644 --- a/scripts/gdb/linux/radixtree.py +++ b/scripts/gdb/linux/radixtree.py @@ -30,13 +30,16 @@ def entry_to_node(node): def node_maxindex(node): return (constants.LX_RADIX_TREE_MAP_SIZE << node['shift']) - 1 -def lookup(root, index): +def resolve_root(root): + if root.type == radix_tree_root_type.get_type(): + return root if root.type == radix_tree_root_type.get_type().pointer(): - node = root.dereference() - elif root.type != radix_tree_root_type.get_type(): - raise gdb.GdbError("must be {} not {}" - .format(radix_tree_root_type.get_type(), root.type)) + return root.dereference() + raise gdb.GdbError("must be {} not {}" + .format(radix_tree_root_type.get_type(), root.type)) +def lookup(root, index): + root = resolve_root(root) node = root['xa_head'] if node == 0: return None @@ -71,14 +74,120 @@ def lookup(root, index): return node -class LxRadixTree(gdb.Function): +def descend(parent, index): + offset = (index >> int(parent["shift"])) & constants.LX_RADIX_TREE_MAP_MASK + return offset, parent["slots"][offset] + +def 
load_root(root): + node = root["xa_head"] + nodep = node + + if is_internal_node(node): + node = entry_to_node(node) + maxindex = node_maxindex(node) + return int(node["shift"]) + constants.LX_RADIX_TREE_MAP_SHIFT, \ + nodep, maxindex + + return 0, nodep, 0 + +class RadixTreeIter: + def __init__(self, start): + self.index = 0 + self.next_index = start + self.node = None + +def xa_mk_internal(v): + return (v << 2) | 2 + +LX_XA_RETRY_ENTRY = xa_mk_internal(256) +LX_RADIX_TREE_RETRY = LX_XA_RETRY_ENTRY + +def next_chunk(root, iter): + mask = (1 << (utils.get_ulong_type().sizeof * 8)) - 1 + + index = iter.next_index + if index == 0 and iter.index != 0: + return None + + restart = True + while restart: + restart = False + + _, child, maxindex = load_root(root) + if index > maxindex: + return None + if not child: + return None + + if not is_internal_node(child): + iter.index = index + iter.next_index = (maxindex + 1) & mask + iter.node = None + return root["xa_head"].address + + while True: + node = entry_to_node(child) + offset, child = descend(node, index) + + if not child: + while True: + offset += 1 + if offset >= constants.LX_RADIX_TREE_MAP_SIZE: + break + slot = node["slots"][offset] + if slot: + break + index &= ~node_maxindex(node) + index = (index + (offset << int(node["shift"]))) & mask + if index == 0: + return None + if offset == constants.LX_RADIX_TREE_MAP_SIZE: + restart = True + break + child = node["slots"][offset] + + if not child: + restart = True + break + if child == LX_XA_RETRY_ENTRY: + break + if not node["shift"] or not is_internal_node(child): + break + + iter.index = (index & ~node_maxindex(node)) | offset + iter.next_index = ((index | node_maxindex(node)) + 1) & mask + iter.node = node + + return node["slots"][offset].address + +def next_slot(slot, iter): + mask = (1 << (utils.get_ulong_type().sizeof * 8)) - 1 + for _ in range(iter.next_index - iter.index - 1): + slot += 1 + iter.index = (iter.index + 1) & mask + if slot.dereference(): + return 
slot + return None + +def for_each_slot(root, start=0): + iter = RadixTreeIter(start) + slot = None + while True: + if not slot: + slot = next_chunk(root, iter) + if not slot: + break + yield iter.index, slot + slot = next_slot(slot, iter) + +class LxRadixTreeLookup(gdb.Function): """ Lookup and return a node from a RadixTree. $lx_radix_tree_lookup(root_node [, index]): Return the node at the given index. If index is omitted, the root node is dereference and returned.""" def __init__(self): - super(LxRadixTree, self).__init__("lx_radix_tree_lookup") + super(LxRadixTreeLookup, self).__init__("lx_radix_tree_lookup") def invoke(self, root, index=0): result = lookup(root, index) @@ -87,4 +196,20 @@ If index is omitted, the root node is dereference and returned.""" return result +class LxRadixTree(gdb.Command): + """Show all values stored in a RadixTree.""" + + def __init__(self): + super(LxRadixTree, self).__init__("lx-radix-tree", gdb.COMMAND_DATA, + gdb.COMPLETE_NONE) + + def invoke(self, argument, from_tty): + args = gdb.string_to_argv(argument) + if len(args) != 1: + raise gdb.GdbError("Usage: lx-radix-tree ROOT") + root = gdb.parse_and_eval(args[0]) + for index, slot in for_each_slot(root): + gdb.write("[{}] = {}\n".format(index, slot.dereference())) + LxRadixTree() +LxRadixTreeLookup() -- cgit v1.2.3 From 581ee79a254759ea8288057f762389820b39adcc Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 6 Nov 2025 13:43:42 +0100 Subject: scripts/gdb/symbols: make BPF debug info available to GDB One can debug BPF programs with QEMU gdbstub by setting a breakpoint on bpf_prog_kallsyms_add(), waiting for a hit with a matching aux.name, and then setting a breakpoint on bpf_func. This is tedious, error-prone, and also lacks line numbers. Automate this in a way similar to the existing support for modules in lx-symbols. Enumerate and monitor changes to both BPF kallsyms and JITed progs. 
For each ksym, generate and compile a synthetic .s file containing the name, code, and size. In addition, if this ksym is also a prog, and not a trampoline, add line number information. Ensure that this is a no-op if the kernel is built without BPF support or if "as" is missing. In theory the "as" dependency may be dropped by generating the synthetic .o file manually, but this is too much complexity for too little benefit. Now one can debug BPF progs out of the box like this: (gdb) lx-symbols -bpf (gdb) b bpf_prog_4e612a6a881a086b_arena_list_add Breakpoint 2 (bpf_prog_4e612a6a881a086b_arena_list_add) pending. # ./test_progs -t arena_list Thread 4 hit Breakpoint 2, bpf_prog_4e612a6a881a086b_arena_list_add () at linux/tools/testing/selftests/bpf/progs/arena_list.c:51 51 list_head = &global_head; (gdb) n bpf_prog_4e612a6a881a086b_arena_list_add () at linux/tools/testing/selftests/bpf/progs/arena_list.c:53 53 for (i = zero; i < cnt && can_loop; i++) { This also works for subprogs. Link: https://lkml.kernel.org/r/20251106124600.86736-3-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Cc: Alexander Gordeev Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkman Cc: Heiko Carstens Cc: Jan Kiszka Cc: Kieran Bingham Cc: Vasily Gorbik Signed-off-by: Andrew Morton --- scripts/gdb/linux/bpf.py | 253 ++++++++++++++++++++++++++++++++++++++ scripts/gdb/linux/constants.py.in | 3 + scripts/gdb/linux/symbols.py | 105 ++++++++++++++-- 3 files changed, 349 insertions(+), 12 deletions(-) create mode 100644 scripts/gdb/linux/bpf.py (limited to 'scripts') diff --git a/scripts/gdb/linux/bpf.py b/scripts/gdb/linux/bpf.py new file mode 100644 index 000000000000..1870534ef6f9 --- /dev/null +++ b/scripts/gdb/linux/bpf.py @@ -0,0 +1,253 @@ +# SPDX-License-Identifier: GPL-2.0 + +import json +import subprocess +import tempfile + +import gdb + +from linux import constants, lists, radixtree, utils + + +if constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT: + bpf_ksym_type = 
utils.CachedType("struct bpf_ksym") +if constants.LX_CONFIG_BPF_SYSCALL: + bpf_prog_type = utils.CachedType("struct bpf_prog") + + +def get_ksym_name(ksym): + name = ksym["name"].bytes + end = name.find(b"\x00") + if end != -1: + name = name[:end] + return name.decode() + + +def list_ksyms(): + if not (constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT): + return [] + bpf_kallsyms = gdb.parse_and_eval("&bpf_kallsyms") + bpf_ksym_ptr_type = bpf_ksym_type.get_type().pointer() + return list(lists.list_for_each_entry(bpf_kallsyms, + bpf_ksym_ptr_type, + "lnode")) + + +class KsymAddBreakpoint(gdb.Breakpoint): + def __init__(self, monitor): + super(KsymAddBreakpoint, self).__init__("bpf_ksym_add", internal=True) + self.silent = True + self.monitor = monitor + + def stop(self): + self.monitor.add(gdb.parse_and_eval("ksym")) + return False + + +class KsymRemoveBreakpoint(gdb.Breakpoint): + def __init__(self, monitor): + super(KsymRemoveBreakpoint, self).__init__("bpf_ksym_del", + internal=True) + self.silent = True + self.monitor = monitor + + def stop(self): + self.monitor.remove(gdb.parse_and_eval("ksym")) + return False + + +class KsymMonitor: + def __init__(self, add, remove): + self.add = add + self.remove = remove + + self.add_bp = KsymAddBreakpoint(self) + self.remove_bp = KsymRemoveBreakpoint(self) + + self.notify_initial() + + def notify_initial(self): + for ksym in list_ksyms(): + self.add(ksym) + + def delete(self): + self.add_bp.delete() + self.remove_bp.delete() + + +def list_progs(): + if not constants.LX_CONFIG_BPF_SYSCALL: + return [] + idr_rt = gdb.parse_and_eval("&prog_idr.idr_rt") + bpf_prog_ptr_type = bpf_prog_type.get_type().pointer() + progs = [] + for _, slot in radixtree.for_each_slot(idr_rt): + prog = slot.dereference().cast(bpf_prog_ptr_type) + progs.append(prog) + # Subprogs are not registered in prog_idr, fetch them manually. + # func[0] is the current prog. 
+ aux = prog["aux"] + func = aux["func"] + real_func_cnt = int(aux["real_func_cnt"]) + for i in range(1, real_func_cnt): + progs.append(func[i]) + return progs + + +class ProgAddBreakpoint(gdb.Breakpoint): + def __init__(self, monitor): + super(ProgAddBreakpoint, self).__init__("bpf_prog_kallsyms_add", + internal=True) + self.silent = True + self.monitor = monitor + + def stop(self): + self.monitor.add(gdb.parse_and_eval("fp")) + return False + + +class ProgRemoveBreakpoint(gdb.Breakpoint): + def __init__(self, monitor): + super(ProgRemoveBreakpoint, self).__init__("bpf_prog_free_id", + internal=True) + self.silent = True + self.monitor = monitor + + def stop(self): + self.monitor.remove(gdb.parse_and_eval("prog")) + return False + + +class ProgMonitor: + def __init__(self, add, remove): + self.add = add + self.remove = remove + + self.add_bp = ProgAddBreakpoint(self) + self.remove_bp = ProgRemoveBreakpoint(self) + + self.notify_initial() + + def notify_initial(self): + for prog in list_progs(): + self.add(prog) + + def delete(self): + self.add_bp.delete() + self.remove_bp.delete() + + +def btf_str_by_offset(btf, offset): + while offset < btf["start_str_off"]: + btf = btf["base_btf"] + + offset -= btf["start_str_off"] + if offset < btf["hdr"]["str_len"]: + return (btf["strings"] + offset).string() + + return None + + +def bpf_line_info_line_num(line_col): + return line_col >> 10 + + +def bpf_line_info_line_col(line_col): + return line_col & 0x3ff + + +class LInfoIter: + def __init__(self, prog): + # See bpf_prog_get_file_line() for details. 
+ self.pos = 0 + self.nr_linfo = 0 + + if prog is None: + return + + self.bpf_func = int(prog["bpf_func"]) + aux = prog["aux"] + self.btf = aux["btf"] + linfo_idx = aux["linfo_idx"] + self.nr_linfo = int(aux["nr_linfo"]) - linfo_idx + if self.nr_linfo == 0: + return + + linfo_ptr = aux["linfo"] + tpe = linfo_ptr.type.target().array(self.nr_linfo).pointer() + self.linfo = (linfo_ptr + linfo_idx).cast(tpe).dereference() + jited_linfo_ptr = aux["jited_linfo"] + tpe = jited_linfo_ptr.type.target().array(self.nr_linfo).pointer() + self.jited_linfo = (jited_linfo_ptr + linfo_idx).cast(tpe).dereference() + + self.filenos = {} + + def get_code_off(self): + if self.pos >= self.nr_linfo: + return -1 + return self.jited_linfo[self.pos] - self.bpf_func + + def advance(self): + self.pos += 1 + + def get_fileno(self): + file_name_off = int(self.linfo[self.pos]["file_name_off"]) + fileno = self.filenos.get(file_name_off) + if fileno is not None: + return fileno, None + file_name = btf_str_by_offset(self.btf, file_name_off) + fileno = len(self.filenos) + 1 + self.filenos[file_name_off] = fileno + return fileno, file_name + + def get_line_col(self): + line_col = int(self.linfo[self.pos]["line_col"]) + return bpf_line_info_line_num(line_col), \ + bpf_line_info_line_col(line_col) + + +def generate_debug_obj(ksym, prog): + name = get_ksym_name(ksym) + # Avoid read_memory(); it throws bogus gdb.MemoryError in some contexts. + start = ksym["start"] + code = start.cast(gdb.lookup_type("unsigned char") + .array(int(ksym["end"]) - int(start)) + .pointer()).dereference().bytes + linfo_iter = LInfoIter(prog) + + result = tempfile.NamedTemporaryFile(suffix=".o", mode="wb") + try: + with tempfile.NamedTemporaryFile(suffix=".s", mode="w") as src: + # ".loc" does not apply to ".byte"s, only to ".insn"s, but since + # this needs to work for all architectures, the latter are not an + # option. Ask the assembler to apply ".loc"s to labels as well, + # and generate dummy labels after each ".loc". 
+ src.write(".loc_mark_labels 1\n") + + src.write(".globl {}\n".format(name)) + src.write(".type {},@function\n".format(name)) + src.write("{}:\n".format(name)) + for code_off, code_byte in enumerate(code): + if linfo_iter.get_code_off() == code_off: + fileno, file_name = linfo_iter.get_fileno() + if file_name is not None: + src.write(".file {} {}\n".format( + fileno, json.dumps(file_name))) + line, col = linfo_iter.get_line_col() + src.write(".loc {} {} {}\n".format(fileno, line, col)) + src.write("0:\n") + linfo_iter.advance() + src.write(".byte {}\n".format(code_byte)) + src.write(".size {},{}\n".format(name, len(code))) + src.flush() + + try: + subprocess.check_call(["as", "-c", src.name, "-o", result.name]) + except FileNotFoundError: + # "as" is not installed. + result.close() + return None + return result + except: + result.close() + raise diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in index c3886739a028..6d475540c6ba 100644 --- a/scripts/gdb/linux/constants.py.in +++ b/scripts/gdb/linux/constants.py.in @@ -170,3 +170,6 @@ LX_CONFIG(CONFIG_PAGE_OWNER) LX_CONFIG(CONFIG_SLUB_DEBUG) LX_CONFIG(CONFIG_SLAB_FREELIST_HARDENED) LX_CONFIG(CONFIG_MMU) +LX_CONFIG(CONFIG_BPF) +LX_CONFIG(CONFIG_BPF_JIT) +LX_CONFIG(CONFIG_BPF_SYSCALL) diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 6edb99221675..d4308b726183 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -11,13 +11,14 @@ # This work is licensed under the terms of the GNU GPL version 2. # +import atexit import gdb import os import re import struct from itertools import count -from linux import modules, utils, constants +from linux import bpf, constants, modules, utils if hasattr(gdb, 'Breakpoint'): @@ -114,17 +115,27 @@ class LxSymbols(gdb.Command): The kernel (vmlinux) is taken from the current working directly. Modules (.ko) are scanned recursively, starting in the same directory. 
Optionally, the module search path can be extended by a space separated list of paths passed to the -lx-symbols command.""" +lx-symbols command. + +When the -bpf flag is specified, symbols from the currently loaded BPF programs +are loaded as well.""" module_paths = [] module_files = [] module_files_updated = False loaded_modules = [] breakpoint = None + bpf_prog_monitor = None + bpf_ksym_monitor = None + bpf_progs = {} + # The remove-symbol-file command, even when invoked with -a, requires the + # respective object file to exist, so keep them around. + bpf_debug_objs = {} def __init__(self): super(LxSymbols, self).__init__("lx-symbols", gdb.COMMAND_FILES, gdb.COMPLETE_FILENAME) + atexit.register(self.cleanup_bpf) def _update_module_files(self): self.module_files = [] @@ -197,6 +208,51 @@ lx-symbols command.""" else: gdb.write("no module object found for '{0}'\n".format(module_name)) + def add_bpf_prog(self, prog): + if prog["jited"]: + self.bpf_progs[int(prog["bpf_func"])] = prog + + def remove_bpf_prog(self, prog): + self.bpf_progs.pop(int(prog["bpf_func"]), None) + + def add_bpf_ksym(self, ksym): + addr = int(ksym["start"]) + name = bpf.get_ksym_name(ksym) + with utils.pagination_off(): + gdb.write("loading @{addr}: {name}\n".format( + addr=hex(addr), name=name)) + debug_obj = bpf.generate_debug_obj(ksym, self.bpf_progs.get(addr)) + if debug_obj is None: + return + try: + cmdline = "add-symbol-file {obj} {addr}".format( + obj=debug_obj.name, addr=hex(addr)) + gdb.execute(cmdline, to_string=True) + except: + debug_obj.close() + raise + self.bpf_debug_objs[addr] = debug_obj + + def remove_bpf_ksym(self, ksym): + addr = int(ksym["start"]) + debug_obj = self.bpf_debug_objs.pop(addr, None) + if debug_obj is None: + return + try: + name = bpf.get_ksym_name(ksym) + gdb.write("unloading @{addr}: {name}\n".format( + addr=hex(addr), name=name)) + cmdline = "remove-symbol-file {path}".format(path=debug_obj.name) + gdb.execute(cmdline, to_string=True) + finally: + 
debug_obj.close() + + def cleanup_bpf(self): + self.bpf_progs = {} + while len(self.bpf_debug_objs) > 0: + self.bpf_debug_objs.popitem()[1].close() + + def load_all_symbols(self): gdb.write("loading vmlinux\n") @@ -224,34 +280,59 @@ lx-symbols command.""" else: [self.load_module_symbols(module) for module in module_list] + self.cleanup_bpf() + if self.bpf_prog_monitor is not None: + self.bpf_prog_monitor.notify_initial() + if self.bpf_ksym_monitor is not None: + self.bpf_ksym_monitor.notify_initial() + for saved_state in saved_states: saved_state['breakpoint'].enabled = saved_state['enabled'] def invoke(self, arg, from_tty): skip_decompressor() - self.module_paths = [os.path.abspath(os.path.expanduser(p)) - for p in arg.split()] + monitor_bpf = False + self.module_paths = [] + for p in arg.split(): + if p == "-bpf": + monitor_bpf = True + else: + p.append(os.path.abspath(os.path.expanduser(p))) self.module_paths.append(os.getcwd()) + if self.breakpoint is not None: + self.breakpoint.delete() + self.breakpoint = None + if self.bpf_prog_monitor is not None: + self.bpf_prog_monitor.delete() + self.bpf_prog_monitor = None + if self.bpf_ksym_monitor is not None: + self.bpf_ksym_monitor.delete() + self.bpf_ksym_monitor = None + # enforce update self.module_files = [] self.module_files_updated = False self.load_all_symbols() - if not modules.has_modules(): + if not hasattr(gdb, 'Breakpoint'): + gdb.write("Note: symbol update on module and BPF loading not " + "supported with this gdb version\n") return - if hasattr(gdb, 'Breakpoint'): - if self.breakpoint is not None: - self.breakpoint.delete() - self.breakpoint = None + if modules.has_modules(): self.breakpoint = LoadModuleBreakpoint( "kernel/module/main.c:do_init_module", self) - else: - gdb.write("Note: symbol update on module loading not supported " - "with this gdb version\n") + + if monitor_bpf: + if constants.LX_CONFIG_BPF_SYSCALL: + self.bpf_prog_monitor = bpf.ProgMonitor(self.add_bpf_prog, + self.remove_bpf_prog) 
+ if constants.LX_CONFIG_BPF and constants.LX_CONFIG_BPF_JIT: + self.bpf_ksym_monitor = bpf.KsymMonitor(self.add_bpf_ksym, + self.remove_bpf_ksym) LxSymbols() -- cgit v1.2.3 From 93863f3f859a626347ce2ec18947b11357b4ca14 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 20 Nov 2025 12:14:20 -0800 Subject: kbuild: Check for functions with ambiguous -ffunction-sections section names Commit 9c7dc1dd897a ("objtool: Warn on functions with ambiguous -ffunction-sections section names") only works for drivers which are compiled on architectures supported by objtool. Make a script to perform the same check for all architectures. Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/a6a49644a34964f7e02f3a8ce43af03e72817180.1763669451.git.jpoimboe@kernel.org --- scripts/Makefile.vmlinux_o | 4 ++++ scripts/check-function-names.sh | 25 +++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100755 scripts/check-function-names.sh (limited to 'scripts') diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 20533cc0b1ee..527352c222ff 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -63,11 +63,15 @@ quiet_cmd_ld_vmlinux.o = LD $@ --start-group $(KBUILD_VMLINUX_LIBS) --end-group \ $(cmd_objtool) +cmd_check_function_names = $(srctree)/scripts/check-function-names.sh $@ + define rule_ld_vmlinux.o $(call cmd_and_savecmd,ld_vmlinux.o) $(call cmd,gen_objtooldep) + $(call cmd,check_function_names) endef + vmlinux.o: $(initcalls-lds) vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE $(call if_changed_rule,ld_vmlinux.o) diff --git a/scripts/check-function-names.sh b/scripts/check-function-names.sh new file mode 100755 index 000000000000..410042591cfc --- /dev/null +++ b/scripts/check-function-names.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Certain function names are disallowed due to section name ambiguities +# introduced by 
-ffunction-sections. +# +# See the comment above TEXT_MAIN in include/asm-generic/vmlinux.lds.h. + +objfile="$1" + +if [ ! -f "$objfile" ]; then + echo "usage: $0 " >&2 + exit 1 +fi + +bad_symbols=$(nm "$objfile" | awk '$2 ~ /^[TtWw]$/ {print $3}' | grep -E '^(startup|exit|split|unlikely|hot|unknown)(\.|$)') + +if [ -n "$bad_symbols" ]; then + echo "$bad_symbols" | while read -r sym; do + echo "$objfile: error: $sym() function name creates ambiguity with -ffunction-sections" >&2 + done + exit 1 +fi + +exit 0 -- cgit v1.2.3 From 1181c974421818ff7318e3a211c87b5dd437c13e Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:14 +0100 Subject: rust: kbuild: simplify `--cfg` handling We need to handle `cfg`s in both `rustc` and `rust-analyzer`, and in future commits some of those contain double quotes, which complicates things further. Thus, instead of removing the `--cfg ` part in the rust-analyzer generation script, have the `*-cfgs` variables contain just the actual `cfg`, and use that to generate the actual flags in `*-flags`. 
Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Tested-by: Gary Guo Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-3-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index fc27f0cca752..dedca470adc1 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -15,7 +15,7 @@ def args_crates_cfgs(cfgs): crates_cfgs = {} for cfg in cfgs: crate, vals = cfg.split("=", 1) - crates_cfgs[crate] = vals.replace("--cfg", "").split() + crates_cfgs[crate] = vals.split() return crates_cfgs -- cgit v1.2.3 From 158a3b72118a4dab7e7bf2d89afbab9b96eddc1c Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:22 +0100 Subject: rust: proc-macro2: enable support in kbuild With all the new files in place and ready from the new crate, enable the support for it in the build system. `proc_macro_byte_character` and `proc_macro_c_str_literals` were stabilized in Rust 1.79.0 [1] and were implemented earlier than our minimum Rust version (1.78) [2][3]. Thus just enable them instead of using the `cfg` that `proc-macro2` uses to emulate them in older compilers. In addition, skip formatting for this vendored crate and take the chance to add a comment mentioning this. 
Link: https://github.com/rust-lang/rust/pull/123431 [1] Link: https://github.com/rust-lang/rust/pull/112711 [2] Link: https://github.com/rust-lang/rust/pull/119651 [3] Reviewed-by: Gary Guo Tested-by: Gary Guo Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-11-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'scripts') diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index dedca470adc1..00c6b7cc94b7 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -86,6 +86,13 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit [], ) + append_crate( + "proc_macro2", + srctree / "rust" / "proc-macro2" / "lib.rs", + ["core", "alloc", "std", "proc_macro"], + cfg=crates_cfgs["proc_macro2"], + ) + append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", -- cgit v1.2.3 From 88de91cc1ce7b3069ccabc1a5fbe16d41c663093 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:26 +0100 Subject: rust: quote: enable support in kbuild With all the new files in place and ready from the new crate, enable the support for it in the build system. 
Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Tested-by: Gary Guo Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-15-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'scripts') diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 00c6b7cc94b7..4faf153ed2ee 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -93,6 +93,13 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit cfg=crates_cfgs["proc_macro2"], ) + append_crate( + "quote", + srctree / "rust" / "quote" / "lib.rs", + ["alloc", "proc_macro", "proc_macro2"], + cfg=crates_cfgs["quote"], + ) + append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", -- cgit v1.2.3 From 737401751ace2d806de6854aee52c176141d10e2 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:31 +0100 Subject: rust: syn: enable support in kbuild With all the new files in place and ready from the new crate, enable the support for it in the build system. 
Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Tested-by: Gary Guo Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-20-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'scripts') diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 4faf153ed2ee..5b6f7b8d6918 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -100,6 +100,13 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit cfg=crates_cfgs["quote"], ) + append_crate( + "syn", + srctree / "rust" / "syn" / "lib.rs", + ["proc_macro", "proc_macro2", "quote"], + cfg=crates_cfgs["syn"], + ) + append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", -- cgit v1.2.3 From 52ba807f1aa6ac16289e9dc9e381475305afd685 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 24 Nov 2025 16:18:32 +0100 Subject: rust: macros: support `proc-macro2`, `quote` and `syn` One of the two main use cases for adding `proc-macro2`, `quote` and `syn` is the `macros` crate (and the other `pin-init`). Thus add the support for the crates in `macros` already. 
Tested-by: Jesung Yang Link: https://patch.msgid.link/20251124151837.2184382-21-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/generate_rust_analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index 5b6f7b8d6918..147d0cc94068 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -110,7 +110,7 @@ def generate_crates(srctree, objtree, sysroot_src, external_src, cfgs, core_edit append_crate( "macros", srctree / "rust" / "macros" / "lib.rs", - ["std", "proc_macro"], + ["std", "proc_macro", "proc_macro2", "quote", "syn"], is_proc_macro=True, ) -- cgit v1.2.3 From 2a9c8c0b59d366acabb8f891e84569376f3e2709 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 25 Nov 2025 14:18:20 +0100 Subject: kbuild: add target to build a cpio containing modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new package target to build a cpio archive containing the kernel modules. This is particularly useful to supplement an existing initramfs with the kernel modules so that the root filesystem can be started with all needed kernel modules without modifying it. 
Signed-off-by: Sascha Hauer Reviewed-by: Simon Glass Tested-by: Simon Glass Co-developed-by: Ahmad Fatoum Signed-off-by: Ahmad Fatoum Reviewed-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Tested-by: Nicolas Schier Link: https://patch.msgid.link/20251125-cpio-modules-pkg-v2-2-aa8277d89682@pengutronix.de Signed-off-by: Nicolas Schier --- scripts/Makefile.package | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'scripts') diff --git a/scripts/Makefile.package b/scripts/Makefile.package index 74bcb9e7f7a4..83bfcf7cb09f 100644 --- a/scripts/Makefile.package +++ b/scripts/Makefile.package @@ -189,6 +189,25 @@ tar-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar tar%-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar.% FORCE @: +# modules-cpio-pkg - generate an initramfs with the modules +# --------------------------------------------------------------------------- + +.tmp_modules_cpio: FORCE + $(Q)$(MAKE) -f $(srctree)/Makefile + $(Q)rm -rf $@ + $(Q)$(MAKE) -f $(srctree)/Makefile INSTALL_MOD_PATH=$@ modules_install + +quiet_cmd_cpio = CPIO $@ + cmd_cpio = $(CONFIG_SHELL) $(srctree)/usr/gen_initramfs.sh -o $@ $< + +modules-$(KERNELRELEASE)-$(ARCH).cpio: .tmp_modules_cpio + $(Q)$(MAKE) $(build)=usr usr/gen_init_cpio + $(call cmd,cpio) + +PHONY += modules-cpio-pkg +modules-cpio-pkg: modules-$(KERNELRELEASE)-$(ARCH).cpio + @: + # perf-tar*-src-pkg - generate a source tarball with perf source # --------------------------------------------------------------------------- @@ -245,6 +264,7 @@ help: @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball' @echo ' tarxz-pkg - Build the kernel as a xz compressed tarball' @echo ' tarzst-pkg - Build the kernel as a zstd compressed tarball' + @echo ' modules-cpio-pkg - Build the kernel modules as cpio archive' @echo ' perf-tar-src-pkg - Build the perf source tarball with no compression' @echo ' perf-targz-src-pkg - Build the perf source tarball with gzip compression' @echo ' 
perf-tarbz2-src-pkg - Build the perf source tarball with bz2 compression' -- cgit v1.2.3 From f387d0e1027f2d13cbfc1305b54198af701ede19 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 2 Dec 2025 09:59:37 -0800 Subject: x86/asm: Remove ANNOTATE_DATA_SPECIAL usage Instead of manually annotating each __ex_table entry, just make the section mergeable and store the entry size in the ELF section header. Either way works for objtool create_fake_symbols(), this way produces cleaner code generation. Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Linus Torvalds Link: https://patch.msgid.link/b858cb7891c1ba0080e22a9c32595e6c302435e2.1764694625.git.jpoimboe@kernel.org --- scripts/mod/devicetable-offsets.c | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index d3d00e85edf7..ef2ffb68f69d 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#define COMPILE_OFFSETS #include #include -- cgit v1.2.3 From 316f0b43fe0131af869a5a58e20ec6e0b6038fa8 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 2 Dec 2025 11:30:26 -0800 Subject: coccinelle: Drop pm_runtime_barrier() error code checks This function doesn't return anything any more, so the compiler would notice any bad error handling before any cocci script would. Signed-off-by: Brian Norris Tested-by: Guenter Roeck Link: https://patch.msgid.link/20251202193129.1411419-3-briannorris@chromium.org Signed-off-by: Rafael J. 
Wysocki --- scripts/coccinelle/api/pm_runtime.cocci | 1 - 1 file changed, 1 deletion(-) (limited to 'scripts') diff --git a/scripts/coccinelle/api/pm_runtime.cocci b/scripts/coccinelle/api/pm_runtime.cocci index 2c931e748dda..bf128ccae921 100644 --- a/scripts/coccinelle/api/pm_runtime.cocci +++ b/scripts/coccinelle/api/pm_runtime.cocci @@ -37,7 +37,6 @@ ret@p = \(pm_runtime_idle\| pm_runtime_put_sync_autosuspend\| pm_runtime_set_active\| pm_schedule_suspend\| - pm_runtime_barrier\| pm_generic_runtime_suspend\| pm_generic_runtime_resume\)(...); ... -- cgit v1.2.3 From 7a7e836684feb33d4f5418e8bd44101faf6b3f44 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 9 Dec 2025 20:40:23 -0500 Subject: tracing: Fix unused tracepoints when module uses only exported ones Building the KVM intel module failed to build with UT=1: no __tracepoint_strings in file: arch/x86/kvm/kvm-intel.o make[3]: *** [/work/git/test-linux.git/scripts/Makefile.modfinal:62: arch/x86/kvm/kvm-intel.ko] Error 1 The reason is that the module only uses the tracepoints defined and exported by the main kvm module. The tracepoint-update.c code fails the build if a tracepoint is used, but there's no tracepoints defined. But this is acceptable in modules if the tracepoints are defined in the vmlinux proper or another module and exported. Do not fail to build if a tracepoint is used but no tracepoints are defined if the code is a module. This should still never happen for the vmlinux itself. 
Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Masahiro Yamada Link: https://patch.msgid.link/20251209204023.76941824@fedora Fixes: e30f8e61e2518 ("tracing: Add a tracepoint verification check at build time") Signed-off-by: Steven Rostedt (Google) --- scripts/tracepoint-update.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'scripts') diff --git a/scripts/tracepoint-update.c b/scripts/tracepoint-update.c index 7f7d90df14ce..90046aedc97b 100644 --- a/scripts/tracepoint-update.c +++ b/scripts/tracepoint-update.c @@ -210,6 +210,9 @@ static int process_tracepoints(bool mod, void *addr, const char *fname) } if (!tracepoint_data_sec) { + /* A module may reference only exported tracepoints */ + if (mod) + return 0; fprintf(stderr, "no __tracepoint_strings in file: %s\n", fname); return -1; } -- cgit v1.2.3 From 01da5216c572f6f8fca4e272451aad6c273b0d57 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Wed, 3 Dec 2025 20:58:49 +0530 Subject: checkpatch: add uninitialized pointer with __free attribute check Uninitialized pointers with the __free attribute can cause undefined behavior as the memory randomly assigned to the pointer is freed automatically when the pointer goes out of scope. Add a check in checkpatch to detect such issues. 
Link: https://lkml.kernel.org/r/20251203-aheev-checkpatch-uninitialized-free-v7-1-841e3b31d8f3@gmail.com Signed-off-by: Ally Heev Suggested-by: Dan Carpenter Link: https://lore.kernel.org/all/8a4c0b43-cf63-400d-b33d-d9c447b7e0b9@suswa.mountain/ Link: https://lore.kernel.org/all/58fd478f408a34b578ee8d949c5c4b4da4d4f41d.camel@HansenPartnership.com/ Acked-by: Dan Williams Reviewed-by: Krzysztof Kozlowski Acked-by: Joe Perches Cc: Andy Whitcroft Cc: David Hunter Cc: Dwaipayan Ray Cc: Geert Uytterhoeven Cc: James Bottomley Cc: Jonathan Corbet Cc: Lukas Bulwahn Cc: Menon, Nishanth Cc: Stephen Boyd Cc: Viresh Kumar Signed-off-by: Andrew Morton --- scripts/checkpatch.pl | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'scripts') diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index d58ca9655ab7..c0250244cf7a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -7732,6 +7732,12 @@ sub process { ERROR("MISSING_SENTINEL", "missing sentinel in ID array\n" . "$here\n$stat\n"); } } + +# check for uninitialized pointers with __free attribute + while ($line =~ /\*\s*($Ident)\s+__free\s*\(\s*$Ident\s*\)\s*[,;]/g) { + ERROR("UNINITIALIZED_PTR_WITH_FREE", + "pointer '$1' with __free attribute should be initialized\n" . $herecurr); + } } # If we have no input at all, then there is nothing to report on -- cgit v1.2.3