From 149d623fbefe67b4c3cfbaae3246f559478aff4c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2022 12:27:22 +0200 Subject: scripts/spdxcheck: Add percentage to statistics Files checked: 75856 Lines checked: 294516 Files with SPDX: 59410 78% Files with errors: 0 Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- scripts/spdxcheck.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index f3be8ed54f6d..c6ff37e935f5 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -285,7 +285,9 @@ if __name__ == '__main__': sys.stderr.write('\n') sys.stderr.write('Files checked: %12d\n' %parser.checked) sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked) - sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid) + if parser.checked: + pc = int(100 * parser.spdx_valid / parser.checked) + sys.stderr.write('Files with SPDX: %12d %3d%%\n' %(parser.spdx_valid, pc)) sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors) sys.exit(0) -- cgit v1.2.3 From a377ce75e4916da5dbb84672218a7e61e51da3ce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2022 12:27:24 +0200 Subject: scripts/spdxcheck: Add directory statistics For better insights. 
Directories accounted: 4646 Directories complete: 2565 55% Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- scripts/spdxcheck.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'scripts') diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index c6ff37e935f5..80fade8629e7 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -28,6 +28,15 @@ class SPDXdata(object): self.licenses = [ ] self.exceptions = { } +class dirinfo(object): + def __init__(self): + self.missing = 0 + self.total = 0 + + def update(self, miss): + self.total += 1 + self.missing += miss + # Read the spdx data from the LICENSES directory def read_spdxdata(repo): @@ -93,6 +102,7 @@ class id_parser(object): self.checked = 0 self.spdx_valid = 0 self.spdx_errors = 0 + self.spdx_dirs = {} self.curline = 0 self.deepest = 0 @@ -167,6 +177,7 @@ class id_parser(object): def parse_lines(self, fd, maxlines, fname): self.checked += 1 self.curline = 0 + fail = 1 try: for line in fd: line = line.decode(locale.getpreferredencoding(False), errors='ignore') @@ -192,6 +203,7 @@ class id_parser(object): # Should we check for more SPDX ids in the same file and # complain if there are any? 
# + fail = 0 break except ParserException as pe: @@ -203,6 +215,11 @@ class id_parser(object): sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt)) self.spdx_errors += 1 + base = os.path.dirname(fname) + di = self.spdx_dirs.get(base, dirinfo()) + di.update(fail) + self.spdx_dirs[base] = di + def scan_git_tree(tree): for el in tree.traverse(): # Exclude stuff which would make pointless noise @@ -289,6 +306,16 @@ if __name__ == '__main__': pc = int(100 * parser.spdx_valid / parser.checked) sys.stderr.write('Files with SPDX: %12d %3d%%\n' %(parser.spdx_valid, pc)) sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors) + ndirs = len(parser.spdx_dirs) + dirsok = 0 + if ndirs: + sys.stderr.write('\n') + sys.stderr.write('Directories accounted: %8d\n' %ndirs) + for di in parser.spdx_dirs.values(): + if not di.missing: + dirsok += 1 + pc = int(100 * dirsok / ndirs) + sys.stderr.write('Directories complete: %8d %3d%%\n' %(dirsok, pc)) sys.exit(0) -- cgit v1.2.3 From 0e7f030687efb7a6f8dddd0e967ca4377aee3001 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2022 12:27:26 +0200 Subject: scripts/spdxcheck: Add [sub]directory statistics Add functionality to display [sub]directory statistics. This is enabled by adding '-d' to the command line. The optional -D parameter allows to limit the directory depth. 
If supplied the subdirectories are accumulated # scripts/spdxcheck.py -d kernel/ Incomplete directories: SPDX in Files ./kernel : 111 of 114 97% ./kernel/bpf : 43 of 45 95% ./kernel/bpf/preload : 4 of 5 80% ./kernel/bpf/preload/iterators : 4 of 5 80% ./kernel/cgroup : 10 of 13 76% ./kernel/configs : 0 of 9 0% ./kernel/debug : 3 of 4 75% ./kernel/debug/kdb : 1 of 11 9% ./kernel/locking : 29 of 32 90% ./kernel/sched : 38 of 39 97% The result can be accumulated by restricting the depth via the new command line option '-D $DEPTH': # scripts/spdxcheck.py -d -D1 Incomplete directories: SPDX in Files ./ : 6 of 13 46% ./Documentation : 4096 of 8451 48% ./arch : 13476 of 16402 82% ./block : 100 of 101 99% ./certs : 11 of 14 78% ./crypto : 145 of 176 82% ./drivers : 24682 of 30745 80% ./fs : 1876 of 2110 88% ./include : 5175 of 5757 89% ./ipc : 12 of 13 92% ./kernel : 493 of 527 93% ./lib : 393 of 524 75% ./mm : 151 of 159 94% ./net : 1713 of 1900 90% ./samples : 211 of 273 77% ./scripts : 341 of 435 78% ./security : 241 of 250 96% ./sound : 2438 of 2503 97% ./tools : 3810 of 5462 69% ./usr : 9 of 10 90% Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- scripts/spdxcheck.py | 67 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 10 deletions(-) (limited to 'scripts') diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index 80fade8629e7..dc605d485dca 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -103,9 +103,21 @@ class id_parser(object): self.spdx_valid = 0 self.spdx_errors = 0 self.spdx_dirs = {} + self.dirdepth = -1 + self.basedir = '.' 
self.curline = 0 self.deepest = 0 + def set_dirinfo(self, basedir, dirdepth): + if dirdepth >= 0: + self.basedir = basedir + bdir = basedir.lstrip('./').rstrip('/') + if bdir != '': + parts = bdir.split('/') + else: + parts = [] + self.dirdepth = dirdepth + len(parts) + # Validate License and Exception IDs def validate(self, tok): id = tok.value.upper() @@ -215,12 +227,29 @@ class id_parser(object): sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt)) self.spdx_errors += 1 + if fname == '-': + return + base = os.path.dirname(fname) + if self.dirdepth > 0: + parts = base.split('/') + i = 0 + base = '.' + while i < self.dirdepth and i < len(parts) and len(parts[i]): + base += '/' + parts[i] + i += 1 + elif self.dirdepth == 0: + base = self.basedir + else: + base = './' + base.rstrip('/') + base += '/' + di = self.spdx_dirs.get(base, dirinfo()) di.update(fail) self.spdx_dirs[base] = di -def scan_git_tree(tree): +def scan_git_tree(tree, basedir, dirdepth): + parser.set_dirinfo(basedir, dirdepth) for el in tree.traverse(): # Exclude stuff which would make pointless noise # FIXME: Put this somewhere more sensible @@ -233,15 +262,19 @@ def scan_git_tree(tree): with open(el.path, 'rb') as fd: parser.parse_lines(fd, args.maxlines, el.path) -def scan_git_subtree(tree, path): +def scan_git_subtree(tree, path, dirdepth): for p in path.strip('/').split('/'): tree = tree[p] - scan_git_tree(tree) + scan_git_tree(tree, path.strip('/'), dirdepth) if __name__ == '__main__': ap = ArgumentParser(description='SPDX expression checker') ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"') + ap.add_argument('-d', '--dirs', action='store_true', + help='Show [sub]directory statistics.') + ap.add_argument('-D', '--depth', type=int, default=-1, + help='Directory depth for -d statistics. Default: unlimited') ap.add_argument('-m', '--maxlines', type=int, default=15, help='Maximum number of lines to scan in a file. 
Default 15') ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output') @@ -285,13 +318,21 @@ if __name__ == '__main__': if os.path.isfile(p): parser.parse_lines(open(p, 'rb'), args.maxlines, p) elif os.path.isdir(p): - scan_git_subtree(repo.head.reference.commit.tree, p) + scan_git_subtree(repo.head.reference.commit.tree, p, + args.depth) else: sys.stderr.write('path %s does not exist\n' %p) sys.exit(1) else: # Full git tree scan - scan_git_tree(repo.head.commit.tree) + scan_git_tree(repo.head.commit.tree, '.', args.depth) + + ndirs = len(parser.spdx_dirs) + dirsok = 0 + if ndirs: + for di in parser.spdx_dirs.values(): + if not di.missing: + dirsok += 1 if args.verbose: sys.stderr.write('\n') @@ -306,17 +347,23 @@ if __name__ == '__main__': pc = int(100 * parser.spdx_valid / parser.checked) sys.stderr.write('Files with SPDX: %12d %3d%%\n' %(parser.spdx_valid, pc)) sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors) - ndirs = len(parser.spdx_dirs) - dirsok = 0 if ndirs: sys.stderr.write('\n') sys.stderr.write('Directories accounted: %8d\n' %ndirs) - for di in parser.spdx_dirs.values(): - if not di.missing: - dirsok += 1 pc = int(100 * dirsok / ndirs) sys.stderr.write('Directories complete: %8d %3d%%\n' %(dirsok, pc)) + if ndirs and ndirs != dirsok and args.dirs: + if args.verbose: + sys.stderr.write('\n') + sys.stderr.write('Incomplete directories: SPDX in Files\n') + for f in sorted(parser.spdx_dirs.keys()): + di = parser.spdx_dirs[f] + if di.missing: + valid = di.total - di.missing + pc = int(100 * valid / di.total) + sys.stderr.write(' %-80s: %5d of %5d %3d%%\n' %(f, valid, di.total, pc)) + sys.exit(0) except Exception as ex: -- cgit v1.2.3 From 67924b71412cd965e0d1c55c0cddb0014c8a725b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2022 12:27:27 +0200 Subject: scripts/spdxcheck: Add option to display files without SPDX Makes life easier when chasing the missing ones. 
Is activated with '-f' on the command line. # scripts/spdxcheck.py -f kernel/ Files without SPDX: ./kernel/cpu.c ./kernel/kmod.c ./kernel/relay.c ./kernel/bpf/offload.c ./kernel/bpf/preload/.gitignore ./kernel/bpf/preload/iterators/README ./kernel/bpf/ringbuf.c ./kernel/cgroup/cgroup.c ./kernel/cgroup/cpuset.c ./kernel/cgroup/legacy_freezer.c ./kernel/debug/debug_core.h ./kernel/debug/kdb/Makefile ./kernel/debug/kdb/kdb_bp.c ./kernel/debug/kdb/kdb_bt.c ./kernel/debug/kdb/kdb_cmds ./kernel/debug/kdb/kdb_debugger.c ./kernel/debug/kdb/kdb_io.c ./kernel/debug/kdb/kdb_keyboard.c ./kernel/debug/kdb/kdb_main.c ./kernel/debug/kdb/kdb_private.h ./kernel/debug/kdb/kdb_support.c ./kernel/locking/lockdep_states.h ./kernel/locking/mutex-debug.c ./kernel/locking/spinlock_debug.c ./kernel/sched/pelt.h With the optional -D parameter the directory depth can be limited: # scripts/spdxcheck.py -f -D 0 kernel/ Files without SPDX: ./kernel/cpu.c ./kernel/kmod.c ./kernel/relay.c Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- scripts/spdxcheck.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'scripts') diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index dc605d485dca..b9e0f1725a0a 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -32,10 +32,16 @@ class dirinfo(object): def __init__(self): self.missing = 0 self.total = 0 + self.files = [] - def update(self, miss): + def update(self, fname, basedir, miss): self.total += 1 self.missing += miss + if miss: + fname = './' + fname + bdir = os.path.dirname(fname) + if bdir == basedir.rstrip('/'): + self.files.append(fname) # Read the spdx data from the LICENSES directory def read_spdxdata(repo): @@ -245,7 +251,7 @@ class id_parser(object): base += '/' di = self.spdx_dirs.get(base, dirinfo()) - di.update(fail) + di.update(fname, base, fail) self.spdx_dirs[base] = di def scan_git_tree(tree, basedir, dirdepth): @@ -275,6 +281,8 @@ if __name__ == '__main__': 
help='Show [sub]directory statistics.') ap.add_argument('-D', '--depth', type=int, default=-1, help='Directory depth for -d statistics. Default: unlimited') + ap.add_argument('-f', '--files', action='store_true', + help='Show files without SPDX.') ap.add_argument('-m', '--maxlines', type=int, default=15, help='Maximum number of lines to scan in a file. Default 15') ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output') @@ -364,6 +372,15 @@ if __name__ == '__main__': pc = int(100 * valid / di.total) sys.stderr.write(' %-80s: %5d of %5d %3d%%\n' %(f, valid, di.total, pc)) + if ndirs and ndirs != dirsok and args.files: + if args.verbose or args.dirs: + sys.stderr.write('\n') + sys.stderr.write('Files without SPDX:\n') + for f in sorted(parser.spdx_dirs.keys()): + di = parser.spdx_dirs[f] + for f in sorted(di.files): + sys.stderr.write(' %s\n' %f) + sys.exit(0) except Exception as ex: -- cgit v1.2.3 From 0509b270a358fa563946368418f8e832d9b63452 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2022 12:27:29 +0200 Subject: scripts/spdxcheck: Put excluded files and directories into a separate file The files and directories which are excluded from scanning are currently hard coded in the script. That's not maintainable and not accessible for external tools. Move the files and directories which should be excluded into a file. The default file is scripts/spdxexclude. This can be overridden with the '-e $FILE' command line option. The file format and syntax is similar to the .gitignore file. 
Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- scripts/spdxcheck.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++----- scripts/spdxexclude | 8 ++++++ 2 files changed, 72 insertions(+), 6 deletions(-) create mode 100644 scripts/spdxexclude (limited to 'scripts') diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index b9e0f1725a0a..18cb9f5b3d3d 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -6,6 +6,7 @@ from argparse import ArgumentParser from ply import lex, yacc import locale import traceback +import fnmatch import sys import git import re @@ -106,6 +107,7 @@ class id_parser(object): self.parser = yacc.yacc(module = self, write_tables = False, debug = False) self.lines_checked = 0 self.checked = 0 + self.excluded = 0 self.spdx_valid = 0 self.spdx_errors = 0 self.spdx_dirs = {} @@ -254,17 +256,47 @@ class id_parser(object): di.update(fname, base, fail) self.spdx_dirs[base] = di +class pattern(object): + def __init__(self, line): + self.pattern = line + self.match = self.match_file + if line == '.*': + self.match = self.match_dot + elif line.endswith('/'): + self.pattern = line[:-1] + self.match = self.match_dir + elif line.startswith('/'): + self.pattern = line[1:] + self.match = self.match_fn + + def match_dot(self, fpath): + return os.path.basename(fpath).startswith('.') + + def match_file(self, fpath): + return os.path.basename(fpath) == self.pattern + + def match_fn(self, fpath): + return fnmatch.fnmatchcase(fpath, self.pattern) + + def match_dir(self, fpath): + if self.match_fn(os.path.dirname(fpath)): + return True + return fpath.startswith(self.pattern) + +def exclude_file(fpath): + for rule in exclude_rules: + if rule.match(fpath): + return True + return False + def scan_git_tree(tree, basedir, dirdepth): parser.set_dirinfo(basedir, dirdepth) for el in tree.traverse(): - # Exclude stuff which would make pointless noise - # FIXME: Put this somewhere more sensible - if el.path.startswith("LICENSES"): - 
continue - if el.path.find("license-rules.rst") >= 0: - continue if not os.path.isfile(el.path): continue + if exclude_file(el.path): + parser.excluded += 1 + continue with open(el.path, 'rb') as fd: parser.parse_lines(fd, args.maxlines, el.path) @@ -273,6 +305,20 @@ def scan_git_subtree(tree, path, dirdepth): tree = tree[p] scan_git_tree(tree, path.strip('/'), dirdepth) +def read_exclude_file(fname): + rules = [] + if not fname: + return rules + with open(fname) as fd: + for line in fd: + line = line.strip() + if line.startswith('#'): + continue + if not len(line): + continue + rules.append(pattern(line)) + return rules + if __name__ == '__main__': ap = ArgumentParser(description='SPDX expression checker') @@ -281,6 +327,8 @@ if __name__ == '__main__': help='Show [sub]directory statistics.') ap.add_argument('-D', '--depth', type=int, default=-1, help='Directory depth for -d statistics. Default: unlimited') + ap.add_argument('-e', '--exclude', + help='File containing file patterns to exclude. 
Default: scripts/spdxexclude') ap.add_argument('-f', '--files', action='store_true', help='Show files without SPDX.') ap.add_argument('-m', '--maxlines', type=int, default=15, @@ -316,6 +364,15 @@ if __name__ == '__main__': sys.stderr.write('%s\n' %traceback.format_exc()) sys.exit(1) + try: + fname = args.exclude + if not fname: + fname = os.path.join(os.path.dirname(__file__), 'spdxexclude') + exclude_rules = read_exclude_file(fname) + except Exception as ex: + sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex)) + sys.exit(1) + try: if len(args.path) and args.path[0] == '-': stdin = os.fdopen(sys.stdin.fileno(), 'rb') @@ -349,6 +406,7 @@ if __name__ == '__main__': sys.stderr.write('License IDs %12d\n' %len(spdx.licenses)) sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions)) sys.stderr.write('\n') + sys.stderr.write('Files excluded: %12d\n' %parser.excluded) sys.stderr.write('Files checked: %12d\n' %parser.checked) sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked) if parser.checked: diff --git a/scripts/spdxexclude b/scripts/spdxexclude new file mode 100644 index 000000000000..5b30fe246197 --- /dev/null +++ b/scripts/spdxexclude @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Patterns for excluding files and directories + +# Ignore the license directory and the licensing documentation which would +# create lots of noise for no value +LICENSES/ +license-rules.rst -- cgit v1.2.3 From 2fb977133684bb74d301bd86a9bb1bd2762362fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2022 12:27:30 +0200 Subject: scripts/spdxcheck: Exclude config directories Kernel configuration files like default configs are machine generated and pretty useless outside of the kernel context. 
Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- scripts/spdxexclude | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'scripts') diff --git a/scripts/spdxexclude b/scripts/spdxexclude index 5b30fe246197..4a7fb16dc823 100644 --- a/scripts/spdxexclude +++ b/scripts/spdxexclude @@ -6,3 +6,8 @@ # create lots of noise for no value LICENSES/ license-rules.rst + +# Ignore config files and snippets. The majority is generated +# by the Kconfig tools +kernel/configs/ +arch/*/configs/ -- cgit v1.2.3 From 2ab99ce9780d3c3505db4b83669869627010307e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2022 12:27:32 +0200 Subject: scripts/spdxcheck: Exclude MAINTAINERS/CREDITS Listings of maintainers and people who deserve credits are not really interesting in terms of copyright. The usage of these files outside of the kernel is pointless and the file format is trivial. No point in chasing them or slapping a SPDX identifier into them just because. Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- scripts/spdxexclude | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'scripts') diff --git a/scripts/spdxexclude b/scripts/spdxexclude index 4a7fb16dc823..73ef8caf5e61 100644 --- a/scripts/spdxexclude +++ b/scripts/spdxexclude @@ -11,3 +11,7 @@ license-rules.rst # by the Kconfig tools kernel/configs/ arch/*/configs/ + +# Other files without copyrightable content +/CREDITS +/MAINTAINERS -- cgit v1.2.3 From e0208351383c19e62f5f04209ce4ecf24db64eaf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 May 2022 12:27:35 +0200 Subject: scripts/spdxcheck: Exclude top-level README Nothing copyrightable to see here. 
Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- scripts/spdxexclude | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/spdxexclude b/scripts/spdxexclude index 73ef8caf5e61..81bdb13ed789 100644 --- a/scripts/spdxexclude +++ b/scripts/spdxexclude @@ -15,3 +15,4 @@ arch/*/configs/ # Other files without copyrightable content /CREDITS /MAINTAINERS +/README -- cgit v1.2.3