1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
"""Helper functions to parse C code in heavily constrained scenarios.
Currently supported functionality:
* read_function_declarations: read function declarations from a header file.
"""
# Copyright The Mbed TLS Contributors
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
### WARNING: the code in this file has not been extensively reviewed yet.
### We do not think it is harmful, but it may be below our normal standards
### for robustness and maintainability.
import re
from typing import Dict, Iterable, Iterator, List, Optional, Tuple
class ArgumentInfo:
    """Information about an argument to an API function.

    Attributes:
        decl: the declaration text, stripped of surrounding whitespace.
        type: the argument's type, with whitespace normalized.
        name: the argument's name, or None if the declaration has no name.
        suffix: the trailing array suffix (for example ``[16]``), or an
            empty string if there is none.
    """
    #pylint: disable=too-few-public-methods

    # C keywords that may appear in a type but can never be the name of an
    # argument. They are excluded as candidates for the argument name so that
    # an unnamed argument such as "unsigned int" is parsed as a type only.
    _KEYWORDS = [
        'const', 'register', 'restrict', 'volatile',
        'char', 'double', 'float', 'int', 'long', 'short',
        'signed', 'unsigned', 'void',
    ]

    # The negative lookahead ends with \b so that it only rejects whole
    # keywords: an identifier that merely starts with a keyword (such as
    # "interval" or "longer") is still accepted as an argument name.
    _DECLARATION_RE = re.compile(
        r'(?P<type>\w[\w\s*]*?)\s*' +
        r'(?!(?:' + r'|'.join(_KEYWORDS) + r')\b)(?P<name>\b\w+\b)?' +
        r'\s*(?P<suffix>\[[^][]*\])?\Z',
        re.A | re.S)

    @classmethod
    def normalize_type(cls, typ: str) -> str:
        """Normalize whitespace in a type.

        Each run of whitespace becomes a single space, and each ``*`` is
        preceded by exactly one space.
        """
        typ = re.sub(r'\s+', r' ', typ)
        typ = re.sub(r'\s*\*', r' *', typ)
        return typ

    def __init__(self, decl: str) -> None:
        """Parse the declaration of one function argument.

        Raise ValueError (with the stripped declaration as its argument)
        if the declaration does not have the supported form
        ``type [name] [array-suffix]``.
        """
        self.decl = decl.strip()
        m = self._DECLARATION_RE.match(self.decl)
        if not m:
            raise ValueError(self.decl)
        self.type = self.normalize_type(m.group('type')) #type: str
        self.name = m.group('name') #type: Optional[str]
        # An absent suffix group is None: store an empty string instead.
        self.suffix = m.group('suffix') or '' #type: str
class FunctionInfo:
    """Description of one API function declaration.

    Attributes:
        filename: the file name passed to the constructor.
        line_number: the line number passed to the constructor.
        qualifiers: the qualifiers, stored as a frozenset of strings.
        return_type: the text of the return type.
        name: the function's name.
        arguments: one ArgumentInfo object per argument declaration.
    """
    #pylint: disable=too-few-public-methods

    # Pattern recognizing a string that ends with the word "void",
    # optionally followed by whitespace.
    VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.A)

    def __init__(self, #pylint: disable=too-many-arguments
                 filename: str,
                 line_number: int,
                 qualifiers: Iterable[str],
                 return_type: str,
                 name: str,
                 arguments: List[str]) -> None:
        """Record a declaration and parse each argument declaration."""
        self.filename = filename
        self.line_number = line_number
        # Duplicated qualifiers collapse and their order is not kept.
        self.qualifiers = frozenset(qualifiers)
        self.return_type = return_type
        self.name = name
        parsed = []
        for declaration in arguments:
            parsed.append(ArgumentInfo(declaration))
        self.arguments = parsed

    def returns_void(self) -> bool:
        """Tell whether the return type is plain ``void``."""
        found = self.VOID_RE.search(self.return_type)
        return found is not None
# Match one C comment.
# Note that we match both comment types, so things like // in a /*...*/
# comment are handled correctly.
# In a // comment, the backslash-newline alternative must come before the
# "any character except newline" alternative: otherwise the backslash is
# consumed as an ordinary character and the match stops at the end of the
# physical line instead of following the line continuation.
# Note that string literals are not recognized, so a comment marker inside
# a string literal is treated as the start of a comment.
_C_COMMENT_RE = re.compile(r'//(?:\\\n|[^\n])*|/\*.*?\*/', re.S)
# Match a maximal run of characters that are not newlines.
_NOT_NEWLINES_RE = re.compile(r'[^\n]+')
def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
    """Read logical lines from a file.

    A logical line consists of one or more physical lines, joined with
    newlines, such that its parentheses are balanced. Comments are removed
    before the lines are assembled.

    Yield pairs ``(line_number, text)`` where ``line_number`` is the
    1-based number of the first physical line of the logical line.

    If the parentheses are unbalanced at the end of the file, the last
    logical line extends to the end of the file.
    """
    with open(filename, encoding='utf-8') as inp:
        content = inp.read()
    # Strip comments, but keep newlines for line numbering
    content = _C_COMMENT_RE.sub(
        lambda m: _NOT_NEWLINES_RE.sub("", m.group(0)),
        content)
    lines = enumerate(content.splitlines(), 1)
    for line_number, line in lines:
        # Read a logical line, containing balanced parentheses.
        # We assume that parentheses are balanced (this should be ok
        # since comments have been stripped), otherwise there will be
        # a gigantic logical line at the end.
        paren_level = line.count('(') - line.count(')')
        while paren_level > 0:
            # Use a default value rather than letting StopIteration escape:
            # inside a generator, an escaping StopIteration is turned into
            # a RuntimeError.
            following = next(lines, None)
            if following is None:
                break
            more = following[1]
            paren_level += more.count('(') - more.count(')')
            line += '\n' + more
        yield line_number, line
# Match a function declaration that starts at the beginning of a logical
# line. The pattern is assembled from one piece per syntactic element.
_C_FUNCTION_DECLARATION_RE = re.compile(
    ''.join([
        # Any number of storage-class or inline qualifiers.
        r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)',
        # The return type: words, whitespace and asterisks.
        r'(?P<return_type>\w[\w\s*]*?)\s*',
        # The function name: the last word before the parenthesis.
        r'\b(?P<name>\w+)',
        # The parenthesized argument list, then the final semicolon.
        r'\s*\((?P<arguments>.*)\)\s*;',
    ]),
    flags=re.A | re.S)
def read_function_declarations(functions: Dict[str, FunctionInfo],
                               filename: str) -> None:
    """Collect function declarations from a C header file.

    For each logical line of ``filename`` that starts with a function
    declaration, store a FunctionInfo object in ``functions`` under the
    function's name. Lines that do not match are skipped.

    An argument list that is empty or consists only of ``void`` is recorded
    as an empty list of arguments.

    Raise ValueError if the declaration of an argument cannot be parsed.
    """
    for line_number, line in read_logical_lines(filename):
        m = _C_FUNCTION_DECLARATION_RE.match(line)
        if not m:
            continue
        qualifiers = m.group('qualifiers').split()
        return_type = m.group('return_type')
        name = m.group('name')
        raw_arguments = m.group('arguments')
        if not raw_arguments.strip() or \
           FunctionInfo.VOID_RE.match(raw_arguments):
            # "f()" and "f(void)": no arguments. Splitting an empty string
            # would yield one empty declaration, which cannot be parsed.
            arguments = [] #type: List[str]
        else:
            arguments = raw_arguments.split(',')
        # Note: we replace any existing declaration for the same name.
        functions[name] = FunctionInfo(filename, line_number,
                                       qualifiers,
                                       return_type,
                                       name,
                                       arguments)
|