diff options
Diffstat (limited to 'tools/binman/elf.py')
-rw-r--r-- | tools/binman/elf.py | 577 |
1 files changed, 577 insertions, 0 deletions
diff --git a/tools/binman/elf.py b/tools/binman/elf.py new file mode 100644 index 00000000000..6ac960e0419 --- /dev/null +++ b/tools/binman/elf.py @@ -0,0 +1,577 @@ +# SPDX-License-Identifier: GPL-2.0+ +# Copyright (c) 2016 Google, Inc +# Written by Simon Glass <sjg@chromium.org> +# +# Handle various things related to ELF images +# + +from collections import namedtuple, OrderedDict +import io +import os +import re +import shutil +import struct +import tempfile + +from u_boot_pylib import command +from u_boot_pylib import tools +from u_boot_pylib import tout + +ELF_TOOLS = True +try: + from elftools.elf.elffile import ELFFile + from elftools.elf.elffile import ELFError + from elftools.elf.sections import SymbolTableSection +except: # pragma: no cover + ELF_TOOLS = False + +# BSYM in little endian, keep in sync with include/binman_sym.h +BINMAN_SYM_MAGIC_VALUE = 0x4d595342 + +# Information about an ELF symbol: +# section (str): Name of the section containing this symbol +# address (int): Address of the symbol (its value) +# size (int): Size of the symbol in bytes +# weak (bool): True if the symbol is weak +# offset (int or None): Offset of the symbol's data in the ELF file, or None if +# not known +Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset']) + +# Information about an ELF file: +# data: Extracted program contents of ELF file (this would be loaded by an +# ELF loader when reading this file +# load: Load address of code +# entry: Entry address of code +# memsize: Number of bytes in memory occupied by loading this ELF file +ElfInfo = namedtuple('ElfInfo', ['data', 'load', 'entry', 'memsize']) + + +def GetSymbols(fname, patterns): + """Get the symbols from an ELF file + + Args: + fname: Filename of the ELF file to read + patterns: List of regex patterns to search for, each a string + + Returns: + None, if the file does not exist, or Dict: + key: Name of symbol + value: Hex value of symbol + """ + stdout = tools.run('objdump', '-t', fname) + lines = stdout.splitlines() + if patterns: + re_syms = re.compile('|'.join(patterns)) + else: + re_syms = None + syms = {} + syms_started = False + for line in lines: + if not line or not syms_started: + if 'SYMBOL TABLE' in line: + syms_started = True + line = None # Otherwise code coverage complains about 'continue' + continue + if re_syms and not re_syms.search(line): + continue + + space_pos = line.find(' ') + value, rest = line[:space_pos], line[space_pos + 1:] + flags = rest[:7] + parts = rest[7:].split() + section, size = parts[:2] + if len(parts) > 2: + name = parts[2] if parts[2] != '.hidden' else parts[3] + syms[name] = Symbol(section, int(value, 16), int(size, 16), + flags[1] == 'w', None) + + # Sort dict by address + return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address)) + +def _GetFileOffset(elf, addr): + """Get the file offset for an address + + Args: + elf (ELFFile): ELF file to check + addr (int): Address to search for + + Returns + int: Offset of that address in the ELF file, or None if not valid + """ + for seg in elf.iter_segments(): + seg_end = seg['p_vaddr'] + seg['p_filesz'] + if seg.header['p_type'] == 'PT_LOAD': + if addr >= seg['p_vaddr'] and addr < seg_end: + return addr - seg['p_vaddr'] + seg['p_offset'] + +def GetFileOffset(fname, addr): + """Get the file offset for an address + + Args: + fname (str): Filename of ELF file to check + addr (int): Address to search for + + Returns + int: Offset of that address in the ELF file, or None if not valid + """ + if not ELF_TOOLS: + raise ValueError("Python: No module named 'elftools'") + with open(fname, 'rb') as fd: + elf = ELFFile(fd) + return _GetFileOffset(elf, addr) + +def GetSymbolFromAddress(fname, addr): + """Get the symbol at a particular address + + Args: + fname (str): Filename of ELF file to check + addr (int): Address to search for + + Returns: + str: Symbol name, or None if no symbol at that address + """ + if not ELF_TOOLS: + raise ValueError("Python: No module named 'elftools'") + with open(fname, 'rb') as fd: + elf = ELFFile(fd) + syms = GetSymbols(fname, None) + for name, sym in syms.items(): + if sym.address == addr: + return name + +def GetSymbolFileOffset(fname, patterns): + """Get the symbols from an ELF file + + Args: + fname: Filename of the ELF file to read + patterns: List of regex patterns to search for, each a string + + Returns: + None, if the file does not exist, or Dict: + key: Name of symbol + value: Hex value of symbol + """ + if not ELF_TOOLS: + raise ValueError("Python: No module named 'elftools'") + + syms = {} + with open(fname, 'rb') as fd: + elf = ELFFile(fd) + + re_syms = re.compile('|'.join(patterns)) + for section in elf.iter_sections(): + if isinstance(section, SymbolTableSection): + for symbol in section.iter_symbols(): + if not re_syms or re_syms.search(symbol.name): + addr = symbol.entry['st_value'] + syms[symbol.name] = Symbol( + section.name, addr, symbol.entry['st_size'], + symbol.entry['st_info']['bind'] == 'STB_WEAK', + _GetFileOffset(elf, addr)) + + # Sort dict by address + return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address)) + +def GetSymbolAddress(fname, sym_name): + """Get a value of a symbol from an ELF file + + Args: + fname: Filename of the ELF file to read + patterns: List of regex patterns to search for, each a string + + Returns: + Symbol value (as an integer) or None if not found + """ + syms = GetSymbols(fname, [sym_name]) + sym = syms.get(sym_name) + if not sym: + return None + return sym.address + +def GetPackString(sym, msg): + """Get the struct.pack/unpack string to use with a given symbol + + Args: + sym (Symbol): Symbol to check. Only the size member is checked + @msg (str): String which indicates the entry being processed, used for + errors + + Returns: + str: struct string to use, .e.g. '<I' + + Raises: + ValueError: Symbol has an unexpected size + """ + if sym.size == 4: + return '<I' + elif sym.size == 8: + return '<Q' + else: + raise ValueError('%s has size %d: only 4 and 8 are supported' % + (msg, sym.size)) + +def GetSymbolOffset(elf_fname, sym_name, base_sym=None): + """Read the offset of a symbol compared to base symbol + + This is useful for obtaining the value of a single symbol relative to the + base of a binary blob. + + Args: + elf_fname: Filename of the ELF file to read + sym_name (str): Name of symbol to read + base_sym (str): Base symbol to sue to calculate the offset (or None to + use '__image_copy_start' + + Returns: + int: Offset of the symbol relative to the base symbol + """ + if not base_sym: + base_sym = '__image_copy_start' + fname = tools.get_input_filename(elf_fname) + syms = GetSymbols(fname, [base_sym, sym_name]) + base = syms[base_sym].address + val = syms[sym_name].address + return val - base + +def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False, + base_sym=None, base_addr=None): + """Replace all symbols in an entry with their correct values + + The entry contents is updated so that values for referenced symbols will be + visible at run time. This is done by finding out the symbols offsets in the + entry (using the ELF file) and replacing them with values from binman's data + structures. + + Args: + elf_fname: Filename of ELF image containing the symbol information for + entry + entry: Entry to process + section: Section which can be used to lookup symbol values + base_sym: Base symbol marking the start of the image (__image_copy_start + by default) + base_addr (int): Base address to use for the entry being written. If + None then the value of base_sym is used + + Returns: + int: Number of symbols written + """ + if not base_sym: + base_sym = '__image_copy_start' + fname = tools.get_input_filename(elf_fname) + syms = GetSymbols(fname, ['image', 'binman']) + if is_elf: + if not ELF_TOOLS: + msg = ("Section '%s': entry '%s'" % + (section.GetPath(), entry.GetPath())) + raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools') + new_syms = {} + with open(fname, 'rb') as fd: + elf = ELFFile(fd) + for name, sym in syms.items(): + offset = _GetFileOffset(elf, sym.address) + new_syms[name] = Symbol(sym.section, sym.address, sym.size, + sym.weak, offset) + syms = new_syms + + if not syms: + tout.debug('LookupAndWriteSymbols: no syms') + return 0 + base = syms.get(base_sym) + if not base and not is_elf: + tout.debug(f'LookupAndWriteSymbols: no base: elf_fname={elf_fname}, base_sym={base_sym}, is_elf={is_elf}') + return 0 + if base_addr is None: + base_addr = 0 if is_elf else base.address + count = 0 + for name, sym in syms.items(): + if name.startswith('_binman'): + msg = ("Section '%s': Symbol '%s'\n in entry '%s'" % + (section.GetPath(), name, entry.GetPath())) + if is_elf: + # For ELF files, use the file offset + offset = sym.offset + else: + # For blobs use the offset of the symbol, calculated by + # subtracting the base address which by definition is at the + # start + offset = sym.address - base.address + if offset < 0 or offset + sym.size > entry.contents_size: + raise ValueError('%s has offset %x (size %x) but the contents ' + 'size is %x' % (entry.GetPath(), offset, + sym.size, + entry.contents_size)) + pack_string = GetPackString(sym, msg) + if name == '_binman_sym_magic': + value = BINMAN_SYM_MAGIC_VALUE + else: + # Look up the symbol in our entry tables. + value = section.GetImage().GetImageSymbolValue(name, sym.weak, + msg, base_addr) + if value is None: + value = -1 + pack_string = pack_string.lower() + value_bytes = struct.pack(pack_string, value) + tout.debug('%s:\n insert %s, offset %x, value %x, length %d' % + (msg, name, offset, value, len(value_bytes))) + entry.data = (entry.data[:offset] + value_bytes + + entry.data[offset + sym.size:]) + count += 1 + if count: + tout.detail( + f"Section '{section.GetPath()}': entry '{entry.GetPath()}' : {count} symbols") + return count + +def GetSymbolValue(sym, data, msg): + """Get the value of a symbol + + This can only be used on symbols with an integer value. + + Args: + sym (Symbol): Symbol to check + data (butes): Data for the ELF file - the symbol data appears at offset + sym.offset + @msg (str): String which indicates the entry being processed, used for + errors + + Returns: + int: Value of the symbol + + Raises: + ValueError: Symbol has an unexpected size + """ + pack_string = GetPackString(sym, msg) + value = struct.unpack(pack_string, data[sym.offset:sym.offset + sym.size]) + return value[0] + +def MakeElf(elf_fname, text, data): + """Make an elf file with the given data in a single section + + The output file has a several section including '.text' and '.data', + containing the info provided in arguments. + + Args: + elf_fname: Output filename + text: Text (code) to put in the file's .text section + data: Data to put in the file's .data section + """ + outdir = tempfile.mkdtemp(prefix='binman.elf.') + s_file = os.path.join(outdir, 'elf.S') + + # Spilt the text into two parts so that we can make the entry point two + # bytes after the start of the text section + text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]] + text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]] + data_bytes = ['\t.byte\t%#x' % byte for byte in data] + with open(s_file, 'w') as fd: + print('''/* Auto-generated C program to produce an ELF file for testing */ + +.section .text +.code32 +.globl _start +.type _start, @function +%s +_start: +%s +.ident "comment" + +.comm fred,8,4 + +.section .empty +.globl _empty +_empty: +.byte 1 + +.globl ernie +.data +.type ernie, @object +.size ernie, 4 +ernie: +%s +''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)), + file=fd) + lds_file = os.path.join(outdir, 'elf.lds') + + # Use a linker script to set the alignment and text address. + with open(lds_file, 'w') as fd: + print('''/* Auto-generated linker script to produce an ELF file for testing */ + +PHDRS +{ + text PT_LOAD ; + data PT_LOAD ; + empty PT_LOAD FLAGS ( 6 ) ; + note PT_NOTE ; +} + +SECTIONS +{ + . = 0xfef20000; + ENTRY(_start) + .text . : SUBALIGN(0) + { + *(.text) + } :text + .data : { + *(.data) + } :data + _bss_start = .; + .empty : { + *(.empty) + } :empty + /DISCARD/ : { + *(.note.gnu.property) + } + .note : { + *(.comment) + } :note + .bss _bss_start (OVERLAY) : { + *(.bss) + } +} +''', file=fd) + # -static: Avoid requiring any shared libraries + # -nostdlib: Don't link with C library + # -Wl,--build-id=none: Don't generate a build ID, so that we just get the + # text section at the start + # -m32: Build for 32-bit x86 + # -T...: Specifies the link script, which sets the start address + cc, args = tools.get_target_compile_tool('cc') + args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T', + lds_file, '-o', elf_fname, s_file] + stdout = command.output(cc, *args) + shutil.rmtree(outdir) + +def DecodeElf(data, location): + """Decode an ELF file and return information about it + + Args: + data: Data from ELF file + location: Start address of data to return + + Returns: + ElfInfo object containing information about the decoded ELF file + """ + if not ELF_TOOLS: + raise ValueError("Python: No module named 'elftools'") + file_size = len(data) + with io.BytesIO(data) as fd: + elf = ELFFile(fd) + data_start = 0xffffffff + data_end = 0 + mem_end = 0 + virt_to_phys = 0 + + for i in range(elf.num_segments()): + segment = elf.get_segment(i) + if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']: + skipped = 1 # To make code-coverage see this line + continue + start = segment['p_paddr'] + mend = start + segment['p_memsz'] + rend = start + segment['p_filesz'] + data_start = min(data_start, start) + data_end = max(data_end, rend) + mem_end = max(mem_end, mend) + if not virt_to_phys: + virt_to_phys = segment['p_paddr'] - segment['p_vaddr'] + + output = bytearray(data_end - data_start) + for i in range(elf.num_segments()): + segment = elf.get_segment(i) + if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']: + skipped = 1 # To make code-coverage see this line + continue + start = segment['p_paddr'] + offset = 0 + if start < location: + offset = location - start + start = location + # A legal ELF file can have a program header with non-zero length + # but zero-length file size and a non-zero offset which, added + # together, are greater than input->size (i.e. the total file size). + # So we need to not even test in the case that p_filesz is zero. + # Note: All of this code is commented out since we don't have a test + # case for it. + size = segment['p_filesz'] + #if not size: + #continue + #end = segment['p_offset'] + segment['p_filesz'] + #if end > file_size: + #raise ValueError('Underflow copying out the segment. File has %#x bytes left, segment end is %#x\n', + #file_size, end) + output[start - data_start:start - data_start + size] = ( + segment.data()[offset:]) + return ElfInfo(output, data_start, elf.header['e_entry'] + virt_to_phys, + mem_end - data_start) + +def UpdateFile(infile, outfile, start_sym, end_sym, insert): + tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" % + (outfile, len(insert), len(insert), start_sym, end_sym)) + syms = GetSymbolFileOffset(infile, [start_sym, end_sym]) + if len(syms) != 2: + raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" % + (start_sym, end_sym, len(syms), + ','.join(syms.keys()))) + + size = syms[end_sym].offset - syms[start_sym].offset + if len(insert) > size: + raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" % + (infile, len(insert), len(insert), size, size)) + + data = tools.read_file(infile) + newdata = data[:syms[start_sym].offset] + newdata += insert + tools.get_bytes(0, size - len(insert)) + newdata += data[syms[end_sym].offset:] + tools.write_file(outfile, newdata) + tout.info('Written to offset %#x' % syms[start_sym].offset) + +def read_loadable_segments(data): + """Read segments from an ELF file + + Args: + data (bytes): Contents of file + + Returns: + tuple: + list of segments, each: + int: Segment number (0 = first) + int: Start address of segment in memory + bytes: Contents of segment + int: entry address for image + + Raises: + ValueError: elftools is not available + """ + if not ELF_TOOLS: + raise ValueError("Python: No module named 'elftools'") + with io.BytesIO(data) as inf: + try: + elf = ELFFile(inf) + except ELFError as err: + raise ValueError(err) + entry = elf.header['e_entry'] + segments = [] + for i in range(elf.num_segments()): + segment = elf.get_segment(i) + if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']: + skipped = 1 # To make code-coverage see this line + continue + start = segment['p_offset'] + rend = start + segment['p_filesz'] + segments.append((i, segment['p_paddr'], data[start:rend])) + return segments, entry + +def is_valid(data): + """Check if some binary data is a valid ELF file + + Args: + data (bytes): Bytes to check + + Returns: + bool: True if a valid Elf file, False if not + """ + try: + DecodeElf(data, 0) + return True + except ELFError: + return False |