summaryrefslogtreecommitdiff
path: root/tools/binman/elf.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/binman/elf.py')
-rw-r--r--tools/binman/elf.py577
1 files changed, 577 insertions, 0 deletions
diff --git a/tools/binman/elf.py b/tools/binman/elf.py
new file mode 100644
index 00000000000..6ac960e0419
--- /dev/null
+++ b/tools/binman/elf.py
@@ -0,0 +1,577 @@
+# SPDX-License-Identifier: GPL-2.0+
+# Copyright (c) 2016 Google, Inc
+# Written by Simon Glass <sjg@chromium.org>
+#
+# Handle various things related to ELF images
+#
+
+from collections import namedtuple, OrderedDict
+import io
+import os
+import re
+import shutil
+import struct
+import tempfile
+
+from u_boot_pylib import command
+from u_boot_pylib import tools
+from u_boot_pylib import tout
+
+ELF_TOOLS = True
+try:
+ from elftools.elf.elffile import ELFFile
+ from elftools.elf.elffile import ELFError
+ from elftools.elf.sections import SymbolTableSection
+except: # pragma: no cover
+ ELF_TOOLS = False
+
+# BSYM in little endian, keep in sync with include/binman_sym.h
+BINMAN_SYM_MAGIC_VALUE = 0x4d595342
+
+# Information about an ELF symbol:
+# section (str): Name of the section containing this symbol
+# address (int): Address of the symbol (its value)
+# size (int): Size of the symbol in bytes
+# weak (bool): True if the symbol is weak
+# offset (int or None): Offset of the symbol's data in the ELF file, or None if
+# not known
+Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset'])
+
+# Information about an ELF file:
+# data: Extracted program contents of ELF file (this would be loaded by an
+# ELF loader when reading this file)
+# load: Load address of code
+# entry: Entry address of code
+# memsize: Number of bytes in memory occupied by loading this ELF file
+ElfInfo = namedtuple('ElfInfo', ['data', 'load', 'entry', 'memsize'])
+
+
def GetSymbols(fname, patterns):
    """Get the symbols from an ELF file

    This runs 'objdump -t' on the file and parses the lines which follow the
    'SYMBOL TABLE' marker in its output.

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string. Only
            output lines matching at least one pattern are used. If None or
            empty, every symbol line is used

    Returns:
        OrderedDict, sorted by symbol address:
            key: Name of symbol
            value: Symbol object for that symbol; its offset member is
                always None here
    """
    stdout = tools.run('objdump', '-t', fname)
    re_syms = re.compile('|'.join(patterns)) if patterns else None
    syms = {}
    syms_started = False
    for line in stdout.splitlines():
        if not line or not syms_started:
            if 'SYMBOL TABLE' in line:
                syms_started = True
            line = None  # Otherwise code coverage complains about 'continue'
            continue
        if re_syms and not re_syms.search(line):
            continue

        # A symbol line is the value, a space, seven flag characters and then
        # whitespace-separated fields: section, size, an optional '.hidden'
        # marker and the name
        value, _, rest = line.partition(' ')
        flags = rest[:7]
        parts = rest[7:].split()
        if len(parts) > 2 and parts[2] == '.hidden':
            del parts[2]
        if len(parts) < 3:
            # No name on this line (or too few fields to be a symbol), so
            # there is nothing to record
            continue
        section, size, name = parts[:3]
        syms[name] = Symbol(section, int(value, 16), int(size, 16),
                            flags[1] == 'w', None)

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
+
+def _GetFileOffset(elf, addr):
+ """Get the file offset for an address
+
+ Args:
+ elf (ELFFile): ELF file to check
+ addr (int): Address to search for
+
+ Returns
+ int: Offset of that address in the ELF file, or None if not valid
+ """
+ for seg in elf.iter_segments():
+ seg_end = seg['p_vaddr'] + seg['p_filesz']
+ if seg.header['p_type'] == 'PT_LOAD':
+ if addr >= seg['p_vaddr'] and addr < seg_end:
+ return addr - seg['p_vaddr'] + seg['p_offset']
+
def GetFileOffset(fname, addr):
    """Look up the file offset of an address in an ELF file on disk

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        int: Offset of that address in the ELF file, or None if not valid

    Raises:
        ValueError: The Python elftools module is not available
    """
    if ELF_TOOLS:
        with open(fname, 'rb') as elf_fd:
            return _GetFileOffset(ELFFile(elf_fd), addr)
    raise ValueError("Python: No module named 'elftools'")
+
def GetSymbolFromAddress(fname, addr):
    """Find the name of the symbol located at a given address

    Symbols are searched in order of increasing address and the first one
    whose address matches exactly is used.

    Args:
        fname (str): Filename of ELF file to check
        addr (int): Address to search for

    Returns:
        str: Symbol name, or None if no symbol at that address

    Raises:
        ValueError: The Python elftools module is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with open(fname, 'rb') as fd:
        # The file is parsed by elftools before the symbols are read; the
        # parsed object itself is not used
        ELFFile(fd)
        syms = GetSymbols(fname, None)
    matching = (name for name, sym in syms.items() if sym.address == addr)
    return next(matching, None)
+
def GetSymbolFileOffset(fname, patterns):
    """Get the symbols from an ELF file, including their file offsets

    Unlike GetSymbols() this reads the symbol tables using elftools rather
    than by running objdump.

    Args:
        fname: Filename of the ELF file to read
        patterns: List of regex patterns to search for, each a string. A
            symbol is included if its name matches at least one pattern. If
            None or empty, all symbols are included

    Returns:
        OrderedDict, sorted by symbol address:
            key: Name of symbol
            value: Symbol object. Its section member is the name of the
                symbol-table section the symbol was read from, and its
                offset member is the file offset of the symbol's address, or
                None if that address has no file offset

    Raises:
        ValueError: The Python elftools module is not available
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")

    # As with GetSymbols(), no patterns means no filtering
    re_syms = re.compile('|'.join(patterns)) if patterns else None
    syms = {}
    with open(fname, 'rb') as fd:
        elf = ELFFile(fd)
        for section in elf.iter_sections():
            if not isinstance(section, SymbolTableSection):
                continue
            for symbol in section.iter_symbols():
                if re_syms and not re_syms.search(symbol.name):
                    continue
                addr = symbol.entry['st_value']
                syms[symbol.name] = Symbol(
                    section.name, addr, symbol.entry['st_size'],
                    symbol.entry['st_info']['bind'] == 'STB_WEAK',
                    _GetFileOffset(elf, addr))

    # Sort dict by address
    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
+
def GetSymbolAddress(fname, sym_name):
    """Get the address of a single symbol from an ELF file

    Args:
        fname: Filename of the ELF file to read
        sym_name (str): Name of the symbol to look up. It is also passed to
            GetSymbols() as the only search pattern

    Returns:
        Symbol value (as an integer) or None if not found
    """
    found = GetSymbols(fname, [sym_name]).get(sym_name)
    return found.address if found else None
+
def GetPackString(sym, msg):
    """Choose the struct.pack/unpack format string for a symbol

    The format is always little-endian and unsigned; only its width depends
    on the symbol.

    Args:
        sym (Symbol): Symbol to check. Only the size member is checked
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        str: struct string to use, e.g. '<I'

    Raises:
        ValueError: Symbol has an unexpected size
    """
    pack_string = {4: '<I', 8: '<Q'}.get(sym.size)
    if pack_string is None:
        raise ValueError('%s has size %d: only 4 and 8 are supported' %
                         (msg, sym.size))
    return pack_string
+
def GetSymbolOffset(elf_fname, sym_name, base_sym=None):
    """Read the offset of a symbol relative to a base symbol

    This is useful for obtaining the value of a single symbol relative to the
    base of a binary blob.

    Args:
        elf_fname: Filename of the ELF file to read; it is located using
            tools.get_input_filename()
        sym_name (str): Name of symbol to read
        base_sym (str): Base symbol to use to calculate the offset (or None
            to use '__image_copy_start')

    Returns:
        int: Address of the symbol minus the address of the base symbol
    """
    base_name = base_sym or '__image_copy_start'
    syms = GetSymbols(tools.get_input_filename(elf_fname),
                      [base_name, sym_name])
    base_addr = syms[base_name].address
    sym_addr = syms[sym_name].address
    return sym_addr - base_addr
+
def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False,
                          base_sym=None, base_addr=None):
    """Replace all symbols in an entry with their correct values

    The entry contents is updated so that values for referenced symbols will
    be visible at run time. This is done by finding out the symbols' offsets
    in the entry (using the ELF file) and replacing them with values from
    binman's data structures.

    Only symbols whose names start with '_binman' are written. The symbol
    '_binman_sym_magic' always receives BINMAN_SYM_MAGIC_VALUE. A symbol for
    which no value is available is written as -1.

    Args:
        elf_fname: Filename of ELF image containing the symbol information
            for entry
        entry: Entry to process; its data member is updated
        section: Section which can be used to lookup symbol values
        is_elf (bool): True if the entry contents is itself an ELF file, in
            which case each symbol is written at its file offset; False if
            it is a blob, in which case each symbol is written at its address
            relative to the base symbol
        base_sym: Base symbol marking the start of the image
            (__image_copy_start by default)
        base_addr (int): Base address to use for the entry being written. If
            None then 0 is used for an ELF file and the value of base_sym
            otherwise

    Returns:
        int: Number of symbols written

    Raises:
        ValueError: is_elf is set but elftools is not available, or a blob
            symbol lies outside the entry contents, or a symbol has an
            unsupported size
    """
    base_sym = base_sym or '__image_copy_start'
    fname = tools.get_input_filename(elf_fname)
    syms = GetSymbols(fname, ['image', 'binman'])
    if is_elf:
        if not ELF_TOOLS:
            msg = ("Section '%s': entry '%s'" %
                   (section.GetPath(), entry.GetPath()))
            raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools')
        with open(fname, 'rb') as fd:
            elf = ELFFile(fd)
            # Same symbols in the same order, with the file offset filled in
            syms = {
                name: Symbol(sym.section, sym.address, sym.size, sym.weak,
                             _GetFileOffset(elf, sym.address))
                for name, sym in syms.items()}

    if not syms:
        tout.debug('LookupAndWriteSymbols: no syms')
        return 0
    base = syms.get(base_sym)
    if not (base or is_elf):
        tout.debug(f'LookupAndWriteSymbols: no base: elf_fname={elf_fname}, base_sym={base_sym}, is_elf={is_elf}')
        return 0
    if base_addr is None:
        base_addr = 0 if is_elf else base.address

    count = 0
    for name, sym in syms.items():
        if not name.startswith('_binman'):
            continue
        msg = ("Section '%s': Symbol '%s'\n   in entry '%s'" %
               (section.GetPath(), name, entry.GetPath()))
        if is_elf:
            # For ELF files, use the file offset
            offset = sym.offset
        else:
            # For blobs use the offset of the symbol, calculated by
            # subtracting the base address which by definition is at the
            # start
            offset = sym.address - base.address
            if offset < 0 or offset + sym.size > entry.contents_size:
                raise ValueError('%s has offset %x (size %x) but the contents '
                                 'size is %x' % (entry.GetPath(), offset,
                                                 sym.size,
                                                 entry.contents_size))
        pack_string = GetPackString(sym, msg)
        if name == '_binman_sym_magic':
            value = BINMAN_SYM_MAGIC_VALUE
        else:
            # Look up the symbol in our entry tables.
            image = section.GetImage()
            value = image.GetImageSymbolValue(name, sym.weak, msg, base_addr)
        if value is None:
            # No value: write -1, which needs the signed form of the format
            value = -1
            pack_string = pack_string.lower()
        value_bytes = struct.pack(pack_string, value)
        tout.debug('%s:\n   insert %s, offset %x, value %x, length %d' %
                   (msg, name, offset, value, len(value_bytes)))
        entry.data = (entry.data[:offset] + value_bytes +
                      entry.data[offset + sym.size:])
        count += 1

    if count:
        tout.detail(
            f"Section '{section.GetPath()}': entry '{entry.GetPath()}' : {count} symbols")
    return count
+
def GetSymbolValue(sym, data, msg):
    """Read the integer value stored at a symbol's location

    This can only be used on symbols with an integer value.

    Args:
        sym (Symbol): Symbol to check
        data (bytes): Data for the ELF file - the symbol data appears at
            offset sym.offset
        msg (str): String which indicates the entry being processed, used for
            errors

    Returns:
        int: Value of the symbol

    Raises:
        ValueError: Symbol has an unexpected size
    """
    fmt = GetPackString(sym, msg)
    raw = data[sym.offset:sym.offset + sym.size]
    (value,) = struct.unpack(fmt, raw)
    return value
+
def MakeElf(elf_fname, text, data):
    """Make an ELF file containing the given text and data

    An assembly file and a linker script are generated in a temporary
    directory and built with the target compiler. The output file has several
    sections including '.text' and '.data', containing the info provided in
    arguments. The temporary directory is removed afterwards, even if the
    build fails.

    Args:
        elf_fname: Output filename
        text (bytes): Text (code) to put in the file's .text section
        data (bytes): Data to put in the file's .data section
    """
    outdir = tempfile.mkdtemp(prefix='binman.elf.')
    try:
        s_file = os.path.join(outdir, 'elf.S')

        # Split the text into two parts so that we can make the entry point
        # two bytes after the start of the text section
        text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
        text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
        data_bytes = ['\t.byte\t%#x' % byte for byte in data]
        with open(s_file, 'w') as fd:
            print('''/* Auto-generated C program to produce an ELF file for testing */

.section .text
.code32
.globl _start
.type _start, @function
%s
_start:
%s
.ident "comment"

.comm fred,8,4

.section .empty
.globl _empty
_empty:
.byte 1

.globl ernie
.data
.type ernie, @object
.size ernie, 4
ernie:
%s
''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
                  file=fd)
        lds_file = os.path.join(outdir, 'elf.lds')

        # Use a linker script to set the alignment and text address.
        with open(lds_file, 'w') as fd:
            print('''/* Auto-generated linker script to produce an ELF file for testing */

PHDRS
{
	text PT_LOAD ;
	data PT_LOAD ;
	empty PT_LOAD FLAGS ( 6 ) ;
	note PT_NOTE ;
}

SECTIONS
{
	. = 0xfef20000;
	ENTRY(_start)
	.text . : SUBALIGN(0)
	{
		*(.text)
	} :text
	.data : {
		*(.data)
	} :data
	_bss_start = .;
	.empty : {
		*(.empty)
	} :empty
	/DISCARD/ : {
		*(.note.gnu.property)
	}
	.note : {
		*(.comment)
	} :note
	.bss _bss_start (OVERLAY) : {
		*(.bss)
	}
}
''', file=fd)
        # -static: Avoid requiring any shared libraries
        # -nostdlib: Don't link with C library
        # -Wl,--build-id=none: Don't generate a build ID, so that we just get
        #   the text section at the start
        # -m32: Build for 32-bit x86
        # -T...: Specifies the link script, which sets the start address
        cc, args = tools.get_target_compile_tool('cc')
        # Build a new list rather than extending the one we were given
        args = list(args) + ['-static', '-nostdlib', '-Wl,--build-id=none',
                             '-m32', '-T', lds_file, '-o', elf_fname, s_file]
        command.output(cc, *args)
    finally:
        # Clean up the generated sources whether or not the build worked
        shutil.rmtree(outdir)
+
def DecodeElf(data, location):
    """Decode an ELF file and return information about it

    The file contents of every loadable segment (type PT_LOAD with a non-zero
    p_memsz) are copied into a single buffer, each placed according to its
    physical address (p_paddr) relative to the lowest one.

    Args:
        data (bytes): Data from ELF file
        location (int): Start address of data to return. For a segment which
            starts below this address, the contents before this address are
            skipped and the rest is placed at this address

    Returns:
        ElfInfo object containing information about the decoded ELF file:
            data (bytearray): Contents of the loadable segments
            load (int): Lowest physical address of any loadable segment
            entry (int): Entry address from the ELF header, adjusted by the
                difference between physical and virtual address of the first
                loadable segment where the two differ
            memsize (int): Distance from the load address to the highest
                in-memory end (p_paddr + p_memsz) of any loadable segment

    Raises:
        ValueError: The Python elftools module is not available, or the file
            has no loadable segments
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as fd:
        elf = ELFFile(fd)
        loadable = [seg for seg in elf.iter_segments()
                    if seg['p_type'] == 'PT_LOAD' and seg['p_memsz']]
        if not loadable:
            raise ValueError('ELF file has no loadable segments')

        # Use the real minimum rather than a 32-bit sentinel, so that
        # segments located at or above 4GB get the correct load address
        data_start = min(seg['p_paddr'] for seg in loadable)
        data_end = max(seg['p_paddr'] + seg['p_filesz'] for seg in loadable)
        mem_end = max(seg['p_paddr'] + seg['p_memsz'] for seg in loadable)
        virt_to_phys = next(
            (seg['p_paddr'] - seg['p_vaddr'] for seg in loadable
             if seg['p_paddr'] != seg['p_vaddr']), 0)

        output = bytearray(data_end - data_start)
        for seg in loadable:
            start = seg['p_paddr']
            offset = 0
            if start < location:
                offset = location - start
                start = location
            # No check is made that p_offset + p_filesz lies within the file:
            # a legal ELF file can have a program header with zero p_filesz
            # and an offset which is beyond the end of the file. There is no
            # test case for the overflow check, so it is not implemented.
            size = seg['p_filesz']
            pos = start - data_start
            # NOTE(review): when offset is non-zero the assigned data is
            # shorter than the slice it replaces, which shrinks the buffer -
            # confirm this is intended
            output[pos:pos + size] = seg.data()[offset:]
        entry = elf.header['e_entry']
    return ElfInfo(output, data_start, entry + virt_to_phys,
                   mem_end - data_start)
+
def UpdateFile(infile, outfile, start_sym, end_sym, insert):
    """Write a copy of an ELF file with data placed between two symbols

    The region of the file between the file offsets of the two symbols is
    replaced with the new data, padded with zero bytes to fill the region.
    The rest of the file is copied unchanged.

    Args:
        infile (str): Filename of the ELF file to read
        outfile (str): Filename of the file to write
        start_sym (str): Name of the symbol marking the start of the region
        end_sym (str): Name of the symbol marking the end of the region
        insert (bytes): Data to place at the start of the region

    Raises:
        ValueError: The two symbols were not both found (or other symbols
            matched as well), or the data does not fit in the region
    """
    tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
                (outfile, len(insert), len(insert), start_sym, end_sym))
    syms = GetSymbolFileOffset(infile, [start_sym, end_sym])
    if len(syms) != 2:
        raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
                         (start_sym, end_sym, len(syms),
                          ','.join(syms.keys())))

    end_offset = syms[end_sym].offset
    start_offset = syms[start_sym].offset
    size = end_offset - start_offset
    if len(insert) > size:
        raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
                         (infile, len(insert), len(insert), size, size))

    data = tools.read_file(infile)
    padding = tools.get_bytes(0, size - len(insert))
    newdata = data[:start_offset] + insert + padding + data[end_offset:]
    tools.write_file(outfile, newdata)
    tout.info('Written to offset %#x' % start_offset)
+
def read_loadable_segments(data):
    """Read segments from an ELF file

    Only loadable segments are returned, i.e. those of type PT_LOAD with a
    non-zero p_memsz.

    Args:
        data (bytes): Contents of file

    Returns:
        tuple:
            list of segments, each:
                int: Segment number (0 = first); segments which are skipped
                    still use up a number
                int: Start address of segment in memory (p_paddr)
                bytes: Contents of segment (p_filesz bytes from p_offset)
            int: entry address for image

    Raises:
        ValueError: elftools is not available, or the data is not a valid
            ELF file
    """
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    with io.BytesIO(data) as inf:
        try:
            elf = ELFFile(inf)
        except ELFError as err:
            # Report it in the same way as other errors from this module,
            # keeping the original error as the cause
            raise ValueError(err) from err
        entry = elf.header['e_entry']
        segments = []
        for seg_num, segment in enumerate(elf.iter_segments()):
            if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
                continue
            start = segment['p_offset']
            rend = start + segment['p_filesz']
            segments.append((seg_num, segment['p_paddr'], data[start:rend]))
    return segments, entry
+
def is_valid(data):
    """Check if some binary data is a valid ELF file

    The data is considered valid if DecodeElf() can process it without
    elftools reporting an error.

    Args:
        data (bytes): Bytes to check

    Returns:
        bool: True if a valid ELF file, False if not

    Raises:
        ValueError: The Python elftools module is not available
    """
    # Without elftools the ELFError name below does not exist, so report the
    # problem in the same way as the other functions in this file
    if not ELF_TOOLS:
        raise ValueError("Python: No module named 'elftools'")
    try:
        DecodeElf(data, 0)
    except ELFError:
        return False
    return True