1 files changed, 577 insertions, 0 deletions
diff --git a/tools/binman/elf.py b/tools/binman/elf.py
new file mode 100644
index 00000000000..6ac960e0419
--- /dev/null
+++ b/tools/binman/elf.py
@@ -0,0 +1,577 @@
+# SPDX-License-Identifier: GPL-2.0+
+# Copyright (c) 2016 Google, Inc
+# Written by Simon Glass <sjg@chromium.org>
+#
+# Handle various things related to ELF images
+#
+
+from collections import namedtuple, OrderedDict
+import io
+import os
+import re
+import shutil
+import struct
+import tempfile
+
+from u_boot_pylib import command
+from u_boot_pylib import tools
+from u_boot_pylib import tout
+
+# pyelftools is optional. ELF_TOOLS records whether it could be imported.
+ELF_TOOLS = True
+try:
+    from elftools.elf.elffile import ELFFile
+    from elftools.elf.elffile import ELFError
+    from elftools.elf.sections import SymbolTableSection
+except ImportError:  # pragma: no cover
+    # Only a failed import disables the elftools features; any other exception
+    # is a real problem and is allowed to propagate
+    ELF_TOOLS = False
+
+# BSYM in little endian, keep in sync with include/binman_sym.h
+BINMAN_SYM_MAGIC_VALUE = 0x4d595342
+
+# Information about an ELF symbol:
+# section (str): Name of the section containing this symbol
+# address (int): Address of the symbol (its value)
+# size (int): Size of the symbol in bytes
+# weak (bool): True if the symbol is weak
+# offset (int or None): Offset of the symbol's data in the ELF file, or None if
+#   not known
+Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset'])
+
+# Information about an ELF file:
+#    data: Extracted program contents of ELF file (this would be loaded by an
+#           ELF loader when reading this file)
+#    load: Load address of code
+#    entry: Entry address of code
+#    memsize: Number of bytes in memory occupied by loading this ELF file
+ElfInfo = namedtuple('ElfInfo', ['data', 'load', 'entry', 'memsize'])
+
+
+def GetSymbols(fname, patterns):
+    """Get the symbols from an ELF file
+
+    The symbol table is obtained by running 'objdump -t' on the file and
+    parsing the text that it produces.
+
+    Args:
+        fname (str): Filename of the ELF file to read
+        patterns (list of str): Regex patterns to search for. A symbol-table
+            line is used if any of the patterns is found in it. Use None or
+            an empty list to get all symbols
+
+    Returns:
+        OrderedDict: Symbols found, sorted by address:
+            key (str): Name of symbol
+            value (Symbol): Information about the symbol. The offset member
+                is always None
+    """
+    stdout = tools.run('objdump', '-t', fname)
+    re_syms = re.compile('|'.join(patterns)) if patterns else None
+    syms = {}
+    syms_started = False
+    for line in stdout.splitlines():
+        if not syms_started:
+            # Ignore everything up to and including the table heading
+            syms_started = 'SYMBOL TABLE' in line
+            continue
+        if not line:
+            continue
+        if re_syms and not re_syms.search(line):
+            continue
+
+        # The value comes first, then seven flag characters, then the
+        # whitespace-separated section, size and name
+        space_pos = line.find(' ')
+        value, rest = line[:space_pos], line[space_pos + 1:]
+        flags = rest[:7]
+        parts = rest[7:].split()
+
+        # Only unpack once we know that the line has a symbol name, so that a
+        # short line is skipped instead of raising an exception
+        if len(parts) > 2:
+            section, size = parts[:2]
+            name = parts[2] if parts[2] != '.hidden' else parts[3]
+            syms[name] = Symbol(section, int(value, 16), int(size, 16),
+                                flags[1] == 'w', None)
+
+    # Sort dict by address
+    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
+
+def _GetFileOffset(elf, addr):
+    """Translate an address into an offset within the ELF file
+
+    Only PT_LOAD segments are considered, and only the part of each segment
+    which has data in the file (p_filesz bytes from p_vaddr).
+
+    Args:
+        elf (ELFFile): ELF file to check
+        addr (int): Address to search for
+
+    Returns:
+        int: Offset of that address in the ELF file, or None if not valid
+    """
+    for segment in elf.iter_segments():
+        if segment.header['p_type'] != 'PT_LOAD':
+            continue
+        vaddr = segment['p_vaddr']
+        if vaddr <= addr < vaddr + segment['p_filesz']:
+            return addr - vaddr + segment['p_offset']
+    return None
+
+def GetFileOffset(fname, addr):
+    """Find where an address is stored within an ELF file on disk
+
+    Args:
+        fname (str): Filename of ELF file to check
+        addr (int): Address to search for
+
+    Returns:
+        int: Offset of that address in the ELF file, or None if not valid
+
+    Raises:
+        ValueError: The Python elftools module is not available
+    """
+    if ELF_TOOLS:
+        with open(fname, 'rb') as inf:
+            return _GetFileOffset(ELFFile(inf), addr)
+    raise ValueError("Python: No module named 'elftools'")
+
+def GetSymbolFromAddress(fname, addr):
+    """Find the name of the symbol located at a given address
+
+    Args:
+        fname (str): Filename of ELF file to check
+        addr (int): Address to search for
+
+    Returns:
+        str: Symbol name, or None if no symbol at that address
+
+    Raises:
+        ValueError: The Python elftools module is not available
+    """
+    if not ELF_TOOLS:
+        raise ValueError("Python: No module named 'elftools'")
+    with open(fname, 'rb') as inf:
+        # The file is parsed with elftools but the result is not used; the
+        # symbols themselves come from GetSymbols()
+        ELFFile(inf)
+        syms = GetSymbols(fname, None)
+
+    # syms is sorted by address, so this gives the first match in that order
+    return next((name for name, sym in syms.items() if sym.address == addr),
+                None)
+
+def GetSymbolFileOffset(fname, patterns):
+    """Get the symbols from an ELF file, including their file offsets
+
+    This reads the symbol tables directly using elftools.
+
+    Args:
+        fname (str): Filename of the ELF file to read
+        patterns (list of str): Regex patterns to search for in the symbol
+            names. Use None or an empty list to get all symbols
+
+    Returns:
+        OrderedDict: Symbols found, sorted by address:
+            key (str): Name of symbol
+            value (Symbol): Information about the symbol. The section member
+                holds the name of the symbol table which the symbol was read
+                from. The offset member is the file offset of the symbol's
+                address, or None if the address is not within the file data
+                of a PT_LOAD segment
+
+    Raises:
+        ValueError: The Python elftools module is not available
+    """
+    if not ELF_TOOLS:
+        raise ValueError("Python: No module named 'elftools'")
+
+    # With no patterns every symbol is wanted, as with GetSymbols()
+    re_syms = re.compile('|'.join(patterns)) if patterns else None
+
+    syms = {}
+    with open(fname, 'rb') as fd:
+        elf = ELFFile(fd)
+        for section in elf.iter_sections():
+            if not isinstance(section, SymbolTableSection):
+                continue
+            for symbol in section.iter_symbols():
+                if re_syms and not re_syms.search(symbol.name):
+                    continue
+                addr = symbol.entry['st_value']
+                syms[symbol.name] = Symbol(
+                    section.name, addr, symbol.entry['st_size'],
+                    symbol.entry['st_info']['bind'] == 'STB_WEAK',
+                    _GetFileOffset(elf, addr))
+
+    # Sort dict by address
+    return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
+
+def GetSymbolAddress(fname, sym_name):
+    """Get the value of a symbol from an ELF file
+
+    Args:
+        fname (str): Filename of the ELF file to read
+        sym_name (str): Exact name of the symbol to look up
+
+    Returns:
+        int: Symbol value, or None if not found
+    """
+    # GetSymbols() takes regex patterns, so escape the name to make sure that
+    # characters such as '.' and '$' are matched literally
+    syms = GetSymbols(fname, [re.escape(sym_name)])
+    sym = syms.get(sym_name)
+    if sym is None:
+        return None
+    return sym.address
+
+def GetPackString(sym, msg):
+    """Work out the struct.pack/unpack format for a symbol
+
+    Both supported formats are little-endian and unsigned.
+
+    Args:
+        sym (Symbol): Symbol to check. Only the size member is checked
+        msg (str): String which indicates the entry being processed, used for
+            errors
+
+    Returns:
+        str: struct string to use: '<I' for a 4-byte symbol, '<Q' for an
+            8-byte symbol
+
+    Raises:
+        ValueError: Symbol has an unexpected size
+    """
+    formats = {4: '<I', 8: '<Q'}
+    pack_string = formats.get(sym.size)
+    if pack_string is None:
+        raise ValueError('%s has size %d: only 4 and 8 are supported' %
+                         (msg, sym.size))
+    return pack_string
+
+def GetSymbolOffset(elf_fname, sym_name, base_sym=None):
+    """Get the offset of a symbol relative to a base symbol
+
+    This is useful for obtaining the value of a single symbol relative to the
+    base of a binary blob.
+
+    Args:
+        elf_fname (str): Filename of the ELF file to read; this is passed
+            through tools.get_input_filename()
+        sym_name (str): Name of symbol to read
+        base_sym (str): Base symbol to use to calculate the offset (or None to
+            use '__image_copy_start')
+
+    Returns:
+        int: Offset of the symbol relative to the base symbol
+
+    Raises:
+        KeyError: Either symbol was not found in the ELF file
+    """
+    base_name = base_sym if base_sym else '__image_copy_start'
+    syms = GetSymbols(tools.get_input_filename(elf_fname),
+                      [base_name, sym_name])
+    base_address = syms[base_name].address
+    return syms[sym_name].address - base_address
+
+def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False,
+                          base_sym=None, base_addr=None):
+    """Replace all symbols in an entry with their correct values
+
+    The entry contents is updated so that values for referenced symbols will be
+    visible at run time. This is done by finding out the symbols offsets in the
+    entry (using the ELF file) and replacing them with values from binman's data
+    structures.
+
+    Only symbols whose names start with '_binman' are written. The symbol
+    '_binman_sym_magic' is always given the value BINMAN_SYM_MAGIC_VALUE. All
+    other values come from GetImageSymbolValue() on the section's image.
+
+    Args:
+        elf_fname: Filename of ELF image containing the symbol information for
+            entry
+        entry: Entry to process; its data member is replaced with the updated
+            contents
+        section: Section which can be used to lookup symbol values
+        is_elf (bool): True if the entry contents is itself an ELF file. Each
+            value is then written at the file offset of its symbol (this needs
+            elftools), rather than at the symbol's offset from base_sym
+        base_sym: Base symbol marking the start of the image (__image_copy_start
+            by default)
+        base_addr (int): Base address to use for the entry being written. If
+            None then the value of base_sym is used (or 0 if is_elf). This is
+            passed to GetImageSymbolValue()
+
+    Returns:
+        int: Number of symbols written. This is 0 if no symbols were found or,
+            when is_elf is False, if the base symbol was not found
+
+    Raises:
+        ValueError: is_elf is True but elftools is not available; a symbol
+            lies outside the entry contents (is_elf False only); a symbol has
+            a size other than 4 or 8
+    """
+    if not base_sym:
+        base_sym = '__image_copy_start'
+    fname = tools.get_input_filename(elf_fname)
+    # NOTE(review): only symbols whose objdump line matches 'image' or 'binman'
+    # are read, so a base_sym which matches neither is never found - confirm
+    # that this is intended
+    syms = GetSymbols(fname, ['image', 'binman'])
+    if is_elf:
+        if not ELF_TOOLS:
+            msg = ("Section '%s': entry '%s'" %
+                   (section.GetPath(), entry.GetPath()))
+            raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools')
+        # GetSymbols() leaves the offset as None, so rebuild each symbol with
+        # its file offset filled in
+        new_syms = {}
+        with open(fname, 'rb') as fd:
+            elf = ELFFile(fd)
+            for name, sym in syms.items():
+                offset = _GetFileOffset(elf, sym.address)
+                new_syms[name] = Symbol(sym.section, sym.address, sym.size,
+                                        sym.weak, offset)
+            syms = new_syms
+
+    if not syms:
+        tout.debug('LookupAndWriteSymbols: no syms')
+        return 0
+    base = syms.get(base_sym)
+    # The base symbol is only needed for blobs, where offsets are relative to it
+    if not base and not is_elf:
+        tout.debug(f'LookupAndWriteSymbols: no base: elf_fname={elf_fname}, base_sym={base_sym}, is_elf={is_elf}')
+        return 0
+    if base_addr is None:
+        base_addr = 0 if is_elf else base.address
+    count = 0
+    for name, sym in syms.items():
+        if name.startswith('_binman'):
+            msg = ("Section '%s': Symbol '%s'\n   in entry '%s'" %
+                   (section.GetPath(), name, entry.GetPath()))
+            if is_elf:
+                # For ELF files, use the file offset
+                # NOTE(review): this is None if the symbol's address is not in
+                # the file data of a PT_LOAD segment, which the code below does
+                # not handle - confirm that this cannot happen
+                offset = sym.offset
+            else:
+                # For blobs use the offset of the symbol, calculated by
+                # subtracting the base address which by definition is at the
+                # start
+                offset = sym.address - base.address
+                if offset < 0 or offset + sym.size > entry.contents_size:
+                    raise ValueError('%s has offset %x (size %x) but the contents '
+                                     'size is %x' % (entry.GetPath(), offset,
+                                                     sym.size,
+                                                     entry.contents_size))
+            pack_string = GetPackString(sym, msg)
+            if name == '_binman_sym_magic':
+                value = BINMAN_SYM_MAGIC_VALUE
+            else:
+                # Look up the symbol in our entry tables.
+                value = section.GetImage().GetImageSymbolValue(name, sym.weak,
+                                                               msg, base_addr)
+            if value is None:
+                # No value is available: write -1, using the signed (lower
+                # case) variant of the format so that it can be packed
+                value = -1
+                pack_string = pack_string.lower()
+            value_bytes = struct.pack(pack_string, value)
+            tout.debug('%s:\n   insert %s, offset %x, value %x, length %d' %
+                       (msg, name, offset, value, len(value_bytes)))
+            # Splice the packed value over the symbol's bytes
+            entry.data = (entry.data[:offset] + value_bytes +
+                        entry.data[offset + sym.size:])
+            count += 1
+    if count:
+        tout.detail(
+            f"Section '{section.GetPath()}': entry '{entry.GetPath()}' : {count} symbols")
+    return count
+
+def GetSymbolValue(sym, data, msg):
+    """Read the integer value stored at a symbol's location
+
+    This can only be used on symbols with an integer value.
+
+    Args:
+        sym (Symbol): Symbol to check; its offset and size members locate the
+            value within data
+        data (bytes): Data for the ELF file - the symbol data appears at offset
+            sym.offset
+        msg (str): String which indicates the entry being processed, used for
+            errors
+
+    Returns:
+        int: Value of the symbol
+
+    Raises:
+        ValueError: Symbol has an unexpected size
+    """
+    fmt = GetPackString(sym, msg)
+    start = sym.offset
+    (value,) = struct.unpack(fmt, data[start:start + sym.size])
+    return value
+
+def MakeElf(elf_fname, text, data):
+    """Make an ELF file with the given text and data, for testing
+
+    An assembly-language file and a linker script are written to a temporary
+    directory, then built using the compiler returned by
+    tools.get_target_compile_tool('cc'). The output file has several sections
+    including '.text' and '.data', containing the info provided in arguments.
+    The entry point (_start) is two bytes after the start of the text section.
+
+    The temporary directory is removed afterwards, even if the build fails.
+
+    Args:
+        elf_fname (str): Output filename
+        text (bytes): Text (code) to put in the file's .text section
+        data (bytes): Data to put in the file's .data section
+    """
+    # Split the text into two parts so that we can make the entry point two
+    # bytes after the start of the text section
+    text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
+    text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
+    data_bytes = ['\t.byte\t%#x' % byte for byte in data]
+
+    # The directory and its contents are removed when this block exits,
+    # whether or not an exception is raised
+    with tempfile.TemporaryDirectory(prefix='binman.elf.') as outdir:
+        s_file = os.path.join(outdir, 'elf.S')
+        with open(s_file, 'w') as fd:
+            print('''/* Auto-generated C program to produce an ELF file for testing */
+
+.section .text
+.code32
+.globl _start
+.type _start, @function
+%s
+_start:
+%s
+.ident "comment"
+
+.comm fred,8,4
+
+.section .empty
+.globl _empty
+_empty:
+.byte 1
+
+.globl ernie
+.data
+.type ernie, @object
+.size ernie, 4
+ernie:
+%s
+''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
+            file=fd)
+        lds_file = os.path.join(outdir, 'elf.lds')
+
+        # Use a linker script to set the alignment and text address.
+        with open(lds_file, 'w') as fd:
+            print('''/* Auto-generated linker script to produce an ELF file for testing */
+
+PHDRS
+{
+    text PT_LOAD ;
+    data PT_LOAD ;
+    empty PT_LOAD FLAGS ( 6 ) ;
+    note PT_NOTE ;
+}
+
+SECTIONS
+{
+    . = 0xfef20000;
+    ENTRY(_start)
+    .text . : SUBALIGN(0)
+    {
+        *(.text)
+    } :text
+    .data : {
+        *(.data)
+    } :data
+    _bss_start = .;
+    .empty : {
+        *(.empty)
+    } :empty
+    /DISCARD/ : {
+        *(.note.gnu.property)
+    }
+    .note : {
+        *(.comment)
+    } :note
+    .bss _bss_start  (OVERLAY) : {
+        *(.bss)
+    }
+}
+''', file=fd)
+        # -static: Avoid requiring any shared libraries
+        # -nostdlib: Don't link with C library
+        # -Wl,--build-id=none: Don't generate a build ID, so that we just get
+        #   the text section at the start
+        # -m32: Build for 32-bit x86
+        # -T...: Specifies the link script, which sets the start address
+        cc, args = tools.get_target_compile_tool('cc')
+        args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T',
+                 lds_file, '-o', elf_fname, s_file]
+        command.output(cc, *args)
+
+def DecodeElf(data, location):
+    """Decode an ELF file and return information about it
+
+    The file contents of every loadable segment (PT_LOAD with a non-zero
+    memory size) are copied into a single buffer, each placed according to its
+    physical address (p_paddr) relative to the lowest one.
+
+    Args:
+        data (bytes): Data from ELF file
+        location (int): Start address of data to return. File data which sits
+            below this address is not copied, so that part of the output is
+            left as zero bytes
+
+    Returns:
+        ElfInfo: Information about the decoded ELF file:
+            data: Buffer holding the segment contents
+            load: Lowest physical address of the loadable segments
+            entry: Entry address, adjusted by the difference between the
+                physical and virtual addresses of the first loadable segment
+                where these differ
+            memsize: Bytes from the load address to the highest memory address
+                used by a loadable segment
+
+    Raises:
+        ValueError: The Python elftools module is not available, or the file
+            has no loadable segments
+    """
+    if not ELF_TOOLS:
+        raise ValueError("Python: No module named 'elftools'")
+    with io.BytesIO(data) as fd:
+        elf = ELFFile(fd)
+        segments = [seg for seg in elf.iter_segments()
+                    if seg['p_type'] == 'PT_LOAD' and seg['p_memsz']]
+        if not segments:
+            raise ValueError('ELF file has no loadable segments')
+
+        # Use the real minimum rather than starting from a 32-bit sentinel, so
+        # that addresses above 4GB are handled correctly
+        data_start = min(seg['p_paddr'] for seg in segments)
+        data_end = max(seg['p_paddr'] + seg['p_filesz'] for seg in segments)
+        mem_end = max(seg['p_paddr'] + seg['p_memsz'] for seg in segments)
+
+        # Take the first non-zero difference between physical and virtual
+        virt_to_phys = 0
+        for seg in segments:
+            virt_to_phys = seg['p_paddr'] - seg['p_vaddr']
+            if virt_to_phys:
+                break
+
+        output = bytearray(data_end - data_start)
+        for seg in segments:
+            start = seg['p_paddr']
+
+            # Drop any part of the segment which is below the requested location
+            skip = max(location - start, 0)
+            chunk = seg.data()[skip:]
+            pos = start + skip - data_start
+
+            # Replace exactly as many bytes as are being written, so that the
+            # buffer keeps its size and later segments land in the right place
+            output[pos:pos + len(chunk)] = chunk
+        entry = elf.header['e_entry'] + virt_to_phys
+    return ElfInfo(output, data_start, entry, mem_end - data_start)
+
+def UpdateFile(infile, outfile, start_sym, end_sym, insert):
+    """Write a copy of an ELF file with data inserted between two symbols
+
+    The part of the file from the file offset of start_sym up to (but not
+    including) that of end_sym is replaced with the insert data, followed by
+    padding from tools.get_bytes(0, ...) to fill the rest of the region. The
+    remainder of the file is copied unchanged.
+
+    Args:
+        infile (str): Filename of the ELF file to read
+        outfile (str): Filename to write the updated file to
+        start_sym (str): Name of the symbol at the start of the region
+        end_sym (str): Name of the symbol at the end of the region
+        insert (bytes): Data to put into the region
+
+    Raises:
+        ValueError: The Python elftools module is not available; the two
+            symbols were not both found; a symbol has no offset in the file;
+            the data is too large for the region
+    """
+    tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
+                (outfile, len(insert), len(insert), start_sym, end_sym))
+
+    # GetSymbolFileOffset() searches for regex patterns, so anchor and escape
+    # the names. Otherwise a longer symbol which merely contains one of the
+    # names is picked up too, and the check below fails
+    patterns = ['^%s$' % re.escape(name) for name in (start_sym, end_sym)]
+    syms = GetSymbolFileOffset(infile, patterns)
+    if len(syms) != 2:
+        raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
+                         (start_sym, end_sym, len(syms),
+                          ','.join(syms.keys())))
+
+    start_offset = syms[start_sym].offset
+    end_offset = syms[end_sym].offset
+    if start_offset is None or end_offset is None:
+        raise ValueError("Symbols '%s' and '%s' must both have an offset in file '%s'" %
+                         (start_sym, end_sym, infile))
+
+    size = end_offset - start_offset
+    if len(insert) > size:
+        raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
+                         (infile, len(insert), len(insert), size, size))
+
+    data = tools.read_file(infile)
+    newdata = data[:start_offset]
+    newdata += insert + tools.get_bytes(0, size - len(insert))
+    newdata += data[end_offset:]
+    tools.write_file(outfile, newdata)
+    tout.info('Written to offset %#x' % start_offset)
+
+def read_loadable_segments(data):
+    """Read the loadable segments from an ELF file
+
+    Segments which are not PT_LOAD, or which have a memory size of zero, are
+    left out.
+
+    Args:
+        data (bytes): Contents of file
+
+    Returns:
+        tuple:
+            list of segments, each:
+                int: Segment number (0 = first), counting all segments in the
+                    file
+                int: Start address of segment in memory (p_paddr)
+                bytes: Contents of segment (the p_filesz bytes at p_offset)
+            int: entry address for image
+
+    Raises:
+        ValueError: elftools is not available, or it cannot parse the data
+    """
+    if not ELF_TOOLS:
+        raise ValueError("Python: No module named 'elftools'")
+    with io.BytesIO(data) as inf:
+        try:
+            elf = ELFFile(inf)
+        except ELFError as err:
+            raise ValueError(err)
+        segments = [
+            (seq, seg['p_paddr'],
+             data[seg['p_offset']:seg['p_offset'] + seg['p_filesz']])
+            for seq, seg in enumerate(elf.iter_segments())
+            if seg['p_type'] == 'PT_LOAD' and seg['p_memsz']]
+        entry = elf.header['e_entry']
+    return segments, entry
+
+def is_valid(data):
+    """Check if some binary data is a valid ELF file
+
+    The data is passed to DecodeElf(); it is considered invalid if elftools
+    raises ELFError while doing so.
+
+    Args:
+        data (bytes): Bytes to check
+
+    Returns:
+        bool: True if a valid Elf file, False if not
+
+    Raises:
+        ValueError: The Python elftools module is not available
+    """
+    if not ELF_TOOLS:
+        # ELFError is not defined without elftools, so the 'except' below
+        # would fail with NameError. Report the problem in the same way as the
+        # other functions here
+        raise ValueError("Python: No module named 'elftools'")
+    try:
+        DecodeElf(data, 0)
+    except ELFError:
+        return False
+    return True