#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 # # In Samsung R&D Institute Ukraine, LLC (SRUKR) under a contract between # Samsung R&D Institute Ukraine, LLC (Kyiv, Ukraine) # and "Samsung Electronics Co", Ltd (Seoul, Republic of Korea) # Copyright: (c) Samsung Electronics Co, Ltd 2024. All rights reserved. # # -*- coding: utf-8 -*- """ Module ELF contains ELF, Symbol, Section classes for manipulation over ELF files. It can parse, and change ELF file. This version works only with vmlinux and doesn't properly work with ELF that contains UND symbols """ import subprocess import re import os import struct from collections import OrderedDict from binascii import unhexlify from Utils import Utils from math import ceil __author__ = "Vadym Stupakov" __copyright__ = "Copyright (c) 2017 Samsung Electronics" __credits__ = ["Vadym Stupakov"] __version__ = "1.0" __maintainer__ = "Vadym Stupakov" __email__ = "v.stupakov@samsung.com" __status__ = "Production" DEFAULT_NAME_JUMP_TABLE_START_SYM = "__start___jump_table" DEFAULT_NAME_JUMP_TABLE_END_SYM = "__stop___jump_table" DEFAULT_ARM_INST_WIDTH = 4 class Sec_Jumptable_Data: target_sec_idx = -1 target_offset = -1 code = -1 key = -1 class Symbol: def __init__(self, name=str(), sym_type=str(), bind=str(), visibility=str(), addr=int(), size=int(), ndx=str()): self.utils = Utils() self.name = str(name) self.type = str(sym_type) self.bind = str(bind) self.ndx = str(ndx) self.visibility = str(visibility) self.addr = self.utils.to_int(addr) self.size = self.utils.to_int(size) def __str__(self): return "name: '{}', type: '{}', bind: '{}', ndx: '{}', visibility: '{}', address: '{}', size: '{}'".format( self.name, self.type, self.bind, self.ndx, self.visibility, hex(self.addr), hex(self.size) ) class Section: def __init__(self, name=str(), sec_type=str(), addr=int(), offset=int(), size=int()): self.utils = Utils() self.name = str(name) self.type = str(sec_type) self.addr = self.utils.to_int(addr) self.offset = self.utils.to_int(offset) self.size = self.utils.to_int(size) def __str__(self): return "name: '{}', type: '{}', address: '{}', offset: '{}', size: '{}'".format( self.name, self.type, hex(self.addr), hex(self.offset), hex(self.size) ) class ELF: """ Utils for manipulating over ELF """ def __init__(self, elf_file): self.__elf_file = elf_file self.utils = Utils() self.__sections = OrderedDict() self.__symbols = OrderedDict() self.__symbols_list_text = None self.__symbols_list_rodata = None self.__symbols_list_init_data = None self.__relocs_text = None self.__relocs_rodata = None self.__re_hexadecimal = r"\s*[0-9A-Fa-f]+\s*" self.__re_sec_name = r"\s*[._a-zA-Z]+\s*" self.__re_type = r"\s*[A-Z]+\s*" self.__altinstr_text = None self.__altinstr_rodata = None """ To derive info from the kernel`s ELF and object files, it is necessary to use nm and readelf. The both should be from cross-toolchain which is used at the kernel build. So, let's take a correct tools from NM and READELF environment variables, the variables are set by Makefile from the kernel source root directory. """ self.__readelf_tool = os.environ["READELF"] self.__obj_parser_tool = os.environ["NM"] self.jumptable_struct_format = " parse -> transform to dict() readelf output :returns dict: {sec_addr : Section()} """ if len(self.__sections) == 0: sec_header = self.get_raw_by_tool(self.__readelf_tool, ["-SW", self.__elf_file]).strip() secs = re.compile(r"^.*\[.*\](" + self.__re_sec_name + self.__re_type + self.__re_hexadecimal + self.__re_hexadecimal + self.__re_hexadecimal + ")", re.MULTILINE) found = secs.findall(sec_header) for line in found: line = line.split() if len(line) == 5: self.__sections[int(line[2], 16)] = Section(name=line[0], sec_type=line[1], addr=int(line[2], 16), offset=int(line[3], 16), size=int(line[4], 16)) self.__sections = OrderedDict(sorted(self.__sections.items())) return self.__sections def find_str_in_text(self, req_str, text): for line in text.splitlines(): if req_str in line: return True return False def get_rodata_text_scope(self): raw_sections = self.get_raw_by_tool(self.__readelf_tool, ["-SW", self.__elf_file]).strip() section_rodata = list() section_text = list() for line in raw_sections.splitlines(): line_list = list(line.split()) i = 0 len_list = len(line_list) while i < len_list: if "." not in line_list[i]: del line_list[i] len_list = len(line_list) else: break if len(line_list) >= 6: if line_list[0].strip().startswith(".rodata"): if int(line_list[4].strip(), 16) != 0: section_rodata.append([line_list[2].strip(), line_list[4].strip()]) elif line_list[0].strip().startswith(".text") or line_list[0].strip() == ".init.text": section_text.append([line_list[2].strip(), line_list[4].strip()]) return section_text, section_rodata def get_list_symbols_from_file(self, path_to_files, file_name): """ Extract from object file the symbols from section .text and all .data sections :param path_to_files: path to object files :param file_name: name of parsing object file :return: symbols_text, symbols_rodata """ not_allowed_syms = ["__UNIQUE_ID_", "__kstrtab_", "__ksym_marker_", "__ksymtab_", "__exitcall_", "__initcall_", "$x", "$d"] abs_path_file = os.path.abspath(os.path.join(path_to_files, file_name)) raw_syms_output = self.get_raw_by_tool(self.__obj_parser_tool,["--defined-only", abs_path_file]) symbols_text = list() symbols_rodata = list() for line in raw_syms_output.splitlines(): line_split = line.split() if len(line_split) == 3: skip_symbol = False if line_split[1] in ("D", "d", "T", "t", "R", "r"): for l_sort in not_allowed_syms: if line_split[2].startswith(str(l_sort)): skip_symbol = True break if not skip_symbol: if line_split[1] in ("T", "t"): symbols_text.append([line_split[1], line_split[2]]) elif line_split[1] in ("D", "d", "R", "r"): symbols_rodata.append([line_split[1], line_split[2]]) return symbols_text, symbols_rodata def get_symbols_from_obj_files(self, path_to_files, list_files): """ Forming list with candidates to canister """ text_obj_symbols = list() rodata_obj_symbols = list() for l_file in list_files: if os.path.isfile(str(path_to_files + "/" + l_file)): file_obj_text, file_obj_data = self.get_list_symbols_from_file(path_to_files, l_file) text_obj_symbols.extend(file_obj_text) rodata_obj_symbols.extend(file_obj_data) else: print("\nSKC file ", l_file, "is not found") return text_obj_symbols, rodata_obj_symbols def filtered_addr_by_section(self, addr, section_gap): for l_addr in section_gap: start_addr = self.utils.to_int(l_addr[0]) end_addr = start_addr + self.utils.to_int(l_addr[1]) if self.utils.to_int(addr) >= start_addr and self.utils.to_int(addr) < end_addr: return True return False def get_single_symbol_raw(self, name: str) -> Symbol: """ Derive the info about symbol by name. :param name: symbol name as string, :return: instance of the Symbol class. """ sym_tab = self.get_raw_by_tool(self.__readelf_tool, ["-sW", self.__elf_file]) syms = re.compile(r"^.*\d+:\s(.*$)", re.MULTILINE) found = syms.findall(sym_tab.strip()) for line in found: line = line.split() if len(line) == 7: size = line[1] # This needs, because readelf prints sizes in hex if size is large size = int(size, 0) one_symbol = Symbol(addr=int(line[0], 16), size=size, sym_type=line[2], bind=line[3], visibility=line[4], ndx=line[5], name=line[6]) if one_symbol.name == name: return one_symbol return None def get_elf_symbols_list(self): """" Execute -> parse -> transform readelf symbols output into lists [symbols in .text, .init.text] and [symbols in .rodata] :returns lists: [symbols in .text, .init.text], [symbols in .rodata], [symbols in .init.data] """ if self.__symbols_list_text is None or self.__symbols_list_rodata is None: self.__symbols_list_text = list() self.__symbols_list_rodata = list() self.__symbols_list_init_data = list() section_text, section_rodata = self.get_rodata_text_scope() section_obj_init_data = self.get_section_by_name(".init.data") sym_tab = self.get_raw_by_tool(self.__readelf_tool, ["-sW", self.__elf_file]) syms = re.compile(r"^.*\d+:\s(.*$)", re.MULTILINE) found = syms.findall(sym_tab.strip()) for line in found: line = line.split() if len(line) == 7: size = line[1] # This needs, because readelf prints sizes in hex if size is large if size[:2].upper() == "0X": size = int(size, 16) else: size = int(size, 10) addr_symbol=self.utils.to_int(line[0]) one_symbol = Symbol(addr=int(line[0], 16), size=size, sym_type=line[2], bind=line[3], visibility=line[4], ndx=line[5], name=line[6]) if not line[6].startswith("$") and (size != 0) and ".cfi_jt" not in line[6]: if self.filtered_addr_by_section(addr_symbol, section_text): self.__symbols_list_text.append(one_symbol) elif self.filtered_addr_by_section(addr_symbol, section_rodata): self.__symbols_list_rodata.append(one_symbol) else: if section_obj_init_data is not None: if self.filtered_addr_by_section(addr_symbol, \ [[section_obj_init_data.addr, section_obj_init_data.size]]): self.__symbols_list_init_data.append(one_symbol) return self.__symbols_list_text, self.__symbols_list_rodata, self.__symbols_list_init_data def get_text_symbols(self): list_text, _, _ = self.get_elf_symbols_list() return list_text def get_rodata_symbols(self): _, list_rodata, _ = self.get_elf_symbols_list() return list_rodata def get_init_data_symbols(self): _, _, list_init_data = self.get_elf_symbols_list() return list_init_data def get_symbols(self): """" Execute -> parse -> transform the to dict() readelf output :returns dict: {sym_addr : Symbol()} """ if len(self.__symbols) == 0: list_text, list_rodata, _ = self.get_elf_symbols_list() for l_symbol in list_text: self.__symbols[l_symbol.addr] = l_symbol for l_symbol in list_rodata: self.__symbols[l_symbol.addr] = l_symbol self.__symbols = OrderedDict(sorted(self.__symbols.items())) return self.__symbols def get_relocs_text_rodata(self): """ returns list: [reloc_text1, reloc_text2, ..., reloc_textN], [reloc_rodata1, reloc_rodata2, ..., reloc_rodataN] """ if self.__relocs_text is None or self.__relocs_rodata is None: self.__relocs_text = list() self.__relocs_rodata = list() relocs = self.get_raw_by_tool(self.__readelf_tool, ["-rW", self.__elf_file]) rel = re.compile(r"^(" + self.__re_hexadecimal + r")\s*", re.MULTILINE) section_text, section_rodata = self.get_rodata_text_scope() for el in rel.findall(relocs.strip()): rel_addr = self.utils.to_int(el) if self.filtered_addr_by_section(rel_addr, section_rodata): self.__relocs_rodata.append(rel_addr) elif self.filtered_addr_by_section(rel_addr, section_text): self.__relocs_text.append(rel_addr) self.__relocs_text.sort() self.__relocs_rodata.sort() return self.__relocs_text, self.__relocs_rodata def get_relocs_for_symbol(self, relocs_list, start_addr=None, end_addr=None): """" :param relocs_list: input relocation list :param start_addr: start address :int :param end_addr: end address: int :returns list: [reloc1, reloc2, reloc3, ..., relocN] """ ranged_rela = list() if start_addr and end_addr is not None: for el in relocs_list: if self.utils.to_int(end_addr) <= self.utils.to_int(el): break if self.utils.to_int(start_addr) <= self.utils.to_int(el): ranged_rela.append(el) return ranged_rela def get_text_rodata_altinstructions_lists(self): """ :returns list: [[text_alt_inst1_addr, length1], [text_alt_inst2_addr, length2], ...], [[rodata_alt_inst1_addr, length1], [rodata_alt_inst2_addr, length2], ...] .altinstructions section contains an array of struct alt_instr. As instance, for kernel 4.14 from /arch/arm64/include/asm/alternative.h struct alt_instr { s32 orig_offset; /* offset to original instruction */ s32 alt_offset; /* offset to replacement instruction */ u16 cpufeature; /* cpufeature bit set for replacement */ u8 orig_len; /* size of original instruction(s) */ u8 alt_len; /* size of new instruction(s), <= orig_len */ }; Later, address of original instruction can be calculated as at runtime : &(alt_instr->orig_offset) + alt_instr->orig_offset + kernel offset ELF processing : address of .altinstruction section + in section offset of alt_instr structure + value of alt_instr.orig_offset details in /arch/arm64/kernel/alternative.c, void __apply_alternatives(void *, bool) """ # The struct_format should reflect content struct_format = "= end_addr_int: self.add_addrs_space_to_list(ranged_altinst, l_instr[0], end_addr_int) elif start_addr_int > l_instr[0] and l_instr_addr_end < end_addr_int: self.add_addrs_space_to_list(ranged_altinst, start_addr_int, l_instr_addr_end) elif start_addr_int > l_instr[0] and l_instr_addr_end > end_addr_int: self.add_addrs_space_to_list(ranged_altinst, start_addr_int, end_addr_int) return ranged_altinst def get_jump_table_list(self) -> list: """ Return the list of jump table records in the whole kernel's ELF. :returns list: list of jump table records [Sec_Jumptable_Data_rec1, Sec_Jumptable_Data_rec2, ...] """ jump_table_start_sym = self.get_single_symbol_raw(DEFAULT_NAME_JUMP_TABLE_START_SYM) jump_table_end_sym = self.get_single_symbol_raw(DEFAULT_NAME_JUMP_TABLE_END_SYM) if jump_table_start_sym == None or jump_table_end_sym == None: return [] jumptable_struct_size = struct.calcsize(self.jumptable_struct_format) jump_table_content = self.get_data_by_vaddr(jump_table_start_sym.addr, jump_table_end_sym.addr - jump_table_start_sym.addr) for i in range(ceil((jump_table_end_sym.addr - jump_table_start_sym.addr)/jumptable_struct_size)): jtr = Sec_Jumptable_Data() begin = i * jumptable_struct_size end = begin + jumptable_struct_size (jtr.code, jtr.target_offset, jtr.key) = struct.unpack(self.jumptable_struct_format, jump_table_content[begin: end]) jt_record_addr = jump_table_start_sym.addr + begin jtr.code += jt_record_addr jtr.target_offset += jt_record_addr self.__jt_rec.append(jtr) return self.__jt_rec def get_jump_table_module(self, start_addr: int, end_addr: int, jump_table: list) -> list: """ Return JT related gaps are in range of our module :param start_addr: int :param end_addr: int :param jump_table: list full list (over whole kernel) of JT items :returns list of addrs to be excluded [exclude_addr1, exclude_addr2, ...] """ result_jt_gaps = list() for jt_item in jump_table: if start_addr <= jt_item.code and end_addr > jt_item.code: for __addr in range(jt_item.code, jt_item.code + DEFAULT_ARM_INST_WIDTH): result_jt_gaps.append(__addr) return result_jt_gaps def get_symbol_by_name_text(self, sym_name: str) -> Symbol: """ Get symbol by_name in section .rodata :param sym_name: name of symbol :return: Symbol() """ for symbol_obj in self.get_text_symbols(): if symbol_obj.name == sym_name: return symbol_obj return None def get_symbol_by_name_rodata(self, sym_name: str): """ Get symbol by_name in section .rodata :param sym_name: name of symbol :return: Symbol() """ for symbol_obj in self.get_rodata_symbols(): if symbol_obj.name == sym_name: return symbol_obj return None def get_symbol_by_name_init_data(self, sym_name: str): """ Get symbol by_name in section .init.data :param sym_name: name of symbol :return: Symbol() """ for symbol_obj in self.get_init_data_symbols(): if symbol_obj.name == sym_name: return symbol_obj return None def get_symbol_by_vaddr(self, vaddrs=None): """ Get symbol by virtual address :param vaddrs: vaddr : int or list :return: Symbol() or [Symbol()] """ if isinstance(vaddrs, int): if vaddrs in self.get_symbols(): return self.get_symbols()[vaddrs] for addr, symbol_obj in self.get_symbols().items(): if (addr + symbol_obj.size) >= vaddrs >= addr: return symbol_obj elif isinstance(vaddrs, list): symbol = [self.get_symbol_by_vaddr(vaddr) for vaddr in vaddrs] return symbol else: raise ValueError return None def get_section_by_name(self, sec_names=None): """ Get section by_name :param sec_names: "sec_name" : str or list :return: Section() or [Section()] """ if isinstance(sec_names, str): for _, section_obj in self.get_sections().items(): if section_obj.name == sec_names: return section_obj elif isinstance(sec_names, list): sections = [self.get_section_by_name(sec_name) for sec_name in sec_names] return sections else: raise ValueError return None def get_section_by_vaddr(self, vaddrs=None): """ Get section by virtual address :param vaddrs: vaddr : int or list :return: Section() or [Section()] """ if isinstance(vaddrs, int): if vaddrs in self.get_sections(): return self.get_sections()[vaddrs] for addr, section_obj in self.get_sections().items(): if (addr + section_obj.size) >= vaddrs >= addr: return section_obj elif isinstance(vaddrs, list): sections = [self.get_symbol_by_vaddr(vaddr) for vaddr in vaddrs] return sections else: raise ValueError return None def vaddr_to_file_offset(self, vaddrs): """ Transform virtual address to file offset :param vaddrs: addr string or int or list :returns file offset or list """ if isinstance(vaddrs, str) or isinstance(vaddrs, int): section = self.get_section_by_vaddr(vaddrs) return self.utils.to_int(vaddrs, 16) - section.addr + section.offset elif isinstance(vaddrs, list): return [self.vaddr_to_file_offset(vaddr) for vaddr in vaddrs] else: raise ValueError def read_data_from_vaddr(self, vaddr, size, out_file): with open(self.__elf_file, "rb") as elf_fp: elf_fp.seek(self.vaddr_to_file_offset(vaddr)) with open(out_file, "wb") as out_fp: out_fp.write(elf_fp.read(size)) def get_data_by_vaddr(self, vaddr, size) -> bytearray: with open(self.__elf_file, "rb") as elf_fp: elf_fp.seek(self.vaddr_to_file_offset(vaddr)) outbuff = elf_fp.read(size) return outbuff