""" Utilities for reading and writing Mach-O headers """ from __future__ import print_function import sys import struct import os from .mach_o import MH_FILETYPE_SHORTNAMES, LC_DYSYMTAB, LC_SYMTAB from .mach_o import load_command, S_ZEROFILL, section_64, section from .mach_o import LC_REGISTRY, LC_ID_DYLIB, LC_SEGMENT, fat_header from .mach_o import LC_SEGMENT_64, MH_CIGAM_64, MH_MAGIC_64, FAT_MAGIC from .mach_o import mach_header, fat_arch64, FAT_MAGIC_64, fat_arch from .mach_o import LC_REEXPORT_DYLIB, LC_PREBOUND_DYLIB, LC_LOAD_WEAK_DYLIB from .mach_o import LC_LOAD_UPWARD_DYLIB, LC_LOAD_DYLIB, mach_header_64 from .mach_o import MH_CIGAM, MH_MAGIC from .ptypes import sizeof from macholib.util import fileview try: from macholib.compat import bytes except ImportError: pass try: unicode except NameError: unicode = str if sys.version_info[0] == 2: range = xrange # noqa: F821 __all__ = ['MachO'] _RELOCATABLE = set(( # relocatable commands that should be used for dependency walking LC_LOAD_DYLIB, LC_LOAD_UPWARD_DYLIB, LC_LOAD_WEAK_DYLIB, LC_PREBOUND_DYLIB, LC_REEXPORT_DYLIB, )) _RELOCATABLE_NAMES = { LC_LOAD_DYLIB: 'load_dylib', LC_LOAD_UPWARD_DYLIB: 'load_upward_dylib', LC_LOAD_WEAK_DYLIB: 'load_weak_dylib', LC_PREBOUND_DYLIB: 'prebound_dylib', LC_REEXPORT_DYLIB: 'reexport_dylib', } def _shouldRelocateCommand(cmd): """ Should this command id be investigated for relocation? """ return cmd in _RELOCATABLE def lc_str_value(offset, cmd_info): """ Fetch the actual value of a field of type "lc_str" """ cmd_load, cmd_cmd, cmd_data = cmd_info offset -= sizeof(cmd_load) + sizeof(cmd_cmd) return cmd_data[offset:].strip(b'\x00') class MachO(object): """ Provides reading/writing the Mach-O header of a specific existing file """ # filename - the original filename of this mach-o # sizediff - the current deviation from the initial mach-o size # header - the mach-o header # commands - a list of (load_command, somecommand, data) # data is either a str, or a list of segment structures # total_size - the current mach-o header size (including header) # low_offset - essentially, the maximum mach-o header size # id_cmd - the index of my id command, or None def __init__(self, filename): # supports the ObjectGraph protocol self.graphident = filename self.filename = filename self.loader_path = os.path.dirname(filename) # initialized by load self.fat = None self.headers = [] with open(filename, 'rb') as fp: self.load(fp) def __repr__(self): return "" % (self.filename,) def load(self, fh): assert fh.tell() == 0 header = struct.unpack('>I', fh.read(4))[0] fh.seek(0) if header in (FAT_MAGIC, FAT_MAGIC_64): self.load_fat(fh) else: fh.seek(0, 2) size = fh.tell() fh.seek(0) self.load_header(fh, 0, size) def load_fat(self, fh): self.fat = fat_header.from_fileobj(fh) if self.fat.magic == FAT_MAGIC: archs = [fat_arch.from_fileobj(fh) for i in range(self.fat.nfat_arch)] elif self.fat.magic == FAT_MAGIC_64: archs = [fat_arch64.from_fileobj(fh) for i in range(self.fat.nfat_arch)] else: raise ValueError("Unknown fat header magic: %r" % (self.fat.magic)) for arch in archs: self.load_header(fh, arch.offset, arch.size) def rewriteLoadCommands(self, *args, **kw): changed = False for header in self.headers: if header.rewriteLoadCommands(*args, **kw): changed = True return changed def load_header(self, fh, offset, size): fh.seek(offset) header = struct.unpack('>I', fh.read(4))[0] fh.seek(offset) if header == MH_MAGIC: magic, hdr, endian = MH_MAGIC, mach_header, '>' elif header == MH_CIGAM: magic, hdr, endian = MH_CIGAM, mach_header, '<' elif header == MH_MAGIC_64: magic, hdr, endian = MH_MAGIC_64, mach_header_64, '>' elif header == MH_CIGAM_64: magic, hdr, endian = MH_CIGAM_64, mach_header_64, '<' else: raise ValueError("Unknown Mach-O header: 0x%08x in %r" % ( header, fh)) hdr = MachOHeader(self, fh, offset, size, magic, hdr, endian) self.headers.append(hdr) def write(self, f): for header in self.headers: header.write(f) class MachOHeader(object): """ Provides reading/writing the Mach-O header of a specific existing file """ # filename - the original filename of this mach-o # sizediff - the current deviation from the initial mach-o size # header - the mach-o header # commands - a list of (load_command, somecommand, data) # data is either a str, or a list of segment structures # total_size - the current mach-o header size (including header) # low_offset - essentially, the maximum mach-o header size # id_cmd - the index of my id command, or None def __init__(self, parent, fh, offset, size, magic, hdr, endian): self.MH_MAGIC = magic self.mach_header = hdr # These are all initialized by self.load() self.parent = parent self.offset = offset self.size = size self.endian = endian self.header = None self.commands = None self.id_cmd = None self.sizediff = None self.total_size = None self.low_offset = None self.filetype = None self.headers = [] self.load(fh) def __repr__(self): return "<%s filename=%r offset=%d size=%d endian=%r>" % ( type(self).__name__, self.parent.filename, self.offset, self.size, self.endian) def load(self, fh): fh = fileview(fh, self.offset, self.size) fh.seek(0) self.sizediff = 0 kw = {'_endian_': self.endian} header = self.mach_header.from_fileobj(fh, **kw) self.header = header # if header.magic != self.MH_MAGIC: # raise ValueError("header has magic %08x, expecting %08x" % ( # header.magic, self.MH_MAGIC)) cmd = self.commands = [] self.filetype = self.get_filetype_shortname(header.filetype) read_bytes = 0 low_offset = sys.maxsize for i in range(header.ncmds): # read the load command cmd_load = load_command.from_fileobj(fh, **kw) # read the specific command klass = LC_REGISTRY.get(cmd_load.cmd, None) if klass is None: raise ValueError("Unknown load command: %d" % (cmd_load.cmd,)) cmd_cmd = klass.from_fileobj(fh, **kw) if cmd_load.cmd == LC_ID_DYLIB: # remember where this command was if self.id_cmd is not None: raise ValueError("This dylib already has an id") self.id_cmd = i if cmd_load.cmd in (LC_SEGMENT, LC_SEGMENT_64): # for segment commands, read the list of segments segs = [] # assert that the size makes sense if cmd_load.cmd == LC_SEGMENT: section_cls = section else: # LC_SEGMENT_64 section_cls = section_64 expected_size = ( sizeof(klass) + sizeof(load_command) + (sizeof(section_cls) * cmd_cmd.nsects) ) if cmd_load.cmdsize != expected_size: raise ValueError("Segment size mismatch") # this is a zero block or something # so the beginning is wherever the fileoff of this command is if cmd_cmd.nsects == 0: if cmd_cmd.filesize != 0: low_offset = min(low_offset, cmd_cmd.fileoff) else: # this one has multiple segments for j in range(cmd_cmd.nsects): # read the segment seg = section_cls.from_fileobj(fh, **kw) # if the segment has a size and is not zero filled # then its beginning is the offset of this segment not_zerofill = ((seg.flags & S_ZEROFILL) != S_ZEROFILL) if seg.offset > 0 and seg.size > 0 and not_zerofill: low_offset = min(low_offset, seg.offset) if not_zerofill: c = fh.tell() fh.seek(seg.offset) sd = fh.read(seg.size) seg.add_section_data(sd) fh.seek(c) segs.append(seg) # data is a list of segments cmd_data = segs # XXX: Disabled for now because writing back doesn't work # elif cmd_load.cmd == LC_CODE_SIGNATURE: # c = fh.tell() # fh.seek(cmd_cmd.dataoff) # cmd_data = fh.read(cmd_cmd.datasize) # fh.seek(c) # elif cmd_load.cmd == LC_SYMTAB: # c = fh.tell() # fh.seek(cmd_cmd.stroff) # cmd_data = fh.read(cmd_cmd.strsize) # fh.seek(c) else: # data is a raw str data_size = ( cmd_load.cmdsize - sizeof(klass) - sizeof(load_command) ) cmd_data = fh.read(data_size) cmd.append((cmd_load, cmd_cmd, cmd_data)) read_bytes += cmd_load.cmdsize # make sure the header made sense if read_bytes != header.sizeofcmds: raise ValueError("Read %d bytes, header reports %d bytes" % ( read_bytes, header.sizeofcmds)) self.total_size = sizeof(self.mach_header) + read_bytes self.low_offset = low_offset def walkRelocatables(self, shouldRelocateCommand=_shouldRelocateCommand): """ for all relocatable commands yield (command_index, command_name, filename) """ for (idx, (lc, cmd, data)) in enumerate(self.commands): if shouldRelocateCommand(lc.cmd): name = _RELOCATABLE_NAMES[lc.cmd] ofs = cmd.name - sizeof(lc.__class__) - sizeof(cmd.__class__) yield idx, name, data[ofs:data.find(b'\x00', ofs)].decode( sys.getfilesystemencoding()) def rewriteInstallNameCommand(self, loadcmd): """Rewrite the load command of this dylib""" if self.id_cmd is not None: self.rewriteDataForCommand(self.id_cmd, loadcmd) return True return False def changedHeaderSizeBy(self, bytes): self.sizediff += bytes if (self.total_size + self.sizediff) > self.low_offset: print( "WARNING: Mach-O header in %r may be too large to relocate" % ( self.parent.filename,)) def rewriteLoadCommands(self, changefunc): """ Rewrite the load commands based upon a change dictionary """ data = changefunc(self.parent.filename) changed = False if data is not None: if self.rewriteInstallNameCommand( data.encode(sys.getfilesystemencoding())): changed = True for idx, name, filename in self.walkRelocatables(): data = changefunc(filename) if data is not None: if self.rewriteDataForCommand(idx, data.encode( sys.getfilesystemencoding())): changed = True return changed def rewriteDataForCommand(self, idx, data): lc, cmd, old_data = self.commands[idx] hdrsize = sizeof(lc.__class__) + sizeof(cmd.__class__) align = struct.calcsize('Q') data = data + (b'\x00' * (align - (len(data) % align))) newsize = hdrsize + len(data) self.commands[idx] = (lc, cmd, data) self.changedHeaderSizeBy(newsize - lc.cmdsize) lc.cmdsize, cmd.name = newsize, hdrsize return True def synchronize_size(self): if (self.total_size + self.sizediff) > self.low_offset: raise ValueError( ("New Mach-O header is too large to relocate in %r " "(new size=%r, max size=%r, delta=%r)") % ( self.parent.filename, self.total_size + self.sizediff, self.low_offset, self.sizediff)) self.header.sizeofcmds += self.sizediff self.total_size = sizeof(self.mach_header) + self.header.sizeofcmds self.sizediff = 0 def write(self, fileobj): fileobj = fileview(fileobj, self.offset, self.size) fileobj.seek(0) # serialize all the mach-o commands self.synchronize_size() self.header.to_fileobj(fileobj) for lc, cmd, data in self.commands: lc.to_fileobj(fileobj) cmd.to_fileobj(fileobj) if sys.version_info[0] == 2: if isinstance(data, unicode): fileobj.write(data.encode(sys.getfilesystemencoding())) elif isinstance(data, (bytes, str)): fileobj.write(data) else: # segments.. for obj in data: obj.to_fileobj(fileobj) else: if isinstance(data, str): fileobj.write(data.encode(sys.getfilesystemencoding())) elif isinstance(data, bytes): fileobj.write(data) else: # segments.. for obj in data: obj.to_fileobj(fileobj) # zero out the unused space, doubt this is strictly necessary # and is generally probably already the case fileobj.write(b'\x00' * (self.low_offset - fileobj.tell())) def getSymbolTableCommand(self): for lc, cmd, data in self.commands: if lc.cmd == LC_SYMTAB: return cmd return None def getDynamicSymbolTableCommand(self): for lc, cmd, data in self.commands: if lc.cmd == LC_DYSYMTAB: return cmd return None def get_filetype_shortname(self, filetype): if filetype in MH_FILETYPE_SHORTNAMES: return MH_FILETYPE_SHORTNAMES[filetype] else: return 'unknown' def main(fn): m = MachO(fn) seen = set() for header in m.headers: for idx, name, other in header.walkRelocatables(): if other not in seen: seen.add(other) print('\t' + name + ": " + other) if __name__ == '__main__': import sys files = sys.argv[1:] or ['/bin/ls'] for fn in files: print(fn) main(fn)