#-----------------------------------------------------------------------------
# Copyright (c) 2013, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License with exception
# for distributing bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------


"""
Find external dependencies of binary libraries.
"""


import os
import sys
import re
from glob import glob
# Required for extracting eggs.
import zipfile

from PyInstaller.compat import is_win, is_unix, is_aix, is_cygwin, is_darwin, is_py26, is_py27
from PyInstaller.depend import dylib
from PyInstaller.utils import winutils

import PyInstaller.compat as compat


import PyInstaller.log as logging
logger = logging.getLogger(__file__)

# Module-wide registry of names/paths (upper-cased) that were already
# processed. Shared by Dependencies(), selectAssemblies() and selectImports().
seen = {}

if is_win:
    if is_py26:
        try:
            # For Portable Python it is required to import pywintypes before
            # win32api module. See for details:
            # http://www.voidspace.org.uk/python/movpy/reference/win32ext.html#problems-with-win32api
            import pywintypes
            import win32api
        except ImportError:
            raise SystemExit("Error: PyInstaller for Python 2.6+ on Windows "
                             "needs pywin32.\r\nPlease install from "
                             "http://sourceforge.net/projects/pywin32/")

    from PyInstaller.utils.winmanifest import RT_MANIFEST
    from PyInstaller.utils.winmanifest import GetManifestResources
    from PyInstaller.utils.winmanifest import Manifest

    try:
        from PyInstaller.utils.winmanifest import winresource
    except ImportError:
        # Resource access is optional; getAssemblies() checks for None.
        winresource = None


def getfullnameof(mod, xtrapath=None):
    """
    Return the full path name of MOD.

    MOD is the basename of a dll or pyd.
    XTRAPATH is a path or a sequence of paths to search first.
    Return the full path name of MOD, or '' when it can not be found.
    Will search the full Windows search path, as well as sys.path
    """
    # TODO: Allow in import-hooks to specify additional paths where the PyInstaller
    #       should look for other libraries.
    # SciPy/Numpy Windows builds from http://www.lfd.uci.edu/~gohlke/pythonlibs
    # Contain some dlls in directory like C:\Python27\Lib\site-packages\numpy\core\
    from distutils.sysconfig import get_python_lib
    numpy_core_paths = [os.path.join(get_python_lib(), 'numpy', 'core')]
    # In virtualenv numpy might be installed directly in real prefix path.
    # Then include this path too.
    if hasattr(sys, 'real_prefix'):
        numpy_core_paths.append(
            os.path.join(sys.real_prefix, 'Lib', 'site-packages', 'numpy', 'core')
        )

    # Search sys.path first! The concatenation builds a new list, so sys.path
    # itself is never modified below.
    epath = sys.path + numpy_core_paths + winutils.get_system_path()
    if xtrapath is not None:
        if isinstance(xtrapath, (str, type(u''))):
            epath.insert(0, xtrapath)
        else:
            # Accept any sequence of paths (list, tuple, ...).
            epath = list(xtrapath) + epath

    # First try: file name exactly as given.
    for p in epath:
        npth = os.path.join(p, mod)
        if os.path.exists(npth):
            return npth
    # Second try: lower case filename.
    lower_mod = mod.lower()
    for p in epath:
        npth = os.path.join(p, lower_mod)
        if os.path.exists(npth):
            return npth
    return ''


def _getImports_pe(pth):
    """
    Find the binary dependencies of PTH.

    This implementation walks through the PE header
    and uses library pefile for that and supports
    32/64bit Windows

    Return a set with the dll names listed in the import directory.
    """
    import PyInstaller.lib.pefile as pefile
    dlls = set()
    # By default library pefile parses all PE information.
    # We are only interested in the list of dependent dlls.
    # Performance is improved by reading only needed information.
    # https://code.google.com/p/pefile/wiki/UsageExamples
    pe = pefile.PE(pth, fast_load=True)
    pe.parse_data_directories(directories=[
        pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']])
    # Some libraries have no other binary dependencies. Use empty list
    # in that case. Otherwise pefile would return None.
    # e.g. C:\windows\system32\kernel32.dll on Wine
    for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
        dlls.add(entry.dll)
    return dlls


def _extract_from_egg(toc):
    """
    Ensure all binary modules in zipped eggs get extracted and
    included with the frozen executable.

    TOC is a sequence of tuples (mod_name, path, type).

    return modified table of content
    """
    new_toc = []
    for modname, pth, typ in toc:
        if not os.path.isfile(pth):
            # Not a plain file on disk - it may live inside a zipped egg.
            # The first tuple of the result holds the usable path.
            pth = check_extract_from_egg(pth)[0][0]
        # Add value to new data structure.
        new_toc.append((modname, pth, typ))
    return new_toc


def Dependencies(lTOC, xtrapath=None, manifest=None):
    """
    Expand LTOC to include all the closure of binary dependencies.

    LTOC is a logical table of contents, ie, a seq of tuples
    (name, path, typecode).
    XTRAPATH is passed on to selectImports().
    Return LTOC expanded by all the binary dependencies of the entries
    in LTOC, except those listed in the module global EXCLUDES

    manifest should be a winmanifest.Manifest instance on Windows, so
    that all dependent assemblies can be added
    """
    # Extract all necessary binary modules from Python eggs to be included
    # directly with PyInstaller.
    lTOC = _extract_from_egg(lTOC)

    # NOTE: lTOC is deliberately extended while it is being iterated. The
    # list acts as a work queue, so newly found dependencies get analyzed
    # as well.
    for nm, pth, typ in lTOC:
        if seen.get(nm.upper(), 0):
            continue
        logger.debug("Analyzing %s", pth)
        seen[nm.upper()] = 1
        if is_win:
            for ftocnm, fn in selectAssemblies(pth, manifest):
                lTOC.append((ftocnm, fn, 'BINARY'))
        for lib, npth in selectImports(pth, xtrapath):
            if seen.get(lib.upper(), 0) or seen.get(npth.upper(), 0):
                continue
            seen[npth.upper()] = 1
            lTOC.append((lib, npth, 'BINARY'))

    return lTOC


def pkg_resouces_get_default_cache():
    """
    Determine the default cache location

    This returns the ``PYTHON_EGG_CACHE`` environment variable, if set.
    Otherwise, on Windows, it returns a 'Python-Eggs' subdirectory of the
    'Application Data' directory.  On all other systems, it's '~/.python-eggs'.

    Raise RuntimeError on Windows when no suitable directory can be derived
    from the environment.
    """
    # This function borrowed from setuptools/pkg_resources
    egg_cache = compat.getenv('PYTHON_EGG_CACHE')
    if egg_cache is not None:
        return egg_cache

    if os.name != 'nt':
        return os.path.expanduser('~/.python-eggs')

    app_data = 'Application Data'   # XXX this may be locale-specific!
    app_homes = [
        (('APPDATA',), None),       # best option, should be locale-safe
        (('USERPROFILE',), app_data),
        (('HOMEDRIVE', 'HOMEPATH'), app_data),
        (('HOMEPATH',), app_data),
        (('HOME',), None),
        (('WINDIR',), app_data),    # 95/98/ME
    ]

    for keys, subdir in app_homes:
        dirname = ''
        for key in keys:
            if key in os.environ:
                dirname = os.path.join(dirname, compat.getenv(key))
            else:
                # One of the required variables is missing; try next option.
                break
        else:
            # All variables of this option are set.
            if subdir:
                dirname = os.path.join(dirname, subdir)
            return os.path.join(dirname, 'Python-Eggs')

    raise RuntimeError(
        "Please set the PYTHON_EGG_CACHE enviroment variable"
    )


def check_extract_from_egg(pth, todir=None):
    r"""
    Check if path points to a file inside a python egg file, extract the
    file from the egg to a cache directory (following pkg_resources
    convention) and return [(extracted path, egg file path, relative path
    inside egg file)].
    Otherwise, just return [(original path, None, None)].
    If path points to an egg file directly, return a list with all files
    from the egg formatted like above.

    TODIR optionally overrides the directory files are extracted to.
    Raise SystemExit when the egg is not a valid zip file.

    Example:
    >>> check_extract_from_egg(r'C:\Python26\Lib\site-packages\my.egg\mymodule\my.pyd')
    [(r'C:\Users\UserName\AppData\Roaming\Python-Eggs\my.egg-tmp\mymodule\my.pyd',
    r'C:\Python26\Lib\site-packages\my.egg', r'mymodule/my.pyd')]
    """
    rv = []
    if os.path.altsep:
        pth = pth.replace(os.path.altsep, os.path.sep)
    components = pth.split(os.path.sep)
    for i, name in enumerate(components):
        if not name.lower().endswith(".egg"):
            continue
        eggpth = os.path.sep.join(components[:i + 1])
        if not os.path.isfile(eggpth):
            # eggs can also be directories!
            continue
        try:
            egg = zipfile.ZipFile(eggpth)
        except zipfile.BadZipfile as e:
            raise SystemExit("Error: %s %s" % (eggpth, e))
        # ZipFile is a context manager only since Python 2.7, so close the
        # archive explicitly to stay compatible with Python 2.6.
        try:
            if todir is None:
                # Use the same directory as setuptools/pkg_resources. So,
                # if the specific egg was accessed before (not necessarily
                # by pyinstaller), the extracted contents already exist
                # (pkg_resources puts them there) and can be used.
                todir = os.path.join(pkg_resouces_get_default_cache(),
                                     name + "-tmp")
            if components[i + 1:]:
                # Path points to one file inside the egg.
                members = ["/".join(components[i + 1:])]
            else:
                # Path points to the egg itself: take everything.
                members = egg.namelist()
            for member in members:
                target = os.path.join(todir, member)
                if not os.path.isfile(target):
                    dirname = os.path.dirname(target)
                    if not os.path.isdir(dirname):
                        os.makedirs(dirname)
                    with open(target, "wb") as f:
                        f.write(egg.read(member))
                rv.append((target, eggpth, member))
        finally:
            egg.close()
        return rv
    return [(pth, None, None)]


def getAssemblies(pth):
    """
    Return the dependent assemblies of a binary.

    A side-by-side '<pth>.manifest' file is preferred; otherwise the manifest
    resources embedded in the binary are read (needs winresource).
    Return a (possibly empty) list of dependent assembly objects.
    """
    if pth.lower().endswith(".manifest"):
        return []
    # check for manifest file
    manifestnm = pth + ".manifest"
    if os.path.isfile(manifestnm):
        with open(manifestnm, "rb") as fd:
            res = {RT_MANIFEST: {1: {0: fd.read()}}}
    elif not winresource:
        # resource access unavailable (needs pywin32)
        return []
    else:
        # check the binary for embedded manifest
        try:
            res = GetManifestResources(pth)
        except winresource.pywintypes.error as exc:
            if exc.args[0] == winresource.ERROR_BAD_EXE_FORMAT:
                logger.info('Cannot get manifest resource from non-PE '
                            'file %s', pth)
                return []
            raise
    rv = []
    if RT_MANIFEST in res and len(res[RT_MANIFEST]):
        for name in res[RT_MANIFEST]:
            for language in res[RT_MANIFEST][name]:
                # check the manifest for dependent assemblies
                try:
                    manifest = Manifest()
                    manifest.filename = ":".join([pth, str(RT_MANIFEST),
                                                  str(name), str(language)])
                    manifest.parse_string(res[RT_MANIFEST][name][language],
                                          False)
                except Exception as exc:
                    # Best effort: report the broken manifest and carry on
                    # with the remaining resources.
                    logger.error("Can not parse manifest resource %s, %s "
                                 "from %s", name, language, pth)
                    logger.exception(exc)
                else:
                    if manifest.dependentAssemblies:
                        logger.debug("Dependent assemblies of %s:", pth)
                        logger.debug(", ".join([assembly.getid()
                                                for assembly in
                                                manifest.dependentAssemblies]))
                    rv.extend(manifest.dependentAssemblies)
    return rv


def selectAssemblies(pth, manifest=None):
    """
    Return a binary's dependent assemblies files that should be included.

    When MANIFEST is given, assemblies it does not list yet are appended to
    its dependentAssemblies.

    Return a list of pairs (name, fullpath)
    """
    rv = []
    if manifest:
        _depNames = set([dep.name for dep in manifest.dependentAssemblies])
    for assembly in getAssemblies(pth):
        if seen.get(assembly.getid().upper(), 0):
            continue
        if manifest and assembly.name not in _depNames:
            # Add assembly as dependency to our final output exe's manifest
            logger.info("Adding %s to dependent assemblies "
                        "of final executable", assembly.name)
            manifest.dependentAssemblies.append(assembly)
            _depNames.add(assembly.name)
        if not dylib.include_library(assembly.name):
            logger.debug("Skipping assembly %s", assembly.getid())
            continue
        if assembly.optional:
            logger.debug("Skipping optional assembly %s", assembly.getid())
            continue
        files = assembly.find_files()
        if files:
            seen[assembly.getid().upper()] = 1
            for fn in files:
                fname, fext = os.path.splitext(fn)
                if fext.lower() == ".manifest":
                    nm = assembly.name + fext
                else:
                    nm = os.path.basename(fn)
                ftocnm = nm
                if assembly.language not in (None, "", "*", "neutral"):
                    ftocnm = os.path.join(assembly.getlanguage(),
                                          ftocnm)
                # Encode all names with the file system encoding.
                nm, ftocnm, fn = [item.encode(sys.getfilesystemencoding())
                                  for item in
                                  (nm, ftocnm, fn)]
                if not seen.get(fn.upper(), 0):
                    logger.debug("Adding %s", ftocnm)
                    seen[nm.upper()] = 1
                    seen[fn.upper()] = 1
                    rv.append((ftocnm, fn))
                else:
                    #logger.info("skipping %s part of assembly %s dependency of %s",
                    #            ftocnm, assembly.name, pth)
                    pass
        else:
            logger.error("Assembly %s not found", assembly.getid())
    return rv


def selectImports(pth, xtrapath=None):
    """
    Return the dependencies of a binary that should be included.

    XTRAPATH, when given, must be a list of additional directories; the
    directory of PTH is always searched first.

    Return a list of pairs (name, fullpath)
    """
    rv = []
    if xtrapath is None:
        xtrapath = [os.path.dirname(pth)]
    else:
        assert isinstance(xtrapath, list)
        xtrapath = [os.path.dirname(pth)] + xtrapath  # make a copy
    dlls = getImports(pth)
    for lib in dlls:
        if seen.get(lib.upper(), 0):
            continue
        if not is_win and not is_cygwin:
            # all other platforms
            npth = lib
            lib = os.path.basename(lib)
        else:
            # plain win case
            npth = getfullnameof(lib, xtrapath)

        # now npth is a candidate lib if found
        # check again for excludes but with regex FIXME: split the list
        if npth:
            candidatelib = npth
        else:
            candidatelib = lib

        if not dylib.include_library(candidatelib):
            if (candidatelib.find('libpython') < 0 and
                    candidatelib.find('Python.framework') < 0):
                # skip libs not containing (libpython or Python.framework)
                if not seen.get(npth.upper(), 0):
                    logger.debug("Skipping %s dependency of %s",
                                 lib, os.path.basename(pth))
                continue
            # The Python library itself is always kept, even when excluded.

        if npth:
            if not seen.get(npth.upper(), 0):
                logger.debug("Adding %s dependency of %s",
                             lib, os.path.basename(pth))
                rv.append((lib, npth))
        else:
            logger.warning("lib not found: %s dependency of %s", lib, pth)

    return rv


def _getImports_ldd(pth):
    """
    Find the binary dependencies of PTH.

    This implementation is for ldd platforms (mostly unix).

    Return a set with the paths of the libraries that exist on disk.
    """
    rslt = set()
    if is_aix:
        # Match libs of the form 'archive.a(sharedobject.so)'
        # Will not match the fake lib '/unix'
        lddPattern = re.compile(r"\s*(.*?)(\(.*\))")
    else:
        lddPattern = re.compile(r"\s*(.*?)\s+=>\s+(.*?)\s+\(.*\)")

    for line in compat.exec_command('ldd', pth).splitlines():
        m = lddPattern.search(line)
        if not m:
            continue
        if is_aix:
            lib = m.group(1)
            name = os.path.basename(lib) + m.group(2)
        else:
            name, lib = m.group(1), m.group(2)
        if name[:10] in ('linux-gate', 'linux-vdso'):
            # linux-gate is a fake library which does not exist and
            # should be ignored. See also:
            # http://www.trilithium.com/johan/2005/08/linux-gate/
            continue

        if os.path.exists(lib):
            # rslt is a set, so duplicates are dropped automatically.
            rslt.add(lib)
        else:
            logger.error('Can not find %s in path %s (needed by %s)',
                         name, lib, pth)
    return rslt


def _getImports_macholib(pth):
    """
    Find the binary dependencies of PTH.

    This implementation is for Mac OS X and uses library macholib.

    Return a set with the resolved paths of the dependent libraries.
    """
    from PyInstaller.lib.macholib.MachO import MachO
    from PyInstaller.lib.macholib.mach_o import LC_RPATH
    from PyInstaller.lib.macholib.dyld import dyld_find
    rslt = set()
    # Libraries read from binary headers. Named differently from the
    # module-level 'seen' registry to avoid shadowing it.
    referenced_libs = set()

    ## Walk through mach binary headers.

    m = MachO(pth)
    for header in m.headers:
        for idx, name, lib in header.walkRelocatables():
            # Sometimes some libraries are present multiple times; the set
            # keeps just one entry.
            referenced_libs.add(lib)

    # Walk through mach binary headers and look for LC_RPATH.
    # macholib can't handle @rpath. LC_RPATH has to be read
    # from the MachO header.
    # TODO Do we need to remove LC_RPATH from MachO load commands?
    #      Will it cause any harm to leave them untouched?
    #      Removing LC_RPATH should be implemented when getting
    #      files from the bincache if it is necessary.
    run_paths = set()
    for header in m.headers:
        for command in header.commands:
            # A command is a tuple: its first item carries the command type
            # in attribute 'cmd', its third item is the payload, e.g.
            # '../lib\x00\x00'.
            cmd_type = command[0].cmd
            if cmd_type == LC_RPATH:
                rpath = command[2]
                # Remove trailing '\x00' characters.
                # e.g. '../lib\x00\x00'
                rpath = rpath.rstrip('\x00')
                # Make rpath absolute. According to Apple doc LC_RPATH
                # is always relative to the binary location.
                rpath = os.path.normpath(os.path.join(os.path.dirname(pth), rpath))
                run_paths.add(rpath)

    ## Try to find files in file system.

    # In cases with @loader_path or @executable_path
    # try to look in the same directory as the checked binary is.
    # This seems to work in most cases.
    exec_path = os.path.abspath(os.path.dirname(pth))

    for lib in referenced_libs:

        # Suppose that @rpath is not used for system libraries and
        # using macholib can be avoided.
        # macholib can't handle @rpath.
        if lib.startswith('@rpath'):
            lib = lib.replace('@rpath', '.')  # Make path relative.
            final_lib = None  # Absolute path to existing lib on disk.
            # Try multiple locations.
            for run_path in run_paths:
                # @rpath may contain relative value. Use exec_path as
                # base path.
                if not os.path.isabs(run_path):
                    run_path = os.path.join(exec_path, run_path)
                # Stop looking for lib when found in first location.
                if os.path.exists(os.path.join(run_path, lib)):
                    final_lib = os.path.abspath(os.path.join(run_path, lib))
                    rslt.add(final_lib)
                    break
            # Log error if no existing file found.
            if not final_lib:
                logger.error('Can not find path %s (needed by %s)', lib, pth)

        # Macholib has to be used to get absolute path to libraries.
        else:
            # macholib can't handle @loader_path. It has to be
            # handled the same way as @executable_path.
            # It is also replaced by 'exec_path'.
            if lib.startswith('@loader_path'):
                lib = lib.replace('@loader_path', '@executable_path')
            try:
                lib = dyld_find(lib, executable_path=exec_path)
                rslt.add(lib)
            except ValueError:
                logger.error('Can not find path %s (needed by %s)', lib, pth)

    return rslt


def getImports(pth):
    """
    Forwards to the correct getImports implementation for the platform.

    Return a collection with the binary dependencies of PTH. On Windows
    an empty list is returned when PTH can not be analyzed.
    """
    if is_win or is_cygwin:
        if pth.lower().endswith(".manifest"):
            return []
        try:
            return _getImports_pe(pth)
        except Exception as exception:
            # Assemblies can pull in files which aren't necessarily PE,
            # but are still needed by the assembly. Any additional binary
            # dependencies should already have been handled by
            # selectAssemblies in that case, so just warn, return an empty
            # list and continue.
            if logger.isEnabledFor(logging.WARN):
                # log exception only if level >= warn
                logger.warning('Can not get binary dependencies for file: %s', pth)
                logger.exception(exception)
            return []
    elif is_darwin:
        return _getImports_macholib(pth)
    else:
        return _getImports_ldd(pth)


def findLibrary(name):
    """
    Look for a library in the system.

    Emulate the algorithm used by dlopen.
    `name` must include the prefix, e.g. ``libpython2.4.so``

    Return the path of the library (file name replaced by its soname when
    the soname can be determined) or None when nothing was found.
    """
    assert is_unix, "Current implementation for Unix only (Linux, Solaris, AIX)"

    lib = None

    # Look in the LD_LIBRARY_PATH according to platform.
    if is_aix:
        lp = compat.getenv('LIBPATH', '')
    elif is_darwin:
        lp = compat.getenv('DYLD_LIBRARY_PATH', '')
    else:
        lp = compat.getenv('LD_LIBRARY_PATH', '')
    # An unset/empty variable must not be searched at all: ''.split()
    # yields [''] which would make glob look in the current directory.
    search_dirs = lp.split(os.pathsep) if lp else []
    for path in search_dirs:
        libs = glob(os.path.join(path, name + '*'))
        if libs:
            lib = libs[0]
            break

    # Look in /etc/ld.so.cache
    # TODO Look for ldconfig in /usr/sbin/ldconfig. /sbin is deprecated
    #      in recent linux distributions.
    # Solaris does not have /sbin/ldconfig. Just check if this file exists.
    if lib is None and os.path.exists('/sbin/ldconfig'):
        expr = r'/[^\(\)\s]*%s\.[^\(\)\s]*' % re.escape(name)
        m = re.search(expr, compat.exec_command('/sbin/ldconfig', '-p'))
        if m:
            lib = m.group(0)

    # Look in the known safe paths
    if lib is None:
        paths = ['/lib', '/lib32', '/lib64', '/usr/lib', '/usr/lib32', '/usr/lib64']

        # On Debian/Ubuntu /usr/bin/python is linked statically with libpython.
        # Newer Debian/Ubuntu with multiarch support puts the libpythonX.Y.so
        # to paths like /usr/lib/i386-linux-gnu/.
        try:
            import sysconfig  # Module available only in Python 2.7.
            arch_subdir = sysconfig.get_config_var('multiarchsubdir')
            # Ignore if None is returned.
            if arch_subdir:
                arch_subdir = os.path.basename(arch_subdir)
                paths.extend([
                    os.path.join('/usr/lib', arch_subdir),
                    os.path.join('/usr/lib32', arch_subdir),
                    os.path.join('/usr/lib64', arch_subdir),
                ])
        except ImportError:
            pass

        if is_aix:
            paths.append('/opt/freeware/lib')

        for path in paths:
            libs = glob(os.path.join(path, name + '*'))
            if libs:
                lib = libs[0]
                break

    # give up :(
    if lib is None:
        return None

    # Resolve the file name into the soname
    soname = getSoname(lib)
    if soname is None:
        # No SONAME entry could be read; joining a path with None would
        # raise, so fall back to the file that was actually found.
        return lib
    return os.path.join(os.path.dirname(lib), soname)


def getSoname(filename):
    """
    Return the soname of a library.

    The value is parsed from the output of ``objdump -p -j .dynamic``.
    Return None when that output contains no SONAME entry.
    """
    cmd = ["objdump", "-p", "-j", ".dynamic", filename]
    m = re.search(r'\s+SONAME\s+([^\s]+)', compat.exec_command(*cmd))
    if m:
        return m.group(1)
    return None


def get_python_library_path():
    """
    Find dynamic Python library that will be bundled with frozen executable.

    Return  full path to Python dynamic library or None when not found.


    We need to know name of the Python dynamic library for the bootloader.
    Bootloader has to know what library to load and not trying to guess.

    Some linux distributions (e.g. debian-based) statically build the
    Python executable to the libpython, so bindepend doesn't include
    it in its output. In this situation let's try to find it.

    Darwin custom builds could possibly also have non-framework style libraries,
    so this method also checks for that variant as well.

    Raise SystemExit when the platform is not supported.
    """
    pyver = sys.version_info[:2]

    if is_win:
        names = ('python%d%d.dll' % pyver,)
    elif is_cygwin:
        names = ('libpython%d%d.dll' % pyver,)
    elif is_darwin:
        names = ('Python', '.Python', 'libpython%d.%d.dylib' % pyver)
    elif is_aix:
        # Shared libs on AIX are archives with shared object members, thus the ".a" suffix.
        names = ('libpython%d.%d.a' % pyver,)
    elif is_unix:
        # Other *nix platforms.
        names = ('libpython%d.%d.so.1.0' % pyver,)
    else:
        raise SystemExit('Your platform is not yet supported.')

    # Try to get Python library name from the Python executable. It assumes that Python
    # library is not statically linked.
    dlls = getImports(sys.executable)
    for filename in dlls:
        for name in names:
            if os.path.basename(filename) == name:
                # On Windows filename is just like 'python27.dll'. Convert it
                # to absolute path.
                if is_win and not os.path.isabs(filename):
                    filename = getfullnameof(filename)
                # Python library found. Return absolute path to it.
                return filename

    # Python library NOT found. Resume searching using alternative methods.

    # Applies only to non Windows platforms.

    if is_unix:
        for name in names:
            python_libname = findLibrary(name)
            if python_libname:
                return python_libname

    elif is_darwin:
        # On MacPython, Analysis.assemble is able to find the libpython with
        # no additional help, asking for sys.executable dependencies.
        # However, this fails on system python, because the shared library
        # is not listed as a dependency of the binary (most probably it's
        # opened at runtime using some dlopen trickery).
        # This happens on Mac OS X when Python is compiled as Framework.

        # Python compiled as Framework contains same values in sys.prefix
        # and exec_prefix. That's why we can use just sys.prefix.
        # In virtualenv PyInstaller is not able to find Python library.
        # We need special care for this case.
        if compat.is_virtualenv:
            py_prefix = compat.venv_real_prefix
        else:
            py_prefix = sys.prefix

        for name in names:
            full_path = os.path.join(py_prefix, name)
            if os.path.exists(full_path):
                return full_path

    # Python library NOT found. Return just None.
    return None