import unicodedata import sys import re from setuptools.extern import six # HFS Plus uses decomposed UTF-8 def decompose(path): if isinstance(path, six.text_type): return unicodedata.normalize('NFD', path) try: path = path.decode('utf-8') path = unicodedata.normalize('NFD', path) path = path.encode('utf-8') except UnicodeError: pass # Not UTF-8 return path def filesys_decode(path): """ Ensure that the given path is decoded, NONE when no expected encoding works """ if isinstance(path, six.text_type): return path fs_enc = sys.getfilesystemencoding() or 'utf-8' candidates = fs_enc, 'utf-8' for enc in candidates: try: return path.decode(enc) except UnicodeDecodeError: continue def try_encode(string, enc): "turn unicode encoding into a functional routine" try: return string.encode(enc) except UnicodeEncodeError: return None CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)') def detect_encoding(fp): first_line = fp.readline() fp.seek(0) m = CODING_RE.match(first_line) if m is None: return None return m.group(1).decode('ascii')