import numpy as np
from numpy.testing import assert_, assert_equal, assert_array_equal


def buffer_length(arr):
    """Return the number of bytes needed to hold the data of *arr*.

    Parameters
    ----------
    arr : str or buffer-exporting object
        For a ``str`` the size is computed from its widest character:
        1 byte per character when every code point is below 256, 2 bytes
        when every code point is below 65536, and 4 bytes otherwise.
        Any other object must support the buffer protocol (for example a
        NumPy array or ``bytes``).

    Returns
    -------
    int
        Total size in bytes. An empty string yields 0.

    Raises
    ------
    TypeError
        If *arr* is not a ``str`` and does not export a buffer.
    """
    if isinstance(arr, str):
        # ``default=0`` covers the empty string, where max() would raise.
        widest = max(map(ord, arr), default=0)
        if widest < 256:
            char_width = 1
        elif widest < 65536:
            char_width = 2
        else:
            char_width = 4
        return char_width * len(arr)

    # ``nbytes`` is product(shape) * itemsize and is always a Python int.
    # Computing it with ``np.prod`` returned a float for 0-d buffers
    # (``np.prod(()) == 1.0``) and a NumPy scalar otherwise.
    return memoryview(arr).nbytes


# In both cases below we need to make sure that the byte-swapped value (as
# UCS4) is still a valid unicode code point:

# Value that can be represented in UCS2 interpreters
ucs2_value = u'\u0900'

# Value that cannot be represented in UCS2 interpreters (but can in UCS4)
ucs4_value = u'\U00100900'