# -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 """ test fancy indexing & misc """ import pytest import weakref from warnings import catch_warnings from datetime import datetime from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype) from pandas.compat import range, lrange, lzip, StringIO import numpy as np import pandas as pd from pandas.core.indexing import (_non_reducing_slice, _maybe_numeric_slice, validate_indices) from pandas import NaT, DataFrame, Index, Series, MultiIndex import pandas.util.testing as tm from pandas.compat import PY2 from pandas.tests.indexing.common import Base, _mklbl # ------------------------------------------------------------------------ # Indexing test cases class TestFancy(Base): """ pure get/set item & fancy indexing """ def test_setitem_ndarray_1d(self): # GH5508 # len of indexer vs length of the 1d ndarray df = DataFrame(index=Index(lrange(1, 11))) df['foo'] = np.zeros(10, dtype=np.float64) df['bar'] = np.zeros(10, dtype=np.complex) # invalid def f(): df.loc[df.index[2:5], 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) pytest.raises(ValueError, f) # valid df.loc[df.index[2:6], 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) result = df.loc[df.index[2:6], 'bar'] expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name='bar') tm.assert_series_equal(result, expected) # dtype getting changed? df = DataFrame(index=Index(lrange(1, 11))) df['foo'] = np.zeros(10, dtype=np.float64) df['bar'] = np.zeros(10, dtype=np.complex) def f(): df[2:5] = np.arange(1, 4) * 1j pytest.raises(ValueError, f) def test_inf_upcast(self): # GH 16957 # We should be able to use np.inf as a key # np.inf should cause an index to convert to float # Test with np.inf in rows df = DataFrame(columns=[0]) df.loc[1] = 1 df.loc[2] = 2 df.loc[np.inf] = 3 # make sure we can look up the value assert df.loc[np.inf, 0] == 3 result = df.index expected = pd.Float64Index([1, 2, np.inf]) tm.assert_index_equal(result, expected) # Test with np.inf in columns df = DataFrame() df.loc[0, 0] = 1 df.loc[1, 1] = 2 df.loc[0, np.inf] = 3 result = df.columns expected = pd.Float64Index([0, 1, np.inf]) tm.assert_index_equal(result, expected) def test_setitem_dtype_upcast(self): # GH3216 df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) df['c'] = np.nan assert df['c'].dtype == np.float64 df.loc[0, 'c'] = 'foo' expected = DataFrame([{"a": 1, "c": 'foo'}, {"a": 3, "b": 2, "c": np.nan}]) tm.assert_frame_equal(df, expected) # GH10280 df = DataFrame(np.arange(6, dtype='int64').reshape(2, 3), index=list('ab'), columns=['foo', 'bar', 'baz']) for val in [3.14, 'wxyz']: left = df.copy() left.loc['a', 'bar'] = val right = DataFrame([[0, val, 2], [3, 4, 5]], index=list('ab'), columns=['foo', 'bar', 'baz']) tm.assert_frame_equal(left, right) assert is_integer_dtype(left['foo']) assert is_integer_dtype(left['baz']) left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0, index=list('ab'), columns=['foo', 'bar', 'baz']) left.loc['a', 'bar'] = 'wxyz' right = DataFrame([[0, 'wxyz', .2], [.3, .4, .5]], index=list('ab'), columns=['foo', 'bar', 'baz']) tm.assert_frame_equal(left, right) assert is_float_dtype(left['foo']) assert is_float_dtype(left['baz']) def test_dups_fancy_indexing(self): # GH 3455 from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(10, 3) df.columns = ['a', 'a', 'b'] result = df[['b', 'a']].columns expected = Index(['b', 'a', 'a']) tm.assert_index_equal(result, expected) # across dtypes df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']], columns=list('aaaaaaa')) df.head() str(df) result = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']]) result.columns = list('aaaaaaa') # TODO(wesm): unused? df_v = df.iloc[:, 4] # noqa res_v = result.iloc[:, 4] # noqa tm.assert_frame_equal(df, result) # GH 3561, dups not in selected order df = DataFrame( {'test': [5, 7, 9, 11], 'test1': [4., 5, 6, 7], 'other': list('abcd')}, index=['A', 'A', 'B', 'C']) rows = ['C', 'B'] expected = DataFrame( {'test': [11, 9], 'test1': [7., 6], 'other': ['d', 'c']}, index=rows) result = df.loc[rows] tm.assert_frame_equal(result, expected) result = df.loc[Index(rows)] tm.assert_frame_equal(result, expected) rows = ['C', 'B', 'E'] expected = DataFrame( {'test': [11, 9, np.nan], 'test1': [7., 6, np.nan], 'other': ['d', 'c', np.nan]}, index=rows) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[rows] tm.assert_frame_equal(result, expected) # see GH5553, make sure we use the right indexer rows = ['F', 'G', 'H', 'C', 'B', 'E'] expected = DataFrame({'test': [np.nan, np.nan, np.nan, 11, 9, np.nan], 'test1': [np.nan, np.nan, np.nan, 7., 6, np.nan], 'other': [np.nan, np.nan, np.nan, 'd', 'c', np.nan]}, index=rows) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[rows] tm.assert_frame_equal(result, expected) # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list('AABCD')) with pytest.raises(KeyError): dfnu.ix[['E']] # ToDo: check_index_type can be True after GH 11497 # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[[0, 8, 0]] expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[[0, 8, 0]] expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) tm.assert_frame_equal(result, expected, check_index_type=False) # non unique with non unique selector df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) expected = DataFrame( {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[['A', 'A', 'E']] tm.assert_frame_equal(result, expected) @pytest.mark.skipif(PY2, reason="GH-20770. Py2 unreliable warnings catching.") def test_dups_fancy_indexing2(self): # GH 5835 # dups on index and missing values df = DataFrame( np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A']) expected = pd.concat( [df.loc[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], index=df.index)], axis=1) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.loc[:, ['A', 'B', 'C']] tm.assert_frame_equal(result, expected) # GH 6504, multi-axis indexing df = DataFrame(np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=['a', 'b']) expected = df.iloc[0:6] result = df.loc[[1, 2]] tm.assert_frame_equal(result, expected) expected = df result = df.loc[:, ['a', 'b']] tm.assert_frame_equal(result, expected) expected = df.iloc[0:6, :] result = df.loc[[1, 2], ['a', 'b']] tm.assert_frame_equal(result, expected) def test_indexing_mixed_frame_bug(self): # GH3492 df = DataFrame({'a': {1: 'aaa', 2: 'bbb', 3: 'ccc'}, 'b': {1: 111, 2: 222, 3: 333}}) # this works, new column is created correctly df['test'] = df['a'].apply(lambda x: '_' if x == 'aaa' else x) # this does not work, ie column test is not changed idx = df['test'] == '_' temp = df.loc[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) df.loc[idx, 'test'] = temp assert df.iloc[0, 2] == '-----' # if I look at df, then element [0,2] equals '_'. If instead I type # df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I # get '_'. def test_multitype_list_index_access(self): # GH 10610 df = DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) with pytest.raises(KeyError): df[[22, 26, -8]] assert df[21].shape[0] == df.shape[0] def test_set_index_nan(self): # GH 3586 df = DataFrame({'PRuid': {17: 'nonQC', 18: 'nonQC', 19: 'nonQC', 20: '10', 21: '11', 22: '12', 23: '13', 24: '24', 25: '35', 26: '46', 27: '47', 28: '48', 29: '59', 30: '10'}, 'QC': {17: 0.0, 18: 0.0, 19: 0.0, 20: np.nan, 21: np.nan, 22: np.nan, 23: np.nan, 24: 1.0, 25: np.nan, 26: np.nan, 27: np.nan, 28: np.nan, 29: np.nan, 30: np.nan}, 'data': {17: 7.9544899999999998, 18: 8.0142609999999994, 19: 7.8591520000000008, 20: 0.86140349999999999, 21: 0.87853110000000001, 22: 0.8427041999999999, 23: 0.78587700000000005, 24: 0.73062459999999996, 25: 0.81668560000000001, 26: 0.81927080000000008, 27: 0.80705009999999999, 28: 0.81440240000000008, 29: 0.80140849999999997, 30: 0.81307740000000006}, 'year': {17: 2006, 18: 2007, 19: 2008, 20: 1985, 21: 1985, 22: 1985, 23: 1985, 24: 1985, 25: 1985, 26: 1985, 27: 1985, 28: 1985, 29: 1985, 30: 1986}}).reset_index() result = df.set_index(['year', 'PRuid', 'QC']).reset_index().reindex( columns=df.columns) tm.assert_frame_equal(result, df) def test_multi_nan_indexing(self): # GH 3588 df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'], 'b': ["C1", "C2", "C3", "C4"], "c": [10, 15, np.nan, 20]}) result = df.set_index(['a', 'b'], drop=False) expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'], 'b': ["C1", "C2", "C3", "C4"], "c": [10, 15, np.nan, 20]}, index=[Index(['R1', 'R2', np.nan, 'R4'], name='a'), Index(['C1', 'C2', 'C3', 'C4'], name='b')]) tm.assert_frame_equal(result, expected) def test_multi_assign(self): # GH 3626, an assignment of a sub-df to a df df = DataFrame({'FC': ['a', 'b', 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], 'col1': lrange(6), 'col2': lrange(6, 12)}) df.iloc[1, 0] = np.nan df2 = df.copy() mask = ~df2.FC.isna() cols = ['col1', 'col2'] dft = df2 * 2 dft.iloc[3, 3] = np.nan expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], 'col1': Series([0, 1, 4, 6, 8, 10]), 'col2': [12, 7, 16, np.nan, 20, 22]}) # frame on rhs df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) df2.loc[mask, cols] = dft.loc[mask, cols] tm.assert_frame_equal(df2, expected) # with an ndarray on rhs # coerces to float64 because values has float64 dtype # GH 14001 expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], 'PF': [0, 0, 0, 0, 1, 1], 'col1': [0., 1., 4., 6., 8., 10.], 'col2': [12, 7, 16, np.nan, 20, 22]}) df2 = df.copy() df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) df2.loc[mask, cols] = dft.loc[mask, cols].values tm.assert_frame_equal(df2, expected) # broadcasting on the rhs is required df = DataFrame(dict(A=[1, 2, 0, 0, 0], B=[0, 0, 0, 10, 11], C=[ 0, 0, 0, 10, 11], D=[3, 4, 5, 6, 7])) expected = df.copy() mask = expected['A'] == 0 for col in ['A', 'B']: expected.loc[mask, col] = df['D'] df.loc[df['A'] == 0, ['A', 'B']] = df['D'] tm.assert_frame_equal(df, expected) def test_setitem_list(self): # GH 6043 # ix with a list df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): df.ix[1, 0] = [1, 2, 3] df.ix[1, 0] = [1, 2] result = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): result.ix[1, 0] = [1, 2] tm.assert_frame_equal(result, df) # ix with an object class TO(object): def __init__(self, value): self.value = value def __str__(self): return "[{0}]".format(self.value) __repr__ = __str__ def __eq__(self, other): return self.value == other.value def view(self): return self df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): df.ix[1, 0] = TO(1) df.ix[1, 0] = TO(2) result = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): result.ix[1, 0] = TO(2) tm.assert_frame_equal(result, df) # remains object dtype even after setting it back df = DataFrame(index=[0, 1], columns=[0]) with catch_warnings(record=True): df.ix[1, 0] = TO(1) df.ix[1, 0] = np.nan result = DataFrame(index=[0, 1], columns=[0]) tm.assert_frame_equal(result, df) def test_string_slice(self): # GH 14424 # string indexing against datetimelike with object # dtype should properly raises KeyError df = DataFrame([1], Index([pd.Timestamp('2011-01-01')], dtype=object)) assert df.index.is_all_dates with pytest.raises(KeyError): df['2011'] with pytest.raises(KeyError): df.loc['2011', 0] df = DataFrame() assert not df.index.is_all_dates with pytest.raises(KeyError): df['2011'] with pytest.raises(KeyError): df.loc['2011', 0] def test_mi_access(self): # GH 4145 data = """h1 main h3 sub h5 0 a A 1 A1 1 1 b B 2 B1 2 2 c B 3 A1 3 3 d A 4 B2 4 4 e A 5 B2 5 5 f B 6 A2 6 """ df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0) df2 = df.set_index(['main', 'sub']).T.sort_index(1) index = Index(['h1', 'h3', 'h5']) columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub']) expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T result = df2.loc[:, ('A', 'A1')] tm.assert_frame_equal(result, expected) result = df2[('A', 'A1')] tm.assert_frame_equal(result, expected) # GH 4146, not returning a block manager when selecting a unique index # from a duplicate index # as of 4879, this returns a Series (which is similar to what happens # with a non-unique) expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1') result = df2['A']['A1'] tm.assert_series_equal(result, expected) # selecting a non_unique from the 2nd level expected = DataFrame([['d', 4, 4], ['e', 5, 5]], index=Index(['B2', 'B2'], name='sub'), columns=['h1', 'h3', 'h5'], ).T result = df2['A']['B2'] tm.assert_frame_equal(result, expected) def test_astype_assignment(self): # GH4312 (iloc) df_orig = DataFrame([['1', '2', '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) df = df_orig.copy() df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) expected = DataFrame([[1, 2, '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) tm.assert_frame_equal(df, expected) df = df_orig.copy() df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) expected = DataFrame([[1, 2, '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) tm.assert_frame_equal(df, expected) # GH5702 (loc) df = df_orig.copy() df.loc[:, 'A'] = df.loc[:, 'A'].astype(np.int64) expected = DataFrame([[1, '2', '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) tm.assert_frame_equal(df, expected) df = df_orig.copy() df.loc[:, ['B', 'C']] = df.loc[:, ['B', 'C']].astype(np.int64) expected = DataFrame([['1', 2, 3, '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) tm.assert_frame_equal(df, expected) # full replacements / no nans df = DataFrame({'A': [1., 2., 3., 4.]}) df.iloc[:, 0] = df['A'].astype(np.int64) expected = DataFrame({'A': [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) df = DataFrame({'A': [1., 2., 3., 4.]}) df.loc[:, 'A'] = df['A'].astype(np.int64) expected = DataFrame({'A': [1, 2, 3, 4]}) tm.assert_frame_equal(df, expected) def test_astype_assignment_with_dups(self): # GH 4686 # assignment with dups that has a dtype change cols = MultiIndex.from_tuples([('A', '1'), ('B', '1'), ('A', '2')]) df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object) index = df.index.copy() df['A'] = df['A'].astype(np.float64) tm.assert_index_equal(df.index, index) # TODO(wesm): unused variables # result = df.get_dtype_counts().sort_index() # expected = Series({'float64': 2, 'object': 1}).sort_index() @pytest.mark.parametrize("index,val", [ (Index([0, 1, 2]), 2), (Index([0, 1, '2']), '2'), (Index([0, 1, 2, np.inf, 4]), 4), (Index([0, 1, 2, np.nan, 4]), 4), (Index([0, 1, 2, np.inf]), np.inf), (Index([0, 1, 2, np.nan]), np.nan), ]) def test_index_contains(self, index, val): assert val in index @pytest.mark.parametrize("index,val", [ (Index([0, 1, 2]), '2'), (Index([0, 1, '2']), 2), (Index([0, 1, 2, np.inf]), 4), (Index([0, 1, 2, np.nan]), 4), (Index([0, 1, 2, np.inf]), np.nan), (Index([0, 1, 2, np.nan]), np.inf), # Checking if np.inf in Int64Index should not cause an OverflowError # Related to GH 16957 (pd.Int64Index([0, 1, 2]), np.inf), (pd.Int64Index([0, 1, 2]), np.nan), (pd.UInt64Index([0, 1, 2]), np.inf), (pd.UInt64Index([0, 1, 2]), np.nan), ]) def test_index_not_contains(self, index, val): assert val not in index def test_index_type_coercion(self): with catch_warnings(record=True): # GH 11836 # if we have an index type and set it with something that looks # to numpy like the same, but is actually, not # (e.g. setting with a float or string '0') # then we need to coerce to object # integer indexes for s in [Series(range(5)), Series(range(5), index=range(1, 6))]: assert s.index.is_integer() for indexer in [lambda x: x.ix, lambda x: x.loc, lambda x: x]: s2 = s.copy() indexer(s2)[0.1] = 0 assert s2.index.is_floating() assert indexer(s2)[0.1] == 0 s2 = s.copy() indexer(s2)[0.0] = 0 exp = s.index if 0 not in s: exp = Index(s.index.tolist() + [0]) tm.assert_index_equal(s2.index, exp) s2 = s.copy() indexer(s2)['0'] = 0 assert s2.index.is_object() for s in [Series(range(5), index=np.arange(5.))]: assert s.index.is_floating() for idxr in [lambda x: x.ix, lambda x: x.loc, lambda x: x]: s2 = s.copy() idxr(s2)[0.1] = 0 assert s2.index.is_floating() assert idxr(s2)[0.1] == 0 s2 = s.copy() idxr(s2)[0.0] = 0 tm.assert_index_equal(s2.index, s.index) s2 = s.copy() idxr(s2)['0'] = 0 assert s2.index.is_object() class TestMisc(Base): def test_indexer_caching(self): # GH5727 # make sure that indexers are in the _internal_names_set n = 1000001 arrays = [lrange(n), lrange(n)] index = MultiIndex.from_tuples(lzip(*arrays)) s = Series(np.zeros(n), index=index) str(s) # setitem expected = Series(np.ones(n), index=index) s = Series(np.zeros(n), index=index) s[s == 0] = 1 tm.assert_series_equal(s, expected) def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) df['a'] = 10 tm.assert_frame_equal(DataFrame({0.0: df[0.0], 1.0: df[1.0], 'a': [10] * 10}), df) def test_float_index_non_scalar_assignment(self): df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.]) df.loc[df.index[:2]] = 1 expected = DataFrame({'a': [1, 1, 3], 'b': [1, 1, 5]}, index=df.index) tm.assert_frame_equal(expected, df) df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.]) df2 = df.copy() df.loc[df.index] = df.loc[df.index] tm.assert_frame_equal(df, df2) def test_float_index_at_iat(self): s = Series([1, 2, 3], index=[0.1, 0.2, 0.3]) for el, item in s.iteritems(): assert s.at[el] == item for i in range(len(s)): assert s.iat[i] == i + 1 def test_rhs_alignment(self): # GH8258, tests that both rows & columns are aligned to what is # assigned to. covers both uniform data-type & multi-type cases def run_tests(df, rhs, right): # label, index, slice r, i, s = list('bcd'), [1, 2, 3], slice(1, 4) c, j, l = ['joe', 'jolie'], [1, 2], slice(1, 3) left = df.copy() left.loc[r, c] = rhs tm.assert_frame_equal(left, right) left = df.copy() left.iloc[i, j] = rhs tm.assert_frame_equal(left, right) left = df.copy() with catch_warnings(record=True): left.ix[s, l] = rhs tm.assert_frame_equal(left, right) left = df.copy() with catch_warnings(record=True): left.ix[i, j] = rhs tm.assert_frame_equal(left, right) left = df.copy() with catch_warnings(record=True): left.ix[r, c] = rhs tm.assert_frame_equal(left, right) xs = np.arange(20).reshape(5, 4) cols = ['jim', 'joe', 'jolie', 'joline'] df = DataFrame(xs, columns=cols, index=list('abcde')) # right hand side; permute the indices and multiplpy by -2 rhs = -2 * df.iloc[3:0:-1, 2:0:-1] # expected `right` result; just multiply by -2 right = df.copy() right.iloc[1:4, 1:3] *= -2 # run tests with uniform dtypes run_tests(df, rhs, right) # make frames multi-type & re-run tests for frame in [df, rhs, right]: frame['joe'] = frame['joe'].astype('float64') frame['jolie'] = frame['jolie'].map('@{0}'.format) run_tests(df, rhs, right) def test_str_label_slicing_with_negative_step(self): SLC = pd.IndexSlice def assert_slices_equivalent(l_slc, i_slc): tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) if not idx.is_integer: # For integer indices, ix and plain getitem are position-based. tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) for idx in [_mklbl('A', 20), np.arange(20) + 100, np.linspace(100, 150, 20)]: idx = Index(idx) s = Series(np.arange(20), index=idx) assert_slices_equivalent(SLC[idx[9]::-1], SLC[9::-1]) assert_slices_equivalent(SLC[:idx[9]:-1], SLC[:8:-1]) assert_slices_equivalent(SLC[idx[13]:idx[9]:-1], SLC[13:8:-1]) assert_slices_equivalent(SLC[idx[9]:idx[13]:-1], SLC[:0]) def test_slice_with_zero_step_raises(self): s = Series(np.arange(20), index=_mklbl('A', 20)) tm.assert_raises_regex(ValueError, 'slice step cannot be zero', lambda: s[::0]) tm.assert_raises_regex(ValueError, 'slice step cannot be zero', lambda: s.loc[::0]) with catch_warnings(record=True): tm.assert_raises_regex(ValueError, 'slice step cannot be zero', lambda: s.ix[::0]) def test_indexing_assignment_dict_already_exists(self): df = DataFrame({'x': [1, 2, 6], 'y': [2, 2, 8], 'z': [-5, 0, 5]}).set_index('z') expected = df.copy() rhs = dict(x=9, y=99) df.loc[5] = rhs expected.loc[5] = [9, 99] tm.assert_frame_equal(df, expected) def test_indexing_dtypes_on_empty(self): # Check that .iloc and .ix return correct dtypes GH9983 df = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']}) with catch_warnings(record=True): df2 = df.ix[[], :] assert df2.loc[:, 'a'].dtype == np.int64 tm.assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0]) with catch_warnings(record=True): tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) def test_range_in_series_indexing(self): # range can cause an indexing error # GH 11652 for x in [5, 999999, 1000000]: s = Series(index=range(x)) s.loc[range(1)] = 42 tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) s.loc[range(2)] = 43 tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) def test_non_reducing_slice(self): df = DataFrame([[0, 1], [2, 3]]) slices = [ # pd.IndexSlice[:, :], pd.IndexSlice[:, 1], pd.IndexSlice[1, :], pd.IndexSlice[[1], [1]], pd.IndexSlice[1, [1]], pd.IndexSlice[[1], 1], pd.IndexSlice[1], pd.IndexSlice[1, 1], slice(None, None, None), [0, 1], np.array([0, 1]), Series([0, 1]) ] for slice_ in slices: tslice_ = _non_reducing_slice(slice_) assert isinstance(df.loc[tslice_], DataFrame) def test_list_slice(self): # like dataframe getitem slices = [['A'], Series(['A']), np.array(['A'])] df = DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['A', 'B']) expected = pd.IndexSlice[:, ['A']] for subset in slices: result = _non_reducing_slice(subset) tm.assert_frame_equal(df.loc[result], df.loc[expected]) def test_maybe_numeric_slice(self): df = DataFrame({'A': [1, 2], 'B': ['c', 'd'], 'C': [True, False]}) result = _maybe_numeric_slice(df, slice_=None) expected = pd.IndexSlice[:, ['A']] assert result == expected result = _maybe_numeric_slice(df, None, include_bool=True) expected = pd.IndexSlice[:, ['A', 'C']] result = _maybe_numeric_slice(df, [1]) expected = [1] assert result == expected def test_partial_boolean_frame_indexing(self): # GH 17170 df = DataFrame(np.arange(9.).reshape(3, 3), index=list('abc'), columns=list('ABC')) index_df = DataFrame(1, index=list('ab'), columns=list('AB')) result = df[index_df.notnull()] expected = DataFrame(np.array([[0., 1., np.nan], [3., 4., np.nan], [np.nan] * 3]), index=list('abc'), columns=list('ABC')) tm.assert_frame_equal(result, expected) def test_no_reference_cycle(self): df = DataFrame({'a': [0, 1], 'b': [2, 3]}) for name in ('loc', 'iloc', 'at', 'iat'): getattr(df, name) with catch_warnings(record=True): getattr(df, 'ix') wr = weakref.ref(df) del df assert wr() is None class TestSeriesNoneCoercion(object): EXPECTED_RESULTS = [ # For numeric series, we should coerce to NaN. ([1, 2, 3], [np.nan, 2, 3]), ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), # For datetime series, we should coerce to NaT. ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]), # For objects, we should preserve the None value. (["foo", "bar", "baz"], [None, "bar", "baz"]), ] def test_coercion_with_setitem(self): for start_data, expected_result in self.EXPECTED_RESULTS: start_series = Series(start_data) start_series[0] = None expected_series = Series(expected_result) tm.assert_series_equal(start_series, expected_series) def test_coercion_with_loc_setitem(self): for start_data, expected_result in self.EXPECTED_RESULTS: start_series = Series(start_data) start_series.loc[0] = None expected_series = Series(expected_result) tm.assert_series_equal(start_series, expected_series) def test_coercion_with_setitem_and_series(self): for start_data, expected_result in self.EXPECTED_RESULTS: start_series = Series(start_data) start_series[start_series == start_series[0]] = None expected_series = Series(expected_result) tm.assert_series_equal(start_series, expected_series) def test_coercion_with_loc_and_series(self): for start_data, expected_result in self.EXPECTED_RESULTS: start_series = Series(start_data) start_series.loc[start_series == start_series[0]] = None expected_series = Series(expected_result) tm.assert_series_equal(start_series, expected_series) class TestDataframeNoneCoercion(object): EXPECTED_SINGLE_ROW_RESULTS = [ # For numeric series, we should coerce to NaN. ([1, 2, 3], [np.nan, 2, 3]), ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), # For datetime series, we should coerce to NaT. ([datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)]), # For objects, we should preserve the None value. (["foo", "bar", "baz"], [None, "bar", "baz"]), ] def test_coercion_with_loc(self): for start_data, expected_result, in self.EXPECTED_SINGLE_ROW_RESULTS: start_dataframe = DataFrame({'foo': start_data}) start_dataframe.loc[0, ['foo']] = None expected_dataframe = DataFrame({'foo': expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) def test_coercion_with_setitem_and_dataframe(self): for start_data, expected_result, in self.EXPECTED_SINGLE_ROW_RESULTS: start_dataframe = DataFrame({'foo': start_data}) start_dataframe[start_dataframe['foo'] == start_dataframe['foo'][ 0]] = None expected_dataframe = DataFrame({'foo': expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) def test_none_coercion_loc_and_dataframe(self): for start_data, expected_result, in self.EXPECTED_SINGLE_ROW_RESULTS: start_dataframe = DataFrame({'foo': start_data}) start_dataframe.loc[start_dataframe['foo'] == start_dataframe[ 'foo'][0]] = None expected_dataframe = DataFrame({'foo': expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) def test_none_coercion_mixed_dtypes(self): start_dataframe = DataFrame({ 'a': [1, 2, 3], 'b': [1.0, 2.0, 3.0], 'c': [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], 'd': ['a', 'b', 'c'] }) start_dataframe.iloc[0] = None exp = DataFrame({'a': [np.nan, 2, 3], 'b': [np.nan, 2.0, 3.0], 'c': [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], 'd': [None, 'b', 'c']}) tm.assert_frame_equal(start_dataframe, exp) def test_validate_indices_ok(): indices = np.asarray([0, 1]) validate_indices(indices, 2) validate_indices(indices[:0], 0) validate_indices(np.array([-1, -1]), 0) def test_validate_indices_low(): indices = np.asarray([0, -2]) with tm.assert_raises_regex(ValueError, "'indices' contains"): validate_indices(indices, 2) def test_validate_indices_high(): indices = np.asarray([0, 1, 2]) with tm.assert_raises_regex(IndexError, "indices are out"): validate_indices(indices, 2) def test_validate_indices_empty(): with tm.assert_raises_regex(IndexError, "indices are out"): validate_indices(np.array([0, 1]), 0)