import numpy as np import pandas._libs.index as _index from pandas.errors import PerformanceWarning import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm class TestMultiIndexBasic: def test_multiindex_perf_warn(self): df = DataFrame( { "jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4), } ).set_index(["jim", "joe"]) with tm.assert_produces_warning(PerformanceWarning): df.loc[(1, "z")] df = df.iloc[[2, 1, 3, 0]] with tm.assert_produces_warning(PerformanceWarning): df.loc[(0,)] def test_multiindex_contains_dropped(self): # GH 19027 # test that dropped MultiIndex levels are not in the MultiIndex # despite continuing to be in the MultiIndex's levels idx = MultiIndex.from_product([[1, 2], [3, 4]]) assert 2 in idx idx = idx.drop(2) # drop implementation keeps 2 in the levels assert 2 in idx.levels[0] # but it should no longer be in the index itself assert 2 not in idx # also applies to strings idx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) assert "a" in idx idx = idx.drop("a") assert "a" in idx.levels[0] assert "a" not in idx def test_indexing_over_hashtable_size_cutoff(self): n = 10000 old_cutoff = _index._SIZE_CUTOFF _index._SIZE_CUTOFF = 20000 s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n)))) # hai it works! assert s[("a", 5)] == 5 assert s[("a", 6)] == 6 assert s[("a", 7)] == 7 _index._SIZE_CUTOFF = old_cutoff def test_multi_nan_indexing(self): # GH 3588 df = DataFrame( { "a": ["R1", "R2", np.nan, "R4"], "b": ["C1", "C2", "C3", "C4"], "c": [10, 15, np.nan, 20], } ) result = df.set_index(["a", "b"], drop=False) expected = DataFrame( { "a": ["R1", "R2", np.nan, "R4"], "b": ["C1", "C2", "C3", "C4"], "c": [10, 15, np.nan, 20], }, index=[ Index(["R1", "R2", np.nan, "R4"], name="a"), Index(["C1", "C2", "C3", "C4"], name="b"), ], ) tm.assert_frame_equal(result, expected) def test_contains(self): # GH 24570 tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min") idx = MultiIndex.from_arrays([tx, np.arange(len(tx))]) assert tx[0] in idx assert "element_not_exit" not in idx assert "0 day 09:30:00" in idx def test_nested_tuples_duplicates(self): # GH#30892 dti = pd.to_datetime(["20190101", "20190101", "20190102"]) idx = pd.Index(["a", "a", "c"]) mi = pd.MultiIndex.from_arrays([dti, idx], names=["index1", "index2"]) df = pd.DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi) expected = pd.DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi) df2 = df.copy(deep=True) df2.loc[(dti[0], "a"), "c2"] = 1.0 tm.assert_frame_equal(df2, expected) df3 = df.copy(deep=True) df3.loc[[(dti[0], "a")], "c2"] = 1.0 tm.assert_frame_equal(df3, expected)