# common.py (29 KB)
  1. from __future__ import annotations
  2. from datetime import datetime
  3. import gc
  4. import numpy as np
  5. import pytest
  6. from pandas._libs import iNaT
  7. from pandas._libs.tslibs import Timestamp
  8. from pandas.core.dtypes.common import is_datetime64tz_dtype
  9. from pandas.core.dtypes.dtypes import CategoricalDtype
  10. import pandas as pd
  11. from pandas import (
  12. CategoricalIndex,
  13. DatetimeIndex,
  14. Float64Index,
  15. Index,
  16. Int64Index,
  17. IntervalIndex,
  18. MultiIndex,
  19. PeriodIndex,
  20. RangeIndex,
  21. Series,
  22. TimedeltaIndex,
  23. UInt64Index,
  24. isna,
  25. )
  26. import pandas._testing as tm
  27. from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
  28. class Base:
  29. """
  30. Base class for index sub-class tests.
  31. """
  32. _index_cls: type[Index]
  33. @pytest.fixture
  34. def simple_index(self):
  35. raise NotImplementedError("Method not implemented")
  36. def create_index(self) -> Index:
  37. raise NotImplementedError("Method not implemented")
  38. def test_pickle_compat_construction(self):
  39. # need an object to create with
  40. msg = "|".join(
  41. [
  42. r"Index\(\.\.\.\) must be called with a collection of some "
  43. r"kind, None was passed",
  44. r"DatetimeIndex\(\) must be called with a collection of some "
  45. r"kind, None was passed",
  46. r"TimedeltaIndex\(\) must be called with a collection of some "
  47. r"kind, None was passed",
  48. r"__new__\(\) missing 1 required positional argument: 'data'",
  49. r"__new__\(\) takes at least 2 arguments \(1 given\)",
  50. ]
  51. )
  52. with pytest.raises(TypeError, match=msg):
  53. self._index_cls()
  54. @pytest.mark.parametrize("name", [None, "new_name"])
  55. def test_to_frame(self, name, simple_index):
  56. # see GH-15230, GH-22580
  57. idx = simple_index
  58. if name:
  59. idx_name = name
  60. else:
  61. idx_name = idx.name or 0
  62. df = idx.to_frame(name=idx_name)
  63. assert df.index is idx
  64. assert len(df.columns) == 1
  65. assert df.columns[0] == idx_name
  66. assert df[idx_name].values is not idx.values
  67. df = idx.to_frame(index=False, name=idx_name)
  68. assert df.index is not idx
  69. def test_shift(self, simple_index):
  70. # GH8083 test the base class for shift
  71. idx = simple_index
  72. msg = (
  73. f"This method is only implemented for DatetimeIndex, PeriodIndex and "
  74. f"TimedeltaIndex; Got type {type(idx).__name__}"
  75. )
  76. with pytest.raises(NotImplementedError, match=msg):
  77. idx.shift(1)
  78. with pytest.raises(NotImplementedError, match=msg):
  79. idx.shift(1, 2)
  80. def test_constructor_name_unhashable(self, simple_index):
  81. # GH#29069 check that name is hashable
  82. # See also same-named test in tests.series.test_constructors
  83. idx = simple_index
  84. with pytest.raises(TypeError, match="Index.name must be a hashable type"):
  85. type(idx)(idx, name=[])
  86. def test_create_index_existing_name(self, simple_index):
  87. # GH11193, when an existing index is passed, and a new name is not
  88. # specified, the new index should inherit the previous object name
  89. expected = simple_index
  90. if not isinstance(expected, MultiIndex):
  91. expected.name = "foo"
  92. result = Index(expected)
  93. tm.assert_index_equal(result, expected)
  94. result = Index(expected, name="bar")
  95. expected.name = "bar"
  96. tm.assert_index_equal(result, expected)
  97. else:
  98. expected.names = ["foo", "bar"]
  99. result = Index(expected)
  100. tm.assert_index_equal(
  101. result,
  102. Index(
  103. Index(
  104. [
  105. ("foo", "one"),
  106. ("foo", "two"),
  107. ("bar", "one"),
  108. ("baz", "two"),
  109. ("qux", "one"),
  110. ("qux", "two"),
  111. ],
  112. dtype="object",
  113. ),
  114. names=["foo", "bar"],
  115. ),
  116. )
  117. result = Index(expected, names=["A", "B"])
  118. tm.assert_index_equal(
  119. result,
  120. Index(
  121. Index(
  122. [
  123. ("foo", "one"),
  124. ("foo", "two"),
  125. ("bar", "one"),
  126. ("baz", "two"),
  127. ("qux", "one"),
  128. ("qux", "two"),
  129. ],
  130. dtype="object",
  131. ),
  132. names=["A", "B"],
  133. ),
  134. )
  135. def test_numeric_compat(self, simple_index):
  136. idx = simple_index
  137. # Check that this doesn't cover MultiIndex case, if/when it does,
  138. # we can remove multi.test_compat.test_numeric_compat
  139. assert not isinstance(idx, MultiIndex)
  140. if type(idx) is Index:
  141. return
  142. typ = type(idx._data).__name__
  143. lmsg = "|".join(
  144. [
  145. rf"unsupported operand type\(s\) for \*: '{typ}' and 'int'",
  146. "cannot perform (__mul__|__truediv__|__floordiv__) with "
  147. f"this index type: {typ}",
  148. ]
  149. )
  150. with pytest.raises(TypeError, match=lmsg):
  151. idx * 1
  152. rmsg = "|".join(
  153. [
  154. rf"unsupported operand type\(s\) for \*: 'int' and '{typ}'",
  155. "cannot perform (__rmul__|__rtruediv__|__rfloordiv__) with "
  156. f"this index type: {typ}",
  157. ]
  158. )
  159. with pytest.raises(TypeError, match=rmsg):
  160. 1 * idx
  161. div_err = lmsg.replace("*", "/")
  162. with pytest.raises(TypeError, match=div_err):
  163. idx / 1
  164. div_err = rmsg.replace("*", "/")
  165. with pytest.raises(TypeError, match=div_err):
  166. 1 / idx
  167. floordiv_err = lmsg.replace("*", "//")
  168. with pytest.raises(TypeError, match=floordiv_err):
  169. idx // 1
  170. floordiv_err = rmsg.replace("*", "//")
  171. with pytest.raises(TypeError, match=floordiv_err):
  172. 1 // idx
  173. def test_logical_compat(self, simple_index):
  174. idx = simple_index
  175. with pytest.raises(TypeError, match="cannot perform all"):
  176. idx.all()
  177. with pytest.raises(TypeError, match="cannot perform any"):
  178. idx.any()
  179. def test_repr_roundtrip(self, simple_index):
  180. idx = simple_index
  181. tm.assert_index_equal(eval(repr(idx)), idx)
  182. def test_repr_max_seq_item_setting(self, simple_index):
  183. # GH10182
  184. idx = simple_index
  185. idx = idx.repeat(50)
  186. with pd.option_context("display.max_seq_items", None):
  187. repr(idx)
  188. assert "..." not in str(idx)
  189. def test_copy_name(self, index):
  190. # gh-12309: Check that the "name" argument
  191. # passed at initialization is honored.
  192. if isinstance(index, MultiIndex):
  193. return
  194. first = type(index)(index, copy=True, name="mario")
  195. second = type(first)(first, copy=False)
  196. # Even though "copy=False", we want a new object.
  197. assert first is not second
  198. # Not using tm.assert_index_equal() since names differ.
  199. assert index.equals(first)
  200. assert first.name == "mario"
  201. assert second.name == "mario"
  202. s1 = Series(2, index=first)
  203. s2 = Series(3, index=second[:-1])
  204. if not isinstance(index, CategoricalIndex):
  205. # See gh-13365
  206. s3 = s1 * s2
  207. assert s3.index.name == "mario"
  208. def test_copy_name2(self, index):
  209. # gh-35592
  210. if isinstance(index, MultiIndex):
  211. return
  212. assert index.copy(name="mario").name == "mario"
  213. with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
  214. index.copy(name=["mario", "luigi"])
  215. msg = f"{type(index).__name__}.name must be a hashable type"
  216. with pytest.raises(TypeError, match=msg):
  217. index.copy(name=[["mario"]])
  218. def test_ensure_copied_data(self, index):
  219. # Check the "copy" argument of each Index.__new__ is honoured
  220. # GH12309
  221. init_kwargs = {}
  222. if isinstance(index, PeriodIndex):
  223. # Needs "freq" specification:
  224. init_kwargs["freq"] = index.freq
  225. elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
  226. # RangeIndex cannot be initialized from data
  227. # MultiIndex and CategoricalIndex are tested separately
  228. return
  229. index_type = type(index)
  230. result = index_type(index.values, copy=True, **init_kwargs)
  231. if is_datetime64tz_dtype(index.dtype):
  232. result = result.tz_localize("UTC").tz_convert(index.tz)
  233. if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
  234. index = index._with_freq(None)
  235. tm.assert_index_equal(index, result)
  236. if isinstance(index, PeriodIndex):
  237. # .values an object array of Period, thus copied
  238. result = index_type(ordinal=index.asi8, copy=False, **init_kwargs)
  239. tm.assert_numpy_array_equal(index.asi8, result.asi8, check_same="same")
  240. elif isinstance(index, IntervalIndex):
  241. # checked in test_interval.py
  242. pass
  243. else:
  244. result = index_type(index.values, copy=False, **init_kwargs)
  245. tm.assert_numpy_array_equal(index.values, result.values, check_same="same")
  246. def test_memory_usage(self, index):
  247. index._engine.clear_mapping()
  248. result = index.memory_usage()
  249. if index.empty:
  250. # we report 0 for no-length
  251. assert result == 0
  252. return
  253. # non-zero length
  254. index.get_loc(index[0])
  255. result2 = index.memory_usage()
  256. result3 = index.memory_usage(deep=True)
  257. # RangeIndex, IntervalIndex
  258. # don't have engines
  259. if not isinstance(index, (RangeIndex, IntervalIndex)):
  260. assert result2 > result
  261. if index.inferred_type == "object":
  262. assert result3 > result2
  263. def test_argsort(self, request, index):
  264. # separately tested
  265. if isinstance(index, CategoricalIndex):
  266. return
  267. result = index.argsort()
  268. expected = np.array(index).argsort()
  269. tm.assert_numpy_array_equal(result, expected, check_dtype=False)
  270. def test_numpy_argsort(self, index):
  271. result = np.argsort(index)
  272. expected = index.argsort()
  273. tm.assert_numpy_array_equal(result, expected)
  274. # these are the only two types that perform
  275. # pandas compatibility input validation - the
  276. # rest already perform separate (or no) such
  277. # validation via their 'values' attribute as
  278. # defined in pandas.core.indexes/base.py - they
  279. # cannot be changed at the moment due to
  280. # backwards compatibility concerns
  281. if isinstance(type(index), (CategoricalIndex, RangeIndex)):
  282. # TODO: why type(index)?
  283. msg = "the 'axis' parameter is not supported"
  284. with pytest.raises(ValueError, match=msg):
  285. np.argsort(index, axis=1)
  286. msg = "the 'kind' parameter is not supported"
  287. with pytest.raises(ValueError, match=msg):
  288. np.argsort(index, kind="mergesort")
  289. msg = "the 'order' parameter is not supported"
  290. with pytest.raises(ValueError, match=msg):
  291. np.argsort(index, order=("a", "b"))
  292. def test_repeat(self, simple_index):
  293. rep = 2
  294. idx = simple_index.copy()
  295. expected = Index(idx.values.repeat(rep), name=idx.name)
  296. tm.assert_index_equal(idx.repeat(rep), expected)
  297. idx = simple_index
  298. rep = np.arange(len(idx))
  299. expected = Index(idx.values.repeat(rep), name=idx.name)
  300. tm.assert_index_equal(idx.repeat(rep), expected)
  301. def test_numpy_repeat(self, simple_index):
  302. rep = 2
  303. idx = simple_index
  304. expected = idx.repeat(rep)
  305. tm.assert_index_equal(np.repeat(idx, rep), expected)
  306. msg = "the 'axis' parameter is not supported"
  307. with pytest.raises(ValueError, match=msg):
  308. np.repeat(idx, rep, axis=0)
  309. @pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
  310. def test_where(self, klass, simple_index):
  311. idx = simple_index
  312. if isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
  313. # where does not preserve freq
  314. idx = idx._with_freq(None)
  315. cond = [True] * len(idx)
  316. result = idx.where(klass(cond))
  317. expected = idx
  318. tm.assert_index_equal(result, expected)
  319. cond = [False] + [True] * len(idx[1:])
  320. expected = Index([idx._na_value] + idx[1:].tolist(), dtype=idx.dtype)
  321. result = idx.where(klass(cond))
  322. tm.assert_index_equal(result, expected)
  323. def test_insert_base(self, index):
  324. result = index[1:4]
  325. if not len(index):
  326. return
  327. # test 0th element
  328. assert index[0:4].equals(result.insert(0, index[0]))
  329. def test_delete_base(self, index):
  330. if not len(index):
  331. return
  332. if isinstance(index, RangeIndex):
  333. # tested in class
  334. return
  335. expected = index[1:]
  336. result = index.delete(0)
  337. assert result.equals(expected)
  338. assert result.name == expected.name
  339. expected = index[:-1]
  340. result = index.delete(-1)
  341. assert result.equals(expected)
  342. assert result.name == expected.name
  343. length = len(index)
  344. msg = f"index {length} is out of bounds for axis 0 with size {length}"
  345. with pytest.raises(IndexError, match=msg):
  346. index.delete(length)
  347. def test_equals(self, index):
  348. if isinstance(index, IntervalIndex):
  349. # IntervalIndex tested separately, the index.equals(index.astype(object))
  350. # fails for IntervalIndex
  351. return
  352. assert index.equals(index)
  353. assert index.equals(index.copy())
  354. assert index.equals(index.astype(object))
  355. assert not index.equals(list(index))
  356. assert not index.equals(np.array(index))
  357. # Cannot pass in non-int64 dtype to RangeIndex
  358. if not isinstance(index, RangeIndex):
  359. same_values = Index(index, dtype=object)
  360. assert index.equals(same_values)
  361. assert same_values.equals(index)
  362. if index.nlevels == 1:
  363. # do not test MultiIndex
  364. assert not index.equals(Series(index))
  365. def test_equals_op(self, simple_index):
  366. # GH9947, GH10637
  367. index_a = simple_index
  368. n = len(index_a)
  369. index_b = index_a[0:-1]
  370. index_c = index_a[0:-1].append(index_a[-2:-1])
  371. index_d = index_a[0:1]
  372. msg = "Lengths must match|could not be broadcast"
  373. with pytest.raises(ValueError, match=msg):
  374. index_a == index_b
  375. expected1 = np.array([True] * n)
  376. expected2 = np.array([True] * (n - 1) + [False])
  377. tm.assert_numpy_array_equal(index_a == index_a, expected1)
  378. tm.assert_numpy_array_equal(index_a == index_c, expected2)
  379. # test comparisons with numpy arrays
  380. array_a = np.array(index_a)
  381. array_b = np.array(index_a[0:-1])
  382. array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
  383. array_d = np.array(index_a[0:1])
  384. with pytest.raises(ValueError, match=msg):
  385. index_a == array_b
  386. tm.assert_numpy_array_equal(index_a == array_a, expected1)
  387. tm.assert_numpy_array_equal(index_a == array_c, expected2)
  388. # test comparisons with Series
  389. series_a = Series(array_a)
  390. series_b = Series(array_b)
  391. series_c = Series(array_c)
  392. series_d = Series(array_d)
  393. with pytest.raises(ValueError, match=msg):
  394. index_a == series_b
  395. tm.assert_numpy_array_equal(index_a == series_a, expected1)
  396. tm.assert_numpy_array_equal(index_a == series_c, expected2)
  397. # cases where length is 1 for one of them
  398. with pytest.raises(ValueError, match="Lengths must match"):
  399. index_a == index_d
  400. with pytest.raises(ValueError, match="Lengths must match"):
  401. index_a == series_d
  402. with pytest.raises(ValueError, match="Lengths must match"):
  403. index_a == array_d
  404. msg = "Can only compare identically-labeled Series objects"
  405. with pytest.raises(ValueError, match=msg):
  406. series_a == series_d
  407. with pytest.raises(ValueError, match="Lengths must match"):
  408. series_a == array_d
  409. # comparing with a scalar should broadcast; note that we are excluding
  410. # MultiIndex because in this case each item in the index is a tuple of
  411. # length 2, and therefore is considered an array of length 2 in the
  412. # comparison instead of a scalar
  413. if not isinstance(index_a, MultiIndex):
  414. expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
  415. # assuming the 2nd to last item is unique in the data
  416. item = index_a[-2]
  417. tm.assert_numpy_array_equal(index_a == item, expected3)
  418. # For RangeIndex we can convert to Int64Index
  419. tm.assert_series_equal(series_a == item, Series(expected3))
  420. def test_format(self, simple_index):
  421. # GH35439
  422. idx = simple_index
  423. expected = [str(x) for x in idx]
  424. assert idx.format() == expected
  425. def test_format_empty(self):
  426. # GH35712
  427. empty_idx = self._index_cls([])
  428. assert empty_idx.format() == []
  429. assert empty_idx.format(name=True) == [""]
  430. def test_hasnans_isnans(self, index_flat):
  431. # GH 11343, added tests for hasnans / isnans
  432. index = index_flat
  433. # cases in indices doesn't include NaN
  434. idx = index.copy(deep=True)
  435. expected = np.array([False] * len(idx), dtype=bool)
  436. tm.assert_numpy_array_equal(idx._isnan, expected)
  437. assert idx.hasnans is False
  438. idx = index.copy(deep=True)
  439. values = np.asarray(idx.values)
  440. if len(index) == 0:
  441. return
  442. elif isinstance(index, DatetimeIndexOpsMixin):
  443. values[1] = iNaT
  444. elif isinstance(index, (Int64Index, UInt64Index, RangeIndex)):
  445. return
  446. else:
  447. values[1] = np.nan
  448. if isinstance(index, PeriodIndex):
  449. idx = type(index)(values, freq=index.freq)
  450. else:
  451. idx = type(index)(values)
  452. expected = np.array([False] * len(idx), dtype=bool)
  453. expected[1] = True
  454. tm.assert_numpy_array_equal(idx._isnan, expected)
  455. assert idx.hasnans is True
  456. def test_fillna(self, index):
  457. # GH 11343
  458. if len(index) == 0:
  459. pass
  460. elif isinstance(index, MultiIndex):
  461. idx = index.copy(deep=True)
  462. msg = "isna is not defined for MultiIndex"
  463. with pytest.raises(NotImplementedError, match=msg):
  464. idx.fillna(idx[0])
  465. else:
  466. idx = index.copy(deep=True)
  467. result = idx.fillna(idx[0])
  468. tm.assert_index_equal(result, idx)
  469. assert result is not idx
  470. msg = "'value' must be a scalar, passed: "
  471. with pytest.raises(TypeError, match=msg):
  472. idx.fillna([idx[0]])
  473. idx = index.copy(deep=True)
  474. values = np.asarray(idx.values)
  475. if isinstance(index, DatetimeIndexOpsMixin):
  476. values[1] = iNaT
  477. elif isinstance(index, (Int64Index, UInt64Index, RangeIndex)):
  478. return
  479. else:
  480. values[1] = np.nan
  481. if isinstance(index, PeriodIndex):
  482. idx = type(index)(values, freq=index.freq)
  483. else:
  484. idx = type(index)(values)
  485. expected = np.array([False] * len(idx), dtype=bool)
  486. expected[1] = True
  487. tm.assert_numpy_array_equal(idx._isnan, expected)
  488. assert idx.hasnans is True
  489. def test_nulls(self, index):
  490. # this is really a smoke test for the methods
  491. # as these are adequately tested for function elsewhere
  492. if len(index) == 0:
  493. tm.assert_numpy_array_equal(index.isna(), np.array([], dtype=bool))
  494. elif isinstance(index, MultiIndex):
  495. idx = index.copy()
  496. msg = "isna is not defined for MultiIndex"
  497. with pytest.raises(NotImplementedError, match=msg):
  498. idx.isna()
  499. elif not index.hasnans:
  500. tm.assert_numpy_array_equal(index.isna(), np.zeros(len(index), dtype=bool))
  501. tm.assert_numpy_array_equal(index.notna(), np.ones(len(index), dtype=bool))
  502. else:
  503. result = isna(index)
  504. tm.assert_numpy_array_equal(index.isna(), result)
  505. tm.assert_numpy_array_equal(index.notna(), ~result)
  506. def test_empty(self, simple_index):
  507. # GH 15270
  508. idx = simple_index
  509. assert not idx.empty
  510. assert idx[:0].empty
  511. def test_join_self_unique(self, join_type, simple_index):
  512. idx = simple_index
  513. if idx.is_unique:
  514. joined = idx.join(idx, how=join_type)
  515. assert (idx == joined).all()
  516. def test_map(self, simple_index):
  517. # callable
  518. idx = simple_index
  519. # we don't infer UInt64
  520. if isinstance(idx, UInt64Index):
  521. expected = idx.astype("int64")
  522. else:
  523. expected = idx
  524. result = idx.map(lambda x: x)
  525. # For RangeIndex we convert to Int64Index
  526. tm.assert_index_equal(result, expected)
  527. @pytest.mark.parametrize(
  528. "mapper",
  529. [
  530. lambda values, index: {i: e for e, i in zip(values, index)},
  531. lambda values, index: Series(values, index),
  532. ],
  533. )
  534. def test_map_dictlike(self, mapper, simple_index):
  535. idx = simple_index
  536. if isinstance(idx, CategoricalIndex):
  537. pytest.skip(f"skipping tests for {type(idx)}")
  538. identity = mapper(idx.values, idx)
  539. # we don't infer to UInt64 for a dict
  540. if isinstance(idx, UInt64Index) and isinstance(identity, dict):
  541. expected = idx.astype("int64")
  542. else:
  543. expected = idx
  544. result = idx.map(identity)
  545. # For RangeIndex we convert to Int64Index
  546. tm.assert_index_equal(result, expected)
  547. # empty mappable
  548. expected = Index([np.nan] * len(idx))
  549. result = idx.map(mapper(expected, idx))
  550. tm.assert_index_equal(result, expected)
  551. def test_map_str(self, simple_index):
  552. # GH 31202
  553. idx = simple_index
  554. result = idx.map(str)
  555. expected = Index([str(x) for x in idx], dtype=object)
  556. tm.assert_index_equal(result, expected)
  557. @pytest.mark.parametrize("copy", [True, False])
  558. @pytest.mark.parametrize("name", [None, "foo"])
  559. @pytest.mark.parametrize("ordered", [True, False])
  560. def test_astype_category(self, copy, name, ordered, simple_index):
  561. # GH 18630
  562. idx = simple_index
  563. if name:
  564. idx = idx.rename(name)
  565. # standard categories
  566. dtype = CategoricalDtype(ordered=ordered)
  567. result = idx.astype(dtype, copy=copy)
  568. expected = CategoricalIndex(idx, name=name, ordered=ordered)
  569. tm.assert_index_equal(result, expected, exact=True)
  570. # non-standard categories
  571. dtype = CategoricalDtype(idx.unique().tolist()[:-1], ordered)
  572. result = idx.astype(dtype, copy=copy)
  573. expected = CategoricalIndex(idx, name=name, dtype=dtype)
  574. tm.assert_index_equal(result, expected, exact=True)
  575. if ordered is False:
  576. # dtype='category' defaults to ordered=False, so only test once
  577. result = idx.astype("category", copy=copy)
  578. expected = CategoricalIndex(idx, name=name)
  579. tm.assert_index_equal(result, expected, exact=True)
  580. def test_is_unique(self, simple_index):
  581. # initialize a unique index
  582. index = simple_index.drop_duplicates()
  583. assert index.is_unique is True
  584. # empty index should be unique
  585. index_empty = index[:0]
  586. assert index_empty.is_unique is True
  587. # test basic dupes
  588. index_dup = index.insert(0, index[0])
  589. assert index_dup.is_unique is False
  590. # single NA should be unique
  591. index_na = index.insert(0, np.nan)
  592. assert index_na.is_unique is True
  593. # multiple NA should not be unique
  594. index_na_dup = index_na.insert(0, np.nan)
  595. assert index_na_dup.is_unique is False
  596. @pytest.mark.arm_slow
  597. def test_engine_reference_cycle(self, simple_index):
  598. # GH27585
  599. index = simple_index
  600. nrefs_pre = len(gc.get_referrers(index))
  601. index._engine
  602. assert len(gc.get_referrers(index)) == nrefs_pre
  603. def test_getitem_2d_deprecated(self, simple_index):
  604. # GH#30588
  605. idx = simple_index
  606. msg = "Support for multi-dimensional indexing"
  607. check = not isinstance(idx, (RangeIndex, CategoricalIndex))
  608. with tm.assert_produces_warning(
  609. FutureWarning, match=msg, check_stacklevel=check
  610. ):
  611. res = idx[:, None]
  612. assert isinstance(res, np.ndarray), type(res)
  613. def test_copy_shares_cache(self, simple_index):
  614. # GH32898, GH36840
  615. idx = simple_index
  616. idx.get_loc(idx[0]) # populates the _cache.
  617. copy = idx.copy()
  618. assert copy._cache is idx._cache
  619. def test_shallow_copy_shares_cache(self, simple_index):
  620. # GH32669, GH36840
  621. idx = simple_index
  622. idx.get_loc(idx[0]) # populates the _cache.
  623. shallow_copy = idx._view()
  624. assert shallow_copy._cache is idx._cache
  625. shallow_copy = idx._shallow_copy(idx._data)
  626. assert shallow_copy._cache is not idx._cache
  627. assert shallow_copy._cache == {}
  628. def test_index_groupby(self, simple_index):
  629. idx = simple_index[:5]
  630. to_groupby = np.array([1, 2, np.nan, 2, 1])
  631. tm.assert_dict_equal(
  632. idx.groupby(to_groupby), {1.0: idx[[0, 4]], 2.0: idx[[1, 3]]}
  633. )
  634. to_groupby = DatetimeIndex(
  635. [
  636. datetime(2011, 11, 1),
  637. datetime(2011, 12, 1),
  638. pd.NaT,
  639. datetime(2011, 12, 1),
  640. datetime(2011, 11, 1),
  641. ],
  642. tz="UTC",
  643. ).values
  644. ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")]
  645. expected = {ex_keys[0]: idx[[0, 4]], ex_keys[1]: idx[[1, 3]]}
  646. tm.assert_dict_equal(idx.groupby(to_groupby), expected)
  647. class NumericBase(Base):
  648. """
  649. Base class for numeric index (incl. RangeIndex) sub-class tests.
  650. """
  651. def test_constructor_unwraps_index(self, dtype):
  652. idx = Index([1, 2], dtype=dtype)
  653. result = self._index_cls(idx)
  654. expected = np.array([1, 2], dtype=dtype)
  655. tm.assert_numpy_array_equal(result._data, expected)
  656. def test_where(self):
  657. # Tested in numeric.test_indexing
  658. pass
  659. def test_can_hold_identifiers(self, simple_index):
  660. idx = simple_index
  661. key = idx[0]
  662. assert idx._can_hold_identifiers_and_holds_name(key) is False
  663. def test_format(self, simple_index):
  664. # GH35439
  665. idx = simple_index
  666. max_width = max(len(str(x)) for x in idx)
  667. expected = [str(x).ljust(max_width) for x in idx]
  668. assert idx.format() == expected
  669. def test_numeric_compat(self):
  670. pass # override Base method
  671. def test_insert_na(self, nulls_fixture, simple_index):
  672. # GH 18295 (test missing)
  673. index = simple_index
  674. na_val = nulls_fixture
  675. if na_val is pd.NaT:
  676. expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
  677. else:
  678. expected = Float64Index([index[0], np.nan] + list(index[1:]))
  679. result = index.insert(1, na_val)
  680. tm.assert_index_equal(result, expected)
  681. def test_arithmetic_explicit_conversions(self):
  682. # GH 8608
  683. # add/sub are overridden explicitly for Float/Int Index
  684. index_cls = self._index_cls
  685. if index_cls is RangeIndex:
  686. idx = RangeIndex(5)
  687. else:
  688. idx = index_cls(np.arange(5, dtype="int64"))
  689. # float conversions
  690. arr = np.arange(5, dtype="int64") * 3.2
  691. expected = Float64Index(arr)
  692. fidx = idx * 3.2
  693. tm.assert_index_equal(fidx, expected)
  694. fidx = 3.2 * idx
  695. tm.assert_index_equal(fidx, expected)
  696. # interops with numpy arrays
  697. expected = Float64Index(arr)
  698. a = np.zeros(5, dtype="float64")
  699. result = fidx - a
  700. tm.assert_index_equal(result, expected)
  701. expected = Float64Index(-arr)
  702. a = np.zeros(5, dtype="float64")
  703. result = a - fidx
  704. tm.assert_index_equal(result, expected)
  705. def test_invalid_dtype(self, invalid_dtype):
  706. # GH 29539
  707. dtype = invalid_dtype
  708. msg = fr"Incorrect `dtype` passed: expected \w+(?: \w+)?, received {dtype}"
  709. with pytest.raises(ValueError, match=msg):
  710. self._index_cls([1, 2, 3], dtype=dtype)