import warnings

cimport cython
import numpy as np

cimport numpy as cnp
from numpy cimport (
    float32_t,
    float64_t,
    int8_t,
    int16_t,
    int32_t,
    int64_t,
    intp_t,
    ndarray,
    uint8_t,
    uint16_t,
    uint32_t,
    uint64_t,
)

cnp.import_array()

from pandas._libs cimport util
from pandas._libs.hashtable cimport HashTable
from pandas._libs.tslibs.nattype cimport c_NaT as NaT
from pandas._libs.tslibs.period cimport is_period_object
from pandas._libs.tslibs.timedeltas cimport _Timedelta
from pandas._libs.tslibs.timestamps cimport _Timestamp

from pandas._libs import (
    algos,
    hashtable as _hash,
)

from pandas._libs.missing import checknull


cdef inline bint is_definitely_invalid_key(object val):
    try:
        hash(val)
    except TypeError:
        return True
    return False


# Don't populate hash tables in monotonic indexes larger than this
_SIZE_CUTOFF = 1_000_000


@cython.freelist(32)
cdef class IndexEngine:

    cdef readonly:
        object vgetter
        HashTable mapping
        bint over_size_threshold

    cdef:
        bint unique, monotonic_inc, monotonic_dec
        bint need_monotonic_check, need_unique_check

    def __init__(self, vgetter, n):
        self.vgetter = vgetter
        self.over_size_threshold = n >= _SIZE_CUTOFF
        self.clear_mapping()

    def __contains__(self, val: object) -> bool:
        # We assume before we get here:
        #  - val is hashable
        self._ensure_mapping_populated()
        return val in self.mapping

    cpdef get_loc(self, object val):
        # -> Py_ssize_t | slice | ndarray[bool]
        cdef:
            Py_ssize_t loc

        if is_definitely_invalid_key(val):
            raise TypeError(f"'{val}' is an invalid key")

        if self.over_size_threshold and self.is_monotonic_increasing:
            if not self.is_unique:
                return self._get_loc_duplicates(val)
            values = self._get_index_values()

            self._check_type(val)
            try:
                loc = _bin_search(values, val)  # .searchsorted(val, side='left')
            except TypeError:
                # GH#35788 e.g. val=None with float64 values
                raise KeyError(val)
            if loc >= len(values):
                raise KeyError(val)
            if values[loc] != val:
                raise KeyError(val)
            return loc

        self._ensure_mapping_populated()
        if not self.unique:
            return self._get_loc_duplicates(val)

        self._check_type(val)

        try:
            return self.mapping.get_item(val)
        except (TypeError, ValueError, OverflowError):
            # GH#41775 OverflowError e.g. if we are uint64 and val is -1
            raise KeyError(val)
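
    # Illustrative sketch of the possible return types of get_loc (assumed
    # example values, not from the original source):
    #   engine over [1, 2, 3] (unique):               get_loc(2) -> 1
    #   engine over [1, 2, 2, 2, 3] (monotonic dups): get_loc(2) -> slice(1, 4)
    #   engine over [2, 1, 2] (non-monotonic dups):   get_loc(2) -> boolean
    #                                                 mask [True, False, True]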

    cdef inline _get_loc_duplicates(self, object val):
        # -> Py_ssize_t | slice | ndarray[bool]
        cdef:
            Py_ssize_t diff

        if self.is_monotonic_increasing:
            values = self._get_index_values()
            try:
                left = values.searchsorted(val, side='left')
                right = values.searchsorted(val, side='right')
            except TypeError:
                # e.g. GH#29189 get_loc(None) with a Float64Index
                raise KeyError(val)

            diff = right - left
            if diff == 0:
                raise KeyError(val)
            elif diff == 1:
                return left
            else:
                return slice(left, right)

        return self._maybe_get_bool_indexer(val)

    cdef _maybe_get_bool_indexer(self, object val):
        # Returns ndarray[bool] or int
        cdef:
            ndarray[uint8_t, ndim=1, cast=True] indexer

        indexer = self._get_index_values() == val
        return self._unpack_bool_indexer(indexer, val)

    cdef _unpack_bool_indexer(self,
                              ndarray[uint8_t, ndim=1, cast=True] indexer,
                              object val):
        # Returns ndarray[bool] or int
        cdef:
            ndarray[intp_t, ndim=1] found
            int count

        found = np.where(indexer)[0]
        count = len(found)

        if count > 1:
            return indexer
        if count == 1:
            return int(found[0])
        raise KeyError(val)

    def sizeof(self, deep: bool = False) -> int:
        """ return the sizeof our mapping """
        if not self.is_mapping_populated:
            return 0
        return self.mapping.sizeof(deep=deep)

    def __sizeof__(self) -> int:
        return self.sizeof()

    @property
    def is_unique(self) -> bool:
        if self.need_unique_check:
            self._do_unique_check()

        return self.unique == 1

    cdef inline _do_unique_check(self):
        # de facto the same as _ensure_mapping_populated
        self._ensure_mapping_populated()

    @property
    def is_monotonic_increasing(self) -> bool:
        if self.need_monotonic_check:
            self._do_monotonic_check()

        return self.monotonic_inc == 1

    @property
    def is_monotonic_decreasing(self) -> bool:
        if self.need_monotonic_check:
            self._do_monotonic_check()

        return self.monotonic_dec == 1

    cdef inline _do_monotonic_check(self):
        cdef:
            bint is_unique
        try:
            values = self._get_index_values()
            self.monotonic_inc, self.monotonic_dec, is_unique = \
                self._call_monotonic(values)
        except TypeError:
            self.monotonic_inc = 0
            self.monotonic_dec = 0
            is_unique = 0

        self.need_monotonic_check = 0

        # we can only be sure of uniqueness if is_unique=1
        if is_unique:
            self.unique = 1
            self.need_unique_check = 0

    cdef _get_index_values(self):
        return self.vgetter()

    cdef _call_monotonic(self, values):
        return algos.is_monotonic(values, timelike=False)

    def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
        return algos.backfill(self._get_index_values(), other, limit=limit)

    def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
        return algos.pad(self._get_index_values(), other, limit=limit)

    cdef _make_hash_table(self, Py_ssize_t n):
        raise NotImplementedError

    cdef _check_type(self, object val):
        hash(val)

    @property
    def is_mapping_populated(self) -> bool:
        return self.mapping is not None

    cdef inline _ensure_mapping_populated(self):
        # Populate the mapping if it is not already populated; this also
        # satisfies the need_unique_check.
        if not self.is_mapping_populated:

            values = self._get_index_values()
            self.mapping = self._make_hash_table(len(values))
            self._call_map_locations(values)

            if len(self.mapping) == len(values):
                self.unique = 1
                self.need_unique_check = 0

    cdef void _call_map_locations(self, ndarray values):
        self.mapping.map_locations(values)

    def clear_mapping(self):
        self.mapping = None
        self.need_monotonic_check = 1
        self.need_unique_check = 1

        self.unique = 0
        self.monotonic_inc = 0
        self.monotonic_dec = 0

    def get_indexer(self, ndarray values) -> np.ndarray:
        self._ensure_mapping_populated()
        return self.mapping.lookup(values)

    def get_indexer_non_unique(self, ndarray targets):
        """
        Return an indexer suitable for taking from a non-unique index,
        with the labels in the same order as the targets, and a missing
        indexer into the targets (which corresponds to the -1 indices in
        the results).

        Returns
        -------
        indexer : np.ndarray[np.intp]
        missing : np.ndarray[np.intp]
        """
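        # Illustrative sketch (assumed example values, not from the original
        # source): for an index over [1, 2, 2, 3] and targets [2, 9], the
        # expected output would be roughly
        #   indexer = [1, 2, -1]   # both positions of 2, then -1 for the miss
        #   missing = [1]          # position of 9 within `targets`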
        cdef:
            ndarray values, x
            ndarray[intp_t] result, missing
            set stargets, remaining_stargets
            dict d = {}
            object val
            int count = 0, count_missing = 0
            Py_ssize_t i, j, n, n_t, n_alloc

        self._ensure_mapping_populated()
        values = np.array(self._get_index_values(), copy=False)
        stargets = set(targets)

        n = len(values)
        n_t = len(targets)
        if n > 10_000:
            n_alloc = 10_000
        else:
            n_alloc = n

        result = np.empty(n_alloc, dtype=np.intp)
        missing = np.empty(n_t, dtype=np.intp)

        # map each starget to its position in the index
        if stargets and len(stargets) < 5 and self.is_monotonic_increasing:
            # if there are few enough stargets and the index is monotonically
            # increasing, then use binary search for each starget
            remaining_stargets = set()
            for starget in stargets:
                try:
                    start = values.searchsorted(starget, side='left')
                    end = values.searchsorted(starget, side='right')
                except TypeError:  # e.g. if we tried to search for string in int array
                    remaining_stargets.add(starget)
                else:
                    if start != end:
                        d[starget] = list(range(start, end))

            stargets = remaining_stargets

        if stargets:
            # otherwise, map by iterating through all items in the index
            for i in range(n):
                val = values[i]
                if val in stargets:
                    if val not in d:
                        d[val] = []
                    d[val].append(i)

        for i in range(n_t):
            val = targets[i]

            # found
            if val in d:
                for j in d[val]:

                    # realloc if needed
                    if count >= n_alloc:
                        n_alloc += 10_000
                        result = np.resize(result, n_alloc)

                    result[count] = j
                    count += 1

            # value not found
            else:

                if count >= n_alloc:
                    n_alloc += 10_000
                    result = np.resize(result, n_alloc)
                result[count] = -1
                count += 1
                missing[count_missing] = i
                count_missing += 1

        return result[0:count], missing[0:count_missing]


cdef Py_ssize_t _bin_search(ndarray values, object val) except -1:
    cdef:
        Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1
        object pval

    if hi == 0 or (hi > 0 and val > values[hi]):
        return len(values)

    while lo < hi:
        mid = (lo + hi) // 2
        pval = values[mid]
        if val < pval:
            hi = mid
        elif val > pval:
            lo = mid + 1
        else:
            while mid > 0 and val == values[mid - 1]:
                mid -= 1
            return mid

    if val <= values[mid]:
        return mid
    else:
        return mid + 1


cdef class ObjectEngine(IndexEngine):
    """
    Index Engine for use with object-dtype Index, namely the base class Index.
    """
    cdef _make_hash_table(self, Py_ssize_t n):
        return _hash.PyObjectHashTable(n)


cdef class DatetimeEngine(Int64Engine):

    cdef str _get_box_dtype(self):
        return 'M8[ns]'

    cdef int64_t _unbox_scalar(self, scalar) except? -1:
        # NB: caller is responsible for ensuring tzawareness compat
        #  before we get here
        if not (isinstance(scalar, _Timestamp) or scalar is NaT):
            raise TypeError(scalar)
        return scalar.value

    def __contains__(self, val: object) -> bool:
        # We assume before we get here:
        #  - val is hashable
        cdef:
            int64_t loc, conv

        conv = self._unbox_scalar(val)
        if self.over_size_threshold and self.is_monotonic_increasing:
            if not self.is_unique:
                return self._get_loc_duplicates(conv)
            values = self._get_index_values()
            loc = values.searchsorted(conv, side='left')
            return values[loc] == conv

        self._ensure_mapping_populated()
        return conv in self.mapping

    cdef _get_index_values(self):
        return self.vgetter().view('i8')

    cdef _call_monotonic(self, values):
        return algos.is_monotonic(values, timelike=True)

    cpdef get_loc(self, object val):
        # NB: the caller is responsible for ensuring that we are called
        #  with either a Timestamp or NaT (Timedelta or NaT for TimedeltaEngine)
        cdef:
            int64_t loc

        if is_definitely_invalid_key(val):
            raise TypeError(f"'{val}' is an invalid key")

        try:
            conv = self._unbox_scalar(val)
        except TypeError:
            raise KeyError(val)

        # Welcome to the spaghetti factory
        if self.over_size_threshold and self.is_monotonic_increasing:
            if not self.is_unique:
                return self._get_loc_duplicates(conv)
            values = self._get_index_values()

            loc = values.searchsorted(conv, side='left')

            if loc == len(values) or values[loc] != conv:
                raise KeyError(val)
            return loc

        self._ensure_mapping_populated()
        if not self.unique:
            return self._get_loc_duplicates(conv)

        try:
            return self.mapping.get_item(conv)
        except KeyError:
            raise KeyError(val)

    def get_indexer_non_unique(self, ndarray targets):
        # we may get datetime64[ns] or timedelta64[ns], cast these to int64
        return super().get_indexer_non_unique(targets.view("i8"))

    def get_indexer(self, ndarray values) -> np.ndarray:
        self._ensure_mapping_populated()
        if values.dtype != self._get_box_dtype():
            return np.repeat(-1, len(values)).astype(np.intp)
        values = np.asarray(values).view('i8')
        return self.mapping.lookup(values)

    def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
        if other.dtype != self._get_box_dtype():
            return np.repeat(-1, len(other)).astype(np.intp)
        other = np.asarray(other).view('i8')
        return algos.pad(self._get_index_values(), other, limit=limit)

    def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
        if other.dtype != self._get_box_dtype():
            return np.repeat(-1, len(other)).astype(np.intp)
        other = np.asarray(other).view('i8')
        return algos.backfill(self._get_index_values(), other, limit=limit)


cdef class TimedeltaEngine(DatetimeEngine):

    cdef str _get_box_dtype(self):
        return 'm8[ns]'

    cdef int64_t _unbox_scalar(self, scalar) except? -1:
        if not (isinstance(scalar, _Timedelta) or scalar is NaT):
            raise TypeError(scalar)
        return scalar.value


cdef class PeriodEngine(Int64Engine):

    cdef int64_t _unbox_scalar(self, scalar) except? -1:
        if scalar is NaT:
            return scalar.value
        if is_period_object(scalar):
            # NB: we assume that we have the correct freq here.
            return scalar.ordinal
        raise TypeError(scalar)

    cpdef get_loc(self, object val):
        # NB: the caller is responsible for ensuring that we are called
        #  with either a Period or NaT
        cdef:
            int64_t conv

        try:
            conv = self._unbox_scalar(val)
        except TypeError:
            raise KeyError(val)

        return Int64Engine.get_loc(self, conv)

    cdef _get_index_values(self):
        return super(PeriodEngine, self).vgetter().view("i8")

    cdef _call_monotonic(self, values):
        return algos.is_monotonic(values, timelike=True)


cdef class BaseMultiIndexCodesEngine:
    """
    Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which
    represent each label in a MultiIndex as an integer, by juxtaposing the bits
    encoding each level, with appropriate offsets.

    For instance: if 3 levels have respectively 3, 6 and 1 possible values,
    then their labels can be represented using respectively 2, 3 and 1 bits,
    as follows:
     _ _ _ _____ _ __ __ __
    |0|0|0| ... |0| 0|a1|a0| -> offset 0 (first level)
     — — — ————— — —— —— ——
    |0|0|0| ... |0|b2|b1|b0| -> offset 2 (bits required for first level)
     — — — ————— — —— —— ——
    |0|0|0| ... |0| 0| 0|c0| -> offset 5 (bits required for first two levels)
     ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾
    and the resulting unsigned integer representation will be:
     _ _ _ _____ _ __ __ __ __ __ __
    |0|0|0| ... |0|c0|b2|b1|b0|a1|a0|
     ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾

    Offsets are calculated at initialization, labels are transformed by method
    _codes_to_ints.

    Keys are located by first locating each component against the respective
    level, then locating (the integer representation of) codes.
    """
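
    # A worked instance of the packing sketched in the docstring above
    # (illustrative only): with level sizes 3, 6 and 1 the offsets are
    # 0, 2 and 5 bits, so the per-level codes (a, b, c) = (1, 3, 0) map,
    # after the +1 shift applied in __init__, to
    #   (1 + 1) << 0 | (3 + 1) << 2 | (0 + 1) << 5 == 2 + 16 + 32 == 50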

    def __init__(self, object levels, object labels,
                 ndarray[uint64_t, ndim=1] offsets):
        """
        Parameters
        ----------
        levels : list-like of numpy arrays
            Levels of the MultiIndex.
        labels : list-like of numpy arrays of integer dtype
            Labels of the MultiIndex.
        offsets : numpy array of uint64 dtype
            Pre-calculated offsets, one for each level of the index.
        """
        self.levels = levels
        self.offsets = offsets

        # Transform labels into a single array, and add 1 so that we are
        # working with positive integers (-1 for NaN becomes 0):
        codes = (np.array(labels, dtype='int64').T + 1).astype('uint64',
                                                               copy=False)

        # Map each codes combination in the index to an integer unambiguously
        # (no collisions possible), based on the "offsets", which describe the
        # number of bits to switch labels for each level:
        lab_ints = self._codes_to_ints(codes)

        # Initialize underlying index (e.g. libindex.UInt64Engine) with
        # integers representing labels: we will use its get_loc and get_indexer
        self._base.__init__(self, lambda: lab_ints, len(lab_ints))

    def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray:
        raise NotImplementedError("Implemented by subclass")

    def _extract_level_codes(self, ndarray[object] target) -> np.ndarray:
        """
        Map the requested list of (tuple) keys to their integer representations
        for searching in the underlying integer index.

        Parameters
        ----------
        target : ndarray[object]
            Each key is a tuple, with a label for each level of the index.

        Returns
        -------
        int_keys : 1-dimensional array of dtype uint64 or object
            Integers representing one combination each
        """
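        # Each tuple key is split column-wise; every component is located in
        # its level (shifted by +1 so that missing values map to 0) and the
        # per-level codes are then packed into one integer per key by
        # _codes_to_ints.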
        level_codes = [lev.get_indexer(codes) + 1 for lev, codes
                       in zip(self.levels, zip(*target))]
        return self._codes_to_ints(np.array(level_codes, dtype='uint64').T)

    def get_indexer(self, ndarray[object] target) -> np.ndarray:
        """
        Returns an array giving the positions of each value of `target` in
        `self.values`, where -1 represents a value in `target` which does not
        appear in `self.values`

        Parameters
        ----------
        target : ndarray[object]
            Each key is a tuple, with a label for each level of the index

        Returns
        -------
        np.ndarray[intp_t, ndim=1] of the indexer of `target` into
        `self.values`
        """
        lab_ints = self._extract_level_codes(target)
        return self._base.get_indexer(self, lab_ints)

    def get_indexer_with_fill(self, ndarray target, ndarray values,
                              str method, object limit) -> np.ndarray:
        """
        Returns an array giving the positions of each value of `target` in
        `values`, where -1 represents a value in `target` which does not
        appear in `values`

        If `method` is "backfill" then the position for a value in `target`
        which does not appear in `values` is that of the next greater value
        in `values` (if one exists), and -1 if there is no such value.

        Similarly, if the method is "pad" then the position for a value in
        `target` which does not appear in `values` is that of the next smaller
        value in `values` (if one exists), and -1 if there is no such value.

        Parameters
        ----------
        target : ndarray[object] of tuples
            need not be sorted, but all must have the same length, which must
            be the same as the length of all tuples in `values`
        values : ndarray[object] of tuples
            must be sorted and all have the same length. Should be the set of
            the MultiIndex's values.
        method : string
            "backfill" or "pad"
        limit : int or None
            if provided, limit the number of fills to this value

        Returns
        -------
        np.ndarray[intp_t, ndim=1] of the indexer of `target` into `values`,
        filled with the `method` (and optionally `limit`) specified
        """
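        # Illustrative sketch (assumed example values, not from the original
        # source): with values = [(1, 1), (1, 2), (2, 1)] and
        # target = [(1, 3)],
        #   method="backfill" -> [2]   # next greater value, (2, 1)
        #   method="pad"      -> [1]   # next smaller value, (1, 2)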
        assert method in ("backfill", "pad")
        cdef:
            int64_t i, j, next_code
            int64_t num_values, num_target_values
            ndarray[int64_t, ndim=1] target_order
            ndarray[object, ndim=1] target_values
            ndarray[int64_t, ndim=1] new_codes, new_target_codes
            ndarray[intp_t, ndim=1] sorted_indexer

        target_order = np.argsort(target).astype('int64')
        target_values = target[target_order]
        num_values, num_target_values = len(values), len(target_values)
        new_codes, new_target_codes = (
            np.empty((num_values,)).astype('int64'),
            np.empty((num_target_values,)).astype('int64'),
        )

        # `values` and `target_values` are both sorted, so we walk through them
        # and memoize the (ordered) set of indices in the (implicit)
        # merged-and-sorted list of the two which belong to each of them.
        # The effect of this is to create a factorization for the (sorted)
        # merger of the index values, where `new_codes` and `new_target_codes`
        # are the subsets of the factors which appear in `values` and `target`,
        # respectively.
        i, j, next_code = 0, 0, 0
        while i < num_values and j < num_target_values:
            val, target_val = values[i], target_values[j]
            if val <= target_val:
                new_codes[i] = next_code
                i += 1
            if target_val <= val:
                new_target_codes[j] = next_code
                j += 1
            next_code += 1

        # at this point, at least one should have reached the end
        # the remaining values of the other should be added to the end
        assert i == num_values or j == num_target_values

        while i < num_values:
            new_codes[i] = next_code
            i += 1
            next_code += 1

        while j < num_target_values:
            new_target_codes[j] = next_code
            j += 1
            next_code += 1

        # get the indexer, and undo the sorting of `target.values`
        algo = algos.backfill if method == "backfill" else algos.pad
        sorted_indexer = algo(new_codes, new_target_codes, limit=limit)
        return sorted_indexer[np.argsort(target_order)]

    def get_loc(self, object key):
        if is_definitely_invalid_key(key):
            raise TypeError(f"'{key}' is an invalid key")
        if not isinstance(key, tuple):
            raise KeyError(key)
        try:
            indices = [0 if checknull(v) else lev.get_loc(v) + 1
                       for lev, v in zip(self.levels, key)]
        except KeyError:
            raise KeyError(key)

        # Transform indices into single integer:
        lab_int = self._codes_to_ints(np.array(indices, dtype='uint64'))

        return self._base.get_loc(self, lab_int)

    def get_indexer_non_unique(self, ndarray[object] target):
        lab_ints = self._extract_level_codes(target)
        indexer = self._base.get_indexer_non_unique(self, lab_ints)

        return indexer

    def __contains__(self, val: object) -> bool:
        # We assume before we get here:
        #  - val is hashable
        # Default __contains__ looks in the underlying mapping, which in this
        # case only contains integer representations.
        try:
            self.get_loc(val)
            return True
        except (KeyError, TypeError, ValueError):
            return False


# Generated from template.
include "index_class_helper.pxi"