interval.pyx 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. import numbers
  2. from operator import (
  3. le,
  4. lt,
  5. )
  6. from cpython.datetime cimport (
  7. PyDateTime_IMPORT,
  8. PyDelta_Check,
  9. )
  10. PyDateTime_IMPORT
  11. from cpython.object cimport (
  12. Py_EQ,
  13. Py_GE,
  14. Py_GT,
  15. Py_LE,
  16. Py_LT,
  17. Py_NE,
  18. PyObject_RichCompare,
  19. )
  20. import cython
  21. from cython import Py_ssize_t
  22. import numpy as np
  23. cimport numpy as cnp
  24. from numpy cimport (
  25. NPY_QUICKSORT,
  26. PyArray_ArgSort,
  27. PyArray_Take,
  28. float32_t,
  29. float64_t,
  30. int32_t,
  31. int64_t,
  32. ndarray,
  33. uint64_t,
  34. )
  35. cnp.import_array()
  36. from pandas._libs cimport util
  37. from pandas._libs.hashtable cimport Int64Vector
  38. from pandas._libs.tslibs.timedeltas cimport _Timedelta
  39. from pandas._libs.tslibs.timestamps cimport _Timestamp
  40. from pandas._libs.tslibs.timezones cimport tz_compare
  41. from pandas._libs.tslibs.util cimport (
  42. is_float_object,
  43. is_integer_object,
  44. is_timedelta64_object,
  45. )
  46. VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither'])
  47. cdef class IntervalMixin:
  48. @property
  49. def closed_left(self):
  50. """
  51. Check if the interval is closed on the left side.
  52. For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
  53. Returns
  54. -------
  55. bool
  56. True if the Interval is closed on the left-side.
  57. """
  58. return self.closed in ('left', 'both')
  59. @property
  60. def closed_right(self):
  61. """
  62. Check if the interval is closed on the right side.
  63. For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
  64. Returns
  65. -------
  66. bool
  67. True if the Interval is closed on the left-side.
  68. """
  69. return self.closed in ('right', 'both')
  70. @property
  71. def open_left(self):
  72. """
  73. Check if the interval is open on the left side.
  74. For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
  75. Returns
  76. -------
  77. bool
  78. True if the Interval is closed on the left-side.
  79. """
  80. return not self.closed_left
  81. @property
  82. def open_right(self):
  83. """
  84. Check if the interval is open on the right side.
  85. For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
  86. Returns
  87. -------
  88. bool
  89. True if the Interval is closed on the left-side.
  90. """
  91. return not self.closed_right
  92. @property
  93. def mid(self):
  94. """
  95. Return the midpoint of the Interval.
  96. """
  97. try:
  98. return 0.5 * (self.left + self.right)
  99. except TypeError:
  100. # datetime safe version
  101. return self.left + 0.5 * self.length
  102. @property
  103. def length(self):
  104. """
  105. Return the length of the Interval.
  106. """
  107. return self.right - self.left
  108. @property
  109. def is_empty(self):
  110. """
  111. Indicates if an interval is empty, meaning it contains no points.
  112. .. versionadded:: 0.25.0
  113. Returns
  114. -------
  115. bool or ndarray
  116. A boolean indicating if a scalar :class:`Interval` is empty, or a
  117. boolean ``ndarray`` positionally indicating if an ``Interval`` in
  118. an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is
  119. empty.
  120. Examples
  121. --------
  122. An :class:`Interval` that contains points is not empty:
  123. >>> pd.Interval(0, 1, closed='right').is_empty
  124. False
  125. An ``Interval`` that does not contain any points is empty:
  126. >>> pd.Interval(0, 0, closed='right').is_empty
  127. True
  128. >>> pd.Interval(0, 0, closed='left').is_empty
  129. True
  130. >>> pd.Interval(0, 0, closed='neither').is_empty
  131. True
  132. An ``Interval`` that contains a single point is not empty:
  133. >>> pd.Interval(0, 0, closed='both').is_empty
  134. False
  135. An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a
  136. boolean ``ndarray`` positionally indicating if an ``Interval`` is
  137. empty:
  138. >>> ivs = [pd.Interval(0, 0, closed='neither'),
  139. ... pd.Interval(1, 2, closed='neither')]
  140. >>> pd.arrays.IntervalArray(ivs).is_empty
  141. array([ True, False])
  142. Missing values are not considered empty:
  143. >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan]
  144. >>> pd.IntervalIndex(ivs).is_empty
  145. array([ True, False])
  146. """
  147. return (self.right == self.left) & (self.closed != 'both')
  148. def _check_closed_matches(self, other, name='other'):
  149. """
  150. Check if the closed attribute of `other` matches.
  151. Note that 'left' and 'right' are considered different from 'both'.
  152. Parameters
  153. ----------
  154. other : Interval, IntervalIndex, IntervalArray
  155. name : str
  156. Name to use for 'other' in the error message.
  157. Raises
  158. ------
  159. ValueError
  160. When `other` is not closed exactly the same as self.
  161. """
  162. if self.closed != other.closed:
  163. raise ValueError(f"'{name}.closed' is {repr(other.closed)}, "
  164. f"expected {repr(self.closed)}.")
  165. cdef bint _interval_like(other):
  166. return (hasattr(other, 'left')
  167. and hasattr(other, 'right')
  168. and hasattr(other, 'closed'))
  169. cdef class Interval(IntervalMixin):
  170. """
  171. Immutable object implementing an Interval, a bounded slice-like interval.
  172. Parameters
  173. ----------
  174. left : orderable scalar
  175. Left bound for the interval.
  176. right : orderable scalar
  177. Right bound for the interval.
  178. closed : {'right', 'left', 'both', 'neither'}, default 'right'
  179. Whether the interval is closed on the left-side, right-side, both or
  180. neither. See the Notes for more detailed explanation.
  181. See Also
  182. --------
  183. IntervalIndex : An Index of Interval objects that are all closed on the
  184. same side.
  185. cut : Convert continuous data into discrete bins (Categorical
  186. of Interval objects).
  187. qcut : Convert continuous data into bins (Categorical of Interval objects)
  188. based on quantiles.
  189. Period : Represents a period of time.
  190. Notes
  191. -----
  192. The parameters `left` and `right` must be from the same type, you must be
  193. able to compare them and they must satisfy ``left <= right``.
  194. A closed interval (in mathematics denoted by square brackets) contains
  195. its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the
  196. conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for.
  197. An open interval (in mathematics denoted by parentheses) does not contain
  198. its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the
  199. conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for.
  200. Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is
  201. described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is
  202. described by ``0 < x <= 5`` (``closed='right'``).
  203. Examples
  204. --------
  205. It is possible to build Intervals of different types, like numeric ones:
  206. >>> iv = pd.Interval(left=0, right=5)
  207. >>> iv
  208. Interval(0, 5, closed='right')
  209. You can check if an element belongs to it
  210. >>> 2.5 in iv
  211. True
  212. You can test the bounds (``closed='right'``, so ``0 < x <= 5``):
  213. >>> 0 in iv
  214. False
  215. >>> 5 in iv
  216. True
  217. >>> 0.0001 in iv
  218. True
  219. Calculate its length
  220. >>> iv.length
  221. 5
  222. You can operate with `+` and `*` over an Interval and the operation
  223. is applied to each of its bounds, so the result depends on the type
  224. of the bound elements
  225. >>> shifted_iv = iv + 3
  226. >>> shifted_iv
  227. Interval(3, 8, closed='right')
  228. >>> extended_iv = iv * 10.0
  229. >>> extended_iv
  230. Interval(0.0, 50.0, closed='right')
  231. To create a time interval you can use Timestamps as the bounds
  232. >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'),
  233. ... pd.Timestamp('2018-01-01 00:00:00'),
  234. ... closed='left')
  235. >>> pd.Timestamp('2017-01-01 00:00') in year_2017
  236. True
  237. >>> year_2017.length
  238. Timedelta('365 days 00:00:00')
  239. """
  240. _typ = "interval"
  241. __array_priority__ = 1000
  242. cdef readonly object left
  243. """
  244. Left bound for the interval.
  245. """
  246. cdef readonly object right
  247. """
  248. Right bound for the interval.
  249. """
  250. cdef readonly str closed
  251. """
  252. Whether the interval is closed on the left-side, right-side, both or
  253. neither.
  254. """
  255. def __init__(self, left, right, str closed='right'):
  256. # note: it is faster to just do these checks than to use a special
  257. # constructor (__cinit__/__new__) to avoid them
  258. self._validate_endpoint(left)
  259. self._validate_endpoint(right)
  260. if closed not in VALID_CLOSED:
  261. raise ValueError(f"invalid option for 'closed': {closed}")
  262. if not left <= right:
  263. raise ValueError("left side of interval must be <= right side")
  264. if (isinstance(left, _Timestamp) and
  265. not tz_compare(left.tzinfo, right.tzinfo)):
  266. # GH 18538
  267. raise ValueError("left and right must have the same time zone, got "
  268. f"{repr(left.tzinfo)}' and {repr(right.tzinfo)}")
  269. self.left = left
  270. self.right = right
  271. self.closed = closed
  272. def _validate_endpoint(self, endpoint):
  273. # GH 23013
  274. if not (is_integer_object(endpoint) or is_float_object(endpoint) or
  275. isinstance(endpoint, (_Timestamp, _Timedelta))):
  276. raise ValueError("Only numeric, Timestamp and Timedelta endpoints "
  277. "are allowed when constructing an Interval.")
  278. def __hash__(self):
  279. return hash((self.left, self.right, self.closed))
  280. def __contains__(self, key) -> bool:
  281. if _interval_like(key):
  282. raise TypeError("__contains__ not defined for two intervals")
  283. return ((self.left < key if self.open_left else self.left <= key) and
  284. (key < self.right if self.open_right else key <= self.right))
  285. def __richcmp__(self, other, op: int):
  286. if isinstance(other, Interval):
  287. self_tuple = (self.left, self.right, self.closed)
  288. other_tuple = (other.left, other.right, other.closed)
  289. return PyObject_RichCompare(self_tuple, other_tuple, op)
  290. elif util.is_array(other):
  291. return np.array(
  292. [PyObject_RichCompare(self, x, op) for x in other],
  293. dtype=bool,
  294. )
  295. return NotImplemented
  296. def __reduce__(self):
  297. args = (self.left, self.right, self.closed)
  298. return (type(self), args)
  299. def _repr_base(self):
  300. left = self.left
  301. right = self.right
  302. # TODO: need more general formatting methodology here
  303. if isinstance(left, _Timestamp) and isinstance(right, _Timestamp):
  304. left = left._short_repr
  305. right = right._short_repr
  306. return left, right
  307. def __repr__(self) -> str:
  308. left, right = self._repr_base()
  309. name = type(self).__name__
  310. repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})'
  311. return repr_str
  312. def __str__(self) -> str:
  313. left, right = self._repr_base()
  314. start_symbol = '[' if self.closed_left else '('
  315. end_symbol = ']' if self.closed_right else ')'
  316. return f'{start_symbol}{left}, {right}{end_symbol}'
  317. def __add__(self, y):
  318. if (
  319. isinstance(y, numbers.Number)
  320. or PyDelta_Check(y)
  321. or is_timedelta64_object(y)
  322. ):
  323. return Interval(self.left + y, self.right + y, closed=self.closed)
  324. elif (
  325. isinstance(y, Interval)
  326. and (
  327. isinstance(self, numbers.Number)
  328. or PyDelta_Check(self)
  329. or is_timedelta64_object(self)
  330. )
  331. ):
  332. return Interval(y.left + self, y.right + self, closed=y.closed)
  333. return NotImplemented
  334. def __sub__(self, y):
  335. if (
  336. isinstance(y, numbers.Number)
  337. or PyDelta_Check(y)
  338. or is_timedelta64_object(y)
  339. ):
  340. return Interval(self.left - y, self.right - y, closed=self.closed)
  341. return NotImplemented
  342. def __mul__(self, y):
  343. if isinstance(y, numbers.Number):
  344. return Interval(self.left * y, self.right * y, closed=self.closed)
  345. elif isinstance(y, Interval) and isinstance(self, numbers.Number):
  346. return Interval(y.left * self, y.right * self, closed=y.closed)
  347. return NotImplemented
  348. def __truediv__(self, y):
  349. if isinstance(y, numbers.Number):
  350. return Interval(self.left / y, self.right / y, closed=self.closed)
  351. return NotImplemented
  352. def __floordiv__(self, y):
  353. if isinstance(y, numbers.Number):
  354. return Interval(
  355. self.left // y, self.right // y, closed=self.closed)
  356. return NotImplemented
  357. def overlaps(self, other):
  358. """
  359. Check whether two Interval objects overlap.
  360. Two intervals overlap if they share a common point, including closed
  361. endpoints. Intervals that only have an open endpoint in common do not
  362. overlap.
  363. Parameters
  364. ----------
  365. other : Interval
  366. Interval to check against for an overlap.
  367. Returns
  368. -------
  369. bool
  370. True if the two intervals overlap.
  371. See Also
  372. --------
  373. IntervalArray.overlaps : The corresponding method for IntervalArray.
  374. IntervalIndex.overlaps : The corresponding method for IntervalIndex.
  375. Examples
  376. --------
  377. >>> i1 = pd.Interval(0, 2)
  378. >>> i2 = pd.Interval(1, 3)
  379. >>> i1.overlaps(i2)
  380. True
  381. >>> i3 = pd.Interval(4, 5)
  382. >>> i1.overlaps(i3)
  383. False
  384. Intervals that share closed endpoints overlap:
  385. >>> i4 = pd.Interval(0, 1, closed='both')
  386. >>> i5 = pd.Interval(1, 2, closed='both')
  387. >>> i4.overlaps(i5)
  388. True
  389. Intervals that only have an open endpoint in common do not overlap:
  390. >>> i6 = pd.Interval(1, 2, closed='neither')
  391. >>> i4.overlaps(i6)
  392. False
  393. """
  394. if not isinstance(other, Interval):
  395. raise TypeError("`other` must be an Interval, "
  396. f"got {type(other).__name__}")
  397. # equality is okay if both endpoints are closed (overlap at a point)
  398. op1 = le if (self.closed_left and other.closed_right) else lt
  399. op2 = le if (other.closed_left and self.closed_right) else lt
  400. # overlaps is equivalent negation of two interval being disjoint:
  401. # disjoint = (A.left > B.right) or (B.left > A.right)
  402. # (simplifying the negation allows this to be done in less operations)
  403. return op1(self.left, other.right) and op2(other.left, self.right)
  404. @cython.wraparound(False)
  405. @cython.boundscheck(False)
  406. def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):
  407. """
  408. Parameters
  409. ----------
  410. intervals : ndarray
  411. Object array of Intervals / nulls.
  412. validate_closed: bool, default True
  413. Boolean indicating if all intervals must be closed on the same side.
  414. Mismatching closed will raise if True, else return None for closed.
  415. Returns
  416. -------
  417. tuple of tuples
  418. left : (ndarray, object, array)
  419. right : (ndarray, object, array)
  420. closed: str
  421. """
  422. cdef:
  423. object closed = None, interval
  424. Py_ssize_t i, n = len(intervals)
  425. ndarray left, right
  426. bint seen_closed = False
  427. left = np.empty(n, dtype=intervals.dtype)
  428. right = np.empty(n, dtype=intervals.dtype)
  429. for i in range(n):
  430. interval = intervals[i]
  431. if interval is None or util.is_nan(interval):
  432. left[i] = np.nan
  433. right[i] = np.nan
  434. continue
  435. if not isinstance(interval, Interval):
  436. raise TypeError(f"type {type(interval)} with value "
  437. f"{interval} is not an interval")
  438. left[i] = interval.left
  439. right[i] = interval.right
  440. if not seen_closed:
  441. seen_closed = True
  442. closed = interval.closed
  443. elif closed != interval.closed:
  444. closed = None
  445. if validate_closed:
  446. raise ValueError("intervals must all be closed on the same side")
  447. return left, right, closed
  448. include "intervaltree.pxi"