123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557 |
- import numbers
- from operator import (
- le,
- lt,
- )
- from cpython.datetime cimport (
- PyDateTime_IMPORT,
- PyDelta_Check,
- )
- PyDateTime_IMPORT
- from cpython.object cimport (
- Py_EQ,
- Py_GE,
- Py_GT,
- Py_LE,
- Py_LT,
- Py_NE,
- PyObject_RichCompare,
- )
- import cython
- from cython import Py_ssize_t
- import numpy as np
- cimport numpy as cnp
- from numpy cimport (
- NPY_QUICKSORT,
- PyArray_ArgSort,
- PyArray_Take,
- float32_t,
- float64_t,
- int32_t,
- int64_t,
- ndarray,
- uint64_t,
- )
- cnp.import_array()
- from pandas._libs cimport util
- from pandas._libs.hashtable cimport Int64Vector
- from pandas._libs.tslibs.timedeltas cimport _Timedelta
- from pandas._libs.tslibs.timestamps cimport _Timestamp
- from pandas._libs.tslibs.timezones cimport tz_compare
- from pandas._libs.tslibs.util cimport (
- is_float_object,
- is_integer_object,
- is_timedelta64_object,
- )
# The only values accepted for an interval's ``closed`` option.
VALID_CLOSED = frozenset(('left', 'right', 'both', 'neither'))
cdef class IntervalMixin:
    """
    Shared interval properties derived from ``left``, ``right`` and ``closed``.

    The mixin defines no storage of its own; every member below reads the
    ``left``, ``right`` and ``closed`` attributes of ``self``, which the
    concrete class must provide.
    """

    @property
    def closed_left(self):
        """
        Check if the interval is closed on the left side.

        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.

        Returns
        -------
        bool
            True if the Interval is closed on the left-side.
        """
        return self.closed in ('left', 'both')

    @property
    def closed_right(self):
        """
        Check if the interval is closed on the right side.

        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.

        Returns
        -------
        bool
            True if the Interval is closed on the right-side.
        """
        return self.closed in ('right', 'both')

    @property
    def open_left(self):
        """
        Check if the interval is open on the left side.

        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.

        Returns
        -------
        bool
            True if the Interval is open on the left-side.
        """
        return not self.closed_left

    @property
    def open_right(self):
        """
        Check if the interval is open on the right side.

        For the meaning of `closed` and `open` see :class:`~pandas.Interval`.

        Returns
        -------
        bool
            True if the Interval is open on the right-side.
        """
        return not self.closed_right

    @property
    def mid(self):
        """
        Return the midpoint of the Interval.
        """
        try:
            return 0.5 * (self.left + self.right)
        except TypeError:
            # datetime safe version: the bounds cannot be added together, so
            # offset the left bound by half of the length instead
            return self.left + 0.5 * self.length

    @property
    def length(self):
        """
        Return the length of the Interval.
        """
        return self.right - self.left

    @property
    def is_empty(self):
        """
        Indicates if an interval is empty, meaning it contains no points.

        .. versionadded:: 0.25.0

        Returns
        -------
        bool or ndarray
            A boolean indicating if a scalar :class:`Interval` is empty, or a
            boolean ``ndarray`` positionally indicating if an ``Interval`` in
            an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is
            empty.

        Examples
        --------
        An :class:`Interval` that contains points is not empty:

        >>> pd.Interval(0, 1, closed='right').is_empty
        False

        An ``Interval`` that does not contain any points is empty:

        >>> pd.Interval(0, 0, closed='right').is_empty
        True
        >>> pd.Interval(0, 0, closed='left').is_empty
        True
        >>> pd.Interval(0, 0, closed='neither').is_empty
        True

        An ``Interval`` that contains a single point is not empty:

        >>> pd.Interval(0, 0, closed='both').is_empty
        False

        An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a
        boolean ``ndarray`` positionally indicating if an ``Interval`` is
        empty:

        >>> ivs = [pd.Interval(0, 0, closed='neither'),
        ...        pd.Interval(1, 2, closed='neither')]
        >>> pd.arrays.IntervalArray(ivs).is_empty
        array([ True, False])

        Missing values are not considered empty:

        >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan]
        >>> pd.IntervalIndex(ivs).is_empty
        array([ True, False])
        """
        # `&` rather than `and` so the expression also works element-wise
        # when the bounds compare to a boolean array
        return (self.right == self.left) & (self.closed != 'both')

    def _check_closed_matches(self, other, name='other'):
        """
        Check if the closed attribute of `other` matches.

        Note that 'left' and 'right' are considered different from 'both'.

        Parameters
        ----------
        other : Interval, IntervalIndex, IntervalArray
        name : str
            Name to use for 'other' in the error message.

        Raises
        ------
        ValueError
            When `other` is not closed exactly the same as self.
        """
        if self.closed != other.closed:
            raise ValueError(f"'{name}.closed' is {repr(other.closed)}, "
                             f"expected {repr(self.closed)}.")
cdef bint _interval_like(other):
    # Duck-typed test: `other` counts as interval-like when it exposes all of
    # the `left`, `right` and `closed` attributes. The attributes are probed
    # in that order and probing stops at the first one that is missing.
    if not hasattr(other, 'left'):
        return False
    if not hasattr(other, 'right'):
        return False
    return hasattr(other, 'closed')
cdef class Interval(IntervalMixin):
    """
    Immutable object implementing an Interval, a bounded slice-like interval.

    Parameters
    ----------
    left : orderable scalar
        Left bound for the interval.
    right : orderable scalar
        Right bound for the interval.
    closed : {'right', 'left', 'both', 'neither'}, default 'right'
        Whether the interval is closed on the left-side, right-side, both or
        neither. See the Notes for more detailed explanation.

    See Also
    --------
    IntervalIndex : An Index of Interval objects that are all closed on the
        same side.
    cut : Convert continuous data into discrete bins (Categorical
        of Interval objects).
    qcut : Convert continuous data into bins (Categorical of Interval objects)
        based on quantiles.
    Period : Represents a period of time.

    Notes
    -----
    The parameters `left` and `right` must be of the same type, you must be
    able to compare them and they must satisfy ``left <= right``.

    A closed interval (in mathematics denoted by square brackets) contains
    its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the
    conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for.
    An open interval (in mathematics denoted by parentheses) does not contain
    its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the
    conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for.
    Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is
    described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is
    described by ``0 < x <= 5`` (``closed='right'``).

    Examples
    --------
    It is possible to build Intervals of different types, like numeric ones:

    >>> iv = pd.Interval(left=0, right=5)
    >>> iv
    Interval(0, 5, closed='right')

    You can check if an element belongs to it

    >>> 2.5 in iv
    True

    You can test the bounds (``closed='right'``, so ``0 < x <= 5``):

    >>> 0 in iv
    False
    >>> 5 in iv
    True
    >>> 0.0001 in iv
    True

    Calculate its length

    >>> iv.length
    5

    You can operate with `+` and `*` over an Interval and the operation
    is applied to each of its bounds, so the result depends on the type
    of the bound elements

    >>> shifted_iv = iv + 3
    >>> shifted_iv
    Interval(3, 8, closed='right')
    >>> extended_iv = iv * 10.0
    >>> extended_iv
    Interval(0.0, 50.0, closed='right')

    To create a time interval you can use Timestamps as the bounds

    >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'),
    ...                         pd.Timestamp('2018-01-01 00:00:00'),
    ...                         closed='left')
    >>> pd.Timestamp('2017-01-01 00:00') in year_2017
    True
    >>> year_2017.length
    Timedelta('365 days 00:00:00')
    """
    _typ = "interval"
    __array_priority__ = 1000

    cdef readonly object left
    """
    Left bound for the interval.
    """

    cdef readonly object right
    """
    Right bound for the interval.
    """

    cdef readonly str closed
    """
    Whether the interval is closed on the left-side, right-side, both or
    neither.
    """

    def __init__(self, left, right, str closed='right'):
        # note: it is faster to just do these checks than to use a special
        # constructor (__cinit__/__new__) to avoid them

        self._validate_endpoint(left)
        self._validate_endpoint(right)

        if closed not in VALID_CLOSED:
            raise ValueError(f"invalid option for 'closed': {closed}")
        if not left <= right:
            raise ValueError("left side of interval must be <= right side")
        if (isinstance(left, _Timestamp) and
                not tz_compare(left.tzinfo, right.tzinfo)):
            # GH 18538
            raise ValueError("left and right must have the same time zone, got "
                             f"{repr(left.tzinfo)} and {repr(right.tzinfo)}")
        self.left = left
        self.right = right
        self.closed = closed

    def _validate_endpoint(self, endpoint):
        """
        Raise ValueError unless `endpoint` is an integer, a float, a
        Timestamp or a Timedelta.
        """
        # GH 23013
        if not (is_integer_object(endpoint) or is_float_object(endpoint) or
                isinstance(endpoint, (_Timestamp, _Timedelta))):
            raise ValueError("Only numeric, Timestamp and Timedelta endpoints "
                             "are allowed when constructing an Interval.")

    def __hash__(self):
        # hashed over the same triple that __richcmp__ compares
        return hash((self.left, self.right, self.closed))

    def __contains__(self, key) -> bool:
        """
        Check whether the scalar `key` lies within the interval, honouring
        which sides are open. Raises TypeError for interval-like keys.
        """
        if _interval_like(key):
            raise TypeError("__contains__ not defined for two intervals")
        return ((self.left < key if self.open_left else self.left <= key) and
                (key < self.right if self.open_right else key <= self.right))

    def __richcmp__(self, other, op: int):
        if isinstance(other, Interval):
            # intervals compare as their (left, right, closed) tuples
            self_tuple = (self.left, self.right, self.closed)
            other_tuple = (other.left, other.right, other.closed)
            return PyObject_RichCompare(self_tuple, other_tuple, op)
        elif util.is_array(other):
            # compare against each element, producing a boolean array
            return np.array(
                [PyObject_RichCompare(self, x, op) for x in other],
                dtype=bool,
            )

        return NotImplemented

    def __reduce__(self):
        # pickle support: rebuild through the regular constructor
        args = (self.left, self.right, self.closed)
        return (type(self), args)

    def _repr_base(self):
        """
        Return the ``(left, right)`` pair used by ``__repr__``/``__str__``.
        """
        left = self.left
        right = self.right

        # TODO: need more general formatting methodology here
        if isinstance(left, _Timestamp) and isinstance(right, _Timestamp):
            left = left._short_repr
            right = right._short_repr

        return left, right

    def __repr__(self) -> str:

        left, right = self._repr_base()
        name = type(self).__name__
        repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})'
        return repr_str

    def __str__(self) -> str:

        left, right = self._repr_base()
        start_symbol = '[' if self.closed_left else '('
        end_symbol = ']' if self.closed_right else ')'
        return f'{start_symbol}{left}, {right}{end_symbol}'

    def __add__(self, y):
        if (
            isinstance(y, numbers.Number)
            or PyDelta_Check(y)
            or is_timedelta64_object(y)
        ):
            return Interval(self.left + y, self.right + y, closed=self.closed)
        elif (
            # legacy reflected call: with C-API style binary-operator
            # dispatch ``scalar + interval`` arrives here with the operands
            # in their original order, so `self` is the scalar
            isinstance(y, Interval)
            and (
                isinstance(self, numbers.Number)
                or PyDelta_Check(self)
                or is_timedelta64_object(self)
            )
        ):
            return Interval(y.left + self, y.right + self, closed=y.closed)
        return NotImplemented

    def __radd__(self, other):
        # Reflected addition (``other + interval``) for builds that use
        # Python-style binary-operator dispatch, where `self` is always the
        # Interval. Produces the same result as the legacy branch of __add__.
        if (
            isinstance(other, numbers.Number)
            or PyDelta_Check(other)
            or is_timedelta64_object(other)
        ):
            return Interval(self.left + other, self.right + other, closed=self.closed)
        return NotImplemented

    def __sub__(self, y):
        if (
            isinstance(y, numbers.Number)
            or PyDelta_Check(y)
            or is_timedelta64_object(y)
        ):
            return Interval(self.left - y, self.right - y, closed=self.closed)
        return NotImplemented

    def __mul__(self, y):
        if isinstance(y, numbers.Number):
            return Interval(self.left * y, self.right * y, closed=self.closed)
        elif isinstance(y, Interval) and isinstance(self, numbers.Number):
            # legacy reflected call (``number * interval``), see __add__
            return Interval(y.left * self, y.right * self, closed=y.closed)
        return NotImplemented

    def __rmul__(self, other):
        # Reflected multiplication (``other * interval``) for builds that use
        # Python-style binary-operator dispatch; mirrors the legacy branch of
        # __mul__.
        if isinstance(other, numbers.Number):
            return Interval(self.left * other, self.right * other, closed=self.closed)
        return NotImplemented

    def __truediv__(self, y):
        if isinstance(y, numbers.Number):
            return Interval(self.left / y, self.right / y, closed=self.closed)
        return NotImplemented

    def __floordiv__(self, y):
        if isinstance(y, numbers.Number):
            return Interval(
                self.left // y, self.right // y, closed=self.closed)
        return NotImplemented

    def overlaps(self, other):
        """
        Check whether two Interval objects overlap.

        Two intervals overlap if they share a common point, including closed
        endpoints. Intervals that only have an open endpoint in common do not
        overlap.

        Parameters
        ----------
        other : Interval
            Interval to check against for an overlap.

        Returns
        -------
        bool
            True if the two intervals overlap.

        Raises
        ------
        TypeError
            If `other` is not an Interval.

        See Also
        --------
        IntervalArray.overlaps : The corresponding method for IntervalArray.
        IntervalIndex.overlaps : The corresponding method for IntervalIndex.

        Examples
        --------
        >>> i1 = pd.Interval(0, 2)
        >>> i2 = pd.Interval(1, 3)
        >>> i1.overlaps(i2)
        True
        >>> i3 = pd.Interval(4, 5)
        >>> i1.overlaps(i3)
        False

        Intervals that share closed endpoints overlap:

        >>> i4 = pd.Interval(0, 1, closed='both')
        >>> i5 = pd.Interval(1, 2, closed='both')
        >>> i4.overlaps(i5)
        True

        Intervals that only have an open endpoint in common do not overlap:

        >>> i6 = pd.Interval(1, 2, closed='neither')
        >>> i4.overlaps(i6)
        False
        """
        if not isinstance(other, Interval):
            raise TypeError("`other` must be an Interval, "
                            f"got {type(other).__name__}")

        # equality is okay if both endpoints are closed (overlap at a point)
        op1 = le if (self.closed_left and other.closed_right) else lt
        op2 = le if (other.closed_left and self.closed_right) else lt

        # overlaps is equivalent to the negation of the two intervals being
        # disjoint:
        #   disjoint = (A.left > B.right) or (B.left > A.right)
        # (simplifying the negation allows this to be done in fewer operations)
        return op1(self.left, other.right) and op2(other.left, self.right)
@cython.wraparound(False)
@cython.boundscheck(False)
def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):
    """
    Split an array of Intervals into its left bounds, right bounds and the
    shared ``closed`` value.

    Parameters
    ----------
    intervals : ndarray
        Object array of Intervals / nulls.
    validate_closed : bool, default True
        Boolean indicating if all intervals must be closed on the same side.
        Mismatching closed will raise if True, else return None for closed.

    Returns
    -------
    tuple of
        left : ndarray
            Left bounds, allocated with the dtype of `intervals`; NaN where
            the input holds a null.
        right : ndarray
            Right bounds, laid out like `left`.
        closed : str or None
            The ``closed`` value common to every Interval seen. None when no
            Interval was seen, or when the values mismatch and
            `validate_closed` is False.

    Raises
    ------
    TypeError
        If an element is neither a null nor an Interval.
    ValueError
        If the intervals are not all closed on the same side and
        `validate_closed` is True.
    """
    cdef:
        object common_closed = None, item
        Py_ssize_t idx, count = len(intervals)
        ndarray lefts, rights
        bint have_closed = False

    lefts = np.empty(count, dtype=intervals.dtype)
    rights = np.empty(count, dtype=intervals.dtype)

    for idx in range(count):
        item = intervals[idx]

        if item is None or util.is_nan(item):
            # nulls become NaN on both sides and do not affect `closed`
            lefts[idx] = np.nan
            rights[idx] = np.nan
        elif isinstance(item, Interval):
            lefts[idx] = item.left
            rights[idx] = item.right

            if have_closed:
                if common_closed != item.closed:
                    common_closed = None
                    if validate_closed:
                        raise ValueError("intervals must all be closed on the same side")
            else:
                # the first Interval encountered fixes the expected value
                have_closed = True
                common_closed = item.closed
        else:
            raise TypeError(f"type {type(item)} with value {item} is not an interval")

    return lefts, rights, common_closed
- include "intervaltree.pxi"
|