123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310 |
- import operator
- from cpython.object cimport (
- Py_EQ,
- Py_GE,
- Py_GT,
- Py_LE,
- Py_LT,
- Py_NE,
- PyObject_RichCompareBool,
- )
- import cython
- from cython import Py_ssize_t
- import numpy as np
- from numpy cimport (
- import_array,
- ndarray,
- uint8_t,
- )
- import_array()
- from pandas._libs.missing cimport checknull
- from pandas._libs.util cimport is_nan
@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_compare(object[:] values, object val, object op) -> ndarray:
    """
    Evaluate the comparison `op` between every element of `values` and
    the scalar `val`, returning the outcomes as a boolean array.

    Parameters
    ----------
    values : ndarray[object]
    val : object
    op : {operator.eq, operator.ne,
          operator.le, operator.lt,
          operator.ge, operator.gt}

    Returns
    -------
    result : ndarray[bool]

    Raises
    ------
    ValueError
        If `op` is not one of the six operators listed above.

    Notes
    -----
    A position where the element or `val` satisfies ``checknull`` is never
    compared: it is filled with True for ``operator.ne`` and with False for
    every other operator.  For ``operator.eq`` and ``operator.ne`` a
    TypeError raised by the comparison is swallowed and the same fill value
    is stored; for the ordering operators the TypeError propagates.
    """
    cdef:
        Py_ssize_t idx, n = len(values)
        ndarray[uint8_t, cast=True] out
        bint val_is_na, na_fill, tolerate_type_error
        int cmp_flag
        object item

    # Translate the Python-level operator into a CPython rich-compare flag.
    if op is operator.eq:
        cmp_flag = Py_EQ
    elif op is operator.ne:
        cmp_flag = Py_NE
    elif op is operator.lt:
        cmp_flag = Py_LT
    elif op is operator.le:
        cmp_flag = Py_LE
    elif op is operator.gt:
        cmp_flag = Py_GT
    elif op is operator.ge:
        cmp_flag = Py_GE
    else:
        raise ValueError('Unrecognized operator')

    # Allocate as bool, but write through a uint8 view of the same buffer.
    out = np.empty(n, dtype=bool).view(np.uint8)
    val_is_na = checknull(val)

    # Null positions compare "not equal" and nothing else.
    na_fill = cmp_flag == Py_NE
    # Only equality-style comparisons treat incomparable objects as
    # "not equal" instead of raising.
    tolerate_type_error = cmp_flag == Py_EQ or cmp_flag == Py_NE

    for idx in range(n):
        item = values[idx]
        if checknull(item) or val_is_na:
            out[idx] = na_fill
        elif tolerate_type_error:
            try:
                out[idx] = PyObject_RichCompareBool(item, val, cmp_flag)
            except TypeError:
                out[idx] = na_fill
        else:
            out[idx] = PyObject_RichCompareBool(item, val, cmp_flag)

    return out.view(bool)
@cython.wraparound(False)
@cython.boundscheck(False)
def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarray:
    """
    Evaluate the comparison `op` pairwise between the elements of `left`
    and the elements of `right`, returning a boolean array.

    Parameters
    ----------
    left : ndarray[object]
    right : ndarray[object]
    op : {operator.eq, operator.ne,
          operator.le, operator.lt,
          operator.ge, operator.gt}

    Returns
    -------
    result : ndarray[bool]

    Raises
    ------
    ValueError
        If the two arrays differ in length, or if `op` is not one of the
        six operators listed above.

    Notes
    -----
    A pair in which either side satisfies ``checknull`` is never compared:
    it yields True for ``operator.ne`` and False for every other operator.
    """
    cdef:
        Py_ssize_t idx, n = len(left)
        ndarray[uint8_t, cast=True] out
        bint na_fill
        int cmp_flag
        object lhs, rhs

    if n != <Py_ssize_t>len(right):
        raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')

    # Translate the Python-level operator into a CPython rich-compare flag.
    if op is operator.eq:
        cmp_flag = Py_EQ
    elif op is operator.ne:
        cmp_flag = Py_NE
    elif op is operator.lt:
        cmp_flag = Py_LT
    elif op is operator.le:
        cmp_flag = Py_LE
    elif op is operator.gt:
        cmp_flag = Py_GT
    elif op is operator.ge:
        cmp_flag = Py_GE
    else:
        raise ValueError('Unrecognized operator')

    # Allocate as bool, but write through a uint8 view of the same buffer.
    out = np.empty(n, dtype=bool).view(np.uint8)

    # Null pairs compare "not equal" and nothing else.
    na_fill = cmp_flag == Py_NE

    for idx in range(n):
        lhs = left[idx]
        rhs = right[idx]
        if checknull(lhs) or checknull(rhs):
            out[idx] = na_fill
        else:
            out[idx] = PyObject_RichCompareBool(lhs, rhs, cmp_flag)

    return out.view(bool)
@cython.wraparound(False)
@cython.boundscheck(False)
def scalar_binop(object[:] values, object val, object op) -> ndarray:
    """
    Combine every element of `values` with the scalar `val` using the
    binary operator `op`.

    Parameters
    ----------
    values : ndarray[object]
    val : object
    op : binary operator

    Returns
    -------
    result : ndarray[object]

    Notes
    -----
    If `val` is None or NaN, `op` is never called and the result is an
    object array filled with `val`.  Otherwise elements that are None or
    NaN are copied through unchanged, and the filled array is passed
    through ``maybe_convert_bool`` before being returned.
    """
    cdef:
        Py_ssize_t idx, n = len(values)
        object[:] out
        object item

    out = np.empty(n, dtype=object)

    if val is None or is_nan(val):
        # A missing scalar propagates to every position.
        out[:] = val
        # `.base` is the np.ndarray underlying the memoryview.
        return out.base

    for idx in range(n):
        item = values[idx]
        if item is not None and not is_nan(item):
            out[idx] = op(item, val)
        else:
            # Missing elements pass through untouched.
            out[idx] = item

    return maybe_convert_bool(out.base)[0]
@cython.wraparound(False)
@cython.boundscheck(False)
def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
    """
    Combine the elements of `left` and `right` pairwise using the binary
    operator `op`.

    Parameters
    ----------
    left : ndarray[object]
    right : ndarray[object]
    op : binary operator

    Returns
    -------
    result : ndarray[object]

    Raises
    ------
    ValueError
        If the two arrays differ in length.
    TypeError
        Re-raised from `op` when neither operand of the failing pair is
        None or NaN.

    Notes
    -----
    When `op` raises TypeError for a pair, the left operand is stored if
    it is None or NaN, otherwise the right operand is stored if it is None
    or NaN.  The filled array is passed through ``maybe_convert_bool``
    before being returned.
    """
    cdef:
        Py_ssize_t idx, n = len(left)
        object[:] out
        object lhs, rhs, value

    if n != <Py_ssize_t>len(right):
        raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')

    out = np.empty(n, dtype=object)

    for idx in range(n):
        lhs = left[idx]
        rhs = right[idx]
        try:
            value = op(lhs, rhs)
        except TypeError:
            # Fall back to whichever operand is missing, left side first.
            if lhs is None or is_nan(lhs):
                value = lhs
            elif rhs is None or is_nan(rhs):
                value = rhs
            else:
                raise
        out[idx] = value

    # `.base` is the np.ndarray underlying the memoryview.
    return maybe_convert_bool(out.base)[0]
def maybe_convert_bool(ndarray[object] arr,
                       true_values=None,
                       false_values=None,
                       convert_to_masked_nullable=False
                       ) -> tuple[np.ndarray, np.ndarray | None]:
    """
    Try to convert an object array into a boolean array.

    Parameters
    ----------
    arr : ndarray[object]
    true_values : iterable, optional
        Extra values to treat as True, in addition to the defaults
        'True', 'TRUE' and 'true'.
    false_values : iterable, optional
        Extra values to treat as False, in addition to the defaults
        'False', 'FALSE' and 'false'.
    convert_to_masked_nullable : bool, default False
        When NaN entries are present, return a boolean array together with
        a boolean mask instead of an object array holding NaN.

    Returns
    -------
    tuple of (ndarray, ndarray or None)
        If any element is neither a bool, a recognised true/false value,
        nor NaN, `arr` is returned unchanged with None.  Otherwise the
        first item is the converted array; the second item is the NaN mask
        only when NaN was seen and `convert_to_masked_nullable` is set,
        and None in every other case.
    """
    cdef:
        Py_ssize_t i, n = len(arr)
        ndarray[uint8_t] flags
        ndarray[uint8_t] na_mask
        object item
        set truthy = {'True', 'TRUE', 'true'}
        set falsy = {'False', 'FALSE', 'false'}
        bint saw_na = False

    flags = np.empty(n, dtype=np.uint8)
    na_mask = np.zeros(n, dtype=np.uint8)

    # Caller-supplied values extend the defaults rather than replace them.
    if true_values is not None:
        truthy |= set(true_values)
    if false_values is not None:
        falsy |= set(false_values)

    for i in range(n):
        item = arr[i]
        if isinstance(item, bool):
            flags[i] = 1 if item is True else 0
        elif item in truthy:
            flags[i] = 1
        elif item in falsy:
            flags[i] = 0
        elif is_nan(item):
            na_mask[i] = 1
            # Placeholder only; this slot is masked or overwritten with NaN.
            flags[i] = 0
            saw_na = True
        else:
            # Not interpretable as boolean: hand the input back untouched.
            return (arr, None)

    if not saw_na:
        return (flags.view(np.bool_), None)

    if convert_to_masked_nullable:
        return (flags.view(np.bool_), na_mask.view(np.bool_))

    # Without a mask, NaN can only be represented in an object array.
    arr = flags.view(np.bool_).astype(object)
    np.putmask(arr, na_mask, np.nan)
    return (arr, None)
|