ops.pyx 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. import operator
  2. from cpython.object cimport (
  3. Py_EQ,
  4. Py_GE,
  5. Py_GT,
  6. Py_LE,
  7. Py_LT,
  8. Py_NE,
  9. PyObject_RichCompareBool,
  10. )
  11. import cython
  12. from cython import Py_ssize_t
  13. import numpy as np
  14. from numpy cimport (
  15. import_array,
  16. ndarray,
  17. uint8_t,
  18. )
  19. import_array()
  20. from pandas._libs.missing cimport checknull
  21. from pandas._libs.util cimport is_nan
  22. @cython.wraparound(False)
  23. @cython.boundscheck(False)
  24. def scalar_compare(object[:] values, object val, object op) -> ndarray:
  25. """
  26. Compare each element of `values` array with the scalar `val`, with
  27. the comparison operation described by `op`.
  28. Parameters
  29. ----------
  30. values : ndarray[object]
  31. val : object
  32. op : {operator.eq, operator.ne,
  33. operator.le, operator.lt,
  34. operator.ge, operator.gt}
  35. Returns
  36. -------
  37. result : ndarray[bool]
  38. """
  39. cdef:
  40. Py_ssize_t i, n = len(values)
  41. ndarray[uint8_t, cast=True] result
  42. bint isnull_val
  43. int flag
  44. object x
  45. if op is operator.lt:
  46. flag = Py_LT
  47. elif op is operator.le:
  48. flag = Py_LE
  49. elif op is operator.gt:
  50. flag = Py_GT
  51. elif op is operator.ge:
  52. flag = Py_GE
  53. elif op is operator.eq:
  54. flag = Py_EQ
  55. elif op is operator.ne:
  56. flag = Py_NE
  57. else:
  58. raise ValueError('Unrecognized operator')
  59. result = np.empty(n, dtype=bool).view(np.uint8)
  60. isnull_val = checknull(val)
  61. if flag == Py_NE:
  62. for i in range(n):
  63. x = values[i]
  64. if checknull(x):
  65. result[i] = True
  66. elif isnull_val:
  67. result[i] = True
  68. else:
  69. try:
  70. result[i] = PyObject_RichCompareBool(x, val, flag)
  71. except TypeError:
  72. result[i] = True
  73. elif flag == Py_EQ:
  74. for i in range(n):
  75. x = values[i]
  76. if checknull(x):
  77. result[i] = False
  78. elif isnull_val:
  79. result[i] = False
  80. else:
  81. try:
  82. result[i] = PyObject_RichCompareBool(x, val, flag)
  83. except TypeError:
  84. result[i] = False
  85. else:
  86. for i in range(n):
  87. x = values[i]
  88. if checknull(x):
  89. result[i] = False
  90. elif isnull_val:
  91. result[i] = False
  92. else:
  93. result[i] = PyObject_RichCompareBool(x, val, flag)
  94. return result.view(bool)
  95. @cython.wraparound(False)
  96. @cython.boundscheck(False)
  97. def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarray:
  98. """
  99. Compare the elements of `left` with the elements of `right` pointwise,
  100. with the comparison operation described by `op`.
  101. Parameters
  102. ----------
  103. left : ndarray[object]
  104. right : ndarray[object]
  105. op : {operator.eq, operator.ne,
  106. operator.le, operator.lt,
  107. operator.ge, operator.gt}
  108. Returns
  109. -------
  110. result : ndarray[bool]
  111. """
  112. cdef:
  113. Py_ssize_t i, n = len(left)
  114. ndarray[uint8_t, cast=True] result
  115. int flag
  116. if n != <Py_ssize_t>len(right):
  117. raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')
  118. if op is operator.lt:
  119. flag = Py_LT
  120. elif op is operator.le:
  121. flag = Py_LE
  122. elif op is operator.gt:
  123. flag = Py_GT
  124. elif op is operator.ge:
  125. flag = Py_GE
  126. elif op is operator.eq:
  127. flag = Py_EQ
  128. elif op is operator.ne:
  129. flag = Py_NE
  130. else:
  131. raise ValueError('Unrecognized operator')
  132. result = np.empty(n, dtype=bool).view(np.uint8)
  133. if flag == Py_NE:
  134. for i in range(n):
  135. x = left[i]
  136. y = right[i]
  137. if checknull(x) or checknull(y):
  138. result[i] = True
  139. else:
  140. result[i] = PyObject_RichCompareBool(x, y, flag)
  141. else:
  142. for i in range(n):
  143. x = left[i]
  144. y = right[i]
  145. if checknull(x) or checknull(y):
  146. result[i] = False
  147. else:
  148. result[i] = PyObject_RichCompareBool(x, y, flag)
  149. return result.view(bool)
  150. @cython.wraparound(False)
  151. @cython.boundscheck(False)
  152. def scalar_binop(object[:] values, object val, object op) -> ndarray:
  153. """
  154. Apply the given binary operator `op` between each element of the array
  155. `values` and the scalar `val`.
  156. Parameters
  157. ----------
  158. values : ndarray[object]
  159. val : object
  160. op : binary operator
  161. Returns
  162. -------
  163. result : ndarray[object]
  164. """
  165. cdef:
  166. Py_ssize_t i, n = len(values)
  167. object[:] result
  168. object x
  169. result = np.empty(n, dtype=object)
  170. if val is None or is_nan(val):
  171. result[:] = val
  172. return result.base # `.base` to access underlying np.ndarray
  173. for i in range(n):
  174. x = values[i]
  175. if x is None or is_nan(x):
  176. result[i] = x
  177. else:
  178. result[i] = op(x, val)
  179. return maybe_convert_bool(result.base)[0]
  180. @cython.wraparound(False)
  181. @cython.boundscheck(False)
  182. def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
  183. """
  184. Apply the given binary operator `op` pointwise to the elements of
  185. arrays `left` and `right`.
  186. Parameters
  187. ----------
  188. left : ndarray[object]
  189. right : ndarray[object]
  190. op : binary operator
  191. Returns
  192. -------
  193. result : ndarray[object]
  194. """
  195. cdef:
  196. Py_ssize_t i, n = len(left)
  197. object[:] result
  198. if n != <Py_ssize_t>len(right):
  199. raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}')
  200. result = np.empty(n, dtype=object)
  201. for i in range(n):
  202. x = left[i]
  203. y = right[i]
  204. try:
  205. result[i] = op(x, y)
  206. except TypeError:
  207. if x is None or is_nan(x):
  208. result[i] = x
  209. elif y is None or is_nan(y):
  210. result[i] = y
  211. else:
  212. raise
  213. return maybe_convert_bool(result.base)[0] # `.base` to access np.ndarray
  214. def maybe_convert_bool(ndarray[object] arr,
  215. true_values=None,
  216. false_values=None,
  217. convert_to_masked_nullable=False
  218. ) -> tuple[np.ndarray, np.ndarray | None]:
  219. cdef:
  220. Py_ssize_t i, n
  221. ndarray[uint8_t] result
  222. ndarray[uint8_t] mask
  223. object val
  224. set true_vals, false_vals
  225. bint has_na = False
  226. n = len(arr)
  227. result = np.empty(n, dtype=np.uint8)
  228. mask = np.zeros(n, dtype=np.uint8)
  229. # the defaults
  230. true_vals = {'True', 'TRUE', 'true'}
  231. false_vals = {'False', 'FALSE', 'false'}
  232. if true_values is not None:
  233. true_vals = true_vals | set(true_values)
  234. if false_values is not None:
  235. false_vals = false_vals | set(false_values)
  236. for i in range(n):
  237. val = arr[i]
  238. if isinstance(val, bool):
  239. if val is True:
  240. result[i] = 1
  241. else:
  242. result[i] = 0
  243. elif val in true_vals:
  244. result[i] = 1
  245. elif val in false_vals:
  246. result[i] = 0
  247. elif is_nan(val):
  248. mask[i] = 1
  249. result[i] = 0 # Value here doesn't matter, will be replaced w/ nan
  250. has_na = True
  251. else:
  252. return (arr, None)
  253. if has_na:
  254. if convert_to_masked_nullable:
  255. return (result.view(np.bool_), mask.view(np.bool_))
  256. else:
  257. arr = result.view(np.bool_).astype(object)
  258. np.putmask(arr, mask, np.nan)
  259. return (arr, None)
  260. else:
  261. return (result.view(np.bool_), None)