arrays.pyx 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. """
  2. Cython implementations for internal ExtensionArrays.
  3. """
  4. cimport cython
  5. import numpy as np
  6. cimport numpy as cnp
  7. from numpy cimport ndarray
  8. cnp.import_array()
  9. @cython.freelist(16)
  10. cdef class NDArrayBacked:
  11. """
  12. Implementing these methods in cython improves performance quite a bit.
  13. import pandas as pd
  14. from pandas._libs.arrays import NDArrayBacked as cls
  15. dti = pd.date_range("2016-01-01", periods=3)
  16. dta = dti._data
  17. arr = dta._ndarray
  18. obj = cls._simple_new(arr, arr.dtype)
  19. # for foo in [arr, dta, obj]: ...
  20. %timeit foo.copy()
  21. 299 ns ± 30 ns per loop # <-- arr underlying ndarray (for reference)
  22. 530 ns ± 9.24 ns per loop # <-- dta with cython NDArrayBacked
  23. 1.66 µs ± 46.3 ns per loop # <-- dta without cython NDArrayBacked
  24. 328 ns ± 5.29 ns per loop # <-- obj with NDArrayBacked.__cinit__
  25. 371 ns ± 6.97 ns per loop # <-- obj with NDArrayBacked._simple_new
  26. %timeit foo.T
  27. 125 ns ± 6.27 ns per loop # <-- arr underlying ndarray (for reference)
  28. 226 ns ± 7.66 ns per loop # <-- dta with cython NDArrayBacked
  29. 911 ns ± 16.6 ns per loop # <-- dta without cython NDArrayBacked
  30. 215 ns ± 4.54 ns per loop # <-- obj with NDArrayBacked._simple_new
  31. """
  32. # TODO: implement take in terms of cnp.PyArray_TakeFrom
  33. # TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate
  34. # cdef:
  35. # readonly ndarray _ndarray
  36. # readonly object _dtype
  37. def __init__(self, ndarray values, object dtype):
  38. self._ndarray = values
  39. self._dtype = dtype
  40. @classmethod
  41. def _simple_new(cls, ndarray values, object dtype):
  42. cdef:
  43. NDArrayBacked obj
  44. obj = NDArrayBacked.__new__(cls)
  45. obj._ndarray = values
  46. obj._dtype = dtype
  47. return obj
  48. cpdef NDArrayBacked _from_backing_data(self, ndarray values):
  49. """
  50. Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.
  51. This should round-trip:
  52. self == self._from_backing_data(self._ndarray)
  53. """
  54. # TODO: re-reuse simple_new if/when it can be cpdef
  55. cdef:
  56. NDArrayBacked obj
  57. obj = NDArrayBacked.__new__(type(self))
  58. obj._ndarray = values
  59. obj._dtype = self._dtype
  60. return obj
  61. cpdef __setstate__(self, state):
  62. if isinstance(state, dict):
  63. if "_data" in state:
  64. data = state.pop("_data")
  65. elif "_ndarray" in state:
  66. data = state.pop("_ndarray")
  67. else:
  68. raise ValueError
  69. self._ndarray = data
  70. self._dtype = state.pop("_dtype")
  71. for key, val in state.items():
  72. setattr(self, key, val)
  73. elif isinstance(state, tuple):
  74. if len(state) != 3:
  75. if len(state) == 1 and isinstance(state[0], dict):
  76. self.__setstate__(state[0])
  77. return
  78. raise NotImplementedError(state)
  79. data, dtype = state[:2]
  80. if isinstance(dtype, np.ndarray):
  81. dtype, data = data, dtype
  82. self._ndarray = data
  83. self._dtype = dtype
  84. if isinstance(state[2], dict):
  85. for key, val in state[2].items():
  86. setattr(self, key, val)
  87. else:
  88. raise NotImplementedError(state)
  89. else:
  90. raise NotImplementedError(state)
  91. def __len__(self) -> int:
  92. return len(self._ndarray)
  93. @property
  94. def shape(self):
  95. # object cast bc _ndarray.shape is npy_intp*
  96. return (<object>(self._ndarray)).shape
  97. @property
  98. def ndim(self) -> int:
  99. return self._ndarray.ndim
  100. @property
  101. def size(self) -> int:
  102. return self._ndarray.size
  103. @property
  104. def nbytes(self) -> int:
  105. return self._ndarray.nbytes
  106. def copy(self):
  107. # NPY_ANYORDER -> same order as self._ndarray
  108. res_values = cnp.PyArray_NewCopy(self._ndarray, cnp.NPY_ANYORDER)
  109. return self._from_backing_data(res_values)
  110. def delete(self, loc, axis=0):
  111. res_values = np.delete(self._ndarray, loc, axis=axis)
  112. return self._from_backing_data(res_values)
  113. def swapaxes(self, axis1, axis2):
  114. res_values = cnp.PyArray_SwapAxes(self._ndarray, axis1, axis2)
  115. return self._from_backing_data(res_values)
  116. # TODO: pass NPY_MAXDIMS equiv to axis=None?
  117. def repeat(self, repeats, axis: int = 0):
  118. if axis is None:
  119. axis = 0
  120. res_values = cnp.PyArray_Repeat(self._ndarray, repeats, <int>axis)
  121. return self._from_backing_data(res_values)
  122. def reshape(self, *args, **kwargs):
  123. res_values = self._ndarray.reshape(*args, **kwargs)
  124. return self._from_backing_data(res_values)
  125. def ravel(self, order="C"):
  126. # cnp.PyArray_OrderConverter(PyObject* obj, NPY_ORDER* order)
  127. # res_values = cnp.PyArray_Ravel(self._ndarray, order)
  128. res_values = self._ndarray.ravel(order)
  129. return self._from_backing_data(res_values)
  130. @property
  131. def T(self):
  132. res_values = self._ndarray.T
  133. return self._from_backing_data(res_values)