# reshape.pyx (3.5 KB)
  1. import cython
  2. from cython import Py_ssize_t
  3. from numpy cimport (
  4. float32_t,
  5. float64_t,
  6. int8_t,
  7. int16_t,
  8. int32_t,
  9. int64_t,
  10. ndarray,
  11. uint8_t,
  12. uint16_t,
  13. uint32_t,
  14. uint64_t,
  15. )
  16. import numpy as np
  17. cimport numpy as cnp
  18. cnp.import_array()
  19. from pandas._libs.lib cimport c_is_list_like
  # Element types accepted by ``unstack``. Cython generates one specialization
  # of each function using this fused type per member listed below; ``object``
  # is the only member that requires the GIL while elements are copied.
  ctypedef fused reshape_t:
      uint8_t
      uint16_t
      uint32_t
      uint64_t
      int8_t
      int16_t
      int32_t
      int64_t
      float32_t
      float64_t
      object
  @cython.wraparound(False)
  @cython.boundscheck(False)
  def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
              Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
              reshape_t[:, :] new_values, uint8_t[:, :] new_mask) -> None:
      """
      Scatter long-format ``values`` into the wide ``new_values`` buffer.

      ``mask`` is walked in flat order (``row * width + k``). Every set entry
      consumes the next row of ``values``; unset entries are skipped and leave
      the corresponding cells of ``new_values`` / ``new_mask`` untouched.

      Parameters
      ----------
      values : typed ndarray
          Source data; one row per set entry of ``mask``, ``stride`` columns
          are read.
      mask : np.ndarray[bool]
          Flat selector, read at positions ``0 .. length * width - 1``
          (received here as a uint8 memoryview).
      stride : int
          Number of columns of ``values`` to scatter.
      length : int
          Number of rows written in ``new_values``.
      width : int
          Number of output columns per source column.
      new_values : typed ndarray
          Result array, same element type as ``values``; written in place at
          ``[row, col * width + k]``.
      new_mask : np.ndarray[bool]
          Result mask; set to 1 wherever ``new_values`` was written.
      """
      cdef:
          Py_ssize_t col, row, k, skipped, dst, src

      if reshape_t is object:
          # object-dtype: same traversal as the branch below, but copying
          # Python references requires holding the GIL
          for col in range(stride):
              skipped = 0
              for row in range(length):
                  for k in range(width):
                      src = row * width + k
                      if not mask[src]:
                          skipped += 1
                          continue
                      dst = col * width + k
                      new_values[row, dst] = values[src - skipped, col]
                      new_mask[row, dst] = 1
      else:
          # the type test above is evaluated at compile-time, so this branch
          # only exists in the non-object specializations
          with nogil:
              for col in range(stride):
                  skipped = 0
                  for row in range(length):
                      for k in range(width):
                          src = row * width + k
                          if not mask[src]:
                              skipped += 1
                              continue
                          dst = col * width + k
                          new_values[row, dst] = values[src - skipped, col]
                          new_mask[row, dst] = 1
  @cython.wraparound(False)
  @cython.boundscheck(False)
  def explode(ndarray[object] values):
      """
      Transform an array of list-likes to long form, preserving non-list entries.

      Each list-like entry is materialized exactly once with ``list(v)`` and
      that same list is used both to size and to fill the output. Bounds
      checking is disabled for this function, so the number of slots reserved
      for an entry must always equal the number of items written for it;
      deriving both from one list guarantees that, and also lets list-likes
      without ``__len__`` (e.g. iterators) be exploded.

      Parameters
      ----------
      values : ndarray[object]

      Returns
      -------
      result : ndarray[object]
          Flattened values. A non-list-like entry is copied unchanged, a
          non-empty list-like contributes one element per item, and an empty
          list-like contributes a single ``np.nan`` marker.
      counts : ndarray[int64_t]
          Number of elements of ``result`` produced by each entry of ``values``.
      """
      cdef:
          Py_ssize_t i, count, n
          object v
          list items
          list expanded
          ndarray[object] result
          ndarray[int64_t] counts

      n = len(values)
      counts = np.zeros(n, dtype='int64')

      # expanded[i] holds the materialized items of a list-like entry and
      # stays None for entries that are kept as scalars
      expanded = [None] * n

      # first pass: materialize list-likes and find the resulting len
      for i in range(n):
          v = values[i]
          if c_is_list_like(v, True):
              items = list(v)
              expanded[i] = items
              # an empty list-like still occupies one slot (the nan marker)
              counts[i] = len(items) if items else 1
          else:
              counts[i] = 1

      result = np.empty(counts.sum(), dtype='object')

      # second pass: fill the output from the lists built above
      count = 0
      for i in range(n):
          items = expanded[i]
          if items is None:
              # keep the existing scalar
              result[count] = values[i]
              count += 1
          elif not items:
              # empty list-like, use a nan marker
              result[count] = np.nan
              count += 1
          else:
              for v in items:
                  result[count] = v
                  count += 1
      return result, counts