sparse.pyx 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805
  1. import cython
  2. import numpy as np
  3. cimport numpy as cnp
  4. from numpy cimport (
  5. float32_t,
  6. float64_t,
  7. int8_t,
  8. int16_t,
  9. int32_t,
  10. int64_t,
  11. ndarray,
  12. uint8_t,
  13. )
  14. cnp.import_array()
  15. # -----------------------------------------------------------------------------
  16. # Preamble stuff
  17. cdef float64_t NaN = <float64_t>np.NaN
  18. cdef float64_t INF = <float64_t>np.inf
  19. # -----------------------------------------------------------------------------
  20. cdef class SparseIndex:
  21. """
  22. Abstract superclass for sparse index types.
  23. """
  24. def __init__(self):
  25. raise NotImplementedError
  26. cdef class IntIndex(SparseIndex):
  27. """
  28. Object for holding exact integer sparse indexing information
  29. Parameters
  30. ----------
  31. length : integer
  32. indices : array-like
  33. Contains integers corresponding to the indices.
  34. check_integrity : bool, default=True
  35. Check integrity of the input.
  36. """
  37. cdef readonly:
  38. Py_ssize_t length, npoints
  39. ndarray indices
  40. def __init__(self, Py_ssize_t length, indices, bint check_integrity=True):
  41. self.length = length
  42. self.indices = np.ascontiguousarray(indices, dtype=np.int32)
  43. self.npoints = len(self.indices)
  44. if check_integrity:
  45. self.check_integrity()
  46. def __reduce__(self):
  47. args = (self.length, self.indices)
  48. return IntIndex, args
  49. def __repr__(self) -> str:
  50. output = 'IntIndex\n'
  51. output += f'Indices: {repr(self.indices)}\n'
  52. return output
  53. @property
  54. def nbytes(self) -> int:
  55. return self.indices.nbytes
  56. def check_integrity(self):
  57. """
  58. Checks the following:
  59. - Indices are strictly ascending
  60. - Number of indices is at most self.length
  61. - Indices are at least 0 and at most the total length less one
  62. A ValueError is raised if any of these conditions is violated.
  63. """
  64. if self.npoints > self.length:
  65. raise ValueError(
  66. f"Too many indices. Expected {self.length} but found {self.npoints}"
  67. )
  68. # Indices are vacuously ordered and non-negative
  69. # if the sequence of indices is empty.
  70. if self.npoints == 0:
  71. return
  72. if self.indices.min() < 0:
  73. raise ValueError("No index can be less than zero")
  74. if self.indices.max() >= self.length:
  75. raise ValueError("All indices must be less than the length")
  76. monotonic = np.all(self.indices[:-1] < self.indices[1:])
  77. if not monotonic:
  78. raise ValueError("Indices must be strictly increasing")
  79. def equals(self, other: object) -> bool:
  80. if not isinstance(other, IntIndex):
  81. return False
  82. if self is other:
  83. return True
  84. same_length = self.length == other.length
  85. same_indices = np.array_equal(self.indices, other.indices)
  86. return same_length and same_indices
  87. @property
  88. def ngaps(self) -> int:
  89. return self.length - self.npoints
  90. def to_int_index(self):
  91. return self
  92. def to_block_index(self):
  93. locs, lens = get_blocks(self.indices)
  94. return BlockIndex(self.length, locs, lens)
  95. cpdef IntIndex intersect(self, SparseIndex y_):
  96. cdef:
  97. Py_ssize_t out_length, xi, yi = 0, result_indexer = 0
  98. int32_t xind
  99. ndarray[int32_t, ndim=1] xindices, yindices, new_indices
  100. IntIndex y
  101. # if is one already, returns self
  102. y = y_.to_int_index()
  103. if self.length != y.length:
  104. raise Exception('Indices must reference same underlying length')
  105. xindices = self.indices
  106. yindices = y.indices
  107. new_indices = np.empty(min(
  108. len(xindices), len(yindices)), dtype=np.int32)
  109. for xi in range(self.npoints):
  110. xind = xindices[xi]
  111. while yi < y.npoints and yindices[yi] < xind:
  112. yi += 1
  113. if yi >= y.npoints:
  114. break
  115. # TODO: would a two-pass algorithm be faster?
  116. if yindices[yi] == xind:
  117. new_indices[result_indexer] = xind
  118. result_indexer += 1
  119. new_indices = new_indices[:result_indexer]
  120. return IntIndex(self.length, new_indices)
  121. cpdef IntIndex make_union(self, SparseIndex y_):
  122. cdef:
  123. ndarray[int32_t, ndim=1] new_indices
  124. IntIndex y
  125. # if is one already, returns self
  126. y = y_.to_int_index()
  127. if self.length != y.length:
  128. raise ValueError('Indices must reference same underlying length')
  129. new_indices = np.union1d(self.indices, y.indices)
  130. return IntIndex(self.length, new_indices)
  131. @cython.wraparound(False)
  132. cpdef int32_t lookup(self, Py_ssize_t index):
  133. """
  134. Return the internal location if value exists on given index.
  135. Return -1 otherwise.
  136. """
  137. cdef:
  138. int32_t res
  139. ndarray[int32_t, ndim=1] inds
  140. inds = self.indices
  141. if self.npoints == 0:
  142. return -1
  143. elif index < 0 or self.length <= index:
  144. return -1
  145. res = inds.searchsorted(index)
  146. if res == self.npoints:
  147. return -1
  148. elif inds[res] == index:
  149. return res
  150. else:
  151. return -1
  152. @cython.wraparound(False)
  153. cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer):
  154. """
  155. Vectorized lookup, returns ndarray[int32_t]
  156. """
  157. cdef:
  158. Py_ssize_t n, i, ind_val
  159. ndarray[int32_t, ndim=1] inds
  160. ndarray[uint8_t, ndim=1, cast=True] mask
  161. ndarray[int32_t, ndim=1] masked
  162. ndarray[int32_t, ndim=1] res
  163. ndarray[int32_t, ndim=1] results
  164. n = len(indexer)
  165. results = np.empty(n, dtype=np.int32)
  166. results[:] = -1
  167. if self.npoints == 0:
  168. return results
  169. inds = self.indices
  170. mask = (inds[0] <= indexer) & (indexer <= inds[len(inds) - 1])
  171. masked = indexer[mask]
  172. res = inds.searchsorted(masked).astype(np.int32)
  173. res[inds[res] != masked] = -1
  174. results[mask] = res
  175. return results
  176. cpdef ndarray reindex(self, ndarray[float64_t, ndim=1] values,
  177. float64_t fill_value, SparseIndex other_):
  178. cdef:
  179. Py_ssize_t i = 0, j = 0
  180. IntIndex other
  181. ndarray[float64_t, ndim=1] result
  182. ndarray[int32_t, ndim=1] sinds, oinds
  183. other = other_.to_int_index()
  184. oinds = other.indices
  185. sinds = self.indices
  186. result = np.empty(other.npoints, dtype=np.float64)
  187. result[:] = fill_value
  188. for i in range(other.npoints):
  189. while oinds[i] > sinds[j] and j < self.npoints:
  190. j += 1
  191. if j == self.npoints:
  192. break
  193. if oinds[i] < sinds[j]:
  194. continue
  195. elif oinds[i] == sinds[j]:
  196. result[i] = values[j]
  197. j += 1
  198. return result
  199. cpdef put(self, ndarray[float64_t, ndim=1] values,
  200. ndarray[int32_t, ndim=1] indices, object to_put):
  201. pass
  202. cpdef take(self, ndarray[float64_t, ndim=1] values,
  203. ndarray[int32_t, ndim=1] indices):
  204. pass
  205. cpdef get_blocks(ndarray[int32_t, ndim=1] indices):
  206. cdef:
  207. Py_ssize_t init_len, i, npoints, result_indexer = 0
  208. int32_t block, length = 1, cur, prev
  209. ndarray[int32_t, ndim=1] locs, lens
  210. npoints = len(indices)
  211. # just handle the special empty case separately
  212. if npoints == 0:
  213. return np.array([], dtype=np.int32), np.array([], dtype=np.int32)
  214. # block size can't be longer than npoints
  215. locs = np.empty(npoints, dtype=np.int32)
  216. lens = np.empty(npoints, dtype=np.int32)
  217. # TODO: two-pass algorithm faster?
  218. prev = block = indices[0]
  219. for i in range(1, npoints):
  220. cur = indices[i]
  221. if cur - prev > 1:
  222. # new block
  223. locs[result_indexer] = block
  224. lens[result_indexer] = length
  225. block = cur
  226. length = 1
  227. result_indexer += 1
  228. else:
  229. # same block, increment length
  230. length += 1
  231. prev = cur
  232. locs[result_indexer] = block
  233. lens[result_indexer] = length
  234. result_indexer += 1
  235. locs = locs[:result_indexer]
  236. lens = lens[:result_indexer]
  237. return locs, lens
  238. # -----------------------------------------------------------------------------
  239. # BlockIndex
  240. cdef class BlockIndex(SparseIndex):
  241. """
  242. Object for holding block-based sparse indexing information
  243. Parameters
  244. ----------
  245. """
  246. cdef readonly:
  247. int32_t nblocks, npoints, length
  248. ndarray blocs, blengths
  249. cdef:
  250. object __weakref__ # need to be picklable
  251. int32_t *locbuf
  252. int32_t *lenbuf
  253. def __init__(self, length, blocs, blengths):
  254. self.blocs = np.ascontiguousarray(blocs, dtype=np.int32)
  255. self.blengths = np.ascontiguousarray(blengths, dtype=np.int32)
  256. # in case we need
  257. self.locbuf = <int32_t*>self.blocs.data
  258. self.lenbuf = <int32_t*>self.blengths.data
  259. self.length = length
  260. self.nblocks = np.int32(len(self.blocs))
  261. self.npoints = self.blengths.sum()
  262. # self.block_start = blocs
  263. # self.block_end = blocs + blengths
  264. self.check_integrity()
  265. def __reduce__(self):
  266. args = (self.length, self.blocs, self.blengths)
  267. return BlockIndex, args
  268. def __repr__(self) -> str:
  269. output = 'BlockIndex\n'
  270. output += f'Block locations: {repr(self.blocs)}\n'
  271. output += f'Block lengths: {repr(self.blengths)}'
  272. return output
  273. @property
  274. def nbytes(self) -> int:
  275. return self.blocs.nbytes + self.blengths.nbytes
  276. @property
  277. def ngaps(self) -> int:
  278. return self.length - self.npoints
  279. cpdef check_integrity(self):
  280. """
  281. Check:
  282. - Locations are in ascending order
  283. - No overlapping blocks
  284. - Blocks to not start after end of index, nor extend beyond end
  285. """
  286. cdef:
  287. Py_ssize_t i
  288. ndarray[int32_t, ndim=1] blocs, blengths
  289. blocs = self.blocs
  290. blengths = self.blengths
  291. if len(blocs) != len(blengths):
  292. raise ValueError('block bound arrays must be same length')
  293. for i in range(self.nblocks):
  294. if i > 0:
  295. if blocs[i] <= blocs[i - 1]:
  296. raise ValueError('Locations not in ascending order')
  297. if i < self.nblocks - 1:
  298. if blocs[i] + blengths[i] > blocs[i + 1]:
  299. raise ValueError(f'Block {i} overlaps')
  300. else:
  301. if blocs[i] + blengths[i] > self.length:
  302. raise ValueError(f'Block {i} extends beyond end')
  303. # no zero-length blocks
  304. if blengths[i] == 0:
  305. raise ValueError(f'Zero-length block {i}')
  306. def equals(self, other: object) -> bool:
  307. if not isinstance(other, BlockIndex):
  308. return False
  309. if self is other:
  310. return True
  311. same_length = self.length == other.length
  312. same_blocks = (np.array_equal(self.blocs, other.blocs) and
  313. np.array_equal(self.blengths, other.blengths))
  314. return same_length and same_blocks
  315. def to_block_index(self):
  316. return self
  317. def to_int_index(self):
  318. cdef:
  319. int32_t i = 0, j, b
  320. int32_t offset
  321. ndarray[int32_t, ndim=1] indices
  322. indices = np.empty(self.npoints, dtype=np.int32)
  323. for b in range(self.nblocks):
  324. offset = self.locbuf[b]
  325. for j in range(self.lenbuf[b]):
  326. indices[i] = offset + j
  327. i += 1
  328. return IntIndex(self.length, indices)
  329. cpdef BlockIndex intersect(self, SparseIndex other):
  330. """
  331. Intersect two BlockIndex objects
  332. Returns
  333. -------
  334. BlockIndex
  335. """
  336. cdef:
  337. BlockIndex y
  338. ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen, out_bloc, out_blen
  339. Py_ssize_t xi = 0, yi = 0, max_len, result_indexer = 0
  340. int32_t cur_loc, cur_length, diff
  341. y = other.to_block_index()
  342. if self.length != y.length:
  343. raise Exception('Indices must reference same underlying length')
  344. xloc = self.blocs
  345. xlen = self.blengths
  346. yloc = y.blocs
  347. ylen = y.blengths
  348. # block may be split, but can't exceed original len / 2 + 1
  349. max_len = min(self.length, y.length) // 2 + 1
  350. out_bloc = np.empty(max_len, dtype=np.int32)
  351. out_blen = np.empty(max_len, dtype=np.int32)
  352. while True:
  353. # we are done (or possibly never began)
  354. if xi >= self.nblocks or yi >= y.nblocks:
  355. break
  356. # completely symmetric...would like to avoid code dup but oh well
  357. if xloc[xi] >= yloc[yi]:
  358. cur_loc = xloc[xi]
  359. diff = xloc[xi] - yloc[yi]
  360. if ylen[yi] <= diff:
  361. # have to skip this block
  362. yi += 1
  363. continue
  364. if ylen[yi] - diff < xlen[xi]:
  365. # take end of y block, move onward
  366. cur_length = ylen[yi] - diff
  367. yi += 1
  368. else:
  369. # take end of x block
  370. cur_length = xlen[xi]
  371. xi += 1
  372. else: # xloc[xi] < yloc[yi]
  373. cur_loc = yloc[yi]
  374. diff = yloc[yi] - xloc[xi]
  375. if xlen[xi] <= diff:
  376. # have to skip this block
  377. xi += 1
  378. continue
  379. if xlen[xi] - diff < ylen[yi]:
  380. # take end of x block, move onward
  381. cur_length = xlen[xi] - diff
  382. xi += 1
  383. else:
  384. # take end of y block
  385. cur_length = ylen[yi]
  386. yi += 1
  387. out_bloc[result_indexer] = cur_loc
  388. out_blen[result_indexer] = cur_length
  389. result_indexer += 1
  390. out_bloc = out_bloc[:result_indexer]
  391. out_blen = out_blen[:result_indexer]
  392. return BlockIndex(self.length, out_bloc, out_blen)
  393. cpdef BlockIndex make_union(self, SparseIndex y):
  394. """
  395. Combine together two BlockIndex objects, accepting indices if contained
  396. in one or the other
  397. Parameters
  398. ----------
  399. other : SparseIndex
  400. Notes
  401. -----
  402. union is a protected keyword in Cython, hence make_union
  403. Returns
  404. -------
  405. BlockIndex
  406. """
  407. return BlockUnion(self, y.to_block_index()).result
  408. cpdef Py_ssize_t lookup(self, Py_ssize_t index):
  409. """
  410. Return the internal location if value exists on given index.
  411. Return -1 otherwise.
  412. """
  413. cdef:
  414. Py_ssize_t i, cum_len
  415. ndarray[int32_t, ndim=1] locs, lens
  416. locs = self.blocs
  417. lens = self.blengths
  418. if self.nblocks == 0:
  419. return -1
  420. elif index < locs[0]:
  421. return -1
  422. cum_len = 0
  423. for i in range(self.nblocks):
  424. if index >= locs[i] and index < locs[i] + lens[i]:
  425. return cum_len + index - locs[i]
  426. cum_len += lens[i]
  427. return -1
  428. @cython.wraparound(False)
  429. cpdef ndarray[int32_t] lookup_array(self, ndarray[int32_t, ndim=1] indexer):
  430. """
  431. Vectorized lookup, returns ndarray[int32_t]
  432. """
  433. cdef:
  434. Py_ssize_t n, i, j, ind_val
  435. ndarray[int32_t, ndim=1] locs, lens
  436. ndarray[int32_t, ndim=1] results
  437. locs = self.blocs
  438. lens = self.blengths
  439. n = len(indexer)
  440. results = np.empty(n, dtype=np.int32)
  441. results[:] = -1
  442. if self.npoints == 0:
  443. return results
  444. for i in range(n):
  445. ind_val = indexer[i]
  446. if not (ind_val < 0 or self.length <= ind_val):
  447. cum_len = 0
  448. for j in range(self.nblocks):
  449. if ind_val >= locs[j] and ind_val < locs[j] + lens[j]:
  450. results[i] = cum_len + ind_val - locs[j]
  451. cum_len += lens[j]
  452. return results
  453. cpdef ndarray reindex(self, ndarray[float64_t, ndim=1] values,
  454. float64_t fill_value, SparseIndex other_):
  455. cdef:
  456. Py_ssize_t i = 0, j = 0, ocur, ocurlen
  457. BlockIndex other
  458. ndarray[float64_t, ndim=1] result
  459. ndarray[int32_t, ndim=1] slocs, slens, olocs, olens
  460. other = other_.to_block_index()
  461. olocs = other.blocs
  462. olens = other.blengths
  463. slocs = self.blocs
  464. slens = self.blengths
  465. result = np.empty(other.npoints, dtype=np.float64)
  466. for i in range(other.nblocks):
  467. ocur = olocs[i]
  468. ocurlen = olens[i]
  469. while slocs[j] + slens[j] < ocur:
  470. j += 1
  471. cpdef put(self, ndarray[float64_t, ndim=1] values,
  472. ndarray[int32_t, ndim=1] indices, object to_put):
  473. pass
  474. cpdef take(self, ndarray[float64_t, ndim=1] values,
  475. ndarray[int32_t, ndim=1] indices):
  476. pass
  477. @cython.internal
  478. cdef class BlockMerge:
  479. """
  480. Object-oriented approach makes sharing state between recursive functions a
  481. lot easier and reduces code duplication
  482. """
  483. cdef:
  484. BlockIndex x, y, result
  485. ndarray xstart, xlen, xend, ystart, ylen, yend
  486. int32_t xi, yi # block indices
  487. def __init__(self, BlockIndex x, BlockIndex y):
  488. self.x = x
  489. self.y = y
  490. if x.length != y.length:
  491. raise Exception('Indices must reference same underlying length')
  492. self.xstart = self.x.blocs
  493. self.ystart = self.y.blocs
  494. self.xend = self.x.blocs + self.x.blengths
  495. self.yend = self.y.blocs + self.y.blengths
  496. # self.xlen = self.x.blengths
  497. # self.ylen = self.y.blengths
  498. self.xi = 0
  499. self.yi = 0
  500. self.result = self._make_merged_blocks()
  501. cdef _make_merged_blocks(self):
  502. raise NotImplementedError
  503. cdef _set_current_indices(self, int32_t xi, int32_t yi, bint mode):
  504. if mode == 0:
  505. self.xi = xi
  506. self.yi = yi
  507. else:
  508. self.xi = yi
  509. self.yi = xi
  510. @cython.internal
  511. cdef class BlockUnion(BlockMerge):
  512. """
  513. Object-oriented approach makes sharing state between recursive functions a
  514. lot easier and reduces code duplication
  515. """
  516. cdef _make_merged_blocks(self):
  517. cdef:
  518. ndarray[int32_t, ndim=1] xstart, xend, ystart
  519. ndarray[int32_t, ndim=1] yend, out_bloc, out_blen
  520. int32_t nstart, nend, diff
  521. Py_ssize_t max_len, result_indexer = 0
  522. xstart = self.xstart
  523. xend = self.xend
  524. ystart = self.ystart
  525. yend = self.yend
  526. max_len = min(self.x.length, self.y.length) // 2 + 1
  527. out_bloc = np.empty(max_len, dtype=np.int32)
  528. out_blen = np.empty(max_len, dtype=np.int32)
  529. while True:
  530. # we are done (or possibly never began)
  531. if self.xi >= self.x.nblocks and self.yi >= self.y.nblocks:
  532. break
  533. elif self.yi >= self.y.nblocks:
  534. # through with y, just pass through x blocks
  535. nstart = xstart[self.xi]
  536. nend = xend[self.xi]
  537. self.xi += 1
  538. elif self.xi >= self.x.nblocks:
  539. # through with x, just pass through y blocks
  540. nstart = ystart[self.yi]
  541. nend = yend[self.yi]
  542. self.yi += 1
  543. else:
  544. # find end of new block
  545. if xstart[self.xi] < ystart[self.yi]:
  546. nstart = xstart[self.xi]
  547. nend = self._find_next_block_end(0)
  548. else:
  549. nstart = ystart[self.yi]
  550. nend = self._find_next_block_end(1)
  551. out_bloc[result_indexer] = nstart
  552. out_blen[result_indexer] = nend - nstart
  553. result_indexer += 1
  554. out_bloc = out_bloc[:result_indexer]
  555. out_blen = out_blen[:result_indexer]
  556. return BlockIndex(self.x.length, out_bloc, out_blen)
  557. cdef int32_t _find_next_block_end(self, bint mode) except -1:
  558. """
  559. Wow, this got complicated in a hurry
  560. mode 0: block started in index x
  561. mode 1: block started in index y
  562. """
  563. cdef:
  564. ndarray[int32_t, ndim=1] xstart, xend, ystart, yend
  565. int32_t xi, yi, xnblocks, ynblocks, nend
  566. if mode != 0 and mode != 1:
  567. raise Exception('Mode must be 0 or 1')
  568. # so symmetric code will work
  569. if mode == 0:
  570. xstart = self.xstart
  571. xend = self.xend
  572. xi = self.xi
  573. ystart = self.ystart
  574. yend = self.yend
  575. yi = self.yi
  576. ynblocks = self.y.nblocks
  577. else:
  578. xstart = self.ystart
  579. xend = self.yend
  580. xi = self.yi
  581. ystart = self.xstart
  582. yend = self.xend
  583. yi = self.xi
  584. ynblocks = self.x.nblocks
  585. nend = xend[xi]
  586. # done with y?
  587. if yi == ynblocks:
  588. self._set_current_indices(xi + 1, yi, mode)
  589. return nend
  590. elif nend < ystart[yi]:
  591. # block ends before y block
  592. self._set_current_indices(xi + 1, yi, mode)
  593. return nend
  594. else:
  595. while yi < ynblocks and nend > yend[yi]:
  596. yi += 1
  597. self._set_current_indices(xi + 1, yi, mode)
  598. if yi == ynblocks:
  599. return nend
  600. if nend < ystart[yi]:
  601. # we're done, return the block end
  602. return nend
  603. else:
  604. # merge blocks, continue searching
  605. # this also catches the case where blocks
  606. return self._find_next_block_end(1 - mode)
  607. # -----------------------------------------------------------------------------
  608. # Sparse arithmetic
  609. include "sparse_op_helper.pxi"
  610. # -----------------------------------------------------------------------------
  611. # SparseArray mask create operations
  612. def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value):
  613. cdef:
  614. object value
  615. Py_ssize_t i
  616. Py_ssize_t new_length = len(arr)
  617. ndarray[int8_t, ndim=1] mask
  618. mask = np.ones(new_length, dtype=np.int8)
  619. for i in range(new_length):
  620. value = arr[i]
  621. if value == fill_value and type(value) == type(fill_value):
  622. mask[i] = 0
  623. return mask.view(dtype=bool)