import cython
from cython import Py_ssize_t

from cython cimport floating
from libc.stdlib cimport (
    free,
    malloc,
)

import numpy as np

cimport numpy as cnp
from numpy cimport (
    complex64_t,
    complex128_t,
    float32_t,
    float64_t,
    int8_t,
    int16_t,
    int32_t,
    int64_t,
    intp_t,
    ndarray,
    uint8_t,
    uint16_t,
    uint32_t,
    uint64_t,
)
from numpy.math cimport NAN

cnp.import_array()

from pandas._libs.algos cimport kth_smallest_c
from pandas._libs.util cimport (
    get_nat,
    numeric,
)

from pandas._libs.algos import (
    ensure_platform_int,
    groupsort_indexer,
    rank_1d,
    take_2d_axis1_float64_float64,
)

from pandas._libs.missing cimport checknull


cdef int64_t NPY_NAT = get_nat()
_int64_max = np.iinfo(np.int64).max

# ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed in
# NumPy 2.0, so using it would fail at import time on newer NumPy.
cdef float64_t NaN = np.nan

cdef enum InterpolationEnumType:
    INTERPOLATION_LINEAR,
    INTERPOLATION_LOWER,
    INTERPOLATION_HIGHER,
    INTERPOLATION_NEAREST,
    INTERPOLATION_MIDPOINT


cdef inline float64_t median_linear(float64_t* a, int n) nogil:
    """
    Return the median of the ``n`` values starting at ``a``, skipping NaNs.

    Parameters
    ----------
    a : float64_t*
        Pointer to the first of ``n`` values.
    n : int
        Number of values readable through ``a``.

    Returns
    -------
    float64_t
        NaN when ``n == 0`` or when every value is NaN. Otherwise the middle
        order statistic of the non-NaN values (odd count) or the mean of the
        two middle order statistics (even count), as selected by
        ``kth_smallest_c``.

    Notes
    -----
    When at least one (but not every) value is NaN, the non-NaN values are
    copied into a temporary heap buffer that is released before returning.
    If that allocation fails, MemoryError is raised under the GIL instead of
    writing through a NULL pointer; because this function declares no
    exception return value, how the error reaches callers depends on the
    Cython version's exception-propagation defaults.
    """
    cdef:
        int i, j, na_count = 0
        float64_t result
        float64_t* tmp

    if n == 0:
        return NaN

    # count NAs (NaN is the only value that compares unequal to itself)
    for i in range(n):
        if a[i] != a[i]:
            na_count += 1

    if na_count:
        if na_count == n:
            return NaN

        # malloc returns void*; Cython requires an explicit cast to assign it
        # to a typed pointer.
        tmp = <float64_t*>malloc((n - na_count) * sizeof(float64_t))
        if tmp is NULL:
            with gil:
                raise MemoryError()

        # compact the non-NaN values into the scratch buffer
        j = 0
        for i in range(n):
            if a[i] == a[i]:
                tmp[j] = a[i]
                j += 1

        # from here on operate on the NaN-free copy only
        a = tmp
        n -= na_count

    if n % 2:
        result = kth_smallest_c(a, n // 2, n)
    else:
        result = (kth_smallest_c(a, n // 2, n) +
                  kth_smallest_c(a, n // 2 - 1, n)) / 2

    if na_count:
        # only the scratch buffer is ours to release, never the caller's data
        free(tmp)

    return result


@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(ndarray[float64_t, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[float64_t, ndim=2] values,
                         ndarray[intp_t] labels,
                         Py_ssize_t min_count=-1) -> None:
    """
    Only aggregates on axis=0
    """
    cdef:
        Py_ssize_t i, j, N, K, ngroups, size
        ndarray[intp_t] _counts
        ndarray[float64_t, ndim=2] data
        ndarray[intp_t] indexer
        float64_t* ptr

    assert min_count == -1, "'min_count' only used in add and prod"

    ngroups = len(counts)
    N, K = (