frequencies.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
  1. from __future__ import annotations
  2. import warnings
  3. import numpy as np
  4. from pandas._libs.algos import unique_deltas
  5. from pandas._libs.tslibs import (
  6. Timestamp,
  7. tzconversion,
  8. )
  9. from pandas._libs.tslibs.ccalendar import (
  10. DAYS,
  11. MONTH_ALIASES,
  12. MONTH_NUMBERS,
  13. MONTHS,
  14. int_to_weekday,
  15. )
  16. from pandas._libs.tslibs.fields import (
  17. build_field_sarray,
  18. month_position_check,
  19. )
  20. from pandas._libs.tslibs.offsets import ( # noqa:F401
  21. DateOffset,
  22. Day,
  23. _get_offset,
  24. to_offset,
  25. )
  26. from pandas._libs.tslibs.parsing import get_rule_month
  27. from pandas.util._decorators import cache_readonly
  28. from pandas.core.dtypes.common import (
  29. is_datetime64_dtype,
  30. is_period_dtype,
  31. is_timedelta64_dtype,
  32. )
  33. from pandas.core.dtypes.generic import ABCSeries
  34. from pandas.core.algorithms import unique
  35. _ONE_MICRO = 1000
  36. _ONE_MILLI = _ONE_MICRO * 1000
  37. _ONE_SECOND = _ONE_MILLI * 1000
  38. _ONE_MINUTE = 60 * _ONE_SECOND
  39. _ONE_HOUR = 60 * _ONE_MINUTE
  40. _ONE_DAY = 24 * _ONE_HOUR
  41. # ---------------------------------------------------------------------
  42. # Offset names ("time rules") and related functions
  43. _offset_to_period_map = {
  44. "WEEKDAY": "D",
  45. "EOM": "M",
  46. "BM": "M",
  47. "BQS": "Q",
  48. "QS": "Q",
  49. "BQ": "Q",
  50. "BA": "A",
  51. "AS": "A",
  52. "BAS": "A",
  53. "MS": "M",
  54. "D": "D",
  55. "C": "C",
  56. "B": "B",
  57. "T": "T",
  58. "S": "S",
  59. "L": "L",
  60. "U": "U",
  61. "N": "N",
  62. "H": "H",
  63. "Q": "Q",
  64. "A": "A",
  65. "W": "W",
  66. "M": "M",
  67. "Y": "A",
  68. "BY": "A",
  69. "YS": "A",
  70. "BYS": "A",
  71. }
  72. _need_suffix = ["QS", "BQ", "BQS", "YS", "AS", "BY", "BA", "BYS", "BAS"]
  73. for _prefix in _need_suffix:
  74. for _m in MONTHS:
  75. key = f"{_prefix}-{_m}"
  76. _offset_to_period_map[key] = _offset_to_period_map[_prefix]
  77. for _prefix in ["A", "Q"]:
  78. for _m in MONTHS:
  79. _alias = f"{_prefix}-{_m}"
  80. _offset_to_period_map[_alias] = _alias
  81. for _d in DAYS:
  82. _offset_to_period_map[f"W-{_d}"] = f"W-{_d}"
  83. def get_period_alias(offset_str: str) -> str | None:
  84. """
  85. Alias to closest period strings BQ->Q etc.
  86. """
  87. return _offset_to_period_map.get(offset_str, None)
  88. def get_offset(name: str) -> DateOffset:
  89. """
  90. Return DateOffset object associated with rule name.
  91. .. deprecated:: 1.0.0
  92. Examples
  93. --------
  94. get_offset('EOM') --> BMonthEnd(1)
  95. """
  96. warnings.warn(
  97. "get_offset is deprecated and will be removed in a future version, "
  98. "use to_offset instead",
  99. FutureWarning,
  100. stacklevel=2,
  101. )
  102. return _get_offset(name)
  103. # ---------------------------------------------------------------------
  104. # Period codes
  105. def infer_freq(index, warn: bool = True) -> str | None:
  106. """
  107. Infer the most likely frequency given the input index. If the frequency is
  108. uncertain, a warning will be printed.
  109. Parameters
  110. ----------
  111. index : DatetimeIndex or TimedeltaIndex
  112. If passed a Series will use the values of the series (NOT THE INDEX).
  113. warn : bool, default True
  114. Returns
  115. -------
  116. str or None
  117. None if no discernible frequency.
  118. Raises
  119. ------
  120. TypeError
  121. If the index is not datetime-like.
  122. ValueError
  123. If there are fewer than three values.
  124. Examples
  125. --------
  126. >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30)
  127. >>> pd.infer_freq(idx)
  128. 'D'
  129. """
  130. import pandas as pd
  131. if isinstance(index, ABCSeries):
  132. values = index._values
  133. if not (
  134. is_datetime64_dtype(values)
  135. or is_timedelta64_dtype(values)
  136. or values.dtype == object
  137. ):
  138. raise TypeError(
  139. "cannot infer freq from a non-convertible dtype "
  140. f"on a Series of {index.dtype}"
  141. )
  142. index = values
  143. inferer: _FrequencyInferer
  144. if not hasattr(index, "dtype"):
  145. pass
  146. elif is_period_dtype(index.dtype):
  147. raise TypeError(
  148. "PeriodIndex given. Check the `freq` attribute "
  149. "instead of using infer_freq."
  150. )
  151. elif is_timedelta64_dtype(index.dtype):
  152. # Allow TimedeltaIndex and TimedeltaArray
  153. inferer = _TimedeltaFrequencyInferer(index, warn=warn)
  154. return inferer.get_freq()
  155. if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex):
  156. if isinstance(index, (pd.Int64Index, pd.Float64Index)):
  157. raise TypeError(
  158. f"cannot infer freq from a non-convertible index type {type(index)}"
  159. )
  160. index = index._values
  161. if not isinstance(index, pd.DatetimeIndex):
  162. index = pd.DatetimeIndex(index)
  163. inferer = _FrequencyInferer(index, warn=warn)
  164. return inferer.get_freq()
  165. class _FrequencyInferer:
  166. """
  167. Not sure if I can avoid the state machine here
  168. """
  169. def __init__(self, index, warn: bool = True):
  170. self.index = index
  171. self.i8values = index.asi8
  172. # This moves the values, which are implicitly in UTC, to the
  173. # the timezone so they are in local time
  174. if hasattr(index, "tz"):
  175. if index.tz is not None:
  176. self.i8values = tzconversion.tz_convert_from_utc(
  177. self.i8values, index.tz
  178. )
  179. self.warn = warn
  180. if len(index) < 3:
  181. raise ValueError("Need at least 3 dates to infer frequency")
  182. self.is_monotonic = (
  183. self.index._is_monotonic_increasing or self.index._is_monotonic_decreasing
  184. )
  185. @cache_readonly
  186. def deltas(self):
  187. return unique_deltas(self.i8values)
  188. @cache_readonly
  189. def deltas_asi8(self):
  190. # NB: we cannot use self.i8values here because we may have converted
  191. # the tz in __init__
  192. return unique_deltas(self.index.asi8)
  193. @cache_readonly
  194. def is_unique(self) -> bool:
  195. return len(self.deltas) == 1
  196. @cache_readonly
  197. def is_unique_asi8(self) -> bool:
  198. return len(self.deltas_asi8) == 1
  199. def get_freq(self) -> str | None:
  200. """
  201. Find the appropriate frequency string to describe the inferred
  202. frequency of self.i8values
  203. Returns
  204. -------
  205. str or None
  206. """
  207. if not self.is_monotonic or not self.index._is_unique:
  208. return None
  209. delta = self.deltas[0]
  210. if delta and _is_multiple(delta, _ONE_DAY):
  211. return self._infer_daily_rule()
  212. # Business hourly, maybe. 17: one day / 65: one weekend
  213. if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
  214. return "BH"
  215. # Possibly intraday frequency. Here we use the
  216. # original .asi8 values as the modified values
  217. # will not work around DST transitions. See #8772
  218. if not self.is_unique_asi8:
  219. return None
  220. delta = self.deltas_asi8[0]
  221. if _is_multiple(delta, _ONE_HOUR):
  222. # Hours
  223. return _maybe_add_count("H", delta / _ONE_HOUR)
  224. elif _is_multiple(delta, _ONE_MINUTE):
  225. # Minutes
  226. return _maybe_add_count("T", delta / _ONE_MINUTE)
  227. elif _is_multiple(delta, _ONE_SECOND):
  228. # Seconds
  229. return _maybe_add_count("S", delta / _ONE_SECOND)
  230. elif _is_multiple(delta, _ONE_MILLI):
  231. # Milliseconds
  232. return _maybe_add_count("L", delta / _ONE_MILLI)
  233. elif _is_multiple(delta, _ONE_MICRO):
  234. # Microseconds
  235. return _maybe_add_count("U", delta / _ONE_MICRO)
  236. else:
  237. # Nanoseconds
  238. return _maybe_add_count("N", delta)
  239. @cache_readonly
  240. def day_deltas(self):
  241. return [x / _ONE_DAY for x in self.deltas]
  242. @cache_readonly
  243. def hour_deltas(self):
  244. return [x / _ONE_HOUR for x in self.deltas]
  245. @cache_readonly
  246. def fields(self):
  247. return build_field_sarray(self.i8values)
  248. @cache_readonly
  249. def rep_stamp(self):
  250. return Timestamp(self.i8values[0])
  251. def month_position_check(self):
  252. return month_position_check(self.fields, self.index.dayofweek)
  253. @cache_readonly
  254. def mdiffs(self):
  255. nmonths = self.fields["Y"] * 12 + self.fields["M"]
  256. return unique_deltas(nmonths.astype("i8"))
  257. @cache_readonly
  258. def ydiffs(self):
  259. return unique_deltas(self.fields["Y"].astype("i8"))
  260. def _infer_daily_rule(self) -> str | None:
  261. annual_rule = self._get_annual_rule()
  262. if annual_rule:
  263. nyears = self.ydiffs[0]
  264. month = MONTH_ALIASES[self.rep_stamp.month]
  265. alias = f"{annual_rule}-{month}"
  266. return _maybe_add_count(alias, nyears)
  267. quarterly_rule = self._get_quarterly_rule()
  268. if quarterly_rule:
  269. nquarters = self.mdiffs[0] / 3
  270. mod_dict = {0: 12, 2: 11, 1: 10}
  271. month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
  272. alias = f"{quarterly_rule}-{month}"
  273. return _maybe_add_count(alias, nquarters)
  274. monthly_rule = self._get_monthly_rule()
  275. if monthly_rule:
  276. return _maybe_add_count(monthly_rule, self.mdiffs[0])
  277. if self.is_unique:
  278. return self._get_daily_rule()
  279. if self._is_business_daily():
  280. return "B"
  281. wom_rule = self._get_wom_rule()
  282. if wom_rule:
  283. return wom_rule
  284. return None
  285. def _get_daily_rule(self) -> str | None:
  286. days = self.deltas[0] / _ONE_DAY
  287. if days % 7 == 0:
  288. # Weekly
  289. wd = int_to_weekday[self.rep_stamp.weekday()]
  290. alias = f"W-{wd}"
  291. return _maybe_add_count(alias, days / 7)
  292. else:
  293. return _maybe_add_count("D", days)
  294. def _get_annual_rule(self) -> str | None:
  295. if len(self.ydiffs) > 1:
  296. return None
  297. if len(unique(self.fields["M"])) > 1:
  298. return None
  299. pos_check = self.month_position_check()
  300. return {"cs": "AS", "bs": "BAS", "ce": "A", "be": "BA"}.get(pos_check)
  301. def _get_quarterly_rule(self) -> str | None:
  302. if len(self.mdiffs) > 1:
  303. return None
  304. if not self.mdiffs[0] % 3 == 0:
  305. return None
  306. pos_check = self.month_position_check()
  307. return {"cs": "QS", "bs": "BQS", "ce": "Q", "be": "BQ"}.get(pos_check)
  308. def _get_monthly_rule(self) -> str | None:
  309. if len(self.mdiffs) > 1:
  310. return None
  311. pos_check = self.month_position_check()
  312. return {"cs": "MS", "bs": "BMS", "ce": "M", "be": "BM"}.get(pos_check)
  313. def _is_business_daily(self) -> bool:
  314. # quick check: cannot be business daily
  315. if self.day_deltas != [1, 3]:
  316. return False
  317. # probably business daily, but need to confirm
  318. first_weekday = self.index[0].weekday()
  319. shifts = np.diff(self.index.asi8)
  320. shifts = np.floor_divide(shifts, _ONE_DAY)
  321. weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
  322. # error: Incompatible return value type (got "bool_", expected "bool")
  323. return np.all( # type: ignore[return-value]
  324. ((weekdays == 0) & (shifts == 3))
  325. | ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))
  326. )
  327. def _get_wom_rule(self) -> str | None:
  328. # FIXME: dont leave commented-out
  329. # wdiffs = unique(np.diff(self.index.week))
  330. # We also need -47, -49, -48 to catch index spanning year boundary
  331. # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all():
  332. # return None
  333. weekdays = unique(self.index.weekday)
  334. if len(weekdays) > 1:
  335. return None
  336. week_of_months = unique((self.index.day - 1) // 7)
  337. # Only attempt to infer up to WOM-4. See #9425
  338. week_of_months = week_of_months[week_of_months < 4]
  339. if len(week_of_months) == 0 or len(week_of_months) > 1:
  340. return None
  341. # get which week
  342. week = week_of_months[0] + 1
  343. wd = int_to_weekday[weekdays[0]]
  344. return f"WOM-{week}{wd}"
  345. class _TimedeltaFrequencyInferer(_FrequencyInferer):
  346. def _infer_daily_rule(self):
  347. if self.is_unique:
  348. return self._get_daily_rule()
  349. def _is_multiple(us, mult: int) -> bool:
  350. return us % mult == 0
  351. def _maybe_add_count(base: str, count: float) -> str:
  352. if count != 1:
  353. assert count == int(count)
  354. count = int(count)
  355. return f"{count}{base}"
  356. else:
  357. return base
  358. # ----------------------------------------------------------------------
  359. # Frequency comparison
  360. def is_subperiod(source, target) -> bool:
  361. """
  362. Returns True if downsampling is possible between source and target
  363. frequencies
  364. Parameters
  365. ----------
  366. source : str or DateOffset
  367. Frequency converting from
  368. target : str or DateOffset
  369. Frequency converting to
  370. Returns
  371. -------
  372. bool
  373. """
  374. if target is None or source is None:
  375. return False
  376. source = _maybe_coerce_freq(source)
  377. target = _maybe_coerce_freq(target)
  378. if _is_annual(target):
  379. if _is_quarterly(source):
  380. return _quarter_months_conform(
  381. get_rule_month(source), get_rule_month(target)
  382. )
  383. return source in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"}
  384. elif _is_quarterly(target):
  385. return source in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"}
  386. elif _is_monthly(target):
  387. return source in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  388. elif _is_weekly(target):
  389. return source in {target, "D", "C", "B", "H", "T", "S", "L", "U", "N"}
  390. elif target == "B":
  391. return source in {"B", "H", "T", "S", "L", "U", "N"}
  392. elif target == "C":
  393. return source in {"C", "H", "T", "S", "L", "U", "N"}
  394. elif target == "D":
  395. return source in {"D", "H", "T", "S", "L", "U", "N"}
  396. elif target == "H":
  397. return source in {"H", "T", "S", "L", "U", "N"}
  398. elif target == "T":
  399. return source in {"T", "S", "L", "U", "N"}
  400. elif target == "S":
  401. return source in {"S", "L", "U", "N"}
  402. elif target == "L":
  403. return source in {"L", "U", "N"}
  404. elif target == "U":
  405. return source in {"U", "N"}
  406. elif target == "N":
  407. return source in {"N"}
  408. else:
  409. return False
  410. def is_superperiod(source, target) -> bool:
  411. """
  412. Returns True if upsampling is possible between source and target
  413. frequencies
  414. Parameters
  415. ----------
  416. source : str or DateOffset
  417. Frequency converting from
  418. target : str or DateOffset
  419. Frequency converting to
  420. Returns
  421. -------
  422. bool
  423. """
  424. if target is None or source is None:
  425. return False
  426. source = _maybe_coerce_freq(source)
  427. target = _maybe_coerce_freq(target)
  428. if _is_annual(source):
  429. if _is_annual(target):
  430. return get_rule_month(source) == get_rule_month(target)
  431. if _is_quarterly(target):
  432. smonth = get_rule_month(source)
  433. tmonth = get_rule_month(target)
  434. return _quarter_months_conform(smonth, tmonth)
  435. return target in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"}
  436. elif _is_quarterly(source):
  437. return target in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"}
  438. elif _is_monthly(source):
  439. return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  440. elif _is_weekly(source):
  441. return target in {source, "D", "C", "B", "H", "T", "S", "L", "U", "N"}
  442. elif source == "B":
  443. return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  444. elif source == "C":
  445. return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  446. elif source == "D":
  447. return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  448. elif source == "H":
  449. return target in {"H", "T", "S", "L", "U", "N"}
  450. elif source == "T":
  451. return target in {"T", "S", "L", "U", "N"}
  452. elif source == "S":
  453. return target in {"S", "L", "U", "N"}
  454. elif source == "L":
  455. return target in {"L", "U", "N"}
  456. elif source == "U":
  457. return target in {"U", "N"}
  458. elif source == "N":
  459. return target in {"N"}
  460. else:
  461. return False
  462. def _maybe_coerce_freq(code) -> str:
  463. """we might need to coerce a code to a rule_code
  464. and uppercase it
  465. Parameters
  466. ----------
  467. source : str or DateOffset
  468. Frequency converting from
  469. Returns
  470. -------
  471. str
  472. """
  473. assert code is not None
  474. if isinstance(code, DateOffset):
  475. code = code.rule_code
  476. return code.upper()
  477. def _quarter_months_conform(source: str, target: str) -> bool:
  478. snum = MONTH_NUMBERS[source]
  479. tnum = MONTH_NUMBERS[target]
  480. return snum % 3 == tnum % 3
  481. def _is_annual(rule: str) -> bool:
  482. rule = rule.upper()
  483. return rule == "A" or rule.startswith("A-")
  484. def _is_quarterly(rule: str) -> bool:
  485. rule = rule.upper()
  486. return rule == "Q" or rule.startswith("Q-") or rule.startswith("BQ")
  487. def _is_monthly(rule: str) -> bool:
  488. rule = rule.upper()
  489. return rule == "M" or rule == "BM"
  490. def _is_weekly(rule: str) -> bool:
  491. rule = rule.upper()
  492. return rule == "W" or rule.startswith("W-")