holiday.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. from __future__ import annotations
  2. from datetime import (
  3. datetime,
  4. timedelta,
  5. )
  6. import warnings
  7. from dateutil.relativedelta import ( # noqa
  8. FR,
  9. MO,
  10. SA,
  11. SU,
  12. TH,
  13. TU,
  14. WE,
  15. )
  16. import numpy as np
  17. from pandas.errors import PerformanceWarning
  18. from pandas import (
  19. DateOffset,
  20. DatetimeIndex,
  21. Series,
  22. Timestamp,
  23. concat,
  24. date_range,
  25. )
  26. from pandas.tseries.offsets import (
  27. Day,
  28. Easter,
  29. )
  30. def next_monday(dt: datetime) -> datetime:
  31. """
  32. If holiday falls on Saturday, use following Monday instead;
  33. if holiday falls on Sunday, use Monday instead
  34. """
  35. if dt.weekday() == 5:
  36. return dt + timedelta(2)
  37. elif dt.weekday() == 6:
  38. return dt + timedelta(1)
  39. return dt
  40. def next_monday_or_tuesday(dt: datetime) -> datetime:
  41. """
  42. For second holiday of two adjacent ones!
  43. If holiday falls on Saturday, use following Monday instead;
  44. if holiday falls on Sunday or Monday, use following Tuesday instead
  45. (because Monday is already taken by adjacent holiday on the day before)
  46. """
  47. dow = dt.weekday()
  48. if dow == 5 or dow == 6:
  49. return dt + timedelta(2)
  50. elif dow == 0:
  51. return dt + timedelta(1)
  52. return dt
  53. def previous_friday(dt: datetime) -> datetime:
  54. """
  55. If holiday falls on Saturday or Sunday, use previous Friday instead.
  56. """
  57. if dt.weekday() == 5:
  58. return dt - timedelta(1)
  59. elif dt.weekday() == 6:
  60. return dt - timedelta(2)
  61. return dt
  62. def sunday_to_monday(dt: datetime) -> datetime:
  63. """
  64. If holiday falls on Sunday, use day thereafter (Monday) instead.
  65. """
  66. if dt.weekday() == 6:
  67. return dt + timedelta(1)
  68. return dt
  69. def weekend_to_monday(dt: datetime) -> datetime:
  70. """
  71. If holiday falls on Sunday or Saturday,
  72. use day thereafter (Monday) instead.
  73. Needed for holidays such as Christmas observation in Europe
  74. """
  75. if dt.weekday() == 6:
  76. return dt + timedelta(1)
  77. elif dt.weekday() == 5:
  78. return dt + timedelta(2)
  79. return dt
  80. def nearest_workday(dt: datetime) -> datetime:
  81. """
  82. If holiday falls on Saturday, use day before (Friday) instead;
  83. if holiday falls on Sunday, use day thereafter (Monday) instead.
  84. """
  85. if dt.weekday() == 5:
  86. return dt - timedelta(1)
  87. elif dt.weekday() == 6:
  88. return dt + timedelta(1)
  89. return dt
  90. def next_workday(dt: datetime) -> datetime:
  91. """
  92. returns next weekday used for observances
  93. """
  94. dt += timedelta(days=1)
  95. while dt.weekday() > 4:
  96. # Mon-Fri are 0-4
  97. dt += timedelta(days=1)
  98. return dt
  99. def previous_workday(dt: datetime) -> datetime:
  100. """
  101. returns previous weekday used for observances
  102. """
  103. dt -= timedelta(days=1)
  104. while dt.weekday() > 4:
  105. # Mon-Fri are 0-4
  106. dt -= timedelta(days=1)
  107. return dt
  108. def before_nearest_workday(dt: datetime) -> datetime:
  109. """
  110. returns previous workday after nearest workday
  111. """
  112. return previous_workday(nearest_workday(dt))
  113. def after_nearest_workday(dt: datetime) -> datetime:
  114. """
  115. returns next workday after nearest workday
  116. needed for Boxing day or multiple holidays in a series
  117. """
  118. return next_workday(nearest_workday(dt))
  119. class Holiday:
  120. """
  121. Class that defines a holiday with start/end dates and rules
  122. for observance.
  123. """
  124. def __init__(
  125. self,
  126. name,
  127. year=None,
  128. month=None,
  129. day=None,
  130. offset=None,
  131. observance=None,
  132. start_date=None,
  133. end_date=None,
  134. days_of_week=None,
  135. ):
  136. """
  137. Parameters
  138. ----------
  139. name : str
  140. Name of the holiday , defaults to class name
  141. offset : array of pandas.tseries.offsets or
  142. class from pandas.tseries.offsets
  143. computes offset from date
  144. observance: function
  145. computes when holiday is given a pandas Timestamp
  146. days_of_week:
  147. provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday
  148. Monday=0,..,Sunday=6
  149. Examples
  150. --------
  151. >>> from pandas.tseries.holiday import Holiday, nearest_workday
  152. >>> from dateutil.relativedelta import MO
  153. >>> USMemorialDay = Holiday(
  154. ... "Memorial Day", month=5, day=31, offset=pd.DateOffset(weekday=MO(-1))
  155. ... )
  156. >>> USMemorialDay
  157. Holiday: Memorial Day (month=5, day=31, offset=<DateOffset: weekday=MO(-1)>)
  158. >>> USLaborDay = Holiday(
  159. ... "Labor Day", month=9, day=1, offset=pd.DateOffset(weekday=MO(1))
  160. ... )
  161. >>> USLaborDay
  162. Holiday: Labor Day (month=9, day=1, offset=<DateOffset: weekday=MO(+1)>)
  163. >>> July3rd = Holiday("July 3rd", month=7, day=3)
  164. >>> July3rd
  165. Holiday: July 3rd (month=7, day=3, )
  166. >>> NewYears = Holiday(
  167. ... "New Years Day", month=1, day=1, observance=nearest_workday
  168. ... )
  169. >>> NewYears # doctest: +SKIP
  170. Holiday: New Years Day (
  171. month=1, day=1, observance=<function nearest_workday at 0x66545e9bc440>
  172. )
  173. >>> July3rd = Holiday("July 3rd", month=7, day=3, days_of_week=(0, 1, 2, 3))
  174. >>> July3rd
  175. Holiday: July 3rd (month=7, day=3, )
  176. """
  177. if offset is not None and observance is not None:
  178. raise NotImplementedError("Cannot use both offset and observance.")
  179. self.name = name
  180. self.year = year
  181. self.month = month
  182. self.day = day
  183. self.offset = offset
  184. self.start_date = (
  185. Timestamp(start_date) if start_date is not None else start_date
  186. )
  187. self.end_date = Timestamp(end_date) if end_date is not None else end_date
  188. self.observance = observance
  189. assert days_of_week is None or type(days_of_week) == tuple
  190. self.days_of_week = days_of_week
  191. def __repr__(self) -> str:
  192. info = ""
  193. if self.year is not None:
  194. info += f"year={self.year}, "
  195. info += f"month={self.month}, day={self.day}, "
  196. if self.offset is not None:
  197. info += f"offset={self.offset}"
  198. if self.observance is not None:
  199. info += f"observance={self.observance}"
  200. repr = f"Holiday: {self.name} ({info})"
  201. return repr
  202. def dates(self, start_date, end_date, return_name=False):
  203. """
  204. Calculate holidays observed between start date and end date
  205. Parameters
  206. ----------
  207. start_date : starting date, datetime-like, optional
  208. end_date : ending date, datetime-like, optional
  209. return_name : bool, optional, default=False
  210. If True, return a series that has dates and holiday names.
  211. False will only return dates.
  212. """
  213. start_date = Timestamp(start_date)
  214. end_date = Timestamp(end_date)
  215. filter_start_date = start_date
  216. filter_end_date = end_date
  217. if self.year is not None:
  218. dt = Timestamp(datetime(self.year, self.month, self.day))
  219. if return_name:
  220. return Series(self.name, index=[dt])
  221. else:
  222. return [dt]
  223. dates = self._reference_dates(start_date, end_date)
  224. holiday_dates = self._apply_rule(dates)
  225. if self.days_of_week is not None:
  226. holiday_dates = holiday_dates[
  227. np.in1d(holiday_dates.dayofweek, self.days_of_week)
  228. ]
  229. if self.start_date is not None:
  230. filter_start_date = max(
  231. self.start_date.tz_localize(filter_start_date.tz), filter_start_date
  232. )
  233. if self.end_date is not None:
  234. filter_end_date = min(
  235. self.end_date.tz_localize(filter_end_date.tz), filter_end_date
  236. )
  237. holiday_dates = holiday_dates[
  238. (holiday_dates >= filter_start_date) & (holiday_dates <= filter_end_date)
  239. ]
  240. if return_name:
  241. return Series(self.name, index=holiday_dates)
  242. return holiday_dates
  243. def _reference_dates(self, start_date, end_date):
  244. """
  245. Get reference dates for the holiday.
  246. Return reference dates for the holiday also returning the year
  247. prior to the start_date and year following the end_date. This ensures
  248. that any offsets to be applied will yield the holidays within
  249. the passed in dates.
  250. """
  251. if self.start_date is not None:
  252. start_date = self.start_date.tz_localize(start_date.tz)
  253. if self.end_date is not None:
  254. end_date = self.end_date.tz_localize(start_date.tz)
  255. year_offset = DateOffset(years=1)
  256. reference_start_date = Timestamp(
  257. datetime(start_date.year - 1, self.month, self.day)
  258. )
  259. reference_end_date = Timestamp(
  260. datetime(end_date.year + 1, self.month, self.day)
  261. )
  262. # Don't process unnecessary holidays
  263. dates = date_range(
  264. start=reference_start_date,
  265. end=reference_end_date,
  266. freq=year_offset,
  267. tz=start_date.tz,
  268. )
  269. return dates
  270. def _apply_rule(self, dates):
  271. """
  272. Apply the given offset/observance to a DatetimeIndex of dates.
  273. Parameters
  274. ----------
  275. dates : DatetimeIndex
  276. Dates to apply the given offset/observance rule
  277. Returns
  278. -------
  279. Dates with rules applied
  280. """
  281. if self.observance is not None:
  282. return dates.map(lambda d: self.observance(d))
  283. if self.offset is not None:
  284. if not isinstance(self.offset, list):
  285. offsets = [self.offset]
  286. else:
  287. offsets = self.offset
  288. for offset in offsets:
  289. # if we are adding a non-vectorized value
  290. # ignore the PerformanceWarnings:
  291. with warnings.catch_warnings():
  292. warnings.simplefilter("ignore", PerformanceWarning)
  293. dates += offset
  294. return dates
  295. holiday_calendars = {}
  296. def register(cls):
  297. try:
  298. name = cls.name
  299. except AttributeError:
  300. name = cls.__name__
  301. holiday_calendars[name] = cls
  302. def get_calendar(name):
  303. """
  304. Return an instance of a calendar based on its name.
  305. Parameters
  306. ----------
  307. name : str
  308. Calendar name to return an instance of
  309. """
  310. return holiday_calendars[name]()
  311. class HolidayCalendarMetaClass(type):
  312. def __new__(cls, clsname, bases, attrs):
  313. calendar_class = super().__new__(cls, clsname, bases, attrs)
  314. register(calendar_class)
  315. return calendar_class
  316. class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass):
  317. """
  318. Abstract interface to create holidays following certain rules.
  319. """
  320. rules: list[Holiday] = []
  321. start_date = Timestamp(datetime(1970, 1, 1))
  322. end_date = Timestamp(datetime(2200, 12, 31))
  323. _cache = None
  324. def __init__(self, name=None, rules=None):
  325. """
  326. Initializes holiday object with a given set a rules. Normally
  327. classes just have the rules defined within them.
  328. Parameters
  329. ----------
  330. name : str
  331. Name of the holiday calendar, defaults to class name
  332. rules : array of Holiday objects
  333. A set of rules used to create the holidays.
  334. """
  335. super().__init__()
  336. if name is None:
  337. name = type(self).__name__
  338. self.name = name
  339. if rules is not None:
  340. self.rules = rules
  341. def rule_from_name(self, name):
  342. for rule in self.rules:
  343. if rule.name == name:
  344. return rule
  345. return None
  346. def holidays(self, start=None, end=None, return_name=False):
  347. """
  348. Returns a curve with holidays between start_date and end_date
  349. Parameters
  350. ----------
  351. start : starting date, datetime-like, optional
  352. end : ending date, datetime-like, optional
  353. return_name : bool, optional
  354. If True, return a series that has dates and holiday names.
  355. False will only return a DatetimeIndex of dates.
  356. Returns
  357. -------
  358. DatetimeIndex of holidays
  359. """
  360. if self.rules is None:
  361. raise Exception(
  362. f"Holiday Calendar {self.name} does not have any rules specified"
  363. )
  364. if start is None:
  365. start = AbstractHolidayCalendar.start_date
  366. if end is None:
  367. end = AbstractHolidayCalendar.end_date
  368. start = Timestamp(start)
  369. end = Timestamp(end)
  370. # If we don't have a cache or the dates are outside the prior cache, we
  371. # get them again
  372. if self._cache is None or start < self._cache[0] or end > self._cache[1]:
  373. pre_holidays = [
  374. rule.dates(start, end, return_name=True) for rule in self.rules
  375. ]
  376. if pre_holidays:
  377. holidays = concat(pre_holidays)
  378. else:
  379. holidays = Series(index=DatetimeIndex([]), dtype=object)
  380. self._cache = (start, end, holidays.sort_index())
  381. holidays = self._cache[2]
  382. holidays = holidays[start:end]
  383. if return_name:
  384. return holidays
  385. else:
  386. return holidays.index
  387. @staticmethod
  388. def merge_class(base, other):
  389. """
  390. Merge holiday calendars together. The base calendar
  391. will take precedence to other. The merge will be done
  392. based on each holiday's name.
  393. Parameters
  394. ----------
  395. base : AbstractHolidayCalendar
  396. instance/subclass or array of Holiday objects
  397. other : AbstractHolidayCalendar
  398. instance/subclass or array of Holiday objects
  399. """
  400. try:
  401. other = other.rules
  402. except AttributeError:
  403. pass
  404. if not isinstance(other, list):
  405. other = [other]
  406. other_holidays = {holiday.name: holiday for holiday in other}
  407. try:
  408. base = base.rules
  409. except AttributeError:
  410. pass
  411. if not isinstance(base, list):
  412. base = [base]
  413. base_holidays = {holiday.name: holiday for holiday in base}
  414. other_holidays.update(base_holidays)
  415. return list(other_holidays.values())
  416. def merge(self, other, inplace=False):
  417. """
  418. Merge holiday calendars together. The caller's class
  419. rules take precedence. The merge will be done
  420. based on each holiday's name.
  421. Parameters
  422. ----------
  423. other : holiday calendar
  424. inplace : bool (default=False)
  425. If True set rule_table to holidays, else return array of Holidays
  426. """
  427. holidays = self.merge_class(self, other)
  428. if inplace:
  429. self.rules = holidays
  430. else:
  431. return holidays
  432. USMemorialDay = Holiday(
  433. "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1))
  434. )
  435. USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1)))
  436. USColumbusDay = Holiday(
  437. "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2))
  438. )
  439. USThanksgivingDay = Holiday(
  440. "Thanksgiving", month=11, day=1, offset=DateOffset(weekday=TH(4))
  441. )
  442. USMartinLutherKingJr = Holiday(
  443. "Martin Luther King Jr. Day",
  444. start_date=datetime(1986, 1, 1),
  445. month=1,
  446. day=1,
  447. offset=DateOffset(weekday=MO(3)),
  448. )
  449. USPresidentsDay = Holiday(
  450. "Presidents Day", month=2, day=1, offset=DateOffset(weekday=MO(3))
  451. )
  452. GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])
  453. EasterMonday = Holiday("Easter Monday", month=1, day=1, offset=[Easter(), Day(1)])
  454. class USFederalHolidayCalendar(AbstractHolidayCalendar):
  455. """
  456. US Federal Government Holiday Calendar based on rules specified by:
  457. https://www.opm.gov/policy-data-oversight/
  458. snow-dismissal-procedures/federal-holidays/
  459. """
  460. rules = [
  461. Holiday("New Years Day", month=1, day=1, observance=nearest_workday),
  462. USMartinLutherKingJr,
  463. USPresidentsDay,
  464. USMemorialDay,
  465. Holiday("July 4th", month=7, day=4, observance=nearest_workday),
  466. USLaborDay,
  467. USColumbusDay,
  468. Holiday("Veterans Day", month=11, day=11, observance=nearest_workday),
  469. USThanksgivingDay,
  470. Holiday("Christmas", month=12, day=25, observance=nearest_workday),
  471. ]
  472. def HolidayCalendarFactory(name, base, other, base_class=AbstractHolidayCalendar):
  473. rules = AbstractHolidayCalendar.merge_class(base, other)
  474. calendar_class = type(name, (base_class,), {"rules": rules, "name": name})
  475. return calendar_class