frv.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. """
  2. Finite Discrete Random Variables Module
  3. See Also
  4. ========
  5. sympy.stats.frv_types
  6. sympy.stats.rv
  7. sympy.stats.crv
  8. """
  9. from itertools import product
  10. from sympy.concrete.summations import Sum
  11. from sympy.core.basic import Basic
  12. from sympy.core.cache import cacheit
  13. from sympy.core.function import Lambda
  14. from sympy.core.mul import Mul
  15. from sympy.core.numbers import (I, nan)
  16. from sympy.core.relational import Eq
  17. from sympy.core.singleton import S
  18. from sympy.core.symbol import (Dummy, Symbol)
  19. from sympy.core.sympify import sympify
  20. from sympy.functions.elementary.exponential import exp
  21. from sympy.functions.elementary.piecewise import Piecewise
  22. from sympy.logic.boolalg import (And, Or)
  23. from sympy.sets.sets import Intersection
  24. from sympy.core.containers import Dict
  25. from sympy.core.logic import Logic
  26. from sympy.core.relational import Relational
  27. from sympy.core.sympify import _sympify
  28. from sympy.sets.sets import FiniteSet
  29. from sympy.stats.rv import (RandomDomain, ProductDomain, ConditionalDomain,
  30. PSpace, IndependentProductPSpace, SinglePSpace, random_symbols,
  31. sumsets, rv_subs, NamedArgsMixin, Density, Distribution)
  32. class FiniteDensity(dict):
  33. """
  34. A domain with Finite Density.
  35. """
  36. def __call__(self, item):
  37. """
  38. Make instance of a class callable.
  39. If item belongs to current instance of a class, return it.
  40. Otherwise, return 0.
  41. """
  42. item = sympify(item)
  43. if item in self:
  44. return self[item]
  45. else:
  46. return 0
  47. @property
  48. def dict(self):
  49. """
  50. Return item as dictionary.
  51. """
  52. return dict(self)
  53. class FiniteDomain(RandomDomain):
  54. """
  55. A domain with discrete finite support
  56. Represented using a FiniteSet.
  57. """
  58. is_Finite = True
  59. @property
  60. def symbols(self):
  61. return FiniteSet(sym for sym, val in self.elements)
  62. @property
  63. def elements(self):
  64. return self.args[0]
  65. @property
  66. def dict(self):
  67. return FiniteSet(*[Dict(dict(el)) for el in self.elements])
  68. def __contains__(self, other):
  69. return other in self.elements
  70. def __iter__(self):
  71. return self.elements.__iter__()
  72. def as_boolean(self):
  73. return Or(*[And(*[Eq(sym, val) for sym, val in item]) for item in self])
  74. class SingleFiniteDomain(FiniteDomain):
  75. """
  76. A FiniteDomain over a single symbol/set
  77. Example: The possibilities of a *single* die roll.
  78. """
  79. def __new__(cls, symbol, set):
  80. if not isinstance(set, FiniteSet) and \
  81. not isinstance(set, Intersection):
  82. set = FiniteSet(*set)
  83. return Basic.__new__(cls, symbol, set)
  84. @property
  85. def symbol(self):
  86. return self.args[0]
  87. @property
  88. def symbols(self):
  89. return FiniteSet(self.symbol)
  90. @property
  91. def set(self):
  92. return self.args[1]
  93. @property
  94. def elements(self):
  95. return FiniteSet(*[frozenset(((self.symbol, elem), )) for elem in self.set])
  96. def __iter__(self):
  97. return (frozenset(((self.symbol, elem),)) for elem in self.set)
  98. def __contains__(self, other):
  99. sym, val = tuple(other)[0]
  100. return sym == self.symbol and val in self.set
  101. class ProductFiniteDomain(ProductDomain, FiniteDomain):
  102. """
  103. A Finite domain consisting of several other FiniteDomains
  104. Example: The possibilities of the rolls of three independent dice
  105. """
  106. def __iter__(self):
  107. proditer = product(*self.domains)
  108. return (sumsets(items) for items in proditer)
  109. @property
  110. def elements(self):
  111. return FiniteSet(*self)
  112. class ConditionalFiniteDomain(ConditionalDomain, ProductFiniteDomain):
  113. """
  114. A FiniteDomain that has been restricted by a condition
  115. Example: The possibilities of a die roll under the condition that the
  116. roll is even.
  117. """
  118. def __new__(cls, domain, condition):
  119. """
  120. Create a new instance of ConditionalFiniteDomain class
  121. """
  122. if condition is True:
  123. return domain
  124. cond = rv_subs(condition)
  125. return Basic.__new__(cls, domain, cond)
  126. def _test(self, elem):
  127. """
  128. Test the value. If value is boolean, return it. If value is equality
  129. relational (two objects are equal), return it with left-hand side
  130. being equal to right-hand side. Otherwise, raise ValueError exception.
  131. """
  132. val = self.condition.xreplace(dict(elem))
  133. if val in [True, False]:
  134. return val
  135. elif val.is_Equality:
  136. return val.lhs == val.rhs
  137. raise ValueError("Undecidable if %s" % str(val))
  138. def __contains__(self, other):
  139. return other in self.fulldomain and self._test(other)
  140. def __iter__(self):
  141. return (elem for elem in self.fulldomain if self._test(elem))
  142. @property
  143. def set(self):
  144. if isinstance(self.fulldomain, SingleFiniteDomain):
  145. return FiniteSet(*[elem for elem in self.fulldomain.set
  146. if frozenset(((self.fulldomain.symbol, elem),)) in self])
  147. else:
  148. raise NotImplementedError(
  149. "Not implemented on multi-dimensional conditional domain")
  150. def as_boolean(self):
  151. return FiniteDomain.as_boolean(self)
  152. class SingleFiniteDistribution(Distribution, NamedArgsMixin):
  153. def __new__(cls, *args):
  154. args = list(map(sympify, args))
  155. return Basic.__new__(cls, *args)
  156. @staticmethod
  157. def check(*args):
  158. pass
  159. @property # type: ignore
  160. @cacheit
  161. def dict(self):
  162. if self.is_symbolic:
  163. return Density(self)
  164. return {k: self.pmf(k) for k in self.set}
  165. def pmf(self, *args): # to be overridden by specific distribution
  166. raise NotImplementedError()
  167. @property
  168. def set(self): # to be overridden by specific distribution
  169. raise NotImplementedError()
  170. values = property(lambda self: self.dict.values)
  171. items = property(lambda self: self.dict.items)
  172. is_symbolic = property(lambda self: False)
  173. __iter__ = property(lambda self: self.dict.__iter__)
  174. __getitem__ = property(lambda self: self.dict.__getitem__)
  175. def __call__(self, *args):
  176. return self.pmf(*args)
  177. def __contains__(self, other):
  178. return other in self.set
  179. #=============================================
  180. #========= Probability Space ===============
  181. #=============================================
  182. class FinitePSpace(PSpace):
  183. """
  184. A Finite Probability Space
  185. Represents the probabilities of a finite number of events.
  186. """
  187. is_Finite = True
  188. def __new__(cls, domain, density):
  189. density = {sympify(key): sympify(val)
  190. for key, val in density.items()}
  191. public_density = Dict(density)
  192. obj = PSpace.__new__(cls, domain, public_density)
  193. obj._density = density
  194. return obj
  195. def prob_of(self, elem):
  196. elem = sympify(elem)
  197. density = self._density
  198. if isinstance(list(density.keys())[0], FiniteSet):
  199. return density.get(elem, S.Zero)
  200. return density.get(tuple(elem)[0][1], S.Zero)
  201. def where(self, condition):
  202. assert all(r.symbol in self.symbols for r in random_symbols(condition))
  203. return ConditionalFiniteDomain(self.domain, condition)
  204. def compute_density(self, expr):
  205. expr = rv_subs(expr, self.values)
  206. d = FiniteDensity()
  207. for elem in self.domain:
  208. val = expr.xreplace(dict(elem))
  209. prob = self.prob_of(elem)
  210. d[val] = d.get(val, S.Zero) + prob
  211. return d
  212. @cacheit
  213. def compute_cdf(self, expr):
  214. d = self.compute_density(expr)
  215. cum_prob = S.Zero
  216. cdf = []
  217. for key in sorted(d):
  218. prob = d[key]
  219. cum_prob += prob
  220. cdf.append((key, cum_prob))
  221. return dict(cdf)
  222. @cacheit
  223. def sorted_cdf(self, expr, python_float=False):
  224. cdf = self.compute_cdf(expr)
  225. items = list(cdf.items())
  226. sorted_items = sorted(items, key=lambda val_cumprob: val_cumprob[1])
  227. if python_float:
  228. sorted_items = [(v, float(cum_prob))
  229. for v, cum_prob in sorted_items]
  230. return sorted_items
  231. @cacheit
  232. def compute_characteristic_function(self, expr):
  233. d = self.compute_density(expr)
  234. t = Dummy('t', real=True)
  235. return Lambda(t, sum(exp(I*k*t)*v for k,v in d.items()))
  236. @cacheit
  237. def compute_moment_generating_function(self, expr):
  238. d = self.compute_density(expr)
  239. t = Dummy('t', real=True)
  240. return Lambda(t, sum(exp(k*t)*v for k,v in d.items()))
  241. def compute_expectation(self, expr, rvs=None, **kwargs):
  242. rvs = rvs or self.values
  243. expr = rv_subs(expr, rvs)
  244. probs = [self.prob_of(elem) for elem in self.domain]
  245. if isinstance(expr, (Logic, Relational)):
  246. parse_domain = [tuple(elem)[0][1] for elem in self.domain]
  247. bools = [expr.xreplace(dict(elem)) for elem in self.domain]
  248. else:
  249. parse_domain = [expr.xreplace(dict(elem)) for elem in self.domain]
  250. bools = [True for elem in self.domain]
  251. return sum([Piecewise((prob * elem, blv), (S.Zero, True))
  252. for prob, elem, blv in zip(probs, parse_domain, bools)])
  253. def compute_quantile(self, expr):
  254. cdf = self.compute_cdf(expr)
  255. p = Dummy('p', real=True)
  256. set = ((nan, (p < 0) | (p > 1)),)
  257. for key, value in cdf.items():
  258. set = set + ((key, p <= value), )
  259. return Lambda(p, Piecewise(*set))
  260. def probability(self, condition):
  261. cond_symbols = frozenset(rs.symbol for rs in random_symbols(condition))
  262. cond = rv_subs(condition)
  263. if not cond_symbols.issubset(self.symbols):
  264. raise ValueError("Cannot compare foreign random symbols, %s"
  265. %(str(cond_symbols - self.symbols)))
  266. if isinstance(condition, Relational) and \
  267. (not cond.free_symbols.issubset(self.domain.free_symbols)):
  268. rv = condition.lhs if isinstance(condition.rhs, Symbol) else condition.rhs
  269. return sum(Piecewise(
  270. (self.prob_of(elem), condition.subs(rv, list(elem)[0][1])),
  271. (S.Zero, True)) for elem in self.domain)
  272. return sympify(sum(self.prob_of(elem) for elem in self.where(condition)))
  273. def conditional_space(self, condition):
  274. domain = self.where(condition)
  275. prob = self.probability(condition)
  276. density = {key: val / prob
  277. for key, val in self._density.items() if domain._test(key)}
  278. return FinitePSpace(domain, density)
  279. def sample(self, size=(), library='scipy', seed=None):
  280. """
  281. Internal sample method
  282. Returns dictionary mapping RandomSymbol to realization value.
  283. """
  284. return {self.value: self.distribution.sample(size, library, seed)}
  285. class SingleFinitePSpace(SinglePSpace, FinitePSpace):
  286. """
  287. A single finite probability space
  288. Represents the probabilities of a set of random events that can be
  289. attributed to a single variable/symbol.
  290. This class is implemented by many of the standard FiniteRV types such as
  291. Die, Bernoulli, Coin, etc....
  292. """
  293. @property
  294. def domain(self):
  295. return SingleFiniteDomain(self.symbol, self.distribution.set)
  296. @property
  297. def _is_symbolic(self):
  298. """
  299. Helper property to check if the distribution
  300. of the random variable is having symbolic
  301. dimension.
  302. """
  303. return self.distribution.is_symbolic
  304. @property
  305. def distribution(self):
  306. return self.args[1]
  307. def pmf(self, expr):
  308. return self.distribution.pmf(expr)
  309. @property # type: ignore
  310. @cacheit
  311. def _density(self):
  312. return {FiniteSet((self.symbol, val)): prob
  313. for val, prob in self.distribution.dict.items()}
  314. @cacheit
  315. def compute_characteristic_function(self, expr):
  316. if self._is_symbolic:
  317. d = self.compute_density(expr)
  318. t = Dummy('t', real=True)
  319. ki = Dummy('ki')
  320. return Lambda(t, Sum(d(ki)*exp(I*ki*t), (ki, self.args[1].low, self.args[1].high)))
  321. expr = rv_subs(expr, self.values)
  322. return FinitePSpace(self.domain, self.distribution).compute_characteristic_function(expr)
  323. @cacheit
  324. def compute_moment_generating_function(self, expr):
  325. if self._is_symbolic:
  326. d = self.compute_density(expr)
  327. t = Dummy('t', real=True)
  328. ki = Dummy('ki')
  329. return Lambda(t, Sum(d(ki)*exp(ki*t), (ki, self.args[1].low, self.args[1].high)))
  330. expr = rv_subs(expr, self.values)
  331. return FinitePSpace(self.domain, self.distribution).compute_moment_generating_function(expr)
  332. def compute_quantile(self, expr):
  333. if self._is_symbolic:
  334. raise NotImplementedError("Computing quantile for random variables "
  335. "with symbolic dimension because the bounds of searching the required "
  336. "value is undetermined.")
  337. expr = rv_subs(expr, self.values)
  338. return FinitePSpace(self.domain, self.distribution).compute_quantile(expr)
  339. def compute_density(self, expr):
  340. if self._is_symbolic:
  341. rv = list(random_symbols(expr))[0]
  342. k = Dummy('k', integer=True)
  343. cond = True if not isinstance(expr, (Relational, Logic)) \
  344. else expr.subs(rv, k)
  345. return Lambda(k,
  346. Piecewise((self.pmf(k), And(k >= self.args[1].low,
  347. k <= self.args[1].high, cond)), (S.Zero, True)))
  348. expr = rv_subs(expr, self.values)
  349. return FinitePSpace(self.domain, self.distribution).compute_density(expr)
  350. def compute_cdf(self, expr):
  351. if self._is_symbolic:
  352. d = self.compute_density(expr)
  353. k = Dummy('k')
  354. ki = Dummy('ki')
  355. return Lambda(k, Sum(d(ki), (ki, self.args[1].low, k)))
  356. expr = rv_subs(expr, self.values)
  357. return FinitePSpace(self.domain, self.distribution).compute_cdf(expr)
  358. def compute_expectation(self, expr, rvs=None, **kwargs):
  359. if self._is_symbolic:
  360. rv = random_symbols(expr)[0]
  361. k = Dummy('k', integer=True)
  362. expr = expr.subs(rv, k)
  363. cond = True if not isinstance(expr, (Relational, Logic)) \
  364. else expr
  365. func = self.pmf(k) * k if cond != True else self.pmf(k) * expr
  366. return Sum(Piecewise((func, cond), (S.Zero, True)),
  367. (k, self.distribution.low, self.distribution.high)).doit()
  368. expr = _sympify(expr)
  369. expr = rv_subs(expr, rvs)
  370. return FinitePSpace(self.domain, self.distribution).compute_expectation(expr, rvs, **kwargs)
  371. def probability(self, condition):
  372. if self._is_symbolic:
  373. #TODO: Implement the mechanism for handling queries for symbolic sized distributions.
  374. raise NotImplementedError("Currently, probability queries are not "
  375. "supported for random variables with symbolic sized distributions.")
  376. condition = rv_subs(condition)
  377. return FinitePSpace(self.domain, self.distribution).probability(condition)
  378. def conditional_space(self, condition):
  379. """
  380. This method is used for transferring the
  381. computation to probability method because
  382. conditional space of random variables with
  383. symbolic dimensions is currently not possible.
  384. """
  385. if self._is_symbolic:
  386. self
  387. domain = self.where(condition)
  388. prob = self.probability(condition)
  389. density = {key: val / prob
  390. for key, val in self._density.items() if domain._test(key)}
  391. return FinitePSpace(domain, density)
  392. class ProductFinitePSpace(IndependentProductPSpace, FinitePSpace):
  393. """
  394. A collection of several independent finite probability spaces
  395. """
  396. @property
  397. def domain(self):
  398. return ProductFiniteDomain(*[space.domain for space in self.spaces])
  399. @property # type: ignore
  400. @cacheit
  401. def _density(self):
  402. proditer = product(*[iter(space._density.items())
  403. for space in self.spaces])
  404. d = {}
  405. for items in proditer:
  406. elems, probs = list(zip(*items))
  407. elem = sumsets(elems)
  408. prob = Mul(*probs)
  409. d[elem] = d.get(elem, S.Zero) + prob
  410. return Dict(d)
  411. @property # type: ignore
  412. @cacheit
  413. def density(self):
  414. return Dict(self._density)
  415. def probability(self, condition):
  416. return FinitePSpace.probability(self, condition)
  417. def compute_density(self, expr):
  418. return FinitePSpace.compute_density(self, expr)