_misc.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. from contextlib import contextmanager
  2. from pandas.plotting._core import _get_plot_backend
  3. def table(ax, data, rowLabels=None, colLabels=None, **kwargs):
  4. """
  5. Helper function to convert DataFrame and Series to matplotlib.table.
  6. Parameters
  7. ----------
  8. ax : Matplotlib axes object
  9. data : DataFrame or Series
  10. Data for table contents.
  11. **kwargs
  12. Keyword arguments to be passed to matplotlib.table.table.
  13. If `rowLabels` or `colLabels` is not specified, data index or column
  14. name will be used.
  15. Returns
  16. -------
  17. matplotlib table object
  18. """
  19. plot_backend = _get_plot_backend("matplotlib")
  20. return plot_backend.table(
  21. ax=ax, data=data, rowLabels=None, colLabels=None, **kwargs
  22. )
  23. def register():
  24. """
  25. Register pandas formatters and converters with matplotlib.
  26. This function modifies the global ``matplotlib.units.registry``
  27. dictionary. pandas adds custom converters for
  28. * pd.Timestamp
  29. * pd.Period
  30. * np.datetime64
  31. * datetime.datetime
  32. * datetime.date
  33. * datetime.time
  34. See Also
  35. --------
  36. deregister_matplotlib_converters : Remove pandas formatters and converters.
  37. """
  38. plot_backend = _get_plot_backend("matplotlib")
  39. plot_backend.register()
  40. def deregister():
  41. """
  42. Remove pandas formatters and converters.
  43. Removes the custom converters added by :func:`register`. This
  44. attempts to set the state of the registry back to the state before
  45. pandas registered its own units. Converters for pandas' own types like
  46. Timestamp and Period are removed completely. Converters for types
  47. pandas overwrites, like ``datetime.datetime``, are restored to their
  48. original value.
  49. See Also
  50. --------
  51. register_matplotlib_converters : Register pandas formatters and converters
  52. with matplotlib.
  53. """
  54. plot_backend = _get_plot_backend("matplotlib")
  55. plot_backend.deregister()
  56. def scatter_matrix(
  57. frame,
  58. alpha=0.5,
  59. figsize=None,
  60. ax=None,
  61. grid=False,
  62. diagonal="hist",
  63. marker=".",
  64. density_kwds=None,
  65. hist_kwds=None,
  66. range_padding=0.05,
  67. **kwargs,
  68. ):
  69. """
  70. Draw a matrix of scatter plots.
  71. Parameters
  72. ----------
  73. frame : DataFrame
  74. alpha : float, optional
  75. Amount of transparency applied.
  76. figsize : (float,float), optional
  77. A tuple (width, height) in inches.
  78. ax : Matplotlib axis object, optional
  79. grid : bool, optional
  80. Setting this to True will show the grid.
  81. diagonal : {'hist', 'kde'}
  82. Pick between 'kde' and 'hist' for either Kernel Density Estimation or
  83. Histogram plot in the diagonal.
  84. marker : str, optional
  85. Matplotlib marker type, default '.'.
  86. density_kwds : keywords
  87. Keyword arguments to be passed to kernel density estimate plot.
  88. hist_kwds : keywords
  89. Keyword arguments to be passed to hist function.
  90. range_padding : float, default 0.05
  91. Relative extension of axis range in x and y with respect to
  92. (x_max - x_min) or (y_max - y_min).
  93. **kwargs
  94. Keyword arguments to be passed to scatter function.
  95. Returns
  96. -------
  97. numpy.ndarray
  98. A matrix of scatter plots.
  99. Examples
  100. --------
  101. .. plot::
  102. :context: close-figs
  103. >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
  104. >>> pd.plotting.scatter_matrix(df, alpha=0.2)
  105. """
  106. plot_backend = _get_plot_backend("matplotlib")
  107. return plot_backend.scatter_matrix(
  108. frame=frame,
  109. alpha=alpha,
  110. figsize=figsize,
  111. ax=ax,
  112. grid=grid,
  113. diagonal=diagonal,
  114. marker=marker,
  115. density_kwds=density_kwds,
  116. hist_kwds=hist_kwds,
  117. range_padding=range_padding,
  118. **kwargs,
  119. )
  120. def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
  121. """
  122. Plot a multidimensional dataset in 2D.
  123. Each Series in the DataFrame is represented as a evenly distributed
  124. slice on a circle. Each data point is rendered in the circle according to
  125. the value on each Series. Highly correlated `Series` in the `DataFrame`
  126. are placed closer on the unit circle.
  127. RadViz allow to project a N-dimensional data set into a 2D space where the
  128. influence of each dimension can be interpreted as a balance between the
  129. influence of all dimensions.
  130. More info available at the `original article
  131. <https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.889>`_
  132. describing RadViz.
  133. Parameters
  134. ----------
  135. frame : `DataFrame`
  136. Object holding the data.
  137. class_column : str
  138. Column name containing the name of the data point category.
  139. ax : :class:`matplotlib.axes.Axes`, optional
  140. A plot instance to which to add the information.
  141. color : list[str] or tuple[str], optional
  142. Assign a color to each category. Example: ['blue', 'green'].
  143. colormap : str or :class:`matplotlib.colors.Colormap`, default None
  144. Colormap to select colors from. If string, load colormap with that
  145. name from matplotlib.
  146. **kwds
  147. Options to pass to matplotlib scatter plotting method.
  148. Returns
  149. -------
  150. class:`matplotlib.axes.Axes`
  151. See Also
  152. --------
  153. plotting.andrews_curves : Plot clustering visualization.
  154. Examples
  155. --------
  156. .. plot::
  157. :context: close-figs
  158. >>> df = pd.DataFrame(
  159. ... {
  160. ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6],
  161. ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6],
  162. ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0],
  163. ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2],
  164. ... 'Category': [
  165. ... 'virginica',
  166. ... 'virginica',
  167. ... 'setosa',
  168. ... 'virginica',
  169. ... 'virginica',
  170. ... 'versicolor',
  171. ... 'versicolor',
  172. ... 'setosa',
  173. ... 'virginica',
  174. ... 'setosa'
  175. ... ]
  176. ... }
  177. ... )
  178. >>> pd.plotting.radviz(df, 'Category')
  179. """
  180. plot_backend = _get_plot_backend("matplotlib")
  181. return plot_backend.radviz(
  182. frame=frame,
  183. class_column=class_column,
  184. ax=ax,
  185. color=color,
  186. colormap=colormap,
  187. **kwds,
  188. )
  189. def andrews_curves(
  190. frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs
  191. ):
  192. """
  193. Generate a matplotlib plot of Andrews curves, for visualising clusters of
  194. multivariate data.
  195. Andrews curves have the functional form:
  196. f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
  197. x_4 sin(2t) + x_5 cos(2t) + ...
  198. Where x coefficients correspond to the values of each dimension and t is
  199. linearly spaced between -pi and +pi. Each row of frame then corresponds to
  200. a single curve.
  201. Parameters
  202. ----------
  203. frame : DataFrame
  204. Data to be plotted, preferably normalized to (0.0, 1.0).
  205. class_column : Name of the column containing class names
  206. ax : matplotlib axes object, default None
  207. samples : Number of points to plot in each curve
  208. color : list or tuple, optional
  209. Colors to use for the different classes.
  210. colormap : str or matplotlib colormap object, default None
  211. Colormap to select colors from. If string, load colormap with that name
  212. from matplotlib.
  213. **kwargs
  214. Options to pass to matplotlib plotting method.
  215. Returns
  216. -------
  217. class:`matplotlip.axis.Axes`
  218. Examples
  219. --------
  220. .. plot::
  221. :context: close-figs
  222. >>> df = pd.read_csv(
  223. ... 'https://raw.github.com/pandas-dev/'
  224. ... 'pandas/master/pandas/tests/io/data/csv/iris.csv'
  225. ... )
  226. >>> pd.plotting.andrews_curves(df, 'Name')
  227. """
  228. plot_backend = _get_plot_backend("matplotlib")
  229. return plot_backend.andrews_curves(
  230. frame=frame,
  231. class_column=class_column,
  232. ax=ax,
  233. samples=samples,
  234. color=color,
  235. colormap=colormap,
  236. **kwargs,
  237. )
  238. def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
  239. """
  240. Bootstrap plot on mean, median and mid-range statistics.
  241. The bootstrap plot is used to estimate the uncertainty of a statistic
  242. by relaying on random sampling with replacement [1]_. This function will
  243. generate bootstrapping plots for mean, median and mid-range statistics
  244. for the given number of samples of the given size.
  245. .. [1] "Bootstrapping (statistics)" in \
  246. https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29
  247. Parameters
  248. ----------
  249. series : pandas.Series
  250. Series from where to get the samplings for the bootstrapping.
  251. fig : matplotlib.figure.Figure, default None
  252. If given, it will use the `fig` reference for plotting instead of
  253. creating a new one with default parameters.
  254. size : int, default 50
  255. Number of data points to consider during each sampling. It must be
  256. less than or equal to the length of the `series`.
  257. samples : int, default 500
  258. Number of times the bootstrap procedure is performed.
  259. **kwds
  260. Options to pass to matplotlib plotting method.
  261. Returns
  262. -------
  263. matplotlib.figure.Figure
  264. Matplotlib figure.
  265. See Also
  266. --------
  267. DataFrame.plot : Basic plotting for DataFrame objects.
  268. Series.plot : Basic plotting for Series objects.
  269. Examples
  270. --------
  271. This example draws a basic bootstrap plot for a Series.
  272. .. plot::
  273. :context: close-figs
  274. >>> s = pd.Series(np.random.uniform(size=100))
  275. >>> pd.plotting.bootstrap_plot(s)
  276. """
  277. plot_backend = _get_plot_backend("matplotlib")
  278. return plot_backend.bootstrap_plot(
  279. series=series, fig=fig, size=size, samples=samples, **kwds
  280. )
  281. def parallel_coordinates(
  282. frame,
  283. class_column,
  284. cols=None,
  285. ax=None,
  286. color=None,
  287. use_columns=False,
  288. xticks=None,
  289. colormap=None,
  290. axvlines=True,
  291. axvlines_kwds=None,
  292. sort_labels=False,
  293. **kwargs,
  294. ):
  295. """
  296. Parallel coordinates plotting.
  297. Parameters
  298. ----------
  299. frame : DataFrame
  300. class_column : str
  301. Column name containing class names.
  302. cols : list, optional
  303. A list of column names to use.
  304. ax : matplotlib.axis, optional
  305. Matplotlib axis object.
  306. color : list or tuple, optional
  307. Colors to use for the different classes.
  308. use_columns : bool, optional
  309. If true, columns will be used as xticks.
  310. xticks : list or tuple, optional
  311. A list of values to use for xticks.
  312. colormap : str or matplotlib colormap, default None
  313. Colormap to use for line colors.
  314. axvlines : bool, optional
  315. If true, vertical lines will be added at each xtick.
  316. axvlines_kwds : keywords, optional
  317. Options to be passed to axvline method for vertical lines.
  318. sort_labels : bool, default False
  319. Sort class_column labels, useful when assigning colors.
  320. **kwargs
  321. Options to pass to matplotlib plotting method.
  322. Returns
  323. -------
  324. class:`matplotlib.axis.Axes`
  325. Examples
  326. --------
  327. .. plot::
  328. :context: close-figs
  329. >>> df = pd.read_csv(
  330. ... 'https://raw.github.com/pandas-dev/'
  331. ... 'pandas/master/pandas/tests/io/data/csv/iris.csv'
  332. ... )
  333. >>> pd.plotting.parallel_coordinates(
  334. ... df, 'Name', color=('#556270', '#4ECDC4', '#C7F464')
  335. ... )
  336. """
  337. plot_backend = _get_plot_backend("matplotlib")
  338. return plot_backend.parallel_coordinates(
  339. frame=frame,
  340. class_column=class_column,
  341. cols=cols,
  342. ax=ax,
  343. color=color,
  344. use_columns=use_columns,
  345. xticks=xticks,
  346. colormap=colormap,
  347. axvlines=axvlines,
  348. axvlines_kwds=axvlines_kwds,
  349. sort_labels=sort_labels,
  350. **kwargs,
  351. )
  352. def lag_plot(series, lag=1, ax=None, **kwds):
  353. """
  354. Lag plot for time series.
  355. Parameters
  356. ----------
  357. series : Time series
  358. lag : lag of the scatter plot, default 1
  359. ax : Matplotlib axis object, optional
  360. **kwds
  361. Matplotlib scatter method keyword arguments.
  362. Returns
  363. -------
  364. class:`matplotlib.axis.Axes`
  365. Examples
  366. --------
  367. Lag plots are most commonly used to look for patterns in time series data.
  368. Given the following time series
  369. .. plot::
  370. :context: close-figs
  371. >>> np.random.seed(5)
  372. >>> x = np.cumsum(np.random.normal(loc=1, scale=5, size=50))
  373. >>> s = pd.Series(x)
  374. >>> s.plot()
  375. A lag plot with ``lag=1`` returns
  376. .. plot::
  377. :context: close-figs
  378. >>> pd.plotting.lag_plot(s, lag=1)
  379. """
  380. plot_backend = _get_plot_backend("matplotlib")
  381. return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds)
  382. def autocorrelation_plot(series, ax=None, **kwargs):
  383. """
  384. Autocorrelation plot for time series.
  385. Parameters
  386. ----------
  387. series : Time series
  388. ax : Matplotlib axis object, optional
  389. **kwargs
  390. Options to pass to matplotlib plotting method.
  391. Returns
  392. -------
  393. class:`matplotlib.axis.Axes`
  394. Examples
  395. --------
  396. The horizontal lines in the plot correspond to 95% and 99% confidence bands.
  397. The dashed line is 99% confidence band.
  398. .. plot::
  399. :context: close-figs
  400. >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
  401. >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
  402. >>> pd.plotting.autocorrelation_plot(s)
  403. """
  404. plot_backend = _get_plot_backend("matplotlib")
  405. return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs)
  406. class _Options(dict):
  407. """
  408. Stores pandas plotting options.
  409. Allows for parameter aliasing so you can just use parameter names that are
  410. the same as the plot function parameters, but is stored in a canonical
  411. format that makes it easy to breakdown into groups later.
  412. """
  413. # alias so the names are same as plotting method parameter names
  414. _ALIASES = {"x_compat": "xaxis.compat"}
  415. _DEFAULT_KEYS = ["xaxis.compat"]
  416. def __init__(self, deprecated=False):
  417. self._deprecated = deprecated
  418. super().__setitem__("xaxis.compat", False)
  419. def __getitem__(self, key):
  420. key = self._get_canonical_key(key)
  421. if key not in self:
  422. raise ValueError(f"{key} is not a valid pandas plotting option")
  423. return super().__getitem__(key)
  424. def __setitem__(self, key, value):
  425. key = self._get_canonical_key(key)
  426. return super().__setitem__(key, value)
  427. def __delitem__(self, key):
  428. key = self._get_canonical_key(key)
  429. if key in self._DEFAULT_KEYS:
  430. raise ValueError(f"Cannot remove default parameter {key}")
  431. return super().__delitem__(key)
  432. def __contains__(self, key) -> bool:
  433. key = self._get_canonical_key(key)
  434. return super().__contains__(key)
  435. def reset(self):
  436. """
  437. Reset the option store to its initial state
  438. Returns
  439. -------
  440. None
  441. """
  442. # error: Cannot access "__init__" directly
  443. self.__init__() # type: ignore[misc]
  444. def _get_canonical_key(self, key):
  445. return self._ALIASES.get(key, key)
  446. @contextmanager
  447. def use(self, key, value):
  448. """
  449. Temporarily set a parameter value using the with statement.
  450. Aliasing allowed.
  451. """
  452. old_value = self[key]
  453. try:
  454. self[key] = value
  455. yield self
  456. finally:
  457. self[key] = old_value
  458. plot_params = _Options()