__init__.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. # flake8: noqa
  2. __docformat__ = "restructuredtext"
  3. # Let users know if they're missing any of our hard dependencies
  4. hard_dependencies = ("numpy", "pytz", "dateutil")
  5. missing_dependencies = []
  6. for dependency in hard_dependencies:
  7. try:
  8. __import__(dependency)
  9. except ImportError as e:
  10. missing_dependencies.append(f"{dependency}: {e}")
  11. if missing_dependencies:
  12. raise ImportError(
  13. "Unable to import required dependencies:\n" + "\n".join(missing_dependencies)
  14. )
  15. del hard_dependencies, dependency, missing_dependencies
  16. # numpy compat
  17. from pandas.compat import (
  18. np_version_under1p18 as _np_version_under1p18,
  19. is_numpy_dev as _is_numpy_dev,
  20. )
  21. try:
  22. from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
  23. except ImportError as e: # pragma: no cover
  24. # hack but overkill to use re
  25. module = str(e).replace("cannot import name ", "")
  26. raise ImportError(
  27. f"C extension: {module} not built. If you want to import "
  28. "pandas from the source directory, you may need to run "
  29. "'python setup.py build_ext --force' to build the C extensions first."
  30. ) from e
  31. from pandas._config import (
  32. get_option,
  33. set_option,
  34. reset_option,
  35. describe_option,
  36. option_context,
  37. options,
  38. )
  39. # let init-time option registration happen
  40. import pandas.core.config_init
  41. from pandas.core.api import (
  42. # dtype
  43. Int8Dtype,
  44. Int16Dtype,
  45. Int32Dtype,
  46. Int64Dtype,
  47. UInt8Dtype,
  48. UInt16Dtype,
  49. UInt32Dtype,
  50. UInt64Dtype,
  51. Float32Dtype,
  52. Float64Dtype,
  53. CategoricalDtype,
  54. PeriodDtype,
  55. IntervalDtype,
  56. DatetimeTZDtype,
  57. StringDtype,
  58. BooleanDtype,
  59. # missing
  60. NA,
  61. isna,
  62. isnull,
  63. notna,
  64. notnull,
  65. # indexes
  66. Index,
  67. CategoricalIndex,
  68. Int64Index,
  69. UInt64Index,
  70. RangeIndex,
  71. Float64Index,
  72. MultiIndex,
  73. IntervalIndex,
  74. TimedeltaIndex,
  75. DatetimeIndex,
  76. PeriodIndex,
  77. IndexSlice,
  78. # tseries
  79. NaT,
  80. Period,
  81. period_range,
  82. Timedelta,
  83. timedelta_range,
  84. Timestamp,
  85. date_range,
  86. bdate_range,
  87. Interval,
  88. interval_range,
  89. DateOffset,
  90. # conversion
  91. to_numeric,
  92. to_datetime,
  93. to_timedelta,
  94. # misc
  95. Flags,
  96. Grouper,
  97. factorize,
  98. unique,
  99. value_counts,
  100. NamedAgg,
  101. array,
  102. Categorical,
  103. set_eng_float_format,
  104. Series,
  105. DataFrame,
  106. )
  107. from pandas.core.arrays.sparse import SparseDtype
  108. from pandas.tseries.api import infer_freq
  109. from pandas.tseries import offsets
  110. from pandas.core.computation.api import eval
  111. from pandas.core.reshape.api import (
  112. concat,
  113. lreshape,
  114. melt,
  115. wide_to_long,
  116. merge,
  117. merge_asof,
  118. merge_ordered,
  119. crosstab,
  120. pivot,
  121. pivot_table,
  122. get_dummies,
  123. cut,
  124. qcut,
  125. )
  126. import pandas.api
  127. from pandas.util._print_versions import show_versions
  128. from pandas.io.api import (
  129. # excel
  130. ExcelFile,
  131. ExcelWriter,
  132. read_excel,
  133. # parsers
  134. read_csv,
  135. read_fwf,
  136. read_table,
  137. # pickle
  138. read_pickle,
  139. to_pickle,
  140. # pytables
  141. HDFStore,
  142. read_hdf,
  143. # sql
  144. read_sql,
  145. read_sql_query,
  146. read_sql_table,
  147. # misc
  148. read_clipboard,
  149. read_parquet,
  150. read_orc,
  151. read_feather,
  152. read_gbq,
  153. read_html,
  154. read_xml,
  155. read_json,
  156. read_stata,
  157. read_sas,
  158. read_spss,
  159. )
  160. from pandas.io.json import _json_normalize as json_normalize
  161. from pandas.util._tester import test
  162. import pandas.testing
  163. import pandas.arrays
  164. # use the closest tagged version if possible
  165. from pandas._version import get_versions
  166. v = get_versions()
  167. __version__ = v.get("closest-tag", v["version"])
  168. __git_version__ = v.get("full-revisionid")
  169. del get_versions, v
  170. # GH 27101
  171. def __getattr__(name):
  172. import warnings
  173. if name == "datetime":
  174. warnings.warn(
  175. "The pandas.datetime class is deprecated "
  176. "and will be removed from pandas in a future version. "
  177. "Import from datetime module instead.",
  178. FutureWarning,
  179. stacklevel=2,
  180. )
  181. from datetime import datetime as dt
  182. return dt
  183. elif name == "np":
  184. warnings.warn(
  185. "The pandas.np module is deprecated "
  186. "and will be removed from pandas in a future version. "
  187. "Import numpy directly instead",
  188. FutureWarning,
  189. stacklevel=2,
  190. )
  191. import numpy as np
  192. return np
  193. elif name in {"SparseSeries", "SparseDataFrame"}:
  194. warnings.warn(
  195. f"The {name} class is removed from pandas. Accessing it from "
  196. "the top-level namespace will also be removed in the next version",
  197. FutureWarning,
  198. stacklevel=2,
  199. )
  200. return type(name, (), {})
  201. elif name == "SparseArray":
  202. warnings.warn(
  203. "The pandas.SparseArray class is deprecated "
  204. "and will be removed from pandas in a future version. "
  205. "Use pandas.arrays.SparseArray instead.",
  206. FutureWarning,
  207. stacklevel=2,
  208. )
  209. from pandas.core.arrays.sparse import SparseArray as _SparseArray
  210. return _SparseArray
  211. raise AttributeError(f"module 'pandas' has no attribute '{name}'")
# Module-level doc-string. It is assigned to ``__doc__`` explicitly at this
# point instead of being written as the first statement of the file.
# NOTE(review): the literal below is reproduced exactly as found here; confirm
# its blank lines and bullet indentation against the canonical file.
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
=====================================================================
**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.
Main Features
-------------
Here are just a few of the things that pandas does well:
- Easy handling of missing data in floating point as well as non-floating
point data.
- Size mutability: columns can be inserted and deleted from DataFrame and
higher dimensional objects
- Automatic and explicit data alignment: objects can be explicitly aligned
to a set of labels, or the user can simply ignore the labels and let
`Series`, `DataFrame`, etc. automatically align the data for you in
computations.
- Powerful, flexible group by functionality to perform split-apply-combine
operations on data sets, for both aggregating and transforming data.
- Make it easy to convert ragged, differently-indexed data in other Python
and NumPy data structures into DataFrame objects.
- Intelligent label-based slicing, fancy indexing, and subsetting of large
data sets.
- Intuitive merging and joining data sets.
- Flexible reshaping and pivoting of data sets.
- Hierarchical labeling of axes (possible to have multiple labels per tick).
- Robust IO tools for loading data from flat files (CSV and delimited),
Excel files, databases, and saving/loading data from the ultrafast HDF5
format.
- Time series-specific functionality: date range generation and frequency
conversion, moving window statistics, date shifting and lagging.
"""