123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286 |
# flake8: noqa

__docformat__ = "restructuredtext"

# Probe each hard dependency up front and report every one that is missing
# in a single ImportError.
_required = ("numpy", "pytz", "dateutil")
_unavailable = []

for _name in _required:
    try:
        __import__(_name)
    except ImportError as _err:
        # Record the failure and keep probing the remaining packages.
        _unavailable.append(f"{_name}: {_err}")

if _unavailable:
    raise ImportError(
        "Unable to import required dependencies:\n" + "\n".join(_unavailable)
    )
# The scratch names above must not linger in the package namespace.
del _required, _name, _unavailable
- # numpy compat
- from pandas.compat import (
- np_version_under1p18 as _np_version_under1p18,
- is_numpy_dev as _is_numpy_dev,
- )
try:
    from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
except ImportError as _err:  # pragma: no cover
    # Plain text stripping is a hack, but a regex would be overkill here:
    # drop the "cannot import name " prefix from the original error text.
    _unbuilt = str(_err).replace("cannot import name ", "")
    raise ImportError(
        f"C extension: {_unbuilt} not built. If you want to import "
        "pandas from the source directory, you may need to run "
        "'python setup.py build_ext --force' to build the C extensions first."
    ) from _err
- from pandas._config import (
- get_option,
- set_option,
- reset_option,
- describe_option,
- option_context,
- options,
- )
- # let init-time option registration happen
- import pandas.core.config_init
- from pandas.core.api import (
- # dtype
- Int8Dtype,
- Int16Dtype,
- Int32Dtype,
- Int64Dtype,
- UInt8Dtype,
- UInt16Dtype,
- UInt32Dtype,
- UInt64Dtype,
- Float32Dtype,
- Float64Dtype,
- CategoricalDtype,
- PeriodDtype,
- IntervalDtype,
- DatetimeTZDtype,
- StringDtype,
- BooleanDtype,
- # missing
- NA,
- isna,
- isnull,
- notna,
- notnull,
- # indexes
- Index,
- CategoricalIndex,
- Int64Index,
- UInt64Index,
- RangeIndex,
- Float64Index,
- MultiIndex,
- IntervalIndex,
- TimedeltaIndex,
- DatetimeIndex,
- PeriodIndex,
- IndexSlice,
- # tseries
- NaT,
- Period,
- period_range,
- Timedelta,
- timedelta_range,
- Timestamp,
- date_range,
- bdate_range,
- Interval,
- interval_range,
- DateOffset,
- # conversion
- to_numeric,
- to_datetime,
- to_timedelta,
- # misc
- Flags,
- Grouper,
- factorize,
- unique,
- value_counts,
- NamedAgg,
- array,
- Categorical,
- set_eng_float_format,
- Series,
- DataFrame,
- )
- from pandas.core.arrays.sparse import SparseDtype
- from pandas.tseries.api import infer_freq
- from pandas.tseries import offsets
- from pandas.core.computation.api import eval
- from pandas.core.reshape.api import (
- concat,
- lreshape,
- melt,
- wide_to_long,
- merge,
- merge_asof,
- merge_ordered,
- crosstab,
- pivot,
- pivot_table,
- get_dummies,
- cut,
- qcut,
- )
- import pandas.api
- from pandas.util._print_versions import show_versions
- from pandas.io.api import (
- # excel
- ExcelFile,
- ExcelWriter,
- read_excel,
- # parsers
- read_csv,
- read_fwf,
- read_table,
- # pickle
- read_pickle,
- to_pickle,
- # pytables
- HDFStore,
- read_hdf,
- # sql
- read_sql,
- read_sql_query,
- read_sql_table,
- # misc
- read_clipboard,
- read_parquet,
- read_orc,
- read_feather,
- read_gbq,
- read_html,
- read_xml,
- read_json,
- read_stata,
- read_sas,
- read_spss,
- )
- from pandas.io.json import _json_normalize as json_normalize
- from pandas.util._tester import test
- import pandas.testing
- import pandas.arrays
# Prefer the closest tagged version when the version metadata provides one;
# otherwise fall back to the plain "version" entry.
from pandas._version import get_versions as _get_versions

_version_info = _get_versions()
__version__ = _version_info.get("closest-tag", _version_info["version"])
__git_version__ = _version_info.get("full-revisionid")
# Only the two dunder attributes are meant to stay in the namespace.
del _get_versions, _version_info
# GH 27101
def __getattr__(name):
    """
    Lazily resolve deprecated or removed top-level attributes.

    Parameters
    ----------
    name : str
        Attribute name being looked up on the module.

    Returns
    -------
    object
        ``datetime.datetime`` for ``"datetime"``, the ``numpy`` module for
        ``"np"``, a freshly created empty class named ``name`` for
        ``"SparseSeries"`` / ``"SparseDataFrame"``, and
        ``pandas.core.arrays.sparse.SparseArray`` for ``"SparseArray"``.

    Raises
    ------
    AttributeError
        If ``name`` is not one of the names handled here.

    Notes
    -----
    Every handled name emits a ``FutureWarning`` before the object is
    imported or created.
    """
    import warnings

    removed_sparse = {"SparseSeries", "SparseDataFrame"}

    # Step 1: choose the warning text, or bail out for unknown names.
    if name == "datetime":
        message = (
            "The pandas.datetime class is deprecated "
            "and will be removed from pandas in a future version. "
            "Import from datetime module instead."
        )
    elif name == "np":
        message = (
            "The pandas.np module is deprecated "
            "and will be removed from pandas in a future version. "
            "Import numpy directly instead"
        )
    elif name in removed_sparse:
        message = (
            f"The {name} class is removed from pandas. Accessing it from "
            "the top-level namespace will also be removed in the next version"
        )
    elif name == "SparseArray":
        message = (
            "The pandas.SparseArray class is deprecated "
            "and will be removed from pandas in a future version. "
            "Use pandas.arrays.SparseArray instead."
        )
    else:
        raise AttributeError(f"module 'pandas' has no attribute '{name}'")

    # Step 2: warn; stacklevel=2 attributes the warning to the caller's frame
    # rather than to this function.
    warnings.warn(message, FutureWarning, stacklevel=2)

    # Step 3: produce the object. The imports stay local so they only run
    # when the corresponding name is actually requested.
    if name == "datetime":
        from datetime import datetime as dt

        return dt
    if name == "np":
        import numpy as np

        return np
    if name in removed_sparse:
        # The real classes are gone; hand back an empty stand-in class.
        return type(name, (), {})

    from pandas.core.arrays.sparse import SparseArray as _SparseArray

    return _SparseArray
# module level doc-string
# Kept as valid reStructuredText: a blank line precedes the section title and
# the bullet list, and bullet continuation lines are indented under their
# item text.
__doc__ = """
pandas - a powerful data analysis and manipulation library for Python
=====================================================================

**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.

Main Features
-------------
Here are just a few of the things that pandas does well:

  - Easy handling of missing data in floating point as well as non-floating
    point data.
  - Size mutability: columns can be inserted and deleted from DataFrame and
    higher dimensional objects
  - Automatic and explicit data alignment: objects can be explicitly aligned
    to a set of labels, or the user can simply ignore the labels and let
    `Series`, `DataFrame`, etc. automatically align the data for you in
    computations.
  - Powerful, flexible group by functionality to perform split-apply-combine
    operations on data sets, for both aggregating and transforming data.
  - Make it easy to convert ragged, differently-indexed data in other Python
    and NumPy data structures into DataFrame objects.
  - Intelligent label-based slicing, fancy indexing, and subsetting of large
    data sets.
  - Intuitive merging and joining data sets.
  - Flexible reshaping and pivoting of data sets.
  - Hierarchical labeling of axes (possible to have multiple labels per tick).
  - Robust IO tools for loading data from flat files (CSV and delimited),
    Excel files, databases, and saving/loading data from the ultrafast HDF5
    format.
  - Time series-specific functionality: date range generation and frequency
    conversion, moving window statistics, date shifting and lagging.
"""
|