spss.py 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. from __future__ import annotations
  2. from pathlib import Path
  3. from typing import Sequence
  4. from pandas.compat._optional import import_optional_dependency
  5. from pandas.core.dtypes.inference import is_list_like
  6. from pandas.core.api import DataFrame
  7. from pandas.io.common import stringify_path
  8. def read_spss(
  9. path: str | Path,
  10. usecols: Sequence[str] | None = None,
  11. convert_categoricals: bool = True,
  12. ) -> DataFrame:
  13. """
  14. Load an SPSS file from the file path, returning a DataFrame.
  15. .. versionadded:: 0.25.0
  16. Parameters
  17. ----------
  18. path : str or Path
  19. File path.
  20. usecols : list-like, optional
  21. Return a subset of the columns. If None, return all columns.
  22. convert_categoricals : bool, default is True
  23. Convert categorical columns into pd.Categorical.
  24. Returns
  25. -------
  26. DataFrame
  27. """
  28. pyreadstat = import_optional_dependency("pyreadstat")
  29. if usecols is not None:
  30. if not is_list_like(usecols):
  31. raise TypeError("usecols must be list-like.")
  32. else:
  33. usecols = list(usecols) # pyreadstat requires a list
  34. df, _ = pyreadstat.read_sav(
  35. stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals
  36. )
  37. return df