orc.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. """ orc compat """
  2. from __future__ import annotations
  3. from typing import TYPE_CHECKING
  4. from pandas._typing import FilePathOrBuffer
  5. from pandas.compat._optional import import_optional_dependency
  6. from pandas.io.common import get_handle
  7. if TYPE_CHECKING:
  8. from pandas import DataFrame
  9. def read_orc(
  10. path: FilePathOrBuffer, columns: list[str] | None = None, **kwargs
  11. ) -> DataFrame:
  12. """
  13. Load an ORC object from the file path, returning a DataFrame.
  14. .. versionadded:: 1.0.0
  15. Parameters
  16. ----------
  17. path : str, path object or file-like object
  18. Any valid string path is acceptable. The string could be a URL. Valid
  19. URL schemes include http, ftp, s3, and file. For file URLs, a host is
  20. expected. A local file could be:
  21. ``file://localhost/path/to/table.orc``.
  22. If you want to pass in a path object, pandas accepts any
  23. ``os.PathLike``.
  24. By file-like object, we refer to objects with a ``read()`` method,
  25. such as a file handle (e.g. via builtin ``open`` function)
  26. or ``StringIO``.
  27. columns : list, default None
  28. If not None, only these columns will be read from the file.
  29. **kwargs
  30. Any additional kwargs are passed to pyarrow.
  31. Returns
  32. -------
  33. DataFrame
  34. Notes
  35. -------
  36. Before using this function you should read the :ref:`user guide about ORC <io.orc>`
  37. and :ref:`install optional dependencies <install.warn_orc>`.
  38. """
  39. # we require a newer version of pyarrow than we support for parquet
  40. orc = import_optional_dependency("pyarrow.orc")
  41. with get_handle(path, "rb", is_text=False) as handles:
  42. orc_file = orc.ORCFile(handles.handle)
  43. return orc_file.read(columns=columns, **kwargs).to_pandas()