# pickle.py — pandas pickle compatibility module
  1. """ pickle compat """
  2. import pickle
  3. from typing import Any
  4. import warnings
  5. from pandas._typing import (
  6. CompressionOptions,
  7. FilePathOrBuffer,
  8. StorageOptions,
  9. )
  10. from pandas.compat import pickle_compat as pc
  11. from pandas.util._decorators import doc
  12. from pandas.core import generic
  13. from pandas.io.common import get_handle
  14. @doc(storage_options=generic._shared_docs["storage_options"])
  15. def to_pickle(
  16. obj: Any,
  17. filepath_or_buffer: FilePathOrBuffer,
  18. compression: CompressionOptions = "infer",
  19. protocol: int = pickle.HIGHEST_PROTOCOL,
  20. storage_options: StorageOptions = None,
  21. ):
  22. """
  23. Pickle (serialize) object to file.
  24. Parameters
  25. ----------
  26. obj : any object
  27. Any python object.
  28. filepath_or_buffer : str, path object or file-like object
  29. File path, URL, or buffer where the pickled object will be stored.
  30. .. versionchanged:: 1.0.0
  31. Accept URL. URL has to be of S3 or GCS.
  32. compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer'
  33. If 'infer' and 'path_or_url' is path-like, then detect compression from
  34. the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
  35. compression) If 'infer' and 'path_or_url' is not path-like, then use
  36. None (= no decompression).
  37. protocol : int
  38. Int which indicates which protocol should be used by the pickler,
  39. default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
  40. values for this parameter depend on the version of Python. For Python
  41. 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
  42. For Python >= 3.4, 4 is a valid value. A negative value for the
  43. protocol parameter is equivalent to setting its value to
  44. HIGHEST_PROTOCOL.
  45. {storage_options}
  46. .. versionadded:: 1.2.0
  47. .. [1] https://docs.python.org/3/library/pickle.html
  48. See Also
  49. --------
  50. read_pickle : Load pickled pandas object (or any object) from file.
  51. DataFrame.to_hdf : Write DataFrame to an HDF5 file.
  52. DataFrame.to_sql : Write DataFrame to a SQL database.
  53. DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
  54. Examples
  55. --------
  56. >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
  57. >>> original_df
  58. foo bar
  59. 0 0 5
  60. 1 1 6
  61. 2 2 7
  62. 3 3 8
  63. 4 4 9
  64. >>> pd.to_pickle(original_df, "./dummy.pkl")
  65. >>> unpickled_df = pd.read_pickle("./dummy.pkl")
  66. >>> unpickled_df
  67. foo bar
  68. 0 0 5
  69. 1 1 6
  70. 2 2 7
  71. 3 3 8
  72. 4 4 9
  73. >>> import os
  74. >>> os.remove("./dummy.pkl")
  75. """
  76. if protocol < 0:
  77. protocol = pickle.HIGHEST_PROTOCOL
  78. with get_handle(
  79. filepath_or_buffer,
  80. "wb",
  81. compression=compression,
  82. is_text=False,
  83. storage_options=storage_options,
  84. ) as handles:
  85. if handles.compression["method"] in ("bz2", "xz") and protocol >= 5:
  86. # some weird TypeError GH#39002 with pickle 5: fallback to letting
  87. # pickle create the entire object and then write it to the buffer.
  88. # "zip" would also be here if pandas.io.common._BytesZipFile
  89. # wouldn't buffer write calls
  90. handles.handle.write(
  91. # error: Argument 1 to "write" of "TextIOBase" has incompatible type
  92. # "bytes"; expected "str"
  93. pickle.dumps(obj, protocol=protocol) # type: ignore[arg-type]
  94. )
  95. else:
  96. # letting pickle write directly to the buffer is more memory-efficient
  97. pickle.dump(
  98. # error: Argument 2 to "dump" has incompatible type "Union[IO[Any],
  99. # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]"; expected
  100. # "IO[bytes]"
  101. obj,
  102. handles.handle, # type: ignore[arg-type]
  103. protocol=protocol,
  104. )
  105. @doc(storage_options=generic._shared_docs["storage_options"])
  106. def read_pickle(
  107. filepath_or_buffer: FilePathOrBuffer,
  108. compression: CompressionOptions = "infer",
  109. storage_options: StorageOptions = None,
  110. ):
  111. """
  112. Load pickled pandas object (or any object) from file.
  113. .. warning::
  114. Loading pickled data received from untrusted sources can be
  115. unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.
  116. Parameters
  117. ----------
  118. filepath_or_buffer : str, path object or file-like object
  119. File path, URL, or buffer where the pickled object will be loaded from.
  120. .. versionchanged:: 1.0.0
  121. Accept URL. URL is not limited to S3 and GCS.
  122. compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer'
  123. If 'infer' and 'path_or_url' is path-like, then detect compression from
  124. the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
  125. compression) If 'infer' and 'path_or_url' is not path-like, then use
  126. None (= no decompression).
  127. {storage_options}
  128. .. versionadded:: 1.2.0
  129. Returns
  130. -------
  131. unpickled : same type as object stored in file
  132. See Also
  133. --------
  134. DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
  135. Series.to_pickle : Pickle (serialize) Series object to file.
  136. read_hdf : Read HDF5 file into a DataFrame.
  137. read_sql : Read SQL query or database table into a DataFrame.
  138. read_parquet : Load a parquet object, returning a DataFrame.
  139. Notes
  140. -----
  141. read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3.
  142. Examples
  143. --------
  144. >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
  145. >>> original_df
  146. foo bar
  147. 0 0 5
  148. 1 1 6
  149. 2 2 7
  150. 3 3 8
  151. 4 4 9
  152. >>> pd.to_pickle(original_df, "./dummy.pkl")
  153. >>> unpickled_df = pd.read_pickle("./dummy.pkl")
  154. >>> unpickled_df
  155. foo bar
  156. 0 0 5
  157. 1 1 6
  158. 2 2 7
  159. 3 3 8
  160. 4 4 9
  161. >>> import os
  162. >>> os.remove("./dummy.pkl")
  163. """
  164. excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
  165. with get_handle(
  166. filepath_or_buffer,
  167. "rb",
  168. compression=compression,
  169. is_text=False,
  170. storage_options=storage_options,
  171. ) as handles:
  172. # 1) try standard library Pickle
  173. # 2) try pickle_compat (older pandas version) to handle subclass changes
  174. # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError
  175. try:
  176. # TypeError for Cython complaints about object.__new__ vs Tick.__new__
  177. try:
  178. with warnings.catch_warnings(record=True):
  179. # We want to silence any warnings about, e.g. moved modules.
  180. warnings.simplefilter("ignore", Warning)
  181. # error: Argument 1 to "load" has incompatible type "Union[IO[Any],
  182. # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]";
  183. # expected "IO[bytes]"
  184. return pickle.load(handles.handle) # type: ignore[arg-type]
  185. except excs_to_catch:
  186. # e.g.
  187. # "No module named 'pandas.core.sparse.series'"
  188. # "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
  189. return pc.load(handles.handle, encoding=None)
  190. except UnicodeDecodeError:
  191. # e.g. can occur for files written in py27; see GH#28645 and GH#31988
  192. return pc.load(handles.handle, encoding="latin-1")