conftest.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. from itertools import product
  2. import numpy as np
  3. import pytest
  4. from pandas import (
  5. DataFrame,
  6. NaT,
  7. date_range,
  8. )
  9. import pandas._testing as tm
  10. @pytest.fixture(params=product([True, False], [True, False]))
  11. def close_open_fixture(request):
  12. return request.param
  13. @pytest.fixture
  14. def float_frame_with_na():
  15. """
  16. Fixture for DataFrame of floats with index of unique strings
  17. Columns are ['A', 'B', 'C', 'D']; some entries are missing
  18. A B C D
  19. ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997
  20. DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872
  21. neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522
  22. 0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018
  23. 3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826
  24. soujjZ0A08 NaN NaN NaN NaN
  25. 7W6NLGsjB9 NaN NaN NaN NaN
  26. ... ... ... ... ...
  27. uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590
  28. n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717
  29. ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189
  30. uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503
  31. 3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947
  32. 2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083
  33. sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517
  34. [30 rows x 4 columns]
  35. """
  36. df = DataFrame(tm.getSeriesData())
  37. # set some NAs
  38. df.iloc[5:10] = np.nan
  39. df.iloc[15:20, -2:] = np.nan
  40. return df
  41. @pytest.fixture
  42. def bool_frame_with_na():
  43. """
  44. Fixture for DataFrame of booleans with index of unique strings
  45. Columns are ['A', 'B', 'C', 'D']; some entries are missing
  46. A B C D
  47. zBZxY2IDGd False False False False
  48. IhBWBMWllt False True True True
  49. ctjdvZSR6R True False True True
  50. AVTujptmxb False True False True
  51. G9lrImrSWq False False False True
  52. sFFwdIUfz2 NaN NaN NaN NaN
  53. s15ptEJnRb NaN NaN NaN NaN
  54. ... ... ... ... ...
  55. UW41KkDyZ4 True True False False
  56. l9l6XkOdqV True False False False
  57. X2MeZfzDYA False True False False
  58. xWkIKU7vfX False True False True
  59. QOhL6VmpGU False False False True
  60. 22PwkRJdat False True False False
  61. kfboQ3VeIK True False True False
  62. [30 rows x 4 columns]
  63. """
  64. df = DataFrame(tm.getSeriesData()) > 0
  65. df = df.astype(object)
  66. # set some NAs
  67. df.iloc[5:10] = np.nan
  68. df.iloc[15:20, -2:] = np.nan
  69. # For `any` tests we need to have at least one True before the first NaN
  70. # in each column
  71. for i in range(4):
  72. df.iloc[i, i] = True
  73. return df
  74. @pytest.fixture
  75. def float_string_frame():
  76. """
  77. Fixture for DataFrame of floats and strings with index of unique strings
  78. Columns are ['A', 'B', 'C', 'D', 'foo'].
  79. A B C D foo
  80. w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar
  81. PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar
  82. ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar
  83. 3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar
  84. khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar
  85. LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar
  86. HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar
  87. ... ... ... ... ... ...
  88. 9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar
  89. h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar
  90. mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar
  91. oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar
  92. 9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar
  93. jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar
  94. lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar
  95. [30 rows x 5 columns]
  96. """
  97. df = DataFrame(tm.getSeriesData())
  98. df["foo"] = "bar"
  99. return df
  100. @pytest.fixture
  101. def mixed_float_frame():
  102. """
  103. Fixture for DataFrame of different float types with index of unique strings
  104. Columns are ['A', 'B', 'C', 'D'].
  105. A B C D
  106. GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993
  107. KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588
  108. VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731
  109. kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607
  110. CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266
  111. 0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541
  112. tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710
  113. ... ... ... ... ...
  114. 7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237
  115. 4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612
  116. B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653
  117. hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427
  118. 1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827
  119. 9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204
  120. xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502
  121. [30 rows x 4 columns]
  122. """
  123. df = DataFrame(tm.getSeriesData())
  124. df.A = df.A.astype("float32")
  125. df.B = df.B.astype("float32")
  126. df.C = df.C.astype("float16")
  127. df.D = df.D.astype("float64")
  128. return df
  129. @pytest.fixture
  130. def mixed_int_frame():
  131. """
  132. Fixture for DataFrame of different int types with index of unique strings
  133. Columns are ['A', 'B', 'C', 'D'].
  134. A B C D
  135. mUrCZ67juP 0 1 2 2
  136. rw99ACYaKS 0 1 0 0
  137. 7QsEcpaaVU 0 1 1 1
  138. xkrimI2pcE 0 1 0 0
  139. dz01SuzoS8 0 1 255 255
  140. ccQkqOHX75 -1 1 0 0
  141. DN0iXaoDLd 0 1 0 0
  142. ... .. .. ... ...
  143. Dfb141wAaQ 1 1 254 254
  144. IPD8eQOVu5 0 1 0 0
  145. CcaKulsCmv 0 1 0 0
  146. rIBa8gu7E5 0 1 0 0
  147. RP6peZmh5o 0 1 1 1
  148. NMb9pipQWQ 0 1 0 0
  149. PqgbJEzjib 0 1 3 3
  150. [30 rows x 4 columns]
  151. """
  152. df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
  153. df.A = df.A.astype("int32")
  154. df.B = np.ones(len(df.B), dtype="uint64")
  155. df.C = df.C.astype("uint8")
  156. df.D = df.C.astype("int64")
  157. return df
  158. @pytest.fixture
  159. def timezone_frame():
  160. """
  161. Fixture for DataFrame of date_range Series with different time zones
  162. Columns are ['A', 'B', 'C']; some entries are missing
  163. A B C
  164. 0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
  165. 1 2013-01-02 NaT NaT
  166. 2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
  167. """
  168. df = DataFrame(
  169. {
  170. "A": date_range("20130101", periods=3),
  171. "B": date_range("20130101", periods=3, tz="US/Eastern"),
  172. "C": date_range("20130101", periods=3, tz="CET"),
  173. }
  174. )
  175. df.iloc[1, 1] = NaT
  176. df.iloc[1, 2] = NaT
  177. return df
  178. @pytest.fixture
  179. def uint64_frame():
  180. """
  181. Fixture for DataFrame with uint64 values
  182. Columns are ['A', 'B']
  183. """
  184. return DataFrame(
  185. {"A": np.arange(3), "B": [2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10]}, dtype=np.uint64
  186. )
  187. @pytest.fixture
  188. def simple_frame():
  189. """
  190. Fixture for simple 3x3 DataFrame
  191. Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c'].
  192. one two three
  193. a 1.0 2.0 3.0
  194. b 4.0 5.0 6.0
  195. c 7.0 8.0 9.0
  196. """
  197. arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
  198. return DataFrame(arr, columns=["one", "two", "three"], index=["a", "b", "c"])
  199. @pytest.fixture
  200. def frame_of_index_cols():
  201. """
  202. Fixture for DataFrame of columns that can be used for indexing
  203. Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')];
  204. 'A' & 'B' contain duplicates (but are jointly unique), the rest are unique.
  205. A B C D E (tuple, as, label)
  206. 0 foo one a 0.608477 -0.012500 -1.664297
  207. 1 foo two b -0.633460 0.249614 -0.364411
  208. 2 foo three c 0.615256 2.154968 -0.834666
  209. 3 bar one d 0.234246 1.085675 0.718445
  210. 4 bar two e 0.533841 -0.005702 -3.533912
  211. """
  212. df = DataFrame(
  213. {
  214. "A": ["foo", "foo", "foo", "bar", "bar"],
  215. "B": ["one", "two", "three", "one", "two"],
  216. "C": ["a", "b", "c", "d", "e"],
  217. "D": np.random.randn(5),
  218. "E": np.random.randn(5),
  219. ("tuple", "as", "label"): np.random.randn(5),
  220. }
  221. )
  222. return df