123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268 |
- from itertools import product
- import numpy as np
- import pytest
- from pandas import (
- DataFrame,
- NaT,
- date_range,
- )
- import pandas._testing as tm
@pytest.fixture(params=product([True, False], repeat=2))
def close_open_fixture(request):
    """
    Fixture parametrized over every ordered pair of booleans.

    A test requesting it runs once per pair and receives a 2-tuple:
    (True, True), (True, False), (False, True), (False, False).
    """
    flag_pair = request.param
    return flag_pair
@pytest.fixture
def float_frame_with_na():
    """
    Fixture for a float DataFrame, indexed by unique strings, with gaps.

    The frame comes from ``tm.getSeriesData()`` (columns ['A', 'B', 'C', 'D'],
    30 rows) and is then punched with missing values:

    * rows 5 through 9 are NaN in every column
    * rows 15 through 19 are NaN in the last two columns only
    """
    frame = DataFrame(tm.getSeriesData())

    # A band of fully-missing rows ...
    frame.iloc[5:10, :] = np.nan
    # ... and a band where only the trailing two columns are missing.
    frame.iloc[15:20, -2:] = np.nan
    return frame
@pytest.fixture
def bool_frame_with_na():
    """
    Fixture for an object-dtype DataFrame of booleans, indexed by unique
    strings, with gaps.

    The frame is ``tm.getSeriesData() > 0`` (columns ['A', 'B', 'C', 'D'],
    30 rows) cast to object, then:

    * rows 5 through 9 are NaN in every column
    * rows 15 through 19 are NaN in the last two columns only
    * the first four diagonal cells are forced to True
    """
    frame = (DataFrame(tm.getSeriesData()) > 0).astype(object)

    # Introduce the missing values.
    frame.iloc[5:10, :] = np.nan
    frame.iloc[15:20, -2:] = np.nan

    # `any` tests need at least one True ahead of the first NaN in every
    # column, so pin the leading diagonal.
    for pos in range(4):
        frame.iloc[pos, pos] = True
    return frame
@pytest.fixture
def float_string_frame():
    """
    Fixture for a DataFrame mixing float and string columns, indexed by
    unique strings.

    Columns are ['A', 'B', 'C', 'D', 'foo']: the four float columns come from
    ``tm.getSeriesData()`` (30 rows) and 'foo' holds the constant "bar".
    """
    floats = DataFrame(tm.getSeriesData())
    return floats.assign(foo="bar")
@pytest.fixture
def mixed_float_frame():
    """
    Fixture for a DataFrame whose columns use different float widths,
    indexed by unique strings.

    Columns are ['A', 'B', 'C', 'D'] (30 rows) with dtypes
    float32, float32, float16 and float64 respectively.
    """
    frame = DataFrame(tm.getSeriesData())

    # Re-cast one column at a time, in column order.
    column_dtypes = (
        ("A", "float32"),
        ("B", "float32"),
        ("C", "float16"),
        ("D", "float64"),
    )
    for label, dtype in column_dtypes:
        frame[label] = frame[label].astype(dtype)
    return frame
@pytest.fixture
def mixed_int_frame():
    """
    Fixture for a DataFrame whose columns use different integer types,
    indexed by unique strings.

    Columns are ['A', 'B', 'C', 'D'] (30 rows):

    * 'A' is int32
    * 'B' is uint64 and all ones
    * 'C' is uint8
    * 'D' is int64 and holds the same values as 'C'
    """
    truncated = {label: col.astype(int) for label, col in tm.getSeriesData().items()}
    frame = DataFrame(truncated)

    frame["A"] = frame["A"].astype("int32")
    frame["B"] = np.ones(len(frame["B"]), dtype="uint64")
    frame["C"] = frame["C"].astype("uint8")
    # 'D' is rebuilt from the already-converted 'C', so it mirrors 'C'.
    frame["D"] = frame["C"].astype("int64")
    return frame
@pytest.fixture
def timezone_frame():
    """
    Fixture for a DataFrame of three daily date ranges in different time zones.

    Columns are ['A', 'B', 'C'], each starting at 2013-01-01 with 3 periods:
    'A' is tz-naive, 'B' is US/Eastern, 'C' is CET. The middle row of 'B'
    and 'C' is NaT.

               A                         B                         C
    0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
    1 2013-01-02                       NaT                       NaT
    2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
    """
    naive = date_range("20130101", periods=3)
    eastern = date_range("20130101", periods=3, tz="US/Eastern")
    cet = date_range("20130101", periods=3, tz="CET")
    frame = DataFrame({"A": naive, "B": eastern, "C": cet})

    # Knock out the middle row of both tz-aware columns.
    for col_pos in (1, 2):
        frame.iloc[1, col_pos] = NaT
    return frame
@pytest.fixture
def uint64_frame():
    """
    Fixture for a two-column DataFrame of uint64 values.

    Columns are ['A', 'B']: 'A' is 0, 1, 2 and 'B' holds three values
    starting at 2**63.
    """
    base = 2 ** 63
    columns = {
        "A": np.arange(3),
        "B": [base, base + 5, base + 10],
    }
    return DataFrame(columns, dtype=np.uint64)
@pytest.fixture
def simple_frame():
    """
    Fixture for a small 3x3 float DataFrame.

    Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c'].

       one  two  three
    a  1.0  2.0    3.0
    b  4.0  5.0    6.0
    c  7.0  8.0    9.0
    """
    # 1.0 .. 9.0 laid out row by row.
    values = np.arange(1.0, 10.0).reshape(3, 3)
    row_labels = ["a", "b", "c"]
    col_labels = ["one", "two", "three"]
    return DataFrame(values, columns=col_labels, index=row_labels)
@pytest.fixture
def frame_of_index_cols():
    """
    Fixture for a 5-row DataFrame whose columns can serve as index keys.

    Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')].
    'A' and 'B' each contain duplicates but are unique as a pair; 'C' is
    unique strings; the remaining three columns are random floats.
    """
    data = {
        "A": ["foo", "foo", "foo", "bar", "bar"],
        "B": ["one", "two", "three", "one", "two"],
        "C": ["a", "b", "c", "d", "e"],
    }
    # Draw the random columns one after another, in column order.
    for label in ("D", "E", ("tuple", "as", "label")):
        data[label] = np.random.randn(5)
    return DataFrame(data)
|