conftest.py
8.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
from itertools import product
import numpy as np
import pytest
from pandas import DataFrame, NaT, date_range
import pandas._testing as tm
@pytest.fixture(params=product([True, False], [True, False]))
def close_open_fixture(request):
return request.param
@pytest.fixture
def float_frame_with_na():
"""
Fixture for DataFrame of floats with index of unique strings
Columns are ['A', 'B', 'C', 'D']; some entries are missing
A B C D
ABwBzA0ljw -1.128865 -0.897161 0.046603 0.274997
DJiRzmbyQF 0.728869 0.233502 0.722431 -0.890872
neMgPD5UBF 0.486072 -1.027393 -0.031553 1.449522
0yWA4n8VeX -1.937191 -1.142531 0.805215 -0.462018
3slYUbbqU1 0.153260 1.164691 1.489795 -0.545826
soujjZ0A08 NaN NaN NaN NaN
7W6NLGsjB9 NaN NaN NaN NaN
... ... ... ... ...
uhfeaNkCR1 -0.231210 -0.340472 0.244717 -0.901590
n6p7GYuBIV -0.419052 1.922721 -0.125361 -0.727717
ZhzAeY6p1y 1.234374 -1.425359 -0.827038 -0.633189
uWdPsORyUh 0.046738 -0.980445 -1.102965 0.605503
3DJA6aN590 -0.091018 -1.684734 -1.100900 0.215947
2GBPAzdbMk -2.883405 -1.021071 1.209877 1.633083
sHadBoyVHw -2.223032 -0.326384 0.258931 0.245517
[30 rows x 4 columns]
"""
df = DataFrame(tm.getSeriesData())
# set some NAs
df.iloc[5:10] = np.nan
df.iloc[15:20, -2:] = np.nan
return df
@pytest.fixture
def bool_frame_with_na():
"""
Fixture for DataFrame of booleans with index of unique strings
Columns are ['A', 'B', 'C', 'D']; some entries are missing
A B C D
zBZxY2IDGd False False False False
IhBWBMWllt False True True True
ctjdvZSR6R True False True True
AVTujptmxb False True False True
G9lrImrSWq False False False True
sFFwdIUfz2 NaN NaN NaN NaN
s15ptEJnRb NaN NaN NaN NaN
... ... ... ... ...
UW41KkDyZ4 True True False False
l9l6XkOdqV True False False False
X2MeZfzDYA False True False False
xWkIKU7vfX False True False True
QOhL6VmpGU False False False True
22PwkRJdat False True False False
kfboQ3VeIK True False True False
[30 rows x 4 columns]
"""
df = DataFrame(tm.getSeriesData()) > 0
df = df.astype(object)
# set some NAs
df.iloc[5:10] = np.nan
df.iloc[15:20, -2:] = np.nan
return df
@pytest.fixture
def float_string_frame():
"""
Fixture for DataFrame of floats and strings with index of unique strings
Columns are ['A', 'B', 'C', 'D', 'foo'].
A B C D foo
w3orJvq07g -1.594062 -1.084273 -1.252457 0.356460 bar
PeukuVdmz2 0.109855 -0.955086 -0.809485 0.409747 bar
ahp2KvwiM8 -1.533729 -0.142519 -0.154666 1.302623 bar
3WSJ7BUCGd 2.484964 0.213829 0.034778 -2.327831 bar
khdAmufk0U -0.193480 -0.743518 -0.077987 0.153646 bar
LE2DZiFlrE -0.193566 -1.343194 -0.107321 0.959978 bar
HJXSJhVn7b 0.142590 1.257603 -0.659409 -0.223844 bar
... ... ... ... ... ...
9a1Vypttgw -1.316394 1.601354 0.173596 1.213196 bar
h5d1gVFbEy 0.609475 1.106738 -0.155271 0.294630 bar
mK9LsTQG92 1.303613 0.857040 -1.019153 0.369468 bar
oOLksd9gKH 0.558219 -0.134491 -0.289869 -0.951033 bar
9jgoOjKyHg 0.058270 -0.496110 -0.413212 -0.852659 bar
jZLDHclHAO 0.096298 1.267510 0.549206 -0.005235 bar
lR0nxDp1C2 -2.119350 -0.794384 0.544118 0.145849 bar
[30 rows x 5 columns]
"""
df = DataFrame(tm.getSeriesData())
df["foo"] = "bar"
return df
@pytest.fixture
def mixed_float_frame():
"""
Fixture for DataFrame of different float types with index of unique strings
Columns are ['A', 'B', 'C', 'D'].
A B C D
GI7bbDaEZe -0.237908 -0.246225 -0.468506 0.752993
KGp9mFepzA -1.140809 -0.644046 -1.225586 0.801588
VeVYLAb1l2 -1.154013 -1.677615 0.690430 -0.003731
kmPME4WKhO 0.979578 0.998274 -0.776367 0.897607
CPyopdXTiz 0.048119 -0.257174 0.836426 0.111266
0kJZQndAj0 0.274357 -0.281135 -0.344238 0.834541
tqdwQsaHG8 -0.979716 -0.519897 0.582031 0.144710
... ... ... ... ...
7FhZTWILQj -2.906357 1.261039 -0.780273 -0.537237
4pUDPM4eGq -2.042512 -0.464382 -0.382080 1.132612
B8dUgUzwTi -1.506637 -0.364435 1.087891 0.297653
hErlVYjVv9 1.477453 -0.495515 -0.713867 1.438427
1BKN3o7YLs 0.127535 -0.349812 -0.881836 0.489827
9S4Ekn7zga 1.445518 -2.095149 0.031982 0.373204
xN1dNn6OV6 1.425017 -0.983995 -0.363281 -0.224502
[30 rows x 4 columns]
"""
df = DataFrame(tm.getSeriesData())
df.A = df.A.astype("float32")
df.B = df.B.astype("float32")
df.C = df.C.astype("float16")
df.D = df.D.astype("float64")
return df
@pytest.fixture
def mixed_int_frame():
"""
Fixture for DataFrame of different int types with index of unique strings
Columns are ['A', 'B', 'C', 'D'].
A B C D
mUrCZ67juP 0 1 2 2
rw99ACYaKS 0 1 0 0
7QsEcpaaVU 0 1 1 1
xkrimI2pcE 0 1 0 0
dz01SuzoS8 0 1 255 255
ccQkqOHX75 -1 1 0 0
DN0iXaoDLd 0 1 0 0
... .. .. ... ...
Dfb141wAaQ 1 1 254 254
IPD8eQOVu5 0 1 0 0
CcaKulsCmv 0 1 0 0
rIBa8gu7E5 0 1 0 0
RP6peZmh5o 0 1 1 1
NMb9pipQWQ 0 1 0 0
PqgbJEzjib 0 1 3 3
[30 rows x 4 columns]
"""
df = DataFrame({k: v.astype(int) for k, v in tm.getSeriesData().items()})
df.A = df.A.astype("int32")
df.B = np.ones(len(df.B), dtype="uint64")
df.C = df.C.astype("uint8")
df.D = df.C.astype("int64")
return df
@pytest.fixture
def mixed_type_frame():
"""
Fixture for DataFrame of float/int/string columns with RangeIndex
Columns are ['a', 'b', 'c', 'float32', 'int32'].
"""
return DataFrame(
{
"a": 1.0,
"b": 2,
"c": "foo",
"float32": np.array([1.0] * 10, dtype="float32"),
"int32": np.array([1] * 10, dtype="int32"),
},
index=np.arange(10),
)
@pytest.fixture
def timezone_frame():
"""
Fixture for DataFrame of date_range Series with different time zones
Columns are ['A', 'B', 'C']; some entries are missing
A B C
0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
1 2013-01-02 NaT NaT
2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
"""
df = DataFrame(
{
"A": date_range("20130101", periods=3),
"B": date_range("20130101", periods=3, tz="US/Eastern"),
"C": date_range("20130101", periods=3, tz="CET"),
}
)
df.iloc[1, 1] = NaT
df.iloc[1, 2] = NaT
return df
@pytest.fixture
def uint64_frame():
"""
Fixture for DataFrame with uint64 values
Columns are ['A', 'B']
"""
return DataFrame(
{"A": np.arange(3), "B": [2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10]}, dtype=np.uint64
)
@pytest.fixture
def simple_frame():
"""
Fixture for simple 3x3 DataFrame
Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c'].
one two three
a 1.0 2.0 3.0
b 4.0 5.0 6.0
c 7.0 8.0 9.0
"""
arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
return DataFrame(arr, columns=["one", "two", "three"], index=["a", "b", "c"])
@pytest.fixture
def frame_of_index_cols():
"""
Fixture for DataFrame of columns that can be used for indexing
Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')];
'A' & 'B' contain duplicates (but are jointly unique), the rest are unique.
A B C D E (tuple, as, label)
0 foo one a 0.608477 -0.012500 -1.664297
1 foo two b -0.633460 0.249614 -0.364411
2 foo three c 0.615256 2.154968 -0.834666
3 bar one d 0.234246 1.085675 0.718445
4 bar two e 0.533841 -0.005702 -3.533912
"""
df = DataFrame(
{
"A": ["foo", "foo", "foo", "bar", "bar"],
"B": ["one", "two", "three", "one", "two"],
"C": ["a", "b", "c", "d", "e"],
"D": np.random.randn(5),
"E": np.random.randn(5),
("tuple", "as", "label"): np.random.randn(5),
}
)
return df