common.py
6.0 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
import numpy as np
from pandas import Series
import pandas._testing as tm
def check_pairwise_moment(frame, dispatch, name, **kwargs):
def get_result(obj, obj2=None):
return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2)
result = get_result(frame)
result = result.loc[(slice(None), 1), 5]
result.index = result.index.droplevel(1)
expected = get_result(frame[1], frame[5])
expected.index = expected.index._with_freq(None)
tm.assert_series_equal(result, expected, check_names=False)
def ew_func(A, B, com, name, **kwargs):
    """
    Apply the binary exponentially weighted statistic ``name`` to ``A`` and ``B``.

    Builds ``A.ewm(com, **kwargs)``, looks up the method called ``name`` on
    it and returns the result of calling that method with ``B``.
    """
    window = A.ewm(com=com, **kwargs)
    statistic = getattr(window, name)
    return statistic(B)
def check_binary_ew(name, A, B):
    """
    Check the NaN layout of a binary ewm statistic with ``min_periods=5``.

    Runs ``ew_func`` with ``com=20`` and ``min_periods=5`` and asserts that
    the first 14 values of the result are all NaN and that no later value
    is NaN.
    """
    values = ew_func(A=A, B=B, com=20, name=name, min_periods=5).values
    head, tail = values[:14], values[14:]
    assert np.isnan(head).all()
    assert not np.isnan(tail).any()
def check_binary_ew_min_periods(name, min_periods, A, B):
    """
    Check a binary ewm statistic for a given ``min_periods`` (GH 7898).

    Three cases are asserted, all through ``ew_func``:

    * for ``A``/``B`` with ``com=20``, the first 11 results are NaN and no
      later result is NaN;
    * two empty float64 Series produce an empty float64 Series;
    * two length-1 Series produce a single NaN.

    Parameters
    ----------
    name : str
        Name of the ewm statistic method passed on to ``ew_func``.
    min_periods : int
        Forwarded to ``ew_func`` as the ``min_periods`` keyword.
    A, B : Series
        Inputs for the first check.
    """
    # GH 7898
    result = ew_func(A, B, 20, name=name, min_periods=min_periods)
    # binary functions (ewmcov, ewmcorr) with bias=False require at
    # least two values
    assert np.isnan(result.values[:11]).all()
    assert not np.isnan(result.values[11:]).any()

    # check series of length 0
    empty = Series([], dtype=np.float64)
    result = ew_func(empty, empty, 50, name=name, min_periods=min_periods)
    tm.assert_series_equal(result, empty)

    # check series of length 1
    result = ew_func(
        Series([1.0]), Series([1.0]), 50, name=name, min_periods=min_periods
    )
    # ``np.nan`` rather than the ``np.NaN`` alias, which NumPy 2.0 removed.
    tm.assert_series_equal(result, Series([np.nan]))
def moments_consistency_mock_mean(x, mean, mock_mean):
    """
    Check that ``mean(x)`` matches the reference ``mock_mean(x)``.

    ``mean(x)`` is always evaluated.  The comparison is made only when
    ``mock_mean`` is truthy; the reference result is cast to ``float64``
    before comparing.
    """
    actual = mean(x)

    if not mock_mean:
        return

    # the reference is cast to float64 for the comparison
    reference = mock_mean(x).astype("float64")
    tm.assert_equal(actual, reference)
def moments_consistency_is_constant(x, is_constant, min_periods, count, mean, corr):
    """
    Check mean and self-correlation results for constant data.

    ``count(x)``, ``mean(x)`` and ``corr(x, x)`` are always evaluated.  When
    ``is_constant`` is truthy:

    * ``mean(x)`` must equal the maximum of ``x`` wherever
      ``count(x) >= max(min_periods, 1)`` and NaN elsewhere;
    * ``corr(x, x)`` must be NaN everywhere.
    """
    counts = count(x)
    means = mean(x)
    self_corr = corr(x, x)

    if not is_constant:
        return

    # A Series reduces in one step; anything else is reduced twice.
    if isinstance(x, Series):
        constant = x.max()
    else:
        constant = x.max().max()

    # check mean of constant series
    expected = x * np.nan
    expected[counts >= max(min_periods, 1)] = constant
    tm.assert_equal(means, expected)

    # check correlation of constant series with itself is NaN
    expected[:] = np.nan
    tm.assert_equal(self_corr, expected)
def moments_consistency_var_debiasing_factors(
    x, var_biased, var_unbiased, var_debiasing_factors
):
    """
    Check ``var_unbiased(x) == var_biased(x) * var_debiasing_factors(x)``.

    Nothing is evaluated unless all three callables are truthy.
    """
    if not (var_unbiased and var_biased and var_debiasing_factors):
        return

    # check variance debiasing factors
    unbiased = var_unbiased(x)
    biased = var_biased(x)
    factors = var_debiasing_factors(x)
    tm.assert_equal(unbiased, biased * factors)
def moments_consistency_var_data(
    x, is_constant, min_periods, count, mean, var_unbiased, var_biased
):
    """
    Check consistency of the biased and unbiased variance of ``x``.

    For each of ``var_biased`` and ``var_unbiased``:

    * no result may be negative;
    * the biased variance must equal ``mean(x * x) - mean(x) ** 2``;
    * when ``is_constant`` is truthy, no result may be positive and the
      result must be 0 wherever ``count(x) >= max(min_periods, 1)`` and NaN
      elsewhere; the unbiased variance is additionally NaN wherever
      ``count(x) < 2``.
    """
    counts = count(x)
    means = mean(x)

    for variance in (var_biased, var_unbiased):
        observed = variance(x)
        assert not (observed < 0).any().any()

        if variance is var_biased:
            # check that biased var(x) == mean(x^2) - mean(x)^2
            mean_of_squares = mean(x * x)
            tm.assert_equal(observed, mean_of_squares - (means * means))

        if not is_constant:
            continue

        # check that variance of constant series is identically 0
        assert not (observed > 0).any().any()
        expected = x * np.nan
        expected[counts >= max(min_periods, 1)] = 0.0
        if variance is var_unbiased:
            expected[counts < 2] = np.nan
        tm.assert_equal(observed, expected)
def moments_consistency_std_data(x, std_unbiased, var_unbiased, std_biased, var_biased):
    """
    Check that ``var(x) == std(x) ** 2`` for the biased and unbiased pairs.

    Both the variance and the standard deviation must also be free of
    negative values.
    """
    estimator_pairs = (
        (std_biased, var_biased),
        (std_unbiased, var_unbiased),
    )
    for std_fn, var_fn in estimator_pairs:
        variance = var_fn(x)
        deviation = std_fn(x)
        assert not (variance < 0).any().any()
        assert not (deviation < 0).any().any()

        # check that var(x) == std(x)^2
        tm.assert_equal(variance, deviation * deviation)
def moments_consistency_cov_data(x, cov_unbiased, var_unbiased, cov_biased, var_biased):
    """
    Check that ``var(x) == cov(x, x)`` for the biased and unbiased pairs.

    The variance must be free of negative values.  The covariance checks
    are skipped for a pair whose ``cov`` callable is falsy.
    """
    estimator_pairs = (
        (cov_biased, var_biased),
        (cov_unbiased, var_unbiased),
    )
    for cov_fn, var_fn in estimator_pairs:
        variance = var_fn(x)
        assert not (variance < 0).any().any()

        if not cov_fn:
            continue

        self_cov = cov_fn(x, x)
        assert not (self_cov < 0).any().any()

        # check that var(x) == cov(x, x)
        tm.assert_equal(variance, self_cov)
def moments_consistency_series_data(
    x,
    corr,
    mean,
    std_biased,
    std_unbiased,
    cov_unbiased,
    var_unbiased,
    var_biased,
    cov_biased,
):
    """
    Check identities linking corr, cov, var, std and mean for a Series.

    Nothing is checked unless ``x`` is a ``Series``.  The binary statistics
    are evaluated with ``x`` paired against itself (``y = x``).

    Checks performed:

    * ``corr(x, y) == corr(y, x)``;
    * for each (std, var, cov) triple whose ``cov`` is truthy:
      ``cov(x, y) == cov(y, x)``,
      ``cov(x, y) == (var(x + y) - var(x) - var(y)) / 2`` and
      ``corr(x, y) == cov(x, y) / (std(x) * std(y))``;
    * for the biased covariance only:
      ``cov(x, y) == mean(x * y) - mean(x) * mean(y)``.
    """
    if not isinstance(x, Series):
        return

    # Both operands are the same object, so their NaN layouts always match
    # and no structural compatibility check is needed.
    y = x
    mean_x = mean(x)

    # check that corr(x, y) is symmetric
    corr_x_y = corr(x, y)
    corr_y_x = corr(y, x)
    tm.assert_equal(corr_x_y, corr_y_x)

    for std, var, cov in [
        (std_biased, var_biased, cov_biased),
        (std_unbiased, var_unbiased, cov_unbiased),
    ]:
        var_x = var(x)
        std_x = std(x)

        if not cov:
            continue

        # check that cov(x, y) is symmetric
        cov_x_y = cov(x, y)
        cov_y_x = cov(y, x)
        tm.assert_equal(cov_x_y, cov_y_x)

        # check that cov(x, y) == (var(x+y) - var(x) - var(y)) / 2
        var_x_plus_y = var(x + y)
        var_y = var(y)
        tm.assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y))

        # check that corr(x, y) == cov(x, y) / (std(x) * std(y))
        std_y = std(y)
        tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y))

        if cov is cov_biased:
            # check that biased cov(x, y) == mean(x*y) - mean(x)*mean(y)
            mean_y = mean(y)
            mean_x_times_y = mean(x * y)
            tm.assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y))