-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathicc_estimation.py
More file actions
106 lines (64 loc) · 2.6 KB
/
icc_estimation.py
File metadata and controls
106 lines (64 loc) · 2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
from statsmodels.regression.mixed_linear_model import MixedLM
# Simulate DMA-level sales data with a target ICC
def simulate_dma_sales(n_dmas=210, n_weeks=6, icc=0.15, mean_sales=1000,
sd_sales=300, seed=42):
np.random.seed(seed)
# Calculate between and within cluster variance
var_total = sd_sales ** 2
var_between = icc * var_total
var_within = var_total - var_between
# Generate random intercepts for each DMA
dma_effects = np.random.normal(0, np.sqrt(var_between), size=n_dmas)
# Simulate weekly sales per DMA
data = []
for dma in range(n_dmas):
for week in range(n_weeks):
y = mean_sales + dma_effects[dma] + np.random.normal(0,
np.sqrt(var_within))
data.append({‘dma’: dma, ‘week’: week, ‘sales’: y})
return pd.DataFrame(data)
# Estimate ICC using mixed-effects model (robust to imbalance)
def estimate_icc(df):
model = MixedLM.from_formula(‘sales ~ 1’, groups=’dma’, data=df)
result = model.fit()
var_between = result.cov_re.iloc[0, 0] # Random intercept variance
var_within = result.scale# Residual (within-group)
variance
icc = var_between / (var_between + var_within)
return icc
# Compare power at different durations given ICC
def simulate_power(n_weeks_list=[4, 6], icc=0.15, lift=0.05, n_sim=100):
results = []
for weeks in n_weeks_list:
significant = 0
for _ in range(n_sim):
df = simulate_dma_sales(n_weeks=weeks, icc=icc)
treated = np.random.choice(df[‘dma’].unique(), size=105,
replace=False)
df[‘group’] = df[‘dma’].apply(lambda x: ‘T’ if x in treated else
‘C’)
# Apply lift to final week in treatment group
df.loc[(df[‘group’] == ‘T’) & (df[‘week’] == weeks - 1),
‘sales’] *= (1 + lift)
# Difference-in-differences by DMA
pre = df[df[‘week’] < weeks - 1].groupby(‘dma’)[‘sales’].mean()
post = df[df[‘week’] == weeks - 1].groupby(‘dma’)[‘sales’].
mean()
did = (post - pre).reset_index().merge(df[[‘dma’, ‘group’]].
drop_duplicates(), on=’dma’)
t, p = ttest_ind(did[did[‘group’] == ‘T’][‘sales’],
did[did[‘group’] == ‘C’][‘sales’],
equal_var=False)
if p < 0.05:
significant += 1
power = significant / n_sim
results.append({‘weeks’: weeks, ‘power’: power})
return pd.DataFrame(results)
# Example usage
df = simulate_dma_sales(icc=0.18)
print(f”Estimated ICC: {estimate_icc(df):.3f}”)
power_df = simulate_power(n_weeks_list=[4, 6, 8], icc=0.18)
print(power_df)