Growth
그로스해킹에서의 A/B test : 그룹간 성과지표의 유의미한 차이 판단
fiftyline
2025. 6. 10. 15:29
그룹(A/B)간 성과지표(CTR, CR, CPA, RPV, Bounce Rate)의 차이가 유의미한지 판단하기 위한 A/B test
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Hard-coded A/B test data (Group values are entered directly)
# Hand-entered observations: five rows for group A followed by five rows
# for group B.
data = {
    "Group": ["A"] * 5 + ["B"] * 5,
    "Visitors": [1000, 1200, 1100, 1150, 1300, 1050, 1250, 1400, 1350, 1280],
    "Clicks": [80, 100, 85, 90, 110, 105, 120, 130, 140, 125],
    "Conversions": [50, 55, 53, 60, 65, 70, 75, 80, 85, 90],
    "Revenue": [5000, 5500, 5300, 6000, 6500, 7000, 7500, 8000, 8500, 9000],
    "Bounce": [400, 420, 410, 430, 450, 390, 400, 420, 410, 430],
    "Ad_Cost": [2500, 2600, 2700, 2800, 2900, 2400, 2550, 2700, 2850, 3000],
}

# Build the frame with Group stored as a categorical column
# (the original comment notes this avoids a FutureWarning).
df = pd.DataFrame(data).astype({"Group": "category"})

# Per-row performance metrics, each a simple ratio of two raw columns.
df = df.assign(
    CTR=df["Clicks"] / df["Visitors"],
    CR=df["Conversions"] / df["Visitors"],
    CPA=df["Ad_Cost"] / df["Conversions"],
    RPV=df["Revenue"] / df["Visitors"],
    Bounce_Rate=df["Bounce"] / df["Visitors"],
)

# Mean of every metric within each group. observed=True is passed
# explicitly (the original comment notes this avoids a FutureWarning).
summary = df.groupby("Group", observed=True)[
    ["CTR", "CR", "CPA", "RPV", "Bounce_Rate"]
].mean()
print(summary)
# ANOVA analysis function
def perform_anova(metric, data=None):
    """Fit ``metric ~ C(Group)`` by OLS and return the model's ANOVA table.

    Args:
        metric: Name of the column used as the response variable. It is
            interpolated directly into the model formula.
        data: DataFrame holding ``metric`` and a ``Group`` column. When
            omitted, the module-level ``df`` is used, which matches the
            previous behavior.

    Returns:
        The table produced by ``sm.stats.anova_lm(model, typ=2)``.
    """
    frame = df if data is None else data
    # C(Group) makes the formula treat Group as a categorical variable.
    model = smf.ols(f"{metric} ~ C(Group)", data=frame).fit()
    return sm.stats.anova_lm(model, typ=2)
# Metrics compared between the A and B groups.
metrics = ["CTR", "CR", "CPA", "RPV", "Bounce_Rate"]

# One ANOVA table per metric, keyed by metric name.
anova_results = {metric: perform_anova(metric) for metric in metrics}

# Significance threshold applied to the p-value of the Group term.
ALPHA = 0.05

# Print each ANOVA table followed by a one-line verdict.
# The Korean messages below were restored from mis-encoded text; they read
# "<metric> ANOVA result", "the difference between the A/B groups is
# significant" and "... is not significant".
for metric, result in anova_results.items():
    print(f"\n{metric} ANOVA 결과:\n", result)
    # The first row of the table is the C(Group) term; .iloc[0] selects it
    # by position (the original comment notes this avoids a FutureWarning).
    p_value = result["PR(>F)"].iloc[0]
    if p_value < ALPHA:
        print(f"{metric}에서 A/B 그룹 간 차이가 유의미함 (p-value: {p_value:.4f})")
    else:
        print(f"{metric}에서 A/B 그룹 간 차이가 유의미하지 않음 (p-value: {p_value:.4f})")
# Visualize the per-group mean of each metric, one bar chart per metric.
# The grid width and figure size are derived from `metrics` instead of being
# hard-coded to 5, so adding or removing a metric keeps the plot in sync.
fig, axes = plt.subplots(1, len(metrics), figsize=(4 * len(metrics), 4))
# With a single metric plt.subplots returns a bare Axes rather than an array;
# atleast_1d makes both cases iterable and leaves the usual array unchanged.
axes = np.atleast_1d(axes)
for ax, metric in zip(axes, metrics):
    ax.bar(summary.index, summary[metric], color=["blue", "orange"])
    ax.set_title(metric)
    ax.set_xlabel("Group")
    ax.set_ylabel(metric)
plt.tight_layout()
plt.show()
Group | CTR | CR | CPA | RPV | Bounce_Rate |
A | 0.080696 | 0.049238 | 47.899635 | 4.923781 | 0.368559 |
B | 0.098043 | 0.063417 | 33.779692 | 6.341700 | 0.326214 |

CTR ANOVA 결과:
sum_sq | df | F | PR(>F) | |
C(Group) | 0.000752 | 1.0 | 55.963648 | 0.000071 |
Residual | 0.000108 | 8.0 | NaN | NaN |
CTR์์ A/B ๊ทธ๋ฃน ๊ฐ ์ฐจ์ด๊ฐ ์ ์๋ฏธํจ (p-value: 0.0001)