e.g. drug test on the same samples before and after (paired test on the differences)
# One-sample (paired) t-test on the per-sample differences stored in df['diff'].
xbar = df['diff'].mean()  # sample mean of the differences
s = df['diff'].std()  # presumably the sample std (pandas default ddof=1) — confirm df is a DataFrame
n = df.shape[0]  # sample size = number of rows
std_err = s / np.sqrt(n)
mu0 = 0.0  # hypothesised mean difference under the null hypothesis
T_score = (xbar - mu0) / std_err
nsided = 1  # 1 = one-sided test, 2 = two-sided test
# Degrees of freedom come from the actual sample size (e.g. n = 10 gives dof = 9);
# do not overwrite n with a hard-coded value, or dof and std_err disagree.
dof = n - 1
# sf is the survival function (1 - cdf).
# NOTE(review): with nsided = 1, abs() measures the tail on the side of the
# observed effect — confirm that side matches the alternative hypothesis.
p_value = scipy.stats.t.sf(np.abs(T_score), dof) * nsided
# Reject the null hypothesis if p_value < significance level.
# Z-score from the two standard deviations sigma1, sigma2 and sample sizes n1, n2.
# NOTE(review): for a two-sample test xbar is presumably the difference of the
# two sample means and mu0 the hypothesised difference — confirm.
variance_of_mean = sigma1**2 / n1 + sigma2**2 / n2
SE = sqrt(variance_of_mean)  # standard error
Z = (xbar - mu0) / SE
# Two-sample t-test with pooled variance, e.g. nx, ny = 10, 8
# (e.g. apple prices in NY and LA).
# Pooled variance: the WHOLE weighted sum of the two sample variances is
# divided by the degrees of freedom, hence the parentheses around the numerator.
sp2 = ((nx - 1) * sx**2 + (ny - 1) * sy**2) / (nx + ny - 2)
# standard error of the difference between the two sample means
std_err = np.sqrt(sp2 / nx + sp2 / ny)
# T-score
T = (xbar - ybar) / std_err
# p-value
nsided = 2  # two-sided test
dof = nx + ny - 2
p_value = scipy.stats.t.sf(np.abs(T), dof) * nsided  # sf is the survival function
-
Use the T-score if the sample size is small or the population variances are not known.
-
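As a rule of thumb, reject the null hypothesis when the absolute value of the T-score is bigger than 2 (roughly the 5% two-sided significance level unless the sample is very small).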
-
Generally, for Z and T, absolute values higher than 4 are extremely significant.