Statistical hypothesis testing of goodness-of-fit with SciPy
$ python goodness_of_fit.py
H0: the observed frequencies follow the expected ones
H1: the observed frequencies do not follow the expected ones
observed_frequencies = [0, 10, 6, 4, 5, 5]
alpha = 0.05

           pearson: p = 0.064663, H0 is NOT rejected
    log-likelihood: p = 0.014007, H0 is rejected
     freeman-tukey: p = 0.000233, H0 is rejected
mod-log-likelihood: p = 0.000000, H0 is rejected
            neyman: p = 0.000000, H0 is rejected
      cressie-read: p = 0.051902, H0 is NOT rejected

Description
pearson: Pearson’s chi-squared statistic. In this case, the function is equivalent to chisquare.
log-likelihood: Log-likelihood ratio. Also known as the G-test.
freeman-tukey: Freeman-Tukey statistic.
mod-log-likelihood: Modified log-likelihood ratio.
neyman: Neyman’s statistic.
cressie-read: The power recommended in the paper by Cressie and Read.
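Each name in the list above corresponds to a value of the Cressie-Read power parameter lambda (pearson is lambda = 1, log-likelihood lambda = 0, freeman-tukey lambda = -1/2, mod-log-likelihood lambda = -1, neyman lambda = -2, cressie-read lambda = 2/3). A minimal sketch checking the documented equivalence of lambda_="pearson" with chisquare on the same counts:

import numpy as np
from scipy.stats import chisquare, power_divergence

# same observed counts as above; zeros are harmless here because
# pearson's statistic divides by the expected counts, not the observed ones
observed = [0, 10, 6, 4, 5, 5]

pd_result = power_divergence(observed, lambda_="pearson")  # lambda = 1
chi_result = chisquare(observed)

assert np.isclose(pd_result.statistic, chi_result.statistic)
assert np.isclose(pd_result.pvalue, chi_result.pvalue)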
import numpy as np
from scipy.stats import power_divergence

# observed frequencies from a discrete uniform distribution
# https://biolab.sakura.ne.jp/multinomial-test.html
observed_frequencies = [0, 10, 6, 4, 5, 5]

# significance level
alpha = 0.05

print("""H0: the observed frequencies follow the expected ones
H1: the observed frequencies do not follow the expected ones
observed_frequencies = {}
alpha = {}
""".format(observed_frequencies, alpha))

# replace zeros with machine epsilon to avoid a division-by-zero error
# (e.g. neyman's statistic divides by the observed frequencies)
eps = np.finfo(float).eps
for i, v in enumerate(observed_frequencies):
    if v < 1:
        observed_frequencies[i] = eps

# https://docs.scipy.org/doc/scipy-1.11.3/reference/generated/scipy.stats.power_divergence.html
methods = {
    "pearson": "Pearson’s chi-squared statistic. In this case, the function is equivalent to chisquare.",
    "log-likelihood": "Log-likelihood ratio. Also known as the G-test.",
    "freeman-tukey": "Freeman-Tukey statistic.",
    "mod-log-likelihood": "Modified log-likelihood ratio.",
    "neyman": "Neyman’s statistic.",
    "cressie-read": "The power recommended in the paper by Cressie and Read.",
}

for k in methods:
    result = power_divergence(observed_frequencies, lambda_=k)
    s = "H0 is rejected" if result.pvalue < alpha else "H0 is NOT rejected"
    print("{:>18s}: p = {:f}, {}".format(k, result.pvalue, s))

print()
print("Description")
for k, v in methods.items():
    print("{}: {}".format(k, v))