Skip to content

Instantly share code, notes, and snippets.

@y-mitsui
Created February 19, 2019 10:50
Show Gist options
  • Select an option

  • Save y-mitsui/8be3508746eefc192ed332edcfc049fb to your computer and use it in GitHub Desktop.

Select an option

Save y-mitsui/8be3508746eefc192ed332edcfc049fb to your computer and use it in GitHub Desktop.
G検定による独立性の検定
"""
G検定による独立性の検定
"""
import numpy as np
from scipy.stats import chisqprob, chisquare
def gtest(f_obs, f_exp=None, ddof=0):
"""
http://en.wikipedia.org/wiki/G-test
The G test can test for goodness of fit to a distribution
Parameters
----------
f_obs : array
observed frequencies in each category
f_exp : array, optional
expected frequencies in each category. By default the categories are
assumed to be equally likely.
ddof : int, optional
adjustment to the degrees of freedom for the p-value
Returns
-------
chisquare statistic : float
The chisquare test statistic
p : float
The p-value of the test.
Notes
-----
The p-value indicates the probability that the observed distribution is
drawn from a distribution given frequencies in expected.
So a low p-value inidcates the distributions are different.
Examples
--------
>>> gtest([9.0, 8.1, 2, 1, 0.1, 20.0], [10, 5.01, 6, 4, 2, 1])
(117.94955444335938, 8.5298516190930345e-24)
>>> gtest([1.01, 1.01, 4.01], [1.00, 1.00, 4.00])
(0.060224734246730804, 0.97033649350189344)
>>> gtest([2, 1, 6], [4, 3, 2])
(8.2135343551635742, 0.016460903780063787)
References
----------
http://en.wikipedia.org/wiki/G-test
"""
f_obs = np.asarray(f_obs, 'f')
k = f_obs.shape[0]
f_exp = np.array([np.sum(f_obs, axis=0) / float(k)] * k, 'f') \
if f_exp is None \
else np.asarray(f_exp, 'f')
g = 2 * np.add.reduce(f_obs * np.log(f_obs / f_exp))
return g, chisqprob(g, k - 1 - ddof)
def makeTable(n_sample1, n_sample2, true_prob1, true_prob2):
sample_1 = np.random.multinomial(1, true_prob1, size=n_sample1)
sample_2 = np.random.multinomial(1, true_prob2, size=n_sample2)
return np.array([np.sum(sample_1, 0), np.sum(sample_2, 0)])
def expectedFreq(tab):
tot_col = np.sum(tab, 0)
tot_row = np.sum(tab, 1)
expe = []
for i in range(tab.shape[0]):
row = []
for j in range(tab.shape[1]):
row.append((tot_col[j] * tot_row[i]) / np.sum(tab))
expe.append(row)
return np.array(expe)
sample_tab = makeTable(40, 40, [0.4, 0.6], [0.5, 0.5])
expe_tab = expectedFreq(sample_tab)
print(gtest(sample_tab.reshape(-1,), expe_tab.reshape(-1,)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment