Created
February 19, 2019 10:50
-
-
Save y-mitsui/8be3508746eefc192ed332edcfc049fb to your computer and use it in GitHub Desktop.
G検定による独立性の検定
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| G検定による独立性の検定 | |
| """ | |
| import numpy as np | |
| from scipy.stats import chisqprob, chisquare | |
| def gtest(f_obs, f_exp=None, ddof=0): | |
| """ | |
| http://en.wikipedia.org/wiki/G-test | |
| The G test can test for goodness of fit to a distribution | |
| Parameters | |
| ---------- | |
| f_obs : array | |
| observed frequencies in each category | |
| f_exp : array, optional | |
| expected frequencies in each category. By default the categories are | |
| assumed to be equally likely. | |
| ddof : int, optional | |
| adjustment to the degrees of freedom for the p-value | |
| Returns | |
| ------- | |
| chisquare statistic : float | |
| The chisquare test statistic | |
| p : float | |
| The p-value of the test. | |
| Notes | |
| ----- | |
| The p-value indicates the probability that the observed distribution is | |
| drawn from a distribution given frequencies in expected. | |
| So a low p-value inidcates the distributions are different. | |
| Examples | |
| -------- | |
| >>> gtest([9.0, 8.1, 2, 1, 0.1, 20.0], [10, 5.01, 6, 4, 2, 1]) | |
| (117.94955444335938, 8.5298516190930345e-24) | |
| >>> gtest([1.01, 1.01, 4.01], [1.00, 1.00, 4.00]) | |
| (0.060224734246730804, 0.97033649350189344) | |
| >>> gtest([2, 1, 6], [4, 3, 2]) | |
| (8.2135343551635742, 0.016460903780063787) | |
| References | |
| ---------- | |
| http://en.wikipedia.org/wiki/G-test | |
| """ | |
| f_obs = np.asarray(f_obs, 'f') | |
| k = f_obs.shape[0] | |
| f_exp = np.array([np.sum(f_obs, axis=0) / float(k)] * k, 'f') \ | |
| if f_exp is None \ | |
| else np.asarray(f_exp, 'f') | |
| g = 2 * np.add.reduce(f_obs * np.log(f_obs / f_exp)) | |
| return g, chisqprob(g, k - 1 - ddof) | |
| def makeTable(n_sample1, n_sample2, true_prob1, true_prob2): | |
| sample_1 = np.random.multinomial(1, true_prob1, size=n_sample1) | |
| sample_2 = np.random.multinomial(1, true_prob2, size=n_sample2) | |
| return np.array([np.sum(sample_1, 0), np.sum(sample_2, 0)]) | |
| def expectedFreq(tab): | |
| tot_col = np.sum(tab, 0) | |
| tot_row = np.sum(tab, 1) | |
| expe = [] | |
| for i in range(tab.shape[0]): | |
| row = [] | |
| for j in range(tab.shape[1]): | |
| row.append((tot_col[j] * tot_row[i]) / np.sum(tab)) | |
| expe.append(row) | |
| return np.array(expe) | |
| sample_tab = makeTable(40, 40, [0.4, 0.6], [0.5, 0.5]) | |
| expe_tab = expectedFreq(sample_tab) | |
| print(gtest(sample_tab.reshape(-1,), expe_tab.reshape(-1,))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment