Last active
November 20, 2023 13:10
-
-
Save gramnation/f51cff7dd95fcc3021d2615f2cee678f to your computer and use it in GitHub Desktop.
Python helper functions for AP Statistics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| """ | |
| Functions to solve AP stats problems. | |
| """ | |
| import math # python built-in | |
| from statistics import NormalDist # python built-in | |
| import scipy.stats as stats # scipy provides T distribution | |
def binompdf(n, p, k):
    """
    TI-84 binompdf: probability of exactly k successes in n trials.

    n: number of trials
    p: probability of success on each trial
    k: number of successes

    Returns 0.0 when k lies outside 0..n, because that outcome is impossible.
    """
    # Guard before the formula: math.comb rejects a negative k, and for
    # k > n the exponent n-k is negative, which makes math.pow raise when
    # 1-p is zero.
    if k < 0 or k > n:
        return 0.0
    return math.comb(n, k) * math.pow(p, k) * math.pow(1-p, n-k)
def binomcdf(n, p, k):
    """
    TI-84 binomcdf: probability of at most k successes in n trials.

    n: number of trials
    p: probability of success on each trial
    k: max number of successes
    """
    # Counts above n are impossible and add nothing to the sum, so cap the
    # upper limit at n instead of evaluating binompdf for them.
    top = min(k, n)
    # fsum keeps the accumulated rounding error of the many small terms low.
    return math.fsum(binompdf(n, p, ke) for ke in range(top+1))
def geometpdf(p, x):
    """
    TI-84 geometpdf: probability that the first success happens on trial x.

    p: probability of success on each trial
    x: trial number of the first success
    """
    miss = 1 - p
    failures_first = x - 1
    # x-1 failures in a row, followed by one success.
    return miss**failures_first * p
def geometcdf(p, x):
    """
    TI-84 geometcdf: probability that the first success happens within
    x attempts.

    p: probability of success on each attempt
    x: number of attempts
    """
    # Complement of "all x attempts fail".
    all_fail = (1 - p)**x
    return 1 - all_fail
def normalcdf(stddevs, lower=None, mean=0.0, stddev=1.0):
    """
    TI-84 normalcdf: area under a normal curve below an upper bound, or
    between a lower and an upper bound.

    stddevs: upper bound; with the default mean and stddev this is a
        z-score (number of standard deviations from the mean)
    lower: optional lower bound; when omitted the area has no lower limit
    mean: mean of the distribution (default 0)
    stddev: standard deviation of the distribution (default 1)
    """
    dist = NormalDist(mean, stddev)
    area_below_upper = dist.cdf(stddevs)
    if lower is None:
        return area_below_upper
    # Area between the bounds is the difference of the two cumulative areas.
    return area_below_upper - dist.cdf(lower)
def ztable(stddevs):
    """
    Look up the student Z table: the cumulative density function (cdf)
    value for a z-score.

    stddevs: number of standard deviations above or below the population
        mean.
    """
    area_below = normalcdf(stddevs)
    return area_below
def binomial_mean_stddev(p, n):
    """
    Return [mean, standard deviation] of a binomial distribution.

    p: probability of success
    n: number of trials
    """
    mean = n*p
    variance = mean*(1-p)
    return [mean, math.sqrt(variance)]
def geometric_mean_stddev(p):
    """
    Return [mean, standard deviation] of a geometric distribution.

    p: probability of success
    """
    mean = 1/p
    spread = math.sqrt(1-p)/p
    return [mean, spread]
def z_of_confidence_interval(c):
    """
    Return the critical z value for confidence level c (95% would be .95).
    """
    # Area left in each tail outside the central region of size c.
    tail = (1-c)/2
    lower_z = NormalDist().inv_cdf(tail)
    # The lower cutoff is negative; flip the sign to report it as positive.
    return -lower_z
def confidence_interval_of_z(z):
    """
    Return the confidence level covered by +/- z standard deviations.

    z: critical z value; its sign is ignored, so z and -z give the same
        level and the result is never negative
    """
    # Without abs(), a negative z would swap the two bounds and yield a
    # negative level.
    half_width = abs(z)
    dist = NormalDist()
    return dist.cdf(half_width) - dist.cdf(-half_width)
def smallest_sample_n_for_z(c, e, p=.5):
    """
    Return the smallest sample size required to obtain the desired
    margin of error, as a whole number.

    c: confidence level, 90% would be .9
    e: margin of error, 4% would be .04
    p: expected probability of success (.5 is default since it maximizes SE)
    """
    z = z_of_confidence_interval(c)
    n = (p*(1-p))/((e/z)**2)
    # A sample size must be a whole number, and rounding down would miss
    # the requested margin of error, so always round up.
    return math.ceil(n)
def confidence_interval(c, s1, n1, s2=0.0, n2=1.0):
    """
    Confidence interval of a sampled proportion, or of the difference
    between two sampled proportions when s2 and n2 are supplied.

    c: confidence level, 95% would be .95
    s1, n1: successes and sample size of the first sample
    s2, n2: successes and sample size of the second sample (optional)

    Returns a string giving the interval and the "estimate +/- margin" form,
    each rounded to 3 places.
    """
    z = z_of_confidence_interval(c)
    p1, p2 = s1/n1, s2/n2
    # With the default s2=0, the second term is zero and drops out.
    var1 = (p1*(1-p1))/n1
    var2 = (p2*(1-p2))/n2
    std = math.sqrt(var1 + var2)
    diff = p1-p2
    return f'({round(diff-z*std,3)}, {round(diff+z*std,3)}), or {round(diff,3)} +/- {round(z*std,3)}'
def z_stat_two_proportions(s1, n1, s2, n2):
    """
    Z statistic for the difference of two sample proportions, using the
    pooled (combined) proportion for the standard error.

    s1, n1: successes and sample size of the first sample
    s2, n2: successes and sample size of the second sample
    """
    p1 = s1/n1
    p2 = s2/n2
    pooled = (s1+s2)/(n1+n2)
    pooled_spread = pooled*(1-pooled)
    std_err = math.sqrt((pooled_spread/n1)+(pooled_spread/n2))
    return (p1-p2)/std_err
def hypothesis_test_gt(s1, n1, s2, n2):
    """
    Two-proportion z test using the upper tail of the z statistic.

    Returns a dict with the z statistic ('z') and the area above it
    ('pval').
    """
    z = z_stat_two_proportions(s1, n1, s2, n2)
    upper_tail = 1-ztable(z)
    return {'z': z, 'pval': upper_tail}
def hypothesis_test_ne(s1, n1, s2, n2):
    """
    Two-sided two-proportion z test.

    Returns a dict with the z statistic ('z') and the combined area of
    both tails beyond |z| ('pval').
    """
    z = z_stat_two_proportions(s1, n1, s2, n2)
    # Use |z|: the two-sided p-value depends only on how far z is from 0.
    # Passing a negative z straight through pushed the p-value above 1.
    pval = 1-confidence_interval_of_z(abs(z))
    return {'z':z, 'pval': pval}
def tvalue(ci, df):
    """
    Look up the T table critical value.

    ci: confidence level, 96% would be .96
    df: degrees of freedom, sample size - 1
    """
    # Half of the area outside the interval sits in the upper tail, so the
    # critical value is the quantile at ci plus that half.
    upper_half = (1-ci)/2
    quantile = ci + upper_half
    return stats.t.ppf(q=quantile, df=df)
def confidence_interval_paired_data(ci, mu, n, s):
    """
    T confidence interval for the mean difference of paired data.

    ci: confidence level
    mu: mean of differences between data pairs
    n: number of trials
    s: standard deviation of differences

    Returns a dict with the "mean +/- margin" form ('interval') and the
    lower and upper bounds ('range'), both as strings.
    """
    t = tvalue(ci, n-1)
    std_err = s/math.sqrt(n)
    err = t*std_err
    return {
        'interval': f'{mu}+/-{err}',
        'range': f'{mu-err},{mu+err}',
    }
def mean_normalized_interval(mean, min, max):
    """
    Machine learning (feature scaling): mean normalization.

    Returns the [low, high] ends of the range after subtracting the mean
    and dividing by the range width (max - min).
    """
    width = max-min
    low = (min-mean)/width
    high = (max-mean)/width
    return [low, high]
def z_normalized_interval(stdev, mean, min, max):
    """
    Machine learning (feature scaling): z-score normalization.

    Returns the [low, high] ends of the range after subtracting the mean
    and dividing by the standard deviation.
    """
    low = (min-mean)/stdev
    high = (max-mean)/stdev
    return [low, high]
def main():
    """Print a few example calculations."""
    z_lookup = ztable(1.53)
    print('ZTable value @ 1.53: ', z_lookup)
    exact_four = binompdf(7, .35, 4)
    print('binompdf(7, .35, 4): ', exact_four)
    sample_size = smallest_sample_n_for_z(.99, .03, .06)
    print('eg smallest sample size:', sample_size)


# Run the examples only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment