Skip to content

Instantly share code, notes, and snippets.

@gramnation
Last active November 20, 2023 13:10
Show Gist options
  • Select an option

  • Save gramnation/f51cff7dd95fcc3021d2615f2cee678f to your computer and use it in GitHub Desktop.

Select an option

Save gramnation/f51cff7dd95fcc3021d2615f2cee678f to your computer and use it in GitHub Desktop.
Python helper functions for AP Statistics
# -*- coding: utf-8 -*-
"""
Functions to solve AP stats problems.
"""
import math # python built-in
from statistics import NormalDist # python built-in
import scipy.stats as stats # scipy provides T distribution
def binompdf(n, p, k):
    """
    Binomial probability of exactly k successes (TI-84 binompdf).

    n: number of trials
    p: probability of success on a single trial
    k: number of successes
    """
    # Count of arrangements times the probability of the k successes ...
    partial = math.comb(n, k) * math.pow(p, k)
    # ... times the probability of the remaining n-k failures.
    return partial * math.pow(1 - p, n - k)
def binomcdf(n, p, k):
    """
    Binomial probability of at most k successes (TI-84 binomcdf).

    n: number of trials
    p: probability of success on a single trial
    k: maximum number of successes (inclusive)
    """
    # One binompdf term for each success count 0..k, summed with fsum.
    terms = [binompdf(n, p, successes) for successes in range(k + 1)]
    return math.fsum(terms)
def geometpdf(p, x):
    """
    Geometric probability that the first success is on trial x (TI-84 geometpdf).

    p: probability of success on a single trial
    x: trial number of the first success
    """
    # x-1 failures in a row, followed by one success.
    failures = x - 1
    return (1 - p)**failures * p
def geometcdf(p, x):
    """
    Geometric probability of a success within the first x attempts (TI-84 geometcdf).

    p: probability of success on a single trial
    x: number of attempts
    """
    # Complement of "all x attempts fail".
    failure = 1 - p
    return 1 - failure**x
def normalcdf(stddevs):
    """
    Standard normal cumulative probability up to stddevs.

    Like TI-84 normalcdf, but takes only an upper bound.
    stddevs: upper bound, in standard deviations from the mean
    """
    standard_normal = NormalDist(mu=0.0, sigma=1.0)
    return standard_normal.cdf(stddevs)
def ztable(stddevs):
    """
    Look up the Z table value, i.e. the cumulative distribution function (cdf) value.

    stddevs: number of standard deviations above or below the population mean
    """
    # The Z table is just the standard normal cdf under another name.
    cumulative = normalcdf(stddevs)
    return cumulative
def binomial_mean_stddev(p, n):
    """
    Return [mean, standard deviation] of a binomial distribution.

    p: probability of success on a single trial
    n: number of trials
    """
    mean = n * p
    variance = mean * (1 - p)
    return [mean, math.sqrt(variance)]
def geometric_mean_stddev(p):
    """
    Return [mean, standard deviation] of a geometric distribution.

    p: probability of success on a single trial
    """
    mean = 1 / p
    stddev = math.sqrt(1 - p) / p
    return [mean, stddev]
def z_of_confidence_interval(c):
    """
    Return the (positive) critical z value for confidence level c.

    c: confidence level, 95% would be .95
    """
    # Area left in the lower tail; its quantile is negative, so flip the sign.
    lower_tail = (1 - c) / 2
    return -NormalDist().inv_cdf(lower_tail)
def confidence_interval_of_z(z):
    """
    Return the confidence level whose critical value is z.

    This is the area under the standard normal curve between -|z| and +|z|.
    z: critical value; its sign is ignored, so -1.96 and 1.96 both give ~.95
    """
    # Use the magnitude: with a negative z the difference cdf(z) - cdf(-z)
    # would come out negative, which is not a valid confidence level.
    magnitude = abs(z)
    cdf = NormalDist().cdf
    return cdf(magnitude) - cdf(-magnitude)
def smallest_sample_n_for_z(c, e, p=.5):
    """
    Return the smallest sample size that achieves the desired margin of error.

    c: confidence level, 90% would be .9
    e: margin of error, 4% would be .04
    p: expected probability of success (.5 is default since it maximizes SE)

    The result is a whole number: the exact solution of
    e = z * sqrt(p*(1-p)/n) is rounded up, because a fractional sample
    is impossible and rounding down would exceed the margin of error.
    """
    z = z_of_confidence_interval(c)
    exact_n = (p * (1 - p)) / ((e / z)**2)
    return math.ceil(exact_n)
def confidence_interval(c, s1, n1, s2=0.0, n2=1.0):
    """
    Confidence interval of sampled proportion(s), formatted as a string.

    Pass s2 and n2 to get the interval for the difference between
    two proportions; with the defaults the second sample contributes nothing.
    c: confidence level, 95% would be .95
    s1, n1: successes and sample size of the first sample
    s2, n2: successes and sample size of the second sample
    """
    z = z_of_confidence_interval(c)
    phat_a, phat_b = s1 / n1, s2 / n2
    # Each sample contributes p*(1-p)/n to the variance of the difference.
    var_a = (phat_a * (1 - phat_a)) / n1
    var_b = (phat_b * (1 - phat_b)) / n2
    std = math.sqrt(var_a + var_b)
    diff = phat_a - phat_b
    return f'({round(diff-z*std,3)}, {round(diff+z*std,3)}), or {round(diff,3)} +/- {round(z*std,3)}'
def z_stat_two_proportions(s1, n1, s2, n2):
    """
    Z statistic for the difference of two sample proportions.

    s1, n1: successes and sample size of the first sample
    s2, n2: successes and sample size of the second sample
    """
    observed_gap = s1 / n1 - s2 / n2
    # Pooled (combined) proportion across both samples.
    pc = (s1 + s2) / (n1 + n2)
    pooled = pc * (1 - pc)
    standard_error = math.sqrt((pooled / n1) + (pooled / n2))
    return observed_gap / standard_error
def hypothesis_test_gt(s1, n1, s2, n2):
    """
    One-sided two-proportion z test: z statistic and upper-tail p-value.

    s1, n1: successes and sample size of the first sample
    s2, n2: successes and sample size of the second sample
    Returns a dict with keys 'z' and 'pval'.
    """
    statistic = z_stat_two_proportions(s1, n1, s2, n2)
    # Area above the statistic under the standard normal curve.
    upper_tail = 1 - ztable(statistic)
    return dict(z=statistic, pval=upper_tail)
def hypothesis_test_ne(s1, n1, s2, n2):
    """
    Two-sided two-proportion z test: z statistic and two-tailed p-value.

    s1, n1: successes and sample size of the first sample
    s2, n2: successes and sample size of the second sample
    Returns a dict with keys 'z' and 'pval'.
    """
    z = z_stat_two_proportions(s1, n1, s2, n2)
    # Two-tailed area beyond |z|. Working from the magnitude keeps the
    # p-value in [0, 1] when the first proportion is the smaller one
    # (negative z); the signed form produced values above 1 in that case.
    pval = 2 * ztable(-abs(z))
    return {'z': z, 'pval': pval}
def tvalue(ci, df):
    """
    Look up the T table (critical t) value.

    ci: confidence level, 96% would be .96
    df: degrees of freedom, sample size - 1
    """
    # Half of the area outside the interval sits in the upper tail, so the
    # quantile to look up is the confidence level plus that half.
    tail = (1 - ci) / 2
    q = ci + tail
    return stats.t.ppf(q=q, df=df)
def confidence_interval_paired_data(ci, mu, n, s):
    """
    T confidence interval for the mean difference of paired data.

    ci: confidence level
    mu: mean of differences between data pairs
    n: number of trials
    s: standard deviation of differences
    Returns a dict with the interval as 'mu+/-err' and as a 'low,high' range.
    """
    critical_t = tvalue(ci, n - 1)
    standard_error = s / math.sqrt(n)
    err = critical_t * standard_error
    return {
        'interval': f'{mu}+/-{err}',
        'range': f'{mu-err},{mu+err}',
    }
def mean_normalized_interval(mean, min, max):
    """
    Machine learning (feature scaling): mean-normalize the ends of a range.

    Returns [low, high], each end shifted by the mean and divided by
    the width of the range (max - min).
    """
    width = max - min
    return [(bound - mean) / width for bound in (min, max)]
def z_normalized_interval(stdev, mean, min, max):
    """
    Machine learning (feature scaling): z-score the ends of a range.

    Returns [low, high], each end shifted by the mean and divided by stdev.
    """
    low = (min - mean) / stdev
    high = (max - mean) / stdev
    return [low, high]
def main():
    """Print a few sample computations as a quick demonstration."""
    z_lookup = ztable(1.53)
    print('ZTable value @ 1.53: ', z_lookup)

    exactly_four = binompdf(7, .35, 4)
    print('binompdf(7, .35, 4): ', exactly_four)

    sample_size = smallest_sample_n_for_z(.99, .03, .06)
    print('eg smallest sample size:', sample_size)


# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment