# gramnation/stews_stats_tools.py

## stews_stats_tools.py
# -*- coding: utf-8 -*-
"""
Functions to solve AP stats problems.
"""

import math                        # python built-in
from statistics import NormalDist  # python built-in
import scipy.stats as stats        # scipy provides T distribution

def binompdf(n, p, k):
    """
    TI-84 binompdf: probability of exactly k successes in n trials.
    n: number of trials
    p: probability of success
    k: number of successes
    Returns 0.0 when k > n, since more successes than trials is impossible.
    """
    # Without this guard, k > n asks math.pow for a negative power of (1-p),
    # which raises when p == 1 and can overflow when p is close to 1.
    if k > n:
        return 0.0
    return math.comb(n, k) * math.pow(p, k) * math.pow(1-p, n-k)

def binomcdf(n, p, k):
    """
    TI-84 binomcdf: probability of at most k successes in n trials.
    n: number of trials
    p: probability of success
    k: max number of successes
    """
    # One binompdf term for every success count from 0 through k.
    terms = [binompdf(n, p, successes) for successes in range(k + 1)]
    # fsum adds the floating point terms without accumulating rounding error.
    return math.fsum(terms)

def geometpdf(p, x):
    """
    TI-84 geometpdf: probability that the first success occurs on trial x.
    p: probability of success
    x: number of first success
    Returns 0.0 for x < 1, since the first success cannot come before trial 1.
    """
    # Without this guard, x < 1 raises (1-p) to a negative power, giving
    # values above p (or a ZeroDivisionError when p == 1).
    if x < 1:
        return 0.0
    return (1-p)**(x-1) * p

def geometcdf(p, x):
    """
    TI-84 geometcdf: probability that the first success occurs within x attempts.
    p: probability of success
    x: number of attempts
    Returns 0.0 for x < 1, since no success can occur in fewer than one attempt.
    """
    # Without this guard a negative x makes (1-p)**x exceed 1, so the
    # result would be a negative "probability".
    if x < 1:
        return 0.0
    return 1 - (1-p)**x

def normalcdf(stddevs):
    """
    Standard normal cumulative probability below stddevs.
    Like TI-84 normalcdf, but only the upper bound is accepted.
    stddevs: upper bound, in standard deviations from the mean
    """
    standard_normal = NormalDist()  # no arguments: mean 0, standard deviation 1
    return standard_normal.cdf(stddevs)

def ztable(stddevs):
    """
    Look up a student Z table value, aka the cumulative distribution
    function (cdf) value of the standard normal curve.
    stddevs: number of standard deviations above or below population mean.
    """
    # The Z table is the same lookup as normalcdf; this is just the familiar name.
    cumulative_probability = normalcdf(stddevs)
    return cumulative_probability

def binomial_mean_stddev(p, n):
    """
    Mean and standard deviation of a binomial distribution.
    p: probability of success
    n: number of trials
    Returns [mean, standard deviation].
    """
    mean = n*p
    variance = mean*(1-p)
    return [mean, math.sqrt(variance)]

def geometric_mean_stddev(p):
    """
    Mean and standard deviation of a geometric distribution.
    p: probability of success
    Returns [mean, standard deviation].
    """
    mean = 1/p
    stddev = math.sqrt(1-p)/p
    return [mean, stddev]

def z_of_confidence_interval(c):
    """
    Returns the (positive) critical z value of confidence interval c.
    c: confidence interval, 95% would be .95
    """
    # Area left outside the interval in each tail.
    tail_area = (1-c)/2
    # The z at the lower tail is the mirror image of the critical value,
    # so flip its sign.
    return -NormalDist().inv_cdf(tail_area)

def confidence_interval_of_z(z):
    """
    Returns confidence interval of z value: the area under the standard
    normal curve between -|z| and +|z|.
    z: number of standard deviations; the sign is ignored
    """
    # Use the magnitude so a negative z gives the same central area as its
    # positive counterpart instead of a negative result.
    bound = abs(z)
    return ztable(bound) - ztable(-bound)

def smallest_sample_n_for_z(c, e, p=.5):
    """
    Returns smallest sample size required to obtain
    desired margin of error.
    c: confidence interval, 90% would be .9
    e: margin of error, 4% would be .04
    p: expected probability of success (.5 is default since it maximizes SE)
    The value is returned unrounded; round up for a whole number of samples.
    """
    critical_z = z_of_confidence_interval(c)
    variance = p*(1-p)
    # Solves e = z * sqrt(p(1-p)/n) for n.
    return variance/((e/critical_z)**2)

def confidence_interval(c, s1, n1, s2=0.0, n2=1.0):
    """
    Confidence interval of sampled proportion(s).
    c: confidence interval, 95% would be .95
    s1: successes in the first sample
    n1: size of the first sample
    s2, n2: successes and size of a second sample; pass them to get the
        interval of the difference between two proportions. The defaults
        (0 out of 1) add nothing to the estimate or its standard error.
    Returns a string holding the interval and the 'estimate +/- margin'
    form, each rounded to 3 decimal places.
    """
    z = z_of_confidence_interval(c)
    p1 = s1/n1
    p2 = s2/n2
    variance = (p1*(1-p1))/n1 + (p2*(1-p2))/n2
    margin = z*math.sqrt(variance)
    diff = p1-p2
    low = round(diff-margin, 3)
    high = round(diff+margin, 3)
    return f'({low}, {high}), or {round(diff,3)} +/- {round(margin,3)}'

def z_stat_two_proportions(s1, n1, s2, n2):
    """
    Z statistic for difference of sample proportions.
    s1, n1: successes and size of the first sample
    s2, n2: successes and size of the second sample
    """
    p1 = s1/n1
    p2 = s2/n2
    # Pooled proportion: both samples combined into one.
    pooled = (s1+s2)/(n1+n2)
    pooled_variance = pooled*(1-pooled)
    standard_error = math.sqrt((pooled_variance/n1)+(pooled_variance/n2))
    return (p1-p2)/standard_error

def hypothesis_test_gt(s1, n1, s2, n2):
    """
    One-sided two-proportion z test (first proportion greater than second).
    s1, n1: successes and size of the first sample
    s2, n2: successes and size of the second sample
    Returns a dict with the z statistic ('z') and the area above it ('pval').
    """
    z_score = z_stat_two_proportions(s1, n1, s2, n2)
    upper_tail = 1-ztable(z_score)
    return {'z': z_score, 'pval': upper_tail}

def hypothesis_test_ne(s1, n1, s2, n2):
    """
    Two-sided two-proportion z test (proportions not equal).
    s1, n1: successes and size of the first sample
    s2, n2: successes and size of the second sample
    Returns a dict with the signed z statistic ('z') and the two-sided
    p-value ('pval').
    """
    z = z_stat_two_proportions(s1, n1, s2, n2)
    # The two-sided p-value is the area outside +/-|z|. abs() is required:
    # with a negative z the central area comes out negative and the
    # p-value would exceed 1.
    pval = 1-confidence_interval_of_z(abs(z))
    return {'z':z, 'pval': pval}

def tvalue(ci, df):
    """
    Get T Table value
    ci: confidence interval, 96% would be .96
    df: degrees of freedom, sample size - 1
    """
    upper_tail = (1-ci)/2
    # q: cumulative probability at the critical value (interval plus one tail)
    q = ci + upper_tail
    return stats.t.ppf(q, df=df)

def confidence_interval_paired_data(ci, mu, n, s):
    """
    T confidence interval for the mean difference of paired data.
    ci: confidence interval
    mu: mean of differences between data pairs
    n: number of trials
    s: standard deviation of differences
    Returns a dict with 'interval' ("mu+/-err") and 'range' ("low,high") strings.
    """
    critical_t = tvalue(ci, n-1)
    standard_error = s/math.sqrt(n)
    err = critical_t*standard_error
    low = mu-err
    high = mu+err
    return {'interval': f'{mu}+/-{err}', 'range': f'{low},{high}'}

def mean_normalized_interval(mean, min, max):
    """
    Machine learning (feature scaling): mean normalization.
    mean: mean of the feature
    min, max: smallest and largest feature values
    Returns [scaled min, scaled max], each as (value - mean) / (max - min).
    """
    span = max-min
    lower = (min-mean)/span
    upper = (max-mean)/span
    return [lower, upper]

def z_normalized_interval(stdev, mean, min, max):
    """
    Machine learning (feature scaling): z-score normalization.
    stdev: standard deviation of the feature
    mean: mean of the feature
    min, max: smallest and largest feature values
    Returns [scaled min, scaled max], each as (value - mean) / stdev.
    """
    lower = (min-mean)/stdev
    upper = (max-mean)/stdev
    return [lower, upper]


def main():
    """
    Print a few sample calculations as a quick demonstration.
    """
    z_lookup = ztable(1.53)
    print('ZTable value @ 1.53: ', z_lookup)

    exactly_four = binompdf(7, .35, 4)
    print('binompdf(7, .35, 4): ', exactly_four)

    sample_size = smallest_sample_n_for_z(.99, .03, .06)
    print('eg smallest sample size:', sample_size)


# Run the demonstration only when executed as a script, not on import.
if __name__ == "__main__":
    main()
	# -*- coding: utf-8 -*-
	"""
	Functions to solve AP stats problems.
	"""

	import math # python built-in
	from statistics import NormalDist # python built-in
	import scipy.stats as stats # scipy provides T distribution

	def binompdf(n, p, k):
	    """
	    TI-84 binompdf: probability of exactly k successes in n trials.
	    n: number of trials
	    p: probability of success
	    k: number of successes
	    Returns 0.0 when k > n, since more successes than trials is impossible.
	    """
	    # Without this guard, k > n asks math.pow for a negative power of (1-p),
	    # which raises when p == 1 and can overflow when p is close to 1.
	    if k > n:
	        return 0.0
	    return math.comb(n, k) * math.pow(p, k) * math.pow(1-p, n-k)

	def binomcdf(n, p, k):
	    """
	    TI-84 binomcdf: probability of at most k successes in n trials.
	    n: number of trials
	    p: probability of success
	    k: max number of successes
	    """
	    # fsum adds one binompdf term per success count from 0 through k
	    # without accumulating rounding error.
	    return math.fsum(binompdf(n, p, ke) for ke in range(k+1))

	def geometpdf(p, x):
	    """
	    TI-84 geometpdf: probability that the first success occurs on trial x.
	    p: probability of success
	    x: number of first success
	    Returns 0.0 for x < 1, since the first success cannot come before trial 1.
	    """
	    # Without this guard, x < 1 raises (1-p) to a negative power, giving
	    # values above p (or a ZeroDivisionError when p == 1).
	    if x < 1:
	        return 0.0
	    # x-1 failures followed by one success.
	    return (1-p)**(x-1) * p

	def geometcdf(p, x):
	    """
	    TI-84 geometcdf: probability that the first success occurs within x attempts.
	    p: probability of success
	    x: number of attempts
	    Returns 0.0 for x < 1, since no success can occur in fewer than one attempt.
	    """
	    # Without this guard a negative x makes (1-p)**x exceed 1, so the
	    # result would be a negative "probability".
	    if x < 1:
	        return 0.0
	    return 1 - (1-p)**x

	def normalcdf(stddevs):
	    """
	    Standard normal cumulative probability below stddevs.
	    Like TI-84 normalcdf, but only the upper bound is accepted.
	    stddevs: upper bound, in standard deviations from the mean
	    """
	    # NormalDist() with no arguments is the standard normal (mean 0, sd 1).
	    return NormalDist().cdf(stddevs)

	def ztable(stddevs):
	    """
	    Return student Z table value, aka the cumulative distribution
	    function (cdf) value of the standard normal curve.
	    stddevs: number of standard deviations above or below population mean.
	    """
	    return normalcdf(stddevs)

	def binomial_mean_stddev(p, n):
	    """
	    Mean and standard deviation of a binomial distribution.
	    p: probability of success
	    n: number of trials
	    Returns [mean, standard deviation].
	    """
	    return [n*p, math.sqrt(n*p*(1-p))]

	def geometric_mean_stddev(p):
	    """
	    Mean and standard deviation of a geometric distribution.
	    p: probability of success
	    Returns [mean, standard deviation].
	    """
	    return [1/p, math.sqrt(1-p)/p]

	def z_of_confidence_interval(c):
	    """
	    Returns the (positive) critical z value of confidence interval c.
	    c: confidence interval, 95% would be .95
	    """
	    # (1-c)/2 is the area in each tail; the z at the lower tail is the
	    # mirror image of the critical value, so flip its sign.
	    return -NormalDist().inv_cdf((1-c)/2)

	def confidence_interval_of_z(z):
	    """
	    Returns confidence interval of z value: the area under the standard
	    normal curve between -|z| and +|z|.
	    z: number of standard deviations; the sign is ignored
	    """
	    # Use the magnitude so a negative z gives the same central area as its
	    # positive counterpart instead of a negative result.
	    bound = abs(z)
	    return ztable(bound) - ztable(-bound)

	def smallest_sample_n_for_z(c, e, p=.5):
	    """
	    Returns smallest sample size required to obtain
	    desired margin of error.
	    c: confidence interval, 90% would be .9
	    e: margin of error, 4% would be .04
	    p: expected probability of success (.5 is default since it maximizes SE)
	    The value is returned unrounded; round up for a whole number of samples.
	    """
	    z = z_of_confidence_interval(c)
	    # Solves e = z * sqrt(p(1-p)/n) for n.
	    n = (p*(1-p))/((e/z)**2)
	    return n

	def confidence_interval(c, s1, n1, s2=0.0, n2=1.0):
	    """
	    Confidence interval of sampled proportion(s).
	    Pass s2 and n2 in order to compute interval of
	    difference between two proportions.
	    c: confidence interval, 95% would be .95
	    s1, n1: successes and size of the first sample
	    s2, n2: successes and size of the second sample (the defaults, 0 out
	        of 1, add nothing to the estimate or its standard error)
	    Returns a string holding the interval and the 'estimate +/- margin'
	    form, each rounded to 3 decimal places.
	    """
	    z = z_of_confidence_interval(c)
	    p1 = s1/n1
	    p2 = s2/n2
	    std = math.sqrt((p1*(1-p1))/n1 + (p2*(1-p2))/n2)
	    diff = p1-p2
	    return f'({round(diff-z*std,3)}, {round(diff+z*std,3)}), or {round(diff,3)} +/- {round(z*std,3)}'

	def z_stat_two_proportions(s1, n1, s2, n2):
	    """
	    Z statistic for difference of sample proportions.
	    s1, n1: successes and size of the first sample
	    s2, n2: successes and size of the second sample
	    """
	    p1 = s1/n1
	    p2 = s2/n2
	    # pc: pooled proportion, both samples combined into one.
	    pc = (s1+s2)/(n1+n2)
	    z = (p1-p2)/math.sqrt((pc*(1-pc)/n1)+(pc*(1-pc)/n2))
	    return z

	def hypothesis_test_gt(s1, n1, s2, n2):
	    """
	    One-sided two-proportion z test (first proportion greater than second).
	    s1, n1: successes and size of the first sample
	    s2, n2: successes and size of the second sample
	    Returns a dict with the z statistic ('z') and the area above it ('pval').
	    """
	    z = z_stat_two_proportions(s1, n1, s2, n2)
	    pval = 1-ztable(z)
	    return {'z':z, 'pval': pval}

	def hypothesis_test_ne(s1, n1, s2, n2):
	    """
	    Two-sided two-proportion z test (proportions not equal).
	    s1, n1: successes and size of the first sample
	    s2, n2: successes and size of the second sample
	    Returns a dict with the signed z statistic ('z') and the two-sided
	    p-value ('pval').
	    """
	    z = z_stat_two_proportions(s1, n1, s2, n2)
	    # The two-sided p-value is the area outside +/-|z|. abs() is required:
	    # with a negative z the central area comes out negative and the
	    # p-value would exceed 1.
	    pval = 1-confidence_interval_of_z(abs(z))
	    return {'z':z, 'pval': pval}

	def tvalue(ci, df):
	    """
	    Get T Table value
	    ci: confidence interval, 96% would be .96
	    df: degrees of freedom, sample size - 1
	    """
	    # q: cumulative probability at the critical value (interval plus one tail)
	    q = ci + (1-ci)/2
	    return stats.t.ppf(q=q, df=df)

	def confidence_interval_paired_data(ci, mu, n, s):
	    """
	    T confidence interval for the mean difference of paired data.
	    ci: confidence interval
	    mu: mean of differences between data pairs
	    n: number of trials
	    s: standard deviation of differences
	    Returns a dict with 'interval' ("mu+/-err") and 'range' ("low,high") strings.
	    """
	    t = tvalue(ci, n-1)
	    # Margin of error: critical t times the standard error of the mean.
	    err = t*(s/math.sqrt(n))
	    return {'interval': f'{mu}+/-{err}', 'range': f'{mu-err},{mu+err}'}

	def mean_normalized_interval(mean, min, max):
	    """
	    Machine learning (feature scaling): mean normalization.
	    mean: mean of the feature
	    min, max: smallest and largest feature values
	    Returns [scaled min, scaled max], each as (value - mean) / (max - min).
	    """
	    delta = max-min
	    return [(min-mean)/delta, (max-mean)/delta]

	def z_normalized_interval(stdev, mean, min, max):
	    """
	    Machine learning (feature scaling): z-score normalization.
	    stdev: standard deviation of the feature
	    mean: mean of the feature
	    min, max: smallest and largest feature values
	    Returns [scaled min, scaled max], each as (value - mean) / stdev.
	    """
	    return [(min-mean)/stdev, (max-mean)/stdev]


	def main():
	    """
	    Print a few sample calculations as a quick demonstration.
	    """
	    print('ZTable value @ 1.53: ', ztable(1.53))
	    print('binompdf(7, .35, 4): ', binompdf(7, .35, 4))
	    print('eg smallest sample size:', smallest_sample_n_for_z(.99, .03, .06))

	# Run the demonstration only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()
No results found