diamonaj

## CS130_voting.R
#####################################
#####################################
#####################################

# Run a logistic regression and generate expected values of turnout, holding some predictors constant while others vary.


# Pull the turnout data set from its published Google Sheets CSV export.
df <- read.csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSjMB6H07nhXmgdARkAMkUVWt7QPlzdD-RrDor2g_BSL9vil8V4efJ-iO-nQtZuqPE_klZPi6qNJ9Pw/pub?gid=921072292&single=true&output=csv"
)

# Logistic regression of `turnout` on every other column of `df`.
glm2 <- glm(
  formula = turnout ~ .,
  family = binomial,
  data = df
)

## loess.R

## Step 1: Load the Training Data

# Download and inspect the training set using `read.csv()` from the URL provided:

# **Training data link:**

# Fetch the training set from its published Google Sheets CSV export.
training <- read.csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSUROPfTOZfUEpf6Ebby-vta5zWCwt9KK-KAwSvpToGQjQSKdhYsUfoHxYxvbOYxW8_IQxBD9FqWFJg/pub?gid=383144413&single=true&output=csv"
)
# Show the first six rows (six is head()'s default, spelled out here).
head(training, n = 6L)

## gist:3bd7615a96d95bf75e7e638cae58d0bd
---
title: "CS130 Causal Inference Assignment Spring 2024"
output: html_document
date: "2024-04-01"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## loess_lm.R
# Deliberately no rm(list = ls()) here: it would silently delete every object
# in the workspace of whoever sources this script, yet it does not give a
# clean session (attached packages and options stay as they were). Restart R
# when a truly fresh state is needed.

# Load the training data from its published Google Sheets CSV export.
training <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSUROPfTOZfUEpf6Ebby-vta5zWCwt9KK-KAwSvpToGQjQSKdhYsUfoHxYxvbOYxW8_IQxBD9FqWFJg/pub?gid=383144413&single=true&output=csv")

# Preview the first rows of the data.
head(training)

# plot the data with big green dots
plot(training$x, training$y, main = "Training Data", pch = 16, cex = 3, col = "green")

################################################
#### RUN 3 DIFFERENT MODELS ON THE TRAINING SET

## reg_to_mean.R
# Fix the RNG seed so the simulated scores are reproducible.
set.seed(seed = 432)

# Pre-program scores: 50 normal draws centred on 0 with sd 10.
before <- rnorm(
  n = 50,
  mean = 0,
  sd = 10
)

# Post-program scores: each pre-program score plus an independent normal
# shift (mean 5, sd 20).
after <- before + rnorm(
  n = 50,
  mean = 5,
  sd = 20
)

# the scores are correlated, but not perfectly correlated
# correlation = 0.32... the program helps,

## racquetball.R
simulate_raquetball = function(number_of_games, prob_win_serve,
                               prob_win_noserve, points_to_win)
{
  # variable to track the number of wins
  wins = 0

  # variable to track the number of simulated games
  total_games = 1

  # list where we will store the number of volleys per game

## Using_an_optimizer_to_get_probability_threshold.R
# The Pima.tr / Pima.te data sets loaded below come from MASS. Install the
# package only when it is missing, so re-running the script does not reinstall
# it (or require network access) every time.
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS")
}
library(MASS)
data(Pima.tr)
data(Pima.te)
#############


## STEP 1: Logistic regression, predict diabetes yes or no ##
# Model `type` on all remaining columns of the training set.
logistic_reg <- glm(type ~ ., data = Pima.tr, family = binomial) # basic model
# With no `newdata`, predict() returns fitted values for the rows used to fit.
predict_logistic.tr <- predict(logistic_reg, type = "response")  # predicted probabilities (TRAINING SET)

## conf_and_prediction_intervals_for_logistic_regression.R
## This long coding example shows you how to obtain
## confidence intervals for logistic regression.
## The appendix at the very bottom also shows you how to obtain
## something analogous to prediction intervals
## for a logistic regression.

## Here's a high-level summary of the basic procedure, step by step:

## Step 1:  Run desired logistic regression, including any desired interactions
##

## Quiz_3_answers.R
# Quiz 3 ANSWERS
#######

# Read the article:
#  https://www.menshealth.com/trending-news/a30894231/amazon-interview-sock-puzzle/


# 1. Write a function that will simulate the act of pulling 2 socks out of the drawer
#    exactly as described in the Men's Health article. (i.e., selection without replacement)


## Quiz 2
###### Quiz 2 #######

# For this quiz you will analyze UN Peacekeeping data.

# At any given time, the UN is involved with many peacekeeping missions around the world.

# Almost all member-states contribute personnel to those missions. There are five types of personnel:
# experts on mission, troops, staff officers, individual police, and formed police units.
# In its efforts to involve more women in its global operations, the UN reports, monthly, the
# gender of each person sent on a mission. You can read more about it here:
	#####################################
	#####################################
	#####################################

	# Run a logistic regression and generate expected values of turnout, holding some predictors constant while others vary.


	# Pull the turnout data set from its published Google Sheets CSV export.
	df <- read.csv(
	  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSjMB6H07nhXmgdARkAMkUVWt7QPlzdD-RrDor2g_BSL9vil8V4efJ-iO-nQtZuqPE_klZPi6qNJ9Pw/pub?gid=921072292&single=true&output=csv"
	)

	# Logistic regression of `turnout` on every other column of `df`.
	glm2 <- glm(
	  formula = turnout ~ .,
	  family = binomial,
	  data = df
	)

	## Step 1: Load the Training Data

	# Download and inspect the training set using `read.csv()` from the URL provided:

	# Training data link:

	# Fetch the training set from its published Google Sheets CSV export.
	training <- read.csv(
	  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSUROPfTOZfUEpf6Ebby-vta5zWCwt9KK-KAwSvpToGQjQSKdhYsUfoHxYxvbOYxW8_IQxBD9FqWFJg/pub?gid=383144413&single=true&output=csv"
	)
	# Show the first six rows (six is head()'s default, spelled out here).
	head(training, n = 6L)
	---
	title: "CS130 Causal Inference Assignment Spring 2024"
	output: html_document
	date: "2024-04-01"
	---

	```{r setup, include=FALSE}
	knitr::opts_chunk$set(echo = TRUE)
	```
	# Deliberately no rm(list = ls()) here: it would silently delete every object
	# in the workspace of whoever sources this script, yet it does not give a
	# clean session (attached packages and options stay as they were). Restart R
	# when a truly fresh state is needed.

	# Load the training data from its published Google Sheets CSV export.
	training <- read.csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vSUROPfTOZfUEpf6Ebby-vta5zWCwt9KK-KAwSvpToGQjQSKdhYsUfoHxYxvbOYxW8_IQxBD9FqWFJg/pub?gid=383144413&single=true&output=csv")

	# Preview the first rows of the data.
	head(training)

	# plot the data with big green dots
	plot(training$x, training$y, main = "Training Data", pch = 16, cex = 3, col = "green")

	################################################
	#### RUN 3 DIFFERENT MODELS ON THE TRAINING SET
	# Fix the RNG seed so the simulated scores are reproducible.
	set.seed(seed = 432)

	# Pre-program scores: 50 normal draws centred on 0 with sd 10.
	before <- rnorm(
	  n = 50,
	  mean = 0,
	  sd = 10
	)

	# Post-program scores: each pre-program score plus an independent normal
	# shift (mean 5, sd 20).
	after <- before + rnorm(
	  n = 50,
	  mean = 5,
	  sd = 20
	)

	# the scores are correlated, but not perfectly correlated
	# correlation = 0.32... the program helps,
	simulate_raquetball = function(number_of_games, prob_win_serve,
	prob_win_noserve, points_to_win)
	{
	# variable to track the number of wins
	wins = 0

	# variable to track the number of simulated games
	total_games = 1

	# list where we will store the number of volleys per game
	# The Pima.tr / Pima.te data sets loaded below come from MASS. Install the
	# package only when it is missing, so re-running the script does not reinstall
	# it (or require network access) every time.
	if (!requireNamespace("MASS", quietly = TRUE)) {
	  install.packages("MASS")
	}
	library(MASS)
	data(Pima.tr)
	data(Pima.te)
	#############


	## STEP 1: Logistic regression, predict diabetes yes or no ##
	# Model `type` on all remaining columns of the training set.
	logistic_reg <- glm(type ~ ., data = Pima.tr, family = binomial) # basic model
	# With no `newdata`, predict() returns fitted values for the rows used to fit.
	predict_logistic.tr <- predict(logistic_reg, type = "response") # predicted probabilities (TRAINING SET)
	## This long coding example shows you how to obtain
	## confidence intervals for logistic regression.
	## The appendix at the very bottom also shows you how to obtain
	## something analogous to prediction intervals
	## for a logistic regression.

	## Here's a high-level summary of the basic procedure, step by step:

	## Step 1: Run desired logistic regression, including any desired interactions
	##
	# Quiz 3 ANSWERS
	#######

	# Read the article:
	# https://www.menshealth.com/trending-news/a30894231/amazon-interview-sock-puzzle/


	# 1. Write a function that will simulate the act of pulling 2 socks out of the drawer
	# exactly as described in the Men's Health article. (i.e., selection without replacement)
	###### Quiz 2 #######

	# For this quiz you will analyze UN Peacekeeping data.

	# At any given time, the UN is involved with many peacekeeping missions around the world.

	# Almost all member-states contribute personnel to those missions. There are five types of personnel:
	# experts on mission, troops, staff officers, individual police, and formed police units.
	# In its efforts to involve more women in its global operations, the UN reports, monthly, the
	# gender of each person sent on a mission. You can read more about it here: