# POLS/CSSS 503
# Lab Session 4: April 20, 2012
# Daniel Berliner

# Agenda:

# 1. Questions on HW 2?
# 2. More useful tools
# 3. Examples using loops
# 4. Simulating the Monty Hall Problem
# 5. Exercise: The Birthday Problem


#### 1.  Questions on HW 2?

#More example plots:

setwd("C:/Documents and Settings/Dan B/Desktop/POLS 503 TA 2012/Lab 1")
data1<-read.csv("rossoildata.csv", na.strings="")
data2<-data1[,c("cty_name","id","year","regime1","oil","GDPcap","oecd","govtconsump")]

#basic plot
plot(data2$GDPcap[data2$year==1995], data2$govtconsump[data2$year==1995])

#modifying the point type
plot(data2$GDPcap[data2$year==1995], data2$govtconsump[data2$year==1995], pch=19)

#using text instead of points
plot(data2$GDPcap[data2$year==1995], data2$govtconsump[data2$year==1995], type="n")
text(data2$GDPcap[data2$year==1995], data2$govtconsump[data2$year==1995], 
labels=data2$id[data2$year==1995], cex=.7)

#using conditional selection with different colors and point types
plot(data2$GDPcap[data2$year==1995], data2$govtconsump[data2$year==1995], type="n",
xlab="GDP per Capita", ylab="Government Consumption per GDP", main="Economic Development and the Size of Government\nin 1995")
points(x=data2$GDPcap[data2$year==1995 & data2$regime1<8], 
y=data2$govtconsump[data2$year==1995 & data2$regime1<8],
col="red", pch=19)
points(x=data2$GDPcap[data2$year==1995 & data2$regime1>=8], 
y=data2$govtconsump[data2$year==1995 & data2$regime1>=8],
col="blue", pch=19)
legend(x=10000, y=45, legend=c("Autocracies","Democracies"), pch=c(19,19), col=c("red","blue"))



#### 2.  More useful tools

z<-rnorm(10, mean=100, sd=1) # draw randomly from the normal distribution
z1<-runif(10,0,1) #draw from a uniform distribution between min and max values
sample(z, 2) # samples two numbers from the z vector
sort(z) # puts z in (ascending) order.
sort(z, decreasing=T)
sample(c("red","blue","yellow","green"),10,replace=T)
sample(c("red","blue","yellow","green"),3,replace=F) #note you cannot have a higher number of draws when replace=F

which(data2$id=="CAN") # tells you the position(s) where the condition is met
which.max(z1)


# IF conditions
a<-10
if (a==10) print("Yes, a is 10.")
print("This gets printed no matter what value a was.")
# the statement immediately after the if statement is run only when the argument in parentheses evalues to TRUE.  Try #setting 'a' to another value and see what happens.

# Note: use braces {} after the if statement if you want multiple commands to be executed when the condition is met:
if (a==10) {
    print("Yes, a is 10.")
    print("Because a is 10 I'm doing this too.")
    } else print("DIDNT WORK")

print("This gets printed no matter what value a was.")


# FOR loops
for (i in 1:10){
print(i^2)
} # close the i loop

# you can put as many lines of code as you want in between the {} braces, including other for loops and if statements, and whatever else you want.



#### 3. Examples using loops
data3<-data1[,c("cty_name","year","regime1","oil","GDPcap","oecd")]

#We're going to examine the level of missingness for each country

#a list of every country name
uc<-unique(data3$cty_name)
uc

#convert from factor data to character data
uc<-as.character(uc)

#the number of countries in the dataset
numcountries<-length(uc)

#empty vectors to hold our results
country_obs<-rep(NA, numcountries)
country_na<-rep(NA, numcountries)

# This loop is going to go through each individual country in turn, and find out two things:
# 1. how many observations are there for that country?
# 2. how many observations for that country are left after listwise deletion of missing data?
# After evaluating these questions for each country, and storing the results in two different vectors, the loop will repeat the same tasks for the next country.
# The counter variable "i" simply goes up by 1 for each iteration of the loop. By referencing "uc[i]", we are asking the lines of code inside the loop to apply to the first country, then the second country, then the third, and so on...

for(i in 1:numcountries){
	tempdata<-data3[data3$cty_name==uc[i],]
	na_omit_tempdata<-na.omit(tempdata)
	country_obs[i]<-nrow(tempdata)
	country_na[i]<-nrow(na_omit_tempdata)
	}

cbind(uc,country_obs,country_na,country_na/country_obs)
#Why are there quotes around the numbers? How might you fix this?

na_table<-as.data.frame(cbind(country_obs,country_na,round(country_na/country_obs,2)))
rownames(na_table)<-uc
colnames(na_table)<-c("All obs","Remaining obs","Proportion")

na_table[order(na_table$Proportion, decreasing=T),]



### Another example: Run a model for each year
uy<-unique(data3$year)
yearmodels<-as.list(uy)

#create a dichotomous variable of whether a country is a democracy or an autocracy
data3$dem_indicator<-rep(0,nrow(data3))
data3$dem_indicator[data3$regime1>8]<-1

#transforming GDP per capita to be measured in thousands of dollars
data3$GDPcap1<-data3$GDPcap/1000

#creating vectors to hold the results we are interested in
dem_coef<-rep(NA,length(uy))
dem_se<-rep(NA,length(uy))

for(i in 1:length(uy)){
	tempdata<-data3[data3$year==uy[i],]
	res_temp<-lm(GDPcap1 ~ dem_indicator + oil, data=tempdata)
	dem_coef[i]<-coef(res_temp)[2]
	dem_se[i]<-sqrt(diag(vcov(res_temp)))[2]
}
### What is this loop doing?



plot(uy,dem_coef,ylim=c(0,10), type="n", xlab="Year", ylab="Coefficient for Democracy Indicator",
main="Association between Democracy and Development in each Year")
segments(x0=uy,
	y0=dem_coef - 1.96*dem_se, 
	x1=uy,
	y1=dem_coef + 1.96*dem_se,
	lwd=2, col="gray50")
points(uy, dem_coef, pch=19)
abline(h=0, col="red")





#### 4. SIMULATING THE MONTY HALL PROBLEM

#On Let's Make a Deal, host Monty Hall offers you the following choice:

    #1. There are 3 doors. Behind one is a car. Behind the other two are goats.
    #2. You choose a door. It stays closed.
    #3. Monty picks one of the two remaining doors, and opens it to reveal a goat.
    #4. Your choice: Keep the door you chose in step 1, or switch to the third door.

#What should you do?

#What is the probability of a car from staying?
#What is the probability of a car from switching?

# The simulation approach:

# Set up the doors, goats, and car
# Contestant picks a door
# Monty "picks" a remaining door
# Record where the car and goats were
# Do all of the above many many times
# Print the fraction of times a car was found


sims <- 10000 # Simulations run
doors <- c(1,0,0) # The car (1) and the goats (0)
cars.stay <- 0 # Save cars from first choice here
cars.switch <- 0 # Save cars from switching here
for (i in 1:sims) {
	random.doors <- sample(doors,3,replace=F)
	cars.stay <- cars.stay + random.doors[1] #First choose "door number 1"
	cars.switch <- cars.switch + sort(random.doors[2:3])[2] #Do you understand what this line of code is doing?
}

paste("Probability of a car from staying with 1st door", cars.stay/sims, sep=": ")
paste("Probability of a car from switching to 2nd door", cars.switch/sims, sep=": ")



## A slightly different solution:

# Monty Hall Problem
# Chris Adolph
# 1/6/2005
sims <- 10000 # Simulations run
doors <- c(1,0,0) # The car (1) and the goats (0)
cars.chosen <- 0 # Save cars from first choice here
cars.rejected <- 0 # Save cars from switching here
for (i in 1:sims) { # Loop through simulations
# First, contestant picks a door
first.choice <- sample(doors,3,replace=F)
# Choosing a door means rejecting the other two
chosen <- first.choice[1]
rejected <- first.choice[2:3]
# Monty Hall removes a goat from the rejected doors
rejected <- sort(rejected)
if (rejected[1]==0)
rejected <- rejected[2]
# Record keeping: where was the car?
cars.chosen <- cars.chosen + chosen
cars.rejected <- cars.rejected + rejected
}
cat("Probability of a car from staying with 1st door",
cars.chosen/sims,"\n")
cat("Probability of a car from switching to 2nd door",
cars.rejected/sims,"\n")



### 5. EXERCISE: THE BIRTHDAY PROBLEM

# Question: What is the probability that at least two students will share the same birthday in a class of 20?
# Use a simulation method to answer this.  (Ignore leap years.)



# Bonus question: Modify your quote to answer the question for varying class sizes.

