######################################################### # Reading in the data ######################################################### #Reading in the data containing the sampling frame Commute <- read.csv( file.choose() ) #Looking at the data View(Commute) #Get the dplyr library library(dplyr) #Filter out the missingness, i.e. the na values Commute2 <- filter(Commute,!is.na(MeanTravelTime_Minutes)) ######################################################### ######################################################### #Take a simple random sample of size n=500, doing the sampling without replacement #Sample here simply identifies which rows should be included in the sample mysample<-sample(1:30868,size=500,replace=FALSE) #Getting the outcomes for the rows selected by the simple random sample sample_data <- Commute2 %>% filter(row_number() %in% mysample) %>% select(MeanTravelTime_Minutes) #Getting the mean commute time for the sample sample_data %>% summarize('Mean'= mean(MeanTravelTime_Minutes)) ######################################################### ######################################################### #Creating a function to take a simple random sample MySim_SRS <- function(){ mysample<-sample(1:30868,size=500,replace=FALSE) sample_data <- Commute2 %>% filter(row_number() %in% mysample) %>% select(MeanTravelTime_Minutes) output<-sample_data %>% summarize('Mean'= mean(MeanTravelTime_Minutes)) return(output) } ######################################################### ######################################################### #Doing a simulation with 2 iterations SRS_Output <- data.frame( "Estimates" = unlist( replicate(2, MySim_SRS() ) ) ) #Looking at the simulation outcomes View(SRS_Output) ######################################################### ######################################################### #Doing a simulation with 5000 iterations SRS_Output <- data.frame( "Estimates" = unlist( replicate(5000, MySim_SRS() ) ) ) ######################################################### ######################################################### # Getting summaries of the bias #Computing the bias in the estimated sample means for each of simulated outcomes SRS_Output = mutate(SRS_Output, "Bias" = Estimates - 25.9) #Computing the average bias over all 5000 iterations of the simulation SRS_Output %>% summarize("Overall Average Bias" = mean(Bias)) #Computing the median bias over all 5000 iterations of the simulation SRS_Output %>% summarize("Overall Median Bias" = median(Bias)) #Plotting the bias library(ggplot2) ggplot(SRS_Output, aes(Bias)) + geom_dotplot(binwidth=0.01) #Is the bias distributed centered around 0 length(which(SRS_Output$Bias < 0)) / 5000 #Getting the standard deviation in the estimates SRS_Output %>% summarize("Std Dev of Estimates" = sd(Estimates)) #Using statistical theory to compute the standard error of the estimates Commute2 %>% summarize("Std Dev of Mean Travel Time" = sd(MeanTravelTime_Minutes)) Commute2 %>% summarize("Std Error of Mean Travel Time" = sd(MeanTravelTime_Minutes) / sqrt(500) ) ######################################################### ######################################################### #Creating a function to take a simple random sample with varying sample size MySim_SRS <- function(samplesize=100){ mysample<-sample(1:30868,size=samplesize,replace=FALSE) sample_data <- Commute2 %>% filter(row_number() %in% mysample) %>% select(MeanTravelTime_Minutes) output<-sample_data %>% summarize('Mean'= mean(MeanTravelTime_Minutes)) return(output) } ######################################################### #Getting a simulation with sample size of 250 MySim_SRS(samplesize=250)