# CA Developmentally Disabled Expenditures Analysis
#
# Plots mean Expenditures by Ethnicity, first ignoring AgeCohort and then
# within each AgeCohort, for the "Hispanic" and "White not Hispanic" groups.

# Read in data
# NOTE: absolute, machine-specific path; adjust before running elsewhere.
CAExpend <- read.csv("C:/Users/CMalone/Desktop/DSCI Workshop All Content/CA_DDExpenditures/CA_DDExpenditures.csv")

# View() opens a data viewer, so only call it from an interactive session
if (interactive()) {
  View(CAExpend)
}

# Means only on Ethnicity -- age *ignored* here
output <- aggregate(CAExpend$Expenditures,
                    by = list(CAExpend$Ethnicity),
                    FUN = mean)

# Convert to a named vector for barplot() function
output.vector <- as.vector(output[, 2])
names(output.vector) <- output[, 1]

# Pick the two groups of interest by name rather than by row position, so the
# plot does not depend on how many ethnicities the file contains or their order
barplot(output.vector[c("Hispanic", "White not Hispanic")],
        ylim = c(0, 60000))

# Get a subset and reconstruct plot
CAExpend.Subset <- CAExpend[
  CAExpend$Ethnicity %in% c("Hispanic", "White not Hispanic"),
]
output <- aggregate(CAExpend.Subset$Expenditures,
                    by = list(CAExpend.Subset$Ethnicity),
                    FUN = mean)
output.vector <- as.vector(output[, 2])
names(output.vector) <- output[, 1]
barplot(output.vector, ylim = c(0, 60000))

# Means on Ethnicity AND AgeCohort -- age considered here
output2 <- aggregate(CAExpend.Subset$Expenditures,
                     by = list(CAExpend.Subset$Ethnicity,
                               CAExpend.Subset$AgeCohort),
                     FUN = mean)

# Convert to vector for barplot() function
output2.vector <- as.vector(output2[, 3])

# Make names pretty: first letter of the ethnicity (H for Hispanic, W for
# White not Hispanic), then the age cohort with its spaces removed
names(output2.vector) <- paste(substr(output2[, 1], 1, 1),
                               gsub(" ", "", output2[, 2]),
                               sep = ":")
barplot(output2.vector, ylim = c(0, 60000), las = 2)

######################
# Change order of Age on x-axis

# Correct number of factor levels for ethnicity; not necessary for plotting
# but a data quality issue
CAExpend.Subset$Ethnicity <- factor(
  CAExpend.Subset$Ethnicity,
  levels = c("Hispanic", "White not Hispanic")
)

# Redefine order of Age Cohort, necessary to get order correct for Age.
# The level strings must match the data exactly, including embedded spaces.
age.levels <- c(" 0 - 5", "6-12", "13-17", "18-21", "22-50", " 51 +")

# factor() silently turns any value missing from `levels` into NA, and
# aggregate() then drops those rows; report such values instead of losing them
unmatched.age <- setdiff(unique(as.character(CAExpend.Subset$AgeCohort)),
                         c(age.levels, NA))
if (length(unmatched.age) > 0) {
  warning("AgeCohort values not in the expected levels will be dropped: ",
          paste0("'", unmatched.age, "'", collapse = ", "),
          call. = FALSE)
}
CAExpend.Subset$AgeCohort <- factor(CAExpend.Subset$AgeCohort,
                                    levels = age.levels)

# Obtain average expenditures using new ordering of AgeCohort
output3 <- aggregate(CAExpend.Subset$Expenditures,
                     by = list(CAExpend.Subset$Ethnicity,
                               CAExpend.Subset$AgeCohort),
                     FUN = mean)

# Convert to vector for barplot() function
output3.vector <- as.vector(output3[, 3])

# Make names pretty, same scheme as above
names(output3.vector) <- paste(substr(output3[, 1], 1, 1),
                               gsub(" ", "", output3[, 2]),
                               sep = ":")
barplot(output3.vector, ylim = c(0, 60000), las = 2)

# Adding vertical lines: redraw the plot, saving the bar midpoints that
# barplot() returns into barplot.output
barplot.output <- barplot(output3.vector, ylim = c(0, 60000), las = 2)

# Add title, 75% of default font size
title(main = "Comparing Hispanics to White - Age Adjusted", cex.main = 0.75)

# Use abline() and v= for vertical lines. A separator goes halfway between
# every even-numbered bar and the bar after it, i.e. between consecutive
# pairs of bars; positions are derived from the number of bars actually drawn.
bar.mid <- barplot.output[, 1]
bar.index <- seq_along(bar.mid)
pair.end <- bar.index[bar.index %% 2 == 0 & bar.index < length(bar.mid)]
if (length(pair.end) > 0) {
  abline(v = (bar.mid[pair.end] + bar.mid[pair.end + 1]) / 2, col = "gray")
}