#LOGISTIC REGRESSION in chapter 8 of Field, Miles, & Field, 2012 #Install packages install.packages ("MCMCglmm") #Initiate packages library(MCMCglmm) #Enter data eelNames<- c("Cured", "Intervention", "Duration") eelData <- ' 0 0 7 1 0 6 1 1 5 0 1 6 1 1 7 1 0 7 1 0 8 0 1 7 1 0 5 1 1 7 1 0 9 0 1 10 1 1 7 0 1 8 0 0 5 0 0 8 0 0 4 0 1 7 0 1 8 1 0 6 1 1 6 1 1 6 1 1 7 1 0 7 0 0 8 1 1 7 1 1 7 0 0 7 1 1 7 1 1 8 1 0 7 0 0 9 1 0 7 0 1 8 1 1 8 0 0 7 0 0 7 1 0 7 0 0 8 0 0 7 1 1 8 0 0 7 0 1 8 1 1 8 1 1 9 1 0 7 1 1 10 0 1 5 1 0 7 0 1 8 1 0 9 0 0 5 1 1 10 1 1 8 0 1 7 1 1 6 1 0 5 0 1 6 1 1 7 0 0 7 1 1 7 0 0 7 1 1 7 1 1 7 0 1 8 1 1 5 1 0 6 0 0 7 0 0 6 1 1 7 0 0 7 0 0 9 1 1 6 1 0 6 0 0 7 1 0 7 0 0 6 0 0 7 1 1 8 1 1 9 0 0 4 0 0 6 1 1 9' eelData <- data.frame( read.table(text=eelData, fill=TRUE, col.names=eelNames) ) ###################### regular, non Bayesian analyses of the data ###################### # conventional logistic regression (p. 330) eelModel.1 <- glm(Cured ~ Intervention + Duration, data = eelData, family = binomial()) summary(eelModel.1) ###################### Bayesian Logistic Regression using MCMCglmm ###################### # broad priors for logistic regression -- see ?gelman.prior priors <- list(B=list(mu=c(0,0,0), V=gelman.prior(Cured~Intervention + Duration, data=eelData, scale=sqrt(pi^2/3+1))), R=list(V=1,fix=1)) model1 = MCMCglmm(Cured ~ Intervention + Duration, data = eelData, family = "categorical", prior = priors, nitt=50000, burnin=3000, thin=10, verbose=F) summary(model1) autocorr(model1$VCV) plot(model1) # running the same analyses using different priors, & then plotting the various posteriors for "Intervention" estimateNum = 2 # the number of regression estimate to be used for the analyses e.g., the 1st (intercept), or 2nd, etc Ndatasets = 5 # the number of additional analyses using different priors # plot data for the first Bayes model densdat <- density(model1$Sol[,estimateNum]) plotdatx <- densdat$x plotdaty <- densdat$y # for MCMCglmm priors, B is for fixed effects. B is a list containing the expected value (mu) # and a (co)varcv bgiance matrix (V) representing the strength of belief: the defaults are B$mu=0 # and B$V=I*1e+10, where where I is an identity matrix of appropriate dimension # the different priors will be derived from the lm, non-Bayesian regression coefficients # the mu values will be random values from a 4 SE range around the lm estimates # the V values will be set at (4 * the SEs)**2 fit.list <- summary.lm(eelModel.1) coeffs <- fit.list$coefficients coefnames <- rownames(coeffs) # ranges around the estimates MUranges <- cbind( (coeffs[,1] - (4 * coeffs[,2])), (coeffs[,1] + (4 * coeffs[,2])) ) # random values from the MUranges for each coefficient MUs <- matrix(-9999,nrow(coeffs),Ndatasets) for (lupe in 1:nrow(coeffs)) { MUs[lupe,] <- runif(Ndatasets, min=MUranges[lupe,1], max=MUranges[lupe,2]) } dimnames(MUs) <-list(rep("", dim(MUs)[1])) colnames(MUs) <- seq(1:Ndatasets) cat('\n\nPrior regression coefficient estimates for each additional analysis:') print(round(MUs,2)) for (lupeSets in 1:Ndatasets) { priors <- list(B=list(mu=MUs[,lupeSets], V=gelman.prior(Cured~Intervention + Duration, data=eelData, scale=sqrt(pi^2/3+1))), R=list(V=1,fix=1)) model2 = MCMCglmm(Cured ~ Intervention + Duration, data = eelData, family = "categorical", prior = priors, nitt=50000, burnin=3000, thin=10, verbose=F) densdat <- density(model2$Sol[,estimateNum]) plotdatx <- cbind( plotdatx, densdat$x) plotdaty <- cbind( plotdaty, densdat$y) } matplot( plotdatx, plotdaty, main="", xlab=paste('Estimate for', coefnames[estimateNum]), ylab='Density', font.lab=1.8, type='l', cex.lab=1.2, col=c(2,rep(1,ncol(plotdatx))), lty=1, lwd=1 ) title(main=paste('Density Plots of the Posteriors for', coefnames[estimateNum], '\nProduced Using Differing Priors')) legend("topright", c("broad priors","random priors"), bty="n", lty=c(1,1), lwd=2, cex=1.2, text.col=c(2,1), col=c(2,1) ) ################ posterior predictive checks using the bayesplot package ###################### install.packages("bayesplot"); library(bayesplot) # from the bayesplot package documentation: # The bayesplot package provides various plotting functions for graphical posterior predictive checking, # that is, creating graphical displays comparing observed data to simulated data from the posterior # predictive distribution. The idea behind posterior predictive checking is simple: if a model is a # good fit then we should be able to use it to generate data that looks a lot like the data we observed. y <- eelData$Cured # the DV that was used for the MCMCglmm model # generating the simulated data using the simulate.MCMCglmm function from the MCMCglmm package yrep <- simulate.MCMCglmm(model1, 25) yrep <- t( yrep ) # transposing yrep for the bayesplot functions # ppc_stat: A histogram of the distribution of a test statistic computed by applying stat to each # dataset (row) in yrep. The value of the statistic in the observed data, stat(y), is # overlaid as a vertical line. ppc_stat(y, yrep, binwidth = 1) # ppc_dens_overlay: Kernel density or empirical CDF estimates of each dataset (row) in yrep are # overlaid, with the distribution of y itself on top (and in a darker shade). ppc_dens_overlay(y, yrep[1:25, ]) # ppc_scatter_avg: A scatterplot of y against the average values of yrep, i.e., the # points (mean(yrep[, n]), y[n]), where each yrep[, n] is a vector of length equal # to the number of posterior draws. ppc_scatter_avg(y, yrep) # ppc_hist: A separate histogram, shaded frequency polygon, smoothed kernel density estimate, # or box and whiskers plot is displayed for y and each dataset (row) in yrep. # For these plots yrep should therefore contain only a small number of rows. ppc_hist(y, yrep[1:8, ], binwidth = .3) ###################### now conduct the Bayesian analyses using the rstanarm package ###################### install.packages("httpuv") install.packages("xts") install.packages("rstanarm") library(httpuv) library(xts) library(rstanarm) model10 <- stan_glm(Cured ~ Intervention + Duration, data = eelData, family = binomial(), warmup = 1000, iter = 2000, sparse = FALSE, seed = 123) summary(model10, digits = 3) plot(model10)