> # Enter data
> eelNames <- c("Cured", "Intervention", "Duration")
>
> eelData <- '
+ 0 0 7
+ 1 0 6
+ 1 1 5
+ 0 1 6
+ 1 1 7
+ 1 0 7
+ 1 0 8
+ 0 1 7
+ 1 0 5
+ 1 1 7
+ 1 0 9
+ 0 1 10
+ 1 1 7
+ 0 1 8
+ 0 0 5
+ 0 0 8
+ 0 0 4
+ 0 1 7
+ 0 1 8
+ 1 0 6
+ 1 1 6
+ 1 1 6
+ 1 1 7
+ 1 0 7
+ 0 0 8
+ 1 1 7
+ 1 1 7
+ 0 0 7
+ 1 1 7
+ 1 1 8
+ 1 0 7
+ 0 0 9
+ 1 0 7
+ 0 1 8
+ 1 1 8
+ 0 0 7
+ 0 0 7
+ 1 0 7
+ 0 0 8
+ 0 0 7
+ 1 1 8
+ 0 0 7
+ 0 1 8
+ 1 1 8
+ 1 1 9
+ 1 0 7
+ 1 1 10
+ 0 1 5
+ 1 0 7
+ 0 1 8
+ 1 0 9
+ 0 0 5
+ 1 1 10
+ 1 1 8
+ 0 1 7
+ 1 1 6
+ 1 0 5
+ 0 1 6
+ 1 1 7
+ 0 0 7
+ 1 1 7
+ 0 0 7
+ 1 1 7
+ 1 1 7
+ 0 1 8
+ 1 1 5
+ 1 0 6
+ 0 0 7
+ 0 0 6
+ 1 1 7
+ 0 0 7
+ 0 0 9
+ 1 1 6
+ 1 0 6
+ 0 0 7
+ 1 0 7
+ 0 0 6
+ 0 0 7
+ 1 1 8
+ 1 1 9
+ 0 0 4
+ 0 0 6
+ 1 1 9'
>
> eelData <- data.frame( read.table(text=eelData, fill=TRUE, col.names=eelNames) )
>
>
> ###################### regular, non-Bayesian analyses of the data ######################
>
> # conventional logistic regression (p. 330)
> eelModel.1 <- glm(Cured ~ Intervention + Duration, data = eelData, family = binomial())
>
> summary(eelModel.1)

Call:
glm(formula = Cured ~ Intervention + Duration, family = binomial(),
    data = eelData)

Deviance Residuals:
    Min       1Q   Median       3Q      Max
-1.5475  -1.0361   0.8527   0.8672   1.3375

Coefficients:
             Estimate Std. Error z value Pr(>|z|)
(Intercept)  -0.43585    1.32299  -0.329   0.7418
Intervention  1.13952    0.47263   2.411   0.0159 *
Duration      0.01342    0.18934   0.071   0.9435
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 114.08  on 82  degrees of freedom
Residual deviance: 107.60  on 80  degrees of freedom
AIC: 113.6

Number of Fisher Scoring iterations: 4

>
>
> ###################### Bayesian Logistic Regression using MCMCglmm ######################
>
> # broad priors for logistic regression -- see ?gelman.prior
> priors <- list(B=list(mu=c(0,0,0),
+                       V=gelman.prior(Cured ~ Intervention + Duration, data=eelData, scale=sqrt(pi^2/3+1))),
+                R=list(V=1, fix=1))
>
> model1 = MCMCglmm(Cured ~ Intervention + Duration, data = eelData, family = "categorical",
+                   prior = priors, nitt=50000, burnin=3000, thin=10, verbose=F)
>
> summary(model1)

 Iterations = 3001:49991
 Thinning interval = 10
 Sample size = 4700

 DIC: 111.9785

 R-structure:  ~units

      post.mean l-95% CI u-95% CI eff.samp
units         1        1        1        0

 Location effects: Cured ~ Intervention + Duration

             post.mean l-95% CI u-95% CI eff.samp  pMCMC
(Intercept)   -0.49428 -3.50922  2.35912     1609 0.7468
Intervention   1.32197  0.23856  2.41523     1520 0.0166 *
Duration       0.01622 -0.42086  0.41930     1591 0.9451
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

> autocorr(model1$VCV)
, , units

        units
Lag 0     NaN
Lag 10    NaN
Lag 50    NaN
Lag 100   NaN
Lag 500   NaN

> plot(model1)
Hit <Return> to see next plot:
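>
> # An optional follow-up (a sketch, not part of the original session): model1$Sol holds the
> # fixed-effect draws as a coda mcmc object, so the coda helpers that load with MCMCglmm
> # apply to it directly, e.g.:
> #   HPDinterval(model1$Sol, prob = 0.95)   # 95% highest-posterior-density intervals
> #   effectiveSize(model1$Sol)              # effective draws (cf. eff.samp in the summary)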
>
> # running the same analyses using different priors, and then plotting the various posteriors for "Intervention"
>
> estimateNum = 2    # the regression estimate to be used for the analyses, e.g., 1 = the intercept, 2 = the first predictor, etc.
>
> Ndatasets = 5      # the number of additional analyses using different priors
>
> # plot data for the first Bayes model
> densdat <- density(model1$Sol[,estimateNum])
> plotdatx <- densdat$x
> plotdaty <- densdat$y
>
> # for MCMCglmm priors, B is for fixed effects. B is a list containing the expected value (mu)
> # and a (co)variance matrix (V) representing the strength of belief: the defaults are B$mu=0
> # and B$V=I*1e+10, where I is an identity matrix of appropriate dimension
>
> # the different priors will be derived from the coefficients of the conventional,
> # non-Bayesian regression model above: the mu values will be random values drawn from
> # within 4 SEs of the estimates, and the V values will be set at (4 * the SEs)**2
>
> fit.list <- summary.lm(eelModel.1)
> coeffs <- fit.list$coefficients
> coefnames <- rownames(coeffs)
>
> # ranges around the estimates
> MUranges <- cbind( (coeffs[,1] - (4 * coeffs[,2])), (coeffs[,1] + (4 * coeffs[,2])) )
>
> # random values from the MUranges for each coefficient
> MUs <- matrix(-9999, nrow(coeffs), Ndatasets)
> for (lupe in 1:nrow(coeffs)) { MUs[lupe,] <- runif(Ndatasets, min=MUranges[lupe,1], max=MUranges[lupe,2]) }
> dimnames(MUs) <- list(rep("", dim(MUs)[1]))
> colnames(MUs) <- seq(1:Ndatasets)
> cat('\n\nPrior regression coefficient estimates for each additional analysis:')


Prior regression coefficient estimates for each additional analysis:
> print(round(MUs,2))
      1     2     3     4     5
  -1.79  4.87  0.56  4.62 -4.57
   2.03 -0.49  2.28  2.64  2.74
  -0.28  0.16 -0.20  0.60  0.20
>
> for (lupeSets in 1:Ndatasets) {
+
+   priors <- list(B=list(mu=MUs[,lupeSets],
+                         V=gelman.prior(Cured ~ Intervention + Duration, data=eelData, scale=sqrt(pi^2/3+1))),
+                  R=list(V=1, fix=1))
+
+   model2 = MCMCglmm(Cured ~ Intervention + Duration, data = eelData, family = "categorical",
+                     prior = priors, nitt=50000, burnin=3000, thin=10, verbose=F)
+
+   densdat <- density(model2$Sol[,estimateNum])
+   plotdatx <- cbind( plotdatx, densdat$x )
+   plotdaty <- cbind( plotdaty, densdat$y )
+ }
>
> matplot( plotdatx, plotdaty, main="", xlab=paste('Estimate for', coefnames[estimateNum]), ylab='Density',
+          font.lab=1.8, type='l', cex.lab=1.2, col=c(2,rep(1,ncol(plotdatx))), lty=1, lwd=1 )
> title(main=paste('Density Plots of the Posteriors for', coefnames[estimateNum], '\nProduced Using Differing Priors'))
> legend("topright", c("broad priors","random priors"), bty="n", lty=c(1,1),
+        lwd=2, cex=1.2, text.col=c(2,1), col=c(2,1) )
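>
> # An optional numeric check (a sketch, not part of the original session): the x value at
> # each saved density's peak approximates the posterior mode for the chosen coefficient
> # under each prior; peaks that sit close together indicate that the data, not the priors,
> # are driving the estimate, e.g.:
> #   posteriorModes <- plotdatx[ cbind(apply(plotdaty, 2, which.max), seq_len(ncol(plotdatx))) ]
> #   names(posteriorModes) <- c("broad", paste0("random", 1:Ndatasets))
> #   round(posteriorModes, 2)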
>
>
> ################ posterior predictive checks using the bayesplot package ######################
>
> # from the bayesplot package documentation:
> # The bayesplot package provides various plotting functions for graphical posterior predictive checking,
> # that is, creating graphical displays comparing observed data to simulated data from the posterior
> # predictive distribution. The idea behind posterior predictive checking is simple: if a model is a
> # good fit then we should be able to use it to generate data that looks a lot like the data we observed.
>
> y <- eelData$Cured    # the DV that was used for the MCMCglmm model
>
> # generating the simulated data using the simulate.MCMCglmm function from the MCMCglmm package
> yrep <- simulate.MCMCglmm(model1, 25)
>
> yrep <- t( yrep )    # transposing yrep for the bayesplot functions
>
>
> # ppc_stat: A histogram of the distribution of a test statistic computed by applying stat to each
> # dataset (row) in yrep. The value of the statistic in the observed data, stat(y), is
> # overlaid as a vertical line.
> ppc_stat(y, yrep, binwidth = 1)
>
>
> # ppc_dens_overlay: Kernel density or empirical CDF estimates of each dataset (row) in yrep are
> # overlaid, with the distribution of y itself on top (and in a darker shade).
> ppc_dens_overlay(y, yrep[1:25, ])
>
>
> # ppc_scatter_avg: A scatterplot of y against the average values of yrep, i.e., the
> # points (mean(yrep[, n]), y[n]), where each yrep[, n] is a vector of length equal
> # to the number of posterior draws.
> ppc_scatter_avg(y, yrep)
>
>
> # ppc_hist: A separate histogram, shaded frequency polygon, smoothed kernel density estimate,
> # or box-and-whiskers plot is displayed for y and each dataset (row) in yrep.
> # For these plots yrep should therefore contain only a small number of rows.
> ppc_hist(y, yrep[1:8, ], binwidth = .3)
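>
> # One more check along the same lines (a sketch, not part of the original session): ppc_stat
> # defaults to stat = "mean", which for a 0/1 outcome is the proportion cured; any other
> # summary statistic can be passed by name, with the binwidth adjusted to suit, e.g.:
> #   ppc_stat(y, yrep, stat = "sd", binwidth = .005)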
>
>
> ###################### now conduct the Bayesian analyses using the rstanarm package ######################
>
> model10 <- stan_glm(Cured ~ Intervention + Duration, data = eelData, family = binomial(),
+                     warmup = 1000, iter = 2000, sparse = FALSE, seed = 123)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).

Gradient evaluation took 0.000385 seconds
1000 transitions using 10 leapfrog steps per transition would take 3.85 seconds.
Adjust your expectations accordingly!

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1001 / 2000 [ 50%]  (Sampling)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.079201 seconds (Warm-up)
               0.109517 seconds (Sampling)
               0.188718 seconds (Total)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).

Gradient evaluation took 1.7e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.
Adjust your expectations accordingly!

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1001 / 2000 [ 50%]  (Sampling)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.080903 seconds (Warm-up)
               0.099419 seconds (Sampling)
               0.180322 seconds (Total)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).

Gradient evaluation took 1.9e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.19 seconds.
Adjust your expectations accordingly!

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1001 / 2000 [ 50%]  (Sampling)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.079909 seconds (Warm-up)
               0.101262 seconds (Sampling)
               0.181171 seconds (Total)

SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).

Gradient evaluation took 2.1e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.21 seconds.
Adjust your expectations accordingly!

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1001 / 2000 [ 50%]  (Sampling)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.099085 seconds (Warm-up)
               0.082703 seconds (Sampling)
               0.181788 seconds (Total)

>
> summary(model10, digits = 3)

Model Info:

 function:     stan_glm
 family:       binomial [logit]
 formula:      Cured ~ Intervention + Duration
 algorithm:    sampling
 priors:       see help('prior_summary')
 sample:       4000 (posterior sample size)
 observations: 83
 predictors:   3

Estimates:
                mean    sd      2.5%    25%     50%     75%     97.5%
(Intercept)    -0.436   1.333  -3.098  -1.321  -0.430   0.461   2.174
Intervention    1.146   0.475   0.206   0.827   1.144   1.462   2.079
Duration        0.014   0.192  -0.356  -0.116   0.011   0.143   0.390
mean_PPD        0.554   0.073   0.410   0.506   0.554   0.602   0.699
log-posterior -60.515   1.242 -63.698 -61.079 -60.202 -59.609 -59.069

Diagnostics:
              mcse  Rhat  n_eff
(Intercept)   0.023 1.000 3279
Intervention  0.008 1.000 3552
Duration      0.003 1.000 3201
mean_PPD      0.001 1.000 3957
log-posterior 0.028 1.001 2011

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of
effective sample size, and Rhat is the potential scale reduction factor on split chains
(at convergence Rhat=1).

> plot(model10)
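>
> # Possible next steps (a sketch, not run in the original session): rstanarm provides its
> # own convenience functions for the kinds of summaries and checks produced above, e.g.:
> #   posterior_interval(model10, prob = 0.95)                  # 95% credible intervals
> #   round(exp(posterior_interval(model10, prob = 0.95)), 2)   # the same intervals as odds ratios
> #   pp_check(model10)                                         # graphical PPC (wraps bayesplot)
>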