> > # entering ANCOVA data (p. 470) > libido <- c(3,2,5,2,2,2,7,2,4,7,5,3,4,4,7,5,4,9,2,6,3,4,4,4,6,4,6,2,8,5) > partnerLibido <- c(4,1,5,1,2,2,7,4,5,5,3,1,2,2,6,4,2,1,3,5,4,3,3,2,0,1,3,0,1,0) > dose <- c(rep(1,9),rep(2,8), rep(3,13)) > dose <- factor(dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) > viagraData <- data.frame(dose, libido, partnerLibido) > > > > ###################### regular, non Bayesian analyses of the data ###################### > > # conventional ANCOVA (p. 477) > > contrasts(viagraData$dose) <- cbind(c(-2,1,1), c(0,-1,1)) > viagraModel <- aov(libido ~ partnerLibido + dose, data = viagraData) > Anova(viagraModel, type="III") Error in Anova(viagraModel, type = "III") : could not find function "Anova" > summary.lm(viagraModel) Call: aov(formula = libido ~ partnerLibido + dose, data = viagraData) Residuals: Min 1Q Median 3Q Max -3.2622 -0.7899 -0.3230 0.8811 4.5699 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 3.1260 0.6250 5.002 3.34e-05 *** partnerLibido 0.4160 0.1868 2.227 0.03483 * dose1 0.6684 0.2400 2.785 0.00985 ** dose2 0.2196 0.4056 0.541 0.59284 --- Signif. codes: 0 Ô***Ő 0.001 Ô**Ő 0.01 Ô*Ő 0.05 Ô.Ő 0.1 Ô Ő 1 Residual standard error: 1.744 on 26 degrees of freedom Multiple R-squared: 0.2876, Adjusted R-squared: 0.2055 F-statistic: 3.5 on 3 and 26 DF, p-value: 0.02954 > > adjustedMeans <- effect("dose", viagraModel, se=TRUE) Error in effect("dose", viagraModel, se = TRUE) : could not find function "effect" > summary(adjustedMeans) Error in summary(adjustedMeans) : object 'adjustedMeans' not found > > > > ###################### Bayesian ANCOVA using MCMCglmm ###################### > > > # the MCMCglmm function does not recognize the above contrasts for dose, so the > # contrasts will be extracted and used instead of dose in the MCMCglmm analyses > contrast_codes <- model.matrix(~ viagraData$dose) > viagraData$dose_contrast1 <- contrast_codes[,2] > viagraData$dose_contrast2 <- contrast_codes[,3] > > model1 = MCMCglmm(libido ~ partnerLibido + dose_contrast1 + dose_contrast2, data=viagraData, verbose=F, + nitt=50000, burnin=3000, thin=10) > summary(model1) Iterations = 3001:49991 Thinning interval = 10 Sample size = 4700 DIC: 124.3964 R-structure: ~units post.mean l-95% CI u-95% CI eff.samp units 3.287 1.708 5.222 4452 Location effects: libido ~ partnerLibido + dose_contrast1 + dose_contrast2 post.mean l-95% CI u-95% CI eff.samp pMCMC (Intercept) 3.12590 1.87062 4.36940 4700 < 2e-04 *** partnerLibido 0.41637 0.04712 0.79461 4700 0.02681 * dose_contrast1 0.66831 0.18090 1.17185 4700 0.00766 ** dose_contrast2 0.23301 -0.60134 1.06762 4998 0.57191 --- Signif. codes: 0 Ô***Ő 0.001 Ô**Ő 0.01 Ô*Ő 0.05 Ô.Ő 0.1 Ô Ő 1 > autocorr(model1$VCV) , , units units Lag 0 1.000000000 Lag 10 0.027000296 Lag 50 0.011394911 Lag 100 0.006901764 Lag 500 0.006598486 > plot(model1) Hit to see next plot: > > # running the same analyses using different priors, & then plotting the various posteriors for "dose_contrast1" > > estimateNum = 3 # the number of regression estimate to be used for the analyses e.g., the 1st (intercept), or 2nd, etc > > Ndatasets = 5 # the number of additional analyses using different priors > > # plot data for the first Bayes model > densdat <- density(model1$Sol[,estimateNum]) > plotdatx <- densdat$x > plotdaty <- densdat$y > > # for MCMCglmm priors, B is for fixed effects. B is a list containing the expected value (mu) > # and a (co)varcv bgiance matrix (V) representing the strength of belief: the defaults are B$mu=0 > # and B$V=I*1e+10, where where I is an identity matrix of appropriate dimension > > # the different priors will be derived from the lm, non-Bayesian regression coefficients > # the mu values will be random values from a 4 SE range around the lm estimates > # the V values will be set at (4 * the SEs)**2 > > fit.list <- summary.lm(viagraModel) > coeffs <- fit.list$coefficients > coefnames <- rownames(coeffs) > > # ranges around the lm estimates > MUranges <- cbind( (coeffs[,1] - (4 * coeffs[,2])), (coeffs[,1] + (4 * coeffs[,2])) ) > > # random values from the MUranges for each coefficient > MUs <- matrix(-9999,nrow(coeffs),Ndatasets) > for (lupe in 1:nrow(coeffs)) { MUs[lupe,] <- runif(Ndatasets, min=MUranges[lupe,1], max=MUranges[lupe,2]) } > dimnames(MUs) <-list(rep("", dim(MUs)[1])) > colnames(MUs) <- seq(1:Ndatasets) > cat('\n\nPrior regression coefficient estimates for each additional analysis:') Prior regression coefficient estimates for each additional analysis: > print(round(MUs,2)) 1 2 3 4 5 1.62 2.68 2.59 4.07 2.12 0.70 0.21 0.27 0.91 0.84 0.10 0.59 0.90 0.69 0.76 0.13 -0.48 0.39 -0.67 0.89 > > # (co)variance matrix (V) representing the strength of belief - which will be (2 * the lm SEs)**2 > Vlmx2 <- diag((4 * coeffs[,2])**2) > colnames(Vlmx2) <- seq(1:nrow(MUs)) > cat('\n\nPrior variance (strength of belief) value used for each regression estimate in the additional analyses:\n', + round(diag(Vlmx2),5), sep="\n") Prior variance (strength of belief) value used for each regression estimate in the additional analyses: 6.24906 0.55851 0.92166 2.63232 > > > for (lupeSets in 1:Ndatasets) { + prior.list <- list(B = list(mu = MUs[,lupeSets], V = Vlmx2)) + model2 = MCMCglmm(libido ~ partnerLibido + dose_contrast1 + dose_contrast2, data=viagraData, + prior = prior.list, nitt=50000, burnin=3000, thin=10, verbose=F) + + densdat <- density(model2$Sol[,estimateNum]) + plotdatx <- cbind( plotdatx, densdat$x) + plotdaty <- cbind( plotdaty, densdat$y) + } > > # matplot( plotdatx, plotdaty, main="", xlab='Estimate for dose_contrast1', ylab='Density', font.lab=1.8, > # type='l', cex.lab=1.2, col=c(2,rep(1,ncol(plotdatx))), lty=1, lwd=1 ) > # title(main='Density Plots of the Posteriors for "dose_contrast1" Produced Using Differing Priors') > # legend("topright", c("broad priors","random priors"), bty="n", lty=c(1,1), > # lwd=2, cex=1.2, text.col=c(2,1), col=c(2,1) ) > > matplot( plotdatx, plotdaty, main="", xlab=paste('Estimate for', coefnames[estimateNum]), ylab='Density', + font.lab=1.8, type='l', cex.lab=1.2, col=c(2,rep(1,ncol(plotdatx))), lty=1, lwd=1 ) > title(main=paste('Density Plots of the Posteriors for', coefnames[estimateNum], '\nProduced Using Differing Priors')) > legend("topright", c("broad priors","random priors"), bty="n", lty=c(1,1), + lwd=2, cex=1.2, text.col=c(2,1), col=c(2,1) ) > > > ################ posterior predictive checks using the bayesplot package ###################### > > # from the bayesplot package documentation: > # The bayesplot package provides various plotting functions for graphical posterior predictive checking, > # that is, creating graphical displays comparing observed data to simulated data from the posterior > # predictive distribution. The idea behind posterior predictive checking is simple: if a model is a > # good fit then we should be able to use it to generate data that looks a lot like the data we observed. > > y <- viagraData$libido # the DV that was used for the MCMCglmm model > > # generating the simulated data using the simulate.MCMCglmm function from the MCMCglmm package > yrep <- simulate.MCMCglmm(model1, 25) > > yrep <- t( yrep ) # transposing yrep for the bayesplot functions > > > # ppc_stat: A histogram of the distribution of a test statistic computed by applying stat to each > # dataset (row) in yrep. The value of the statistic in the observed data, stat(y), is > # overlaid as a vertical line. > ppc_stat(y, yrep, binwidth = 1) > > > # ppc_dens_overlay: Kernel density or empirical CDF estimates of each dataset (row) in yrep are > # overlaid, with the distribution of y itself on top (and in a darker shade). > ppc_dens_overlay(y, yrep[1:25, ]) > > > # ppc_scatter_avg: A scatterplot of y against the average values of yrep, i.e., the > # points (mean(yrep[, n]), y[n]), where each yrep[, n] is a vector of length equal > # to the number of posterior draws. > ppc_scatter_avg(y, yrep) > > > # ppc_hist: A separate histogram, shaded frequency polygon, smoothed kernel density estimate, > # or box and whiskers plot is displayed for y and each dataset (row) in yrep. > # For these plots yrep should therefore contain only a small number of rows. > ppc_hist(y, yrep[1:8, ], binwidth = .3) > > > > ###################### now conduct the Bayesian analyses using the rstanarm package ###################### > > model10 <- stan_glm(libido~ partnerLibido + dose, data=viagraData, + warmup = 1000, iter = 2000, sparse = FALSE, seed = 123) SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1). Gradient evaluation took 1.7e-05 seconds 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds. Adjust your expectations accordingly! Iteration: 1 / 2000 [ 0%] (Warmup) Iteration: 200 / 2000 [ 10%] (Warmup) Iteration: 400 / 2000 [ 20%] (Warmup) Iteration: 600 / 2000 [ 30%] (Warmup) Iteration: 800 / 2000 [ 40%] (Warmup) Iteration: 1000 / 2000 [ 50%] (Warmup) Iteration: 1001 / 2000 [ 50%] (Sampling) Iteration: 1200 / 2000 [ 60%] (Sampling) Iteration: 1400 / 2000 [ 70%] (Sampling) Iteration: 1600 / 2000 [ 80%] (Sampling) Iteration: 1800 / 2000 [ 90%] (Sampling) Iteration: 2000 / 2000 [100%] (Sampling) Elapsed Time: 0.04286 seconds (Warm-up) 0.038877 seconds (Sampling) 0.081737 seconds (Total) SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2). Gradient evaluation took 1.1e-05 seconds 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds. Adjust your expectations accordingly! Iteration: 1 / 2000 [ 0%] (Warmup) Iteration: 200 / 2000 [ 10%] (Warmup) Iteration: 400 / 2000 [ 20%] (Warmup) Iteration: 600 / 2000 [ 30%] (Warmup) Iteration: 800 / 2000 [ 40%] (Warmup) Iteration: 1000 / 2000 [ 50%] (Warmup) Iteration: 1001 / 2000 [ 50%] (Sampling) Iteration: 1200 / 2000 [ 60%] (Sampling) Iteration: 1400 / 2000 [ 70%] (Sampling) Iteration: 1600 / 2000 [ 80%] (Sampling) Iteration: 1800 / 2000 [ 90%] (Sampling) Iteration: 2000 / 2000 [100%] (Sampling) Elapsed Time: 0.061054 seconds (Warm-up) 0.040101 seconds (Sampling) 0.101155 seconds (Total) SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3). Gradient evaluation took 1.1e-05 seconds 1000 transitions using 10 leapfrog steps per transition would take 0.11 seconds. Adjust your expectations accordingly! Iteration: 1 / 2000 [ 0%] (Warmup) Iteration: 200 / 2000 [ 10%] (Warmup) Iteration: 400 / 2000 [ 20%] (Warmup) Iteration: 600 / 2000 [ 30%] (Warmup) Iteration: 800 / 2000 [ 40%] (Warmup) Iteration: 1000 / 2000 [ 50%] (Warmup) Iteration: 1001 / 2000 [ 50%] (Sampling) Iteration: 1200 / 2000 [ 60%] (Sampling) Iteration: 1400 / 2000 [ 70%] (Sampling) Iteration: 1600 / 2000 [ 80%] (Sampling) Iteration: 1800 / 2000 [ 90%] (Sampling) Iteration: 2000 / 2000 [100%] (Sampling) Elapsed Time: 0.044635 seconds (Warm-up) 0.039322 seconds (Sampling) 0.083957 seconds (Total) SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4). Gradient evaluation took 1e-05 seconds 1000 transitions using 10 leapfrog steps per transition would take 0.1 seconds. Adjust your expectations accordingly! Iteration: 1 / 2000 [ 0%] (Warmup) Iteration: 200 / 2000 [ 10%] (Warmup) Iteration: 400 / 2000 [ 20%] (Warmup) Iteration: 600 / 2000 [ 30%] (Warmup) Iteration: 800 / 2000 [ 40%] (Warmup) Iteration: 1000 / 2000 [ 50%] (Warmup) Iteration: 1001 / 2000 [ 50%] (Sampling) Iteration: 1200 / 2000 [ 60%] (Sampling) Iteration: 1400 / 2000 [ 70%] (Sampling) Iteration: 1600 / 2000 [ 80%] (Sampling) Iteration: 1800 / 2000 [ 90%] (Sampling) Iteration: 2000 / 2000 [100%] (Sampling) Elapsed Time: 0.070061 seconds (Warm-up) 0.037559 seconds (Sampling) 0.10762 seconds (Total) > > summary(model10, digits = 3) Model Info: function: stan_glm family: gaussian [identity] formula: libido ~ partnerLibido + dose algorithm: sampling priors: see help('prior_summary') sample: 4000 (posterior sample size) observations: 30 predictors: 4 Estimates: mean sd 2.5% 25% 50% 75% 97.5% (Intercept) 3.124 0.646 1.829 2.699 3.135 3.554 4.385 partnerLibido 0.414 0.192 0.035 0.287 0.412 0.543 0.790 dose1 0.655 0.244 0.196 0.493 0.654 0.813 1.133 dose2 0.239 0.424 -0.584 -0.040 0.238 0.516 1.094 sigma 1.799 0.254 1.384 1.622 1.769 1.941 2.369 mean_PPD 4.358 0.466 3.447 4.050 4.361 4.665 5.257 log-posterior -67.606 1.712 -71.822 -68.493 -67.232 -66.369 -65.335 Diagnostics: mcse Rhat n_eff (Intercept) 0.010 1.000 4000 partnerLibido 0.003 1.000 4000 dose1 0.004 1.000 4000 dose2 0.007 0.999 4000 sigma 0.004 1.000 4000 mean_PPD 0.007 0.999 4000 log-posterior 0.043 1.001 1603 For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1). > plot(model10) >