#LOGISTIC REGRESSION in chapter 8 of Field, Miles, & Field, 2012

#Install packages
install.packages ("MCMCglmm")

#Initiate packages
library(MCMCglmm)


#Enter data
eelNames<- c("Cured", "Intervention", "Duration")

eelData <- '
0	0	7
1	0	6
1	1	5
0	1	6
1	1	7
1	0	7
1	0	8
0	1	7
1	0	5
1	1	7
1	0	9
0	1	10
1	1	7
0	1	8
0	0	5
0	0	8
0	0	4
0	1	7
0	1	8
1	0	6
1	1	6
1	1	6
1	1	7
1	0	7
0	0	8
1	1	7
1	1	7
0	0	7
1	1	7
1	1	8
1	0	7
0	0	9
1	0	7
0	1	8
1	1	8
0	0	7
0	0	7
1	0	7
0	0	8
0	0	7
1	1	8
0	0	7
0	1	8
1	1	8
1	1	9
1	0	7
1	1	10
0	1	5
1	0	7
0	1	8
1	0	9
0	0	5
1	1	10
1	1	8
0	1	7
1	1	6
1	0	5
0	1	6
1	1	7
0	0	7
1	1	7
0	0	7
1	1	7
1	1	7
0	1	8
1	1	5
1	0	6
0	0	7
0	0	6
1	1	7
0	0	7
0	0	9
1	1	6
1	0	6
0	0	7
1	0	7
0	0	6
0	0	7
1	1	8
1	1	9
0	0	4
0	0	6
1	1	9'

eelData <- data.frame( read.table(text=eelData, fill=TRUE, col.names=eelNames)  )


###################### regular, non Bayesian analyses of the data ######################

# conventional logistic regression (p. 330)
eelModel.1 <- glm(Cured ~ Intervention + Duration, data = eelData, family = binomial())

summary(eelModel.1)


###################### Bayesian Logistic Regression using MCMCglmm ######################

# broad priors for logistic regression -- see ?gelman.prior
priors <- list(B=list(mu=c(0,0,0), 
               V=gelman.prior(Cured~Intervention + Duration, data=eelData, scale=sqrt(pi^2/3+1))), 
		       R=list(V=1,fix=1))

model1 = MCMCglmm(Cured ~ Intervention + Duration, data = eelData, family = "categorical", 
				  prior = priors, nitt=50000, burnin=3000, thin=10, verbose=F)

summary(model1)
autocorr(model1$VCV)
plot(model1)


# running the same analyses using different priors, & then plotting the various posteriors for "Intervention"

estimateNum = 2 # the number of regression estimate to be used for the analyses e.g., the 1st (intercept), or 2nd, etc

Ndatasets = 5   # the number of additional analyses using different priors

# plot data for the first Bayes model
densdat  <- density(model1$Sol[,estimateNum])
plotdatx <- densdat$x
plotdaty <- densdat$y

# for MCMCglmm priors, B is for fixed effects. B is a list containing the expected value (mu) 
# and a (co)varcv    bgiance matrix (V) representing the strength of belief: the defaults are B$mu=0
# and B$V=I*1e+10, where where I is an identity matrix of appropriate dimension

# the different priors will be derived from the lm, non-Bayesian regression coefficients
# the mu values will be random values from a 4 SE range around the lm estimates
# the V values will be set at (4 * the SEs)**2

fit.list <- summary.lm(eelModel.1)
coeffs <- fit.list$coefficients
coefnames <- rownames(coeffs)

# ranges around the estimates
MUranges <- cbind( (coeffs[,1] - (4 * coeffs[,2])), (coeffs[,1] + (4 * coeffs[,2]))  )

# random values from the MUranges for each coefficient
MUs <- matrix(-9999,nrow(coeffs),Ndatasets)
for (lupe in 1:nrow(coeffs)) { MUs[lupe,] <- runif(Ndatasets, min=MUranges[lupe,1], max=MUranges[lupe,2]) }
dimnames(MUs) <-list(rep("", dim(MUs)[1]))
colnames(MUs) <- seq(1:Ndatasets)
cat('\n\nPrior regression coefficient estimates for each additional analysis:')
print(round(MUs,2))


for (lupeSets in 1:Ndatasets) {

	priors <- list(B=list(mu=MUs[,lupeSets], 
	               V=gelman.prior(Cured~Intervention + Duration, data=eelData, scale=sqrt(pi^2/3+1))), 
			       R=list(V=1,fix=1))

	model2 = MCMCglmm(Cured ~ Intervention + Duration, data = eelData, family = "categorical",
                      prior = priors, nitt=50000, burnin=3000, thin=10, verbose=F)

	densdat  <- density(model2$Sol[,estimateNum])
	plotdatx <- cbind( plotdatx, densdat$x)
	plotdaty <- cbind( plotdaty, densdat$y)
}

matplot( plotdatx, plotdaty, main="", xlab=paste('Estimate for', coefnames[estimateNum]), ylab='Density',  
         font.lab=1.8, type='l', cex.lab=1.2, col=c(2,rep(1,ncol(plotdatx))), lty=1, lwd=1 )
title(main=paste('Density Plots of the Posteriors for', coefnames[estimateNum], '\nProduced Using Differing Priors'))
legend("topright", c("broad priors","random priors"), bty="n", lty=c(1,1),
       lwd=2, cex=1.2, text.col=c(2,1), col=c(2,1) )


################ posterior predictive checks using the bayesplot package ######################

install.packages("bayesplot");    library(bayesplot)

# from the bayesplot package documentation:
# The bayesplot package provides various plotting functions for graphical posterior predictive checking, 
# that is, creating graphical displays comparing observed data to simulated data from the posterior 
# predictive distribution. The idea behind posterior predictive checking is simple: if a model is a 
# good fit then we should be able to use it to generate data that looks a lot like the data we observed. 

y <- eelData$Cured   # the DV that was used for the MCMCglmm model

# generating the simulated data using the simulate.MCMCglmm function from the MCMCglmm package
yrep <- simulate.MCMCglmm(model1, 25) 

yrep <- t( yrep ) # transposing yrep for the bayesplot functions


# ppc_stat: A histogram of the distribution of a test statistic computed by applying stat to each 
#           dataset (row) in yrep. The value of the statistic in the observed data, stat(y), is 
#           overlaid as a vertical line.
ppc_stat(y, yrep, binwidth = 1)


# ppc_dens_overlay: Kernel density or empirical CDF estimates of each dataset (row) in yrep are 
#                   overlaid, with the distribution of y itself on top (and in a darker shade).
ppc_dens_overlay(y, yrep[1:25, ])


# ppc_scatter_avg: A scatterplot of y against the average values of yrep, i.e., the 
#                  points (mean(yrep[, n]), y[n]), where each yrep[, n] is a vector of length equal 
#                  to the number of posterior draws.
ppc_scatter_avg(y, yrep)


# ppc_hist: A separate histogram, shaded frequency polygon, smoothed kernel density estimate, 
#           or box and whiskers plot is displayed for y and each dataset (row) in yrep. 
#           For these plots yrep should therefore contain only a small number of rows.
ppc_hist(y, yrep[1:8, ], binwidth = .3)


###################### now conduct the Bayesian analyses using the rstanarm package ######################

install.packages("httpuv")
install.packages("xts")
install.packages("rstanarm")

library(httpuv)
library(xts)
library(rstanarm)


model10 <- stan_glm(Cured ~ Intervention + Duration, data = eelData, family = binomial(),
                    warmup = 1000, iter = 2000, sparse = FALSE, seed = 123)

summary(model10, digits = 3)
plot(model10)