#======================= # Set-up #======================= # Clear the environment rm(list = ls()) # The following packages are required library(dplyr) library(ggplot2) library(cowplot) library(rjags) library(coda) # The Bayesian_Analysis_Functions.R file contains several additional functions # needed for this script. source("Bayesian_Analysis_Functions.R") #=========================== # Exporting files and tables #=========================== # If RUN_..._MODEL == TRUE, the model will be re-run and new MCMC samples # will be collected. # If RUN_..._MODEL == FALSE, then the previous MCMC samples from the model # will be reloaded into memory and used to create graphs. This is the # recommended option to exactly reproduce the results of the paper. # Graphs and tables will exported if SAVE_..._OUTPUT == TRUE # These will be exported into the Output subdirectory. # Make sure this folder exists. Edit to alter EXPORT_PATH # variable to change export location. RUN_GAMMA_MODEL = TRUE SAVE_GAMMA_OUTPUT = TRUE RUN_POISSON_MODEL = TRUE SAVE_POISSON_OUTPUT = TRUE EXPORT_PATH = './Output/' #======================= # Graph colour info #======================= # The colours for the graphs come from the R Brewer Palette RdYlBu: # red = ca0020 # blue = 0571b0 # These should be colour-blind safe. #======================= # Load data sets #======================= df65 = read.csv('DATA_Husk_Fragmentation_GS65.csv', na.strings = c('', NA)) df77 = read.csv('DATA_Husk_Fragmentation_GS77.csv', na.strings = c('', NA)) # There were only 6 pots for GP due to germination problems but 10 pots for everything else. # For GP, 2 reps were collected from each of the 6 pots (to give 12 samples). 10 samples were # randomly chosen. It is important to test the same number of samples across varieties, as this # will affect the number of fragments and therefore the estimate of lambda. # These GP samples were used in the impact trials. All varieties therefore have data from 10 husk impact trials. #=============================== # Histograms of the data #=============================== Plot.Histogram.Fragment.Area = function(data){ histogram = data %>% ggplot(., aes(x = Fragment_Area, group = Variety, fill = Variety))+ geom_histogram(binwidth = 500000)+ facet_wrap(~Variety)+ theme(panel.background = element_rect(fill = 'grey95'))+ theme(strip.background = element_rect(fill="white"))+ theme(strip.text.x = element_text(size = 14))+ theme(axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0), size=13))+ theme(axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size=13))+ scale_fill_manual(values = c('#0571b0', '#0571b0', '#ca0020', '#ca0020'), guide = FALSE)+ xlab('Fragment area (pixels)')+ ylab('Count') return(histogram) } histogram_fragment_areas_gs65 = Plot.Histogram.Fragment.Area(df65) histogram_fragment_areas_gs77 = Plot.Histogram.Fragment.Area(df77) if(SAVE_GAMMA_OUTPUT == TRUE){ ggsave(paste0(EXPORT_PATH, 'Histogram_Fragment_Area_GS65.pdf'), histogram_fragment_areas_gs65, height = 6, width = 8) ggsave(paste0(EXPORT_PATH, 'Histogram_Fragment_Area_GS77.pdf'), histogram_fragment_areas_gs77, height = 6, width = 8) } Plot.Histogram.Fragment.Number = function(data){ data = data %>% group_by(Variety, Pot, Rep) %>% summarise(N = length(Fragment_Area)) histogram = data %>% ggplot(., aes(x = N, group = Variety, fill = Variety))+ geom_histogram(binwidth = 1)+ facet_wrap(~Variety)+ theme(panel.background = element_rect(fill = 'grey95'))+ theme(strip.background = element_rect(fill="white"))+ theme(strip.text.x = element_text(size = 14))+ theme(axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0), size=13))+ theme(axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0), size=13))+ scale_fill_manual(values = c('#0571b0', '#0571b0', '#ca0020', '#ca0020'), guide = FALSE)+ xlab('Number of fragments per husk')+ ylab('Count') return(histogram) } histogram_fragment_number_gs65 = Plot.Histogram.Fragment.Number(df65) histogram_fragment_number_gs77 = Plot.Histogram.Fragment.Number(df77) if(SAVE_POISSON_OUTPUT == TRUE){ ggsave(paste0(EXPORT_PATH, 'Histogram_Fragment_Number_GS65.pdf'), histogram_fragment_number_gs65, height = 6, width = 8) ggsave(paste0(EXPORT_PATH, 'Histogram_Fragment_Number_GS77.pdf'), histogram_fragment_number_gs77, height = 6, width = 8) } ############################################################ # MODELLING FRAGMENT AREA ############################################################ #============ # Set up #============ # rjags stores factors (such as variety name) as integers. This function converts the # integer back into variety name. This function is used for plotting data. Get.Variety = function(i){ if (i == 1){return('Golden Promise')} if (i == 2){return('Henni')} if (i == 3){return('Poker')} if (i == 4){return('Propino')} } # This function controls what colours are used for each variety in plots. Get.Variety.Colour = function(var){ if (var == 'GP' | var == 'Golden Promise' ){return('#0571b0')} if (var == 'Henni') {return('#0571b0')} if (var == 'Poker') {return('#ca0020')} if (var == 'Propino') {return('#ca0020')} } #============================== # Gamma Model #============================== # Model is only run if RUN_GAMMA_MODEL == TRUE. Each time the model is run # it will produce slightly different results. If RUN_GAMMA_MODEL == FALSE, # the results from the previous model are loaded into memory. This is the # recommended option to exactly reproduce the results of the paper. for (growth_stage in c("GS65", "GS77")){ if (RUN_GAMMA_MODEL == TRUE) { #------------------- # Set up model #------------------- gamma_model_string = " model { #----------------------------------- # Distribution of data (Likelihood): #----------------------------------- for ( i in 1:Ntotal ) { y[i] ~ dgamma( alpha[s[i]] , beta[s[i]] ) } #----------------------------------- # Distribution of parameters (Prior): #----------------------------------- for (j in 1:Nsubj) { #Old: #alpha[j] ~ dunif(0,10000000) #beta[j] ~ dunif(0,10000000) alpha[j] ~ dgamma(0.001,0.001) beta[j] ~ dgamma(0.001,0.001) mu[j] = alpha[j]/beta[j] } }" # Write model to a file, and send to BUGS: writeLines(gamma_model_string, con = "Gamma_Model.txt") #---------------------- # Set-up data for model #---------------------- # Load the data: y = 0 s = 0 if(growth_stage == "GS65"){ y = df65$Fragment_Area s = as.integer(df65$Variety) } if(growth_stage == "GS77"){ y = df77$Fragment_Area s = as.integer(df77$Variety) } Ntotal = length(y) Nsubj = length(unique(s)) # Specify the data as a list: gamma_data_list = list(y = y, s = s, Ntotal = Ntotal, Nsubj = Nsubj) #----------------------- # Run the chains #----------------------- parameters = c("alpha" , "beta", "mu") adaptSteps = 1000 # Number of steps to "tune" the samplers. burnInSteps = 10000 # Number of steps to "burn-in" the samplers. nChains = 3 # Number of chains to run. numSavedSteps = 100000 # Total number of steps in chains to save. thinSteps = 1 # Number of steps to "thin" (1=keep every step). nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain. #---------------------------------------- # Create, intialise and adapt the model: #---------------------------------------- gamma_jags_model = jags.model(file = "Gamma_Model.txt", data = gamma_data_list, n.chains = nChains, n.adapt = adaptSteps) #---------------------------------------- # Burn-in: #---------------------------------------- cat( "Burning in the MCMC chain...\n" ) update(gamma_jags_model , n.iter=burnInSteps ) #---------------------------------------- # Run the MCMC chain: #---------------------------------------- cat( "Sampling final MCMC chain...\n" ) coda_samples_gamma_model = coda.samples(gamma_jags_model, variable.names = parameters, n.iter = nPerChain, thin = thinSteps) # The resulting codaSamples object has these indices: # codaSamples[[ chainIdx ]][ stepIdx , paramIdx ] #---------------------------------------- # Save the MCMC chain: #---------------------------------------- gamma_model = coda_samples_gamma_model if (growth_stage == "GS65") { save(coda_samples_gamma_model, file = 'MCMC_Samples_Gamma_Model_GS65.rda') } if (growth_stage == "GS77") { save(coda_samples_gamma_model, file = 'MCMC_Samples_Gamma_Model_GS77.rda') } #======================= # MCMC Diagnostics #======================= # Check the quality of the MCMC. Specifically: # - Prior distributions are sensible # - Parameter space well explored # - Effective sample size > 10,000 # - Auto-correlation low # - Shrink factor low (< 1.1)indicating the chains have converged. # (Technically the Gelman-Rubin statistic). # - Parameter estimates are similiar and accurate (MCSE ~ 0). # MCSE = Monte Carlo standard error = (sd of the chain / sqrt(ESS)) # The following code creates a MCMC diagnostic polt for the parameters, # alpha, beta and mu. Plots will only be exported if SAVE_GAMMA_OUTPUT == TRUE. # This function calls the MCMC.Diagnotic.Plots() function from # KateCustomBayesianAnalysisFunctions.R. for (i in 1:4){ parName = paste("alpha[", i, "]", sep = "") variety = Get.Variety(i) title = bquote('MCMC Diagnositics for '*.(variety)*' (Parameter = '*~alpha*")") if (SAVE_GAMMA_OUTPUT == TRUE){ save_name = paste(EXPORT_PATH, 'Graph_MCMC_Diagnositics_Gamma_Model_Parameter_Alpha_', variety, '_', growth_stage, sep = '')} else {save_name = NULL} MCMC.Diagnotic.Plots(gamma_model, parName, title, save_name, save_type = 'jpeg') } for (i in 1:4){ parName = paste("beta[", i, "]", sep = "") variety = Get.Variety(i) title = bquote('MCMC Diagnositics for '*.(variety)*' (Parameter = '*~beta*")") if (SAVE_GAMMA_OUTPUT == TRUE) { save_name = paste(EXPORT_PATH, 'Graph_MCMC_Diagnositics_Gamma_Model_Parameter_Beta_', variety, '_', growth_stage, sep = '')} else {save_name = NULL} MCMC.Diagnotic.Plots(gamma_model, parName, title, save_name, save_type = 'jpeg') } for (i in 1:4){ parName = paste("mu[", i, "]", sep = "") variety = Get.Variety(i) title = bquote('MCMC Diagnositics for '*.(variety)*' (Parameter = '*~mu*")") if (SAVE_GAMMA_OUTPUT == TRUE) { save_name = paste(EXPORT_PATH, 'Graph_MCMC_Diagnositics_Gamma_Model_Parameter_Mu_', variety, '_', growth_stage, sep = '')} else {save_name = NULL} MCMC.Diagnotic.Plots(gamma_model, parName, title, save_name, save_type="jpeg") } } if (RUN_GAMMA_MODEL == FALSE){ if (growth_stage == "GS65"){ load('MCMC_Samples_Gamma_Model_GS65.rda') gamma_model = NULL try_info = try(gammavarietycodasamples) if(class(try_info) == "mcmc.list"){ gamma_model = gammavarietycodasamples } else{gamma_model = coda_samples_gamma_model} } if (growth_stage == "GS77"){ load('MCMC_Samples_Gamma_Model_GS77.rda') gamma_model = NULL try_info = try(gammavarietycodasamples) if(class(try_info) == "mcmc.list"){ gamma_model = gammavarietycodasamples } else{gamma_model = coda_samples_gamma_model} } } #====================================== # Summarise the posterior distributions #====================================== # This function create a graphical summary of the posterior distribution. # It takes the following arguments: (1) variety (2) parameter, in this # case 'a' for alpha, 'b' for beta and 'm' for mu (3) choose whether to # use the 'mean', 'median' or 'mode' as the statistc to summarise the # distribution (4) the x-axis limits (5) the colour of the histogram (6) # the coulor of the border (7) whether to export the file. # This function calls the Plot.Posterior.Distribution() function from # KateCustomBayesianAnalysisFunctions.R. Plot.Gamma.Posterior.Dist = function(variety, parameter='a', parameter_estimate = 'mean', xlims, col = 'skyblue', border = 'skyblue', savefile = TRUE){ if(parameter == 'a'){ parameter = paste('alpha', "[", variety, "]", sep = "") Plot.Posterior.Distribution(gamma_model[,parameter], main = Get.Variety(variety), central_tendency = parameter_estimate, xlab = expression(alpha), xlim = xlims, ylab = expression(paste("P(", alpha, "|D)")), col = col, border = border, mgp = c(2.5, 1, 0)) if (savefile == TRUE){ plot_name = paste(EXPORT_PATH, 'Posterior_Distribution_Gamma_Model_Parameter_Alpha_', Get.Variety(variety), '_', growth_stage, ' .pdf', sep = '') dev.copy(pdf, file = plot_name) dev.off()} } if(parameter == 'b'){ parameter = paste('beta', "[", variety, "]", sep = "") Plot.Posterior.Distribution(gamma_model[,parameter], main = Get.Variety(variety), central_tendency = parameter_estimate, xlab = expression(beta), xlim = xlims, ylab = expression(paste("P(", beta, "|D)")), col = col, border = border, mgp = c(2.5, 1, 0)) if (savefile == TRUE){ plot_name = paste(EXPORT_PATH, 'Posterior_Distribution_Gamma_Model_Parameter_Beta_', Get.Variety(variety), '_', growth_stage, ' .pdf', sep = '') dev.copy(pdf, file = plot_name) dev.off()} } if(parameter == 'm'){ parameter = paste('mu', "[", variety, "]", sep = "") Plot.Posterior.Distribution(gamma_model[,parameter], main = Get.Variety(variety), central_tendency = parameter_estimate, xlab = expression(mu), xlim = xlims, ylab = expression(paste("P(", mu, "|D)")), col = col, border = border, mgp = c(2.5, 1, 0)) if (savefile == TRUE){ plot_name = paste(EXPORT_PATH,'Posterior_Distribution_Gamma_Model_Parameter_Mu_', Get.Variety(variety), '_', growth_stage, ' .pdf', sep = '') dev.copy(pdf, file = plot_name) dev.off()} } } #--------------------------------- # Posterior Distribution of Alpha #--------------------------------- if(growth_stage == "GS65"){ Plot.Gamma.Posterior.Dist(1, 'a', 'mean', c(0,1.5), col = '#0571b0', border = '#0571b0', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(2, 'a', 'mean', c(0,1.5), col = '#0571b0', border = '#0571b0', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(3, 'a', 'mean', c(0,1.5), col = '#ca0020', border = '#ca0020', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(4, 'a', 'mean', c(0,1.5), col = '#ca0020', border = '#ca0020', savefile = SAVE_GAMMA_OUTPUT) } if(growth_stage == "GS77"){ Plot.Gamma.Posterior.Dist(1, 'a', 'mean', c(0,2.5), col = '#0571b0', border = '#0571b0', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(2, 'a', 'mean', c(0,25), col = '#0571b0', border = '#0571b0', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(3, 'a', 'mean', c(0,2.5), col = '#ca0020', border = '#ca0020', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(4, 'a', 'mean', c(0,2.5), col = '#ca0020', border = '#ca0020', savefile = SAVE_GAMMA_OUTPUT) } #--------------------------------- # Posterior Distribution of Beta #--------------------------------- Plot.Gamma.Posterior.Dist(1, 'b', 'mean', c(0,0.00001), col = '#0571b0', border = '#0571b0', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(2, 'b', 'mean', c(0,0.00001), col = '#0571b0', border = '#0571b0', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(3, 'b', 'mean', c(0,0.00001), col = '#ca0020', border = '#ca0020', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(4, 'b', 'mean', c(0,0.00001), col = '#ca0020', border = '#ca0020', savefile = SAVE_GAMMA_OUTPUT) #--------------------------------- # Posterior Distribution of Mu #--------------------------------- Plot.Gamma.Posterior.Dist(1, 'm', 'mean', c(0,10000000), col = '#0571b0', border = '#0571b0', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(2, 'm', 'mean', c(0,10000000), col = '#0571b0', border = '#0571b0', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(3, 'm', 'mean', c(0,10000000), col = '#ca0020', border = '#ca0020', savefile = SAVE_GAMMA_OUTPUT) Plot.Gamma.Posterior.Dist(4, 'm', 'mean', c(0,10000000), col = '#ca0020', border = '#ca0020', savefile = SAVE_GAMMA_OUTPUT) #================================ # Differences in mu #================================ Plot.Posterior.Dist.Parameter.Differences.Mu <- function(parameter_estimate = 'mean', xlims, col = 'grey75', border = 'grey75', savefile = SAVE_GAMMA_OUTPUT){ comparisons = c('1,3', '1,4', '2,3', '2,4') for (i in comparisons){ variety1 = as.integer(substr(i, 1, 1)) variety2 = as.integer(substr(i, 3, 3)) namevariety1 = Get.Variety(variety1) namevariety2 = Get.Variety(variety2) differences = (as.matrix(gamma_model[,paste0('mu[', variety1,']')]) - as.matrix(gamma_model[,paste0('mu[', variety2,']')])) Plot.Posterior.Distribution(differences, main = paste0(namevariety1, ' - ', namevariety2), central_tendency = parameter_estimate, comparison_value = 0, xlab = bquote(mu[.(namevariety1)]-mu[.(namevariety2)]), xlim = c(-4000000, 10000000), ylab = bquote('P('*mu[.(namevariety1)] - mu[.(namevariety2)]*'|D)'), col = col, border = border, mgp = c(2.5, 1, 0)) if (savefile == TRUE){ plot_name = paste(EXPORT_PATH, "Posterior_Distribution_Gamma_Model_Mu_Diffs_", Get.Variety(variety1), "-", Get.Variety(variety2), "_", growth_stage, '.pdf', sep = '') dev.copy(pdf, plot_name) dev.off() } } } Plot.Posterior.Dist.Parameter.Differences.Mu() #=========================== # Posterior predictive check #=========================== # The function below calculate the 95% most likely posterior distributions # and super-imposes these over the original data. Plot.Gamma.Posterior.Predictive.Check = function(i, ylim = c(0,0.000002)){ # Get the data for the variety y = 0 if (growth_stage == "GS65"){ y = df65$Fragment_Area[as.integer(df65$Variety) == i] } if (growth_stage == "GS77"){ y = df77$Fragment_Area[as.integer(df77$Variety) == i] } # Construct histogram of the data: histInfo = hist(y, probability = TRUE ,# breaks=histBreaks , xlab = "Fragment area" , ylab = "Probability density" , ylim = c(0,0.000001), yaxt ='n', cex.lab = 1, cex.main = 1, main = Get.Variety(i), col = Get.Variety.Colour(Get.Variety(i)) , border = Get.Variety.Colour(Get.Variety(i)), xlim = c(0,8000000)) # Get the possible posterior distributions: yGrid = seq(min(0),max(8000000),length=501) mcmcMat = as.matrix(gamma_model) chainIdx = round(seq(1,nrow(mcmcMat),length=20)) # Plot the posterior distributions: for (stepIdx in chainIdx) { lines(yGrid , dgamma(yGrid, mcmcMat[stepIdx,paste0("alpha[",i,"]")], mcmcMat[stepIdx,paste0("beta[",i,"]")]) *length(y)/sum(is.finite(y)) , lwd = 1.5, col="grey75" ) } } # Apply the Plot.Posterior.Predictive.Check() to each variety. for(var in 1:4){ if (SAVE_GAMMA_OUTPUT == TRUE){ name = paste0(EXPORT_PATH, 'Posterior_Predictive_Check_Gamma_Model_', Get.Variety(var), '_', growth_stage, '.pdf') print(name) pdf(file = name) plot = Plot.Gamma.Posterior.Predictive.Check(var) plot dev.off() } } #====================== # Export posterior HDIs #====================== HDI_Data = data.frame(Variety = character(0), Growth_Stage = integer(0), Mean_Alpha = numeric(0), Lower_HDI_Alpha = numeric(0), Upper_HDI_Alpha = numeric(0), Mean_Beta = numeric(0), Lower_HDI_Beta = numeric(0), Upper_HDI_Beta = numeric(0), Mean_Mu = numeric(0), Lower_HDI_Mu = numeric(0), Upper_HDI_Mu = numeric(0)) for(i in 1:4){ Variety = Get.Variety(i) Growth_Stage = as.integer(stringr::str_extract(growth_stage, "\\d\\d")) Lower_HDI_Alpha = HDI.From.MCMC(as.matrix(gamma_model)[,paste0('alpha[', i, ']')])[1] Upper_HDI_Alpha = HDI.From.MCMC(as.matrix(gamma_model)[,paste0('alpha[', i, ']')])[2] Mean_Alpha = mean(as.matrix(gamma_model)[,paste0('alpha[', i, ']')]) Lower_HDI_Beta = HDI.From.MCMC(as.matrix(gamma_model)[,paste0('beta[', i, ']')])[1] Upper_HDI_Beta = HDI.From.MCMC(as.matrix(gamma_model)[,paste0('beta[', i, ']')])[2] Mean_Beta = mean(as.matrix(gamma_model)[,paste0('beta[', i, ']')]) Lower_HDI_Mu = HDI.From.MCMC(as.matrix(gamma_model)[,paste0('mu[', i, ']')])[1] Upper_HDI_Mu = HDI.From.MCMC(as.matrix(gamma_model)[,paste0('mu[', i, ']')])[2] Mean_Mu = mean(as.matrix(gamma_model)[,paste0('mu[', i, ']')]) var_data = data.frame(Variety,Growth_Stage, Mean_Alpha,Lower_HDI_Alpha, Upper_HDI_Alpha, Mean_Beta, Lower_HDI_Beta, Upper_HDI_Beta, Mean_Mu, Lower_HDI_Mu, Upper_HDI_Mu) HDI_Data = full_join(HDI_Data, var_data) } print(HDI_Data) if(SAVE_GAMMA_OUTPUT == TRUE){ write.csv(file = paste0(EXPORT_PATH, 'HDI_Data_Fragment_Area_Gamma_Model_', growth_stage, '.csv'), HDI_Data) } } ############################################################################################### # MODELLING FRAGMENT NUMBER ############################################################################################### #============ # Set up #============ df65_fragment_count = df65 %>% group_by(Variety, Pot, Rep) %>% summarise(N = length(Fragment_Area)) df77_fragment_count = df77 %>% group_by(Variety, Pot, Rep) %>% summarise(N = length(Fragment_Area)) #============================== # Poisson Model #============================== # Model is only run if RUN_POISSON_MODEL == TRUE. Each time the model is run # it will produce slightly different results. If RUN_POISSON_MODEL == FALSE, # the results from the previous model are loaded into memory. This is the # recommended option to exactly reproduce the results of the paper. for (growth_stage in c("GS65", "GS77")){ if (RUN_POISSON_MODEL == TRUE) { #------------------- # Set up model #------------------- poisson_model_string = " model { #----------------------------------- # Distribution of data (Likelihood): #----------------------------------- for ( i in 1:Ntotal ) { y[i] ~ dpois(lambda[s[i]]) } #----------------------------------- # Distribution of parameters (Prior): #----------------------------------- for (j in 1:Nsubj){ #lambda[j] ~ dunif(0,10000000) lambda[j] ~ dgamma(0.001,0.001) } }" # Write model to a file, and send to BUGS: writeLines(poisson_model_string, con = "Poisson_Model.txt") #---------------------- # Set-up data for model #---------------------- # Load the data: y = 0 s = 0 if(growth_stage == "GS65"){ y = df65_fragment_count$N s = as.integer(df65_fragment_count$Variety) } if(growth_stage == "GS77"){ y = df77_fragment_count$N s = as.integer(df77_fragment_count$Variety) } Ntotal = length(y) Nsubj = length(unique(s)) # Specify the data as a list: poisson_data_list = list(y = y, s = s, Ntotal = Ntotal, Nsubj = Nsubj) #----------------------- # Run the chains #----------------------- parameters = c("lambda") adaptSteps = 1000 # Number of steps to "tune" the samplers. burnInSteps = 10000 # Number of steps to "burn-in" the samplers. nChains = 3 # Number of chains to run. numSavedSteps = 50000 # Total number of steps in chains to save. thinSteps = 1 # Number of steps to "thin" (1=keep every step). nPerChain = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain. #---------------------------------------- # Create, intialise and adapt the model: #---------------------------------------- poisson_jags_model = jags.model(file = "Poisson_Model.txt", data = poisson_data_list, n.chains = nChains, n.adapt = adaptSteps) #---------------------------------------- # Burn-in: #---------------------------------------- cat( "Burning in the MCMC chain...\n" ) update(poisson_jags_model , n.iter = burnInSteps ) #---------------------------------------- # Run the MCMC chain: #---------------------------------------- cat( "Sampling final MCMC chain...\n" ) coda_samples_poisson_model = coda.samples(poisson_jags_model, variable.names = parameters, n.iter = nPerChain, thin = thinSteps) # The resulting codaSamples object has these indices: # codaSamples[[ chainIdx ]][ stepIdx , paramIdx ] #---------------------------------------- # Save the MCMC chain: #---------------------------------------- poisson_model = coda_samples_poisson_model if (growth_stage == "GS65") { save(coda_samples_poisson_model, file = 'MCMC_Samples_Poisson_Model_GS65.rda') } if (growth_stage == "GS77") { save(coda_samples_poisson_model, file = 'MCMC_Samples_Poisson_Model_GS77.rda') } #======================= # MCMC Diagnostics #======================= # Check the quality of the MCMC. Specifically: # - Prior distributions are sensible # - Parameter space well explored # - Effective sample size > 10,000 # - Auto-correlation low # - Shrink factor low (< 1.1)indicating the chains have converged. # (Technically the Gelman-Rubin statistic). # - Parameter estimates are similiar and accurate (MCSE ~ 0). # MCSE = Monte Carlo standard error = (sd of the chain / sqrt(ESS)) # The following code creates a MCMC diagnostic polt for the parameters, # alpha, beta and mu. Plots will only be exported if SAVE_POISSON_OUTPUT == TRUE. # This function calls the MCMC.Diagnotic.Plots() function from # KateCustomBayesianAnalysisFunctions.R. for (i in 1:4){ parName = paste("lambda[", i, "]", sep = "") variety = Get.Variety(i) title = bquote('MCMC Diagnositics for '*.(variety)*' (Parameter = '*~lambda*")") if (SAVE_POISSON_OUTPUT == TRUE) {save_name = paste(EXPORT_PATH, 'Graph_MCMC_Diagnositics_Poisson_Model_Parameter_Lambda_', variety, '_', growth_stage, sep = '')} else {save_name = NULL} MCMC.Diagnotic.Plots(poisson_model, parName, title, save_name, save_type = 'jpeg') } } if (RUN_POISSON_MODEL == FALSE){ if (growth_stage == "GS65"){ load('MCMC_Samples_Poisson_Model_GS65.rda') poisson_model = NULL try_info = try(poissonvarietycodasamples) if(class(try_info) == "mcmc.list"){ poisson_model = poissonvarietycodasamples } else{poisson_model = coda_samples_poisson_model} } if (growth_stage == "GS77"){ load('MCMC_Samples_Poisson_Model_GS77.rda') poisson_model = NULL try_info = try(poissonvarietycodasamples) if(class(try_info) == "mcmc.list"){ poisson_model = poissonvarietycodasamples } else{poisson_model = coda_samples_poisson_model} } } #====================================== # Summarise the posterior distributions #====================================== # This function create a graphical summary of the posterior distribution. # It takes the following arguments: (1) variety (2) parameter, in this # case 'a' for alpha, 'b' for beta and 'm' for mu (3) choose whether to # use the 'mean', 'median' or 'mode' as the statistc to summarise the # distribution (4) the x-axis limits (5) the colour of the histogram (6) # the coulor of the border (7) whether to export the file. # This function calls the Plot.Posterior.Distribution() function from # KateCustomBayesianAnalysisFunctions.R. Plot.Poisson.Posterior.Dist = function(variety, parameter='l', parameter_estimate = 'mean', xlims, col = 'skyblue', border = 'skyblue', savefile = TRUE){ if(parameter == 'l'){ parameter = paste('lambda', "[", variety, "]", sep = "") Plot.Posterior.Distribution(poisson_model[,parameter], main = Get.Variety(variety), central_tendency = parameter_estimate, xlab = expression(lambda), xlim = xlims, ylab = expression(paste("P(", lambda, "|D)")), col = col, border = border, mgp = c(2.5, 1, 0)) if (savefile == TRUE){ plot_name = paste(EXPORT_PATH, 'Posterior_Distribution_Poisson_Model_Parameter_Lambda_', Get.Variety(variety), '_', growth_stage, ' .pdf', sep = '') dev.copy(pdf, file = plot_name) dev.off() } } } #--------------------------------- # Posterior Distribution of Lambda #--------------------------------- Plot.Poisson.Posterior.Dist(1, 'l', 'mean', c(0,15), col = '#0571b0', border = '#0571b0', savefile = SAVE_POISSON_OUTPUT) Plot.Poisson.Posterior.Dist(2, 'l', 'mean', c(0,15), col = '#0571b0', border = '#0571b0', savefile = SAVE_POISSON_OUTPUT) Plot.Poisson.Posterior.Dist(3, 'l', 'mean', c(0,15), col = '#ca0020', border = '#ca0020', savefile = SAVE_POISSON_OUTPUT) Plot.Poisson.Posterior.Dist(4, 'l', 'mean', c(0,15), col = '#ca0020', border = '#ca0020', savefile = SAVE_POISSON_OUTPUT) #=========================== # Posterior predictive check #=========================== Plot.Posterior.Dist.Parameter.Differences.Lambda <- function(parameter_estimate = 'mean', xlims = c(-12,2), col = 'grey75', border = 'grey75', savefile = SAVE_POISSON_OUTPUT){ comparisons = c('1,3', '1,4', '2,3', '2,4') for (i in comparisons){ variety1 = as.integer(substr(i, 1, 1)) variety2 = as.integer(substr(i, 3, 3)) namevariety1 = Get.Variety(variety1) namevariety2 = Get.Variety(variety2) differences = as.matrix(poisson_model[,paste0('lambda[', variety1,']')]) - as.matrix(poisson_model[,paste0('lambda[', variety2,']')]) Plot.Posterior.Distribution(differences, main = paste0(namevariety1, ' - ', namevariety2), central_tendency = parameter_estimate, comparison_value = 0, xlab = bquote(lambda[.(namevariety1)]-lambda[.(namevariety2)]), xlim = xlims, ylab = bquote('P('*lambda[.(namevariety1)] - lambda[.(namevariety2)]*'|D)'), col = col, border = border, mgp = c(2.5, 1, 0)) if (savefile == TRUE){ plot_name = paste(EXPORT_PATH, "Posterior_Distribution_Poisson_Model_Lambda_Diffs_", Get.Variety(variety1), "-", Get.Variety(variety2), "_", growth_stage, '.pdf', sep = '') dev.copy(pdf, plot_name) dev.off() } } } Plot.Posterior.Dist.Parameter.Differences.Lambda() #=========================== # Posterior predictive check #=========================== # The function below calculate the 95% most likely posterior distributions # and super-imposes these over the original data. Plot.Poisson.Posterior.Predictive.Check = function(i, ylim = c(0,0.000002)){ # Get the data for the variety y = 0 if (growth_stage == "GS65"){ y = df65_fragment_count$N[as.integer(df65_fragment_count$Variety) == i] } if (growth_stage == "GS77"){ y = df77_fragment_count$N[as.integer(df77_fragment_count$Variety) == i] } # Construct histogram of the data: histInfo = hist(y, probability = TRUE, breaks = max(y), xlab = "Fragment number", ylab = "Probability density", yaxt ='n', cex.lab = 1, cex.main = 1, main = Get.Variety(i), col = Get.Variety.Colour(Get.Variety(i)) , border = Get.Variety.Colour(Get.Variety(i)), xlim = c(0,20)) # Get the possible posterior distributions: yGrid = seq(min(0),max(20),length=21) mcmcMat = as.matrix(poisson_model) chainIdx = round(seq(1,nrow(mcmcMat),length=20)) # Plot the posterior distributions: for (stepIdx in chainIdx) { lines(yGrid , dpois(yGrid, mcmcMat[stepIdx,paste0("lambda[",var,"]")]) *length(y)/sum(is.finite(y)), lwd = 2.5, col="grey" ) } } # Apply the Plot.Posterior.Predictive.Check() to each variety. for(var in 1:4){ if (SAVE_POISSON_OUTPUT == TRUE){ name = paste0(EXPORT_PATH, 'Posterior_Predictive_Check_Poisson_Model_', Get.Variety(var), '_', growth_stage, '.pdf') print(name) pdf(file = name) plot = Plot.Poisson.Posterior.Predictive.Check(var) plot dev.off() } } #====================== # Export posterior HDIs #====================== Poisson_HDI_Data = data.frame(Variety = character(0), Growth_Stage = integer(0), Mean_Lambda = numeric(0), Lower_HDI_Lambda = numeric(0), Upper_HDI_Lambda = numeric(0)) for(i in 1:4){ Variety = Get.Variety(i) Growth_Stage = as.integer(stringr::str_extract(growth_stage, "\\d\\d")) Lower_HDI_Lambda = HDI.From.MCMC(as.matrix(poisson_model)[,paste0('lambda[', i, ']')])[1] Upper_HDI_Lambda = HDI.From.MCMC(as.matrix(poisson_model)[,paste0('lambda[', i, ']')])[2] Mean_Lambda = mean(as.matrix(poisson_model)[,paste0('lambda[', i, ']')]) var_data = data.frame(Variety, Growth_Stage, Mean_Lambda, Lower_HDI_Lambda, Upper_HDI_Lambda) Poisson_HDI_Data = full_join(Poisson_HDI_Data, var_data) } print(Poisson_HDI_Data) if(SAVE_POISSON_OUTPUT == TRUE){ write.csv(file = paste0(EXPORT_PATH, 'HDI_Data_Fragment_Number_Poisson_Model_', growth_stage, '.csv'), Poisson_HDI_Data) } }