This Markdown will go through the Toy example used in the earlier section of the Article; generating the data, fitting the models, and reproducing the plots.
Let’s set the seed first:
set.seed(99999)
And load necesarry packages:
library(lhs)
library(gridExtra)
library(ggplot2)
And load in required functions (for fitting and predicting HetGP and DetHetGP)
source("HetGP Fit.R")
source("HetGP Predict.R")
source("DetGP Fit.R")
source("DetGP Predict.R")
source("DetHetGP Fit.R")
source("DetHetGP Predict.R")
So first, let’s fit HetGP
Here is the toy simulator:
dimen <- 1 #1D problem
#toy simulator
simulator <- function(x){
sin(pi+6*pi*x[,1])*(1-x[,1]) + log(0.2+x[,1]) + rnorm(length(x[,1]),0,1)*(1.2-x[,1])
}
Let’s generate the training data
N <- 50 #number of input points
x <- maximinLHS(N, dimen) #x values
#output data
yclean <- simulator(x)
#standardise:
meany <- mean(c(yclean)) #mean of data sample
sdy <- sd(c(yclean)) #standard deviation
y <- (yclean-meany)/sdy #standardised output
And what points do we want to predict for?
Npred <- 100 #how many prediction points
xpred <-as.matrix(seq(0,1,len = Npred)) #x locations
We can also simulate many, many, times to get the ``truth’’ (we could’ve also just get the truth analytically)
#########
Rep <- 100000 #number of repeats
Ntruth <- Npred*Rep #total number of inputs
xtruth <- do.call("rbind",replicate(Rep, xpred, simplify = FALSE)) #repeated x values
#output data
ytruth <- simulator(xtruth)
#standardise:
ytruth <- (ytruth-meany)/sdy
#means and variance of output (for each unique input value)
ytruth_mean <- rep(0, Npred)
ytruth_var <- rep(0, Npred)
for (i in 1:Npred){
ytruth_mean[i] <- mean(ytruth[(1:Rep)*Npred - (Npred-i)]) #get mean
ytruth_var[i] <- var(ytruth[(1:Rep)*Npred - (Npred-i)]) #get variance
}
Truefunc <- data.frame(x=xpred, mean = ytruth_mean, var = ytruth_var) #put into data frame
We can now fit HetGP and make predictions
parameterestimates <- HetGPFit(x, y) #fit HetGP
samples <- HetGPPredict(x, y, parameterestimates, xpred, M=1000) #make predictions (increasing M will reduce `spikeyness' of plot)
ysamples <- samples[[1]] #extract the HetGP predictions
And we can generate the plots as used in the paper
#the plot of just the data
p1 <- ggplot()+
geom_point(aes(x=x, y = y)) +
theme_minimal(base_size = 16) +
ylim(-5,3.5)+
xlim(0,1)
#The plot of the HetGP fit
GPinterHet <- data.frame(xpred=xpred, postmean = apply(ysamples,2,mean), postvar = apply(ysamples,2,var)) #create data frames for ggplot to use
GPdataHet <- data.frame(x=x, y=y)
#plot it
p2 <- ggplot(data = GPinterHet) +
geom_line(aes(x=xpred, y=postmean, colour = "Het")) +
geom_point(data = GPdataHet, aes(x=x, y=y, colour = "True")) +
geom_ribbon(aes(x=xpred, ymin = (postmean -2*sqrt(postvar)), ymax = postmean + 2*sqrt(postvar), fill = "Het"), alpha = 0.15) +
geom_line(data = Truefunc, inherit.aes = FALSE, aes(x=x, y=mean, colour = "True")) +
geom_ribbon(data = Truefunc, aes(x=x, ymin = (mean -2*sqrt(var)), ymax = mean + 2*sqrt(var), fill = "True"), alpha = 0.15) +
theme_minimal(base_size = 16) +
ylab("y") +
xlab("x")+
ylim(-5,3.5) +
xlim(0,1) +
scale_fill_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Det" = "#D55E00"), guide=FALSE) +
scale_colour_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Adjust" = "#CC79A7", "Det" = "#D55E00"), guide=FALSE)
grid.arrange(p1, p2, nrow = 1) #plot both plots
Now let’s fit the DetGP
Here is the deterministic approximation
#deterministic simulator
detsimulator <- function(x){
sin(pi+6*pi*x[,1])*(1-x[,1]) + log(0.2+x[,1]) + 1*(1.2-x[,1])
}
Now let’s generate the deterministic training data
set.seed(12345) # set the seed again (adjusting M earlier would change results here otherwise)
Ndet <- 12 #number of input points
xdet <- maximinLHS(Ndet, dimen) #x values
#output data
ydetclean <- detsimulator(xdet)
#standardise:
ydet <- (ydetclean-meany)/sdy
And now can now fit a DetGP and make predictions
parameterestimates <- DetGPFit(xdet, ydet) #fit DetGP
GPpred <- DetGPPredict(xdet,ydet, parameterestimates, xpred) #make predictions
And we can generate the plots as used in the paper
#the plot of just the data
p3 <- ggplot()+
geom_point(aes(x=xdet, y = ydet), shape = 3) +
theme_minimal(base_size = 16) +
ylab("y") +
xlab("x")+
ylim(-1.5,2.5) +
xlim(0,1)
#The plot of the DetGP fit
GPinterDet <- data.frame(x=xpred, postmean = GPpred$mean, postvar = GPpred$var) #create data frames for ggplot to use
GPdataDet <- data.frame(x=xdet, y=ydet)
#plot it
p4 <- ggplot(data = GPinterDet) +
geom_line(aes(x=x, y=postmean, colour = "Het")) +
geom_point(data = GPdataDet, aes(x=x, y=y, colour = "True"), shape = 3) +
geom_ribbon(aes(x=x, ymin = (postmean -2*sqrt(postvar)), ymax = postmean + 2*sqrt(postvar), fill = "Het"), alpha = 0.15) +
theme_minimal(base_size = 16) +
ylab("y") +
ylim(-1.5,2.5) +
xlim(0,1) +
scale_fill_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Det" = "#D55E00"), guide=FALSE) +
scale_colour_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Adjust" = "#CC79A7", "Det" = "#D55E00"), guide=FALSE)
grid.arrange(p3, p4, nrow = 1) #plot both plots
And now let’s fit DetHetGP
We already have all the data needed, so lets fit DetHetGP and make predictions
parameterestimates <- DetHetGPFit(x, y, xdet, ydet) #Fit DetHetGP
samples <- DetHetGPPredict(x, y, xdet, ydet, parameterestimates, xpred, M = 1000) #make predictions (increasing M will reduce `spikeyness' of plot)
ysamples <- samples[[1]] #extract the DetHetGP predictions
And we can generate the plots as used in the paper
#the plot of just the data
p5 <- ggplot() +
geom_point(aes(x=x, y=y)) +
geom_point(aes(x=xdet, y=ydet), shape = 3) +
theme_minimal(base_size = 16) +
ylab("y") +
xlab("x")+
ylim(-5,3.5) +
xlim(0,1)
#The plot of the DetHetGP fit
GPinterDetHet <- data.frame(x=xpred, postmean = apply(ysamples,2,mean), postvar = apply(ysamples,2,var)) #create data frames for ggplot to use
GPdataDetHeth <- data.frame(x = x, y = y)
GPdataDetHetd <- data.frame(xdet=xdet, ydet=ydet)
#plot it
p6 <- ggplot(data = GPinterDetHet) +
geom_ribbon(data = GPinterDetHet, aes(x=x, ymin = (postmean -2*sqrt(postvar)), ymax = postmean + 2*sqrt(postvar), fill = "DetHet"), alpha = 0.15) +
geom_ribbon(data = Truefunc, aes(x=x, ymin = (mean -2*sqrt(var)), ymax = mean + 2*sqrt(var), fill = "True"), alpha = 0.15) +
geom_line(data = Truefunc, inherit.aes = FALSE, aes(x=x, y=mean, colour = "True")) +
geom_line(data = GPinterDetHet, aes(x=x, y=postmean, colour = "DetHet")) +
geom_point(data = GPdataDetHeth, aes(x=x, y=y, colour = "True")) +
geom_point(data = GPdataDetHetd, aes(x=xdet, y=ydet, colour = "True"), shape = 3) +
geom_line(data = GPinterDetHet, aes(x=x, y = apply(samples[[4]],2, mean), colour = "Det")) +
geom_hline(yintercept = 0, linetype="dashed", colour = "black") +
theme_minimal(base_size = 16) +
ylab("y") +
xlim(0,1)+
ylim(-5,3.5) +
scale_fill_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Det" = "#D55E00"), guide=FALSE) +
scale_colour_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Adjust" = "#CC79A7", "Det" = "#D55E00"), guide=FALSE)
grid.arrange(p5, p6, nrow = 1) #plot both plots
Now let’s add extra stochastic points, and show that HetGP still struggles
set.seed(54321) # set the seed again (adjusting M earlier would change results here otherwise)
xnew <- xdet #the new x locations should be the same as the previous deterministic locations
ynewclean <- simulator(xnew) #obtain new stochastic runs
#standardise:
ynew <- (ynewclean-meany)/sdy
#Merge old and new data sets
x <- rbind(x, xnew) #merge x
y <- c(y, ynew) #merge y
N <- N+Ndet #update N
We can now fit HetGP again and make predictions
parameterestimates <- HetGPFit(x, y) #fit HetGP
samples <- HetGPPredict(x, y, parameterestimates, xpred, M=1000) #make predictions (increasing M will reduce `spikeyness' of plot)
ysamples <- samples[[1]] #extract the HetGP predictions
And we can again generate the plots as used in the paper
#the plot of just the data
p7 <- ggplot()+
geom_point(aes(x=x, y = y)) +
theme_minimal(base_size = 16) +
ylim(-5,3.5) +
xlim(0,1)
#The plot of the HetGP fit
GPinterHet <- data.frame(xpred=xpred, postmean = apply(ysamples,2,mean), postvar = apply(ysamples,2,var)) #create data frames for ggplot to use
GPdataHet <- data.frame(x=x, y=y)
#plot it
p8 <- ggplot(data = GPinterHet) +
geom_line(aes(x=xpred, y=postmean, colour = "Het")) +
geom_point(data = GPdataHet, aes(x=x, y=y, colour = "True")) +
geom_ribbon(aes(x=xpred, ymin = (postmean -2*sqrt(postvar)), ymax = postmean + 2*sqrt(postvar), fill = "Het"), alpha = 0.15) +
geom_line(data = Truefunc, inherit.aes = FALSE, aes(x=x, y=mean, colour = "True")) +
geom_ribbon(data = Truefunc, aes(x=x, ymin = (mean -2*sqrt(var)), ymax = mean + 2*sqrt(var), fill = "True"), alpha = 0.15) +
theme_minimal(base_size = 16) +
ylab("y") +
xlab("x")+
ylim(-5,3.5) +
xlim(0,1) +
scale_fill_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Det" = "#D55E00"), guide=FALSE) +
scale_colour_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Adjust" = "#CC79A7", "Det" = "#D55E00"), guide=FALSE)
grid.arrange(p7, p8, nrow = 1) #plot both plots