This Markdown will go through the Toy example used in the earlier section of the Article; generating the data, fitting the models, and reproducing the plots.

Setup

Let’s set the seed first:

set.seed(99999)

And load necesarry packages:

library(lhs)
library(gridExtra)
library(ggplot2)

And load in required functions (for fitting and predicting HetGP and DetHetGP)

source("HetGP Fit.R")
source("HetGP Predict.R")
source("DetGP Fit.R")
source("DetGP Predict.R")
source("DetHetGP Fit.R")
source("DetHetGP Predict.R")

HetGP

So first, let’s fit HetGP

Data generation

Here is the toy simulator:

dimen <- 1 #1D problem

#toy simulator
simulator <- function(x){
  sin(pi+6*pi*x[,1])*(1-x[,1]) + log(0.2+x[,1])  + rnorm(length(x[,1]),0,1)*(1.2-x[,1])
}

Let’s generate the training data

N <- 50 #number of input points
x <- maximinLHS(N, dimen) #x values

#output data
yclean <- simulator(x)

#standardise:
meany <- mean(c(yclean)) #mean of data sample
sdy <- sd(c(yclean)) #standard deviation
y <- (yclean-meany)/sdy #standardised output

And what points do we want to predict for?

Npred <- 100 #how many prediction points
xpred <-as.matrix(seq(0,1,len = Npred)) #x locations

We can also simulate many, many, times to get the ``truth’’ (we could’ve also just get the truth analytically)

#########

Rep <- 100000 #number of repeats
Ntruth <- Npred*Rep #total number of inputs

xtruth <- do.call("rbind",replicate(Rep, xpred, simplify = FALSE)) #repeated x values

#output data
ytruth <- simulator(xtruth)

#standardise:
ytruth <- (ytruth-meany)/sdy

#means and variance of output (for each unique input value)
ytruth_mean <- rep(0, Npred)
ytruth_var <- rep(0, Npred)
for (i in 1:Npred){
  ytruth_mean[i] <- mean(ytruth[(1:Rep)*Npred - (Npred-i)]) #get mean
  ytruth_var[i] <- var(ytruth[(1:Rep)*Npred - (Npred-i)])   #get variance
}

Truefunc <-  data.frame(x=xpred, mean = ytruth_mean, var = ytruth_var) #put into data frame

Fitting and Prediction

We can now fit HetGP and make predictions

parameterestimates <- HetGPFit(x, y) #fit HetGP
samples <- HetGPPredict(x, y, parameterestimates, xpred, M=1000) #make predictions (increasing M will reduce `spikeyness' of plot)
ysamples <- samples[[1]] #extract the HetGP predictions

And we can generate the plots as used in the paper

#the plot of just the data
p1 <- ggplot()+
  geom_point(aes(x=x, y = y)) +
  theme_minimal(base_size = 16) +
  ylim(-5,3.5)+
  xlim(0,1)


#The plot of the HetGP fit

GPinterHet <- data.frame(xpred=xpred, postmean = apply(ysamples,2,mean), postvar = apply(ysamples,2,var)) #create data frames for ggplot to use
GPdataHet <- data.frame(x=x, y=y)

#plot it 
p2 <- ggplot(data = GPinterHet) +
        geom_line(aes(x=xpred, y=postmean, colour = "Het")) +
        geom_point(data = GPdataHet, aes(x=x, y=y, colour = "True")) +
        geom_ribbon(aes(x=xpred, ymin = (postmean -2*sqrt(postvar)), ymax = postmean + 2*sqrt(postvar), fill = "Het"), alpha = 0.15) +
        geom_line(data = Truefunc, inherit.aes = FALSE, aes(x=x, y=mean, colour = "True")) +
        geom_ribbon(data = Truefunc, aes(x=x, ymin = (mean -2*sqrt(var)), ymax = mean + 2*sqrt(var), fill = "True"), alpha = 0.15) +
        theme_minimal(base_size = 16) +
        ylab("y") +
        xlab("x")+
        ylim(-5,3.5) +
        xlim(0,1) +
        scale_fill_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Det" = "#D55E00"), guide=FALSE) +
        scale_colour_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Adjust" = "#CC79A7", "Det" = "#D55E00"), guide=FALSE)



grid.arrange(p1, p2, nrow = 1) #plot both plots

DetGP

Now let’s fit the DetGP

Data generation

Here is the deterministic approximation

#deterministic simulator
detsimulator <- function(x){
  sin(pi+6*pi*x[,1])*(1-x[,1]) + log(0.2+x[,1])  + 1*(1.2-x[,1])
}

Now let’s generate the deterministic training data

set.seed(12345) # set the seed again (adjusting M earlier would change results here otherwise)
Ndet <- 12 #number of input points
xdet <- maximinLHS(Ndet, dimen) #x values

#output data
ydetclean <- detsimulator(xdet)

#standardise:
ydet <- (ydetclean-meany)/sdy

Fitting and Prediction

And now can now fit a DetGP and make predictions

parameterestimates <- DetGPFit(xdet, ydet) #fit DetGP
GPpred <- DetGPPredict(xdet,ydet, parameterestimates, xpred) #make predictions

And we can generate the plots as used in the paper

#the plot of just the data
p3 <- ggplot()+
  geom_point(aes(x=xdet, y = ydet), shape = 3) +
  theme_minimal(base_size = 16) +
  ylab("y") +
  xlab("x")+
  ylim(-1.5,2.5) +
  xlim(0,1)


#The plot of the DetGP fit

GPinterDet <- data.frame(x=xpred, postmean = GPpred$mean, postvar = GPpred$var) #create data frames for ggplot to use
GPdataDet <- data.frame(x=xdet, y=ydet)

#plot it 
p4 <- ggplot(data = GPinterDet) +
  geom_line(aes(x=x, y=postmean, colour = "Het")) +
  geom_point(data = GPdataDet, aes(x=x, y=y, colour = "True"), shape = 3) +
  geom_ribbon(aes(x=x, ymin = (postmean -2*sqrt(postvar)), ymax = postmean + 2*sqrt(postvar), fill = "Het"), alpha = 0.15) +
  theme_minimal(base_size = 16) +
  ylab("y") +
  ylim(-1.5,2.5) +
  xlim(0,1) +
  scale_fill_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Det" = "#D55E00"), guide=FALSE) +
  scale_colour_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Adjust" = "#CC79A7", "Det" = "#D55E00"), guide=FALSE)


grid.arrange(p3, p4, nrow = 1) #plot both plots

DetHetGP

And now let’s fit DetHetGP

Fitting and Prediction

We already have all the data needed, so lets fit DetHetGP and make predictions

parameterestimates <- DetHetGPFit(x, y, xdet, ydet) #Fit DetHetGP
samples <- DetHetGPPredict(x, y, xdet, ydet, parameterestimates, xpred, M = 1000) #make predictions (increasing M will reduce `spikeyness' of plot)
ysamples <- samples[[1]] #extract the DetHetGP predictions 

And we can generate the plots as used in the paper

#the plot of just the data
p5 <- ggplot() +
  geom_point(aes(x=x, y=y)) +
  geom_point(aes(x=xdet, y=ydet), shape = 3) +
  theme_minimal(base_size = 16) +
  ylab("y") +
  xlab("x")+
  ylim(-5,3.5) +
  xlim(0,1)


#The plot of the DetHetGP fit

GPinterDetHet <- data.frame(x=xpred, postmean = apply(ysamples,2,mean), postvar = apply(ysamples,2,var)) #create data frames for ggplot to use
GPdataDetHeth <- data.frame(x = x, y = y)
GPdataDetHetd <- data.frame(xdet=xdet, ydet=ydet)

#plot it 
p6 <- ggplot(data = GPinterDetHet) +
  geom_ribbon(data = GPinterDetHet, aes(x=x, ymin = (postmean -2*sqrt(postvar)), ymax = postmean + 2*sqrt(postvar), fill = "DetHet"), alpha = 0.15) +
  geom_ribbon(data = Truefunc, aes(x=x, ymin = (mean -2*sqrt(var)), ymax = mean + 2*sqrt(var), fill = "True"), alpha = 0.15) +
  geom_line(data = Truefunc, inherit.aes = FALSE, aes(x=x, y=mean, colour = "True")) +
  geom_line(data = GPinterDetHet, aes(x=x, y=postmean, colour = "DetHet")) +
  geom_point(data = GPdataDetHeth, aes(x=x, y=y, colour = "True")) +
  geom_point(data = GPdataDetHetd, aes(x=xdet, y=ydet, colour = "True"), shape = 3) +
  geom_line(data = GPinterDetHet, aes(x=x, y = apply(samples[[4]],2, mean), colour = "Det")) +
  geom_hline(yintercept = 0, linetype="dashed", colour = "black") +
  theme_minimal(base_size = 16) +
  ylab("y") +
  xlim(0,1)+
  ylim(-5,3.5) +
  scale_fill_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Det" = "#D55E00"), guide=FALSE) +
  scale_colour_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Adjust" = "#CC79A7", "Det" = "#D55E00"), guide=FALSE)


grid.arrange(p5, p6, nrow = 1) #plot both plots

HetGP with more data

Data generation

Now let’s add extra stochastic points, and show that HetGP still struggles

set.seed(54321) # set the seed again (adjusting M earlier would change results here otherwise)
xnew <- xdet #the new x locations should be the same as the previous deterministic locations
ynewclean <- simulator(xnew) #obtain new stochastic runs
#standardise:
ynew <- (ynewclean-meany)/sdy

#Merge old and new data sets
x <- rbind(x, xnew) #merge x
y <- c(y, ynew)     #merge y
N <- N+Ndet         #update N

Fitting and Prediction

We can now fit HetGP again and make predictions

parameterestimates <- HetGPFit(x, y) #fit HetGP
samples <- HetGPPredict(x, y, parameterestimates, xpred, M=1000) #make predictions (increasing M will reduce `spikeyness' of plot)
ysamples <- samples[[1]] #extract the HetGP predictions

And we can again generate the plots as used in the paper

#the plot of just the data
p7 <- ggplot()+
  geom_point(aes(x=x, y = y)) +
  theme_minimal(base_size = 16) +
  ylim(-5,3.5) +
  xlim(0,1)


#The plot of the HetGP fit

GPinterHet <- data.frame(xpred=xpred, postmean = apply(ysamples,2,mean), postvar = apply(ysamples,2,var)) #create data frames for ggplot to use
GPdataHet <- data.frame(x=x, y=y)

#plot it 
p8 <- ggplot(data = GPinterHet) +
        geom_line(aes(x=xpred, y=postmean, colour = "Het")) +
        geom_point(data = GPdataHet, aes(x=x, y=y, colour = "True")) +
        geom_ribbon(aes(x=xpred, ymin = (postmean -2*sqrt(postvar)), ymax = postmean + 2*sqrt(postvar), fill = "Het"), alpha = 0.15) +
        geom_line(data = Truefunc, inherit.aes = FALSE, aes(x=x, y=mean, colour = "True")) +
        geom_ribbon(data = Truefunc, aes(x=x, ymin = (mean -2*sqrt(var)), ymax = mean + 2*sqrt(var), fill = "True"), alpha = 0.15) +
        theme_minimal(base_size = 16) +
        ylab("y") +
        xlab("x")+
        ylim(-5,3.5) +
        xlim(0,1) +
        scale_fill_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Det" = "#D55E00"), guide=FALSE) +
        scale_colour_manual(values = c("True" = "#000000", "DetHet" = "#0072B2", "Het" = "#0072B2", "Adjust" = "#CC79A7", "Det" = "#D55E00"), guide=FALSE)



grid.arrange(p7, p8, nrow = 1) #plot both plots