rm(list=ls(all=T)) library(mgcv) library(gss) ################################################################################ # Function for preliminary detection of outliers ################################################################################ fn.outliers <- function(X,Y,a1=2.5) { x1<- X[,1];x2<- X[,2];x3<- X[,3];x4<- X[,4] # Must be corresponding to the dimension of X temp.model<- ssanova(Y~x1+x2+x3+x4,alpha=1) temp.X<- data.frame(x1=x1,x2=x2,x3=x3,x4=x4) temp.fitted<- predict(temp.model,newdata=temp.X) temp.resid<- Y-temp.fitted temp.resid<- (temp.resid-mean(temp.resid))/sd(temp.resid) n.out <- length(which(abs(temp.resid)>a1))- 2*(1-pnorm(a1))*length(Y) if (n.out< 0) n.out<- 0 return(round(n.out)) } ################################################################################ # Outlier Shifting Smoothing Splines ANOVA ################################################################################ out.shift.ssanova<-function(X,Y,lambda,a1=2.5,weights=NULL){ n<-length(Y) n.out <-fn.outliers(X,Y,a1) x1<- X[,1];x2<- X[,2];x3<- X[,3];x4<- X[,4] # Must be corresponding to the dimension of X temp.ss<- ssanova(Y ~ x1+x2+x3+x4,alpha=lambda,method="m",weights=weights) NEW.data<- data.frame(x1=x1,x2=x2,x3=x3,x4=x4) fit.est<- predict(temp.ss,NEW.data) sigma.est<- mad(Y-fit.est) gamma.est<-rep(0,n) Y.old<- Y if (n.out>0) { lambda.gamma<- sigma.est*qnorm((2*n-n.out)/(2*n))} if (n.out==0) {lambda.gamma<- sigma.est*qnorm((2*n-1e-06)/(2*n))} tol<- 100000 n.iter <- 0 while(tol>1e-5 & n.iter<100 ) { fit.pre<- fit.est nonzero.index<-which(abs(Y.old-fit.est)>=lambda.gamma) gamma.est[nonzero.index]<- (Y.old-fit.est)[nonzero.index] Y.new<- Y.old - gamma.est model.update <- ssanova(Y.new~ x1+x2+x3+x4,alpha=lambda,weights = weights,method="m") fit.est<- predict(model.update,NEW.data) tol<- mean((fit.pre-fit.est)^2) n.iter<- n.iter+1 } Y.fit<- predict(model.update,NEW.data) object<- list(fit=Y.fit, iter = n.iter, sigma.est = sigma.est, gamma.est = gamma.est,model=model.update,n.out=n.out) } ################################################################################ # Outlier Shifting Generalized Additive Model with \tilde{lambda}_gamma ## ################################################################################ out.shift.gam<-function(X,Y,a1=2.5,weights=NULL) { n<-length(Y) n.out <-fn.outliers(X,Y,a1) x1<- X[,1]; x2<- X[,2]; x3<- X[,3]; x4<- X[,4] # Should be corresponding to the dimension of X temp.gam<- gam(Y ~ s(x1,bs="ts")+ s(x2,bs="ts")+s(x3,bs="ts")+s(x4,bs="ts"),weights=weights,method="REML") fit.est<- temp.gam$fitted sigma.est<- mad(Y-fit.est) gamma.est<-rep(0,n) Y.old<- Y lambda.gamma<- sigma.est*qnorm((2*n-n.out)/(2*n)) tol<- 100000 n.iter <- 0 while(tol>1e-5 & n.iter<100 ) { fit.pre<- fit.est nonzero.index<-which(abs(Y.old-fit.est)>=lambda.gamma) gamma.est[nonzero.index]<- (Y.old-fit.est)[nonzero.index] Y.new<- Y.old - gamma.est model.update <- gam(Y.new ~ s(x1,bs="ts")+ s(x2,bs="ts")+s(x3,bs="ts")+s(x4,bs="ts"),weights=weights,method="REML") fit.est<- model.update$fitted tol<- mean((fit.pre-fit.est)^2) n.iter<- n.iter+1 } Y.fit<- model.update$fitted object<- list(fit=Y.fit, iter = n.iter, sigma.est = sigma.est, gamma.est = gamma.est,model=model.update) } ################################## # Simulation Begins from here. # ################################## f1<-function(x){ temp<- 4*sin(pi*x) return(temp) } f2<-function(x){ temp<- exp(2*x)-2.76 return(temp) } f3<-function(x){ temp<- x^11*(10*(1-x))^6+10^4*x^3*(1-x)^10-1.4 return(temp) } f4<- function(x) { return(rep(-1,length(x))) } n.sam = 200 n.rep=500 con.prop=0.05 n.con = n.sam*con.prop lambda.ssanova = lambda.ssanova.w02 = lambda.ssanova.w04 = lambda.ssanova.w05 = lambda.ssanova.w06 = lambda.ssanova.w08 = rep(0,n.rep) lambda.ssanova.S = lambda.ssanova.w02.S = lambda.ssanova.w04.S = lambda.ssanova.w05.S = lambda.ssanova.w06.S = lambda.ssanova.w08.S = rep(0,n.rep) gamma.ssanova = gamma.ssanova.w02= gamma.ssanova.w04= gamma.ssanova.w05= gamma.ssanova.w06= gamma.ssanova.w08 = matrix(0,nrow=n.rep,ncol=n.sam) outlier.mat = matrix(0,nrow=n.rep,ncol=n.sam) fitted.ssanova = fitted.ssanova.w02 = fitted.ssanova.w04 = fitted.ssanova.w05 = fitted.ssanova.w06 = fitted.ssanova.w08 =matrix(0,nrow=n.rep,ncol=n.sam) fitted.ssanova.S = fitted.ssanova.w02.S = fitted.ssanova.w04.S = fitted.ssanova.w05.S = fitted.ssanova.w06.S = fitted.ssanova.w08.S =matrix(0,nrow=n.rep,ncol=n.sam) abs.err.ssanova = abs.err.ssanova.w02= abs.err.ssanova.w04 =abs.err.ssanova.w05 =abs.err.ssanova.w06 =abs.err.ssanova.w08 = rep(0,n.rep) # absolute error. Cannot find for each term abs.err.ssanova.S=abs.err.ssanova.w02.S=abs.err.ssanova.w04.S=abs.err.ssanova.w05.S=abs.err.ssanova.w06.S=abs.err.ssanova.w08.S= rep(0,n.rep) sq.err.ssanova = sq.err.ssanova.w02= sq.err.ssanova.w04=sq.err.ssanova.w05=sq.err.ssanova.w06=sq.err.ssanova.w08 = rep(0,n.rep) # square error by term sq.err.ssanova.S = sq.err.ssanova.w02.S = sq.err.ssanova.w04.S=sq.err.ssanova.w05.S=sq.err.ssanova.w06.S=sq.err.ssanova.w08.S = rep(0,n.rep) sigma.ssanova = sigma.ssanova.w02 = sigma.ssanova.w04 = sigma.ssanova.w05 = sigma.ssanova.w06 = sigma.ssanova.w08 = rep(0,n.rep) sigma.ssanova.S = sigma.ssanova.w02.S = sigma.ssanova.w04.S = sigma.ssanova.w05.S = sigma.ssanova.w06.S = sigma.ssanova.w08.S = rep(0,n.rep) ################################################################################ lambda.gam = lambda.gam.w02 = lambda.gam.w04 = lambda.gam.w05 = lambda.gam.w06 = lambda.gam.w08 = rep(0,n.rep) lambda.gam.S = lambda.gam.w02.S = lambda.gam.w04.S = lambda.gam.w05.S = lambda.gam.w06.S = lambda.gam.w08.S = rep(0,n.rep) gamma.gam = gamma.gam.w02= gamma.gam.w04= gamma.gam.w05= gamma.gam.w06= gamma.gam.w08 = matrix(0,nrow=n.rep,ncol=n.sam) fitted.gam = fitted.gam.w02 = fitted.gam.w04 = fitted.gam.w05 = fitted.gam.w06 = fitted.gam.w08 =matrix(0,nrow=n.rep,ncol=n.sam) fitted.gam.S = fitted.gam.w02.S = fitted.gam.w04.S = fitted.gam.w05.S = fitted.gam.w06.S = fitted.gam.w08.S =matrix(0,nrow=n.rep,ncol=n.sam) abs.err.gam = abs.err.gam.w02= abs.err.gam.w04 =abs.err.gam.w05 =abs.err.gam.w06 =abs.err.gam.w08 = matrix(0,nrow=n.rep,ncol=4) # absolute error by term abs.err.gam.S=abs.err.gam.w02.S=abs.err.gam.w04.S=abs.err.gam.w05.S=abs.err.gam.w06.S=abs.err.gam.w08.S= matrix(0,nrow=n.rep,ncol=4) sq.err.gam = sq.err.gam.w02= sq.err.gam.w04=sq.err.gam.w05=sq.err.gam.w06=sq.err.gam.w08 = matrix(0,nrow=n.rep,ncol=4) # square error by term sq.err.gam.S = sq.err.gam.w02.S = sq.err.gam.w04.S=sq.err.gam.w05.S=sq.err.gam.w06.S=sq.err.gam.w08.S = matrix(0,nrow=n.rep,ncol=4) abs.err = abs.err.w02 = abs.err.w04 = abs.err.w05=abs.err.w06=abs.err.w08=rep(0,n.rep) abs.err.S = abs.err.w02.S = abs.err.w04.S = abs.err.w05.S=abs.err.w06.S=abs.err.w08.S=rep(0,n.rep) sq.err = sq.err.w02 = sq.err.w04 = sq.err.w05=sq.err.w06=sq.err.w08=rep(0,n.rep) sq.err.S = sq.err.w02.S = sq.err.w04.S = sq.err.w05.S=sq.err.w06.S=sq.err.w08.S=rep(0,n.rep) sigma.gam = sigma.gam.w02 = sigma.gam.w04 = sigma.gam.w05 = sigma.gam.w06 = sigma.gam.w08 = rep(0,n.rep) sigma.gam.S = sigma.gam.w02.S = sigma.gam.w04.S = sigma.gam.w05.S = sigma.gam.w06.S = sigma.gam.w08.S = rep(0,n.rep) for (i in 1:n.rep){ set.seed(i+100) x1<-runif(n.sam);x2<-runif(n.sam);x3<-runif(n.sam);x4<-runif(n.sam) error = epsilon = rnorm(n.sam,0,2) index.out<- which(rank(-abs(epsilon))