Compute a functional supervised classification with FGSAM and FGLM methods applied to fully-convolved signals at 38 kHz (3rd and 4th columns in Table 1)

Load libraries used throughout the whole script

library(foreign)
library(cvTools)
library(fda.usc)

Load libraries and required setup used to compute cross-validation loops in parallel

library(doParallel)
numCores <- detectCores()
registerDoParallel(numCores)

Read echoes data with the fully-correction (including Poulliquen convolution) Signals for 38 kHz

PS<-read.csv("./data/signals-convolved-38kHz.csv",header=FALSE)

Read the right classification for each signal

signal_class<-read.table("./data/signal-classification-38kHz.csv",header=FALSE, sep=",", 
                       na.strings="NA", strip.white=TRUE)

Compute the number fo signals for each class: Point measurements 1,2,3,4,5,6 and 7 correspond to the SP1 class

d=length(signal_class[,(signal_class[1,]==1) | (signal_class[1,]==2) | (signal_class[1,]==3) |
                     (signal_class[1,]==4) | (signal_class[1,]==5) | (signal_class[1,]==6) |
                     (signal_class[1,]==7)])[1]

Point measurements 8,9, and 10 correspond to the SP2 class

e=length(signal_class[,(signal_class[1,]==8) | (signal_class[1,]==9) | (signal_class[1,]==10)])[1]

Point measurements 11,12,13, and 14 correspond to the SP3 class

f=length(signal_class[,(signal_class[1,]==11) | (signal_class[1,]==12) | (signal_class[1,]==13) |
                     (signal_class[1,]==14)])[1]

Define class identifiers for each signal: 1 for SP1, 2 for SP2, and 3 for SP3

groups=c(rep(1,d),rep(2,e),rep(3,f))

Data depuration: erase those incompleta data or NaN values in 38 kHz signals

mlearn=fdata((PS[which(!complete.cases(PS)==FALSE),]))
mlearn_depured=PS[which(!complete.cases(PS)==FALSE),]
groups_depured=groups[which(!complete.cases(PS)==FALSE)]
diml=dim(mlearn_depured)[1]

Descriptive statistic for echo signal acdquiered at 38 kHz Plot all the echo curves Classify the curves using the identifiers

groups_cat=as.factor(groups_depured)
curves.fdata1=fdata(mlearn_depured[groups_cat=="1",])
curves.fdata2=fdata(mlearn_depured[groups_cat=="2",])
curves.fdata3=fdata(mlearn_depured[groups_cat=="3",])

Rescale the time scale using the sampling time value for 38kHz

sampling_time=1.024/4
x=curves.fdata1$argvals*sampling_time

Plot the echo curves in pdf file

pdf(height=5, width=7, file="DataConvolved38kHz.pdf")
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab="Time (ms)",ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("Sand", "Sand with vegetation", "Rocks"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")
dev.off()

Plot only the first 20 curves

nc=20
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab="Time (ms)",ylab="Sound intensity (dB)",
cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:nc){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:nc){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:nc){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("Sand", "Sand with vegetation", "Rocks"),cex=1.2,
col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Supervised classification at 38 kHz: taking only 1 curve for each subset of 4 curves in the SP1 class

mlearn_data=fdata(mlearn_depured[c(seq(1,d,by=4),(d+1):diml),])
groups_data=groups_depured[c(seq(1,d,by=4),(d+1):diml)]
rg<-rangeval(mlearn_data)

Compute 10-fold cross-validation

set.seed(1)
fold=cvFolds(dim(mlearn_data)[1], K = 10, type = "random")

Define a function to functional GLM classification purposes

classify_data_glm <- function(x,y,basisx,basisb,fold,k_pc) {
  error <- foreach(j=1:fold$K, .combine=c) %dopar% {
    ind=fold$subset[fold$which==j]
    xx<-x[-ind,]
    yy<-y[-ind]
    ldata<-list("df"=data.frame(yy),"xx"=xx)
    if(is.null(basisx) & is.null(basisb)){
      basispc<-list("xx"=create.pc.basis(xx,1:k_pc))
      out=classif.glm(yy~xx,data=ldata,basis.x=basispc)
    } else {
      out=classif.glm(yy~xx,data=ldata,basis.x=basisx,basis.b=basisb)
    }
    newldata<-list("xx"=x[ind])
    sum(predict.classif(out,newldata) != y[ind])/length(y[ind])
  }
  return(error)
}

Define a function to functional GSAM classification purposes

classify_data_gsam <- function(x,y,basisx,basisb,fold,k_val,k_pc) {
  error <- foreach(j=1:fold$K, .combine=c) %dopar% {
    ind=fold$subset[fold$which==j]
    xx<-x[-ind,]
    yy<-y[-ind]
    ldata<-list("df"=data.frame(yy),"xx"=xx)
    if(is.null(basisx) & is.null(basisb)){
      basispc<-list("xx"=create.pc.basis(xx,1:k_pc))
      out=classif.gsam(yy~s(xx,k=k_val),data=ldata,basis.x=basispc)
    } else {
      out=classif.gsam(yy~s(xx,k=k_val),data=ldata,basis.x=basisx,basis.b=basisb)
    }
    newldata<-list("xx"=x[ind])
    sum(predict.classif(out,newldata) != y[ind])/length(y[ind])
  }
  return(error)
}

Functional classification using different options for FGLM and FGSAM methods using functional spline basis corresponding to FGLM1 and FGSAM1 cases (see Table 1)

nbx<-15;nbb<-7;k_val<-5;k_pc<-NULL
basisx<-list("xx"=create.bspline.basis(rg,nbx))
basisb<-list("xx"=create.bspline.basis(rg,nbb))
error_fgsam1=classify_data_gsam(mlearn_data,groups_data,basisx,basisb,fold,k_val,k_pc) 
error_fglm1=classify_data_glm(mlearn_data,groups_data,basisx,basisb,fold,k_pc) 

Functional classification using different options for FGLM method using functional Fourier basis corresponding to FGLM2 and FGSAM2 cases (see Table 1)

nbb<-7;k_pc<-NULL
basisx<-list("xx"=create.fourier.basis(rg,nbb))
basisb<-list("xx"=create.fourier.basis(rg,nbb))
error_fgsam2=classify_data_gsam(mlearn_data,groups_data,basisx,basisb,fold,k_val,k_pc) 
error_fglm2=classify_data_glm(mlearn_data,groups_data,basisx,basisb,fold,k_pc) 

Functional classification using different options for FGLM and FGSAM methods using functional principal component basis corresponding to FGSAM3 and FGLM3 cases (see Table 1)

basisx<-NULL; basisb<-NULL
k_val<-5; k_pc<-10
error_fgsam3=classify_data_gsam(mlearn_data,groups_data,basisx,basisb,fold,k_val,k_pc) 
error_fglm3=classify_data_glm(mlearn_data,groups_data,basisx,basisb,fold,k_pc) 

Collect all the classification results Compute mean and standard deviation of error classification reported in Table 1

error=rbind(error_fgsam1,error_fglm1,error_fgsam2, error_fglm2,error_fgsam3,error_fglm3)

The classification results are slightly more accurate if the functional classification techniques are applied to the signals which include the two echoes Compute mean of correct classification (3rd column in Table 1)

round(1-apply(error,1,mean),4)
## error_fgsam1  error_fglm1 error_fgsam2  error_fglm2 error_fgsam3 
##       0.9624       0.9024       0.9569       0.9008       0.9756 
##  error_fglm3 
##       0.9664

Compute standard deviation of correct classification (4th column in Table 1)

round(apply(error,1,sd),4)
## error_fgsam1  error_fglm1 error_fgsam2  error_fglm2 error_fgsam3 
##       0.0251       0.0283       0.0199       0.0316       0.0235 
##  error_fglm3 
##       0.0191