Load libraries used throughout the whole script
library(foreign)
library(cvTools)
library(fda.usc)
Load libraries and required setup used to compute cross-validation loops in parallel
library(doParallel)
numCores <- detectCores()
registerDoParallel(numCores)
Read echoes data with the fully-correction (including Poulliquen convolution) Signals for 38 kHz
PS<-read.csv("./data/signals-convolved-38kHz.csv",header=FALSE)
Read the right classification for each signal
signal_class<-read.table("./data/signal-classification-38kHz.csv",header=FALSE, sep=",",
na.strings="NA", strip.white=TRUE)
Compute the number fo signals for each class: Point measurements 1,2,3,4,5,6 and 7 correspond to the SP1 class
d=length(signal_class[,(signal_class[1,]==1) | (signal_class[1,]==2) | (signal_class[1,]==3) |
(signal_class[1,]==4) | (signal_class[1,]==5) | (signal_class[1,]==6) |
(signal_class[1,]==7)])[1]
Point measurements 8,9, and 10 correspond to the SP2 class
e=length(signal_class[,(signal_class[1,]==8) | (signal_class[1,]==9) | (signal_class[1,]==10)])[1]
Point measurements 11,12,13, and 14 correspond to the SP3 class
f=length(signal_class[,(signal_class[1,]==11) | (signal_class[1,]==12) | (signal_class[1,]==13) |
(signal_class[1,]==14)])[1]
Define class identifiers for each signal: 1 for SP1, 2 for SP2, and 3 for SP3
groups=c(rep(1,d),rep(2,e),rep(3,f))
Data depuration: erase those incompleta data or NaN values in 38 kHz signals
mlearn=fdata((PS[which(!complete.cases(PS)==FALSE),]))
mlearn_depured=PS[which(!complete.cases(PS)==FALSE),]
groups_depured=groups[which(!complete.cases(PS)==FALSE)]
diml=dim(mlearn_depured)[1]
Descriptive statistic for echo signal acdquiered at 38 kHz Plot all the echo curves Classify the curves using the identifiers
groups_cat=as.factor(groups_depured)
curves.fdata1=fdata(mlearn_depured[groups_cat=="1",])
curves.fdata2=fdata(mlearn_depured[groups_cat=="2",])
curves.fdata3=fdata(mlearn_depured[groups_cat=="3",])
Rescale the time scale using the sampling time value for 38kHz
sampling_time=1.024/4
x=curves.fdata1$argvals*sampling_time
Plot the echo curves in pdf file
pdf(height=5, width=7, file="DataConvolved38kHz.pdf")
plot(x,curves.fdata1$data[1,],type="l",lty=4,
xlab="Time (ms)",ylab="Sound intensity (dB)",
cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("Sand", "Sand with vegetation", "Rocks"),cex=1.2,
col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")
dev.off()
Plot only the first 20 curves
nc=20
plot(x,curves.fdata1$data[1,],type="l",lty=4,
xlab="Time (ms)",ylab="Sound intensity (dB)",
cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:nc){
lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:nc){
lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:nc){
lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("Sand", "Sand with vegetation", "Rocks"),cex=1.2,
col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")
Supervised classification at 38 kHz: taking only 1 curve for each subset of 4 curves in the SP1 class
mlearn_data=fdata(mlearn_depured[c(seq(1,d,by=4),(d+1):diml),])
groups_data=groups_depured[c(seq(1,d,by=4),(d+1):diml)]
rg<-rangeval(mlearn_data)
Compute 10-fold cross-validation
set.seed(1)
fold=cvFolds(dim(mlearn_data)[1], K = 10, type = "random")
Define a function to functional GLM classification purposes
classify_data_glm <- function(x,y,basisx,basisb,fold,k_pc) {
error <- foreach(j=1:fold$K, .combine=c) %dopar% {
ind=fold$subset[fold$which==j]
xx<-x[-ind,]
yy<-y[-ind]
ldata<-list("df"=data.frame(yy),"xx"=xx)
if(is.null(basisx) & is.null(basisb)){
basispc<-list("xx"=create.pc.basis(xx,1:k_pc))
out=classif.glm(yy~xx,data=ldata,basis.x=basispc)
} else {
out=classif.glm(yy~xx,data=ldata,basis.x=basisx,basis.b=basisb)
}
newldata<-list("xx"=x[ind])
sum(predict.classif(out,newldata) != y[ind])/length(y[ind])
}
return(error)
}
Define a function to functional GSAM classification purposes
classify_data_gsam <- function(x,y,basisx,basisb,fold,k_val,k_pc) {
error <- foreach(j=1:fold$K, .combine=c) %dopar% {
ind=fold$subset[fold$which==j]
xx<-x[-ind,]
yy<-y[-ind]
ldata<-list("df"=data.frame(yy),"xx"=xx)
if(is.null(basisx) & is.null(basisb)){
basispc<-list("xx"=create.pc.basis(xx,1:k_pc))
out=classif.gsam(yy~s(xx,k=k_val),data=ldata,basis.x=basispc)
} else {
out=classif.gsam(yy~s(xx,k=k_val),data=ldata,basis.x=basisx,basis.b=basisb)
}
newldata<-list("xx"=x[ind])
sum(predict.classif(out,newldata) != y[ind])/length(y[ind])
}
return(error)
}
Functional classification using different options for FGLM and FGSAM methods using functional spline basis corresponding to FGLM1 and FGSAM1 cases (see Table 1)
nbx<-15;nbb<-7;k_val<-5;k_pc<-NULL
basisx<-list("xx"=create.bspline.basis(rg,nbx))
basisb<-list("xx"=create.bspline.basis(rg,nbb))
error_fgsam1=classify_data_gsam(mlearn_data,groups_data,basisx,basisb,fold,k_val,k_pc)
error_fglm1=classify_data_glm(mlearn_data,groups_data,basisx,basisb,fold,k_pc)
Functional classification using different options for FGLM method using functional Fourier basis corresponding to FGLM2 and FGSAM2 cases (see Table 1)
nbb<-7;k_pc<-NULL
basisx<-list("xx"=create.fourier.basis(rg,nbb))
basisb<-list("xx"=create.fourier.basis(rg,nbb))
error_fgsam2=classify_data_gsam(mlearn_data,groups_data,basisx,basisb,fold,k_val,k_pc)
error_fglm2=classify_data_glm(mlearn_data,groups_data,basisx,basisb,fold,k_pc)
Functional classification using different options for FGLM and FGSAM methods using functional principal component basis corresponding to FGSAM3 and FGLM3 cases (see Table 1)
basisx<-NULL; basisb<-NULL
k_val<-5; k_pc<-10
error_fgsam3=classify_data_gsam(mlearn_data,groups_data,basisx,basisb,fold,k_val,k_pc)
error_fglm3=classify_data_glm(mlearn_data,groups_data,basisx,basisb,fold,k_pc)
Collect all the classification results Compute mean and standard deviation of error classification reported in Table 1
error=rbind(error_fgsam1,error_fglm1,error_fgsam2, error_fglm2,error_fgsam3,error_fglm3)
The classification results are slightly more accurate if the functional classification techniques are applied to the signals which include the two echoes Compute mean of correct classification (3rd column in Table 1)
round(1-apply(error,1,mean),4)
## error_fgsam1 error_fglm1 error_fgsam2 error_fglm2 error_fgsam3
## 0.9624 0.9024 0.9569 0.9008 0.9756
## error_fglm3
## 0.9664
Compute standard deviation of correct classification (4th column in Table 1)
round(apply(error,1,sd),4)
## error_fgsam1 error_fglm1 error_fgsam2 error_fglm2 error_fgsam3
## 0.0251 0.0283 0.0199 0.0316 0.0235
## error_fglm3
## 0.0191