Compute cluster classification with FDA \(k\)-means, weighted \(k\)-means, COR and CORT dissimilarity methods applied to signals at 38 kHz with only time and power corrections (left-most columns in Table 3 and Table 5)

Load libraries used throughout the whole script

library(foreign)
library(fda.usc)
library(TSclust)
library(psych)
library(combinat)

# Source modified kmeans functions of the fda.usc package
source("kmeans.fda.assig.groups.R")
source("kmeans.fda.centers.update.R")
source("kmeans.fda.center.ini.R")
source("kmeans.fda.R")

Load libraries and required setup used to compute cross-validation loops in parallel

library(doParallel)
numCores <- detectCores()
registerDoParallel(numCores)

Read echoes data with only the power and time corrections Signals for 38 kHz

PS<-t(read.csv("../data/signals-unconvolved-38kHz.csv",header=FALSE))

Read the right classification for each signal

signal_class<-read.table("../data/signal-classification-unconvolved-38kHz.csv",header=FALSE, sep=",", 
                       na.strings="NA", strip.white=TRUE)

Compute the number fo signals for each class: Point measurements 1,2,3,4,5,6 and 7 correspond to the SP1 class

d=length(signal_class[,(signal_class[1,]==1) | (signal_class[1,]==2) | (signal_class[1,]==3) |
                     (signal_class[1,]==4) | (signal_class[1,]==5) | (signal_class[1,]==6) |
                     (signal_class[1,]==7)])[1]

Point measurements 8,9, and 10 correspond to the SP2 class

e=length(signal_class[,(signal_class[1,]==8) | (signal_class[1,]==9) | (signal_class[1,]==10)])[1]

Point measurements 11,12,13, and 14 correspond to the SP3 class

f=length(signal_class[,(signal_class[1,]==11) | (signal_class[1,]==12) | (signal_class[1,]==13) |
                     (signal_class[1,]==14)])[1]

Define class identifiers for each signal: 1 for SP1, 2 for SP2, and 3 for SP3

groups=c(rep(1,d),rep(2,e),rep(3,f))

Data depuration: erase those incompleta data or NaN values in 38 kHz signals

mlearn=fdata((PS[which(!complete.cases(PS)==FALSE),]))
mlearn_depured=PS[which(!complete.cases(PS)==FALSE),]
groups_depured=groups[which(!complete.cases(PS)==FALSE)]
diml=dim(mlearn_depured)[1]

Descriptive statistic for echo signal acdquiered at 38 kHz Plot all the echo curves Classify the curves using the identifiers

groups_cat=as.factor(groups_depured)
curves.fdata1=fdata(mlearn_depured[groups_cat=="1",])
curves.fdata2=fdata(mlearn_depured[groups_cat=="2",])
curves.fdata3=fdata(mlearn_depured[groups_cat=="3",])

Rescale the time scale using the ping length

x=curves.fdata1$argvals*1.024/4

Plot the echo curves

plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("Sand", "Sand with vegetation", "Rocks"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Plot only the first 20 curves

nc=20
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:nc){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:nc){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:nc){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("Sand", "Sand with vegetation", "Rocks"),cex=1.2,
col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Relabelling procedure to identify the groups classification in the cluster method, permuting the label groups and getting the best accuracy cluster classification

relabelling <- function(groups,out_clust,n) {
  per=permn(seq(1,n)); best_perm=1; best_accuracy=0.
  for(j in 1:length(per)){
    out_val=out_clust
    for(i in 1:n){out_val[out_clust==i]=per[[j]][i]}
    accuracy=sum(groups==out_val)/sum(table(groups,out_val))
    if(accuracy>best_accuracy){best_perm=j;best_accuracy=accuracy}
  }
  out_val=out_clust
  for(i in 1:n){out_val[out_clust==i]=per[[best_perm]][i]}
  return(out_val)
}

Cluster classification at 38 kHz: taking only 1 curve for each subset of 4 curves in the SP1 class

mlearn_data=fdata(mlearn_depured[c(seq(1,d,by=4),(d+1):diml),],argvals=NULL)
groups_data=groups_depured[c(seq(1,d,by=4),(d+1):diml)]
n_clusters=3

Number of sample realizations kmeans.fda and set seed for reproductibility

B=100; set.seed(1234)

Functional \(k\)-means classification for three classes

Unsupervised classification using FDA k-means algorithm. Centers of groups are obtained using functional data depth: In this case, the func.med.FM (functional median) is used. It returns the deepest curve following Fraiman Muniz criteria.

class_matrix=matrix(0.,nrow=dim(mlearn_data$data)[1],ncol=B)
class_matrix  <- foreach(j=seq(1,B), .combine=cbind) %dopar% {
    out=kmeans.fda(mlearn_data,ncl=n_clusters,max.iter=1000,draw=FALSE,par.ini=list(method="sample",iter=100))
    graphics.off()
    #' Relabel each cluster group with the number associated to SP1, SP2 and SP3 classes
    relabelling(groups_data,out$cluster,n_clusters)
}

# Compute the median with respect to the accuracy value
accuracy_array=array(0.,dim=B)
accuracy_array  <- foreach(j=seq(1,B), .combine=cbind) %dopar% {
    #' Compute accuracy
    sum(groups_data==class_matrix[,j])/sum(table(groups_data,class_matrix[,j]))
}
ind=which.min(abs(median(accuracy_array)-accuracy_array))
cat("Accuracy: ",accuracy_array[ind]," \n")
## Accuracy:  0.5516224
cat("CEI: ",cluster.evaluation(groups_data, class_matrix[,ind]) ," \n")
## CEI:  0.5228226
kappa_out=cohen.kappa(data.frame(groups_data, class_matrix[,ind]))
cat("Kappa 95% interval: (",kappa_out$confid[1,1],",",kappa_out$confid[1,3],")\n")
## Kappa 95% interval: ( 0.210067 , 0.3021572 )

Confusion table

tab_best=table(groups_data,class_matrix[,ind])
for(i in 1:n_clusters){
  tab_best[i,]=tab_best[i,]/sum(tab_best[i,])
}
cat("Confusion table: \n")
## Confusion table:
round(tab_best,2)
##            
## groups_data    1    2    3
##           1 0.45 0.00 0.55
##           2 0.40 0.00 0.60
##           3 0.05 0.00 0.95

Plot all the echo curves by using the computed classification

groups_cat=as.factor(class_matrix[,ind])
curves.fdata1=mlearn_data[groups_cat=="1",]
curves.fdata2=mlearn_data[groups_cat=="2",]
curves.fdata3=mlearn_data[groups_cat=="3",]
# Rescale the time scale using the ping length
x=curves.fdata1$argvals*1.024/4
# Plot the echo curves
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("SP1", "SP2", "SP3"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Functional W\(k\)-means classification for three classes

Unsupervised classification using FDA k-means algorithm with weighed \(L^2\)-norm. Centers of groups are obtained using functional data depth: In this case, the func.med.FM (functional median) is used. It returns the deepest curve following Fraiman Muniz criteria. \(L^2\)-norm is weighed with a normal distribution for which central points are more relevant

v=dnorm(seq(-3,3,len=dim(mlearn_data)[2]))
class_matrix=matrix(0.,nrow=dim(mlearn_data$data)[1],ncol=B)
class_matrix  <- foreach(j=seq(1,B), .combine=cbind) %dopar% {
    out=kmeans.fda(mlearn_data,ncl=n_clusters,max.iter=1000,draw=FALSE,par.ini=list(method="sample",iter=100),par.metric=list(lp=2,w=v))
    graphics.off()
    #' Relabel each cluster group with the number associated to SP1, SP2 and SP3 classes
    relabelling(groups_data,out$cluster,n_clusters)
}

# Compute the median with respect to the accuracy value
accuracy_array=array(0.,dim=B)
accuracy_array  <- foreach(j=seq(1,B), .combine=cbind) %dopar% {
    #' Compute accuracy
    sum(groups_data==class_matrix[,j])/sum(table(groups_data,class_matrix[,j]))
}
ind=which.min(abs(median(accuracy_array)-accuracy_array))
cat("Accuracy: ",accuracy_array[ind]," \n")
## Accuracy:  0.5619469
cat("CEI: ",cluster.evaluation(groups_data, class_matrix[,ind]) ," \n")
## CEI:  0.5452709
kappa_out=cohen.kappa(data.frame(groups_data, class_matrix[,ind]))
cat("Kappa 95% interval: (",kappa_out$confid[1,1],",",kappa_out$confid[1,3],")\n")
## Kappa 95% interval: ( 0.2723422 , 0.3660924 )

Confusion table

tab_best=table(groups_data,class_matrix[,ind])
for(i in 1:n_clusters){
  tab_best[i,]=tab_best[i,]/sum(tab_best[i,])
}
cat("Confusion table: \n")
## Confusion table:
round(tab_best,2)
##            
## groups_data    1    2    3
##           1 0.95 0.02 0.03
##           2 0.81 0.14 0.05
##           3 0.49 0.04 0.48

Plot all the echo curves by using the computed classification

groups_cat=as.factor(class_matrix[,ind])
curves.fdata1=mlearn_data[groups_cat=="1",]
curves.fdata2=mlearn_data[groups_cat=="2",]
curves.fdata3=mlearn_data[groups_cat=="3",]
# Rescale the time scale using the ping length
x=curves.fdata1$argvals*1.024/4
# Plot the echo curves
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("SP1", "SP2", "SP3"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Time series clustering using COR dissimilarity and hierarchical cluster algorithm for three classes

Time series clustering is applied: first, the dissimilarity matrix is obtaned (COR dissimilarity), then, the hierarchical cluster algorithm is applied to the obtained matrix, assuming three classes

cor.diss <- diss(data.frame(t(mlearn_data$data)), "COR")
hc <- hclust(cor.diss)  

Relabel each cluster group with the number associated to SP1, SP2 and SP3 classes

out_values=relabelling(groups_data,cutree(hc, k=n_clusters),n_clusters)

Evaluation of Functional \(k\)-means: accuracy (proportion of correct classification) and classification matrix

accuracy=sum(groups_data==out_values)/sum(table(groups_data,out_values))
cat("Accuracy: ",accuracy,"\n")
## Accuracy:  0.6224189

Evaluation of Functional \(k\)-means: classification index

cei=cluster.evaluation(groups_data, out_values) 
cat("CEI: ",cei,"\n")
## CEI:  0.6052112

Evaluation of Functional \(k\)-means: Cohen’s kappa

kappa_out=cohen.kappa(data.frame(groups_data, out_values))
cat("Kappa 95% interval: ",kappa_out$confid[1,c(1,3)],"\n")
## Kappa 95% interval:  0.3507396 0.4532295

Confusion table

tab_best=table(groups_data,out_values)
for(i in 1:n_clusters){
  tab_best[i,]=tab_best[i,]/sum(tab_best[i,])
}
cat("Confusion table: \n")
## Confusion table:
round(tab_best,2)
##            out_values
## groups_data    1    2    3
##           1 0.84 0.00 0.16
##           2 0.42 0.06 0.51
##           3 0.00 0.24 0.76

Plot all the echo curves by using the computed classification

groups_cat=as.factor(out_values)
curves.fdata1=mlearn_data[groups_cat=="1",]
curves.fdata2=mlearn_data[groups_cat=="2",]
curves.fdata3=mlearn_data[groups_cat=="3",]
# Rescale the time scale using the ping length
x=curves.fdata1$argvals*1.024/4
# Plot the echo curves
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("SP1", "SP2", "SP3"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Time series clustering using CORT dissimilarity and hierarchical cluster algorithm for three classes

Time series clustering is applied: first, the dissimilarity matrix is obtaned (CORT dissimilarity), then, the hierarchical cluster algorithm is applied to the obtained matrix, assuming three classes

cort.diss <- diss(data.frame(t(mlearn_data$data)), "CORT")
hc <- hclust(cort.diss)  

Relabel each cluster group with the number associated to SP1, SP2 and SP3 classes

out_values=relabelling(groups_data,cutree(hc, k=n_clusters),n_clusters)

Evaluation of Functional \(k\)-means: accuracy (proportion of correct classification) and classification matrix

accuracy=sum(groups_data==out_values)/sum(table(groups_data,out_values))
cat("Accuracy: ",accuracy,"\n")
## Accuracy:  0.699115

Evaluation of Functional \(k\)-means: classification index

cei=cluster.evaluation(groups_data, out_values) 
cat("CEI: ",cei,"\n")
## CEI:  0.6375229

Evaluation of Functional \(k\)-means: Cohen’s kappa

kappa_out=cohen.kappa(data.frame(groups_data, out_values))
cat("Kappa 95% interval: ",kappa_out$confid[1,c(1,3)],"\n")
## Kappa 95% interval:  0.4630385 0.5547297

Confusion table

tab_best=table(groups_data,out_values)
for(i in 1:n_clusters){
  tab_best[i,]=tab_best[i,]/sum(tab_best[i,])
}
cat("Confusion table: \n")
## Confusion table:
round(tab_best,2)
##            out_values
## groups_data    1    2    3
##           1 0.86 0.00 0.14
##           2 0.43 0.00 0.57
##           3 0.04 0.00 0.96

Plot all the echo curves by using the computed classification

groups_cat=as.factor(out_values)
curves.fdata1=mlearn_data[groups_cat=="1",]
curves.fdata2=mlearn_data[groups_cat=="2",]
curves.fdata3=mlearn_data[groups_cat=="3",]
# Rescale the time scale using the ping length
x=curves.fdata1$argvals*1.024/4
# Plot the echo curves
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("SP1", "SP2", "SP3"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Time series clustering using DWT dissimilarity and hierarchical cluster algorithm for three classes

Time series clustering is applied: first, the dissimilarity matrix is obtaned (DWT dissimilarity), then, the hierarchical cluster algorithm is applied to the obtained matrix, assuming three classes

cort.diss <- diss(data.frame(t(mlearn_data$data)), "DWT")
hc <- hclust(cort.diss)  

Relabel each cluster group with the number associated to SP1, SP2 and SP3 classes

out_values=relabelling(groups_data,cutree(hc, k=n_clusters),n_clusters)

Evaluation of Functional \(k\)-means: accuracy (proportion of correct classification) and classification matrix

accuracy=sum(groups_data==out_values)/sum(table(groups_data,out_values))
cat("Accuracy: ",accuracy,"\n")
## Accuracy:  0.5221239

Evaluation of Functional \(k\)-means: classification index

cei=cluster.evaluation(groups_data, out_values) 
cat("CEI: ",cei,"\n")
## CEI:  0.4985596

Evaluation of Functional \(k\)-means: Cohen’s kappa

kappa_out=cohen.kappa(data.frame(groups_data, out_values))
cat("Kappa 95% interval: ",kappa_out$confid[1,c(1,3)],"\n")
## Kappa 95% interval:  0.160285 0.2469194

Confusion table

tab_best=table(groups_data,out_values)
for(i in 1:n_clusters){
  tab_best[i,]=tab_best[i,]/sum(tab_best[i,])
}
cat("Confusion table: \n")
## Confusion table:
round(tab_best,2)
##            out_values
## groups_data    1    2    3
##           1 0.35 0.00 0.64
##           2 0.39 0.00 0.61
##           3 0.04 0.00 0.96

Plot all the echo curves by using the computed classification

groups_cat=as.factor(out_values)
curves.fdata1=mlearn_data[groups_cat=="1",]
curves.fdata2=mlearn_data[groups_cat=="2",]
curves.fdata3=mlearn_data[groups_cat=="3",]
# Rescale the time scale using the ping length
x=curves.fdata1$argvals*1.024/4
# Plot the echo curves
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("SP1", "SP2", "SP3"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Time series clustering using DTWARP dissimilarity and hierarchical cluster algorithm for three classes

Time series clustering is applied: first, the dissimilarity matrix is obtaned (DTWARP dissimilarity), then, the hierarchical cluster algorithm is applied to the obtained matrix, assuming three classes

cort.diss <- diss(data.frame(t(mlearn_data$data)), "DTWARP")
hc <- hclust(cort.diss)  

Relabel each cluster group with the number associated to SP1, SP2 and SP3 classes

out_values=relabelling(groups_data,cutree(hc, k=n_clusters),n_clusters)

Evaluation of Functional \(k\)-means: accuracy (proportion of correct classification) and classification matrix

accuracy=sum(groups_data==out_values)/sum(table(groups_data,out_values))
cat("Accuracy: ",accuracy,"\n")
## Accuracy:  0.5486726

Evaluation of Functional \(k\)-means: classification index

cei=cluster.evaluation(groups_data, out_values) 
cat("CEI: ",cei,"\n")
## CEI:  0.5135645

Evaluation of Functional \(k\)-means: Cohen’s kappa

kappa_out=cohen.kappa(data.frame(groups_data, out_values))
cat("Kappa 95% interval: ",kappa_out$confid[1,c(1,3)],"\n")
## Kappa 95% interval:  0.2049736 0.2989528

Confusion table

tab_best=table(groups_data,out_values)
for(i in 1:n_clusters){
  tab_best[i,]=tab_best[i,]/sum(tab_best[i,])
}
cat("Confusion table: \n")
## Confusion table:
round(tab_best,2)
##            out_values
## groups_data    1    2    3
##           1 0.47 0.00 0.53
##           2 0.38 0.00 0.62
##           3 0.07 0.00 0.93

Plot all the echo curves by using the computed classification

groups_cat=as.factor(out_values)
curves.fdata1=mlearn_data[groups_cat=="1",]
curves.fdata2=mlearn_data[groups_cat=="2",]
curves.fdata3=mlearn_data[groups_cat=="3",]
# Rescale the time scale using the ping length
x=curves.fdata1$argvals*1.024/4
# Plot the echo curves
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 2:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("SP1", "SP2", "SP3"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")

Time series clustering using EUCL dissimilarity and hierarchical cluster algorithm for three classes

Time series clustering is applied: first, the dissimilarity matrix is obtaned (EUCL dissimilarity), then, the hierarchical cluster algorithm is applied to the obtained matrix, assuming three classes

cort.diss <- diss(data.frame(t(mlearn_data$data)), "EUCL")
hc <- hclust(cort.diss)  

Relabel each cluster group with the number associated to SP1, SP2 and SP3 classes

out_values=relabelling(groups_data,cutree(hc, k=n_clusters),n_clusters)

Evaluation of Functional \(k\)-means: accuracy (proportion of correct classification) and classification matrix

accuracy=sum(groups_data==out_values)/sum(table(groups_data,out_values))
cat("Accuracy: ",accuracy,"\n")
## Accuracy:  0.4262537

Evaluation of Functional \(k\)-means: classification index

cei=cluster.evaluation(groups_data, out_values) 
cat("CEI: ",cei,"\n")
## CEI:  0.4883225

Evaluation of Functional \(k\)-means: Cohen’s kappa

kappa_out=cohen.kappa(data.frame(groups_data, out_values))
cat("Kappa 95% interval: ",kappa_out$confid[1,c(1,3)],"\n")
## Kappa 95% interval:  0.004789702 0.05818126

Confusion table

tab_best=table(groups_data,out_values)
for(i in 1:n_clusters){
  tab_best[i,]=tab_best[i,]/sum(tab_best[i,])
}
cat("Confusion table: \n")
## Confusion table:
round(tab_best,2)
##            out_values
## groups_data    1    2    3
##           1 0.00 0.01 0.98
##           2 0.00 0.11 0.89
##           3 0.00 0.04 0.96

Plot all the echo curves by using the computed classification

groups_cat=as.factor(out_values)
curves.fdata1=mlearn_data[groups_cat=="1",]
curves.fdata2=mlearn_data[groups_cat=="2",]
curves.fdata3=mlearn_data[groups_cat=="3",]
# Rescale the time scale using the ping length
x=curves.fdata1$argvals*1.024/4
# Plot the echo curves
plot(x,curves.fdata1$data[1,],type="l",lty=4,
     xlab=expression(paste("Time (", mu,"s)")),ylab="Sound intensity (dB)",
     cex.lab=1.2,cex.axis=1.2,ylim=c(-110,-15))
for(i in 1:dim(curves.fdata1$data)[1]){
  lines(x,curves.fdata1$data[i,],lty=4)
}
for(i in 1:dim(curves.fdata2$data)[1]){
  lines(x,curves.fdata2$data[i,],col=2,lty=2)
}
for(i in 1:dim(curves.fdata3$data)[1]){
  lines(x,curves.fdata3$data[i,],col=4,lty=3)
}
legend("topright",c("SP1", "SP2", "SP3"),cex=1.2,
       col=c(1,2,4),lty=c(4,2,3),lwd=2, bty="n")