Loading packages


library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(caret)
## Loading required package: lattice
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(corrplot)
## corrplot 0.92 loaded
library(mltools)
library(rockchalk)
## 
## Attaching package: 'rockchalk'
## The following object is masked from 'package:mltools':
## 
##     skewness
## The following object is masked from 'package:dplyr':
## 
##     summarize

Loading database

data2 <- read.csv("Area_01_Filtered_.csv", sep=",", header= TRUE)

summary(data2)
##      trngcv           bio14             slope            rugg500s       
##  Min.   : 8.713   Min.   :0.01801   Min.   : 0.2666   Min.   : 0.02999  
##  1st Qu.: 9.242   1st Qu.:0.11977   1st Qu.: 1.9476   1st Qu.: 1.41555  
##  Median : 9.323   Median :0.16467   Median : 3.3912   Median : 2.87604  
##  Mean   : 9.343   Mean   :0.17485   Mean   : 4.8679   Mean   : 3.28088  
##  3rd Qu.: 9.414   3rd Qu.:0.21515   3rd Qu.: 7.5301   3rd Qu.: 4.88870  
##  Max.   :10.125   Max.   :0.42712   Max.   :17.2472   Max.   :10.10635  
##     pil_twim        pil_twicv         pil_topocv         pil_slps       
##  Min.   : 5.611   Min.   :0.03184   Min.   :0.00000   Min.   : 0.08751  
##  1st Qu.: 7.941   1st Qu.:0.12586   1st Qu.:0.00000   1st Qu.: 1.09978  
##  Median : 8.988   Median :0.16938   Median :0.02253   Median : 3.08139  
##  Mean   : 8.994   Mean   :0.17768   Mean   :0.06768   Mean   : 4.12518  
##  3rd Qu.:10.064   3rd Qu.:0.22529   3rd Qu.:0.13598   3rd Qu.: 5.90168  
##  Max.   :13.544   Max.   :0.40829   Max.   :0.35341   Max.   :19.05097  
##    pil_slpcv        pil_elr3cv          mrvbf            mrrtf        
##  Min.   :0.1719   Min.   :0.01924   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3731   1st Qu.:0.14779   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4711   Median :0.22176   Median :0.0000   Median :0.00000  
##  Mean   :0.5192   Mean   :0.24437   Mean   :0.3047   Mean   :0.05822  
##  3rd Qu.:0.6152   3rd Qu.:0.34663   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.6098   Max.   :0.71240   Max.   :2.8579   Max.   :2.59788  
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.222   Mean   :3.192   Mean   : 1.470   Mean   :185.9895  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.:300.0000  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##    elevationm       wr_unrn        solpawhcn        slopern        
##  Min.   :443.9   Min.   : 0.00   Min.   : 82.0   Min.   : 0.00162  
##  1st Qu.:558.3   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.: 0.01991  
##  Median :575.0   Median :17.86   Median : 96.0   Median : 0.05780  
##  Mean   :583.5   Mean   :17.25   Mean   :100.4   Mean   : 0.18529  
##  3rd Qu.:611.7   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.: 0.16345  
##  Max.   :726.1   Max.   :72.00   Max.   :157.0   Max.   :10.06944  
##    MIN_AGE_MA      HubDist        month_collection     UNITNAME        
##  Min.   :2454   Min.   :  1.294   Length:634         Length:634        
##  1st Qu.:2454   1st Qu.: 16.807   Class :character   Class :character  
##  Median :2454   Median : 21.831   Mode  :character   Mode  :character  
##  Mean   :2473   Mean   : 41.478                                        
##  3rd Qu.:2494   3rd Qu.: 78.344                                        
##  Max.   :2597   Max.   :104.012                                        
##   ROCKTYPE1          FORMATION           HubName          true_troglofauna
##  Length:634         Length:634         Length:634         Min.   :0.0000  
##  Class :character   Class :character   Class :character   1st Qu.:0.0000  
##  Mode  :character   Mode  :character   Mode  :character   Median :1.0000  
##                                                           Mean   :0.6625  
##                                                           3rd Qu.:1.0000  
##                                                           Max.   :1.0000  
##     LATITUDE        LONGITUDE    
##  Min.   :-22.64   Min.   :117.0  
##  1st Qu.:-22.22   1st Qu.:117.7  
##  Median :-22.16   Median :117.8  
##  Mean   :-22.20   Mean   :117.7  
##  3rd Qu.:-22.12   3rd Qu.:117.9  
##  Max.   :-22.04   Max.   :118.0
dim(data2)
## [1] 634  30
sapply(data2,class)
##           trngcv            bio14            slope         rugg500s 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##        "integer"        "integer"        "numeric"        "numeric" 
##       elevationm          wr_unrn        solpawhcn          slopern 
##        "numeric"        "numeric"        "integer"        "numeric" 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##        "integer"        "numeric"      "character"      "character" 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##      "character"      "character"      "character"        "integer" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
#Converting "true_stygofauna", "true_troglofauna" to a factor
data2$true_troglofauna <- factor(data2$true_troglofauna)

#Converting categorical variables to factor

data2$HubName <- factor(data2$HubName)
data2$FORMATION <- factor(data2$FORMATION)
data2$ROCKTYPE1 <- factor(data2$ROCKTYPE1)
data2$UNITNAME <- factor(data2$UNITNAME)
data2$month_collection <- factor(data2$month_collection)

sapply(data2,class)
##           trngcv            bio14            slope         rugg500s 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##        "integer"        "integer"        "numeric"        "numeric" 
##       elevationm          wr_unrn        solpawhcn          slopern 
##        "numeric"        "numeric"        "integer"        "numeric" 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##        "integer"        "numeric"         "factor"         "factor" 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
summary(data2$month_collection)
##     April    August  December  February   January      July      June     March 
##        56         2        45        26        92         7        27       128 
##       May  November   October September 
##       122        39        13        77
data2$month_collection<-factor(data2$month_collection, levels=c("January", "February", "March","April","May","June","July","August","September","October","November","December"))
summary(data2$month_collection)
##   January  February     March     April       May      June      July    August 
##        92        26       128        56       122        27         7         2 
## September   October  November  December 
##        77        13        39        45
plot(data2$month_collection,main="Monthly Samples",ylim=c(0,200),ylab="Number of Samples")

m1<-which(data2$month_collection == "January");length(m1)
## [1] 92
m2<-which(data2$month_collection == "February");length(m2)
## [1] 26
m3<-which(data2$month_collection == "March");length(m3)
## [1] 128
m4<-which(data2$month_collection == "April");length(m4)
## [1] 56
m5<-which(data2$month_collection == "May");length(m5)
## [1] 122
m6<-which(data2$month_collection == "June");length(m6)
## [1] 27
m7<-which(data2$month_collection == "July");length(m7)
## [1] 7
m8<-which(data2$month_collection == "August");length(m8)
## [1] 2
m9<-which(data2$month_collection == "September");length(m9)
## [1] 77
m10<-which(data2$month_collection == "October");length(m10)
## [1] 13
m11<-which(data2$month_collection == "November");length(m11)
## [1] 39
m12<-which(data2$month_collection == "December");length(m12)
## [1] 45
tm<-c(m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12);length(tm)
## [1] 634
ltm<-c(length(m1),length(m2),length(m3),length(m4),length(m5),length(m6),length(m7),length(m8),length(m9),length(m10),length(m11),length(m12));median(ltm)
## [1] 42
set.seed(78945)
sm1<-sample(m1, 50, replace = FALSE);length(sm1);head(sm1)
## [1] 50
## [1] 110 102  27 370  86 377
sm3<-sample(m3, 50, replace = FALSE);length(sm3);head(sm3)
## [1] 50
## [1] 261  40 372 503 279 578
sm4<-sample(m4, 50, replace = FALSE);length(sm4);head(sm4)
## [1] 50
## [1] 155 449 156 135  38  57
sm5<-sample(m5, 50, replace = FALSE);length(sm5);head(sm5)
## [1] 50
## [1] 438 141 127 472 126 298
sm9<-sample(m9, 50, replace = FALSE);length(sm9);head(sm9)
## [1] 50
## [1] 321 195 308 355 339 243
utm<-c(sm1,m2,sm3,sm4,sm5,m6,m7,m8,sm9,m10,m11,m12);length(utm)
## [1] 409
data1<-data2[utm,];dim(data1)
## [1] 409  30
summary(data1)
##      trngcv           bio14             slope            rugg500s       
##  Min.   : 8.713   Min.   :0.01801   Min.   : 0.2666   Min.   : 0.02999  
##  1st Qu.: 9.234   1st Qu.:0.12241   1st Qu.: 1.9117   1st Qu.: 1.33417  
##  Median : 9.317   Median :0.17267   Median : 3.3563   Median : 2.77642  
##  Mean   : 9.324   Mean   :0.18034   Mean   : 4.8591   Mean   : 3.24606  
##  3rd Qu.: 9.410   3rd Qu.:0.22232   3rd Qu.: 7.1721   3rd Qu.: 4.97943  
##  Max.   :10.026   Max.   :0.42288   Max.   :17.2472   Max.   :10.10635  
##                                                                         
##     pil_twim        pil_twicv         pil_topocv         pil_slps       
##  Min.   : 5.749   Min.   :0.03184   Min.   :0.00000   Min.   : 0.08751  
##  1st Qu.: 7.965   1st Qu.:0.12336   1st Qu.:0.00000   1st Qu.: 1.07585  
##  Median : 9.006   Median :0.16938   Median :0.01735   Median : 2.73185  
##  Mean   : 9.007   Mean   :0.17782   Mean   :0.06433   Mean   : 3.96138  
##  3rd Qu.:10.122   3rd Qu.:0.23025   3rd Qu.:0.13316   3rd Qu.: 5.71830  
##  Max.   :13.544   Max.   :0.40758   Max.   :0.35341   Max.   :15.25064  
##                                                                         
##    pil_slpcv        pil_elr3cv          mrvbf           mrrtf        
##  Min.   :0.1719   Min.   :0.03157   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.3758   1st Qu.:0.13822   1st Qu.:0.000   1st Qu.:0.00000  
##  Median :0.4696   Median :0.22558   Median :0.000   Median :0.00000  
##  Mean   :0.5165   Mean   :0.24364   Mean   :0.306   Mean   :0.06968  
##  3rd Qu.:0.6153   3rd Qu.:0.33502   3rd Qu.:0.000   3rd Qu.:0.00000  
##  Max.   :1.6098   Max.   :0.71240   Max.   :2.858   Max.   :2.59788  
##                                                                      
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.208   Mean   :3.227   Mean   : 1.647   Mean   :171.7449  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.:300.0000  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##                                                                     
##    elevationm       wr_unrn        solpawhcn        slopern        
##  Min.   :443.9   Min.   : 0.00   Min.   : 82.0   Min.   : 0.00162  
##  1st Qu.:559.7   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.: 0.02093  
##  Median :577.0   Median :17.86   Median : 96.0   Median : 0.06396  
##  Mean   :585.3   Mean   :16.78   Mean   :101.3   Mean   : 0.21262  
##  3rd Qu.:614.1   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.: 0.16934  
##  Max.   :726.1   Max.   :72.00   Max.   :157.0   Max.   :10.06944  
##                                                                    
##    MIN_AGE_MA      HubDist         month_collection
##  Min.   :2454   Min.   :  1.294   January  : 50    
##  1st Qu.:2454   1st Qu.: 16.613   March    : 50    
##  Median :2454   Median : 21.793   April    : 50    
##  Mean   :2477   Mean   : 39.334   May      : 50    
##  3rd Qu.:2494   3rd Qu.: 76.847   September: 50    
##  Max.   :2597   Max.   :104.012   December : 45    
##                                   (Other)  :114    
##                                          UNITNAME  
##  Brockman Iron Formation                     :273  
##  Marra Mamba Iron Formation                  : 35  
##  Mount McRae Shale and Mount Sylvia Formation: 75  
##  Wittenoom Formation                         : 26  
##                                                    
##                                                    
##                                                    
##                                      ROCKTYPE1  
##  sedimentary carbonate                    : 26  
##  sedimentary other chemical or biochemical:308  
##  sedimentary siliciclastic                : 75  
##                                                 
##                                                 
##                                                 
##                                                 
##                                         FORMATION  
##  Brockman Iron Formation                     :273  
##  Marra Mamba Iron Formation                  : 35  
##  Mount McRae Shale and Mount Sylvia Formation: 75  
##  Wittenoom Formation                         : 26  
##                                                    
##                                                    
##                                                    
##                                                         HubName   
##  anticline, exposed                                         :235  
##  exposed                                                    : 21  
##  strike-slip, exposed, showing relative dextral displacement: 10  
##  syncline, exposed                                          :143  
##                                                                   
##                                                                   
##                                                                   
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0:147            Min.   :-22.64   Min.   :117.0  
##  1:262            1st Qu.:-22.23   1st Qu.:117.7  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.21   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.04   Max.   :118.0  
## 
data2<-data1


summary(data2$month_collection)
##   January  February     March     April       May      June      July    August 
##        50        26        50        50        50        27         7         2 
## September   October  November  December 
##        50        13        39        45
plot(data2$month_collection,main="Monthly Samples",ylim=c(0,200),ylab="Number of Samples")

levels(data2$true_troglofauna);levels(data2$HubName);levels(data2$FORMATION);levels(data2$ROCKTYPE1);levels(data2$UNITNAME);levels(data2$month_collection)
## [1] "0" "1"
## [1] "anticline, exposed"                                         
## [2] "exposed"                                                    
## [3] "strike-slip, exposed, showing relative dextral displacement"
## [4] "syncline, exposed"
## [1] "Brockman Iron Formation"                     
## [2] "Marra Mamba Iron Formation"                  
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Wittenoom Formation"
## [1] "sedimentary carbonate"                    
## [2] "sedimentary other chemical or biochemical"
## [3] "sedimentary siliciclastic"
## [1] "Brockman Iron Formation"                     
## [2] "Marra Mamba Iron Formation"                  
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Wittenoom Formation"
##  [1] "January"   "February"  "March"     "April"     "May"       "June"     
##  [7] "July"      "August"    "September" "October"   "November"  "December"
summary(data2)
##      trngcv           bio14             slope            rugg500s       
##  Min.   : 8.713   Min.   :0.01801   Min.   : 0.2666   Min.   : 0.02999  
##  1st Qu.: 9.234   1st Qu.:0.12241   1st Qu.: 1.9117   1st Qu.: 1.33417  
##  Median : 9.317   Median :0.17267   Median : 3.3563   Median : 2.77642  
##  Mean   : 9.324   Mean   :0.18034   Mean   : 4.8591   Mean   : 3.24606  
##  3rd Qu.: 9.410   3rd Qu.:0.22232   3rd Qu.: 7.1721   3rd Qu.: 4.97943  
##  Max.   :10.026   Max.   :0.42288   Max.   :17.2472   Max.   :10.10635  
##                                                                         
##     pil_twim        pil_twicv         pil_topocv         pil_slps       
##  Min.   : 5.749   Min.   :0.03184   Min.   :0.00000   Min.   : 0.08751  
##  1st Qu.: 7.965   1st Qu.:0.12336   1st Qu.:0.00000   1st Qu.: 1.07585  
##  Median : 9.006   Median :0.16938   Median :0.01735   Median : 2.73185  
##  Mean   : 9.007   Mean   :0.17782   Mean   :0.06433   Mean   : 3.96138  
##  3rd Qu.:10.122   3rd Qu.:0.23025   3rd Qu.:0.13316   3rd Qu.: 5.71830  
##  Max.   :13.544   Max.   :0.40758   Max.   :0.35341   Max.   :15.25064  
##                                                                         
##    pil_slpcv        pil_elr3cv          mrvbf           mrrtf        
##  Min.   :0.1719   Min.   :0.03157   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.3758   1st Qu.:0.13822   1st Qu.:0.000   1st Qu.:0.00000  
##  Median :0.4696   Median :0.22558   Median :0.000   Median :0.00000  
##  Mean   :0.5165   Mean   :0.24364   Mean   :0.306   Mean   :0.06968  
##  3rd Qu.:0.6153   3rd Qu.:0.33502   3rd Qu.:0.000   3rd Qu.:0.00000  
##  Max.   :1.6098   Max.   :0.71240   Max.   :2.858   Max.   :2.59788  
##                                                                      
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.208   Mean   :3.227   Mean   : 1.647   Mean   :171.7449  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.:300.0000  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##                                                                     
##    elevationm       wr_unrn        solpawhcn        slopern        
##  Min.   :443.9   Min.   : 0.00   Min.   : 82.0   Min.   : 0.00162  
##  1st Qu.:559.7   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.: 0.02093  
##  Median :577.0   Median :17.86   Median : 96.0   Median : 0.06396  
##  Mean   :585.3   Mean   :16.78   Mean   :101.3   Mean   : 0.21262  
##  3rd Qu.:614.1   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.: 0.16934  
##  Max.   :726.1   Max.   :72.00   Max.   :157.0   Max.   :10.06944  
##                                                                    
##    MIN_AGE_MA      HubDist         month_collection
##  Min.   :2454   Min.   :  1.294   January  : 50    
##  1st Qu.:2454   1st Qu.: 16.613   March    : 50    
##  Median :2454   Median : 21.793   April    : 50    
##  Mean   :2477   Mean   : 39.334   May      : 50    
##  3rd Qu.:2494   3rd Qu.: 76.847   September: 50    
##  Max.   :2597   Max.   :104.012   December : 45    
##                                   (Other)  :114    
##                                          UNITNAME  
##  Brockman Iron Formation                     :273  
##  Marra Mamba Iron Formation                  : 35  
##  Mount McRae Shale and Mount Sylvia Formation: 75  
##  Wittenoom Formation                         : 26  
##                                                    
##                                                    
##                                                    
##                                      ROCKTYPE1  
##  sedimentary carbonate                    : 26  
##  sedimentary other chemical or biochemical:308  
##  sedimentary siliciclastic                : 75  
##                                                 
##                                                 
##                                                 
##                                                 
##                                         FORMATION  
##  Brockman Iron Formation                     :273  
##  Marra Mamba Iron Formation                  : 35  
##  Mount McRae Shale and Mount Sylvia Formation: 75  
##  Wittenoom Formation                         : 26  
##                                                    
##                                                    
##                                                    
##                                                         HubName   
##  anticline, exposed                                         :235  
##  exposed                                                    : 21  
##  strike-slip, exposed, showing relative dextral displacement: 10  
##  syncline, exposed                                          :143  
##                                                                   
##                                                                   
##                                                                   
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0:147            Min.   :-22.64   Min.   :117.0  
##  1:262            1st Qu.:-22.23   1st Qu.:117.7  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.21   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.04   Max.   :118.0  
## 
dim(data2)
## [1] 409  30
instanceconvert <- colnames(data2[, -c((ncol(data2) - 8):ncol(data2))])
instanceconvert
##  [1] "trngcv"      "bio14"       "slope"       "rugg500s"    "pil_twim"   
##  [6] "pil_twicv"   "pil_topocv"  "pil_slps"    "pil_slpcv"   "pil_elr3cv" 
## [11] "mrvbf"       "mrrtf"       "minfertf"    "lf7rup"      "hstructn"   
## [16] "geolrngaggn" "elevationm"  "wr_unrn"     "solpawhcn"   "slopern"    
## [21] "MIN_AGE_MA"
for (i in instanceconvert)
{
  data2[[i]] <- as.numeric(data2[[i]])
}

sapply(data2,class)
##           trngcv            bio14            slope         rugg500s 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       elevationm          wr_unrn        solpawhcn          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##        "numeric"        "numeric"         "factor"         "factor" 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
names(data2)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"          "true_troglofauna"
## [29] "LATITUDE"         "LONGITUDE"
sapply(data2,class)
##           trngcv            bio14            slope         rugg500s 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       elevationm          wr_unrn        solpawhcn          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##        "numeric"        "numeric"         "factor"         "factor" 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
levels(data2$true_troglofauna);levels(data2$HubName);levels(data2$FORMATION);levels(data2$ROCKTYPE1);levels(data2$UNITNAME);levels(data2$month_collection)
## [1] "0" "1"
## [1] "anticline, exposed"                                         
## [2] "exposed"                                                    
## [3] "strike-slip, exposed, showing relative dextral displacement"
## [4] "syncline, exposed"
## [1] "Brockman Iron Formation"                     
## [2] "Marra Mamba Iron Formation"                  
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Wittenoom Formation"
## [1] "sedimentary carbonate"                    
## [2] "sedimentary other chemical or biochemical"
## [3] "sedimentary siliciclastic"
## [1] "Brockman Iron Formation"                     
## [2] "Marra Mamba Iron Formation"                  
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Wittenoom Formation"
##  [1] "January"   "February"  "March"     "April"     "May"       "June"     
##  [7] "July"      "August"    "September" "October"   "November"  "December"
# Get column names with their indexes
column_indexes <- seq_along(names(data2))

# Display the column names and their corresponding indexes
column_indexes_named <- setNames(column_indexes, names(data2))
print(column_indexes_named)
##           trngcv            bio14            slope         rugg500s 
##                1                2                3                4 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##                5                6                7                8 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##                9               10               11               12 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##               13               14               15               16 
##       elevationm          wr_unrn        solpawhcn          slopern 
##               17               18               19               20 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##               21               22               23               24 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##               25               26               27               28 
##         LATITUDE        LONGITUDE 
##               29               30
sapply(data2,class)
##           trngcv            bio14            slope         rugg500s 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       elevationm          wr_unrn        solpawhcn          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##        "numeric"        "numeric"         "factor"         "factor" 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"

Convert to numeric

instanceconvert <- colnames(data2[, -c((ncol(data2) - 8):ncol(data2))])

for (i in instanceconvert)
{
  data2[[i]] <- as.numeric(data2[[i]])
}

sapply(data2,class)
##           trngcv            bio14            slope         rugg500s 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       elevationm          wr_unrn        solpawhcn          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##        "numeric"        "numeric"         "factor"         "factor" 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
str(data2)
## 'data.frame':    409 obs. of  30 variables:
##  $ trngcv          : num  9.89 9.47 9.44 9.32 9.42 ...
##  $ bio14           : num  0.254 0.134 0.125 0.139 0.12 ...
##  $ slope           : num  9.74 11.57 13.37 1.15 10.27 ...
##  $ rugg500s        : num  5.46 6.52 3.05 1.47 3.42 ...
##  $ pil_twim        : num  5.75 8.43 9.24 9.98 6.86 ...
##  $ pil_twicv       : num  0.0979 0.1694 0.3028 0.1597 0.1373 ...
##  $ pil_topocv      : num  0.1106 0.1269 0.1116 0 0.0426 ...
##  $ pil_slps        : num  10.968 3.564 4.554 0.951 14.71 ...
##  $ pil_slpcv       : num  0.323 0.289 0.454 0.615 0.59 ...
##  $ pil_elr3cv      : num  0.2294 0.2977 0.1921 0.36 0.0948 ...
##  $ mrvbf           : num  0 0 0 0 0 ...
##  $ mrrtf           : num  0 0 0 0.661 0 ...
##  $ minfertf        : num  2 2 2 3 2 3 2 2 2 2 ...
##  $ lf7rup          : num  2 2 2 6 3 4 4 1 4 3 ...
##  $ hstructn        : num  1.04 1.04 1.04 1.04 1.04 ...
##  $ geolrngaggn     : num  300 2.588 2.588 0.0115 2.588 ...
##  $ elevationm      : num  726 543 543 562 551 ...
##  $ wr_unrn         : num  17.9 17.9 17.9 17.9 17.9 ...
##  $ solpawhcn       : num  96 96 96 96 96 96 96 96 96 96 ...
##  $ slopern         : num  0.00341 0.00742 0.08237 0.19806 0.01298 ...
##  $ MIN_AGE_MA      : num  2454 2494 2494 2454 2454 ...
##  $ HubDist         : num  76.1 21.5 21.7 15.3 21.6 ...
##  $ month_collection: Factor w/ 12 levels "January","February",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ UNITNAME        : Factor w/ 4 levels "Brockman Iron Formation",..: 1 3 3 1 1 1 3 1 3 1 ...
##  $ ROCKTYPE1       : Factor w/ 3 levels "sedimentary carbonate",..: 2 3 3 2 2 2 3 2 3 2 ...
##  $ FORMATION       : Factor w/ 4 levels "Brockman Iron Formation",..: 1 3 3 1 1 1 3 1 3 1 ...
##  $ HubName         : Factor w/ 4 levels "anticline, exposed",..: 4 1 1 1 1 1 1 1 1 1 ...
##  $ true_troglofauna: Factor w/ 2 levels "0","1": 1 1 2 1 2 1 2 1 2 2 ...
##  $ LATITUDE        : num  -22.2 -22.1 -22.1 -22.1 -22.1 ...
##  $ LONGITUDE       : num  118 118 118 118 118 ...
dim(data2)
## [1] 409  30
 num_rows <- 409
 
# Randomly select row indices
selected_rows <- sample(nrow(data2), num_rows)

# Create a new dataframe with randomly selected rows
data2 <- data2[selected_rows, ]
dim(data2)
## [1] 409  30
tg <- which(data2$true_troglofauna == 1)
tgg <- data2[tg, "true_troglofauna"]
tgg <- length(tgg)

fg <- which(data2$true_troglofauna == 0)
fgg <- data2[fg, "true_troglofauna"]
fgg <- length(fgg)

# Data for pie chart
plot_num_ident <- data.frame(Absence = 100 * fgg / (tgg + fgg), 
                             Presence = 100 * tgg / (tgg + fgg))

# Convert data to a numeric vector
pie_data <- as.numeric(plot_num_ident)

# Labels for the pie chart (rounded to 1 decimal place)
labels <- c(paste0("Absent\n", round(pie_data[1], 1), "%"), 
            paste0("Present\n", round(pie_data[2], 1), "%"))

# Create pie chart
pie(pie_data, 
    labels = NA, # Remove default labels
    main = "AREA 1", 
    col = c("orange", "lightblue"))

# Calculate the midpoints of each pie slice for label positioning
pie_slices <- cumsum(pie_data) - pie_data / 2

# Add the labels inside the pie chart
text(x = 0.5 * cos(2 * pi * pie_slices / sum(pie_data)), 
     y = 0.5 * sin(2 * pi * pie_slices / sum(pie_data)), 
     labels = labels, cex = 1.5)

selectcol_Data2 <- data2[, -c((ncol(data2) - 7):ncol(data2))]
dim(selectcol_Data2)
## [1] 409  22
names(selectcol_Data2)
##  [1] "trngcv"      "bio14"       "slope"       "rugg500s"    "pil_twim"   
##  [6] "pil_twicv"   "pil_topocv"  "pil_slps"    "pil_slpcv"   "pil_elr3cv" 
## [11] "mrvbf"       "mrrtf"       "minfertf"    "lf7rup"      "hstructn"   
## [16] "geolrngaggn" "elevationm"  "wr_unrn"     "solpawhcn"   "slopern"    
## [21] "MIN_AGE_MA"  "HubDist"
correlations<-cor(selectcol_Data2)
correlations
##                  trngcv       bio14       slope     rugg500s     pil_twim
## trngcv       1.00000000 -0.14438838  0.69176889  0.477926942 -0.279056003
## bio14       -0.14438838  1.00000000  0.07029883 -0.128492177 -0.226561962
## slope        0.69176889  0.07029883  1.00000000  0.517511499 -0.387364499
## rugg500s     0.47792694 -0.12849218  0.51751150  1.000000000 -0.138864300
## pil_twim    -0.27905600 -0.22656196 -0.38736450 -0.138864300  1.000000000
## pil_twicv    0.01702041 -0.29376592 -0.09729433  0.005626062  0.367094043
## pil_topocv   0.05959847 -0.10025211  0.04272442  0.288421473  0.035020538
## pil_slps     0.42525435 -0.02594281  0.47393877  0.385615519 -0.612390330
## pil_slpcv   -0.03527169 -0.24291583 -0.10400086  0.185707019  0.252591021
## pil_elr3cv  -0.02048230 -0.23334121 -0.09470449  0.449626784  0.499634174
## mrvbf       -0.26619272 -0.03411611 -0.44723384 -0.338185583  0.334840763
## mrrtf       -0.21269176  0.14716955 -0.22818463 -0.150700825 -0.007752769
## minfertf    -0.20113479 -0.33116358 -0.33421583 -0.267348143  0.295346069
## lf7rup      -0.36740253 -0.16925245 -0.48975316 -0.492283947  0.281602595
## hstructn    -0.44239447  0.32588806 -0.20271250 -0.269254064  0.053314883
## geolrngaggn  0.23224359  0.16859387  0.17634243  0.132026204 -0.431930571
## elevationm   0.32827534  0.45904523  0.28126622  0.035784792 -0.297688074
## wr_unrn     -0.06234149 -0.03941867  0.05620525  0.073818209 -0.206746105
## solpawhcn   -0.14671433  0.23912653 -0.17955216 -0.202149466  0.178602521
## slopern     -0.18198480 -0.02970640 -0.15398297 -0.157829835  0.264328275
## MIN_AGE_MA  -0.46854175  0.41961974 -0.18098149 -0.332963519 -0.133647611
## HubDist      0.25900610 -0.18161858  0.19846532  0.154051977  0.030718072
##                pil_twicv  pil_topocv    pil_slps    pil_slpcv  pil_elr3cv
## trngcv       0.017020409  0.05959847  0.42525435 -0.035271690 -0.02048230
## bio14       -0.293765922 -0.10025211 -0.02594281 -0.242915826 -0.23334121
## slope       -0.097294332  0.04272442  0.47393877 -0.104000860 -0.09470449
## rugg500s     0.005626062  0.28842147  0.38561552  0.185707019  0.44962678
## pil_twim     0.367094043  0.03502054 -0.61239033  0.252591021  0.49963417
## pil_twicv    1.000000000  0.13935933 -0.04876311  0.280298038  0.25864734
## pil_topocv   0.139359332  1.00000000  0.13690929  0.212719258  0.21220279
## pil_slps    -0.048763108  0.13690929  1.00000000  0.316376132 -0.29073744
## pil_slpcv    0.280298038  0.21271926  0.31637613  1.000000000  0.25064028
## pil_elr3cv   0.258647345  0.21220279 -0.29073744  0.250640275  1.00000000
## mrvbf        0.143789374 -0.19705951 -0.34099450  0.029400935  0.01624646
## mrrtf       -0.154008477  0.11667742 -0.10285428 -0.065781453 -0.04792797
## minfertf     0.213223063 -0.18208188 -0.25711115  0.033170964  0.12410535
## lf7rup       0.127503291 -0.19397234 -0.38134680 -0.010497415 -0.03479760
## hstructn    -0.198425125 -0.14170688 -0.15945066 -0.096202710 -0.14666667
## geolrngaggn -0.185108425  0.05077707  0.36090860 -0.140684002 -0.21925198
## elevationm  -0.308216732 -0.06616776  0.21818619 -0.117813496 -0.22561452
## wr_unrn     -0.106794102 -0.07493941  0.11352341  0.002507419 -0.02330359
## solpawhcn   -0.012078107  0.05138687 -0.22465768 -0.047500788 -0.02096902
## slopern      0.031603804 -0.13003649 -0.16139194  0.104391301  0.01645636
## MIN_AGE_MA  -0.195963275 -0.12402301 -0.14535780 -0.114125936 -0.25250500
## HubDist      0.039375610  0.10601700  0.17613904  0.266842636  0.12772916
##                   mrvbf        mrrtf    minfertf       lf7rup    hstructn
## trngcv      -0.26619272 -0.212691756 -0.20113479 -0.367402529 -0.44239447
## bio14       -0.03411611  0.147169552 -0.33116358 -0.169252446  0.32588806
## slope       -0.44723384 -0.228184626 -0.33421583 -0.489753156 -0.20271250
## rugg500s    -0.33818558 -0.150700825 -0.26734814 -0.492283947 -0.26925406
## pil_twim     0.33484076 -0.007752769  0.29534607  0.281602595  0.05331488
## pil_twicv    0.14378937 -0.154008477  0.21322306  0.127503291 -0.19842512
## pil_topocv  -0.19705951  0.116677416 -0.18208188 -0.193972339 -0.14170688
## pil_slps    -0.34099450 -0.102854276 -0.25711115 -0.381346802 -0.15945066
## pil_slpcv    0.02940093 -0.065781453  0.03317096 -0.010497415 -0.09620271
## pil_elr3cv   0.01624646 -0.047927972  0.12410535 -0.034797599 -0.14666667
## mrvbf        1.00000000 -0.065557280  0.32959668  0.527999249  0.20005240
## mrrtf       -0.06555728  1.000000000  0.02801221  0.031133482  0.29308847
## minfertf     0.32959668  0.028012213  1.00000000  0.444817211  0.03598896
## lf7rup       0.52799925  0.031133482  0.44481721  1.000000000  0.07011658
## hstructn     0.20005240  0.293088472  0.03598896  0.070116581  1.00000000
## geolrngaggn -0.21208074  0.029608944 -0.23450476 -0.197103865 -0.03969216
## elevationm  -0.11948245 -0.010279115 -0.24459358 -0.200140640  0.01344205
## wr_unrn     -0.09664253  0.010595057 -0.10583662 -0.022543060 -0.07426712
## solpawhcn    0.13166310  0.159540420  0.13802186  0.101190355  0.14136903
## slopern      0.33641174 -0.034854407  0.17182369  0.228577910  0.39431854
## MIN_AGE_MA   0.00730014  0.323227324 -0.09194556  0.001095106  0.57904539
## HubDist     -0.15219624 -0.119165302 -0.11781342 -0.263770024 -0.09757715
##             geolrngaggn  elevationm      wr_unrn   solpawhcn     slopern
## trngcv       0.23224359  0.32827534 -0.062341490 -0.14671433 -0.18198480
## bio14        0.16859387  0.45904523 -0.039418673  0.23912653 -0.02970640
## slope        0.17634243  0.28126622  0.056205249 -0.17955216 -0.15398297
## rugg500s     0.13202620  0.03578479  0.073818209 -0.20214947 -0.15782983
## pil_twim    -0.43193057 -0.29768807 -0.206746105  0.17860252  0.26432828
## pil_twicv   -0.18510842 -0.30821673 -0.106794102 -0.01207811  0.03160380
## pil_topocv   0.05077707 -0.06616776 -0.074939411  0.05138687 -0.13003649
## pil_slps     0.36090860  0.21818619  0.113523411 -0.22465768 -0.16139194
## pil_slpcv   -0.14068400 -0.11781350  0.002507419 -0.04750079  0.10439130
## pil_elr3cv  -0.21925198 -0.22561452 -0.023303594 -0.02096902  0.01645636
## mrvbf       -0.21208074 -0.11948245 -0.096642533  0.13166310  0.33641174
## mrrtf        0.02960894 -0.01027911  0.010595057  0.15954042 -0.03485441
## minfertf    -0.23450476 -0.24459358 -0.105836620  0.13802186  0.17182369
## lf7rup      -0.19710387 -0.20014064 -0.022543060  0.10119036  0.22857791
## hstructn    -0.03969216  0.01344205 -0.074267122  0.14136903  0.39431854
## geolrngaggn  1.00000000  0.23599976  0.100423279 -0.10953559 -0.10709799
## elevationm   0.23599976  1.00000000 -0.054646005  0.08226795 -0.05950478
## wr_unrn      0.10042328 -0.05464600  1.000000000 -0.64879855 -0.09753958
## solpawhcn   -0.10953559  0.08226795 -0.648798550  1.00000000  0.16282807
## slopern     -0.10709799 -0.05950478 -0.097539581  0.16282807  1.00000000
## MIN_AGE_MA  -0.07323784 -0.14465058  0.055594693  0.18305242  0.08046612
## HubDist     -0.01398595  0.23769455  0.062436255 -0.16628773 -0.09694540
##               MIN_AGE_MA     HubDist
## trngcv      -0.468541755  0.25900610
## bio14        0.419619739 -0.18161858
## slope       -0.180981491  0.19846532
## rugg500s    -0.332963519  0.15405198
## pil_twim    -0.133647611  0.03071807
## pil_twicv   -0.195963275  0.03937561
## pil_topocv  -0.124023011  0.10601700
## pil_slps    -0.145357804  0.17613904
## pil_slpcv   -0.114125936  0.26684264
## pil_elr3cv  -0.252505000  0.12772916
## mrvbf        0.007300140 -0.15219624
## mrrtf        0.323227324 -0.11916530
## minfertf    -0.091945560 -0.11781342
## lf7rup       0.001095106 -0.26377002
## hstructn     0.579045392 -0.09757715
## geolrngaggn -0.073237842 -0.01398595
## elevationm  -0.144650575  0.23769455
## wr_unrn      0.055594693  0.06243625
## solpawhcn    0.183052422 -0.16628773
## slopern      0.080466116 -0.09694540
## MIN_AGE_MA   1.000000000 -0.20084125
## HubDist     -0.200841251  1.00000000


corrplot


corrplot(correlations, method = "circle", tl.cex = 1.5, cl.cex = 1.5)

corrplot(correlations, method = "circle", type = "upper", tl.cex = 1.5, cl.cex = 1.5)


Covariates Density Plots


 dim(data2)
## [1] 409  30
# Create a density plot for each column
par(mfrow = c(2, 3))  # Arrange plots in a 2x3 grid
for (col in seq_along(selectcol_Data2)) {
  plot(density(selectcol_Data2[, col]), main = colnames(selectcol_Data2)[col])
}

set.seed(78945)
Index1 <- createDataPartition(data2$true_troglofauna, p=0.5, list=FALSE)
data_train <- data2[ Index1,]
data_prov <- data2[-Index1,]
dim(data_train)
## [1] 205  30
summary(data_train)
##      trngcv          bio14             slope            rugg500s       
##  Min.   :8.713   Min.   :0.02988   Min.   : 0.3154   Min.   : 0.02999  
##  1st Qu.:9.222   1st Qu.:0.12881   1st Qu.: 1.7353   1st Qu.: 1.26590  
##  Median :9.320   Median :0.18033   Median : 3.7947   Median : 2.63082  
##  Mean   :9.319   Mean   :0.18687   Mean   : 4.9174   Mean   : 3.12256  
##  3rd Qu.:9.411   3rd Qu.:0.23410   3rd Qu.: 7.2988   3rd Qu.: 4.78618  
##  Max.   :9.972   Max.   :0.38452   Max.   :17.2472   Max.   :10.10635  
##                                                                        
##     pil_twim        pil_twicv         pil_topocv         pil_slps       
##  Min.   : 5.975   Min.   :0.03184   Min.   :0.00000   Min.   : 0.08751  
##  1st Qu.: 8.095   1st Qu.:0.12767   1st Qu.:0.00000   1st Qu.: 1.02469  
##  Median : 9.164   Median :0.16995   Median :0.01947   Median : 2.37349  
##  Mean   : 9.078   Mean   :0.17922   Mean   :0.06160   Mean   : 3.65220  
##  3rd Qu.:10.071   3rd Qu.:0.22963   3rd Qu.:0.12689   3rd Qu.: 5.18772  
##  Max.   :13.544   Max.   :0.36039   Max.   :0.33982   Max.   :14.88655  
##                                                                         
##    pil_slpcv        pil_elr3cv          mrvbf            mrrtf        
##  Min.   :0.1908   Min.   :0.03157   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3766   1st Qu.:0.14646   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4692   Median :0.23215   Median :0.0000   Median :0.00000  
##  Mean   :0.4998   Mean   :0.24334   Mean   :0.3391   Mean   :0.08205  
##  3rd Qu.:0.5842   3rd Qu.:0.31863   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.2105   Max.   :0.59121   Max.   :2.8579   Max.   :2.59788  
##                                                                       
##     minfertf        lf7rup        hstructn       geolrngaggn      
##  Min.   :2.00   Min.   :1.00   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.00   1st Qu.:2.00   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.00   Median :3.00   Median : 1.043   Median :  2.5880  
##  Mean   :2.18   Mean   :3.21   Mean   : 1.877   Mean   :176.0964  
##  3rd Qu.:2.00   3rd Qu.:4.00   3rd Qu.: 1.043   3rd Qu.:300.0000  
##  Max.   :3.00   Max.   :7.00   Max.   :15.429   Max.   :900.0000  
##                                                                   
##    elevationm       wr_unrn        solpawhcn        slopern        
##  Min.   :443.9   Min.   : 0.00   Min.   : 82.0   Min.   : 0.00214  
##  1st Qu.:558.7   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.: 0.02306  
##  Median :582.6   Median :17.86   Median : 96.0   Median : 0.07592  
##  Mean   :587.3   Mean   :17.20   Mean   :101.6   Mean   : 0.19765  
##  3rd Qu.:619.4   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.: 0.17402  
##  Max.   :723.7   Max.   :72.00   Max.   :157.0   Max.   :10.06944  
##                                                                    
##    MIN_AGE_MA      HubDist         month_collection
##  Min.   :2454   Min.   :  2.087   April    :31     
##  1st Qu.:2454   1st Qu.: 16.862   March    :27     
##  Median :2454   Median : 21.842   May      :24     
##  Mean   :2479   Mean   : 38.612   January  :23     
##  3rd Qu.:2494   3rd Qu.: 76.503   December :21     
##  Max.   :2597   Max.   :104.012   September:19     
##                                   (Other)  :60     
##                                          UNITNAME  
##  Brockman Iron Formation                     :132  
##  Marra Mamba Iron Formation                  : 18  
##  Mount McRae Shale and Mount Sylvia Formation: 34  
##  Wittenoom Formation                         : 21  
##                                                    
##                                                    
##                                                    
##                                      ROCKTYPE1  
##  sedimentary carbonate                    : 21  
##  sedimentary other chemical or biochemical:150  
##  sedimentary siliciclastic                : 34  
##                                                 
##                                                 
##                                                 
##                                                 
##                                         FORMATION  
##  Brockman Iron Formation                     :132  
##  Marra Mamba Iron Formation                  : 18  
##  Mount McRae Shale and Mount Sylvia Formation: 34  
##  Wittenoom Formation                         : 21  
##                                                    
##                                                    
##                                                    
##                                                         HubName   
##  anticline, exposed                                         :122  
##  exposed                                                    : 10  
##  strike-slip, exposed, showing relative dextral displacement:  7  
##  syncline, exposed                                          : 66  
##                                                                   
##                                                                   
##                                                                   
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0: 74            Min.   :-22.61   Min.   :117.0  
##  1:131            1st Qu.:-22.25   1st Qu.:117.6  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.22   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.04   Max.   :118.0  
## 
data_train <- data_train[, -which(names(data_train) %in% c("LATITUDE", "LONGITUDE"))]
dim(data_train)
## [1] 205  28
head(data_train)
##      trngcv   bio14    slope rugg500s pil_twim pil_twicv pil_topocv pil_slps
## 173 9.22221 0.11984  2.62358  3.22619  8.90416   0.15090    0.15976  0.80723
## 50  9.83993 0.22232 15.87183 10.08672  7.72970   0.14045    0.02834  7.59460
## 458 9.28573 0.18881 10.57231  5.03119  8.63300   0.08212    0.00000  1.58531
## 383 9.32038 0.13525  1.22187  2.82780  9.56502   0.15810    0.00000  0.72478
## 24  9.40258 0.11255  6.00854  3.17529  8.35731   0.35754    0.00000  4.45767
## 246 9.41757 0.16629  8.93139  5.11192  8.63891   0.23263    0.05131 10.49186
##     pil_slpcv pil_elr3cv   mrvbf mrrtf minfertf lf7rup hstructn geolrngaggn
## 173   0.25382    0.30016 0.00000     0        2      4  1.04286      2.5880
## 50    0.40627    0.25831 0.00000     0        2      1  1.04286      2.5880
## 458   0.33915    0.42026 0.00000     0        2      4  1.04286      2.5880
## 383   0.41914    0.56449 0.85295     0        3      2  1.04286      0.0115
## 24    0.57378    0.26367 0.00000     0        2      2  1.04286      2.5880
## 246   0.90002    0.24122 0.00000     0        2      3  1.04286      2.5880
##     elevationm  wr_unrn solpawhcn slopern MIN_AGE_MA  HubDist month_collection
## 173   530.6564 17.85714        96 0.03171       2454 16.25063        September
## 50    603.8304 17.85714        96 0.01338       2454 23.47565            March
## 458   592.9053 17.85714        96 0.01833       2454 14.07495            April
## 383   561.8668 17.85714        96 0.12433       2454 15.77570          January
## 24    558.6679 17.85714        96 0.06396       2494 22.50256         December
## 246   601.5659 17.85714        96 0.03762       2454 86.82374            March
##                                         UNITNAME
## 173                      Brockman Iron Formation
## 50                       Brockman Iron Formation
## 458                      Brockman Iron Formation
## 383                      Brockman Iron Formation
## 24  Mount McRae Shale and Mount Sylvia Formation
## 246                      Brockman Iron Formation
##                                     ROCKTYPE1
## 173 sedimentary other chemical or biochemical
## 50  sedimentary other chemical or biochemical
## 458 sedimentary other chemical or biochemical
## 383 sedimentary other chemical or biochemical
## 24                  sedimentary siliciclastic
## 246 sedimentary other chemical or biochemical
##                                        FORMATION            HubName
## 173                      Brockman Iron Formation anticline, exposed
## 50                       Brockman Iron Formation anticline, exposed
## 458                      Brockman Iron Formation anticline, exposed
## 383                      Brockman Iron Formation anticline, exposed
## 24  Mount McRae Shale and Mount Sylvia Formation anticline, exposed
## 246                      Brockman Iron Formation  syncline, exposed
##     true_troglofauna
## 173                0
## 50                 1
## 458                0
## 383                1
## 24                 0
## 246                1
summary(data_train)
##      trngcv          bio14             slope            rugg500s       
##  Min.   :8.713   Min.   :0.02988   Min.   : 0.3154   Min.   : 0.02999  
##  1st Qu.:9.222   1st Qu.:0.12881   1st Qu.: 1.7353   1st Qu.: 1.26590  
##  Median :9.320   Median :0.18033   Median : 3.7947   Median : 2.63082  
##  Mean   :9.319   Mean   :0.18687   Mean   : 4.9174   Mean   : 3.12256  
##  3rd Qu.:9.411   3rd Qu.:0.23410   3rd Qu.: 7.2988   3rd Qu.: 4.78618  
##  Max.   :9.972   Max.   :0.38452   Max.   :17.2472   Max.   :10.10635  
##                                                                        
##     pil_twim        pil_twicv         pil_topocv         pil_slps       
##  Min.   : 5.975   Min.   :0.03184   Min.   :0.00000   Min.   : 0.08751  
##  1st Qu.: 8.095   1st Qu.:0.12767   1st Qu.:0.00000   1st Qu.: 1.02469  
##  Median : 9.164   Median :0.16995   Median :0.01947   Median : 2.37349  
##  Mean   : 9.078   Mean   :0.17922   Mean   :0.06160   Mean   : 3.65220  
##  3rd Qu.:10.071   3rd Qu.:0.22963   3rd Qu.:0.12689   3rd Qu.: 5.18772  
##  Max.   :13.544   Max.   :0.36039   Max.   :0.33982   Max.   :14.88655  
##                                                                         
##    pil_slpcv        pil_elr3cv          mrvbf            mrrtf        
##  Min.   :0.1908   Min.   :0.03157   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3766   1st Qu.:0.14646   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4692   Median :0.23215   Median :0.0000   Median :0.00000  
##  Mean   :0.4998   Mean   :0.24334   Mean   :0.3391   Mean   :0.08205  
##  3rd Qu.:0.5842   3rd Qu.:0.31863   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.2105   Max.   :0.59121   Max.   :2.8579   Max.   :2.59788  
##                                                                       
##     minfertf        lf7rup        hstructn       geolrngaggn      
##  Min.   :2.00   Min.   :1.00   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.00   1st Qu.:2.00   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.00   Median :3.00   Median : 1.043   Median :  2.5880  
##  Mean   :2.18   Mean   :3.21   Mean   : 1.877   Mean   :176.0964  
##  3rd Qu.:2.00   3rd Qu.:4.00   3rd Qu.: 1.043   3rd Qu.:300.0000  
##  Max.   :3.00   Max.   :7.00   Max.   :15.429   Max.   :900.0000  
##                                                                   
##    elevationm       wr_unrn        solpawhcn        slopern        
##  Min.   :443.9   Min.   : 0.00   Min.   : 82.0   Min.   : 0.00214  
##  1st Qu.:558.7   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.: 0.02306  
##  Median :582.6   Median :17.86   Median : 96.0   Median : 0.07592  
##  Mean   :587.3   Mean   :17.20   Mean   :101.6   Mean   : 0.19765  
##  3rd Qu.:619.4   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.: 0.17402  
##  Max.   :723.7   Max.   :72.00   Max.   :157.0   Max.   :10.06944  
##                                                                    
##    MIN_AGE_MA      HubDist         month_collection
##  Min.   :2454   Min.   :  2.087   April    :31     
##  1st Qu.:2454   1st Qu.: 16.862   March    :27     
##  Median :2454   Median : 21.842   May      :24     
##  Mean   :2479   Mean   : 38.612   January  :23     
##  3rd Qu.:2494   3rd Qu.: 76.503   December :21     
##  Max.   :2597   Max.   :104.012   September:19     
##                                   (Other)  :60     
##                                          UNITNAME  
##  Brockman Iron Formation                     :132  
##  Marra Mamba Iron Formation                  : 18  
##  Mount McRae Shale and Mount Sylvia Formation: 34  
##  Wittenoom Formation                         : 21  
##                                                    
##                                                    
##                                                    
##                                      ROCKTYPE1  
##  sedimentary carbonate                    : 21  
##  sedimentary other chemical or biochemical:150  
##  sedimentary siliciclastic                : 34  
##                                                 
##                                                 
##                                                 
##                                                 
##                                         FORMATION  
##  Brockman Iron Formation                     :132  
##  Marra Mamba Iron Formation                  : 18  
##  Mount McRae Shale and Mount Sylvia Formation: 34  
##  Wittenoom Formation                         : 21  
##                                                    
##                                                    
##                                                    
##                                                         HubName   
##  anticline, exposed                                         :122  
##  exposed                                                    : 10  
##  strike-slip, exposed, showing relative dextral displacement:  7  
##  syncline, exposed                                          : 66  
##                                                                   
##                                                                   
##                                                                   
##  true_troglofauna
##  0: 74           
##  1:131           
##                  
##                  
##                  
##                  
## 
set.seed(78945)

trainIndex1 <- createDataPartition(data_prov$true_troglofauna, p=0.6, list=FALSE)
data_test <- data_prov[ trainIndex1,]
finalTest1 <- data_prov[-trainIndex1,]
summary(data_test)
##      trngcv           bio14             slope            rugg500s      
##  Min.   : 8.779   Min.   :0.02054   Min.   : 0.3373   Min.   : 0.1128  
##  1st Qu.: 9.241   1st Qu.:0.11885   1st Qu.: 2.0234   1st Qu.: 1.4004  
##  Median : 9.300   Median :0.16241   Median : 3.1520   Median : 2.9601  
##  Mean   : 9.326   Mean   :0.16978   Mean   : 4.6339   Mean   : 3.3532  
##  3rd Qu.: 9.402   3rd Qu.:0.20572   3rd Qu.: 6.2128   3rd Qu.: 5.0275  
##  Max.   :10.026   Max.   :0.33206   Max.   :16.3552   Max.   :10.1064  
##                                                                        
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.944   Min.   :0.03919   Min.   :0.00000   Min.   : 0.1607  
##  1st Qu.: 8.005   1st Qu.:0.12469   1st Qu.:0.00000   1st Qu.: 1.2269  
##  Median : 8.975   Median :0.16440   Median :0.01535   Median : 2.7404  
##  Mean   : 9.050   Mean   :0.17676   Mean   :0.06513   Mean   : 4.0601  
##  3rd Qu.:10.169   3rd Qu.:0.21979   3rd Qu.:0.13731   3rd Qu.: 5.5650  
##  Max.   :11.582   Max.   :0.40758   Max.   :0.35341   Max.   :15.2506  
##                                                                        
##    pil_slpcv        pil_elr3cv          mrvbf            mrrtf        
##  Min.   :0.1719   Min.   :0.05163   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3834   1st Qu.:0.12926   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4809   Median :0.22894   Median :0.0000   Median :0.00000  
##  Mean   :0.5469   Mean   :0.25332   Mean   :0.3064   Mean   :0.03911  
##  3rd Qu.:0.6665   3rd Qu.:0.37929   3rd Qu.:0.5784   3rd Qu.:0.00000  
##  Max.   :1.2105   Max.   :0.71240   Max.   :1.9928   Max.   :1.56825  
##                                                                       
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.236   Mean   :3.341   Mean   : 1.340   Mean   :168.6057  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.: 21.9000  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##                                                                     
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :518.3   Min.   : 0.00   Min.   : 82.0   Min.   :0.00162  
##  1st Qu.:560.5   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.02080  
##  Median :576.1   Median :17.86   Median : 96.0   Median :0.06480  
##  Mean   :583.6   Mean   :16.97   Mean   :100.3   Mean   :0.21548  
##  3rd Qu.:607.7   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.18920  
##  Max.   :721.9   Max.   :72.00   Max.   :157.0   Max.   :6.36852  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 1.294   September:18     
##  1st Qu.:2454   1st Qu.:15.756   November :18     
##  Median :2454   Median :21.772   May      :17     
##  Mean   :2471   Mean   :39.947   January  :16     
##  3rd Qu.:2494   3rd Qu.:79.998   March    :14     
##  Max.   :2597   Max.   :91.703   April    :13     
##                                  (Other)  :27     
##                                          UNITNAME 
##  Brockman Iron Formation                     :90  
##  Marra Mamba Iron Formation                  : 7  
##  Mount McRae Shale and Mount Sylvia Formation:22  
##  Wittenoom Formation                         : 4  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 4  
##  sedimentary other chemical or biochemical:97  
##  sedimentary siliciclastic                :22  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :90  
##  Marra Mamba Iron Formation                  : 7  
##  Mount McRae Shale and Mount Sylvia Formation:22  
##  Wittenoom Formation                         : 4  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :70  
##  exposed                                                    : 5  
##  strike-slip, exposed, showing relative dextral displacement: 1  
##  syncline, exposed                                          :47  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0:44             Min.   :-22.61   Min.   :117.1  
##  1:79             1st Qu.:-22.21   1st Qu.:117.7  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.20   Mean   :117.8  
##                   3rd Qu.:-22.13   3rd Qu.:117.9  
##                   Max.   :-22.09   Max.   :118.0  
## 
dim(data_test)
## [1] 123  30
summary(finalTest1)
##      trngcv          bio14             slope            rugg500s     
##  Min.   :8.808   Min.   :0.01801   Min.   : 0.2666   Min.   :0.0407  
##  1st Qu.:9.244   1st Qu.:0.11957   1st Qu.: 2.0234   1st Qu.:1.4870  
##  Median :9.320   Median :0.17200   Median : 3.3626   Median :2.7764  
##  Mean   :9.335   Mean   :0.17983   Mean   : 5.0533   Mean   :3.3959  
##  3rd Qu.:9.410   3rd Qu.:0.22522   3rd Qu.: 7.4120   3rd Qu.:5.2247  
##  Max.   :9.889   Max.   :0.42288   Max.   :15.8156   Max.   :9.9756  
##                                                                      
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.749   Min.   :0.04324   Min.   :0.00000   Min.   : 0.1344  
##  1st Qu.: 7.738   1st Qu.:0.11417   1st Qu.:0.00000   1st Qu.: 1.2658  
##  Median : 8.587   Median :0.16848   Median :0.02168   Median : 3.9729  
##  Mean   : 8.762   Mean   :0.17586   Mean   :0.07001   Mean   : 4.5940  
##  3rd Qu.: 9.944   3rd Qu.:0.23585   3rd Qu.:0.13900   3rd Qu.: 6.9364  
##  Max.   :11.582   Max.   :0.36698   Max.   :0.32611   Max.   :12.7276  
##                                                                        
##    pil_slpcv        pil_elr3cv         mrvbf            mrrtf        
##  Min.   :0.1797   Min.   :0.0359   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3642   1st Qu.:0.1489   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4696   Median :0.2056   Median :0.0000   Median :0.00000  
##  Mean   :0.5129   Mean   :0.2297   Mean   :0.2218   Mean   :0.08481  
##  3rd Qu.:0.6206   3rd Qu.:0.3127   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.6098   Max.   :0.5645   Max.   :2.8219   Max.   :1.88966  
##                                                                      
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.235   Mean   :3.099   Mean   : 1.535   Mean   :165.4988  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.:300.0000  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##                                                                     
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :515.2   Min.   : 0.00   Min.   : 96.0   Min.   :0.00179  
##  1st Qu.:559.7   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.01995  
##  Median :570.5   Median :17.86   Median : 96.0   Median :0.04257  
##  Mean   :582.6   Mean   :15.42   Mean   :102.1   Mean   :0.24617  
##  3rd Qu.:606.6   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.10689  
##  Max.   :726.1   Max.   :17.86   Max.   :157.0   Max.   :4.28009  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 1.931   September:13     
##  1st Qu.:2454   1st Qu.:16.501   December :12     
##  Median :2454   Median :21.793   January  :11     
##  Mean   :2482   Mean   :40.231   March    : 9     
##  3rd Qu.:2494   3rd Qu.:76.311   May      : 9     
##  Max.   :2597   Max.   :88.221   June     : 7     
##                                  (Other)  :20     
##                                          UNITNAME 
##  Brockman Iron Formation                     :51  
##  Marra Mamba Iron Formation                  :10  
##  Mount McRae Shale and Mount Sylvia Formation:19  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 1  
##  sedimentary other chemical or biochemical:61  
##  sedimentary siliciclastic                :19  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :51  
##  Marra Mamba Iron Formation                  :10  
##  Mount McRae Shale and Mount Sylvia Formation:19  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :43  
##  exposed                                                    : 6  
##  strike-slip, exposed, showing relative dextral displacement: 2  
##  syncline, exposed                                          :30  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0:29             Min.   :-22.64   Min.   :117.1  
##  1:52             1st Qu.:-22.22   1st Qu.:117.6  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.21   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.09   Max.   :118.0  
## 
dim(finalTest1)
## [1] 81 30
data_test <- data_test[, -which(names(data2) %in% c("LATITUDE", "LONGITUDE"))]

finalTest <- finalTest1[, -which(names(data2) %in% c("LATITUDE", "LONGITUDE"))]

sapply(data_test,class);sapply(finalTest1,class)
##           trngcv            bio14            slope         rugg500s 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       elevationm          wr_unrn        solpawhcn          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##        "numeric"        "numeric"         "factor"         "factor" 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##         "factor"         "factor"         "factor"         "factor"
##           trngcv            bio14            slope         rugg500s 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_twim        pil_twicv       pil_topocv         pil_slps 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##        pil_slpcv       pil_elr3cv            mrvbf            mrrtf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         minfertf           lf7rup         hstructn      geolrngaggn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       elevationm          wr_unrn        solpawhcn          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##       MIN_AGE_MA          HubDist month_collection         UNITNAME 
##        "numeric"        "numeric"         "factor"         "factor" 
##        ROCKTYPE1        FORMATION          HubName true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
head(data_test)
##      trngcv   bio14    slope rugg500s pil_twim pil_twicv pil_topocv pil_slps
## 538 8.94549 0.20508  0.33730  0.38791 10.79223   0.13333    0.00000  0.16074
## 580 9.30815 0.15320  3.35630  6.98694  8.78567   0.28006    0.00000  1.81942
## 449 9.61349 0.28381 16.35521  2.20207  5.94411   0.04918    0.00000  6.81287
## 149 9.41111 0.11746  4.08951  0.66187  7.66015   0.20674    0.00000  2.03130
## 304 9.29545 0.11761  2.02341  3.37463 11.58232   0.19793    0.05766  3.97289
## 15  8.77937 0.29834  1.52777  1.09473  7.32769   0.07640    0.00000  4.35278
##     pil_slpcv pil_elr3cv  mrvbf   mrrtf minfertf lf7rup hstructn geolrngaggn
## 538   0.61696    0.26306 1.5954 0.00000        3      4 15.42857      0.0115
## 580   0.50594    0.53529 0.0000 0.00000        2      3  1.04286      2.5880
## 449   0.25979    0.12502 0.0000 0.00000        2      4  1.04286    900.0000
## 149   0.42033    0.09569 0.0000 0.00000        2      3  1.04286      2.5880
## 304   0.94816    0.42085 0.0000 0.00000        2      4  1.04286      2.5880
## 15    0.60188    0.08611 0.0000 0.91144        2      4  3.77143    300.0000
##     elevationm  wr_unrn solpawhcn slopern MIN_AGE_MA  HubDist month_collection
## 538   593.5888  8.33333       131 6.36852       2506 17.36909             June
## 580   606.9091 17.85714        96 0.62784       2454 82.98079         February
## 449   721.9289 17.85714        96 0.00249       2454 88.50439            April
## 149   561.2512 17.85714        96 0.16245       2494 21.98282            April
## 304   560.4824 17.85714        96 0.27924       2454 81.98112              May
## 15    548.1772 72.00000        82 0.05721       2597 56.67423         November
##                                         UNITNAME
## 538                          Wittenoom Formation
## 580                      Brockman Iron Formation
## 449                      Brockman Iron Formation
## 149 Mount McRae Shale and Mount Sylvia Formation
## 304                      Brockman Iron Formation
## 15                    Marra Mamba Iron Formation
##                                     ROCKTYPE1
## 538                     sedimentary carbonate
## 580 sedimentary other chemical or biochemical
## 449 sedimentary other chemical or biochemical
## 149                 sedimentary siliciclastic
## 304 sedimentary other chemical or biochemical
## 15  sedimentary other chemical or biochemical
##                                        FORMATION            HubName
## 538                          Wittenoom Formation anticline, exposed
## 580                      Brockman Iron Formation  syncline, exposed
## 449                      Brockman Iron Formation  syncline, exposed
## 149 Mount McRae Shale and Mount Sylvia Formation anticline, exposed
## 304                      Brockman Iron Formation  syncline, exposed
## 15                    Marra Mamba Iron Formation anticline, exposed
##     true_troglofauna
## 538                0
## 580                1
## 449                1
## 149                1
## 304                1
## 15                 1
dim(data_test)
## [1] 123  28
summary(data_test)
##      trngcv           bio14             slope            rugg500s      
##  Min.   : 8.779   Min.   :0.02054   Min.   : 0.3373   Min.   : 0.1128  
##  1st Qu.: 9.241   1st Qu.:0.11885   1st Qu.: 2.0234   1st Qu.: 1.4004  
##  Median : 9.300   Median :0.16241   Median : 3.1520   Median : 2.9601  
##  Mean   : 9.326   Mean   :0.16978   Mean   : 4.6339   Mean   : 3.3532  
##  3rd Qu.: 9.402   3rd Qu.:0.20572   3rd Qu.: 6.2128   3rd Qu.: 5.0275  
##  Max.   :10.026   Max.   :0.33206   Max.   :16.3552   Max.   :10.1064  
##                                                                        
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.944   Min.   :0.03919   Min.   :0.00000   Min.   : 0.1607  
##  1st Qu.: 8.005   1st Qu.:0.12469   1st Qu.:0.00000   1st Qu.: 1.2269  
##  Median : 8.975   Median :0.16440   Median :0.01535   Median : 2.7404  
##  Mean   : 9.050   Mean   :0.17676   Mean   :0.06513   Mean   : 4.0601  
##  3rd Qu.:10.169   3rd Qu.:0.21979   3rd Qu.:0.13731   3rd Qu.: 5.5650  
##  Max.   :11.582   Max.   :0.40758   Max.   :0.35341   Max.   :15.2506  
##                                                                        
##    pil_slpcv        pil_elr3cv          mrvbf            mrrtf        
##  Min.   :0.1719   Min.   :0.05163   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3834   1st Qu.:0.12926   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4809   Median :0.22894   Median :0.0000   Median :0.00000  
##  Mean   :0.5469   Mean   :0.25332   Mean   :0.3064   Mean   :0.03911  
##  3rd Qu.:0.6665   3rd Qu.:0.37929   3rd Qu.:0.5784   3rd Qu.:0.00000  
##  Max.   :1.2105   Max.   :0.71240   Max.   :1.9928   Max.   :1.56825  
##                                                                       
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.236   Mean   :3.341   Mean   : 1.340   Mean   :168.6057  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.: 21.9000  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##                                                                     
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :518.3   Min.   : 0.00   Min.   : 82.0   Min.   :0.00162  
##  1st Qu.:560.5   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.02080  
##  Median :576.1   Median :17.86   Median : 96.0   Median :0.06480  
##  Mean   :583.6   Mean   :16.97   Mean   :100.3   Mean   :0.21548  
##  3rd Qu.:607.7   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.18920  
##  Max.   :721.9   Max.   :72.00   Max.   :157.0   Max.   :6.36852  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 1.294   September:18     
##  1st Qu.:2454   1st Qu.:15.756   November :18     
##  Median :2454   Median :21.772   May      :17     
##  Mean   :2471   Mean   :39.947   January  :16     
##  3rd Qu.:2494   3rd Qu.:79.998   March    :14     
##  Max.   :2597   Max.   :91.703   April    :13     
##                                  (Other)  :27     
##                                          UNITNAME 
##  Brockman Iron Formation                     :90  
##  Marra Mamba Iron Formation                  : 7  
##  Mount McRae Shale and Mount Sylvia Formation:22  
##  Wittenoom Formation                         : 4  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 4  
##  sedimentary other chemical or biochemical:97  
##  sedimentary siliciclastic                :22  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :90  
##  Marra Mamba Iron Formation                  : 7  
##  Mount McRae Shale and Mount Sylvia Formation:22  
##  Wittenoom Formation                         : 4  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :70  
##  exposed                                                    : 5  
##  strike-slip, exposed, showing relative dextral displacement: 1  
##  syncline, exposed                                          :47  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna
##  0:44            
##  1:79            
##                  
##                  
##                  
##                  
## 
names(data_test)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"          "true_troglofauna"
head(finalTest)
##      trngcv   bio14    slope rugg500s pil_twim pil_twicv pil_topocv pil_slps
## 95  9.88856 0.25342 10.56614  4.74161  8.49100   0.23228    0.00000  6.65473
## 626 8.80830 0.28342  2.23671  1.25996  7.13560   0.05745    0.00000  2.40606
## 607 9.32483 0.28510  2.18706  0.46594  9.42484   0.23025    0.17897  1.49514
## 623 8.89422 0.31750  3.19405  2.99698  7.67751   0.12228    0.00000  1.90480
## 18  9.33742 0.09946  6.24483  3.65277 10.97026   0.25139    0.00000  4.37832
## 470 9.14680 0.22522  2.94459  7.08095  7.82185   0.23485    0.19927 10.64527
##     pil_slpcv pil_elr3cv mrvbf   mrrtf minfertf lf7rup hstructn geolrngaggn
## 95    0.37579    0.12946     0 0.00000        2      2  1.04286     900.000
## 626   0.47131    0.28907     0 0.00000        2      4  5.14286     300.000
## 607   0.50164    0.16386     0 0.70467        2      3  0.00000     300.000
## 623   0.27703    0.07601     0 0.00000        2      4  5.14286     300.000
## 18    0.79251    0.24706     0 0.00000        2      3  1.04286       2.588
## 470   0.65664    0.24246     0 0.00000        2      4  1.04286     300.000
##     elevationm  wr_unrn solpawhcn slopern MIN_AGE_MA   HubDist month_collection
## 95    627.5781 17.85714        96 0.01311       2454 77.101771            April
## 626   542.7686 10.00000       100 0.04463       2597  7.811622             June
## 607   570.4954  0.00000       157 0.09306       2597 16.627614        September
## 623   566.4947 10.00000       100 0.00955       2597  7.772825         November
## 18    525.2358 17.85714        96 4.24974       2494 21.317005         December
## 470   646.1981 17.85714        96 0.20195       2494 15.040323             June
##                                         UNITNAME
## 95                       Brockman Iron Formation
## 626                   Marra Mamba Iron Formation
## 607                   Marra Mamba Iron Formation
## 623                   Marra Mamba Iron Formation
## 18  Mount McRae Shale and Mount Sylvia Formation
## 470 Mount McRae Shale and Mount Sylvia Formation
##                                     ROCKTYPE1
## 95  sedimentary other chemical or biochemical
## 626 sedimentary other chemical or biochemical
## 607 sedimentary other chemical or biochemical
## 623 sedimentary other chemical or biochemical
## 18                  sedimentary siliciclastic
## 470                 sedimentary siliciclastic
##                                        FORMATION            HubName
## 95                       Brockman Iron Formation  syncline, exposed
## 626                   Marra Mamba Iron Formation            exposed
## 607                   Marra Mamba Iron Formation anticline, exposed
## 623                   Marra Mamba Iron Formation            exposed
## 18  Mount McRae Shale and Mount Sylvia Formation anticline, exposed
## 470 Mount McRae Shale and Mount Sylvia Formation anticline, exposed
##     true_troglofauna
## 95                 1
## 626                1
## 607                1
## 623                1
## 18                 0
## 470                1
dim(finalTest)
## [1] 81 28
summary(finalTest)
##      trngcv          bio14             slope            rugg500s     
##  Min.   :8.808   Min.   :0.01801   Min.   : 0.2666   Min.   :0.0407  
##  1st Qu.:9.244   1st Qu.:0.11957   1st Qu.: 2.0234   1st Qu.:1.4870  
##  Median :9.320   Median :0.17200   Median : 3.3626   Median :2.7764  
##  Mean   :9.335   Mean   :0.17983   Mean   : 5.0533   Mean   :3.3959  
##  3rd Qu.:9.410   3rd Qu.:0.22522   3rd Qu.: 7.4120   3rd Qu.:5.2247  
##  Max.   :9.889   Max.   :0.42288   Max.   :15.8156   Max.   :9.9756  
##                                                                      
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.749   Min.   :0.04324   Min.   :0.00000   Min.   : 0.1344  
##  1st Qu.: 7.738   1st Qu.:0.11417   1st Qu.:0.00000   1st Qu.: 1.2658  
##  Median : 8.587   Median :0.16848   Median :0.02168   Median : 3.9729  
##  Mean   : 8.762   Mean   :0.17586   Mean   :0.07001   Mean   : 4.5940  
##  3rd Qu.: 9.944   3rd Qu.:0.23585   3rd Qu.:0.13900   3rd Qu.: 6.9364  
##  Max.   :11.582   Max.   :0.36698   Max.   :0.32611   Max.   :12.7276  
##                                                                        
##    pil_slpcv        pil_elr3cv         mrvbf            mrrtf        
##  Min.   :0.1797   Min.   :0.0359   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3642   1st Qu.:0.1489   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4696   Median :0.2056   Median :0.0000   Median :0.00000  
##  Mean   :0.5129   Mean   :0.2297   Mean   :0.2218   Mean   :0.08481  
##  3rd Qu.:0.6206   3rd Qu.:0.3127   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.6098   Max.   :0.5645   Max.   :2.8219   Max.   :1.88966  
##                                                                      
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.235   Mean   :3.099   Mean   : 1.535   Mean   :165.4988  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.:300.0000  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##                                                                     
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :515.2   Min.   : 0.00   Min.   : 96.0   Min.   :0.00179  
##  1st Qu.:559.7   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.01995  
##  Median :570.5   Median :17.86   Median : 96.0   Median :0.04257  
##  Mean   :582.6   Mean   :15.42   Mean   :102.1   Mean   :0.24617  
##  3rd Qu.:606.6   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.10689  
##  Max.   :726.1   Max.   :17.86   Max.   :157.0   Max.   :4.28009  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 1.931   September:13     
##  1st Qu.:2454   1st Qu.:16.501   December :12     
##  Median :2454   Median :21.793   January  :11     
##  Mean   :2482   Mean   :40.231   March    : 9     
##  3rd Qu.:2494   3rd Qu.:76.311   May      : 9     
##  Max.   :2597   Max.   :88.221   June     : 7     
##                                  (Other)  :20     
##                                          UNITNAME 
##  Brockman Iron Formation                     :51  
##  Marra Mamba Iron Formation                  :10  
##  Mount McRae Shale and Mount Sylvia Formation:19  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 1  
##  sedimentary other chemical or biochemical:61  
##  sedimentary siliciclastic                :19  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :51  
##  Marra Mamba Iron Formation                  :10  
##  Mount McRae Shale and Mount Sylvia Formation:19  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :43  
##  exposed                                                    : 6  
##  strike-slip, exposed, showing relative dextral displacement: 2  
##  syncline, exposed                                          :30  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna
##  0:29            
##  1:52            
##                  
##                  
##                  
##                  
## 
names(finalTest)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"          "true_troglofauna"
ContraProva<-data_test$true_troglofauna
summary(ContraProva)
##  0  1 
## 44 79


confusion_matrix


get_confusion_elements <- function(caret_confusion_matrix) {
     tp <- as.numeric(caret_confusion_matrix$table[4])   # true positives
     fn <- as.numeric(caret_confusion_matrix$table[3])   # false negatives
     fp <- as.numeric(caret_confusion_matrix$table[2])   # false positives
     tn <- as.numeric(caret_confusion_matrix$table[1])   # true negatives
     return( c(tp, fp, tn, fn) )
}


calculate_mcc <- function(tp, fp, tn, fn) {
     # calculates Matthews correlation coefficient
     # tp - true positives
     # fp - false positives
     # tn - true negatives
     # fn - false negatives
     mcc <- ((tp * tn) - (fp * fn)) /
            (sqrt( (tp + fp) * (tp + fn)) * sqrt((tn + fp) * (tn + fn)) )
     return(mcc)
}


calculate_mcc1 <- function(caret_confusion_matrix) {
     # calculates Matthews correlation coefficient
     # tp - true positives
     # fp - false positives
     # tn - true negatives
     # fn - false negatives
     mcc <- ((caret_confusion_matrix$table[4] * caret_confusion_matrix$table[1]) - (caret_confusion_matrix$table[2] * caret_confusion_matrix$table[3])) /
            (sqrt( (caret_confusion_matrix$table[4] + caret_confusion_matrix$table[2]) * (caret_confusion_matrix$table[4] + caret_confusion_matrix$table[3])) * sqrt((caret_confusion_matrix$table[1] + caret_confusion_matrix$table[2]) * (caret_confusion_matrix$table[1] + caret_confusion_matrix$table[3])) )
     return(mcc)
}



calculate_F2 <- function(CM_predictions) {
dbF2_11<-((1+2^2)*CM_predictions$byClass["Precision"]*CM_predictions$byClass["Sensitivity"])/(2^2*CM_predictions$byClass["Precision"] + CM_predictions$byClass["Sensitivity"])
dbF2_11<-as.numeric(dbF2_11)
return(dbF2_11)
}
set.seed(78945)

grid <- expand.grid(.mtry=seq(from = 2, to = 26, by = 2))

trControl <- trainControl(
  method = "repeatedcv",   # Resampling method
  repeats = 10,            # Number of repetitions for repeated cross-validation
  number = 5,              # Number of folds in each iteration of cross-validation
  classProbs = TRUE,       # Calculate class probabilities
  savePredictions = "final",  # Save final predictions
  summaryFunction = twoClassSummary  # Function for summarizing results (assumed to be defined elsewhere)
)

rf_mtry <- train(
  make.names(true_troglofauna) ~ .,               # Formula for the model, predicting 'true_troglofauna' based on other columns
  data = data_train,                             # Training data
  method = "rf",                                 # Random Forest method
  strata = data_train$true_troglofauna,          # Stratification based on the target variable
  sampsize = c(min(sum(data_train$true_troglofauna == 0), sum(data_train$true_troglofauna == 1)),
             min(sum(data_train$true_troglofauna == 1), sum(data_train$true_troglofauna == 1))),
  metric = "ROC",                                # Evaluation metric (Receiver Operating Characteristic)
  tuneGrid = grid,
  trControl = trControl,                         # Control parameters for the training process
  importance = TRUE,                             # Calculate variable importance
  ntree = 500                                    # Number of trees in the Random Forest
)


#In the above code, sampsize = rep(sum(training$class == 1), 2) means both the classes will have same frequency.e.g. sampsize = c(100 cases of 0, 100 cases of 1).

rf_mtry
## Random Forest 
## 
## 205 samples
##  27 predictor
##   2 classes: 'X0', 'X1' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 10 times) 
## Summary of sample sizes: 165, 163, 164, 164, 164, 164, ... 
## Resampling results across tuning parameters:
## 
##   mtry  ROC        Sens       Spec     
##    2    0.7042073  0.3484762  0.9099145
##    4    0.6867728  0.4027619  0.8428490
##    6    0.6809710  0.4026667  0.8321937
##    8    0.6790754  0.4027619  0.8153846
##   10    0.6790832  0.3973333  0.8092877
##   12    0.6815026  0.4149524  0.8100570
##   14    0.6822932  0.4069524  0.8031339
##   16    0.6812318  0.4124762  0.8032194
##   18    0.6809225  0.4080952  0.8054416
##   20    0.6817251  0.4125714  0.8062678
##   22    0.6827418  0.4000952  0.7947578
##   24    0.6824823  0.4162857  0.7986610
##   26    0.6835513  0.4080952  0.7940456
## 
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
getTrainPerf(rf_mtry)
##    TrainROC TrainSens TrainSpec method
## 1 0.7042073 0.3484762 0.9099145     rf
summary(rf_mtry)
##                 Length Class      Mode     
## call               8   -none-     call     
## type               1   -none-     character
## predicted        205   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes            410   matrix     numeric  
## oob.times        205   -none-     numeric  
## classes            2   -none-     character
## importance       176   -none-     numeric  
## importanceSD     132   -none-     numeric  
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y                205   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames            44   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## param              4   -none-     list
rf_mtry$bestTune$mtry
## [1] 2
rf_mtry$finalModel
## 
## Call:
##  randomForest(x = x, y = y, ntree = 500, mtry = param$mtry, strata = ..1,      sampsize = ..2, importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 29.27%
## Confusion matrix:
##    X0  X1 class.error
## X0 25  49  0.66216216
## X1 11 120  0.08396947
rf_mtry$finalModel$confusion
##    X0  X1 class.error
## X0 25  49  0.66216216
## X1 11 120  0.08396947
summary(rf_mtry$pred)
##       mtry   pred      obs             X0               X1        
##  Min.   :2   X0: 376   X0: 740   Min.   :0.0120   Min.   :0.0460  
##  1st Qu.:2   X1:1674   X1:1310   1st Qu.:0.2105   1st Qu.:0.5485  
##  Median :2                       Median :0.3260   Median :0.6740  
##  Mean   :2                       Mean   :0.3467   Mean   :0.6533  
##  3rd Qu.:2                       3rd Qu.:0.4515   3rd Qu.:0.7895  
##  Max.   :2                       Max.   :0.9540   Max.   :0.9880  
##     rowIndex     Resample        
##  Min.   :  1   Length:2050       
##  1st Qu.: 52   Class :character  
##  Median :103   Mode  :character  
##  Mean   :103                     
##  3rd Qu.:154                     
##  Max.   :205
summary(rf_mtry$pred$pred)
##   X0   X1 
##  376 1674
summary(rf_mtry$pred$obs)
##   X0   X1 
##  740 1310
head(rf_mtry$pred$X0,20)
##  [1] 0.144 0.324 0.522 0.486 0.314 0.184 0.652 0.058 0.342 0.478 0.158 0.484
## [13] 0.378 0.118 0.824 0.332 0.282 0.018 0.360 0.362
head(rf_mtry$pred$X1,20)
##  [1] 0.856 0.676 0.478 0.514 0.686 0.816 0.348 0.942 0.658 0.522 0.842 0.516
## [13] 0.622 0.882 0.176 0.668 0.718 0.982 0.640 0.638
summary(data_train$true_troglofauna)
##   0   1 
##  74 131
head(rf_mtry$pred,20)
##    mtry pred obs    X0    X1 rowIndex    Resample
## 1     2   X1  X1 0.144 0.856      165 Fold1.Rep02
## 2     2   X1  X0 0.324 0.676      203 Fold2.Rep02
## 3     2   X0  X0 0.522 0.478      174 Fold1.Rep02
## 4     2   X1  X1 0.486 0.514      133 Fold1.Rep02
## 5     2   X1  X0 0.314 0.686      169 Fold1.Rep02
## 6     2   X1  X1 0.184 0.816      147 Fold1.Rep02
## 7     2   X0  X0 0.652 0.348      163 Fold1.Rep02
## 8     2   X1  X1 0.058 0.942       97 Fold3.Rep02
## 9     2   X1  X0 0.342 0.658      175 Fold1.Rep02
## 10    2   X1  X0 0.478 0.522      106 Fold3.Rep02
## 11    2   X1  X1 0.158 0.842       25 Fold3.Rep02
## 12    2   X1  X1 0.484 0.516        4 Fold1.Rep10
## 13    2   X1  X1 0.378 0.622       54 Fold4.Rep02
## 14    2   X1  X1 0.118 0.882      200 Fold2.Rep02
## 15    2   X0  X0 0.824 0.176       84 Fold3.Rep02
## 16    2   X1  X1 0.332 0.668       87 Fold3.Rep02
## 17    2   X1  X1 0.282 0.718      123 Fold1.Rep02
## 18    2   X1  X1 0.018 0.982        7 Fold1.Rep10
## 19    2   X1  X1 0.360 0.640       26 Fold2.Rep01
## 20    2   X1  X0 0.362 0.638      154 Fold5.Rep09
sapply(rf_mtry$pred, class)
##        mtry        pred         obs          X0          X1    rowIndex 
##   "numeric"    "factor"    "factor"   "numeric"   "numeric"   "integer" 
##    Resample 
## "character"
rf_mtry$results$Sens
##  [1] 0.3484762 0.4027619 0.4026667 0.4027619 0.3973333 0.4149524 0.4069524
##  [8] 0.4124762 0.4080952 0.4125714 0.4000952 0.4162857 0.4080952
rf_mtry$results$SensSD
##  [1] 0.09915507 0.11913152 0.11058203 0.12022581 0.10725093 0.11698311
##  [7] 0.10857198 0.11577380 0.12211690 0.11574530 0.12665304 0.13353480
## [13] 0.12062224
rf_mtry$results$Spec
##  [1] 0.9099145 0.8428490 0.8321937 0.8153846 0.8092877 0.8100570 0.8031339
##  [8] 0.8032194 0.8054416 0.8062678 0.7947578 0.7986610 0.7940456
rf_mtry$results$SpecSD
##  [1] 0.05863530 0.06689955 0.06557887 0.07675331 0.07793853 0.06927836
##  [7] 0.08024981 0.07738255 0.06959058 0.07771896 0.07938952 0.06657823
## [13] 0.07312943
rf_mtry$results$ROC
##  [1] 0.7042073 0.6867728 0.6809710 0.6790754 0.6790832 0.6815026 0.6822932
##  [8] 0.6812318 0.6809225 0.6817251 0.6827418 0.6824823 0.6835513
rf_mtry$results$ROCSD
##  [1] 0.08257257 0.09245001 0.09116581 0.09309791 0.09275517 0.09224488
##  [7] 0.09542614 0.09565111 0.09641345 0.09529136 0.09392111 0.09614994
## [13] 0.09635085
rf_mtry$finalModel$confusion
##    X0  X1 class.error
## X0 25  49  0.66216216
## X1 11 120  0.08396947
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
result<-data.frame(Sens=rf_mtry$results$Sens,SensSD= rf_mtry$results$SensSD,Spec=rf_mtry$results$Spec,SpecSD= rf_mtry$results$SpecSD, ROC=rf_mtry$results$ROC,ROCSD= rf_mtry$results$ROCSD)
result
##         Sens     SensSD      Spec     SpecSD       ROC      ROCSD
## 1  0.3484762 0.09915507 0.9099145 0.05863530 0.7042073 0.08257257
## 2  0.4027619 0.11913152 0.8428490 0.06689955 0.6867728 0.09245001
## 3  0.4026667 0.11058203 0.8321937 0.06557887 0.6809710 0.09116581
## 4  0.4027619 0.12022581 0.8153846 0.07675331 0.6790754 0.09309791
## 5  0.3973333 0.10725093 0.8092877 0.07793853 0.6790832 0.09275517
## 6  0.4149524 0.11698311 0.8100570 0.06927836 0.6815026 0.09224488
## 7  0.4069524 0.10857198 0.8031339 0.08024981 0.6822932 0.09542614
## 8  0.4124762 0.11577380 0.8032194 0.07738255 0.6812318 0.09565111
## 9  0.4080952 0.12211690 0.8054416 0.06959058 0.6809225 0.09641345
## 10 0.4125714 0.11574530 0.8062678 0.07771896 0.6817251 0.09529136
## 11 0.4000952 0.12665304 0.7947578 0.07938952 0.6827418 0.09392111
## 12 0.4162857 0.13353480 0.7986610 0.06657823 0.6824823 0.09614994
## 13 0.4080952 0.12062224 0.7940456 0.07312943 0.6835513 0.09635085
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
V_Imp_rf_mtry <- varImp(rf_mtry, scale = FALSE)
V_Imp_rf_mtry
## rf variable importance
## 
##   only 20 most important variables shown (out of 44)
## 
##                             Importance
## month_collectionMay              7.524
## month_collectionDecember         5.275
## month_collectionSeptember        4.272
## elevationm                       4.088
## month_collectionApril            3.641
## trngcv                           3.495
## HubDist                          3.431
## mrrtf                            3.402
## month_collectionOctober          3.353
## month_collectionFebruary         3.296
## rugg500s                         3.048
## month_collectionMarch            2.851
## slope                            2.848
## hstructn                         2.796
## slopern                          2.788
## UNITNAMEWittenoom Formation      2.653
## bio14                            2.649
## pil_slps                         2.638
## wr_unrn                          2.562
## month_collectionJune             2.325
plot(V_Imp_rf_mtry, main="Variable Importance - Area_01")

V_Imp_rf_mtry <- varImp(rf_mtry, scale = TRUE)
V_Imp_rf_mtry
## rf variable importance
## 
##   only 20 most important variables shown (out of 44)
## 
##                             Importance
## month_collectionMay             100.00
## month_collectionDecember         72.70
## month_collectionSeptember        60.53
## elevationm                       58.30
## month_collectionApril            52.88
## trngcv                           51.10
## HubDist                          50.33
## mrrtf                            49.98
## month_collectionOctober          49.38
## month_collectionFebruary         48.70
## rugg500s                         45.68
## month_collectionMarch            43.29
## slope                            43.26
## hstructn                         42.62
## slopern                          42.52
## UNITNAMEWittenoom Formation      40.89
## bio14                            40.85
## pil_slps                         40.70
## wr_unrn                          39.78
## month_collectionJune             36.91
plot(V_Imp_rf_mtry, main="Variable Importance - Area_01")

PDP

library(pdp)
## Warning: package 'pdp' was built under R version 4.3.3
name2 <- c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")

pd_month_collection <- partial(rf_mtry, pred.var = "month_collection", quantiles = FALSE, prob = TRUE, which.class = "X1", grid.resolution = 100)

# Create the plot
plot(pd_month_collection, 
     main = "Area 1 - Month of collection", 
     ylim = c(0.0, 1.0), 
     names = name2, 
     cex.lab = 1.4,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.6,         # Font size for any other text
     ylab = "Predicted outcome (yhat)"  # Set the y-axis title
)


# Add text annotations
text(1.5, 0.1, 
     "1 - Jan\n2 - Feb\n3 - Mar\n4 - Apr\n5 - May\n6 - Jun", 
     cex = 1.4, 
     adj = c(0, 0))

text(3.5, 0.1, 
     "7 - Jul\n8 - Aug\n9 - Sept\n10 - Oct\n11 - Nov\n12 - Dec", 
     cex = 1.4, 
     adj = c(0, 0))

# Define names for the categories in UNITNAME
unitname_labels <- c("Brockman Iron Formation", 
                     "Wittenoom Formation", 
                     "Mount McRae Shale and Mount Sylvia Formation", 
                     "Marra Mamba Iron Formation")

# Compute partial dependence for UNITNAME
pd_unitname <- partial(rf_mtry, pred.var = "UNITNAME", quantiles = FALSE, prob = TRUE, which.class = "X1", grid.resolution = 100)

# Create the plot
plot(pd_unitname, 
     main = "Partial Dependence of UNITNAME", 
     ylim = c(0.0, 1.0), 
     names = unitname_labels, 
     cex.lab = 1,     # Font size for axis titles
     cex.axis = 1,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.6,         # Font size for any other text
     ylab = "Predicted Outcome (yhat)",  # Set the y-axis title
     las = 2            # Rotate x-axis labels for better readability
)

# Add text annotations if needed (example for grouping units)
text(1.5, 0.1, 
     "Brockman Iron Formation\nWittenoom Formation", 
     cex = 1.4, 
     adj = c(0, 0))

text(3.5, 0.1, 
     "Mount McRae Shale and Mount Sylvia Formation\nMarra Mamba Iron Formation", 
     cex = 1.4, 
     adj = c(0, 0))

pd_hstructn <- partial(rf_mtry, pred.var = "hstructn",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_hstructn, main = "Area_01 - Hydrological scores for grades of pedality", ylab = "Predicted outcome (yhat)",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.2)         # Font size for any other text

pd_wr_unrn <- partial(rf_mtry, pred.var = "wr_unrn",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_wr_unrn, main = "Area_01 - Proportion of soil with unreliable water retention properties", ylab = "Predicted outcome (yhat)",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.2)         # Font size for any other text

pd_elevationm <- partial(rf_mtry, pred.var = "elevationm",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_elevationm, main = "Area_01 - Mean elevation", ylab = "Predicted outcome (yhat)",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.2)         # Font size for any other text

# Histogram of elevationm
hist(data2$elevationm, 
     main = "Histogram of elevationm", 
     xlab = "elevationm", 
     border = "black",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9,         # Font size for any other text
     col = "darkgoldenrod2") # Set color for histogram bars

pd_trngcv <- partial(rf_mtry, pred.var = "trngcv",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
 plot(pd_trngcv, main = "Area_01 - C of V of monthly diurnal temperature range (index)", ylab = "Predicted outcome (yhat)",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9)         # Font size for any other text

# Histogram of trngcv
hist(data2$trngcv, 
     main = "Histogram of trngcv", 
     xlab = "trngcv", 
     border = "black",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9,         # Font size for any other text
     col = "darkgoldenrod2") # Set color for histogram bars

pd_HubDist <- partial(rf_mtry, pred.var = "HubDist",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_HubDist, main = "Area_01 - Minimum distance to a linear structure in km", ylab = "Predicted outcome (yhat)",
          cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9)         # Font size for any other text)

#Gráfico com contorno. Para centralizar o título use theme()
#https://stackoverflow.com/questions/60678369/center-allign-the-title-in-autoplot
plot.pd_HubDist <- autoplot(pd_HubDist, main = "Area_01 - HubDist", contour = TRUE)
plot.pd_HubDist+theme(plot.title = element_text(hjust = 0.5))

# Histogram of HubDist
hist(data2$HubDist, 
     main = "Histogram of HubDist", 
     xlab = "HubDist", 
     border = "black",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9,         # Font size for any other text
     col = "darkgoldenrod2") # Set color for histogram bars

pd_mrrtf <- partial(rf_mtry, pred.var = "mrrtf",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_mrrtf, main = "Area_01 - Multi-resolution ridgetop flatness index", ylab = "Predicted outcome (yhat)", 
          cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9)         # Font size for any other text

# Histogram of mrrtf
hist(data2$mrrtf, 
     main = "Histogram of mrrtf", 
     xlab = "mrrtf", 
     border = "black",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9,         # Font size for any other text
     col = "darkgoldenrod2") # Set color for histogram bars

pd_rugg500s <- partial(rf_mtry, pred.var = "rugg500s",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_rugg500s, main = "Area_01 - Standard deviation of terrain ruggedness", ylab = "Predicted outcome (yhat)",
          cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9)         # Font size for any other text

# Histogram of rugg500s
hist(data2$rugg500s, 
     main = "Histogram of rugg500s", 
     xlab = "rugg500s", 
     border = "black",
     cex.lab = 1.9,     # Font size for axis titles
     cex.axis = 1.9,    # Font size for axis numbers
     cex.main = 1.9,    # Font size for the title
     cex = 1.9,         # Font size for any other text
     col = "darkgoldenrod2") # Set color for histogram bars

Export

# Export to PDF
pdf("vara_1.pdf", width = 14, height = 20) # Set appropriate dimensions

# Set up a 5x2 layout
par(mfrow = c(5, 2),     # 5 rows and 2 columns
    mar = c(4.5, 4.5, 2, 1), # Margins for each plot (bottom, left, top, right)
    oma = c(2, 2, 2, 2)) # Outer margins

# 1. elevationm
plot(pd_elevationm, main = "Area 1 - Mean elevation", ylab = "Predicted outcome (yhat)",
     cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.2)
rug(data_train$elevationm)
hist(data2$elevationm, 
     main = "Histogram of elevationm", xlab = "elevationm", 
     border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "darkgoldenrod2")
rug(data_train$elevationm)

# 2. trngcv
plot(pd_trngcv, main = "Area 1 - C of V of monthly diurnal temperature range (index)", ylab = "Predicted outcome (yhat)",
     cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$trngcv)
hist(data2$trngcv, 
     main = "Histogram of trngcv", xlab = "trngcv", 
     border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "darkgoldenrod2")
rug(data_train$trngcv)

# 3. HubDist
plot(pd_HubDist, main = "Area 1 - Distance to the nearest linear structure in km", ylab = "Predicted outcome (yhat)",
     cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$HubDist)
hist(data2$HubDist, 
     main = "Histogram of HubDist", xlab = "HubDist", 
     border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "darkgoldenrod2")
rug(data_train$HubDist)

# 4. mrrtf
plot(pd_mrrtf, main = "Area 1 - Multi-resolution ridgetop flatness index", ylab = "Predicted outcome (yhat)", 
     cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$mrrtf)
hist(data2$mrrtf, 
     main = "Histogram of mrrtf", xlab = "mrrtf", 
     border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "darkgoldenrod2")
rug(data_train$mrrtf)

# 5. rugg500s
plot(pd_rugg500s, main = "Area 1 - Standard deviation of terrain ruggedness", ylab = "Predicted outcome (yhat)",
     cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$rugg500s)
hist(data2$rugg500s, 
     main = "Histogram of rugg500s", xlab = "rugg500s", 
     border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "darkgoldenrod2")
rug(data_train$rugg500s)

# Close the PDF device
dev.off()
## png 
##   2
dim(data_test)
## [1] 123  28
colnames(data_test)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"          "true_troglofauna"
selectcol_data_test <- data_test[, -c((ncol(data_test) - 0):ncol(data_test))]
dim(selectcol_data_test)
## [1] 123  27
names(selectcol_data_test)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"
set.seed(78945)

predictions <- predict(rf_mtry, newdata = selectcol_data_test,type = "prob")
head(predictions,5)
##        X0    X1
## 538 0.634 0.366
## 580 0.290 0.710
## 449 0.552 0.448
## 149 0.086 0.914
## 304 0.104 0.896
dim(predictions)
## [1] 123   2
sapply(predictions,class)
##        X0        X1 
## "numeric" "numeric"
summary(predictions)
##        X0               X1        
##  Min.   :0.0440   Min.   :0.2060  
##  1st Qu.:0.1740   1st Qu.:0.5420  
##  Median :0.2960   Median :0.7040  
##  Mean   :0.3257   Mean   :0.6743  
##  3rd Qu.:0.4580   3rd Qu.:0.8260  
##  Max.   :0.7940   Max.   :0.9560
set.seed(78945)
predictions_raw <- predict(rf_mtry, newdata = selectcol_data_test,type = "raw")#The number/class predictions ("raw"). 
head(predictions_raw,5)
## [1] X0 X1 X0 X1 X1
## Levels: X0 X1
length(predictions_raw)
## [1] 123
head(sapply(predictions_raw,class))
## [1] "factor" "factor" "factor" "factor" "factor" "factor"
summary(predictions_raw)
##  X0  X1 
##  22 101
set.seed(78945)

predictions1 <- predict(rf_mtry, newdata = selectcol_data_test)
head(predictions1,5)
## [1] X0 X1 X0 X1 X1
## Levels: X0 X1
length(predictions1)
## [1] 123
head(sapply(predictions1,class))
## [1] "factor" "factor" "factor" "factor" "factor" "factor"
summary(predictions1)
##  X0  X1 
##  22 101
levels(predictions1) <- c(0,1)
head(predictions1,5)
## [1] 0 1 0 1 1
## Levels: 0 1
summary(predictions1)
##   0   1 
##  22 101
set.seed(78945)
CM_predictions1<-confusionMatrix(predictions1, ContraProva,positive="1")
CM_predictions1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 16  6
##          1 28 73
##                                           
##                Accuracy : 0.7236          
##                  95% CI : (0.6357, 0.8004)
##     No Information Rate : 0.6423          
##     P-Value [Acc > NIR] : 0.0350973       
##                                           
##                   Kappa : 0.3235          
##                                           
##  Mcnemar's Test P-Value : 0.0003164       
##                                           
##             Sensitivity : 0.9241          
##             Specificity : 0.3636          
##          Pos Pred Value : 0.7228          
##          Neg Pred Value : 0.7273          
##              Prevalence : 0.6423          
##          Detection Rate : 0.5935          
##    Detection Prevalence : 0.8211          
##       Balanced Accuracy : 0.6438          
##                                           
##        'Positive' Class : 1               
## 
str(CM_predictions1)
## List of 6
##  $ positive: chr "1"
##  $ table   : 'table' int [1:2, 1:2] 16 28 6 73
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ Prediction: chr [1:2] "0" "1"
##   .. ..$ Reference : chr [1:2] "0" "1"
##  $ overall : Named num [1:7] 0.724 0.324 0.636 0.8 0.642 ...
##   ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
##  $ byClass : Named num [1:11] 0.924 0.364 0.723 0.727 0.723 ...
##   ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
##  $ mode    : chr "sens_spec"
##  $ dots    : list()
##  - attr(*, "class")= chr "confusionMatrix"
CM_predictions1$byClass[1]#Sensitivity de CM_predictions1
## Sensitivity 
##   0.9240506
CM_predictions1$byClass[2]#Specificity de CM_predictions1
## Specificity 
##   0.3636364
CM_predictions1$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9240506            0.3636364            0.7227723 
##       Neg Pred Value            Precision               Recall 
##            0.7272727            0.7227723            0.9240506 
##                   F1           Prevalence       Detection Rate 
##            0.8111111            0.6422764            0.5934959 
## Detection Prevalence    Balanced Accuracy 
##            0.8211382            0.6438435
CM_predictions1$byClass["Sensitivity"]
## Sensitivity 
##   0.9240506
CM_predictions1$byClass[1]
## Sensitivity 
##   0.9240506
CM_predictions1$byClass["Balanced Accuracy"]
## Balanced Accuracy 
##         0.6438435
CM_predictions1$byClass[11]
## Balanced Accuracy 
##         0.6438435
CM_predictions1$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   0.7235772358   0.3235198965   0.6356994386   0.8003713114   0.6422764228 
## AccuracyPValue  McnemarPValue 
##   0.0350972745   0.0003164226
CM_predictions1$overall["McnemarPValue"]
## McnemarPValue 
##  0.0003164226
CM_predictions1$overall[7]
## McnemarPValue 
##  0.0003164226
CM_predictions1$table
##           Reference
## Prediction  0  1
##          0 16  6
##          1 28 73
tn<-CM_predictions1$table[1]#TN
fp<-CM_predictions1$table[2]#FP
fn<-CM_predictions1$table[3]#FN
tp<-CM_predictions1$table[4]#TP

#Youden's J statistic
J_CM_predictions1<-(CM_predictions1$byClass[1] + CM_predictions1$byClass[2]) - 1
J_CM_predictions1
## Sensitivity 
##    0.287687
J_CM_predictions1<-as.numeric(CM_predictions1$byClass[1] + CM_predictions1$byClass[2]) - 1
J_CM_predictions1
## [1] 0.287687
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
mcc(predictions1, ContraProva)
## [1] 0.3598223
get_confusion_elements(CM_predictions1)
## [1] 73 28 16  6
get_confusion_elements(CM_predictions1)[1]
## [1] 73
calculate_mcc(tp, fp, tn, fn)
## [1] 0.3598223
calculate_mcc1(CM_predictions1)
## [1] 0.3598223
calculate_F2(CM_predictions1)
## [1] 0.8752998
model_pred_class <- ifelse(predictions < 0.5, "X0", "X1")
head(model_pred_class,5)
##     X0   X1  
## 538 "X1" "X0"
## 580 "X0" "X1"
## 449 "X1" "X0"
## 149 "X0" "X1"
## 304 "X0" "X1"
dim(model_pred_class)
## [1] 123   2
length(ContraProva)
## [1] 123
summary(model_pred_class)
##       X0                 X1           
##  Length:123         Length:123        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
Test1<-as.factor(model_pred_class[,2])
head(Test1)
## 538 580 449 149 304  15 
##  X0  X1  X0  X1  X1  X1 
## Levels: X0 X1
summary(Test1)
##  X0  X1 
##  22 101
levels(Test1) <- c(0,1)
summary(Test1)
##   0   1 
##  22 101
head(Test1)
## 538 580 449 149 304  15 
##   0   1   0   1   1   1 
## Levels: 0 1
confusionMatrix(Test1, ContraProva,positive="1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 16  6
##          1 28 73
##                                           
##                Accuracy : 0.7236          
##                  95% CI : (0.6357, 0.8004)
##     No Information Rate : 0.6423          
##     P-Value [Acc > NIR] : 0.0350973       
##                                           
##                   Kappa : 0.3235          
##                                           
##  Mcnemar's Test P-Value : 0.0003164       
##                                           
##             Sensitivity : 0.9241          
##             Specificity : 0.3636          
##          Pos Pred Value : 0.7228          
##          Neg Pred Value : 0.7273          
##              Prevalence : 0.6423          
##          Detection Rate : 0.5935          
##    Detection Prevalence : 0.8211          
##       Balanced Accuracy : 0.6438          
##                                           
##        'Positive' Class : 1               
## 
summary(Test1); summary(predictions1)
##   0   1 
##  22 101
##   0   1 
##  22 101
model_pred_class <- ifelse(predictions < 0.7, "X0", "X1")

head(model_pred_class,20)
##     X0   X1  
## 538 "X0" "X0"
## 580 "X0" "X1"
## 449 "X0" "X0"
## 149 "X0" "X1"
## 304 "X0" "X1"
## 15  "X0" "X1"
## 104 "X0" "X1"
## 394 "X0" "X0"
## 392 "X0" "X1"
## 503 "X0" "X1"
## 99  "X0" "X1"
## 198 "X0" "X1"
## 629 "X0" "X1"
## 505 "X0" "X1"
## 442 "X1" "X0"
## 32  "X0" "X0"
## 195 "X0" "X0"
## 30  "X0" "X1"
## 332 "X0" "X1"
## 327 "X0" "X1"
dim(model_pred_class)
## [1] 123   2
length(ContraProva)
## [1] 123
head(sapply(model_pred_class, class))
##          X0          X0          X0          X0          X0          X0 
## "character" "character" "character" "character" "character" "character"
Test11<-as.factor(model_pred_class[,2])
summary(Test11)
## X0 X1 
## 59 64
head(Test11)
## 538 580 449 149 304  15 
##  X0  X1  X0  X1  X1  X1 
## Levels: X0 X1
levels(Test11) <- c(0,1)
summary(Test11)
##  0  1 
## 59 64
summary(predictions1)
##   0   1 
##  22 101
CM_Test11<-confusionMatrix(Test11, ContraProva,positive="1")
CM_Test11
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 30 29
##          1 14 50
##                                           
##                Accuracy : 0.6504          
##                  95% CI : (0.5592, 0.7342)
##     No Information Rate : 0.6423          
##     P-Value [Acc > NIR] : 0.46609         
##                                           
##                   Kappa : 0.2926          
##                                           
##  Mcnemar's Test P-Value : 0.03276         
##                                           
##             Sensitivity : 0.6329          
##             Specificity : 0.6818          
##          Pos Pred Value : 0.7812          
##          Neg Pred Value : 0.5085          
##              Prevalence : 0.6423          
##          Detection Rate : 0.4065          
##    Detection Prevalence : 0.5203          
##       Balanced Accuracy : 0.6574          
##                                           
##        'Positive' Class : 1               
## 
str(CM_Test11)
## List of 6
##  $ positive: chr "1"
##  $ table   : 'table' int [1:2, 1:2] 30 14 29 50
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ Prediction: chr [1:2] "0" "1"
##   .. ..$ Reference : chr [1:2] "0" "1"
##  $ overall : Named num [1:7] 0.65 0.293 0.559 0.734 0.642 ...
##   ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
##  $ byClass : Named num [1:11] 0.633 0.682 0.781 0.508 0.781 ...
##   ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
##  $ mode    : chr "sens_spec"
##  $ dots    : list()
##  - attr(*, "class")= chr "confusionMatrix"
CM_Test11$byClass[1]#Sensitivity de CM_predictions1
## Sensitivity 
##   0.6329114
CM_Test11$byClass[2]#Specificity de CM_predictions1
## Specificity 
##   0.6818182
J_CM_Test11<-as.numeric(CM_Test11$byClass[1] + CM_Test11$byClass[2] - 1) ; J_CM_Test11
## [1] 0.3147296
confusionMatrix(predictions1, ContraProva,positive="1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 16  6
##          1 28 73
##                                           
##                Accuracy : 0.7236          
##                  95% CI : (0.6357, 0.8004)
##     No Information Rate : 0.6423          
##     P-Value [Acc > NIR] : 0.0350973       
##                                           
##                   Kappa : 0.3235          
##                                           
##  Mcnemar's Test P-Value : 0.0003164       
##                                           
##             Sensitivity : 0.9241          
##             Specificity : 0.3636          
##          Pos Pred Value : 0.7228          
##          Neg Pred Value : 0.7273          
##              Prevalence : 0.6423          
##          Detection Rate : 0.5935          
##    Detection Prevalence : 0.8211          
##       Balanced Accuracy : 0.6438          
##                                           
##        'Positive' Class : 1               
## 
dim(finalTest)
## [1] 81 28
names(finalTest)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"          "true_troglofauna"
selectcol_finalTest <- finalTest[, -c((ncol(finalTest) - 0):ncol(finalTest))]
dim(selectcol_finalTest)
## [1] 81 27
names(selectcol_finalTest)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"
predictions_2 <- predict(rf_mtry, newdata = selectcol_finalTest,type = "raw")

summary(predictions_2)
## X0 X1 
## 10 71
summary(finalTest[, ncol(finalTest)])
##  0  1 
## 29 52
levels(predictions_2) <- c(0,1)
summary(predictions_2)
##  0  1 
## 10 71
CM_predictions_2<-confusionMatrix(predictions_2, finalTest[, ncol(finalTest)],positive="1")
CM_predictions_2
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  9  1
##          1 20 51
##                                           
##                Accuracy : 0.7407          
##                  95% CI : (0.6314, 0.8318)
##     No Information Rate : 0.642           
##     P-Value [Acc > NIR] : 0.0387          
##                                           
##                   Kappa : 0.3404          
##                                           
##  Mcnemar's Test P-Value : 8.568e-05       
##                                           
##             Sensitivity : 0.9808          
##             Specificity : 0.3103          
##          Pos Pred Value : 0.7183          
##          Neg Pred Value : 0.9000          
##              Prevalence : 0.6420          
##          Detection Rate : 0.6296          
##    Detection Prevalence : 0.8765          
##       Balanced Accuracy : 0.6456          
##                                           
##        'Positive' Class : 1               
## 
mcc(predictions_2, finalTest[, ncol(finalTest)])
## [1] 0.4242625
calculate_mcc1(CM_predictions_2)
## [1] 0.4242625
calculate_F2(CM_predictions_2)
## [1] 0.9139785
predictions_3<- predict(rf_mtry, newdata = selectcol_finalTest,type = "prob")

summary(predictions_3)
##        X0               X1        
##  Min.   :0.0780   Min.   :0.2020  
##  1st Qu.:0.1940   1st Qu.:0.5880  
##  Median :0.2980   Median :0.7020  
##  Mean   :0.3123   Mean   :0.6877  
##  3rd Qu.:0.4120   3rd Qu.:0.8060  
##  Max.   :0.7980   Max.   :0.9220
sapply(predictions_3, class)
##        X0        X1 
## "numeric" "numeric"
head(predictions_3)
##        X0    X1
## 95  0.334 0.666
## 626 0.102 0.898
## 607 0.344 0.656
## 623 0.154 0.846
## 18  0.514 0.486
## 470 0.282 0.718
model_pred_class_3 <- ifelse(predictions_3 < 0.5, "X0", "X1")
head(sapply(model_pred_class_3, class))
##          X0          X0          X0          X0          X1          X0 
## "character" "character" "character" "character" "character" "character"
Test_3<-as.factor(model_pred_class_3[,2])
head(sapply(Test_3, class))
##       95      626      607      623       18      470 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_3)
## [1] "X0" "X1"
head(Test_3)
##  95 626 607 623  18 470 
##  X1  X1  X1  X1  X0  X1 
## Levels: X0 X1
levels(Test_3) <- c(0,1)
head(sapply(Test_3, class))
##       95      626      607      623       18      470 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_3)
## [1] "0" "1"
head(Test_3)
##  95 626 607 623  18 470 
##   1   1   1   1   0   1 
## Levels: 0 1
summary(Test_3)
##  0  1 
## 10 71
CM_predictions_3<-confusionMatrix(Test_3, finalTest[, ncol(finalTest)],positive="1")
CM_predictions_3
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  9  1
##          1 20 51
##                                           
##                Accuracy : 0.7407          
##                  95% CI : (0.6314, 0.8318)
##     No Information Rate : 0.642           
##     P-Value [Acc > NIR] : 0.0387          
##                                           
##                   Kappa : 0.3404          
##                                           
##  Mcnemar's Test P-Value : 8.568e-05       
##                                           
##             Sensitivity : 0.9808          
##             Specificity : 0.3103          
##          Pos Pred Value : 0.7183          
##          Neg Pred Value : 0.9000          
##              Prevalence : 0.6420          
##          Detection Rate : 0.6296          
##    Detection Prevalence : 0.8765          
##       Balanced Accuracy : 0.6456          
##                                           
##        'Positive' Class : 1               
## 
calculate_mcc1(CM_predictions_3)
## [1] 0.4242625
calculate_F2(CM_predictions_3)
## [1] 0.9139785
testrf_mtry<- predict(rf_mtry)
summary(testrf_mtry)
##  X0  X1 
##  60 145
levels(testrf_mtry) <- c(0,1);
summary(testrf_mtry)
##   0   1 
##  60 145
CM_testrf_mtry<-confusionMatrix(testrf_mtry, data_train[, ncol(data_train)],positive="1");
CM_testrf_mtry
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0  60   0
##          1  14 131
##                                           
##                Accuracy : 0.9317          
##                  95% CI : (0.8881, 0.9622)
##     No Information Rate : 0.639           
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8456          
##                                           
##  Mcnemar's Test P-Value : 0.000512        
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.8108          
##          Pos Pred Value : 0.9034          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.6390          
##          Detection Rate : 0.6390          
##    Detection Prevalence : 0.7073          
##       Balanced Accuracy : 0.9054          
##                                           
##        'Positive' Class : 1               
## 
calculate_mcc1(CM_testrf_mtry)
## [1] 0.8558771
calculate_F2(CM_testrf_mtry)
## [1] 0.9790732
testrf_mtry1<- predict(rf_mtry,type="prob")
summary(testrf_mtry1)
##        X0               X1        
##  Min.   :0.0200   Min.   :0.0400  
##  1st Qu.:0.1160   1st Qu.:0.4400  
##  Median :0.2200   Median :0.7800  
##  Mean   :0.3312   Mean   :0.6688  
##  3rd Qu.:0.5600   3rd Qu.:0.8840  
##  Max.   :0.9600   Max.   :0.9800
model_pred_class_4 <- ifelse(testrf_mtry1 < 0.5, "X0", "X1")
summary(model_pred_class_4)
##       X0                 X1           
##  Length:205         Length:205        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
Test_4<-as.factor(model_pred_class_4[,2])
head(Test_4)
## 173  50 458 383  24 246 
##  X0  X1  X0  X1  X1  X1 
## Levels: X0 X1
levels(Test_4) <- c(0,1);
summary(Test_4)
##   0   1 
##  60 145
CM_testrf_mtry1<-confusionMatrix(Test_4, data_train[, ncol(data_train)],positive="1");
CM_testrf_mtry1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0  60   0
##          1  14 131
##                                           
##                Accuracy : 0.9317          
##                  95% CI : (0.8881, 0.9622)
##     No Information Rate : 0.639           
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8456          
##                                           
##  Mcnemar's Test P-Value : 0.000512        
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.8108          
##          Pos Pred Value : 0.9034          
##          Neg Pred Value : 1.0000          
##              Prevalence : 0.6390          
##          Detection Rate : 0.6390          
##    Detection Prevalence : 0.7073          
##       Balanced Accuracy : 0.9054          
##                                           
##        'Positive' Class : 1               
## 
calculate_mcc1(CM_testrf_mtry1)
## [1] 0.8558771
calculate_F2(CM_testrf_mtry1)
## [1] 0.9790732
prediction.probabilities <- predictions[,"X1"]
head(prediction.probabilities,5)
## [1] 0.366 0.710 0.448 0.914 0.896
res.roc <-roc(ContraProva,prediction.probabilities)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
res.roc
## 
## Call:
## roc.default(response = ContraProva, predictor = prediction.probabilities)
## 
## Data: prediction.probabilities in 44 controls (ContraProva 0) < 79 cases (ContraProva 1).
## Area under the curve: 0.7392
str(res.roc)
## List of 15
##  $ percent           : logi FALSE
##  $ sensitivities     : num [1:100] 1 1 0.987 0.987 0.987 ...
##  $ specificities     : num [1:100] 0 0.0227 0.0227 0.0682 0.0909 ...
##  $ thresholds        : num [1:100] -Inf 0.217 0.233 0.257 0.305 ...
##  $ direction         : chr "<"
##  $ cases             : num [1:79] 0.71 0.448 0.914 0.896 0.852 0.92 0.704 0.786 0.736 0.842 ...
##  $ controls          : num [1:44] 0.366 0.522 0.72 0.876 0.41 0.442 0.548 0.494 0.238 0.542 ...
##  $ fun.sesp          :function (thresholds, controls, cases, direction)  
##  $ auc               : 'auc' num 0.739
##   ..- attr(*, "partial.auc")= logi FALSE
##   ..- attr(*, "percent")= logi FALSE
##   ..- attr(*, "roc")=List of 15
##   .. ..$ percent           : logi FALSE
##   .. ..$ sensitivities     : num [1:100] 1 1 0.987 0.987 0.987 ...
##   .. ..$ specificities     : num [1:100] 0 0.0227 0.0227 0.0682 0.0909 ...
##   .. ..$ thresholds        : num [1:100] -Inf 0.217 0.233 0.257 0.305 ...
##   .. ..$ direction         : chr "<"
##   .. ..$ cases             : num [1:79] 0.71 0.448 0.914 0.896 0.852 0.92 0.704 0.786 0.736 0.842 ...
##   .. ..$ controls          : num [1:44] 0.366 0.522 0.72 0.876 0.41 0.442 0.548 0.494 0.238 0.542 ...
##   .. ..$ fun.sesp          :function (thresholds, controls, cases, direction)  
##   .. ..$ auc               : 'auc' num 0.739
##   .. .. ..- attr(*, "partial.auc")= logi FALSE
##   .. .. ..- attr(*, "percent")= logi FALSE
##   .. .. ..- attr(*, "roc")=List of 8
##   .. .. .. ..$ percent      : logi FALSE
##   .. .. .. ..$ sensitivities: num [1:100] 1 1 0.987 0.987 0.987 ...
##   .. .. .. ..$ specificities: num [1:100] 0 0.0227 0.0227 0.0682 0.0909 ...
##   .. .. .. ..$ thresholds   : num [1:100] -Inf 0.217 0.233 0.257 0.305 ...
##   .. .. .. ..$ direction    : chr "<"
##   .. .. .. ..$ cases        : num [1:79] 0.71 0.448 0.914 0.896 0.852 0.92 0.704 0.786 0.736 0.842 ...
##   .. .. .. ..$ controls     : num [1:44] 0.366 0.522 0.72 0.876 0.41 0.442 0.548 0.494 0.238 0.542 ...
##   .. .. .. ..$ fun.sesp     :function (thresholds, controls, cases, direction)  
##   .. .. .. ..- attr(*, "class")= chr "roc"
##   .. ..$ call              : language roc.default(response = ContraProva, predictor = prediction.probabilities)
##   .. ..$ original.predictor: num [1:123] 0.366 0.71 0.448 0.914 0.896 0.852 0.92 0.522 0.704 0.72 ...
##   .. ..$ original.response : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 1 2 1 ...
##   .. ..$ predictor         : num [1:123] 0.366 0.71 0.448 0.914 0.896 0.852 0.92 0.522 0.704 0.72 ...
##   .. ..$ response          : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 1 2 1 ...
##   .. ..$ levels            : chr [1:2] "0" "1"
##   .. ..- attr(*, "class")= chr "roc"
##  $ call              : language roc.default(response = ContraProva, predictor = prediction.probabilities)
##  $ original.predictor: num [1:123] 0.366 0.71 0.448 0.914 0.896 0.852 0.92 0.522 0.704 0.72 ...
##  $ original.response : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 1 2 1 ...
##  $ predictor         : num [1:123] 0.366 0.71 0.448 0.914 0.896 0.852 0.92 0.522 0.704 0.72 ...
##  $ response          : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 1 2 1 ...
##  $ levels            : chr [1:2] "0" "1"
##  - attr(*, "class")= chr "roc"
head(res.roc$cases,5) 
## [1] 0.710 0.448 0.914 0.896 0.852
length(res.roc$cases)
## [1] 79
summary(res.roc$cases)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2280  0.6450  0.7540  0.7274  0.8360  0.9520
head(res.roc$controls,5) 
## [1] 0.366 0.522 0.720 0.876 0.410
length(res.roc$controls)
## [1] 44
summary(res.roc$controls)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2060  0.4740  0.5430  0.5789  0.7200  0.9560
head(res.roc$thresholds,5)
## [1]  -Inf 0.217 0.233 0.257 0.305
tail(res.roc$thresholds,5)
## [1] 0.919 0.921 0.937 0.954   Inf
length(res.roc$thresholds)
## [1] 100
summary(res.roc$thresholds)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    -Inf  0.5180  0.6965          0.8230     Inf
auc(res.roc)#auc() compute the area under the ROC curve. 
## Area under the curve: 0.7392
ci.auc(res.roc)#ci.auc() Compute the confidence interval of the AUC, default method = "delong". 
## 95% CI: 0.6442-0.8343 (DeLong)
ci.auc(res.roc,method = "bootstrap", boot.n = 10000)#ci.auc() Compute the confidence interval of the AUC, default method = "bootstrap". 
## 95% CI: 0.6385-0.8288 (10000 stratified bootstrap replicates)
#A value of x = "all" will return the coordinates for the curve and their associated cutoffs. 
rfThresh_all <- coords(res.roc, x = "all", best.method = "youden")
head(rfThresh_all)
##   threshold specificity sensitivity
## 1      -Inf  0.00000000   1.0000000
## 2     0.217  0.02272727   1.0000000
## 3     0.233  0.02272727   0.9873418
## 4     0.257  0.06818182   0.9873418
## 5     0.305  0.09090909   0.9873418
## 6     0.335  0.09090909   0.9746835
summary(rfThresh_all)
##    threshold       specificity      sensitivity    
##  Min.   :  -Inf   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.5180   1st Qu.:0.4318   1st Qu.:0.3513  
##  Median :0.6965   Median :0.6818   Median :0.6392  
##  Mean   :   NaN   Mean   :0.6405   Mean   :0.5903  
##  3rd Qu.:0.8230   3rd Qu.:0.9091   3rd Qu.:0.8892  
##  Max.   :   Inf   Max.   :1.0000   Max.   :1.0000
dim(rfThresh_all)
## [1] 100   3
#“local maximas”(the local maximas of the ROC curve). 
rfThresh_max <- coords(res.roc, x = "local maximas", best.method = "youden")
head(rfThresh_max)
##   threshold specificity sensitivity
## 1     0.217  0.02272727   1.0000000
## 2     0.305  0.09090909   0.9873418
## 3     0.422  0.15909091   0.9620253
## 4     0.447  0.20454545   0.9367089
## 5     0.498  0.36363636   0.9240506
## 6     0.503  0.38636364   0.9113924
summary(rfThresh_max)
##    threshold      specificity       sensitivity    
##  Min.   :0.217   Min.   :0.02273   Min.   :0.0000  
##  1st Qu.:0.520   1st Qu.:0.44318   1st Qu.:0.5063  
##  Median :0.641   Median :0.63636   Median :0.7595  
##  Mean   :  Inf   Mean   :0.60943   Mean   :0.6606  
##  3rd Qu.:0.760   3rd Qu.:0.82955   3rd Qu.:0.8924  
##  Max.   :  Inf   Max.   :1.00000   Max.   :1.0000
dim(rfThresh_max)
## [1] 27  3
#Best thresholds: If x="best" e best.method = "youden", Youden’s J statistic (Youden, 1950) is employed. The optimal cut-off is the threshold that maximizes the distance to the identity (diagonal) line. 
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
##   threshold specificity sensitivity
## 1     0.554   0.5454545   0.8607595
rfThresh_youden[1,1]
## [1] 0.554
#Best thresholds: If x="best" e best.method = "closest.topleft", The optimal threshold is the point closest to the top-left part of the plot with perfect sensitivity or specificity. 
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
##   threshold specificity sensitivity
## 1     0.641   0.6363636   0.7594937
rfThresh_topleft[1,1]
## [1] 0.641
coords(res.roc, "best", ret="all", transpose = FALSE,
       best.method="youden")
##           threshold specificity sensitivity  accuracy tn tp fn fp       npv
## threshold     0.554   0.5454545   0.8607595 0.7479675 24 68 11 20 0.6857143
##                 ppv       fdr       fpr       tpr       tnr       fnr
## threshold 0.7727273 0.2272727 0.4545455 0.8607595 0.5454545 0.1392405
##           1-specificity 1-sensitivity 1-accuracy     1-npv     1-ppv precision
## threshold     0.4545455     0.1392405  0.2520325 0.3142857 0.2272727 0.7727273
##              recall   youden closest.topleft
## threshold 0.8607595 1.406214       0.2259995
coords(res.roc, "best", ret="all", transpose = FALSE,
       best.method="closest.topleft")
##           threshold specificity sensitivity  accuracy tn tp fn fp       npv
## threshold     0.641   0.6363636   0.7594937 0.7154472 28 60 19 16 0.5957447
##                 ppv       fdr       fpr       tpr       tnr       fnr
## threshold 0.7894737 0.2105263 0.3636364 0.7594937 0.6363636 0.2405063
##           1-specificity 1-sensitivity 1-accuracy     1-npv     1-ppv precision
## threshold     0.3636364     0.2405063  0.2845528 0.4042553 0.2105263 0.7894737
##              recall   youden closest.topleft
## threshold 0.7594937 1.395857       0.1900747
coords(res.roc, "best", ret="threshold", transpose = FALSE,
       best.method="youden") # default
##   threshold
## 1     0.554
coords(res.roc, "best", ret="threshold", transpose = FALSE,
       best.method="closest.topleft")
##   threshold
## 1     0.641
coords(res.roc, "best", ret="tn", transpose = FALSE)
##           tn
## threshold 24
coords(res.roc, "best", ret="tn", transpose = FALSE,
       best.method="closest.topleft")
##           tn
## threshold 28
coords(res.roc, "best", ret="tp", transpose = FALSE)
##           tp
## threshold 68
coords(res.roc, "best", ret="tp", transpose = FALSE,
       best.method="closest.topleft")
##           tp
## threshold 60
coords(res.roc, "best", ret="youden", transpose = FALSE)
##     youden
## 1 1.406214
coords(res.roc, "best", ret="closest.topleft", transpose = FALSE,best.method="closest.topleft")
##   closest.topleft
## 1       0.1900747
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
##   threshold specificity sensitivity
## 1     0.554   0.5454545   0.8607595
rfThresh_youden[1,1]
## [1] 0.554
model_pred_class <- ifelse(predictions < rfThresh_youden[1,1], "X0", "X1")
head(model_pred_class)
##     X0   X1  
## 538 "X1" "X0"
## 580 "X0" "X1"
## 449 "X0" "X0"
## 149 "X0" "X1"
## 304 "X0" "X1"
## 15  "X0" "X1"
summary(model_pred_class)
##       X0                 X1           
##  Length:123         Length:123        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
Test2<-as.factor(model_pred_class[,2])


levels(Test2) <- c(0,1)
summary(Test2)
##  0  1 
## 35 88
CM_Test2<-confusionMatrix(Test2, ContraProva,positive="1")
CM_Test2
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 24 11
##          1 20 68
##                                           
##                Accuracy : 0.748           
##                  95% CI : (0.6617, 0.8219)
##     No Information Rate : 0.6423          
##     P-Value [Acc > NIR] : 0.008134        
##                                           
##                   Kappa : 0.4255          
##                                           
##  Mcnemar's Test P-Value : 0.150763        
##                                           
##             Sensitivity : 0.8608          
##             Specificity : 0.5455          
##          Pos Pred Value : 0.7727          
##          Neg Pred Value : 0.6857          
##              Prevalence : 0.6423          
##          Detection Rate : 0.5528          
##    Detection Prevalence : 0.7154          
##       Balanced Accuracy : 0.7031          
##                                           
##        'Positive' Class : 1               
## 
str(CM_Test2)
## List of 6
##  $ positive: chr "1"
##  $ table   : 'table' int [1:2, 1:2] 24 20 11 68
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ Prediction: chr [1:2] "0" "1"
##   .. ..$ Reference : chr [1:2] "0" "1"
##  $ overall : Named num [1:7] 0.748 0.425 0.662 0.822 0.642 ...
##   ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
##  $ byClass : Named num [1:11] 0.861 0.545 0.773 0.686 0.773 ...
##   ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
##  $ mode    : chr "sens_spec"
##  $ dots    : list()
##  - attr(*, "class")= chr "confusionMatrix"
CM_Test2$byClass[1]#Sensitivity de CM_predictions1
## Sensitivity 
##   0.8607595
CM_Test2$byClass[2]#Specificity de CM_predictions1
## Specificity 
##   0.5454545
J_CM_Test2<-as.numeric(CM_Test2$byClass[1] + CM_Test2$byClass[2] - 1) ; J_CM_Test2
## [1] 0.406214
calculate_mcc1(CM_Test2)
## [1] 0.4315384
calculate_F2(CM_Test2)
## [1] 0.8415842
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
##   threshold specificity sensitivity
## 1     0.554   0.5454545   0.8607595
rfThresh_youden[1,1]
## [1] 0.554
model_pred_class_31 <- ifelse(predictions_3 < rfThresh_youden[1,1], "X0", "X1")
head(sapply(model_pred_class_31, class))
##          X0          X0          X0          X0          X0          X0 
## "character" "character" "character" "character" "character" "character"
Test_31<-as.factor(model_pred_class_31[,2])
head(sapply(Test_31, class))
##       95      626      607      623       18      470 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_31) <- c(0,1)
head(sapply(Test_31, class))
##       95      626      607      623       18      470 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_31)
## [1] "0" "1"
head(Test_31)
##  95 626 607 623  18 470 
##   1   1   1   1   0   1 
## Levels: 0 1
summary(Test_31)
##  0  1 
## 15 66
CM_predictions_31<-confusionMatrix(Test_31, finalTest$true_troglofauna,positive="1")
CM_predictions_31
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 12  3
##          1 17 49
##                                           
##                Accuracy : 0.7531          
##                  95% CI : (0.6447, 0.8422)
##     No Information Rate : 0.642           
##     P-Value [Acc > NIR] : 0.02218         
##                                           
##                   Kappa : 0.3987          
##                                           
##  Mcnemar's Test P-Value : 0.00365         
##                                           
##             Sensitivity : 0.9423          
##             Specificity : 0.4138          
##          Pos Pred Value : 0.7424          
##          Neg Pred Value : 0.8000          
##              Prevalence : 0.6420          
##          Detection Rate : 0.6049          
##    Detection Prevalence : 0.8148          
##       Balanced Accuracy : 0.6781          
##                                           
##        'Positive' Class : 1               
## 
calculate_mcc1(CM_predictions_31)
## [1] 0.4394971
calculate_F2(CM_predictions_31)
## [1] 0.8941606
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
##   threshold specificity sensitivity
## 1     0.641   0.6363636   0.7594937
rfThresh_topleft[1,1]
## [1] 0.641
model_pred_class <- ifelse(predictions < rfThresh_topleft[1,1], "X0", "X1")
Test3<-as.factor(model_pred_class[,2])


levels(Test3) <- c(0,1)
summary(Test3)
##  0  1 
## 47 76
CM_Test3<-confusionMatrix(Test3, ContraProva,positive="1")
CM_Test3
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 28 19
##          1 16 60
##                                           
##                Accuracy : 0.7154          
##                  95% CI : (0.6271, 0.7931)
##     No Information Rate : 0.6423          
##     P-Value [Acc > NIR] : 0.05311         
##                                           
##                   Kappa : 0.39            
##                                           
##  Mcnemar's Test P-Value : 0.73532         
##                                           
##             Sensitivity : 0.7595          
##             Specificity : 0.6364          
##          Pos Pred Value : 0.7895          
##          Neg Pred Value : 0.5957          
##              Prevalence : 0.6423          
##          Detection Rate : 0.4878          
##    Detection Prevalence : 0.6179          
##       Balanced Accuracy : 0.6979          
##                                           
##        'Positive' Class : 1               
## 
str(CM_Test3)
## List of 6
##  $ positive: chr "1"
##  $ table   : 'table' int [1:2, 1:2] 28 16 19 60
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ Prediction: chr [1:2] "0" "1"
##   .. ..$ Reference : chr [1:2] "0" "1"
##  $ overall : Named num [1:7] 0.715 0.39 0.627 0.793 0.642 ...
##   ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
##  $ byClass : Named num [1:11] 0.759 0.636 0.789 0.596 0.789 ...
##   ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
##  $ mode    : chr "sens_spec"
##  $ dots    : list()
##  - attr(*, "class")= chr "confusionMatrix"
CM_Test3$byClass[1]#Sensitivity de CM_predictions1
## Sensitivity 
##   0.7594937
CM_Test3$byClass[2]#Specificity de CM_predictions1
## Specificity 
##   0.6363636
J_CM_Test3<-as.numeric(CM_Test3$byClass[1] + CM_Test3$byClass[2] - 1) ; J_CM_Test3
## [1] 0.3958573
calculate_mcc1(CM_Test3)
## [1] 0.3905016
calculate_F2(CM_Test3)
## [1] 0.7653061
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
##   threshold specificity sensitivity
## 1     0.641   0.6363636   0.7594937
rfThresh_topleft[1,1]
## [1] 0.641
model_pred_class_41 <- ifelse(predictions_3 < rfThresh_topleft[1,1], "X0", "X1")
head(sapply(model_pred_class_41, class))
##          X0          X0          X0          X0          X0          X0 
## "character" "character" "character" "character" "character" "character"
Test_41<-as.factor(model_pred_class_41[,2])#
head(sapply(Test_41, class))
##       95      626      607      623       18      470 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_41) <- c(0,1)
head(sapply(Test_41, class))
##       95      626      607      623       18      470 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_41)
## [1] "0" "1"
head(Test_41)
##  95 626 607 623  18 470 
##   1   1   1   1   0   1 
## Levels: 0 1
summary(Test_41)
##  0  1 
## 28 53
CM_predictions_41<-confusionMatrix(Test_41, finalTest$true_troglofauna,positive="1") 
CM_predictions_41
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 22  6
##          1  7 46
##                                           
##                Accuracy : 0.8395          
##                  95% CI : (0.7412, 0.9117)
##     No Information Rate : 0.642           
##     P-Value [Acc > NIR] : 7.19e-05        
##                                           
##                   Kappa : 0.6482          
##                                           
##  Mcnemar's Test P-Value : 1               
##                                           
##             Sensitivity : 0.8846          
##             Specificity : 0.7586          
##          Pos Pred Value : 0.8679          
##          Neg Pred Value : 0.7857          
##              Prevalence : 0.6420          
##          Detection Rate : 0.5679          
##    Detection Prevalence : 0.6543          
##       Balanced Accuracy : 0.8216          
##                                           
##        'Positive' Class : 1               
## 
calculate_mcc1(CM_predictions_41)
## [1] 0.6484166
calculate_F2(CM_predictions_41)
## [1] 0.8812261
roc.data <- data_frame( # Create a dataframe 'roc.data' for ROC analysis results
  thresholds = res.roc$thresholds, # Column for ROC curve thresholds
  sensitivity = res.roc$sensitivities, # Column for sensitivity (true positive rate)
  specificity = res.roc$specificities # Column for specificity (true negative rate)
)
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
roc.data # Display the 'roc.data' dataframe in the R console
## # A tibble: 100 × 3
##    thresholds sensitivity specificity
##         <dbl>       <dbl>       <dbl>
##  1   -Inf           1          0     
##  2      0.217       1          0.0227
##  3      0.233       0.987      0.0227
##  4      0.257       0.987      0.0682
##  5      0.305       0.987      0.0909
##  6      0.335       0.975      0.0909
##  7      0.351       0.962      0.0909
##  8      0.379       0.962      0.114 
##  9      0.401       0.962      0.136 
## 10      0.422       0.962      0.159 
## # ℹ 90 more rows
tibble(roc.data)# Convert the 'roc.data' dataframe into a tibble for further analysis (if needed)
## # A tibble: 100 × 3
##    thresholds sensitivity specificity
##         <dbl>       <dbl>       <dbl>
##  1   -Inf           1          0     
##  2      0.217       1          0.0227
##  3      0.233       0.987      0.0227
##  4      0.257       0.987      0.0682
##  5      0.305       0.987      0.0909
##  6      0.335       0.975      0.0909
##  7      0.351       0.962      0.0909
##  8      0.379       0.962      0.114 
##  9      0.401       0.962      0.136 
## 10      0.422       0.962      0.159 
## # ℹ 90 more rows
#Get the probality threshold for specificity >= 0.6 e sensitivity >= 0.6
roc.data %>% filter(specificity >= 0.6)#comando com  %>% "forward pipe operator"
## # A tibble: 62 × 3
##    thresholds sensitivity specificity
##         <dbl>       <dbl>       <dbl>
##  1      0.639       0.772       0.614
##  2      0.641       0.759       0.636
##  3      0.645       0.747       0.636
##  4      0.654       0.734       0.636
##  5      0.661       0.722       0.659
##  6      0.665       0.709       0.659
##  7      0.669       0.696       0.659
##  8      0.675       0.696       0.682
##  9      0.682       0.684       0.682
## 10      0.685       0.671       0.682
## # ℹ 52 more rows
roc.data %>% filter(sensitivity >= 0.6)#comando com  %>% "forward pipe operator"
## # A tibble: 54 × 3
##    thresholds sensitivity specificity
##         <dbl>       <dbl>       <dbl>
##  1   -Inf           1          0     
##  2      0.217       1          0.0227
##  3      0.233       0.987      0.0227
##  4      0.257       0.987      0.0682
##  5      0.305       0.987      0.0909
##  6      0.335       0.975      0.0909
##  7      0.351       0.962      0.0909
##  8      0.379       0.962      0.114 
##  9      0.401       0.962      0.136 
## 10      0.422       0.962      0.159 
## # ℹ 44 more rows
par(pty="s")

plot.roc(res.roc, print.auc =TRUE,col="blue", print.thres =
           "best",print.auc.y=0.4,main="ROC curve (Point: best.method = 'youden')")


grid(nx = NULL, ny = NULL, 
     lty = 3, 
     lwd = 1, 
     col = "gray")

segments(rfThresh_youden[1,2],1-rfThresh_youden[1,2],rfThresh_youden[1,2], rfThresh_youden[1,3],lwd = 3, col = "red",lty=2)

text(0.4,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

par(pty="s")

plot.roc(res.roc, print.auc =TRUE,col="blue", print.thres =
           c(0.3, 0.5, 0.7,rfThresh_topleft[1,1]),print.auc.y=0.4,main="ROC curve (Point: best.method = 'closest.topleft')")


grid(nx = NULL, ny = NULL, 
     lty = 3, 
     lwd = 1, 
     col = "gray")


segments(rfThresh_topleft[1,2],rfThresh_topleft[1,3],1, 1,lwd = 3, col = "red",lty=2)

text(0.3,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

par(pty="s")


plot.roc(res.roc, print.auc =TRUE,print.auc.y=0.4,legacy.axes = TRUE, col="blue", print.thres = c(0.2, 0.5, 0.7,rfThresh_youden[1,1]),main="ROC curve - Youden",xlab = "1-Specificity = False Positive Rate = FPR",ylab = "Sensitivity = True Positive Rate = TPR")


segments(rfThresh_youden[1,2], 1-rfThresh_youden[1,2],rfThresh_youden[1,2],rfThresh_youden[1,3],lwd = 3, col = "red",lty=2)


grid(nx = NULL, ny = NULL, 
     lty = 3, 
     lwd = 1, 
     col = "gray")

text(0.4,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

par(pty="s")



plot.roc(res.roc, print.auc =TRUE,print.auc.y=0.4,legacy.axes = TRUE, col="blue", print.thres =  c(0.3, 0.5, 0.7,rfThresh_topleft[1,1]),main="ROC curve - Closest Topleft",xlab = "1-Specificity = False Positive Rate = FPR",ylab = "Sensitivity = True Positive Rate = TPR")


segments(rfThresh_topleft[1,2],rfThresh_topleft[1,3],1, 1,lwd = 3, col = "red",lty=2)




grid(nx = NULL, ny = NULL, 
     lty = 3, 
     lwd = 1, 
     col = "gray")

text(0.4,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

par(pty="s")


rocCurve <- roc(ContraProva,prediction.probabilities, plot=TRUE,legacy.axes = TRUE, col="blue",main="ROC curve (Point: best.method = 'youden')",xlab="False Positive Rate = FPR", ylab="True Positive Rate = TPR",print.thres =
                  "best",print.auc =TRUE,cex.main=0.9)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
segments(rfThresh_youden[1,2], 1-rfThresh_youden[1,2],rfThresh_youden[1,2],rfThresh_youden[1,3],lwd = 3, col = "red",lty=2)


grid(nx = NULL, ny = NULL, 
     lty = 3, 
     lwd = 1, 
     col = "gray")

text(0.4,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

dim(finalTest)
## [1] 81 28
names(finalTest)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"          "true_troglofauna"
dim(selectcol_finalTest)
## [1] 81 27
names(selectcol_finalTest)
##  [1] "trngcv"           "bio14"            "slope"            "rugg500s"        
##  [5] "pil_twim"         "pil_twicv"        "pil_topocv"       "pil_slps"        
##  [9] "pil_slpcv"        "pil_elr3cv"       "mrvbf"            "mrrtf"           
## [13] "minfertf"         "lf7rup"           "hstructn"         "geolrngaggn"     
## [17] "elevationm"       "wr_unrn"          "solpawhcn"        "slopern"         
## [21] "MIN_AGE_MA"       "HubDist"          "month_collection" "UNITNAME"        
## [25] "ROCKTYPE1"        "FORMATION"        "HubName"
CM_predictions_2$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9807692            0.3103448            0.7183099 
##       Neg Pred Value            Precision               Recall 
##            0.9000000            0.7183099            0.9807692 
##                   F1           Prevalence       Detection Rate 
##            0.8292683            0.6419753            0.6296296 
## Detection Prevalence    Balanced Accuracy 
##            0.8765432            0.6455570
beta<-2


dbF1_11<-CM_predictions_2$byClass
dbF1_11["F1"]
##        F1 
## 0.8292683
dbF2_11<-((1+beta^2)*dbF1_11["Precision"]*dbF1_11["Sensitivity"])/(beta^2*dbF1_11["Precision"] + dbF1_11["Sensitivity"])
dbF2_11<-as.numeric(dbF2_11)
dbF2_11
## [1] 0.9139785
dbF1_11["Precision"];dbF1_11["Sensitivity"]
## Precision 
## 0.7183099
## Sensitivity 
##   0.9807692
dbF1_22<-CM_predictions_31$byClass
dbF1_22["F1"]
##        F1 
## 0.8305085
dbF2_22<-((1+beta^2)*dbF1_22["Precision"]*dbF1_22["Sensitivity"])/(beta^2*dbF1_22["Precision"] + dbF1_22["Sensitivity"])
dbF2_22<-as.numeric(dbF2_22)
dbF2_22
## [1] 0.8941606
dbF1_22["Precision"];dbF1_22["Sensitivity"]
## Precision 
## 0.7424242
## Sensitivity 
##   0.9423077
dbF1_33<-CM_predictions_41$byClass
dbF1_33["F1"]
##        F1 
## 0.8761905
dbF2_33<-((1+beta^2)*dbF1_33["Precision"]*dbF1_33["Sensitivity"])/(beta^2*dbF1_33["Precision"] + dbF1_33["Sensitivity"])
dbF2_33<-as.numeric(dbF2_33)
dbF2_33
## [1] 0.8812261
dbF1_33["Precision"];dbF1_33["Sensitivity"]
## Precision 
## 0.8679245
## Sensitivity 
##   0.8846154
dim(finalTest)
## [1] 81 28
basmcc<-mcc(predictions_2, finalTest[, ncol(finalTest)])
basmcc
## [1] 0.4242625
youmcc<-mcc(Test_31, finalTest[, ncol(finalTest)])
youmcc
## [1] 0.4394971
topmcc<-mcc(Test_41, finalTest[, ncol(finalTest)])
topmcc
## [1] 0.6484166

#Results

cat("\n\n","TABLE 1 - RESULTS OF THE 'finalTest' DATABASE WITH THE MODELS USING THE train() FUNCTION OF THE caret PACKAGE"
    
    ,"\n\n\n", "1 - Basic model"
    
    ,"\n\n","Area Under ROC (AUC)- Basic model =", auc(res.roc)
    
    ,"\n\n\n","Accuracy =", CM_predictions_2$overall[1]
    
    ,"\n\n","Sensitivity =", CM_predictions_2$byClass[1]
    
    ,"\n\n","Specificity =", CM_predictions_2$byClass[2]
    
    ,"\n\n","Balanced Accuracy =", CM_predictions_2$byClass[11]
    
    ,"\n\n","F1 =", dbF1_11["F1"]
    
    ,"\n\n","F2 =", dbF2_11
    
    ,"\n\n","matthews correlation coefficient =", calculate_mcc1(CM_predictions_2)
    
    ,"\n\n","Threshold =", 0.5
    
    
    ,"\n\n\n", "2 - Youden's J statistic Threshold Method"
    
    ,"\n\n","Accuracy =", CM_predictions_31$overall[1]
    
    ,"\n\n","Sensitivity =", CM_predictions_31$byClass[1]
    
    ,"\n\n","Specificity =", CM_predictions_31$byClass[2]
    
    ,"\n\n","Balanced Accuracy =", CM_predictions_31$byClass[11]
    
    ,"\n\n","F1 =", dbF1_22["F1"]
    
    ,"\n\n","F2 =", dbF2_22
    
    ,"\n\n","matthews correlation coefficient =", calculate_mcc1(CM_predictions_31)
    
    ,"\n\n","Threshold =", rfThresh_youden[1,1]
    
    
    ,"\n\n\n", "3 - Closest Top-left Threshold Method"
    
    ,"\n\n","Accuracy =", CM_predictions_41$overall[1]
    
    ,"\n\n","Sensitivity =", CM_predictions_41$byClass[1]
    
    ,"\n\n","Specificity =", CM_predictions_41$byClass[2]
    
    ,"\n\n","Balanced Accuracy =", CM_predictions_41$byClass[11]
    
    ,"\n\n","F1 =", dbF1_33["F1"]
    
    ,"\n\n","F2 =", dbF2_33
    
    ,"\n\n","matthews correlation coefficient =", calculate_mcc1(CM_predictions_41)
    
    ,"\n\n","Threshold =", rfThresh_topleft[1,1]
    
    )
## 
## 
##  TABLE 1 - RESULTS OF THE 'finalTest' DATABASE WITH THE MODELS USING THE train() FUNCTION OF THE caret PACKAGE 
## 
## 
##  1 - Basic model 
## 
##  Area Under ROC (AUC)- Basic model = 0.7392117 
## 
## 
##  Accuracy = 0.7407407 
## 
##  Sensitivity = 0.9807692 
## 
##  Specificity = 0.3103448 
## 
##  Balanced Accuracy = 0.645557 
## 
##  F1 = 0.8292683 
## 
##  F2 = 0.9139785 
## 
##  matthews correlation coefficient = 0.4242625 
## 
##  Threshold = 0.5 
## 
## 
##  2 - Youden's J statistic Threshold Method 
## 
##  Accuracy = 0.7530864 
## 
##  Sensitivity = 0.9423077 
## 
##  Specificity = 0.4137931 
## 
##  Balanced Accuracy = 0.6780504 
## 
##  F1 = 0.8305085 
## 
##  F2 = 0.8941606 
## 
##  matthews correlation coefficient = 0.4394971 
## 
##  Threshold = 0.554 
## 
## 
##  3 - Closest Top-left Threshold Method 
## 
##  Accuracy = 0.8395062 
## 
##  Sensitivity = 0.8846154 
## 
##  Specificity = 0.7586207 
## 
##  Balanced Accuracy = 0.821618 
## 
##  F1 = 0.8761905 
## 
##  F2 = 0.8812261 
## 
##  matthews correlation coefficient = 0.6484166 
## 
##  Threshold = 0.641

Predicting presence

# Identify rows with presence of troglofauna
gaa <- which(finalTest1$true_troglofauna == 1)
geo1 <- finalTest1[gaa,]

# Identify rows with absence of troglofauna
gaaAu <- which(finalTest1$true_troglofauna == 0)
geoAu <- finalTest1[gaaAu,]

# Plot the data
Min_Lg <- min(finalTest1$LONGITUDE) - 0.05
Max_Lg <- max(finalTest1$LONGITUDE) + 0.05
Min_Lt <- min(finalTest1$LATITUDE) - 0.05
Max_Lt <- max(finalTest1$LATITUDE) + 0.05

plot(geo1$LONGITUDE, geo1$LATITUDE, xlim = c(Min_Lg, Max_Lg), ylim = c(Min_Lt, Max_Lt), 
     cex = 2, pch = 20, col = "blue", 
     xlab = 'Longitude\n', 
     ylab = 'Latitude', 
     main = 'Basic Model. Coordinates of troglofauna presence: Observed and Predicted', 
     sub = "\nObserved Presence = blue, Predicted Presence = red, Observed Absence = black")

# Find lines in predictions_3 with presence of troglofauna
gbb <- which(predictions_2 == 1)
geo2 <- finalTest1[gbb,]

# Add predicted presence points to the plot
points(geo2$LONGITUDE, geo2$LATITUDE, pch = 5, col = "red")

# Add absence points to the plot
points(geoAu$LONGITUDE, geoAu$LATITUDE, pch = 4, col = rgb(0, 0, 0, 0.15))

gaa<-which(finalTest1$true_troglofauna==1);gaa
##  [1]  1  2  3  4  6  7  9 10 11 12 16 17 18 19 21 22 23 26 27 29 32 33 34 35 36
## [26] 37 38 40 42 43 45 46 47 50 51 52 53 54 55 57 59 61 65 69 70 71 72 73 76 77
## [51] 78 80
geo1<-finalTest1[gaa,]
summary(geo1)
##      trngcv          bio14             slope            rugg500s     
##  Min.   :8.808   Min.   :0.01801   Min.   : 0.5627   Min.   :0.2331  
##  1st Qu.:9.244   1st Qu.:0.13297   1st Qu.: 2.2721   1st Qu.:1.5072  
##  Median :9.312   Median :0.17503   Median : 3.4064   Median :2.6873  
##  Mean   :9.336   Mean   :0.18598   Mean   : 5.3447   Mean   :3.5658  
##  3rd Qu.:9.424   3rd Qu.:0.24458   3rd Qu.: 8.0816   3rd Qu.:5.4057  
##  Max.   :9.889   Max.   :0.34399   Max.   :15.8156   Max.   :9.9756  
##                                                                      
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.831   Min.   :0.04324   Min.   :0.00000   Min.   : 0.5535  
##  1st Qu.: 7.543   1st Qu.:0.11117   1st Qu.:0.00000   1st Qu.: 1.5083  
##  Median : 8.304   Median :0.16271   Median :0.01835   Median : 4.3828  
##  Mean   : 8.594   Mean   :0.16939   Mean   :0.06513   Mean   : 5.1668  
##  3rd Qu.: 9.460   3rd Qu.:0.23025   3rd Qu.:0.12912   3rd Qu.: 8.0398  
##  Max.   :11.582   Max.   :0.30956   Max.   :0.20161   Max.   :12.7276  
##                                                                        
##    pil_slpcv        pil_elr3cv          mrvbf            mrrtf       
##  Min.   :0.2188   Min.   :0.04501   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3770   1st Qu.:0.12937   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.4892   Median :0.20425   Median :0.0000   Median :0.0000  
##  Mean   :0.5491   Mean   :0.23279   Mean   :0.2087   Mean   :0.0733  
##  3rd Qu.:0.6604   3rd Qu.:0.31706   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.6098   Max.   :0.56449   Max.   :1.8239   Max.   :1.8897  
##                                                                      
##     minfertf        lf7rup         hstructn      geolrngaggn      
##  Min.   :2.00   Min.   :1.000   Min.   :0.000   Min.   :  0.0115  
##  1st Qu.:2.00   1st Qu.:2.000   1st Qu.:1.043   1st Qu.:  2.5880  
##  Median :2.00   Median :3.000   Median :1.043   Median :  2.5880  
##  Mean   :2.25   Mean   :2.808   Mean   :1.515   Mean   :175.7587  
##  3rd Qu.:2.25   3rd Qu.:4.000   3rd Qu.:1.043   3rd Qu.:300.0000  
##  Max.   :3.00   Max.   :6.000   Max.   :5.143   Max.   :900.0000  
##                                                                   
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :518.3   Min.   : 0.00   Min.   : 96.0   Min.   :0.00179  
##  1st Qu.:561.1   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.01874  
##  Median :570.5   Median :17.86   Median : 96.0   Median :0.04360  
##  Mean   :585.6   Mean   :15.43   Mean   :101.2   Mean   :0.09140  
##  3rd Qu.:611.1   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.09728  
##  Max.   :703.9   Max.   :17.86   Max.   :157.0   Max.   :0.70049  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 3.218   May      : 8     
##  1st Qu.:2454   1st Qu.:16.263   September: 8     
##  Median :2454   Median :26.703   March    : 7     
##  Mean   :2487   Mean   :43.441   June     : 6     
##  3rd Qu.:2494   3rd Qu.:76.671   November : 6     
##  Max.   :2597   Max.   :86.707   January  : 5     
##                                  (Other)  :12     
##                                          UNITNAME 
##  Brockman Iron Formation                     :32  
##  Marra Mamba Iron Formation                  : 9  
##  Mount McRae Shale and Mount Sylvia Formation:11  
##  Wittenoom Formation                         : 0  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 0  
##  sedimentary other chemical or biochemical:41  
##  sedimentary siliciclastic                :11  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :32  
##  Marra Mamba Iron Formation                  : 9  
##  Mount McRae Shale and Mount Sylvia Formation:11  
##  Wittenoom Formation                         : 0  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :23  
##  exposed                                                    : 6  
##  strike-slip, exposed, showing relative dextral displacement: 1  
##  syncline, exposed                                          :22  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0: 0             Min.   :-22.61   Min.   :117.2  
##  1:52             1st Qu.:-22.23   1st Qu.:117.5  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.23   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.09   Max.   :118.0  
## 
gaaAu<-which(finalTest1$true_troglofauna==0);gaaAu
##  [1]  5  8 13 14 15 20 24 25 28 30 31 39 41 44 48 49 56 58 60 62 63 64 66 67 68
## [26] 74 75 79 81
geoAu<-finalTest1[gaaAu,]
summary(geoAu)
##      trngcv          bio14             slope            rugg500s     
##  Min.   :8.894   Min.   :0.02573   Min.   : 0.2666   Min.   :0.0407  
##  1st Qu.:9.244   1st Qu.:0.11380   1st Qu.: 1.4436   1st Qu.:1.2771  
##  Median :9.337   Median :0.14993   Median : 3.0893   Median :2.7764  
##  Mean   :9.335   Mean   :0.16881   Mean   : 4.5309   Mean   :3.0913  
##  3rd Qu.:9.394   3rd Qu.:0.20636   3rd Qu.: 6.2448   3rd Qu.:4.1029  
##  Max.   :9.887   Max.   :0.42288   Max.   :14.3589   Max.   :8.0468  
##                                                                      
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.749   Min.   :0.04954   Min.   :0.00000   Min.   : 0.1344  
##  1st Qu.: 7.956   1st Qu.:0.12372   1st Qu.:0.00000   1st Qu.: 0.8647  
##  Median : 9.267   Median :0.21281   Median :0.02834   Median : 3.2229  
##  Mean   : 9.061   Mean   :0.18747   Mean   :0.07876   Mean   : 3.5669  
##  3rd Qu.:10.169   3rd Qu.:0.25139   3rd Qu.:0.14605   3rd Qu.: 5.3619  
##  Max.   :11.442   Max.   :0.36698   Max.   :0.32611   Max.   :10.9676  
##                                                                        
##    pil_slpcv        pil_elr3cv         mrvbf            mrrtf       
##  Min.   :0.1797   Min.   :0.0359   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3397   1st Qu.:0.1528   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.4196   Median :0.2261   Median :0.0000   Median :0.0000  
##  Mean   :0.4479   Mean   :0.2242   Mean   :0.2452   Mean   :0.1054  
##  3rd Qu.:0.5034   3rd Qu.:0.2841   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :0.7925   Max.   :0.4060   Max.   :2.8219   Max.   :0.6614  
##                                                                     
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :2.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:3.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.207   Mean   :3.621   Mean   : 1.572   Mean   :147.1018  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.:  2.5880  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##                                                                     
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :515.2   Min.   : 0.00   Min.   : 96.0   Min.   :0.00341  
##  1st Qu.:536.0   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.02670  
##  Median :568.4   Median :17.86   Median : 96.0   Median :0.03745  
##  Mean   :577.2   Mean   :15.41   Mean   :103.7   Mean   :0.52368  
##  3rd Qu.:601.6   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.12267  
##  Max.   :726.1   Max.   :17.86   Max.   :157.0   Max.   :4.28009  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 1.931   December :8      
##  1st Qu.:2454   1st Qu.:17.066   January  :6      
##  Median :2454   Median :21.429   September:5      
##  Mean   :2472   Mean   :34.474   April    :4      
##  3rd Qu.:2494   3rd Qu.:54.509   March    :2      
##  Max.   :2597   Max.   :88.221   February :1      
##                                  (Other)  :3      
##                                          UNITNAME 
##  Brockman Iron Formation                     :19  
##  Marra Mamba Iron Formation                  : 1  
##  Mount McRae Shale and Mount Sylvia Formation: 8  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 1  
##  sedimentary other chemical or biochemical:20  
##  sedimentary siliciclastic                : 8  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :19  
##  Marra Mamba Iron Formation                  : 1  
##  Mount McRae Shale and Mount Sylvia Formation: 8  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :20  
##  exposed                                                    : 0  
##  strike-slip, exposed, showing relative dextral displacement: 1  
##  syncline, exposed                                          : 8  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0:29             Min.   :-22.64   Min.   :117.1  
##  1: 0             1st Qu.:-22.21   1st Qu.:117.7  
##                   Median :-22.15   Median :117.8  
##                   Mean   :-22.19   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.11   Max.   :118.0  
## 
Min_Lg<-min(finalTest1$LONGITUDE)-0.05
Max_Lg<-max(finalTest1$LONGITUDE)+0.05


Min_Lt<-min(finalTest1$LATITUDE)-0.05
Max_Lt<-max(finalTest1$LATITUDE)+0.05

plot(geo1$LONGITUDE,geo1$LATITUDE,xlim = c(Min_Lg,Max_Lg),ylim = c(Min_Lt,Max_Lt), cex=2, pch=20, col="blue", xlab='Longitude\n', ylab='Latitude', main = 'Basic Model. Coordinates of troglofauna presence: Observed and Predicted', 
     sub = "\nObserved Presence = blue, Predicted Presence = red, Observed Absence = black")


gbb<-which(predictions_2==1);gbb
##  [1]  1  2  3  4  6  7  8 10 11 12 13 15 16 17 18 19 20 21 22 23 25 26 27 28 29
## [26] 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 49 50 51 52 53 54 55 56
## [51] 57 58 59 60 61 65 66 67 68 69 70 71 72 73 75 76 77 78 79 80 81
geo2<-finalTest1[gbb,]
summary(geo2)
##      trngcv          bio14             slope            rugg500s     
##  Min.   :8.808   Min.   :0.01801   Min.   : 0.2666   Min.   :0.1508  
##  1st Qu.:9.243   1st Qu.:0.11960   1st Qu.: 2.1535   1st Qu.:1.5066  
##  Median :9.325   Median :0.17200   Median : 3.3626   Median :2.7764  
##  Mean   :9.344   Mean   :0.18270   Mean   : 5.1586   Mean   :3.4517  
##  3rd Qu.:9.417   3rd Qu.:0.23916   3rd Qu.: 7.5055   3rd Qu.:5.2003  
##  Max.   :9.889   Max.   :0.42288   Max.   :15.8156   Max.   :9.9756  
##                                                                      
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.749   Min.   :0.04324   Min.   :0.00000   Min.   : 0.4874  
##  1st Qu.: 7.693   1st Qu.:0.11822   1st Qu.:0.00000   1st Qu.: 1.4048  
##  Median : 8.453   Median :0.16354   Median :0.02168   Median : 3.9729  
##  Mean   : 8.645   Mean   :0.17513   Mean   :0.06882   Mean   : 4.7409  
##  3rd Qu.: 9.662   3rd Qu.:0.23356   3rd Qu.:0.13549   3rd Qu.: 7.0271  
##  Max.   :11.582   Max.   :0.36698   Max.   :0.32611   Max.   :12.7276  
##                                                                        
##    pil_slpcv        pil_elr3cv         mrvbf           mrrtf        
##  Min.   :0.2188   Min.   :0.0359   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.3624   1st Qu.:0.1301   1st Qu.:0.000   1st Qu.:0.00000  
##  Median :0.4713   Median :0.2056   Median :0.000   Median :0.00000  
##  Mean   :0.5201   Mean   :0.2294   Mean   :0.204   Mean   :0.07929  
##  3rd Qu.:0.6220   3rd Qu.:0.3147   3rd Qu.:0.000   3rd Qu.:0.00000  
##  Max.   :1.6098   Max.   :0.5645   Max.   :2.806   Max.   :1.88966  
##                                                                     
##     minfertf         lf7rup         hstructn      geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   :0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median :1.043   Median :  2.5880  
##  Mean   :2.239   Mean   :2.986   Mean   :1.431   Mean   :175.8770  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:1.043   3rd Qu.:300.0000  
##  Max.   :3.000   Max.   :7.000   Max.   :5.143   Max.   :900.0000  
##                                                                    
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :515.2   Min.   : 0.00   Min.   : 96.0   Min.   :0.00179  
##  1st Qu.:560.1   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.01985  
##  Median :570.5   Median :17.86   Median : 96.0   Median :0.04257  
##  Mean   :584.8   Mean   :15.71   Mean   :100.7   Mean   :0.15044  
##  3rd Qu.:613.0   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.09939  
##  Max.   :726.1   Max.   :17.86   Max.   :157.0   Max.   :4.24974  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 2.183   September:10     
##  1st Qu.:2454   1st Qu.:16.549   December :10     
##  Median :2454   Median :22.459   January  : 9     
##  Mean   :2482   Mean   :42.584   March    : 9     
##  3rd Qu.:2494   3rd Qu.:76.502   May      : 9     
##  Max.   :2597   Max.   :88.221   June     : 6     
##                                  (Other)  :18     
##                                          UNITNAME 
##  Brockman Iron Formation                     :45  
##  Marra Mamba Iron Formation                  : 9  
##  Mount McRae Shale and Mount Sylvia Formation:16  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 1  
##  sedimentary other chemical or biochemical:54  
##  sedimentary siliciclastic                :16  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :45  
##  Marra Mamba Iron Formation                  : 9  
##  Mount McRae Shale and Mount Sylvia Formation:16  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :34  
##  exposed                                                    : 6  
##  strike-slip, exposed, showing relative dextral displacement: 2  
##  syncline, exposed                                          :29  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0:20             Min.   :-22.64   Min.   :117.1  
##  1:51             1st Qu.:-22.22   1st Qu.:117.7  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.22   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.09   Max.   :118.0  
## 
points(geo2$LONGITUDE,geo2$LATITUDE,xlim = c(Min_Lg,Max_Lg),ylim = c(Min_Lt,Max_Lt),pch=5, col="red")


points(geoAu$LONGITUDE,geoAu$LATITUDE,xlim = c(115,122),ylim = c(-26,-21),pch = 4, col = rgb(0, 0, 0, 0.15))

Closest Top-left Threshold Method

gaa<-which(finalTest1$true_troglofauna==1);gaa
##  [1]  1  2  3  4  6  7  9 10 11 12 16 17 18 19 21 22 23 26 27 29 32 33 34 35 36
## [26] 37 38 40 42 43 45 46 47 50 51 52 53 54 55 57 59 61 65 69 70 71 72 73 76 77
## [51] 78 80
geo1<-finalTest1[gaa,]
summary(geo1)
##      trngcv          bio14             slope            rugg500s     
##  Min.   :8.808   Min.   :0.01801   Min.   : 0.5627   Min.   :0.2331  
##  1st Qu.:9.244   1st Qu.:0.13297   1st Qu.: 2.2721   1st Qu.:1.5072  
##  Median :9.312   Median :0.17503   Median : 3.4064   Median :2.6873  
##  Mean   :9.336   Mean   :0.18598   Mean   : 5.3447   Mean   :3.5658  
##  3rd Qu.:9.424   3rd Qu.:0.24458   3rd Qu.: 8.0816   3rd Qu.:5.4057  
##  Max.   :9.889   Max.   :0.34399   Max.   :15.8156   Max.   :9.9756  
##                                                                      
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.831   Min.   :0.04324   Min.   :0.00000   Min.   : 0.5535  
##  1st Qu.: 7.543   1st Qu.:0.11117   1st Qu.:0.00000   1st Qu.: 1.5083  
##  Median : 8.304   Median :0.16271   Median :0.01835   Median : 4.3828  
##  Mean   : 8.594   Mean   :0.16939   Mean   :0.06513   Mean   : 5.1668  
##  3rd Qu.: 9.460   3rd Qu.:0.23025   3rd Qu.:0.12912   3rd Qu.: 8.0398  
##  Max.   :11.582   Max.   :0.30956   Max.   :0.20161   Max.   :12.7276  
##                                                                        
##    pil_slpcv        pil_elr3cv          mrvbf            mrrtf       
##  Min.   :0.2188   Min.   :0.04501   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3770   1st Qu.:0.12937   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.4892   Median :0.20425   Median :0.0000   Median :0.0000  
##  Mean   :0.5491   Mean   :0.23279   Mean   :0.2087   Mean   :0.0733  
##  3rd Qu.:0.6604   3rd Qu.:0.31706   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.6098   Max.   :0.56449   Max.   :1.8239   Max.   :1.8897  
##                                                                      
##     minfertf        lf7rup         hstructn      geolrngaggn      
##  Min.   :2.00   Min.   :1.000   Min.   :0.000   Min.   :  0.0115  
##  1st Qu.:2.00   1st Qu.:2.000   1st Qu.:1.043   1st Qu.:  2.5880  
##  Median :2.00   Median :3.000   Median :1.043   Median :  2.5880  
##  Mean   :2.25   Mean   :2.808   Mean   :1.515   Mean   :175.7587  
##  3rd Qu.:2.25   3rd Qu.:4.000   3rd Qu.:1.043   3rd Qu.:300.0000  
##  Max.   :3.00   Max.   :6.000   Max.   :5.143   Max.   :900.0000  
##                                                                   
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :518.3   Min.   : 0.00   Min.   : 96.0   Min.   :0.00179  
##  1st Qu.:561.1   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.01874  
##  Median :570.5   Median :17.86   Median : 96.0   Median :0.04360  
##  Mean   :585.6   Mean   :15.43   Mean   :101.2   Mean   :0.09140  
##  3rd Qu.:611.1   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.09728  
##  Max.   :703.9   Max.   :17.86   Max.   :157.0   Max.   :0.70049  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 3.218   May      : 8     
##  1st Qu.:2454   1st Qu.:16.263   September: 8     
##  Median :2454   Median :26.703   March    : 7     
##  Mean   :2487   Mean   :43.441   June     : 6     
##  3rd Qu.:2494   3rd Qu.:76.671   November : 6     
##  Max.   :2597   Max.   :86.707   January  : 5     
##                                  (Other)  :12     
##                                          UNITNAME 
##  Brockman Iron Formation                     :32  
##  Marra Mamba Iron Formation                  : 9  
##  Mount McRae Shale and Mount Sylvia Formation:11  
##  Wittenoom Formation                         : 0  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 0  
##  sedimentary other chemical or biochemical:41  
##  sedimentary siliciclastic                :11  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :32  
##  Marra Mamba Iron Formation                  : 9  
##  Mount McRae Shale and Mount Sylvia Formation:11  
##  Wittenoom Formation                         : 0  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :23  
##  exposed                                                    : 6  
##  strike-slip, exposed, showing relative dextral displacement: 1  
##  syncline, exposed                                          :22  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0: 0             Min.   :-22.61   Min.   :117.2  
##  1:52             1st Qu.:-22.23   1st Qu.:117.5  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.23   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.09   Max.   :118.0  
## 
gaaAu<-which(finalTest1$true_troglofauna==0);gaaAu
##  [1]  5  8 13 14 15 20 24 25 28 30 31 39 41 44 48 49 56 58 60 62 63 64 66 67 68
## [26] 74 75 79 81
geoAu<-finalTest1[gaaAu,]
summary(geoAu)
##      trngcv          bio14             slope            rugg500s     
##  Min.   :8.894   Min.   :0.02573   Min.   : 0.2666   Min.   :0.0407  
##  1st Qu.:9.244   1st Qu.:0.11380   1st Qu.: 1.4436   1st Qu.:1.2771  
##  Median :9.337   Median :0.14993   Median : 3.0893   Median :2.7764  
##  Mean   :9.335   Mean   :0.16881   Mean   : 4.5309   Mean   :3.0913  
##  3rd Qu.:9.394   3rd Qu.:0.20636   3rd Qu.: 6.2448   3rd Qu.:4.1029  
##  Max.   :9.887   Max.   :0.42288   Max.   :14.3589   Max.   :8.0468  
##                                                                      
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.749   Min.   :0.04954   Min.   :0.00000   Min.   : 0.1344  
##  1st Qu.: 7.956   1st Qu.:0.12372   1st Qu.:0.00000   1st Qu.: 0.8647  
##  Median : 9.267   Median :0.21281   Median :0.02834   Median : 3.2229  
##  Mean   : 9.061   Mean   :0.18747   Mean   :0.07876   Mean   : 3.5669  
##  3rd Qu.:10.169   3rd Qu.:0.25139   3rd Qu.:0.14605   3rd Qu.: 5.3619  
##  Max.   :11.442   Max.   :0.36698   Max.   :0.32611   Max.   :10.9676  
##                                                                        
##    pil_slpcv        pil_elr3cv         mrvbf            mrrtf       
##  Min.   :0.1797   Min.   :0.0359   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3397   1st Qu.:0.1528   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.4196   Median :0.2261   Median :0.0000   Median :0.0000  
##  Mean   :0.4479   Mean   :0.2242   Mean   :0.2452   Mean   :0.1054  
##  3rd Qu.:0.5034   3rd Qu.:0.2841   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :0.7925   Max.   :0.4060   Max.   :2.8219   Max.   :0.6614  
##                                                                     
##     minfertf         lf7rup         hstructn       geolrngaggn      
##  Min.   :2.000   Min.   :2.000   Min.   : 0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:3.000   1st Qu.: 1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median : 1.043   Median :  2.5880  
##  Mean   :2.207   Mean   :3.621   Mean   : 1.572   Mean   :147.1018  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043   3rd Qu.:  2.5880  
##  Max.   :3.000   Max.   :7.000   Max.   :15.429   Max.   :900.0000  
##                                                                     
##    elevationm       wr_unrn        solpawhcn        slopern       
##  Min.   :515.2   Min.   : 0.00   Min.   : 96.0   Min.   :0.00341  
##  1st Qu.:536.0   1st Qu.:17.86   1st Qu.: 96.0   1st Qu.:0.02670  
##  Median :568.4   Median :17.86   Median : 96.0   Median :0.03745  
##  Mean   :577.2   Mean   :15.41   Mean   :103.7   Mean   :0.52368  
##  3rd Qu.:601.6   3rd Qu.:17.86   3rd Qu.: 96.0   3rd Qu.:0.12267  
##  Max.   :726.1   Max.   :17.86   Max.   :157.0   Max.   :4.28009  
##                                                                   
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 1.931   December :8      
##  1st Qu.:2454   1st Qu.:17.066   January  :6      
##  Median :2454   Median :21.429   September:5      
##  Mean   :2472   Mean   :34.474   April    :4      
##  3rd Qu.:2494   3rd Qu.:54.509   March    :2      
##  Max.   :2597   Max.   :88.221   February :1      
##                                  (Other)  :3      
##                                          UNITNAME 
##  Brockman Iron Formation                     :19  
##  Marra Mamba Iron Formation                  : 1  
##  Mount McRae Shale and Mount Sylvia Formation: 8  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 1  
##  sedimentary other chemical or biochemical:20  
##  sedimentary siliciclastic                : 8  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :19  
##  Marra Mamba Iron Formation                  : 1  
##  Mount McRae Shale and Mount Sylvia Formation: 8  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :20  
##  exposed                                                    : 0  
##  strike-slip, exposed, showing relative dextral displacement: 1  
##  syncline, exposed                                          : 8  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0:29             Min.   :-22.64   Min.   :117.1  
##  1: 0             1st Qu.:-22.21   1st Qu.:117.7  
##                   Median :-22.15   Median :117.8  
##                   Mean   :-22.19   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.11   Max.   :118.0  
## 
plot(geo1$LONGITUDE,geo1$LATITUDE,xlim = c(Min_Lg,Max_Lg),ylim = c(Min_Lt,Max_Lt), cex=2, pch=20, col="blue", xlab='Longitude\n', ylab='Latitude', main = 'Basic Model. Coordinates of troglofauna presence: Observed and Predicted', 
     sub = "\nObserved Presence = blue, Predicted Presence = red, Observed Absence = black")


gbb<-which(Test_41==1);gbb
##  [1]  1  2  3  4  6  7 11 12 15 16 17 18 19 21 23 25 26 27 28 32 33 34 35 36 37
## [26] 38 40 42 43 45 47 49 50 51 52 53 54 55 57 59 61 65 66 69 70 71 72 73 75 76
## [51] 78 80 81
geo2<-finalTest1[gbb,]
summary(geo2)
##      trngcv          bio14             slope            rugg500s     
##  Min.   :8.808   Min.   :0.01801   Min.   : 0.6517   Min.   :0.2331  
##  1st Qu.:9.246   1st Qu.:0.11871   1st Qu.: 2.3146   1st Qu.:1.5054  
##  Median :9.325   Median :0.17200   Median : 3.4502   Median :2.7764  
##  Mean   :9.347   Mean   :0.18559   Mean   : 5.1285   Mean   :3.4713  
##  3rd Qu.:9.422   3rd Qu.:0.25342   3rd Qu.: 7.4120   3rd Qu.:5.3713  
##  Max.   :9.889   Max.   :0.42288   Max.   :15.8156   Max.   :9.5490  
##                                                                      
##     pil_twim        pil_twicv         pil_topocv         pil_slps      
##  Min.   : 5.749   Min.   :0.04324   Min.   :0.00000   Min.   : 0.6456  
##  1st Qu.: 7.552   1st Qu.:0.11417   1st Qu.:0.00000   1st Qu.: 1.5126  
##  Median : 8.184   Median :0.16848   Median :0.02834   Median : 3.7289  
##  Mean   : 8.520   Mean   :0.17198   Mean   :0.06683   Mean   : 5.0224  
##  3rd Qu.: 9.425   3rd Qu.:0.23025   3rd Qu.:0.12639   3rd Qu.: 7.5190  
##  Max.   :11.582   Max.   :0.30956   Max.   :0.20161   Max.   :12.7276  
##                                                                        
##    pil_slpcv        pil_elr3cv          mrvbf            mrrtf        
##  Min.   :0.2188   Min.   :0.04501   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.3607   1st Qu.:0.12910   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.4713   Median :0.20074   Median :0.0000   Median :0.00000  
##  Mean   :0.5312   Mean   :0.22534   Mean   :0.1595   Mean   :0.07192  
##  3rd Qu.:0.6235   3rd Qu.:0.29237   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.6098   Max.   :0.56449   Max.   :1.7200   Max.   :1.88966  
##                                                                       
##     minfertf         lf7rup         hstructn      geolrngaggn      
##  Min.   :2.000   Min.   :1.000   Min.   :0.000   Min.   :  0.0115  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:1.043   1st Qu.:  2.5880  
##  Median :2.000   Median :3.000   Median :1.043   Median :  2.5880  
##  Mean   :2.245   Mean   :2.755   Mean   :1.622   Mean   :178.5159  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:1.043   3rd Qu.:300.0000  
##  Max.   :3.000   Max.   :6.000   Max.   :5.143   Max.   :900.0000  
##                                                                    
##    elevationm       wr_unrn        solpawhcn         slopern       
##  Min.   :518.3   Min.   : 0.00   Min.   : 96.00   Min.   :0.00179  
##  1st Qu.:561.2   1st Qu.:17.86   1st Qu.: 96.00   1st Qu.:0.01875  
##  Median :568.4   Median :17.86   Median : 96.00   Median :0.03679  
##  Mean   :586.3   Mean   :16.00   Mean   : 98.91   Mean   :0.07684  
##  3rd Qu.:610.9   3rd Qu.:17.86   3rd Qu.: 96.00   3rd Qu.:0.09517  
##  Max.   :726.1   Max.   :17.86   Max.   :157.00   Max.   :0.51337  
##                                                                    
##    MIN_AGE_MA      HubDist        month_collection
##  Min.   :2454   Min.   : 3.218   March    : 8     
##  1st Qu.:2454   1st Qu.:16.501   May      : 8     
##  Median :2454   Median :34.110   September: 7     
##  Mean   :2489   Mean   :46.053   June     : 6     
##  3rd Qu.:2494   3rd Qu.:76.527   November : 6     
##  Max.   :2597   Max.   :84.554   January  : 5     
##                                  (Other)  :13     
##                                          UNITNAME 
##  Brockman Iron Formation                     :30  
##  Marra Mamba Iron Formation                  : 9  
##  Mount McRae Shale and Mount Sylvia Formation:14  
##  Wittenoom Formation                         : 0  
##                                                   
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 0  
##  sedimentary other chemical or biochemical:39  
##  sedimentary siliciclastic                :14  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION 
##  Brockman Iron Formation                     :30  
##  Marra Mamba Iron Formation                  : 9  
##  Mount McRae Shale and Mount Sylvia Formation:14  
##  Wittenoom Formation                         : 0  
##                                                   
##                                                   
##                                                   
##                                                         HubName  
##  anticline, exposed                                         :21  
##  exposed                                                    : 6  
##  strike-slip, exposed, showing relative dextral displacement: 2  
##  syncline, exposed                                          :24  
##                                                                  
##                                                                  
##                                                                  
##  true_troglofauna    LATITUDE        LONGITUDE    
##  0: 7             Min.   :-22.64   Min.   :117.1  
##  1:46             1st Qu.:-22.23   1st Qu.:117.6  
##                   Median :-22.16   Median :117.8  
##                   Mean   :-22.23   Mean   :117.7  
##                   3rd Qu.:-22.12   3rd Qu.:117.9  
##                   Max.   :-22.09   Max.   :118.0  
## 
points(geo2$LONGITUDE,geo2$LATITUDE,xlim = c(Min_Lg,Max_Lg),ylim = c(Min_Lt,Max_Lt),pch=5, col="red")


points(geoAu$LONGITUDE,geoAu$LATITUDE,xlim = c(115,122),ylim = c(-26,-21),pch = 4, col = rgb(0, 0, 0, 0.15))

write.csv(geo2, file = "Area1_predicted_presence_points_troglofauna_coordinates.csv", row.names = FALSE)