library(dplyr)       
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)     
library(caret)       
## Loading required package: lattice
library(randomForest)#
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:dplyr':
## 
##     combine
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(corrplot)
## corrplot 0.92 loaded
library(mltools)

library(rockchalk)
## 
## Attaching package: 'rockchalk'
## The following object is masked from 'package:mltools':
## 
##     skewness
## The following object is masked from 'package:dplyr':
## 
##     summarize
# Read the filtered Area 02 table. read.csv() already assumes a comma
# separator and a header row, so those defaults are not repeated here.
data2 <- read.csv("Area_02_Filtered_.csv")

# Column-by-column overview of the loaded table.
summary(data2)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1333   Min.   :29.62   Min.   :195.1   Min.   : 0.08862  
##  1st Qu.:0.1439   1st Qu.:29.74   1st Qu.:221.5   1st Qu.: 1.36495  
##  Median :0.1481   Median :29.76   Median :224.9   Median : 2.67317  
##  Mean   :0.1485   Mean   :29.75   Mean   :225.8   Mean   : 3.46866  
##  3rd Qu.:0.1520   3rd Qu.:29.78   3rd Qu.:229.6   3rd Qu.: 4.63968  
##  Max.   :0.1690   Max.   :29.84   Max.   :249.6   Max.   :16.25081  
##     rugg500s         rugg500cv         pil_twicv         pil_topos    
##  Min.   :0.00849   Min.   :0.00854   Min.   :0.02523   Min.   : 0.00  
##  1st Qu.:0.69745   1st Qu.:0.06169   1st Qu.:0.08197   1st Qu.: 0.00  
##  Median :1.38321   Median :0.10038   Median :0.11972   Median : 0.00  
##  Mean   :1.96571   Mean   :0.12201   Mean   :0.13704   Mean   : 1.58  
##  3rd Qu.:2.85446   3rd Qu.:0.16462   3rd Qu.:0.18162   3rd Qu.: 3.29  
##  Max.   :9.29160   Max.   :0.59943   Max.   :0.45404   Max.   :15.55  
##     pil_slps          pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.04903   Min.   :0.1652   Min.   :1.000   Min.   :0.0000  
##  1st Qu.: 1.21168   1st Qu.:0.3648   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 3.20706   Median :0.4553   Median :1.300   Median :0.0000  
##  Mean   : 4.27314   Mean   :0.4725   Mean   :1.302   Mean   :0.3611  
##  3rd Qu.: 6.36604   3rd Qu.:0.5608   3rd Qu.:1.300   3rd Qu.:0.0000  
##  Max.   :20.80731   Max.   :1.3978   Max.   :1.600   Max.   :4.7155  
##      mrrtf           minfertf         lf7rup         hstructn      
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.0429  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.0429  
##  Mean   :0.2375   Mean   :2.034   Mean   :2.889   Mean   : 0.9554  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :3.5547   Max.   :3.000   Max.   :7.000   Max.   :10.0000  
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.247   Min.   : 0.00132  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.01630  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.04050  
##  Mean   :429.7593   Mean   :1554.2860   Mean   :1.351   Mean   : 0.22506  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.14081  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##     HubDist          MIN_AGE_MA   month_collection     HubName         
##  Min.   : 0.6774   Min.   :2444   Length:1938        Length:1938       
##  1st Qu.: 6.4399   1st Qu.:2454   Class :character   Class :character  
##  Median :21.3848   Median :2454   Mode  :character   Mode  :character  
##  Mean   :31.3849   Mean   :2508                                        
##  3rd Qu.:58.2123   3rd Qu.:2597                                        
##  Max.   :81.3633   Max.   :2629                                        
##    UNITNAME          ROCKTYPE1          FORMATION         true_troglofauna
##  Length:1938        Length:1938        Length:1938        Min.   :0.0000  
##  Class :character   Class :character   Class :character   1st Qu.:0.0000  
##  Mode  :character   Mode  :character   Mode  :character   Median :1.0000  
##                                                           Mean   :0.6192  
##                                                           3rd Qu.:1.0000  
##                                                           Max.   :1.0000  
##     LATITUDE        LONGITUDE    
##  Min.   :-23.34   Min.   :118.6  
##  1st Qu.:-22.98   1st Qu.:118.9  
##  Median :-22.92   Median :119.0  
##  Mean   :-22.89   Mean   :119.0  
##  3rd Qu.:-22.90   3rd Qu.:119.0  
##  Max.   :-22.51   Max.   :119.5
# Number of rows and columns in the loaded table.
dim(data2)
## [1] 1938   30
# Class of every column, to see which ones are still character or integer.
sapply(data2,class)
##         srain2mp            bio21            bio16            slope 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##            mrrtf         minfertf           lf7rup         hstructn 
##        "numeric"        "integer"        "integer"        "numeric" 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##        "numeric"        "integer"      "character"      "character" 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##      "character"      "character"      "character"        "integer" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
# Encode the 0/1 column true_troglofauna as a factor.
data2$true_troglofauna <- factor(data2$true_troglofauna)

# Turn every character-typed categorical column into a factor in one pass.
categorical_cols <- c(
  "HubName", "FORMATION", "ROCKTYPE1", "UNITNAME", "month_collection"
)
data2[categorical_cols] <- lapply(data2[categorical_cols], factor)

# Re-inspect the column classes after the conversion.
sapply(data2, class)
##         srain2mp            bio21            bio16            slope 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##            mrrtf         minfertf           lf7rup         hstructn 
##        "numeric"        "integer"        "integer"        "numeric" 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##        "numeric"        "integer"         "factor"         "factor" 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
# Sample counts per collection month; at this point the levels are in
# alphabetical order.
summary(data2[["month_collection"]])
##     April    August  December  February   January      July      June     March 
##       297        51        91       213        51        62       226       410 
##       May  November   October September 
##       252       131       103        51
# Re-level into calendar order using base R's built-in month.name constant
# ("January" ... "December").
data2$month_collection <- factor(data2$month_collection, levels = month.name)
summary(data2[["month_collection"]])
##   January  February     March     April       May      June      July    August 
##        51       213       410       297       252       226        62        51 
## September   October  November  December 
##        51       103       131        91
# Bar chart of the number of samples per collection month.
# The upper y-axis limit is derived from the largest monthly count so that no
# bar runs past the top of the axis (a fixed limit of 200 is exceeded by
# several months, e.g. March with 410 rows). 200 is kept as a floor so the
# scale is unchanged whenever every count already fits.
month_counts <- table(data2$month_collection)
plot(
  data2$month_collection,
  main = "Amostras Mensais",
  ylim = c(0, max(200, month_counts)),
  ylab = "Num. de Amostras"
)

# Row positions of the samples collected in each calendar month
# (m1 = January ... m12 = December); each group size is printed right after
# the group is built.
rows_in_month <- function(month_name) {
  which(data2$month_collection == month_name)
}

m1 <- rows_in_month("January")
length(m1)
## [1] 51
m2 <- rows_in_month("February")
length(m2)
## [1] 213
m3 <- rows_in_month("March")
length(m3)
## [1] 410
m4 <- rows_in_month("April")
length(m4)
## [1] 297
m5 <- rows_in_month("May")
length(m5)
## [1] 252
m6 <- rows_in_month("June")
length(m6)
## [1] 226
m7 <- rows_in_month("July")
length(m7)
## [1] 62
m8 <- rows_in_month("August")
length(m8)
## [1] 51
m9 <- rows_in_month("September")
length(m9)
## [1] 51
m10 <- rows_in_month("October")
length(m10)
## [1] 103
m11 <- rows_in_month("November")
length(m11)
## [1] 131
m12 <- rows_in_month("December")
length(m12)
## [1] 91
# All month groups concatenated; the length is printed as a sanity check
# against the number of rows in the table.
tm <- c(m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12)
length(tm)
## [1] 1938
# Size of each month group, followed by the median group size.
ltm <- lengths(list(m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12))
median(ltm)
## [1] 117
# Fix the RNG seed so the subsample is reproducible. The sample() calls below
# must stay in this exact order, otherwise different rows are drawn.
set.seed(78945)

# Draw 50 rows without replacement (the sample() default) from each month
# group; the size and the first few drawn row positions are printed per month.
sm1 <- sample(m1, size = 50)
length(sm1)
head(sm1)
## [1] 50
## [1] 1901  171   75  155 1931  160
sm2 <- sample(m2, size = 50)
length(sm2)
head(sm2)
## [1] 50
## [1] 1581 1065 1020 1057 1553 1155
sm3 <- sample(m3, size = 50)
length(sm3)
head(sm3)
## [1] 50
## [1] 1864  454 1846  433  418 1880
sm4 <- sample(m4, size = 50)
length(sm4)
head(sm4)
## [1] 50
## [1] 1304 1607 1601 1254  353 1449
sm5 <- sample(m5, size = 50)
length(sm5)
head(sm5)
## [1] 50
## [1] 1512  258 1659 1627  411  846
sm6 <- sample(m6, size = 50)
length(sm6)
head(sm6)
## [1] 50
## [1] 269 646 716 761 811 934
sm7 <- sample(m7, size = 50)
length(sm7)
head(sm7)
## [1] 50
## [1] 756 388 814 928 372 177
sm10 <- sample(m10, size = 50)
length(sm10)
head(sm10)
## [1] 50
## [1]   43   46   41 1655 1321 1725
sm11 <- sample(m11, size = 50)
length(sm11)
head(sm11)
## [1] 50
## [1] 1903 1862 1420 1821 1451 1789
sm12 <- sample(m12, size = 50)
length(sm12)
head(sm12)
## [1] 50
## [1]   52   67 1337   90  130 1660
# Combine the selected rows. August (m8) and September (m9) are taken in full
# rather than sampled, so those two months contribute 51 rows each.
# NOTE(review): confirm that 51 instead of 50 rows for these months is intended.
utm <- c(sm1, sm2, sm3, sm4, sm5, sm6, sm7, m8, m9, sm10, sm11, sm12)
length(utm)
## [1] 602
# Keep only the selected rows; the subsample is stored separately as data1.
data1 <- data2[utm, ]
dim(data1)
## [1] 602  30
# Overview of the subsample.
summary(data1)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1338   Min.   :29.63   Min.   :195.6   Min.   : 0.08862  
##  1st Qu.:0.1427   1st Qu.:29.74   1st Qu.:222.0   1st Qu.: 1.26266  
##  Median :0.1471   Median :29.76   Median :224.9   Median : 2.49078  
##  Mean   :0.1477   Mean   :29.75   Mean   :226.3   Mean   : 3.28377  
##  3rd Qu.:0.1513   3rd Qu.:29.78   3rd Qu.:230.1   3rd Qu.: 4.34199  
##  Max.   :0.1678   Max.   :29.84   Max.   :245.4   Max.   :16.02798  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.00849   Min.   :0.00854   Min.   :0.03348   Min.   : 0.000  
##  1st Qu.:0.58854   1st Qu.:0.06136   1st Qu.:0.08392   1st Qu.: 0.000  
##  Median :1.26653   Median :0.09135   Median :0.12248   Median : 0.000  
##  Mean   :1.81579   Mean   :0.11782   Mean   :0.14265   Mean   : 1.512  
##  3rd Qu.:2.52863   3rd Qu.:0.15941   3rd Qu.:0.19717   3rd Qu.: 2.647  
##  Max.   :9.29160   Max.   :0.59943   Max.   :0.45404   Max.   :10.806  
##                                                                        
##     pil_slps          pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.04903   Min.   :0.1885   Min.   :1.000   Min.   :0.0000  
##  1st Qu.: 1.15162   1st Qu.:0.3505   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 3.14211   Median :0.4586   Median :1.300   Median :0.0000  
##  Mean   : 4.05948   Mean   :0.4698   Mean   :1.297   Mean   :0.3827  
##  3rd Qu.: 6.15380   3rd Qu.:0.5545   3rd Qu.:1.300   3rd Qu.:0.0000  
##  Max.   :17.00972   Max.   :1.3978   Max.   :1.600   Max.   :4.6129  
##                                                                      
##      mrrtf           minfertf        lf7rup         hstructn      
##  Min.   :0.0000   Min.   :1.00   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.00   1st Qu.:2.000   1st Qu.: 0.4286  
##  Median :0.0000   Median :2.00   Median :3.000   Median : 1.0429  
##  Mean   :0.3018   Mean   :2.04   Mean   :3.027   Mean   : 0.9961  
##  3rd Qu.:0.0000   3rd Qu.:2.00   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :3.5547   Max.   :3.00   Max.   :7.000   Max.   :10.0000  
##                                                                   
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.247   Min.   : 0.00132  
##  1st Qu.:  2.5880   1st Qu.:   1.2940   1st Qu.:1.341   1st Qu.: 0.01670  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.04383  
##  Mean   :385.4664   Mean   :1411.4505   Mean   :1.349   Mean   : 0.31208  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.16815  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist          MIN_AGE_MA    month_collection
##  Min.   : 0.8949   Min.   :2453   August   : 51    
##  1st Qu.: 6.4556   1st Qu.:2454   September: 51    
##  Median :21.3475   Median :2454   January  : 50    
##  Mean   :30.9694   Mean   :2508   February : 50    
##  3rd Qu.:57.8053   3rd Qu.:2597   March    : 50    
##  Max.   :81.1207   Max.   :2597   April    : 50    
##                                   (Other)  :300    
##                                      HubName   
##  anticline, exposed                      : 70  
##  concealed                               :  1  
##  exposed                                 : 73  
##  normal, exposed, tick on downthrown side: 78  
##  overturned syncline, exposed            : 59  
##  syncline, exposed                       :321  
##                                                
##                                          UNITNAME  
##  Brockman Iron Formation                     :307  
##  Jeerinah Formation                          :  0  
##  Marra Mamba Iron Formation                  :205  
##  Mount McRae Shale and Mount Sylvia Formation:  9  
##  Weeli Wolli Formation                       : 26  
##  Wittenoom Formation                         : 55  
##  Woongarra Rhyolite                          :  0  
##                                      ROCKTYPE1  
##  igneous felsic volcanic                  :  0  
##  sedimentary carbonate                    : 55  
##  sedimentary other chemical or biochemical:538  
##  sedimentary siliciclastic                :  9  
##                                                 
##                                                 
##                                                 
##                                         FORMATION   true_troglofauna
##  Brockman Iron Formation                     :307   0:190           
##  Jeerinah Formation                          :  0   1:412           
##  Marra Mamba Iron Formation                  :205                   
##  Mount McRae Shale and Mount Sylvia Formation:  9                   
##  Weeli Wolli Formation                       : 26                   
##  Wittenoom Formation                         : 55                   
##  Woongarra Rhyolite                          :  0                   
##     LATITUDE        LONGITUDE    
##  Min.   :-23.33   Min.   :118.6  
##  1st Qu.:-22.96   1st Qu.:118.9  
##  Median :-22.92   Median :119.0  
##  Mean   :-22.87   Mean   :119.0  
##  3rd Qu.:-22.76   3rd Qu.:119.0  
##  Max.   :-22.51   Max.   :119.4  
## 
# From here on data2 refers to the subsample held in data1.
data2 <- data1

# Monthly sample counts after subsampling.
summary(data2[["month_collection"]])
##   January  February     March     April       May      June      July    August 
##        50        50        50        50        50        50        50        51 
## September   October  November  December 
##        51        50        50        50
# Bar chart of the monthly sample counts in the subsample.
plot(
  data2$month_collection,
  main = "Amostras Mensais",
  ylab = "Num. de Amostras",
  ylim = c(0, 200)
)

# Levels of every factor column, one statement per column.
levels(data2$true_troglofauna)
levels(data2$HubName)
levels(data2$FORMATION)
levels(data2$ROCKTYPE1)
levels(data2$UNITNAME)
levels(data2$month_collection)
## [1] "0" "1"
## [1] "anticline, exposed"                      
## [2] "concealed"                               
## [3] "exposed"                                 
## [4] "normal, exposed, tick on downthrown side"
## [5] "overturned syncline, exposed"            
## [6] "syncline, exposed"
## [1] "Brockman Iron Formation"                     
## [2] "Jeerinah Formation"                          
## [3] "Marra Mamba Iron Formation"                  
## [4] "Mount McRae Shale and Mount Sylvia Formation"
## [5] "Weeli Wolli Formation"                       
## [6] "Wittenoom Formation"                         
## [7] "Woongarra Rhyolite"
## [1] "igneous felsic volcanic"                  
## [2] "sedimentary carbonate"                    
## [3] "sedimentary other chemical or biochemical"
## [4] "sedimentary siliciclastic"
## [1] "Brockman Iron Formation"                     
## [2] "Jeerinah Formation"                          
## [3] "Marra Mamba Iron Formation"                  
## [4] "Mount McRae Shale and Mount Sylvia Formation"
## [5] "Weeli Wolli Formation"                       
## [6] "Wittenoom Formation"                         
## [7] "Woongarra Rhyolite"
##  [1] "January"   "February"  "March"     "April"     "May"       "June"     
##  [7] "July"      "August"    "September" "October"   "November"  "December"
# Full per-column summary of the table currently held in data2.
summary(data2)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1338   Min.   :29.63   Min.   :195.6   Min.   : 0.08862  
##  1st Qu.:0.1427   1st Qu.:29.74   1st Qu.:222.0   1st Qu.: 1.26266  
##  Median :0.1471   Median :29.76   Median :224.9   Median : 2.49078  
##  Mean   :0.1477   Mean   :29.75   Mean   :226.3   Mean   : 3.28377  
##  3rd Qu.:0.1513   3rd Qu.:29.78   3rd Qu.:230.1   3rd Qu.: 4.34199  
##  Max.   :0.1678   Max.   :29.84   Max.   :245.4   Max.   :16.02798  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.00849   Min.   :0.00854   Min.   :0.03348   Min.   : 0.000  
##  1st Qu.:0.58854   1st Qu.:0.06136   1st Qu.:0.08392   1st Qu.: 0.000  
##  Median :1.26653   Median :0.09135   Median :0.12248   Median : 0.000  
##  Mean   :1.81579   Mean   :0.11782   Mean   :0.14265   Mean   : 1.512  
##  3rd Qu.:2.52863   3rd Qu.:0.15941   3rd Qu.:0.19717   3rd Qu.: 2.647  
##  Max.   :9.29160   Max.   :0.59943   Max.   :0.45404   Max.   :10.806  
##                                                                        
##     pil_slps          pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.04903   Min.   :0.1885   Min.   :1.000   Min.   :0.0000  
##  1st Qu.: 1.15162   1st Qu.:0.3505   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 3.14211   Median :0.4586   Median :1.300   Median :0.0000  
##  Mean   : 4.05948   Mean   :0.4698   Mean   :1.297   Mean   :0.3827  
##  3rd Qu.: 6.15380   3rd Qu.:0.5545   3rd Qu.:1.300   3rd Qu.:0.0000  
##  Max.   :17.00972   Max.   :1.3978   Max.   :1.600   Max.   :4.6129  
##                                                                      
##      mrrtf           minfertf        lf7rup         hstructn      
##  Min.   :0.0000   Min.   :1.00   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.00   1st Qu.:2.000   1st Qu.: 0.4286  
##  Median :0.0000   Median :2.00   Median :3.000   Median : 1.0429  
##  Mean   :0.3018   Mean   :2.04   Mean   :3.027   Mean   : 0.9961  
##  3rd Qu.:0.0000   3rd Qu.:2.00   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :3.5547   Max.   :3.00   Max.   :7.000   Max.   :10.0000  
##                                                                   
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.247   Min.   : 0.00132  
##  1st Qu.:  2.5880   1st Qu.:   1.2940   1st Qu.:1.341   1st Qu.: 0.01670  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.04383  
##  Mean   :385.4664   Mean   :1411.4505   Mean   :1.349   Mean   : 0.31208  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.16815  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist          MIN_AGE_MA    month_collection
##  Min.   : 0.8949   Min.   :2453   August   : 51    
##  1st Qu.: 6.4556   1st Qu.:2454   September: 51    
##  Median :21.3475   Median :2454   January  : 50    
##  Mean   :30.9694   Mean   :2508   February : 50    
##  3rd Qu.:57.8053   3rd Qu.:2597   March    : 50    
##  Max.   :81.1207   Max.   :2597   April    : 50    
##                                   (Other)  :300    
##                                      HubName   
##  anticline, exposed                      : 70  
##  concealed                               :  1  
##  exposed                                 : 73  
##  normal, exposed, tick on downthrown side: 78  
##  overturned syncline, exposed            : 59  
##  syncline, exposed                       :321  
##                                                
##                                          UNITNAME  
##  Brockman Iron Formation                     :307  
##  Jeerinah Formation                          :  0  
##  Marra Mamba Iron Formation                  :205  
##  Mount McRae Shale and Mount Sylvia Formation:  9  
##  Weeli Wolli Formation                       : 26  
##  Wittenoom Formation                         : 55  
##  Woongarra Rhyolite                          :  0  
##                                      ROCKTYPE1  
##  igneous felsic volcanic                  :  0  
##  sedimentary carbonate                    : 55  
##  sedimentary other chemical or biochemical:538  
##  sedimentary siliciclastic                :  9  
##                                                 
##                                                 
##                                                 
##                                         FORMATION   true_troglofauna
##  Brockman Iron Formation                     :307   0:190           
##  Jeerinah Formation                          :  0   1:412           
##  Marra Mamba Iron Formation                  :205                   
##  Mount McRae Shale and Mount Sylvia Formation:  9                   
##  Weeli Wolli Formation                       : 26                   
##  Wittenoom Formation                         : 55                   
##  Woongarra Rhyolite                          :  0                   
##     LATITUDE        LONGITUDE    
##  Min.   :-23.33   Min.   :118.6  
##  1st Qu.:-22.96   1st Qu.:118.9  
##  Median :-22.92   Median :119.0  
##  Mean   :-22.87   Mean   :119.0  
##  3rd Qu.:-22.76   3rd Qu.:119.0  
##  Max.   :-22.51   Max.   :119.4  
## 
dim(data2)
## [1] 602  30
# Names of every column except the last nine.
instanceconvert <- head(names(data2), -9L)
instanceconvert
##  [1] "srain2mp"    "bio21"       "bio16"       "slope"       "rugg500s"   
##  [6] "rugg500cv"   "pil_twicv"   "pil_topos"   "pil_slps"    "pil_slpcv"  
## [11] "nutrientsn"  "mrvbf"       "mrrtf"       "minfertf"    "lf7rup"     
## [16] "hstructn"    "geolrngaggn" "geolmnaggn"  "bdensity50n" "slopern"    
## [21] "HubDist"
# Coerce all of those columns to numeric (double) in a single assignment.
data2[instanceconvert] <- lapply(data2[instanceconvert], as.numeric)

# Confirm the resulting column classes.
sapply(data2, class)
##         srain2mp            bio21            bio16            slope 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##            mrrtf         minfertf           lf7rup         hstructn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##        "numeric"        "integer"         "factor"         "factor" 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
# Print the column names for reference.
names(data2)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"        "true_troglofauna"
## [29] "LATITUDE"         "LONGITUDE"
# Re-create every factor column. Calling factor() on an existing factor keeps
# only the levels that still occur, in their existing order, so levels left
# without rows after subsampling are dropped.
factor_cols <- c(
  "true_troglofauna", "HubName", "FORMATION", "ROCKTYPE1", "UNITNAME",
  "month_collection"
)
data2[factor_cols] <- lapply(data2[factor_cols], factor)


# Column classes are unchanged by the re-levelling.
sapply(data2, class)
##         srain2mp            bio21            bio16            slope 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##            mrrtf         minfertf           lf7rup         hstructn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##        "numeric"        "integer"         "factor"         "factor" 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
# Levels of every factor column after unused levels were removed.
levels(data2$true_troglofauna)
levels(data2$HubName)
levels(data2$FORMATION)
levels(data2$ROCKTYPE1)
levels(data2$UNITNAME)
levels(data2$month_collection)
## [1] "0" "1"
## [1] "anticline, exposed"                      
## [2] "concealed"                               
## [3] "exposed"                                 
## [4] "normal, exposed, tick on downthrown side"
## [5] "overturned syncline, exposed"            
## [6] "syncline, exposed"
## [1] "Brockman Iron Formation"                     
## [2] "Marra Mamba Iron Formation"                  
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Weeli Wolli Formation"                       
## [5] "Wittenoom Formation"
## [1] "sedimentary carbonate"                    
## [2] "sedimentary other chemical or biochemical"
## [3] "sedimentary siliciclastic"
## [1] "Brockman Iron Formation"                     
## [2] "Marra Mamba Iron Formation"                  
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Weeli Wolli Formation"                       
## [5] "Wittenoom Formation"
##  [1] "January"   "February"  "March"     "April"     "May"       "June"     
##  [7] "July"      "August"    "September" "October"   "November"  "December"
# Position of every column, 1 .. ncol(data2).
column_indexes <- seq_len(ncol(data2))

# Label each position with its column name and print the lookup vector.
column_indexes_named <- column_indexes
names(column_indexes_named) <- names(data2)
print(column_indexes_named)
##         srain2mp            bio21            bio16            slope 
##                1                2                3                4 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##                5                6                7                8 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##                9               10               11               12 
##            mrrtf         minfertf           lf7rup         hstructn 
##               13               14               15               16 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##               17               18               19               20 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##               21               22               23               24 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##               25               26               27               28 
##         LATITUDE        LONGITUDE 
##               29               30
# Column classes before the next numeric conversion.
sapply(data2,class)
##         srain2mp            bio21            bio16            slope 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##            mrrtf         minfertf           lf7rup         hstructn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##        "numeric"        "integer"         "factor"         "factor" 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
# Names of every column except the last eight.
instanceconvert <- head(names(data2), -8L)

# Coerce all of those columns to numeric (double) in a single assignment.
data2[instanceconvert] <- lapply(data2[instanceconvert], as.numeric)

# Confirm the resulting column classes.
sapply(data2, class)
##         srain2mp            bio21            bio16            slope 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##            mrrtf         minfertf           lf7rup         hstructn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##        "numeric"        "numeric"         "factor"         "factor" 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
# Print the structure of data2: dimensions, column types and leading values.
str(data2)
## 'data.frame':    602 obs. of  30 variables:
##  $ srain2mp        : num  0.138 0.147 0.148 0.146 0.137 ...
##  $ bio21           : num  29.7 29.8 29.8 29.8 29.7 ...
##  $ bio16           : num  236 225 225 224 234 ...
##  $ slope           : num  1.51 2.26 1.78 1.39 5.37 ...
##  $ rugg500s        : num  2.59 1.697 1.032 1.074 0.686 ...
##  $ rugg500cv       : num  0.1091 0.0935 0.0423 0.0471 0.0527 ...
##  $ pil_twicv       : num  0.1524 0.1803 0.0521 0.2545 0.2685 ...
##  $ pil_topos       : num  4.94 9.34 1.55 0 0 ...
##  $ pil_slps        : num  12.83 6.84 9.14 6.63 4.02 ...
##  $ pil_slpcv       : num  0.646 0.865 0.45 0.455 0.415 ...
##  $ nutrientsn      : num  1.1 1.3 1.3 1.3 1.1 1.3 1.3 1.3 1.3 1.3 ...
##  $ mrvbf           : num  0 0 0 0.661 0 ...
##  $ mrrtf           : num  0.745 0 0 0 0 ...
##  $ minfertf        : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ lf7rup          : num  3 3 3 7 1 3 6 3 7 7 ...
##  $ hstructn        : num  0.429 1.043 1.043 1.043 0.429 ...
##  $ geolrngaggn     : num  65.5 2.59 2.59 300 900 ...
##  $ geolmnaggn      : num  32.75 1.29 1.29 2650 2050 ...
##  $ bdensity50n     : num  1.34 1.37 1.37 1.37 1.34 ...
##  $ slopern         : num  0.0186 0.0898 0.028 0.0107 0.0184 ...
##  $ HubDist         : num  57.23 13.63 9.26 2.71 61.12 ...
##  $ MIN_AGE_MA      : num  2454 2597 2597 2597 2454 ...
##  $ month_collection: Factor w/ 12 levels "January","February",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ HubName         : Factor w/ 6 levels "anticline, exposed",..: 6 5 5 4 6 5 4 5 4 4 ...
##  $ UNITNAME        : Factor w/ 5 levels "Brockman Iron Formation",..: 1 2 2 2 1 2 2 2 2 2 ...
##  $ ROCKTYPE1       : Factor w/ 3 levels "sedimentary carbonate",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ FORMATION       : Factor w/ 5 levels "Brockman Iron Formation",..: 1 2 2 2 1 2 2 2 2 2 ...
##  $ true_troglofauna: Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 1 2 2 ...
##  $ LATITUDE        : num  -22.6 -22.9 -22.9 -22.9 -22.6 ...
##  $ LONGITUDE       : num  119 119 119 119 119 ...
# Size of the full data set before subsampling.
dim(data2)
## [1] 602  30

# Number of rows kept in the random subsample.
num_rows <- 409

# Seed the RNG before drawing so the same rows are selected on every run.
# The value is deliberately different from the seed used for the later
# createDataPartition() calls, so the two draws do not replay the same
# random stream.
set.seed(2024)

# Randomly select row indices (without replacement).
selected_rows <- sample.int(nrow(data2), size = num_rows)

# Keep only the randomly selected rows.
data2 <- data2[selected_rows, ]
dim(data2)
## [1] 409  30
# Row indices of presence (1) and absence (0) records.
tg <- which(data2$true_troglofauna == 1)
fg <- which(data2$true_troglofauna == 0)

# Class counts; the number of matching rows is simply the index length.
tgg <- length(tg)
fgg <- length(fg)

# Percentage of absence / presence records, as a one-row data frame.
plot_num_ident <- data.frame(
  Absence = 100 * fgg / (tgg + fgg),
  Presence = 100 * tgg / (tgg + fgg)
)

# Flatten to a numeric vector: c(absence %, presence %).
pie_data <- as.numeric(plot_num_ident)

# Slice labels with the percentage rounded to one decimal place.
labels <- c(
  paste0("Absent\n", round(pie_data[1], 1), "%"),
  paste0("Present\n", round(pie_data[2], 1), "%")
)

# Draw the pie without default labels; custom labels are placed below.
pie(
  pie_data,
  labels = NA,
  main = "Area 2: Percentage of Presence and Absence of Troglobites",
  col = c("orange", "lightblue")
)

# Cumulative midpoint of each slice, in the same units as pie_data.
pie_slices <- cumsum(pie_data) - pie_data / 2

# Convert each midpoint to an angle (pie() draws counter-clockwise from
# angle 0 by default) and place the label at radius 0.5, inside the slice.
label_angle <- 2 * pi * pie_slices / sum(pie_data)
text(
  x = 0.5 * cos(label_angle),
  y = 0.5 * sin(label_angle),
  labels = labels,
  cex = 1.5
)


# Numeric predictors only: remove the trailing eight columns of data2.
selectcol_Data2 <- data2[, -seq(to = ncol(data2), length.out = 8)]
dim(selectcol_Data2)
## [1] 409  22
names(selectcol_Data2)
##  [1] "srain2mp"    "bio21"       "bio16"       "slope"       "rugg500s"   
##  [6] "rugg500cv"   "pil_twicv"   "pil_topos"   "pil_slps"    "pil_slpcv"  
## [11] "nutrientsn"  "mrvbf"       "mrrtf"       "minfertf"    "lf7rup"     
## [16] "hstructn"    "geolrngaggn" "geolmnaggn"  "bdensity50n" "slopern"    
## [21] "HubDist"     "MIN_AGE_MA"
# Pairwise correlation matrix of the predictors (cor() defaults), then print.
correlations <- cor(x = selectcol_Data2)
print(correlations)
##                srain2mp        bio21       bio16       slope    rugg500s
## srain2mp     1.00000000 -0.036188741 -0.11293552  0.54725461  0.43096627
## bio21       -0.03618874  1.000000000 -0.09471667  0.09086516  0.14903786
## bio16       -0.11293552 -0.094716666  1.00000000  0.08512271  0.01039644
## slope        0.54725461  0.090865157  0.08512271  1.00000000  0.50274397
## rugg500s     0.43096627  0.149037862  0.01039644  0.50274397  1.00000000
## rugg500cv   -0.05521694  0.142342905 -0.19481648 -0.05134236  0.53816608
## pil_twicv   -0.10610188 -0.088439495 -0.13223566 -0.06336495 -0.15957223
## pil_topos   -0.02813466  0.003698451 -0.14715317 -0.05420059  0.22185299
## pil_slps     0.40727177  0.080909640  0.13681881  0.59729931  0.44226310
## pil_slpcv   -0.03049940 -0.049269756  0.05470746 -0.06979420  0.11825844
## nutrientsn   0.07581793  0.637431868 -0.01803084 -0.09426969 -0.05300757
## mrvbf       -0.25628774  0.089807085  0.18820731 -0.39123049 -0.29679702
## mrrtf       -0.21645723 -0.187961770  0.16257424 -0.38825647 -0.34618304
## minfertf    -0.04919400 -0.008360016 -0.08319762 -0.05801783 -0.03667740
## lf7rup      -0.36930933 -0.054452098  0.00722027 -0.47994892 -0.31732814
## hstructn     0.05863203  0.131762943  0.16061571 -0.02986026 -0.03847043
## geolrngaggn  0.56971300  0.141930684  0.05306587  0.58810194  0.43125958
## geolmnaggn   0.45787624  0.296349683 -0.28657359  0.32073719  0.22879221
## bdensity50n  0.43749936 -0.081614778 -0.30861322  0.35403693  0.29659908
## slopern     -0.12242657 -0.019422607  0.16086261 -0.17243734 -0.15379213
## HubDist      0.10505023 -0.311245192  0.51459314  0.29078095  0.20611131
## MIN_AGE_MA  -0.01033253  0.262242485 -0.53715483 -0.20484326 -0.13715325
##                rugg500cv    pil_twicv    pil_topos    pil_slps    pil_slpcv
## srain2mp    -0.055216936 -0.106101880 -0.028134656  0.40727177 -0.030499399
## bio21        0.142342905 -0.088439495  0.003698451  0.08090964 -0.049269756
## bio16       -0.194816476 -0.132235658 -0.147153167  0.13681881  0.054707458
## slope       -0.051342363 -0.063364947 -0.054200587  0.59729931 -0.069794199
## rugg500s     0.538166078 -0.159572228  0.221852987  0.44226310  0.118258440
## rugg500cv    1.000000000 -0.023195908  0.209185139 -0.22647460  0.070197071
## pil_twicv   -0.023195908  1.000000000  0.016965288  0.03319852  0.170672618
## pil_topos    0.209185139  0.016965288  1.000000000  0.05093714  0.308686706
## pil_slps    -0.226474600  0.033198518  0.050937142  1.00000000  0.264584461
## pil_slpcv    0.070197071  0.170672618  0.308686706  0.26458446  1.000000000
## nutrientsn   0.063179128 -0.030021677 -0.011206861 -0.14076750 -0.035279010
## mrvbf       -0.056713064  0.100888841 -0.029218016 -0.29035192  0.124925800
## mrrtf       -0.186484301 -0.217884507 -0.133607959 -0.32691431  0.004603624
## minfertf    -0.032750908 -0.020742167  0.043266417 -0.09522095  0.039238286
## lf7rup      -0.002415671  0.211608640  0.033705334 -0.33387843  0.065881139
## hstructn     0.019610056 -0.044422361 -0.047133726 -0.03142007  0.050759651
## geolrngaggn -0.102544186 -0.041566029 -0.075558714  0.58248262  0.006146993
## geolmnaggn  -0.066000777  0.019194504 -0.070780400  0.32312221 -0.118829493
## bdensity50n -0.039331028  0.055331652  0.107651689  0.35952526 -0.024261598
## slopern     -0.026151987 -0.008662173 -0.086047821 -0.17548406  0.118882346
## HubDist     -0.056583116  0.077693347 -0.135862654  0.29408277  0.057877154
## MIN_AGE_MA   0.028028857  0.101908570  0.129697849 -0.19812276 -0.145880579
##              nutrientsn        mrvbf        mrrtf     minfertf       lf7rup
## srain2mp     0.07581793 -0.256287743 -0.216457234 -0.049194002 -0.369309333
## bio21        0.63743187  0.089807085 -0.187961770 -0.008360016 -0.054452098
## bio16       -0.01803084  0.188207308  0.162574238 -0.083197622  0.007220270
## slope       -0.09426969 -0.391230489 -0.388256472 -0.058017828 -0.479948921
## rugg500s    -0.05300757 -0.296797021 -0.346183035 -0.036677401 -0.317328137
## rugg500cv    0.06317913 -0.056713064 -0.186484301 -0.032750908 -0.002415671
## pil_twicv   -0.03002168  0.100888841 -0.217884507 -0.020742167  0.211608640
## pil_topos   -0.01120686 -0.029218016 -0.133607959  0.043266417  0.033705334
## pil_slps    -0.14076750 -0.290351919 -0.326914312 -0.095220949 -0.333878429
## pil_slpcv   -0.03527901  0.124925800  0.004603624  0.039238286  0.065881139
## nutrientsn   1.00000000  0.302761679  0.062028636 -0.039203433  0.160408509
## mrvbf        0.30276168  1.000000000  0.070381347 -0.003802492  0.538132530
## mrrtf        0.06202864  0.070381347  1.000000000  0.157183925  0.122849752
## minfertf    -0.03920343 -0.003802492  0.157183925  1.000000000 -0.023185959
## lf7rup       0.16040851  0.538132530  0.122849752 -0.023185959  1.000000000
## hstructn     0.04157830  0.405557629 -0.045078431 -0.006064680  0.212538506
## geolrngaggn -0.03277473 -0.282697514 -0.319438714 -0.069717342 -0.383998647
## geolmnaggn   0.07558009 -0.315430328 -0.319160720 -0.095200819 -0.339086028
## bdensity50n -0.54424628 -0.424117111 -0.403816175  0.052577887 -0.304565962
## slopern      0.04300266  0.457099194  0.077395936  0.012417176  0.269053842
## HubDist     -0.24388350  0.002259312 -0.073614373 -0.049744858 -0.039387080
## MIN_AGE_MA   0.17758264 -0.108641720 -0.084136452 -0.057131199  0.083285368
##                 hstructn  geolrngaggn   geolmnaggn  bdensity50n      slopern
## srain2mp     0.058632026  0.569713002  0.457876241  0.437499365 -0.122426567
## bio21        0.131762943  0.141930684  0.296349683 -0.081614778 -0.019422607
## bio16        0.160615706  0.053065872 -0.286573593 -0.308613216  0.160862614
## slope       -0.029860259  0.588101944  0.320737193  0.354036931 -0.172437342
## rugg500s    -0.038470425  0.431259576  0.228792212  0.296599077 -0.153792133
## rugg500cv    0.019610056 -0.102544186 -0.066000777 -0.039331028 -0.026151987
## pil_twicv   -0.044422361 -0.041566029  0.019194504  0.055331652 -0.008662173
## pil_topos   -0.047133726 -0.075558714 -0.070780400  0.107651689 -0.086047821
## pil_slps    -0.031420066  0.582482621  0.323122208  0.359525258 -0.175484056
## pil_slpcv    0.050759651  0.006146993 -0.118829493 -0.024261598  0.118882346
## nutrientsn   0.041578302 -0.032774730  0.075580087 -0.544246284  0.043002661
## mrvbf        0.405557629 -0.282697514 -0.315430328 -0.424117111  0.457099194
## mrrtf       -0.045078431 -0.319438714 -0.319160720 -0.403816175  0.077395936
## minfertf    -0.006064680 -0.069717342 -0.095200819  0.052577887  0.012417176
## lf7rup       0.212538506 -0.383998647 -0.339086028 -0.304565962  0.269053842
## hstructn     1.000000000  0.015917490  0.006829071 -0.001219126  0.632584576
## geolrngaggn  0.015917490  1.000000000  0.639303189  0.389034758 -0.151960908
## geolmnaggn   0.006829071  0.639303189  1.000000000  0.434440924 -0.191605703
## bdensity50n -0.001219126  0.389034758  0.434440924  1.000000000 -0.242365677
## slopern      0.632584576 -0.151960908 -0.191605703 -0.242365677  1.000000000
## HubDist     -0.025128202  0.365678291 -0.157274111  0.011530038 -0.010031043
## MIN_AGE_MA  -0.023834271 -0.354316165  0.291254377  0.161235109 -0.102443205
##                  HubDist  MIN_AGE_MA
## srain2mp     0.105050226 -0.01033253
## bio21       -0.311245192  0.26224249
## bio16        0.514593140 -0.53715483
## slope        0.290780950 -0.20484326
## rugg500s     0.206111311 -0.13715325
## rugg500cv   -0.056583116  0.02802886
## pil_twicv    0.077693347  0.10190857
## pil_topos   -0.135862654  0.12969785
## pil_slps     0.294082766 -0.19812276
## pil_slpcv    0.057877154 -0.14588058
## nutrientsn  -0.243883502  0.17758264
## mrvbf        0.002259312 -0.10864172
## mrrtf       -0.073614373 -0.08413645
## minfertf    -0.049744858 -0.05713120
## lf7rup      -0.039387080  0.08328537
## hstructn    -0.025128202 -0.02383427
## geolrngaggn  0.365678291 -0.35431617
## geolmnaggn  -0.157274111  0.29125438
## bdensity50n  0.011530038  0.16123511
## slopern     -0.010031043 -0.10244320
## HubDist      1.000000000 -0.67278580
## MIN_AGE_MA  -0.672785797  1.00000000
# Draw the correlation matrix twice with enlarged text and legend labels:
# first the full matrix, then the upper triangle only.
for (layout in c("full", "upper")) {
  corrplot(
    correlations,
    method = "circle",
    type = layout,
    tl.cex = 1.5,
    cl.cex = 1.5
  )
}

dim(data2)
## [1] 409  30
# Create a density plot for each predictor column, six per page (2x3 grid).
# par() returns the previous values of the settings it changes; they are
# saved here and restored afterwards so that later plots are not forced
# into the 2x3 layout.
old_par <- par(mfrow = c(2, 3))
for (col_name in names(selectcol_Data2)) {
  plot(density(selectcol_Data2[[col_name]]), main = col_name)
}
par(old_par)

# Fix the RNG state so the partition below is reproducible.
set.seed(78945)

# Draw half of the rows, partitioned on the true_troglofauna outcome;
# list = FALSE returns the row indices as a matrix rather than a list.
Index1 <- createDataPartition(
  y = data2[["true_troglofauna"]],
  p = 0.5,
  list = FALSE
)

# Rows not drawn form the provisional hold-out set; drawn rows train the model.
data_prov <- data2[-Index1, ]
data_train <- data2[Index1, ]
dim(data_train)
## [1] 205  30
summary(data_train)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1344   Min.   :29.63   Min.   :195.6   Min.   : 0.08862  
##  1st Qu.:0.1419   1st Qu.:29.73   1st Qu.:223.0   1st Qu.: 1.04867  
##  Median :0.1470   Median :29.76   Median :225.8   Median : 2.15997  
##  Mean   :0.1475   Mean   :29.75   Mean   :227.3   Mean   : 3.13080  
##  3rd Qu.:0.1512   3rd Qu.:29.78   3rd Qu.:231.3   3rd Qu.: 4.45382  
##  Max.   :0.1678   Max.   :29.84   Max.   :244.2   Max.   :12.80595  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.00942   Min.   :0.00854   Min.   :0.03348   Min.   : 0.000  
##  1st Qu.:0.54119   1st Qu.:0.05971   1st Qu.:0.07750   1st Qu.: 0.000  
##  Median :1.32722   Median :0.08920   Median :0.11607   Median : 0.000  
##  Mean   :1.73832   Mean   :0.11282   Mean   :0.13714   Mean   : 1.366  
##  3rd Qu.:2.45758   3rd Qu.:0.15902   3rd Qu.:0.18838   3rd Qu.: 2.077  
##  Max.   :7.79036   Max.   :0.59943   Max.   :0.45404   Max.   :10.435  
##                                                                        
##     pil_slps          pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.04903   Min.   :0.2086   Min.   :1.000   Min.   :0.0000  
##  1st Qu.: 1.15748   1st Qu.:0.3447   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 3.28653   Median :0.4470   Median :1.300   Median :0.0000  
##  Mean   : 4.16924   Mean   :0.4728   Mean   :1.282   Mean   :0.4067  
##  3rd Qu.: 6.29694   3rd Qu.:0.5672   3rd Qu.:1.300   3rd Qu.:0.5784  
##  Max.   :16.74641   Max.   :1.3978   Max.   :1.600   Max.   :4.5904  
##                                                                      
##      mrrtf           minfertf         lf7rup         hstructn      
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 0.4286  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.0429  
##  Mean   :0.3521   Mean   :2.024   Mean   :3.044   Mean   : 0.9131  
##  3rd Qu.:0.5762   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :3.5547   Max.   :3.000   Max.   :7.000   Max.   :10.0000  
##                                                                    
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.247   Min.   : 0.00250  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.01579  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.04265  
##  Mean   :404.1882   Mean   :1410.5522   Mean   :1.347   Mean   : 0.33502  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.17062  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA   month_collection
##  Min.   : 1.237   Min.   :2453   October :22     
##  1st Qu.: 8.994   1st Qu.:2454   April   :20     
##  Median :31.730   Median :2454   July    :20     
##  Mean   :34.802   Mean   :2498   March   :19     
##  3rd Qu.:60.020   3rd Qu.:2597   November:18     
##  Max.   :79.744   Max.   :2597   May     :16     
##                                  (Other) :90     
##                                      HubName   
##  anticline, exposed                      : 19  
##  concealed                               :  0  
##  exposed                                 : 18  
##  normal, exposed, tick on downthrown side: 28  
##  overturned syncline, exposed            : 21  
##  syncline, exposed                       :119  
##                                                
##                                          UNITNAME  
##  Brockman Iron Formation                     :120  
##  Marra Mamba Iron Formation                  : 56  
##  Mount McRae Shale and Mount Sylvia Formation:  2  
##  Weeli Wolli Formation                       :  7  
##  Wittenoom Formation                         : 20  
##                                                    
##                                                    
##                                      ROCKTYPE1  
##  sedimentary carbonate                    : 20  
##  sedimentary other chemical or biochemical:183  
##  sedimentary siliciclastic                :  2  
##                                                 
##                                                 
##                                                 
##                                                 
##                                         FORMATION   true_troglofauna
##  Brockman Iron Formation                     :120   0: 59           
##  Marra Mamba Iron Formation                  : 56   1:146           
##  Mount McRae Shale and Mount Sylvia Formation:  2                   
##  Weeli Wolli Formation                       :  7                   
##  Wittenoom Formation                         : 20                   
##                                                                     
##                                                                     
##     LATITUDE        LONGITUDE    
##  Min.   :-23.33   Min.   :118.6  
##  1st Qu.:-22.94   1st Qu.:118.9  
##  Median :-22.91   Median :119.0  
##  Mean   :-22.84   Mean   :119.0  
##  3rd Qu.:-22.71   3rd Qu.:119.1  
##  Max.   :-22.51   Max.   :119.4  
## 
# Drop the coordinate columns from the training set.
# A logical mask is used instead of `-which(...)`: if neither name were
# present, `-which(...)` would evaluate to `-integer(0)`, which selects zero
# columns and silently empties the data frame. `drop = FALSE` guarantees the
# result stays a data frame.
data_train <- data_train[
  ,
  !(names(data_train) %in% c("LATITUDE", "LONGITUDE")),
  drop = FALSE
]
dim(data_train)
## [1] 205  28
head(data_train)
##      srain2mp    bio21    bio16   slope rugg500s rugg500cv pil_twicv pil_topos
## 1001  0.16029 29.73996 224.6990 4.82564  1.74834   0.08714   0.17703   0.00000
## 171   0.14708 29.77693 224.6783 2.26336  1.69707   0.09354   0.18033   9.34053
## 865   0.14544 29.65465 223.7451 0.92271  0.00960   0.01070   0.08952   0.00000
## 1616  0.13816 29.70033 231.0552 0.70888  0.72522   0.06054   0.09249   0.00000
## 1750  0.15230 29.72014 219.9479 4.43650  1.34984   0.06894   0.21494   5.37750
## 903   0.15055 29.78418 221.7930 4.34855  3.76135   0.20322   0.07274   2.38332
##      pil_slps pil_slpcv nutrientsn mrvbf   mrrtf minfertf lf7rup hstructn
## 1001  3.59536   0.36466        1.3     0 0.00000        2      3  1.04286
## 171   6.83865   0.86478        1.3     0 0.00000        2      3  1.04286
## 865   0.11971   0.64403        1.1     0 1.60444        2      3  0.42857
## 1616  3.82764   0.45171        1.1     0 1.82401        2      1  0.42857
## 1750  4.40210   0.42944        1.3     0 0.00000        2      1  1.04286
## 903   2.51954   0.57590        1.3     0 0.00000        2      2  1.04286
##      geolrngaggn geolmnaggn bdensity50n slopern   HubDist MIN_AGE_MA
## 1001     300.000   2650.000     1.36732 0.02515  3.954500       2597
## 171        2.588      1.294     1.36732 0.08978 13.631670       2597
## 865        2.588      1.294     1.34060 1.77740 20.744690       2454
## 1616      65.500     32.750     1.34060 0.03185 67.270712       2454
## 1750     300.000   2650.000     1.36732 0.01402  5.236846       2597
## 903      300.000   2650.000     1.36732 0.12920  3.802839       2597
##      month_collection                                  HubName
## 1001             July                                  exposed
## 171           January             overturned syncline, exposed
## 865               May normal, exposed, tick on downthrown side
## 1616          October                        syncline, exposed
## 1750              May                                  exposed
## 903              July                        syncline, exposed
##                        UNITNAME                                 ROCKTYPE1
## 1001 Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 171  Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 865     Brockman Iron Formation sedimentary other chemical or biochemical
## 1616    Brockman Iron Formation sedimentary other chemical or biochemical
## 1750 Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 903  Marra Mamba Iron Formation sedimentary other chemical or biochemical
##                       FORMATION true_troglofauna
## 1001 Marra Mamba Iron Formation                0
## 171  Marra Mamba Iron Formation                0
## 865     Brockman Iron Formation                0
## 1616    Brockman Iron Formation                1
## 1750 Marra Mamba Iron Formation                1
## 903  Marra Mamba Iron Formation                1
# Summary statistics of the training set after LATITUDE and LONGITUDE
# were removed.
summary(data_train)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1344   Min.   :29.63   Min.   :195.6   Min.   : 0.08862  
##  1st Qu.:0.1419   1st Qu.:29.73   1st Qu.:223.0   1st Qu.: 1.04867  
##  Median :0.1470   Median :29.76   Median :225.8   Median : 2.15997  
##  Mean   :0.1475   Mean   :29.75   Mean   :227.3   Mean   : 3.13080  
##  3rd Qu.:0.1512   3rd Qu.:29.78   3rd Qu.:231.3   3rd Qu.: 4.45382  
##  Max.   :0.1678   Max.   :29.84   Max.   :244.2   Max.   :12.80595  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.00942   Min.   :0.00854   Min.   :0.03348   Min.   : 0.000  
##  1st Qu.:0.54119   1st Qu.:0.05971   1st Qu.:0.07750   1st Qu.: 0.000  
##  Median :1.32722   Median :0.08920   Median :0.11607   Median : 0.000  
##  Mean   :1.73832   Mean   :0.11282   Mean   :0.13714   Mean   : 1.366  
##  3rd Qu.:2.45758   3rd Qu.:0.15902   3rd Qu.:0.18838   3rd Qu.: 2.077  
##  Max.   :7.79036   Max.   :0.59943   Max.   :0.45404   Max.   :10.435  
##                                                                        
##     pil_slps          pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.04903   Min.   :0.2086   Min.   :1.000   Min.   :0.0000  
##  1st Qu.: 1.15748   1st Qu.:0.3447   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 3.28653   Median :0.4470   Median :1.300   Median :0.0000  
##  Mean   : 4.16924   Mean   :0.4728   Mean   :1.282   Mean   :0.4067  
##  3rd Qu.: 6.29694   3rd Qu.:0.5672   3rd Qu.:1.300   3rd Qu.:0.5784  
##  Max.   :16.74641   Max.   :1.3978   Max.   :1.600   Max.   :4.5904  
##                                                                      
##      mrrtf           minfertf         lf7rup         hstructn      
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 0.4286  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.0429  
##  Mean   :0.3521   Mean   :2.024   Mean   :3.044   Mean   : 0.9131  
##  3rd Qu.:0.5762   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :3.5547   Max.   :3.000   Max.   :7.000   Max.   :10.0000  
##                                                                    
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.247   Min.   : 0.00250  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.01579  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.04265  
##  Mean   :404.1882   Mean   :1410.5522   Mean   :1.347   Mean   : 0.33502  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.17062  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA   month_collection
##  Min.   : 1.237   Min.   :2453   October :22     
##  1st Qu.: 8.994   1st Qu.:2454   April   :20     
##  Median :31.730   Median :2454   July    :20     
##  Mean   :34.802   Mean   :2498   March   :19     
##  3rd Qu.:60.020   3rd Qu.:2597   November:18     
##  Max.   :79.744   Max.   :2597   May     :16     
##                                  (Other) :90     
##                                      HubName   
##  anticline, exposed                      : 19  
##  concealed                               :  0  
##  exposed                                 : 18  
##  normal, exposed, tick on downthrown side: 28  
##  overturned syncline, exposed            : 21  
##  syncline, exposed                       :119  
##                                                
##                                          UNITNAME  
##  Brockman Iron Formation                     :120  
##  Marra Mamba Iron Formation                  : 56  
##  Mount McRae Shale and Mount Sylvia Formation:  2  
##  Weeli Wolli Formation                       :  7  
##  Wittenoom Formation                         : 20  
##                                                    
##                                                    
##                                      ROCKTYPE1  
##  sedimentary carbonate                    : 20  
##  sedimentary other chemical or biochemical:183  
##  sedimentary siliciclastic                :  2  
##                                                 
##                                                 
##                                                 
##                                                 
##                                         FORMATION   true_troglofauna
##  Brockman Iron Formation                     :120   0: 59           
##  Marra Mamba Iron Formation                  : 56   1:146           
##  Mount McRae Shale and Mount Sylvia Formation:  2                   
##  Weeli Wolli Formation                       :  7                   
##  Wittenoom Formation                         : 20                   
##                                                                     
## 
# Re-seed so the second partition is reproducible on its own.
set.seed(78945)

# Split the provisional hold-out set 60/40 on the outcome. Despite its name,
# trainIndex1 holds the rows that become data_test; the remaining rows
# become finalTest1.
trainIndex1 <- createDataPartition(
  y = data_prov[["true_troglofauna"]],
  p = 0.6,
  list = FALSE
)
finalTest1 <- data_prov[-trainIndex1, ]
data_test <- data_prov[trainIndex1, ]
summary(data_test)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1338   Min.   :29.64   Min.   :196.1   Min.   : 0.08862  
##  1st Qu.:0.1432   1st Qu.:29.74   1st Qu.:221.5   1st Qu.: 1.40481  
##  Median :0.1478   Median :29.76   Median :224.6   Median : 2.53936  
##  Mean   :0.1477   Mean   :29.75   Mean   :226.3   Mean   : 3.22529  
##  3rd Qu.:0.1510   3rd Qu.:29.78   3rd Qu.:229.3   3rd Qu.: 4.02498  
##  Max.   :0.1614   Max.   :29.84   Max.   :245.2   Max.   :15.00457  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.03319   Min.   :0.01076   Min.   :0.03348   Min.   : 0.000  
##  1st Qu.:0.47334   1st Qu.:0.06180   1st Qu.:0.08586   1st Qu.: 0.000  
##  Median :1.32736   Median :0.08941   Median :0.12202   Median : 0.000  
##  Mean   :1.78291   Mean   :0.11772   Mean   :0.14844   Mean   : 1.631  
##  3rd Qu.:2.53294   3rd Qu.:0.15165   3rd Qu.:0.20438   3rd Qu.: 3.480  
##  Max.   :9.29160   Max.   :0.43959   Max.   :0.39956   Max.   :10.110  
##                                                                        
##     pil_slps         pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.1107   Min.   :0.1951   Min.   :1.100   Min.   :0.0000  
##  1st Qu.: 1.0131   1st Qu.:0.3506   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 2.7769   Median :0.4659   Median :1.300   Median :0.0000  
##  Mean   : 4.1962   Mean   :0.4726   Mean   :1.315   Mean   :0.3674  
##  3rd Qu.: 6.6343   3rd Qu.:0.5579   3rd Qu.:1.300   3rd Qu.:0.0000  
##  Max.   :16.6383   Max.   :0.8918   Max.   :1.600   Max.   :4.5741  
##                                                                     
##      mrrtf           minfertf         lf7rup         hstructn      
##  Min.   :0.0000   Min.   :2.000   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 0.4286  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.0429  
##  Mean   :0.2309   Mean   :2.041   Mean   :3.008   Mean   : 0.9729  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :2.7292   Max.   :3.000   Max.   :7.000   Max.   :10.0000  
##                                                                    
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   : 0.00344  
##  1st Qu.:  2.5880   1st Qu.:   1.2940   1st Qu.:1.341   1st Qu.: 0.01630  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.03951  
##  Mean   :335.2414   Mean   :1308.5467   Mean   :1.348   Mean   : 0.40178  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.16592  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA   month_collection
##  Min.   : 1.236   Min.   :2453   August  :15     
##  1st Qu.: 5.492   1st Qu.:2454   March   :14     
##  Median :21.187   Median :2454   February:13     
##  Mean   :29.232   Mean   :2511   November:13     
##  3rd Qu.:55.606   3rd Qu.:2597   May     :11     
##  Max.   :81.121   Max.   :2597   April   :10     
##                                  (Other) :47     
##                                      HubName  
##  anticline, exposed                      :18  
##  concealed                               : 1  
##  exposed                                 :22  
##  normal, exposed, tick on downthrown side:12  
##  overturned syncline, exposed            : 9  
##  syncline, exposed                       :61  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :58  
##  Marra Mamba Iron Formation                  :45  
##  Mount McRae Shale and Mount Sylvia Formation: 1  
##  Weeli Wolli Formation                       : 9  
##  Wittenoom Formation                         :10  
##                                                   
##                                                   
##                                      ROCKTYPE1  
##  sedimentary carbonate                    : 10  
##  sedimentary other chemical or biochemical:112  
##  sedimentary siliciclastic                :  1  
##                                                 
##                                                 
##                                                 
##                                                 
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :58   0:35            
##  Marra Mamba Iron Formation                  :45   1:88            
##  Mount McRae Shale and Mount Sylvia Formation: 1                   
##  Weeli Wolli Formation                       : 9                   
##  Wittenoom Formation                         :10                   
##                                                                    
##                                                                    
##     LATITUDE        LONGITUDE    
##  Min.   :-23.33   Min.   :118.6  
##  1st Qu.:-22.98   1st Qu.:118.9  
##  Median :-22.92   Median :119.0  
##  Mean   :-22.88   Mean   :119.0  
##  3rd Qu.:-22.82   3rd Qu.:119.1  
##  Max.   :-22.52   Max.   :119.4  
## 
# Row and column counts of data_test before the LATITUDE/LONGITUDE columns are
# removed further down.
dim(data_test)
## [1] 123  30
# Per-column summary of finalTest1 (quantiles for numeric columns, level counts
# for factors); the coordinate columns are still present at this point.
summary(finalTest1)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1356   Min.   :29.64   Min.   :202.2   Min.   : 0.08943  
##  1st Qu.:0.1434   1st Qu.:29.74   1st Qu.:221.4   1st Qu.: 1.20730  
##  Median :0.1473   Median :29.76   Median :224.3   Median : 2.31213  
##  Mean   :0.1475   Mean   :29.75   Mean   :225.8   Mean   : 3.33565  
##  3rd Qu.:0.1517   3rd Qu.:29.77   3rd Qu.:230.1   3rd Qu.: 4.51866  
##  Max.   :0.1633   Max.   :29.83   Max.   :245.4   Max.   :13.58547  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos      
##  Min.   :0.01647   Min.   :0.01810   Min.   :0.04523   Min.   : 0.0000  
##  1st Qu.:0.60450   1st Qu.:0.07030   1st Qu.:0.08987   1st Qu.: 0.0000  
##  Median :1.14453   Median :0.09675   Median :0.13399   Median : 0.1104  
##  Mean   :1.72459   Mean   :0.11947   Mean   :0.14705   Mean   : 1.4945  
##  3rd Qu.:2.42691   3rd Qu.:0.17444   3rd Qu.:0.20072   3rd Qu.: 2.4066  
##  Max.   :7.05156   Max.   :0.30012   Max.   :0.36527   Max.   :10.3639  
##                                                                         
##     pil_slps         pil_slpcv        nutrientsn       mrvbf       
##  Min.   : 0.1401   Min.   :0.1951   Min.   :1.10   Min.   :0.0000  
##  1st Qu.: 1.0510   1st Qu.:0.3434   1st Qu.:1.30   1st Qu.:0.0000  
##  Median : 2.4139   Median :0.4704   Median :1.30   Median :0.0000  
##  Mean   : 3.6002   Mean   :0.4613   Mean   :1.29   Mean   :0.4266  
##  3rd Qu.: 5.1142   3rd Qu.:0.5202   3rd Qu.:1.30   3rd Qu.:0.0000  
##  Max.   :17.0097   Max.   :1.1404   Max.   :1.60   Max.   :4.5610  
##                                                                    
##      mrrtf           minfertf         lf7rup         hstructn     
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.043  
##  Mean   :0.3551   Mean   :2.049   Mean   :3.074   Mean   : 1.081  
##  3rd Qu.:0.5385   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043  
##  Max.   :3.5544   Max.   :3.000   Max.   :7.000   Max.   :10.000  
##                                                                   
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   : 0.00132  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.01814  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.05924  
##  Mean   :362.0352   Mean   :1510.6472   Mean   :1.352   Mean   : 0.48230  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.20859  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA    month_collection
##  Min.   : 1.294   Min.   :2453   September:11     
##  1st Qu.: 7.096   1st Qu.:2454   June     :10     
##  Median :17.310   Median :2506   January  : 8     
##  Mean   :27.576   Mean   :2517   July     : 8     
##  3rd Qu.:53.489   3rd Qu.:2597   March    : 7     
##  Max.   :77.437   Max.   :2597   April    : 7     
##                                  (Other)  :30     
##                                      HubName  
##  anticline, exposed                      :16  
##  concealed                               : 0  
##  exposed                                 :10  
##  normal, exposed, tick on downthrown side:10  
##  overturned syncline, exposed            : 6  
##  syncline, exposed                       :39  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :34  
##  Marra Mamba Iron Formation                  :32  
##  Mount McRae Shale and Mount Sylvia Formation: 2  
##  Weeli Wolli Formation                       : 4  
##  Wittenoom Formation                         : 9  
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 9  
##  sedimentary other chemical or biochemical:70  
##  sedimentary siliciclastic                : 2  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :34   0:23            
##  Marra Mamba Iron Formation                  :32   1:58            
##  Mount McRae Shale and Mount Sylvia Formation: 2                   
##  Weeli Wolli Formation                       : 4                   
##  Wittenoom Formation                         : 9                   
##                                                                    
##                                                                    
##     LATITUDE        LONGITUDE    
##  Min.   :-23.26   Min.   :118.7  
##  1st Qu.:-22.98   1st Qu.:118.9  
##  Median :-22.93   Median :119.0  
##  Mean   :-22.88   Mean   :119.0  
##  3rd Qu.:-22.75   3rd Qu.:119.0  
##  Max.   :-22.53   Max.   :119.4  
## 
# Row and column counts of finalTest1, for comparison with data_test.
dim(finalTest1)
## [1] 81 30
# Drop the coordinate columns from both data frames.
# Each frame is filtered against its OWN column names with a logical mask:
#   * positions looked up in another data frame (data2) would remove the wrong
#     columns if the column order ever differed;
#   * `-which(...)` with no match evaluates to an empty index and would drop
#     every column, whereas a logical mask simply keeps everything.
# drop = FALSE guarantees a data frame even if a single column remains.
data_test <- data_test[
  ,
  !(names(data_test) %in% c("LATITUDE", "LONGITUDE")),
  drop = FALSE
]

finalTest <- finalTest1[
  ,
  !(names(finalTest1) %in% c("LATITUDE", "LONGITUDE")),
  drop = FALSE
]

# Column classes of the reduced data_test and of the untouched finalTest1
# (finalTest1, not finalTest, so its listing still includes the coordinates).
sapply(data_test, class)
sapply(finalTest1, class)
##         srain2mp            bio21            bio16            slope 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##            mrrtf         minfertf           lf7rup         hstructn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##        "numeric"        "numeric"         "factor"         "factor" 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##         "factor"         "factor"         "factor"         "factor"
##         srain2mp            bio21            bio16            slope 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         rugg500s        rugg500cv        pil_twicv        pil_topos 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##         pil_slps        pil_slpcv       nutrientsn            mrvbf 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##            mrrtf         minfertf           lf7rup         hstructn 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##      geolrngaggn       geolmnaggn      bdensity50n          slopern 
##        "numeric"        "numeric"        "numeric"        "numeric" 
##          HubDist       MIN_AGE_MA month_collection          HubName 
##        "numeric"        "numeric"         "factor"         "factor" 
##         UNITNAME        ROCKTYPE1        FORMATION true_troglofauna 
##         "factor"         "factor"         "factor"         "factor" 
##         LATITUDE        LONGITUDE 
##        "numeric"        "numeric"
# First rows of data_test after the coordinate columns were dropped.
head(data_test)
##      srain2mp    bio21    bio16   slope rugg500s rugg500cv pil_twicv pil_topos
## 157   0.14583 29.76095 223.8901 1.38940  1.07357   0.04715   0.25449   0.00000
## 407   0.15129 29.75593 222.2424 1.62006  1.98837   0.23432   0.08385   5.42778
## 258   0.14013 29.81410 212.6796 2.18761  2.66450   0.19765   0.12132   2.31889
## 1805  0.15167 29.72817 219.5120 5.65121  4.93231   0.24911   0.12501   6.50757
## 433   0.15929 29.70598 227.4977 9.74579  4.19078   0.07515   0.06091   0.29835
## 732   0.15565 29.76270 230.0836 7.95223  1.07037   0.08113   0.08246   0.00000
##      pil_slps pil_slpcv nutrientsn   mrvbf mrrtf minfertf lf7rup hstructn
## 157   6.63430   0.45474        1.3 0.66138     0        2      7  1.04286
## 407   1.17587   0.37240        1.3 0.57837     0        2      1  1.04286
## 258   1.74186   0.63626        1.3 0.00000     0        3      3  1.04286
## 1805  3.77481   0.78681        1.3 0.00000     0        2      3  1.04286
## 433  15.56535   0.65365        1.3 0.00000     0        2      1  1.04286
## 732   2.98576   0.35082        1.3 0.00000     0        3      3  1.04286
##      geolrngaggn geolmnaggn bdensity50n slopern   HubDist MIN_AGE_MA
## 157     300.0000   2.65e+03     1.36732 0.01070  2.927153       2597
## 407     300.0000   2.65e+03     1.36732 0.09888  8.014421       2597
## 258       0.0115   5.75e-03     1.36732 0.08187  1.564773       2506
## 1805    300.0000   2.65e+03     1.36732 0.07794  3.182699       2506
## 433     900.0000   2.05e+03     1.36732 0.06761  5.842786       2454
## 732     900.0000   2.05e+03     1.36732 0.01505 58.112543       2454
##      month_collection                                  HubName
## 157           January normal, exposed, tick on downthrown side
## 407          February                                  exposed
## 258               May                        syncline, exposed
## 1805          October                                  exposed
## 433             March                                  exposed
## 732             April                        syncline, exposed
##                        UNITNAME                                 ROCKTYPE1
## 157  Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 407  Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 258         Wittenoom Formation                     sedimentary carbonate
## 1805        Wittenoom Formation                     sedimentary carbonate
## 433     Brockman Iron Formation sedimentary other chemical or biochemical
## 732     Brockman Iron Formation sedimentary other chemical or biochemical
##                       FORMATION true_troglofauna
## 157  Marra Mamba Iron Formation                1
## 407  Marra Mamba Iron Formation                1
## 258         Wittenoom Formation                1
## 1805        Wittenoom Formation                1
## 433     Brockman Iron Formation                1
## 732     Brockman Iron Formation                1
# Same row count as before; the column count is two lower now that LATITUDE and
# LONGITUDE are gone.
dim(data_test)
## [1] 123  28
# Per-column summary of the reduced data_test.
summary(data_test)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1338   Min.   :29.64   Min.   :196.1   Min.   : 0.08862  
##  1st Qu.:0.1432   1st Qu.:29.74   1st Qu.:221.5   1st Qu.: 1.40481  
##  Median :0.1478   Median :29.76   Median :224.6   Median : 2.53936  
##  Mean   :0.1477   Mean   :29.75   Mean   :226.3   Mean   : 3.22529  
##  3rd Qu.:0.1510   3rd Qu.:29.78   3rd Qu.:229.3   3rd Qu.: 4.02498  
##  Max.   :0.1614   Max.   :29.84   Max.   :245.2   Max.   :15.00457  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.03319   Min.   :0.01076   Min.   :0.03348   Min.   : 0.000  
##  1st Qu.:0.47334   1st Qu.:0.06180   1st Qu.:0.08586   1st Qu.: 0.000  
##  Median :1.32736   Median :0.08941   Median :0.12202   Median : 0.000  
##  Mean   :1.78291   Mean   :0.11772   Mean   :0.14844   Mean   : 1.631  
##  3rd Qu.:2.53294   3rd Qu.:0.15165   3rd Qu.:0.20438   3rd Qu.: 3.480  
##  Max.   :9.29160   Max.   :0.43959   Max.   :0.39956   Max.   :10.110  
##                                                                        
##     pil_slps         pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.1107   Min.   :0.1951   Min.   :1.100   Min.   :0.0000  
##  1st Qu.: 1.0131   1st Qu.:0.3506   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 2.7769   Median :0.4659   Median :1.300   Median :0.0000  
##  Mean   : 4.1962   Mean   :0.4726   Mean   :1.315   Mean   :0.3674  
##  3rd Qu.: 6.6343   3rd Qu.:0.5579   3rd Qu.:1.300   3rd Qu.:0.0000  
##  Max.   :16.6383   Max.   :0.8918   Max.   :1.600   Max.   :4.5741  
##                                                                     
##      mrrtf           minfertf         lf7rup         hstructn      
##  Min.   :0.0000   Min.   :2.000   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 0.4286  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.0429  
##  Mean   :0.2309   Mean   :2.041   Mean   :3.008   Mean   : 0.9729  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :2.7292   Max.   :3.000   Max.   :7.000   Max.   :10.0000  
##                                                                    
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   : 0.00344  
##  1st Qu.:  2.5880   1st Qu.:   1.2940   1st Qu.:1.341   1st Qu.: 0.01630  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.03951  
##  Mean   :335.2414   Mean   :1308.5467   Mean   :1.348   Mean   : 0.40178  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.16592  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA   month_collection
##  Min.   : 1.236   Min.   :2453   August  :15     
##  1st Qu.: 5.492   1st Qu.:2454   March   :14     
##  Median :21.187   Median :2454   February:13     
##  Mean   :29.232   Mean   :2511   November:13     
##  3rd Qu.:55.606   3rd Qu.:2597   May     :11     
##  Max.   :81.121   Max.   :2597   April   :10     
##                                  (Other) :47     
##                                      HubName  
##  anticline, exposed                      :18  
##  concealed                               : 1  
##  exposed                                 :22  
##  normal, exposed, tick on downthrown side:12  
##  overturned syncline, exposed            : 9  
##  syncline, exposed                       :61  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :58  
##  Marra Mamba Iron Formation                  :45  
##  Mount McRae Shale and Mount Sylvia Formation: 1  
##  Weeli Wolli Formation                       : 9  
##  Wittenoom Formation                         :10  
##                                                   
##                                                   
##                                      ROCKTYPE1  
##  sedimentary carbonate                    : 10  
##  sedimentary other chemical or biochemical:112  
##  sedimentary siliciclastic                :  1  
##                                                 
##                                                 
##                                                 
##                                                 
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :58   0:35            
##  Marra Mamba Iron Formation                  :45   1:88            
##  Mount McRae Shale and Mount Sylvia Formation: 1                   
##  Weeli Wolli Formation                       : 9                   
##  Wittenoom Formation                         :10                   
##                                                                    
## 
# Remaining column names of data_test; true_troglofauna is the last column.
names(data_test)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"        "true_troglofauna"
# First rows of finalTest (finalTest1 without the coordinate columns).
head(finalTest)
##      srain2mp    bio21    bio16   slope rugg500s rugg500cv pil_twicv pil_topos
## 883   0.15706 29.67899 221.4037 0.89097  0.96237   0.10369   0.29959   5.08126
## 1171  0.14855 29.76123 226.2116 3.02034  3.73254   0.15767   0.16215   0.00000
## 307   0.15033 29.79736 216.0963 9.00398  1.80702   0.05613   0.11547   0.48605
## 1743  0.14529 29.76137 245.3545 1.21520  0.23500   0.14213   0.14027   0.00000
## 1820  0.15231 29.72789 245.1607 5.02147  6.69625   0.17764   0.23004   0.47942
## 176   0.14965 29.75139 224.3239 3.81876  0.58854   0.06064   0.15573   0.49352
##      pil_slps pil_slpcv nutrientsn   mrvbf   mrrtf minfertf lf7rup hstructn
## 883   5.32363   1.14040        1.3 1.68056 0.00000        2      6  1.04286
## 1171  6.11757   0.38974        1.3 0.00000 0.00000        2      3  1.04286
## 307   7.15268   0.33769        1.3 0.00000 0.00000        2      2  1.04286
## 1743  0.24637   0.55030        1.6 0.00000 0.57837        2      3  0.00000
## 1820  7.74808   0.52370        1.1 0.00000 0.00000        2      2  0.42857
## 176   1.21059   0.19508        1.3 0.00000 0.00000        2      2  1.04286
##      geolrngaggn geolmnaggn bdensity50n slopern   HubDist MIN_AGE_MA
## 883      900.000   2050.000     1.36732 0.15237  2.123917       2454
## 1171     900.000   2050.000     1.36732 0.02540 60.107338       2454
## 307      900.000   2050.000     1.36732 0.00468  6.289232       2454
## 1743       2.588      1.294     1.25500 1.90476 36.325288       2454
## 1820     900.000   2050.000     1.34060 0.01748 51.666414       2454
## 176      300.000   2650.000     1.36732 0.01359 16.867923       2597
##      month_collection            HubName                   UNITNAME
## 883         September            exposed    Brockman Iron Formation
## 1171            April  syncline, exposed    Brockman Iron Formation
## 307             March  syncline, exposed    Brockman Iron Formation
## 1743        September  syncline, exposed    Brockman Iron Formation
## 1820         November  syncline, exposed    Brockman Iron Formation
## 176               May anticline, exposed Marra Mamba Iron Formation
##                                      ROCKTYPE1                  FORMATION
## 883  sedimentary other chemical or biochemical    Brockman Iron Formation
## 1171 sedimentary other chemical or biochemical    Brockman Iron Formation
## 307  sedimentary other chemical or biochemical    Brockman Iron Formation
## 1743 sedimentary other chemical or biochemical    Brockman Iron Formation
## 1820 sedimentary other chemical or biochemical    Brockman Iron Formation
## 176  sedimentary other chemical or biochemical Marra Mamba Iron Formation
##      true_troglofauna
## 883                 1
## 1171                1
## 307                 0
## 1743                1
## 1820                1
## 176                 1
# Row and column counts of finalTest; the column count should match data_test.
dim(finalTest)
## [1] 81 28
# Per-column summary of finalTest.
summary(finalTest)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1356   Min.   :29.64   Min.   :202.2   Min.   : 0.08943  
##  1st Qu.:0.1434   1st Qu.:29.74   1st Qu.:221.4   1st Qu.: 1.20730  
##  Median :0.1473   Median :29.76   Median :224.3   Median : 2.31213  
##  Mean   :0.1475   Mean   :29.75   Mean   :225.8   Mean   : 3.33565  
##  3rd Qu.:0.1517   3rd Qu.:29.77   3rd Qu.:230.1   3rd Qu.: 4.51866  
##  Max.   :0.1633   Max.   :29.83   Max.   :245.4   Max.   :13.58547  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos      
##  Min.   :0.01647   Min.   :0.01810   Min.   :0.04523   Min.   : 0.0000  
##  1st Qu.:0.60450   1st Qu.:0.07030   1st Qu.:0.08987   1st Qu.: 0.0000  
##  Median :1.14453   Median :0.09675   Median :0.13399   Median : 0.1104  
##  Mean   :1.72459   Mean   :0.11947   Mean   :0.14705   Mean   : 1.4945  
##  3rd Qu.:2.42691   3rd Qu.:0.17444   3rd Qu.:0.20072   3rd Qu.: 2.4066  
##  Max.   :7.05156   Max.   :0.30012   Max.   :0.36527   Max.   :10.3639  
##                                                                         
##     pil_slps         pil_slpcv        nutrientsn       mrvbf       
##  Min.   : 0.1401   Min.   :0.1951   Min.   :1.10   Min.   :0.0000  
##  1st Qu.: 1.0510   1st Qu.:0.3434   1st Qu.:1.30   1st Qu.:0.0000  
##  Median : 2.4139   Median :0.4704   Median :1.30   Median :0.0000  
##  Mean   : 3.6002   Mean   :0.4613   Mean   :1.29   Mean   :0.4266  
##  3rd Qu.: 5.1142   3rd Qu.:0.5202   3rd Qu.:1.30   3rd Qu.:0.0000  
##  Max.   :17.0097   Max.   :1.1404   Max.   :1.60   Max.   :4.5610  
##                                                                    
##      mrrtf           minfertf         lf7rup         hstructn     
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.043  
##  Mean   :0.3551   Mean   :2.049   Mean   :3.074   Mean   : 1.081  
##  3rd Qu.:0.5385   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043  
##  Max.   :3.5544   Max.   :3.000   Max.   :7.000   Max.   :10.000  
##                                                                   
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   : 0.00132  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.01814  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.05924  
##  Mean   :362.0352   Mean   :1510.6472   Mean   :1.352   Mean   : 0.48230  
##  3rd Qu.:900.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.20859  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA    month_collection
##  Min.   : 1.294   Min.   :2453   September:11     
##  1st Qu.: 7.096   1st Qu.:2454   June     :10     
##  Median :17.310   Median :2506   January  : 8     
##  Mean   :27.576   Mean   :2517   July     : 8     
##  3rd Qu.:53.489   3rd Qu.:2597   March    : 7     
##  Max.   :77.437   Max.   :2597   April    : 7     
##                                  (Other)  :30     
##                                      HubName  
##  anticline, exposed                      :16  
##  concealed                               : 0  
##  exposed                                 :10  
##  normal, exposed, tick on downthrown side:10  
##  overturned syncline, exposed            : 6  
##  syncline, exposed                       :39  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :34  
##  Marra Mamba Iron Formation                  :32  
##  Mount McRae Shale and Mount Sylvia Formation: 2  
##  Weeli Wolli Formation                       : 4  
##  Wittenoom Formation                         : 9  
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 9  
##  sedimentary other chemical or biochemical:70  
##  sedimentary siliciclastic                : 2  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :34   0:23            
##  Marra Mamba Iron Formation                  :32   1:58            
##  Mount McRae Shale and Mount Sylvia Formation: 2                   
##  Weeli Wolli Formation                       : 4                   
##  Wittenoom Formation                         : 9                   
##                                                                    
## 
# Column names of finalTest, to compare against names(data_test).
names(finalTest)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"        "true_troglofauna"
# Keep the observed true_troglofauna labels of data_test in their own vector,
# then tabulate how many rows fall in each class.
ContraProva <- data_test[["true_troglofauna"]]
summary(ContraProva)
##  0  1 
## 35 88
# Extract the four cells of a 2x2 confusion table as plain numbers.
#
# caret_confusion_matrix: an object with a `$table` element whose cells are
#   read by linear (column-major) index 1..4.
# Returns an unnamed numeric vector in the order c(tp, fp, tn, fn).
#
# NOTE(review): the tp/fp/tn/fn labels assume the table is laid out as
# prediction (rows) x reference (columns) with the SECOND level as the positive
# class - confirm against how the confusion matrix was built.
get_confusion_elements <- function(caret_confusion_matrix) {
  # Linear cell index for each element, listed in the returned order.
  cell_index <- c(tp = 4, fp = 2, tn = 1, fn = 3)
  as.numeric(caret_confusion_matrix$table[cell_index])
}


# Matthews correlation coefficient from the four confusion-matrix counts.
#
# tp - true positives
# fp - false positives
# tn - true negatives
# fn - false negatives
# The arguments may be vectors of equal length; the result is then elementwise.
#
# Returns a numeric MCC in [-1, 1]. When any marginal total is zero the
# denominator is zero and the coefficient is undefined; 0 is returned in that
# case (the usual convention) instead of NaN.
calculate_mcc <- function(tp, fp, tn, fn) {
  # Work in double precision: integer counts would overflow to NA in the
  # products below once they exceed .Machine$integer.max.
  tp <- as.numeric(tp)
  fp <- as.numeric(fp)
  tn <- as.numeric(tn)
  fn <- as.numeric(fn)

  numerator <- (tp * tn) - (fp * fn)
  denominator <- sqrt((tp + fp) * (tp + fn)) * sqrt((tn + fp) * (tn + fn))

  mcc <- numerator / denominator
  # Zero denominator: replace the resulting 0/0 = NaN with 0.
  mcc[!is.na(denominator) & denominator == 0] <- 0
  mcc
}


# Matthews correlation coefficient computed directly from a confusion-matrix
# object.
#
# caret_confusion_matrix: an object with a 2x2 `$table` element. Cells are read
#   by linear (column-major) index: [1] = tn, [2] = fp, [3] = fn, [4] = tp.
#   NOTE(review): this labelling assumes prediction rows x reference columns
#   with the second level as the positive class - confirm against the caller.
#
# Returns a single numeric MCC in [-1, 1]; 0 when a marginal total is zero
# (undefined coefficient) instead of NaN.
calculate_mcc1 <- function(caret_confusion_matrix) {
  # The table stores integer counts; convert to double so the products below
  # cannot overflow to NA on large data sets.
  cells <- as.numeric(caret_confusion_matrix$table)
  tn <- cells[1]
  fp <- cells[2]
  fn <- cells[3]
  tp <- cells[4]

  numerator <- (tp * tn) - (fp * fn)
  denominator <- sqrt((tp + fp) * (tp + fn)) * sqrt((tn + fp) * (tn + fn))

  # Zero denominator: the coefficient is undefined, report 0 by convention.
  if (!is.na(denominator) && denominator == 0) {
    return(0)
  }
  numerator / denominator
}



# F2 score (F-beta with beta = 2, weighting recall above precision) from a
# confusion-matrix object.
#
# CM_predictions: an object whose `$byClass` element can be indexed by the
#   names "Precision" and "Sensitivity".
# Returns a single unnamed numeric value.
calculate_F2 <- function(CM_predictions) {
  precision <- CM_predictions$byClass["Precision"]
  recall <- CM_predictions$byClass["Sensitivity"]
  beta_squared <- 2^2

  f2 <- ((1 + beta_squared) * precision * recall) /
    (beta_squared * precision + recall)
  # Strip the name inherited from the byClass lookup.
  as.numeric(f2)
}
# Tune the random forest's mtry with repeated cross-validation.
# Seed the RNG first so the fold assignment and the forests are reproducible.
set.seed(78945)



# Candidate values for mtry: 2, 4, ..., 26.
grid <- expand.grid(.mtry=seq(from = 2, to = 26, by = 2))

trControl <- trainControl(
  method = "repeatedcv",   # Repeated k-fold cross-validation
  repeats = 10,            # Number of repetitions of the k-fold scheme
  number = 5,              # Number of folds in each repetition
  classProbs = TRUE,       # Compute class probabilities (needed for ROC)
  savePredictions = "final",  # Keep held-out predictions for the selected tuning value only
  summaryFunction = twoClassSummary  # caret's two-class summary: reports ROC, Sens and Spec
  # NOTE(review): twoClassSummary treats the first factor level (X0 here) as
  # the event, so Sens/Spec refer to X0 - confirm this is the intended reading.
)

rf_mtry <- train(
  make.names(true_troglofauna) ~ .,               # Response recoded to X0/X1 (valid R names); all other columns are predictors
  data = data_train,                             # Training data
  method = "rf",                                 # Random Forest method
  # NOTE(review): strata is the full-length label vector, while caret fits on
  # resampled subsets during cross-validation - confirm the lengths line up.
  strata = data_train$true_troglofauna,          # Stratification based on the target variable
  # Per-class sample size drawn for each tree: c(min(n0, n1), n1).
  # NOTE(review): the second min() compares the class-1 count with itself, so
  # it is just n1; the classes are equally sampled only if class 1 is the
  # minority class. Verify whether min(n0, n1) was intended for both entries.
  sampsize = c(min(sum(data_train$true_troglofauna == 0), sum(data_train$true_troglofauna == 1)),
             min(sum(data_train$true_troglofauna == 1), sum(data_train$true_troglofauna == 1))),
  metric = "ROC",                                # Select the tuning value by area under the ROC curve
  tuneGrid = grid,                               # mtry candidates defined above
  trControl = trControl,                         # Resampling settings defined above
  importance = TRUE,                             # Calculate variable importance
  ntree = 500                                    # Number of trees in the Random Forest
)


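# Note on sampsize: passing sampsize = rep(min(table(data_train$true_troglofauna)), 2)
# would give both classes the same frequency in each tree's sample
# (e.g. sampsize = c(100 cases of 0, 100 cases of 1)). The call above instead
# passes c(min(n0, n1), n1), so the two classes are sampled equally only when
# class 1 is the minority class.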

# Print the resampled ROC/Sens/Spec for every mtry candidate and the value
# selected by the largest ROC.
rf_mtry
## Random Forest 
## 
## 205 samples
##  27 predictor
##   2 classes: 'X0', 'X1' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 10 times) 
## Summary of sample sizes: 164, 164, 164, 164, 164, 164, ... 
## Resampling results across tuning parameters:
## 
##   mtry  ROC        Sens       Spec     
##    2    0.7587364  0.2459091  0.9430805
##    4    0.7535971  0.3392424  0.9088736
##    6    0.7429007  0.3409091  0.9012414
##    8    0.7405398  0.3660606  0.8916092
##   10    0.7332342  0.3628788  0.8827816
##   12    0.7281541  0.3745455  0.8806207
##   14    0.7268698  0.3730303  0.8779310
##   16    0.7284028  0.3760606  0.8751264
##   18    0.7225212  0.3725758  0.8697471
##   20    0.7202178  0.3798485  0.8663448
##   22    0.7228754  0.3828788  0.8615632
##   24    0.7192207  0.3778788  0.8608966
##   26    0.7137715  0.3863636  0.8602529
## 
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Resampled performance (ROC, Sens, Spec) at the selected mtry only.
getTrainPerf(rf_mtry)
##    TrainROC TrainSens TrainSpec method
## 1 0.7587364 0.2459091 0.9430805     rf
# List the components stored in the fitted forest; for this object summary()
# prints a length/class/mode table rather than fit statistics.
summary(rf_mtry)
##                 Length Class      Mode     
## call               8   -none-     call     
## type               1   -none-     character
## predicted        205   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes            410   matrix     numeric  
## oob.times        205   -none-     numeric  
## classes            2   -none-     character
## importance       192   -none-     numeric  
## importanceSD     144   -none-     numeric  
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y                205   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames            48   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## param              4   -none-     list
#str(rf_mtry)
rf_mtry$bestTune$mtry
## [1] 2
rf_mtry$finalModel
## 
## Call:
##  randomForest(x = x, y = y, ntree = 500, mtry = param$mtry, strata = ..1,      sampsize = ..2, importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 24.88%
## Confusion matrix:
##    X0  X1 class.error
## X0 16  43  0.72881356
## X1  8 138  0.05479452
rf_mtry$finalModel$confusion
##    X0  X1 class.error
## X0 16  43  0.72881356
## X1  8 138  0.05479452
summary(rf_mtry$pred)
##       mtry   pred      obs             X0               X1        
##  Min.   :2   X0: 228   X0: 590   Min.   :0.0100   Min.   :0.1640  
##  1st Qu.:2   X1:1822   X1:1460   1st Qu.:0.1140   1st Qu.:0.6640  
##  Median :2                       Median :0.2000   Median :0.8000  
##  Mean   :2                       Mean   :0.2448   Mean   :0.7552  
##  3rd Qu.:2                       3rd Qu.:0.3360   3rd Qu.:0.8860  
##  Max.   :2                       Max.   :0.8360   Max.   :0.9900  
##     rowIndex     Resample        
##  Min.   :  1   Length:2050       
##  1st Qu.: 52   Class :character  
##  Median :103   Mode  :character  
##  Mean   :103                     
##  3rd Qu.:154                     
##  Max.   :205
summary(rf_mtry$pred$pred)
##   X0   X1 
##  228 1822
summary(rf_mtry$pred$obs)
##   X0   X1 
##  590 1460
head(rf_mtry$pred$X0,20)
##  [1] 0.562 0.240 0.244 0.434 0.158 0.058 0.050 0.174 0.150 0.128 0.062 0.146
## [13] 0.124 0.054 0.114 0.508 0.130 0.266 0.276 0.286
head(rf_mtry$pred$X1,20)
##  [1] 0.438 0.760 0.756 0.566 0.842 0.942 0.950 0.826 0.850 0.872 0.938 0.854
## [13] 0.876 0.946 0.886 0.492 0.870 0.734 0.724 0.714
summary(data_train$true_troglofauna)
##   0   1 
##  59 146
head(rf_mtry$pred,20)
##    mtry pred obs    X0    X1 rowIndex    Resample
## 1     2   X0  X0 0.562 0.438       83 Fold5.Rep01
## 2     2   X1  X1 0.240 0.760       27 Fold4.Rep01
## 3     2   X1  X1 0.244 0.756      190 Fold2.Rep02
## 4     2   X1  X0 0.434 0.566       37 Fold4.Rep01
## 5     2   X1  X1 0.158 0.842       38 Fold3.Rep06
## 6     2   X1  X1 0.058 0.942       22 Fold3.Rep06
## 7     2   X1  X1 0.050 0.950       41 Fold2.Rep01
## 8     2   X1  X1 0.174 0.826       39 Fold2.Rep01
## 9     2   X1  X1 0.150 0.850       50 Fold4.Rep01
## 10    2   X1  X1 0.128 0.872      175 Fold3.Rep02
## 11    2   X1  X1 0.062 0.938       22 Fold4.Rep01
## 12    2   X1  X0 0.146 0.854       24 Fold4.Rep01
## 13    2   X1  X1 0.124 0.876       70 Fold5.Rep01
## 14    2   X1  X1 0.054 0.946       43 Fold4.Rep01
## 15    2   X1  X1 0.114 0.886       38 Fold2.Rep01
## 16    2   X0  X1 0.508 0.492       58 Fold5.Rep02
## 17    2   X1  X1 0.130 0.870       44 Fold3.Rep01
## 18    2   X1  X1 0.266 0.734       27 Fold4.Rep02
## 19    2   X1  X1 0.276 0.724      205 Fold2.Rep02
## 20    2   X1  X0 0.286 0.714      188 Fold5.Rep09
sapply(rf_mtry$pred, class)
##        mtry        pred         obs          X0          X1    rowIndex 
##   "numeric"    "factor"    "factor"   "numeric"   "numeric"   "integer" 
##    Resample 
## "character"
rf_mtry$results$Sens
##  [1] 0.2459091 0.3392424 0.3409091 0.3660606 0.3628788 0.3745455 0.3730303
##  [8] 0.3760606 0.3725758 0.3798485 0.3828788 0.3778788 0.3863636
rf_mtry$results$SensSD
##  [1] 0.1182204 0.1335718 0.1154007 0.1215059 0.1143735 0.1073794 0.1090076
##  [8] 0.1159349 0.1213540 0.1258394 0.1124938 0.1267171 0.1120326
rf_mtry$results$Spec
##  [1] 0.9430805 0.9088736 0.9012414 0.8916092 0.8827816 0.8806207 0.8779310
##  [8] 0.8751264 0.8697471 0.8663448 0.8615632 0.8608966 0.8602529
rf_mtry$results$SpecSD
##  [1] 0.03973404 0.03829715 0.03741237 0.04376342 0.04602536 0.04531782
##  [7] 0.04626581 0.04820083 0.04951083 0.04659625 0.04763386 0.05113169
## [13] 0.04677013
rf_mtry$results$ROC
##  [1] 0.7587364 0.7535971 0.7429007 0.7405398 0.7332342 0.7281541 0.7268698
##  [8] 0.7284028 0.7225212 0.7202178 0.7228754 0.7192207 0.7137715
rf_mtry$results$ROCSD
##  [1] 0.06641356 0.06675007 0.05969507 0.05936158 0.05996436 0.06182858
##  [7] 0.06063451 0.06119038 0.05918342 0.05979263 0.06102751 0.05808001
## [13] 0.06219119
rf_mtry$finalModel$confusion
##    X0  X1 class.error
## X0 16  43  0.72881356
## X1  8 138  0.05479452
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
# Collect the resampled sensitivity, specificity and ROC estimates, together
# with their *SD columns, for every tuned mtry value into one table.
result <- with(
  rf_mtry$results,
  data.frame(
    Sens = Sens, SensSD = SensSD,
    Spec = Spec, SpecSD = SpecSD,
    ROC = ROC, ROCSD = ROCSD
  )
)
result
##         Sens    SensSD      Spec     SpecSD       ROC      ROCSD
## 1  0.2459091 0.1182204 0.9430805 0.03973404 0.7587364 0.06641356
## 2  0.3392424 0.1335718 0.9088736 0.03829715 0.7535971 0.06675007
## 3  0.3409091 0.1154007 0.9012414 0.03741237 0.7429007 0.05969507
## 4  0.3660606 0.1215059 0.8916092 0.04376342 0.7405398 0.05936158
## 5  0.3628788 0.1143735 0.8827816 0.04602536 0.7332342 0.05996436
## 6  0.3745455 0.1073794 0.8806207 0.04531782 0.7281541 0.06182858
## 7  0.3730303 0.1090076 0.8779310 0.04626581 0.7268698 0.06063451
## 8  0.3760606 0.1159349 0.8751264 0.04820083 0.7284028 0.06119038
## 9  0.3725758 0.1213540 0.8697471 0.04951083 0.7225212 0.05918342
## 10 0.3798485 0.1258394 0.8663448 0.04659625 0.7202178 0.05979263
## 11 0.3828788 0.1124938 0.8615632 0.04763386 0.7228754 0.06102751
## 12 0.3778788 0.1267171 0.8608966 0.05113169 0.7192207 0.05808001
## 13 0.3863636 0.1120326 0.8602529 0.04677013 0.7137715 0.06219119
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
V_Imp_rf_mtry <- varImp(rf_mtry, scale = FALSE)
V_Imp_rf_mtry
## rf variable importance
## 
##   only 20 most important variables shown (out of 48)
## 
##                                                 Importance
## HubNamenormal, exposed, tick on downthrown side      7.618
## month_collectionMarch                                6.297
## bio21                                                6.212
## srain2mp                                             5.182
## bio16                                                4.945
## pil_slps                                             4.270
## nutrientsn                                           3.940
## rugg500s                                             3.677
## slope                                                3.410
## lf7rup                                               3.328
## month_collectionJune                                 3.291
## bdensity50n                                          3.133
## month_collectionSeptember                            3.043
## pil_topos                                            2.896
## month_collectionAugust                               2.597
## hstructn                                             2.590
## geolrngaggn                                          2.583
## UNITNAMEWittenoom Formation                          2.363
## HubNamesyncline, exposed                             2.312
## HubDist                                              2.266
# Plot the unscaled variable importance.
# Title corrected from "Area_01": this script analyses Area 02 (it reads
# Area_02_Filtered_.csv and every other figure is titled Area 2).
plot(V_Imp_rf_mtry, main = "Variable Importance - Area_02")

V_Imp_rf_mtry <- varImp(rf_mtry, scale = TRUE)
V_Imp_rf_mtry
## rf variable importance
## 
##   only 20 most important variables shown (out of 48)
## 
##                                                 Importance
## HubNamenormal, exposed, tick on downthrown side     100.00
## month_collectionMarch                                85.22
## bio21                                                84.26
## srain2mp                                             72.75
## bio16                                                70.09
## pil_slps                                             62.55
## nutrientsn                                           58.85
## rugg500s                                             55.91
## slope                                                52.92
## lf7rup                                               52.00
## month_collectionJune                                 51.59
## bdensity50n                                          49.82
## month_collectionSeptember                            48.82
## pil_topos                                            47.17
## month_collectionAugust                               43.83
## hstructn                                             43.75
## geolrngaggn                                          43.67
## UNITNAMEWittenoom Formation                          41.21
## HubNamesyncline, exposed                             40.64
## HubDist                                              40.12
# Plot the scaled (0-100) variable importance.
# Title corrected from "Area_01": this script analyses Area 02 (it reads
# Area_02_Filtered_.csv and every other figure is titled Area 2).
plot(V_Imp_rf_mtry, main = "Variable Importance - Area_02")

library(pdp)
## Warning: package 'pdp' was built under R version 4.3.3
# Partial dependence of the predicted probability of class "X1" on the month
# of collection, saved as a stand-alone PDF figure.

# Numeric labels "1".."12" shown on the x axis instead of the month names;
# the text() legend below maps each number to a month.
# NOTE(review): `names` relabels the groups in the order of the factor levels.
# Confirm that month_collection has 12 levels stored in calendar order (not
# the default alphabetical order), otherwise labels and legend are misleading.
name2 <- as.character(1:12)

pd_month_collection <- partial(
  rf_mtry,
  pred.var = "month_collection",
  quantiles = FALSE,  # spelled out: `F` is an ordinary, reassignable variable
  prob = TRUE,
  which.class = "X1",
  grid.resolution = 100
)

# Set file path for saving the plot
# NOTE(review): absolute, user-specific path; pdf() fails on any machine where
# this directory does not exist.
file_path <- "C:/Users/23276776/OneDrive - UWA/DARE/9 - Paper 1/1_Figures/Arrumando_Figuras_LaTExxxX/Area2_PDP_month_collection.pdf"

# Open a PDF device (size in inches)
pdf(file = file_path, width = 9, height = 6)

# Create the plot
plot(
  pd_month_collection,
  main = "Area 2 - Month of collection",
  ylim = c(0.0, 1.0),
  names = name2,
  cex.lab = 1.4,   # Font size for axis titles
  cex.axis = 1.9,  # Font size for axis numbers
  cex.main = 1.9,  # Font size for the title
  cex = 1.6,       # Font size for any other text
  ylab = "Predicted outcome (yhat)"
)

# Optional reference line at y = 0.8 (disabled)
#abline(h = 0.8, col = "red", lty = 2, lwd = 2)  # `lty = 2` for dashed line, `lwd` for line width

# Two-column legend mapping the numeric axis labels to month names
text(1.5, 0.1,
     "1 - Jan\n2 - Feb\n3 - Mar\n4 - Apr\n5 - May\n6 - Jun",
     cex = 1.4,
     adj = c(0, 0))

text(3.5, 0.1,
     "7 - Jul\n8 - Aug\n9 - Sept\n10 - Oct\n11 - Nov\n12 - Dec",
     cex = 1.4,
     adj = c(0, 0))

# Close the PDF device
dev.off()
## png 
##   2
# Display labels for the UNITNAME categories, passed to plot() via `names`.
# NOTE(review): `names` relabels groups positionally; confirm this order is
# the order of the UNITNAME factor levels (default levels are alphabetical).
unitname_labels <- c(
  "Brockman Iron Formation",
  "Wittenoom Formation",
  "Weeli Wolli Formation",
  "Mount McRae Shale and Mount Sylvia Formation",
  "Marra Mamba Iron Formation"
)

# Partial dependence of the predicted probability of class "X1" on UNITNAME
pd_unitname <- partial(
  rf_mtry,
  pred.var = "UNITNAME",
  which.class = "X1",
  prob = TRUE,
  quantiles = FALSE,
  grid.resolution = 100
)

# Draw the partial-dependence plot on a fixed 0-1 probability axis
plot(
  pd_unitname,
  names = unitname_labels,
  las = 2,                            # Rotate x-axis labels for readability
  ylim = c(0.0, 1.0),
  main = "Partial Dependence of UNITNAME",
  ylab = "Predicted Outcome (yhat)",
  cex = 1.6,                          # Font size for any other text
  cex.main = 1.9,                     # Font size for the title
  cex.axis = 1,                       # Font size for axis numbers
  cex.lab = 1                         # Font size for axis titles
)

# In-plot annotations, anchored at their bottom-left corner (adj = c(0, 0))
text(
  1.5, 0.1,
  "Brockman Iron Formation\nWittenoom Formation",
  adj = c(0, 0),
  cex = 1.4
)

text(
  3.5, 0.1,
  "Mount McRae Shale and Mount Sylvia Formation\nMarra Mamba Iron Formation",
  adj = c(0, 0),
  cex = 1.4
)

# Partial-dependence plots (PDPs) for individual numeric predictors, each
# followed (where one was drawn originally) by a histogram of that predictor
# in data2. The repeated partial()/plot()/hist() calls share three helpers;
# the pd_* objects keep their names because later code reuses them.

# Partial dependence of the predicted probability of class "X1" on a single
# predictor of the tuned model rf_mtry.
#   pred_var: name of the predictor column (character scalar).
# Returns the object produced by partial().
compute_pd_x1 <- function(pred_var) {
  partial(
    rf_mtry,
    pred.var = pred_var,
    type = "classification",
    prob = TRUE,
    which.class = "X1",
    grid.resolution = 100
  )
}

# Draw one PDP with the font sizes used throughout this section.
#   pd:      object returned by compute_pd_x1().
#   title:   main title of the plot.
#   cex_lab: font size for axis titles (two plots below use 1.8).
# Called for its side effect; returns NULL invisibly.
plot_pd <- function(pd, title, cex_lab = 1.9) {
  plot(
    pd,
    main = title,
    ylab = "Predicted outcome (yhat)",
    cex.lab = cex_lab,  # Font size for axis titles
    cex.axis = 1.9,     # Font size for axis numbers
    cex.main = 1.9,     # Font size for the title
    cex = 1.9           # Font size for any other text
  )
  invisible(NULL)
}

# Draw the histogram of one column of data2.
#   pred_var: name of the column (character scalar); also used for the title
#             ("Histogram of <pred_var>") and the x-axis label.
#   cex_all:  single font size applied to axis titles, axis numbers, title
#             and other text.
# Called for its side effect; returns NULL invisibly.
plot_predictor_hist <- function(pred_var, cex_all = 1.9) {
  hist(
    data2[[pred_var]],
    main = paste("Histogram of", pred_var),
    xlab = pred_var,
    border = "black",
    cex.lab = cex_all,
    cex.axis = cex_all,
    cex.main = cex_all,
    cex = cex_all,
    col = "aquamarine4"  # Fill colour of the histogram bars
  )
  invisible(NULL)
}

# bio21
pd_bio21 <- compute_pd_x1("bio21")
plot_pd(pd_bio21, "Area_02 - Highest Period Radiation")
plot_predictor_hist("bio21")

# srain2mp
pd_srain2mp <- compute_pd_x1("srain2mp")
plot_pd(pd_srain2mp, "Area_02 - Equinox rainfall seasonality ratio")
plot_predictor_hist("srain2mp")

# bio16
pd_bio16 <- compute_pd_x1("bio16")
plot_pd(pd_bio16, "Area_02 - Precipitation of Wettest Quarter")
plot_predictor_hist("bio16")

# pil_slps (its histogram uses the smaller 1.5 font size)
pd_pil_slps <- compute_pd_x1("pil_slps")
plot_pd(pd_pil_slps, "Area_02 - Standard deviation of percent slope")
plot_predictor_hist("pil_slps", cex_all = 1.5)

# nutrientsn
pd_nutrientsn <- compute_pd_x1("nutrientsn")
plot_pd(pd_nutrientsn, "Area_02 - Gross soil nutrient status")
plot_predictor_hist("nutrientsn")

# rugg500s (PDP only; axis titles at 1.8)
pd_rugg500s <- compute_pd_x1("rugg500s")
plot_pd(
  pd_rugg500s,
  "Area_02 - Standard deviation of terrain ruggedness",
  cex_lab = 1.8
)

# pil_topos (PDP only; axis titles at 1.8)
pd_pil_topos <- compute_pd_x1("pil_topos")
plot_pd(
  pd_pil_topos,
  "Area_02 -  Standard deviation of topographic class",
  cex_lab = 1.8
)

# Export ----
# (Previously a bare `Export` heading, which R would evaluate as a lookup of
# an undefined object and stop with an error when the file is sourced.)

# Write a 5 x 2 figure to varA2.pdf: one row per predictor, with its
# partial-dependence plot on the left and its histogram in data2 on the
# right. Both panels get a rug of the training values of that predictor.
pdf("varA2.pdf", width = 14, height = 20)

# 5 rows and 2 columns, filled row by row
par(
  mfrow = c(5, 2),
  mar = c(4.5, 4.5, 2, 1),  # Margins for each plot (bottom, left, top, right)
  oma = c(2, 2, 2, 2)       # Outer margins
)

# One entry per row of the figure, keyed by the predictor's column name.
export_panels <- list(
  bio21 = list(
    pd = pd_bio21,
    title = "Area 2 - Highest Period Radiation"
  ),
  srain2mp = list(
    pd = pd_srain2mp,
    title = "Area 2 - Equinox rainfall seasonality ratio"
  ),
  bio16 = list(
    pd = pd_bio16,
    title = "Area 2 - Precipitation of wettest quarter"
  ),
  pil_slps = list(
    pd = pd_pil_slps,
    title = "Area 2 - Standard deviation of percent slope"
  ),
  nutrientsn = list(
    pd = pd_nutrientsn,
    title = "Area 2 - Gross soil nutrient status"
  )
)

for (var_name in names(export_panels)) {
  panel <- export_panels[[var_name]]

  # Left panel: partial dependence, with the training values as a rug
  plot(
    panel$pd,
    main = panel$title,
    ylab = "Predicted outcome (yhat)",
    cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9
  )
  rug(data_train[[var_name]])

  # Right panel: histogram over data2, again with the training rug
  hist(
    data2[[var_name]],
    main = paste("Histogram of", var_name),
    xlab = var_name,
    border = "black",
    cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9,
    col = "aquamarine4"
  )
  rug(data_train[[var_name]])
}

# Close the PDF device
dev.off()
## png 
##   2
dim(data_test)
## [1] 123  28
colnames(data_test)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"        "true_troglofauna"
# Keep only the predictor columns of the test set by dropping the response
# column by name. The previous `-c((ncol(x) - 0):ncol(x))` removed whatever
# column happened to be last, which would silently break if columns moved.
selectcol_data_test <- data_test[
  , names(data_test) != "true_troglofauna",
  drop = FALSE
]
dim(selectcol_data_test)
## [1] 123  27
names(selectcol_data_test)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"
set.seed(78945)

predictions <- predict(rf_mtry, newdata = selectcol_data_test,type = "prob")#
head(predictions,5)
##         X0    X1
## 157  0.442 0.558
## 407  0.420 0.580
## 258  0.220 0.780
## 1805 0.248 0.752
## 433  0.442 0.558
dim(predictions)
## [1] 123   2
sapply(predictions,class)
##        X0        X1 
## "numeric" "numeric"
summary(predictions)
##        X0               X1        
##  Min.   :0.0100   Min.   :0.3620  
##  1st Qu.:0.1070   1st Qu.:0.6840  
##  Median :0.2020   Median :0.7980  
##  Mean   :0.2308   Mean   :0.7692  
##  3rd Qu.:0.3160   3rd Qu.:0.8930  
##  Max.   :0.6380   Max.   :0.9900
set.seed(78945)
predictions_raw <- predict(rf_mtry, newdata = selectcol_data_test,type = "raw")#
head(predictions_raw,5)
## [1] X1 X1 X1 X1 X1
## Levels: X0 X1
length(predictions_raw)
## [1] 123
head(sapply(predictions_raw,class))
## [1] "factor" "factor" "factor" "factor" "factor" "factor"
summary(predictions_raw)
##  X0  X1 
##   9 114
set.seed(78945)

predictions1 <- predict(rf_mtry, newdata = selectcol_data_test)
head(predictions1,5)
## [1] X1 X1 X1 X1 X1
## Levels: X0 X1
length(predictions1)
## [1] 123
head(sapply(predictions1,class))
## [1] "factor" "factor" "factor" "factor" "factor" "factor"
summary(predictions1)
##  X0  X1 
##   9 114
levels(predictions1) <- c(0,1)
head(predictions1,5)
## [1] 1 1 1 1 1
## Levels: 0 1
summary(predictions1)
##   0   1 
##   9 114
set.seed(78945)
CM_predictions1<-confusionMatrix(predictions1, ContraProva,positive="1")
CM_predictions1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  4  5
##          1 31 83
##                                           
##                Accuracy : 0.7073          
##                  95% CI : (0.6185, 0.7859)
##     No Information Rate : 0.7154          
##     P-Value [Acc > NIR] : 0.6227          
##                                           
##                   Kappa : 0.074           
##                                           
##  Mcnemar's Test P-Value : 3.091e-05       
##                                           
##             Sensitivity : 0.9432          
##             Specificity : 0.1143          
##          Pos Pred Value : 0.7281          
##          Neg Pred Value : 0.4444          
##              Prevalence : 0.7154          
##          Detection Rate : 0.6748          
##    Detection Prevalence : 0.9268          
##       Balanced Accuracy : 0.5287          
##                                           
##        'Positive' Class : 1               
## 
str(CM_predictions1)
## List of 6
##  $ positive: chr "1"
##  $ table   : 'table' int [1:2, 1:2] 4 31 5 83
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ Prediction: chr [1:2] "0" "1"
##   .. ..$ Reference : chr [1:2] "0" "1"
##  $ overall : Named num [1:7] 0.707 0.074 0.619 0.786 0.715 ...
##   ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
##  $ byClass : Named num [1:11] 0.943 0.114 0.728 0.444 0.728 ...
##   ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
##  $ mode    : chr "sens_spec"
##  $ dots    : list()
##  - attr(*, "class")= chr "confusionMatrix"
CM_predictions1$byClass[1]# Sensitivity of CM_predictions1
## Sensitivity 
##   0.9431818
CM_predictions1$byClass[2]# Specificity of CM_predictions1
## Specificity 
##   0.1142857
CM_predictions1$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9431818            0.1142857            0.7280702 
##       Neg Pred Value            Precision               Recall 
##            0.4444444            0.7280702            0.9431818 
##                   F1           Prevalence       Detection Rate 
##            0.8217822            0.7154472            0.6747967 
## Detection Prevalence    Balanced Accuracy 
##            0.9268293            0.5287338
CM_predictions1$byClass["Sensitivity"]
## Sensitivity 
##   0.9431818
CM_predictions1$byClass[1]
## Sensitivity 
##   0.9431818
CM_predictions1$byClass["Balanced Accuracy"]
## Balanced Accuracy 
##         0.5287338
CM_predictions1$byClass[11]
## Balanced Accuracy 
##         0.5287338
CM_predictions1$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.073171e-01   7.402760e-02   6.185043e-01   7.858625e-01   7.154472e-01 
## AccuracyPValue  McnemarPValue 
##   6.227260e-01   3.090859e-05
CM_predictions1$overall["McnemarPValue"]
## McnemarPValue 
##  3.090859e-05
CM_predictions1$overall[7]
## McnemarPValue 
##  3.090859e-05
CM_predictions1$table
##           Reference
## Prediction  0  1
##          0  4  5
##          1 31 83
# Confusion-matrix cells with "1" as the positive class. The table has
# Prediction in rows and Reference in columns; cells are addressed by their
# dimnames rather than by linear position, so they do not depend on the
# column-major layout or on the order of the factor levels.
tn <- CM_predictions1$table["0", "0"]  # TN: predicted 0, reference 0
fp <- CM_predictions1$table["1", "0"]  # FP: predicted 1, reference 0
fn <- CM_predictions1$table["0", "1"]  # FN: predicted 0, reference 1
tp <- CM_predictions1$table["1", "1"]  # TP: predicted 1, reference 1

# Youden's J statistic: J = sensitivity + specificity - 1.
# The two metrics are looked up by name instead of by position in byClass.
# The result of the arithmetic inherits the name of its first operand
# ("Sensitivity"), which is why the first print shows that label.
J_CM_predictions1 <- CM_predictions1$byClass["Sensitivity"] +
  CM_predictions1$byClass["Specificity"] - 1
J_CM_predictions1
## Sensitivity 
##  0.05746753
# Strip the misleading inherited name to keep a plain numeric value.
J_CM_predictions1 <- as.numeric(J_CM_predictions1)
J_CM_predictions1
## [1] 0.05746753
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
mcc(predictions1, ContraProva)
## [1] 0.09956902
get_confusion_elements(CM_predictions1)
## [1] 83 31  4  5
get_confusion_elements(CM_predictions1)[1]
## [1] 83
calculate_mcc(tp, fp, tn, fn)
## [1] 0.09956902
calculate_mcc1(CM_predictions1)
## [1] 0.09956902
calculate_F2(CM_predictions1)
## [1] 0.8905579
# Threshold the predicted class probabilities at 0.5. `predictions` holds both
# the X0 and the X1 probability columns, so the rule is applied to each column
# and the result is a two-column character matrix. Only its second (X1) column
# is a meaningful class call (P(X1) < 0.5 -> "X0", otherwise "X1"); that is
# the column extracted further down.
model_pred_class <- ifelse(predictions < 0.5, "X0", "X1")
head(model_pred_class,5)
##      X0   X1  
## 157  "X0" "X1"
## 407  "X0" "X1"
## 258  "X0" "X1"
## 1805 "X0" "X1"
## 433  "X0" "X1"
dim(model_pred_class)
## [1] 123   2
length(ContraProva)
## [1] 123
summary(model_pred_class)
##       X0                 X1           
##  Length:123         Length:123        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
# Class calls from the X1 probability column, as a factor with both levels
# declared explicitly. With as.factor(), a threshold that yields a single
# predicted class would give a one-level factor, and the later
# `levels(Test1) <- c(0,1)` would then silently relabel that class as "0".
Test1 <- factor(model_pred_class[, 2], levels = c("X0", "X1"))
head(Test1)
##  157  407  258 1805  433  732 
##   X1   X1   X1   X1   X1   X1 
## Levels: X0 X1
summary(Test1)
##  X0  X1 
##   9 114
levels(Test1) <- c(0,1)
summary(Test1)
##   0   1 
##   9 114
head(Test1)
##  157  407  258 1805  433  732 
##    1    1    1    1    1    1 
## Levels: 0 1
confusionMatrix(Test1, ContraProva,positive="1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  4  5
##          1 31 83
##                                           
##                Accuracy : 0.7073          
##                  95% CI : (0.6185, 0.7859)
##     No Information Rate : 0.7154          
##     P-Value [Acc > NIR] : 0.6227          
##                                           
##                   Kappa : 0.074           
##                                           
##  Mcnemar's Test P-Value : 3.091e-05       
##                                           
##             Sensitivity : 0.9432          
##             Specificity : 0.1143          
##          Pos Pred Value : 0.7281          
##          Neg Pred Value : 0.4444          
##              Prevalence : 0.7154          
##          Detection Rate : 0.6748          
##    Detection Prevalence : 0.9268          
##       Balanced Accuracy : 0.5287          
##                                           
##        'Positive' Class : 1               
## 
summary(Test1); summary(predictions1)
##   0   1 
##   9 114
##   0   1 
##   9 114
# Same thresholding with a stricter cutoff of 0.7. The rule is applied to both
# probability columns of `predictions`; only the second (X1) column of the
# result is a meaningful class call (P(X1) < 0.7 -> "X0", otherwise "X1").
model_pred_class <- ifelse(predictions < 0.7, "X0", "X1")

head(model_pred_class,20)
##      X0   X1  
## 157  "X0" "X0"
## 407  "X0" "X0"
## 258  "X0" "X1"
## 1805 "X0" "X1"
## 433  "X0" "X0"
## 732  "X0" "X1"
## 1475 "X0" "X1"
## 957  "X0" "X1"
## 1704 "X0" "X1"
## 108  "X0" "X0"
## 961  "X0" "X1"
## 180  "X0" "X1"
## 665  "X0" "X1"
## 1119 "X0" "X1"
## 1369 "X0" "X0"
## 943  "X0" "X1"
## 283  "X0" "X1"
## 788  "X0" "X1"
## 605  "X0" "X1"
## 1601 "X0" "X1"
dim(model_pred_class)
## [1] 123   2
length(ContraProva)
## [1] 123
head(sapply(model_pred_class, class))
##          X0          X0          X0          X0          X0          X0 
## "character" "character" "character" "character" "character" "character"
# Class calls from the X1 probability column, as a factor with both levels
# declared explicitly. With as.factor(), a threshold that yields a single
# predicted class would give a one-level factor, and the later
# `levels(Test11) <- c(0,1)` would then silently relabel that class as "0".
Test11 <- factor(model_pred_class[, 2], levels = c("X0", "X1"))
summary(Test11)
## X0 X1 
## 34 89
head(Test11)
##  157  407  258 1805  433  732 
##   X0   X0   X1   X1   X0   X1 
## Levels: X0 X1
levels(Test11) <- c(0,1)
summary(Test11)
##  0  1 
## 34 89
summary(predictions1)
##   0   1 
##   9 114
CM_Test11<-confusionMatrix(Test11, ContraProva,positive="1")
CM_Test11
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 19 15
##          1 16 73
##                                           
##                Accuracy : 0.748           
##                  95% CI : (0.6617, 0.8219)
##     No Information Rate : 0.7154          
##     P-Value [Acc > NIR] : 0.2445          
##                                           
##                   Kappa : 0.3756          
##                                           
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.8295          
##             Specificity : 0.5429          
##          Pos Pred Value : 0.8202          
##          Neg Pred Value : 0.5588          
##              Prevalence : 0.7154          
##          Detection Rate : 0.5935          
##    Detection Prevalence : 0.7236          
##       Balanced Accuracy : 0.6862          
##                                           
##        'Positive' Class : 1               
## 
str(CM_Test11)
## List of 6
##  $ positive: chr "1"
##  $ table   : 'table' int [1:2, 1:2] 19 16 15 73
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ Prediction: chr [1:2] "0" "1"
##   .. ..$ Reference : chr [1:2] "0" "1"
##  $ overall : Named num [1:7] 0.748 0.376 0.662 0.822 0.715 ...
##   ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
##  $ byClass : Named num [1:11] 0.83 0.543 0.82 0.559 0.82 ...
##   ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
##  $ mode    : chr "sens_spec"
##  $ dots    : list()
##  - attr(*, "class")= chr "confusionMatrix"
CM_Test11$byClass["Sensitivity"]  # Sensitivity of CM_Test11
## Sensitivity 
##   0.8295455
CM_Test11$byClass["Specificity"]  # Specificity of CM_Test11
## Specificity 
##   0.5428571
# Youden's J statistic for the 0.7 cutoff: sensitivity + specificity - 1.
# Metrics are looked up by name rather than by position in byClass.
J_CM_Test11 <- as.numeric(
  CM_Test11$byClass["Sensitivity"] + CM_Test11$byClass["Specificity"] - 1
)
J_CM_Test11
## [1] 0.3724026
confusionMatrix(predictions1, ContraProva,positive="1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  4  5
##          1 31 83
##                                           
##                Accuracy : 0.7073          
##                  95% CI : (0.6185, 0.7859)
##     No Information Rate : 0.7154          
##     P-Value [Acc > NIR] : 0.6227          
##                                           
##                   Kappa : 0.074           
##                                           
##  Mcnemar's Test P-Value : 3.091e-05       
##                                           
##             Sensitivity : 0.9432          
##             Specificity : 0.1143          
##          Pos Pred Value : 0.7281          
##          Neg Pred Value : 0.4444          
##              Prevalence : 0.7154          
##          Detection Rate : 0.6748          
##    Detection Prevalence : 0.9268          
##       Balanced Accuracy : 0.5287          
##                                           
##        'Positive' Class : 1               
## 
dim(finalTest)
## [1] 81 28
names(finalTest)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"        "true_troglofauna"
# Drop the last column (the true_troglofauna response) so that only the
# predictor columns are handed to predict(). `-ncol(x)` replaces the original
# no-op range `(ncol(x) - 0):ncol(x)`; drop = FALSE guarantees a data frame.
selectcol_finalTest <- finalTest[, -ncol(finalTest), drop = FALSE]
dim(selectcol_finalTest)
## [1] 81 27
names(selectcol_finalTest)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"
predictions_2 <- predict(rf_mtry, newdata = selectcol_finalTest,type = "raw")

summary(predictions_2)
## X0 X1 
##  4 77
summary(finalTest[, ncol(finalTest)])
##  0  1 
## 23 58
# Relabel the predicted classes by name (X0 -> 0, X1 -> 1) so they match the
# 0/1 coding of the reference column. An explicit mapping does not depend on
# the order in which the factor happens to store its levels.
predictions_2 <- factor(
  predictions_2,
  levels = c("X0", "X1"),
  labels = c("0", "1")
)
summary(predictions_2)
##  0  1 
##  4 77
# Confusion matrix of the hard class predictions on finalTest against the
# observed true_troglofauna column (the last column of finalTest); "1" is the
# positive class.
CM_predictions_2 <- confusionMatrix(
  data = predictions_2,
  reference = finalTest$true_troglofauna,
  positive = "1"
)
CM_predictions_2
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  3  1
##          1 20 57
##                                           
##                Accuracy : 0.7407          
##                  95% CI : (0.6314, 0.8318)
##     No Information Rate : 0.716           
##     P-Value [Acc > NIR] : 0.3618          
##                                           
##                   Kappa : 0.1508          
##                                           
##  Mcnemar's Test P-Value : 8.568e-05       
##                                           
##             Sensitivity : 0.9828          
##             Specificity : 0.1304          
##          Pos Pred Value : 0.7403          
##          Neg Pred Value : 0.7500          
##              Prevalence : 0.7160          
##          Detection Rate : 0.7037          
##    Detection Prevalence : 0.9506          
##       Balanced Accuracy : 0.5566          
##                                           
##        'Positive' Class : 1               
## 
mcc(predictions_2, finalTest[, ncol(finalTest)])
## [1] 0.235572
calculate_mcc1(CM_predictions_2)
## [1] 0.235572
calculate_F2(CM_predictions_2)
## [1] 0.9223301
predictions_3<- predict(rf_mtry, newdata = selectcol_finalTest,type = "prob")

summary(predictions_3)
##        X0               X1        
##  Min.   :0.0260   Min.   :0.3520  
##  1st Qu.:0.1080   1st Qu.:0.6740  
##  Median :0.1960   Median :0.8040  
##  Mean   :0.2254   Mean   :0.7746  
##  3rd Qu.:0.3260   3rd Qu.:0.8920  
##  Max.   :0.6480   Max.   :0.9740
sapply(predictions_3, class)
##        X0        X1 
## "numeric" "numeric"
head(predictions_3)
##         X0    X1
## 883  0.330 0.670
## 1171 0.158 0.842
## 307  0.470 0.530
## 1743 0.118 0.882
## 1820 0.220 0.780
## 176  0.076 0.924
# Label each probability "X0" when it is below 0.5 and "X1" otherwise.
# The comparison runs over both probability columns of predictions_3; only the
# second column (derived from the X1 probability) is used when Test_3 is built.
model_pred_class_3 <- ifelse(predictions_3 < 0.5, "X0", "X1")
head(sapply(model_pred_class_3, class))
##          X0          X0          X0          X0          X0          X0 
## "character" "character" "character" "character" "character" "character"
# Turn the X1-column labels into a factor that always carries both classes.
# as.factor() would create a single level if every prediction fell on one side
# of the cutoff, and the later positional relabel `levels(Test_3) <- c(0,1)`
# would then rename that lone level to "0" whatever class it really was.
Test_3 <- factor(model_pred_class_3[, 2], levels = c("X0", "X1"))
head(sapply(Test_3, class))
##      883     1171      307     1743     1820      176 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_3)
## [1] "X0" "X1"
head(Test_3)
##  883 1171  307 1743 1820  176 
##   X1   X1   X1   X1   X1   X1 
## Levels: X0 X1
levels(Test_3) <- c(0,1)
head(sapply(Test_3, class))
##      883     1171      307     1743     1820      176 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_3)
## [1] "0" "1"
head(Test_3)
##  883 1171  307 1743 1820  176 
##    1    1    1    1    1    1 
## Levels: 0 1
summary(Test_3)
##  0  1 
##  4 77
# Confusion matrix of the 0.5-cutoff classes (Test_3) against the observed
# true_troglofauna column of finalTest; "1" is the positive class.
CM_predictions_3 <- confusionMatrix(
  data = Test_3,
  reference = finalTest$true_troglofauna,
  positive = "1"
)
CM_predictions_3
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  3  1
##          1 20 57
##                                           
##                Accuracy : 0.7407          
##                  95% CI : (0.6314, 0.8318)
##     No Information Rate : 0.716           
##     P-Value [Acc > NIR] : 0.3618          
##                                           
##                   Kappa : 0.1508          
##                                           
##  Mcnemar's Test P-Value : 8.568e-05       
##                                           
##             Sensitivity : 0.9828          
##             Specificity : 0.1304          
##          Pos Pred Value : 0.7403          
##          Neg Pred Value : 0.7500          
##              Prevalence : 0.7160          
##          Detection Rate : 0.7037          
##    Detection Prevalence : 0.9506          
##       Balanced Accuracy : 0.5566          
##                                           
##        'Positive' Class : 1               
## 
calculate_mcc1(CM_predictions_3)
## [1] 0.235572
calculate_F2(CM_predictions_3)
## [1] 0.9223301
testrf_mtry<- predict(rf_mtry)
summary(testrf_mtry)
##  X0  X1 
##  42 163
# Relabel the predicted classes by name (X0 -> 0, X1 -> 1) instead of by
# position, so the mapping cannot be affected by the stored level order.
testrf_mtry <- factor(
  testrf_mtry,
  levels = c("X0", "X1"),
  labels = c("0", "1")
)
summary(testrf_mtry)
##   0   1 
##  42 163
# Confusion matrix of the classes returned by predict(rf_mtry) against the
# last column of data_train, with "1" as the positive class.
CM_testrf_mtry <- confusionMatrix(
  data = testrf_mtry,
  reference = data_train[, ncol(data_train)],
  positive = "1"
)
CM_testrf_mtry
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0  41   1
##          1  18 145
##                                          
##                Accuracy : 0.9073         
##                  95% CI : (0.859, 0.9433)
##     No Information Rate : 0.7122         
##     P-Value [Acc > NIR] : 7.833e-12      
##                                          
##                   Kappa : 0.7527         
##                                          
##  Mcnemar's Test P-Value : 0.0002419      
##                                          
##             Sensitivity : 0.9932         
##             Specificity : 0.6949         
##          Pos Pred Value : 0.8896         
##          Neg Pred Value : 0.9762         
##              Prevalence : 0.7122         
##          Detection Rate : 0.7073         
##    Detection Prevalence : 0.7951         
##       Balanced Accuracy : 0.8440         
##                                          
##        'Positive' Class : 1              
## 
calculate_mcc1(CM_testrf_mtry)
## [1] 0.7718165
calculate_F2(CM_testrf_mtry)
## [1] 0.9705489
testrf_mtry1<- predict(rf_mtry,type="prob")
summary(testrf_mtry1)
##        X0               X1        
##  Min.   :0.0020   Min.   :0.1540  
##  1st Qu.:0.0600   1st Qu.:0.5780  
##  Median :0.1180   Median :0.8820  
##  Mean   :0.2429   Mean   :0.7571  
##  3rd Qu.:0.4220   3rd Qu.:0.9400  
##  Max.   :0.8460   Max.   :0.9980
# Label each probability "X0" when it is below 0.5 and "X1" otherwise.
# Both probability columns of testrf_mtry1 are compared; only the second
# column (from the X1 probability) is used when Test_4 is built.
model_pred_class_4 <- ifelse(testrf_mtry1 < 0.5, "X0", "X1")
summary(model_pred_class_4)
##       X0                 X1           
##  Length:205         Length:205        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
# Factor of the X1-column labels with both classes declared up front, so the
# later positional relabel `levels(Test_4) <- c(0,1)` maps X0 -> 0 and
# X1 -> 1 even if only one class was predicted.
Test_4 <- factor(model_pred_class_4[, 2], levels = c("X0", "X1"))
head(Test_4)
## 1001  171  865 1616 1750  903 
##   X1   X0   X0   X1   X1   X1 
## Levels: X0 X1
levels(Test_4) <- c(0,1);
summary(Test_4)
##   0   1 
##  42 163
# Confusion matrix of the 0.5-cutoff classes (Test_4) against the last column
# of data_train, with "1" as the positive class.
CM_testrf_mtry1 <- confusionMatrix(
  data = Test_4,
  reference = data_train[, ncol(data_train)],
  positive = "1"
)
CM_testrf_mtry1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0  41   1
##          1  18 145
##                                          
##                Accuracy : 0.9073         
##                  95% CI : (0.859, 0.9433)
##     No Information Rate : 0.7122         
##     P-Value [Acc > NIR] : 7.833e-12      
##                                          
##                   Kappa : 0.7527         
##                                          
##  Mcnemar's Test P-Value : 0.0002419      
##                                          
##             Sensitivity : 0.9932         
##             Specificity : 0.6949         
##          Pos Pred Value : 0.8896         
##          Neg Pred Value : 0.9762         
##              Prevalence : 0.7122         
##          Detection Rate : 0.7073         
##    Detection Prevalence : 0.7951         
##       Balanced Accuracy : 0.8440         
##                                          
##        'Positive' Class : 1              
## 
calculate_mcc1(CM_testrf_mtry1)
## [1] 0.7718165
calculate_F2(CM_testrf_mtry1)
## [1] 0.9705489
prediction.probabilities <- predictions[,"X1"]
head(prediction.probabilities,5)
## [1] 0.558 0.580 0.780 0.752 0.558
# Build the ROC curve of the X1 probabilities against the observed classes in
# ContraProva. Neither `levels` nor `direction` is passed, so pROC chooses them
# itself (control = 0, case = 1, controls < cases, per the messages it prints).
# NOTE(review): consider passing levels = c("0", "1") and direction = "<"
# explicitly so the choice cannot change with the data — confirm.
res.roc <-roc(ContraProva,prediction.probabilities)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
res.roc
## 
## Call:
## roc.default(response = ContraProva, predictor = prediction.probabilities)
## 
## Data: prediction.probabilities in 35 controls (ContraProva 0) < 88 cases (ContraProva 1).
## Area under the curve: 0.781
str(res.roc)
## List of 15
##  $ percent           : logi FALSE
##  $ sensitivities     : num [1:100] 1 1 1 0.989 0.977 ...
##  $ specificities     : num [1:100] 0 0.0286 0.0571 0.0571 0.0571 ...
##  $ thresholds        : num [1:100] -Inf 0.383 0.414 0.425 0.43 ...
##  $ direction         : chr "<"
##  $ cases             : num [1:88] 0.558 0.58 0.78 0.752 0.558 0.748 0.92 0.9 0.78 0.766 ...
##  $ controls          : num [1:35] 0.752 0.556 0.896 0.578 0.538 0.656 0.708 0.648 0.818 0.772 ...
##  $ fun.sesp          :function (thresholds, controls, cases, direction)  
##  $ auc               : 'auc' num 0.781
##   ..- attr(*, "partial.auc")= logi FALSE
##   ..- attr(*, "percent")= logi FALSE
##   ..- attr(*, "roc")=List of 15
##   .. ..$ percent           : logi FALSE
##   .. ..$ sensitivities     : num [1:100] 1 1 1 0.989 0.977 ...
##   .. ..$ specificities     : num [1:100] 0 0.0286 0.0571 0.0571 0.0571 ...
##   .. ..$ thresholds        : num [1:100] -Inf 0.383 0.414 0.425 0.43 ...
##   .. ..$ direction         : chr "<"
##   .. ..$ cases             : num [1:88] 0.558 0.58 0.78 0.752 0.558 0.748 0.92 0.9 0.78 0.766 ...
##   .. ..$ controls          : num [1:35] 0.752 0.556 0.896 0.578 0.538 0.656 0.708 0.648 0.818 0.772 ...
##   .. ..$ fun.sesp          :function (thresholds, controls, cases, direction)  
##   .. ..$ auc               : 'auc' num 0.781
##   .. .. ..- attr(*, "partial.auc")= logi FALSE
##   .. .. ..- attr(*, "percent")= logi FALSE
##   .. .. ..- attr(*, "roc")=List of 8
##   .. .. .. ..$ percent      : logi FALSE
##   .. .. .. ..$ sensitivities: num [1:100] 1 1 1 0.989 0.977 ...
##   .. .. .. ..$ specificities: num [1:100] 0 0.0286 0.0571 0.0571 0.0571 ...
##   .. .. .. ..$ thresholds   : num [1:100] -Inf 0.383 0.414 0.425 0.43 ...
##   .. .. .. ..$ direction    : chr "<"
##   .. .. .. ..$ cases        : num [1:88] 0.558 0.58 0.78 0.752 0.558 0.748 0.92 0.9 0.78 0.766 ...
##   .. .. .. ..$ controls     : num [1:35] 0.752 0.556 0.896 0.578 0.538 0.656 0.708 0.648 0.818 0.772 ...
##   .. .. .. ..$ fun.sesp     :function (thresholds, controls, cases, direction)  
##   .. .. .. ..- attr(*, "class")= chr "roc"
##   .. ..$ call              : language roc.default(response = ContraProva, predictor = prediction.probabilities)
##   .. ..$ original.predictor: num [1:123] 0.558 0.58 0.78 0.752 0.558 0.748 0.752 0.92 0.9 0.556 ...
##   .. ..$ original.response : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
##   .. ..$ predictor         : num [1:123] 0.558 0.58 0.78 0.752 0.558 0.748 0.752 0.92 0.9 0.556 ...
##   .. ..$ response          : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
##   .. ..$ levels            : chr [1:2] "0" "1"
##   .. ..- attr(*, "class")= chr "roc"
##  $ call              : language roc.default(response = ContraProva, predictor = prediction.probabilities)
##  $ original.predictor: num [1:123] 0.558 0.58 0.78 0.752 0.558 0.748 0.752 0.92 0.9 0.556 ...
##  $ original.response : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
##  $ predictor         : num [1:123] 0.558 0.58 0.78 0.752 0.558 0.748 0.752 0.92 0.9 0.556 ...
##  $ response          : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
##  $ levels            : chr [1:2] "0" "1"
##  - attr(*, "class")= chr "roc"
head(res.roc$cases,5) 
## [1] 0.558 0.580 0.780 0.752 0.558
length(res.roc$cases)
## [1] 88
summary(res.roc$cases)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.4240  0.7510  0.8420  0.8072  0.9140  0.9900
head(res.roc$controls,5) 
## [1] 0.752 0.556 0.896 0.578 0.538
length(res.roc$controls)
## [1] 35
summary(res.roc$controls)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3620  0.5860  0.6920  0.6736  0.7680  0.8960
head(res.roc$thresholds,5)
## [1]  -Inf 0.383 0.414 0.425 0.430
tail(res.roc$thresholds,5)
## [1] 0.947 0.962 0.975 0.984   Inf
length(res.roc$thresholds)
## [1] 100
summary(res.roc$thresholds)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    -Inf  0.6735  0.7885          0.8820     Inf
auc(res.roc)
## Area under the curve: 0.781
ci.auc(res.roc)
## 95% CI: 0.699-0.863 (DeLong)
ci.auc(res.roc,method = "bootstrap", boot.n = 10000)
## 95% CI: 0.6966-0.8588 (10000 stratified bootstrap replicates)
# Coordinates (threshold, specificity, sensitivity) at every threshold of the
# ROC curve. best.method is dropped: it only affects the result when
# x = "best", so passing it together with x = "all" was misleading.
rfThresh_all <- coords(res.roc, x = "all")
head(rfThresh_all)
##   threshold specificity sensitivity
## 1      -Inf  0.00000000   1.0000000
## 2     0.383  0.02857143   1.0000000
## 3     0.414  0.05714286   1.0000000
## 4     0.425  0.05714286   0.9886364
## 5     0.430  0.05714286   0.9772727
## 6     0.436  0.08571429   0.9772727
summary(rfThresh_all)
##    threshold       specificity      sensitivity    
##  Min.   :  -Inf   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.6735   1st Qu.:0.4000   1st Qu.:0.3949  
##  Median :0.7885   Median :0.7714   Median :0.6420  
##  Mean   :   NaN   Mean   :0.6723   Mean   :0.6038  
##  3rd Qu.:0.8820   3rd Qu.:0.9714   3rd Qu.:0.8438  
##  Max.   :   Inf   Max.   :1.0000   Max.   :1.0000
dim(rfThresh_all)
## [1] 100   3
# Coordinates at the local maxima of the ROC curve only. best.method is
# dropped because it is only consulted when x = "best".
rfThresh_max <- coords(res.roc, x = "local maximas")
head(rfThresh_max)
##   threshold specificity sensitivity
## 1     0.414  0.05714286   1.0000000
## 2     0.436  0.08571429   0.9772727
## 3     0.479  0.11428571   0.9545455
## 4     0.557  0.22857143   0.9431818
## 5     0.579  0.25714286   0.9090909
## 6     0.603  0.34285714   0.8977273
summary(rfThresh_max)
##    threshold       specificity       sensitivity    
##  Min.   :0.4140   Min.   :0.05714   Min.   :0.3182  
##  1st Qu.:0.6315   1st Qu.:0.37143   1st Qu.:0.5739  
##  Median :0.7450   Median :0.65714   Median :0.7614  
##  Mean   :0.7086   Mean   :0.59876   Mean   :0.7302  
##  3rd Qu.:0.8175   3rd Qu.:0.84286   3rd Qu.:0.8807  
##  Max.   :0.8980   Max.   :1.00000   Max.   :1.0000
dim(rfThresh_max)
## [1] 23  3
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
##   threshold specificity sensitivity
## 1     0.774   0.7714286   0.7045455
rfThresh_youden[1,1]
## [1] 0.774
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
##   threshold specificity sensitivity
## 1     0.774   0.7714286   0.7045455
rfThresh_topleft[1,1]
## [1] 0.774
coords(res.roc, "best", ret="all", transpose = FALSE,
       best.method="youden")
##           threshold specificity sensitivity  accuracy tn tp fn fp      npv
## threshold     0.774   0.7714286   0.7045455 0.7235772 27 62 26  8 0.509434
##                 ppv       fdr       fpr       tpr       tnr       fnr
## threshold 0.8857143 0.1142857 0.2285714 0.7045455 0.7714286 0.2954545
##           1-specificity 1-sensitivity 1-accuracy    1-npv     1-ppv precision
## threshold     0.2285714     0.2954545  0.2764228 0.490566 0.1142857 0.8857143
##              recall   youden closest.topleft
## threshold 0.7045455 1.475974       0.1395383
coords(res.roc, "best", ret="all", transpose = FALSE,
       best.method="closest.topleft")
##           threshold specificity sensitivity  accuracy tn tp fn fp      npv
## threshold     0.774   0.7714286   0.7045455 0.7235772 27 62 26  8 0.509434
##                 ppv       fdr       fpr       tpr       tnr       fnr
## threshold 0.8857143 0.1142857 0.2285714 0.7045455 0.7714286 0.2954545
##           1-specificity 1-sensitivity 1-accuracy    1-npv     1-ppv precision
## threshold     0.2285714     0.2954545  0.2764228 0.490566 0.1142857 0.8857143
##              recall   youden closest.topleft
## threshold 0.7045455 1.475974       0.1395383
# Return only the "threshold" coordinate (selected through `ret`) at the best
# cutoff chosen with the Youden method.
coords(res.roc, "best", ret="threshold", transpose = FALSE,
       best.method="youden") # this is the default
##   threshold
## 1     0.774
# Return only the "threshold" coordinate (selected through `ret`) at the best
# cutoff chosen with the closest.topleft method.
coords(res.roc, "best", ret="threshold", transpose = FALSE,
       best.method="closest.topleft")
##   threshold
## 1     0.774
# tn = true negative count, requested through `ret`, at the best cutoff.
# best.method is not passed here, so its default applies (labelled the Youden
# method by the original author).
coords(res.roc, "best", ret="tn", transpose = FALSE)
##           tn
## threshold 27
# tn = true negative count, requested through `ret`, at the best cutoff chosen
# with the closest.topleft method.
coords(res.roc, "best", ret="tn", transpose = FALSE,
       best.method="closest.topleft")
##           tn
## threshold 27
# tp = true positive count, requested through `ret`, at the best cutoff.
# best.method is not passed here, so its default applies (labelled the Youden
# method by the original author).
coords(res.roc, "best", ret="tp", transpose = FALSE)
##           tp
## threshold 62
# tp = true positive count, requested through `ret`, at the best cutoff chosen
# with the closest.topleft method.
coords(res.roc, "best", ret="tp", transpose = FALSE,
       best.method="closest.topleft")
##           tp
## threshold 62
# Youden statistic at the best cutoff. As the printed value shows, what is
# returned is sensitivity + specificity (not minus 1), so subtract 1 to get
# Youden's J as computed for J_CM_Test2.
coords(res.roc, "best", ret="youden", transpose = FALSE)
##     youden
## 1 1.475974
# "Distance to the top left corner of the ROC space" at the closest.topleft
# cutoff. The printed value equals (1 - sensitivity)^2 + (1 - specificity)^2,
# i.e. the squared distance rather than its square root.
coords(res.roc, "best", ret="closest.topleft", transpose = FALSE,best.method="closest.topleft")
##   closest.topleft
## 1       0.1395383
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
##   threshold specificity sensitivity
## 1     0.774   0.7714286   0.7045455
rfThresh_youden[1,1]
## [1] 0.774
# Label each probability in `predictions` against the Youden cutoff: "X0" when
# below the threshold, "X1" otherwise. Both probability columns are compared;
# only the second (X1) column is used when Test2 is built.
model_pred_class <- ifelse(predictions < rfThresh_youden[1,1], "X0", "X1")
head(model_pred_class)
##      X0   X1  
## 157  "X0" "X0"
## 407  "X0" "X0"
## 258  "X0" "X1"
## 1805 "X0" "X0"
## 433  "X0" "X0"
## 732  "X0" "X0"
summary(model_pred_class)
##       X0                 X1           
##  Length:123         Length:123        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
# Convert the X1-column labels to a 0/1 factor in a single step. Both levels
# are declared up front, so the mapping X0 -> 0, X1 -> 1 holds even when every
# prediction falls on the same side of the threshold (as.factor() followed by
# a positional `levels<-` would have relabelled a lone "X1" level as "0").
Test2 <- factor(
  model_pred_class[, 2],
  levels = c("X0", "X1"),
  labels = c("0", "1")
)
summary(Test2)
##  0  1 
## 53 70
CM_Test2<-confusionMatrix(Test2, ContraProva,positive="1")
CM_Test2
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 27 26
##          1  8 62
##                                           
##                Accuracy : 0.7236          
##                  95% CI : (0.6357, 0.8004)
##     No Information Rate : 0.7154          
##     P-Value [Acc > NIR] : 0.465909        
##                                           
##                   Kappa : 0.4121          
##                                           
##  Mcnemar's Test P-Value : 0.003551        
##                                           
##             Sensitivity : 0.7045          
##             Specificity : 0.7714          
##          Pos Pred Value : 0.8857          
##          Neg Pred Value : 0.5094          
##              Prevalence : 0.7154          
##          Detection Rate : 0.5041          
##    Detection Prevalence : 0.5691          
##       Balanced Accuracy : 0.7380          
##                                           
##        'Positive' Class : 1               
## 
str(CM_Test2)
## List of 6
##  $ positive: chr "1"
##  $ table   : 'table' int [1:2, 1:2] 27 8 26 62
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ Prediction: chr [1:2] "0" "1"
##   .. ..$ Reference : chr [1:2] "0" "1"
##  $ overall : Named num [1:7] 0.724 0.412 0.636 0.8 0.715 ...
##   ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
##  $ byClass : Named num [1:11] 0.705 0.771 0.886 0.509 0.886 ...
##   ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
##  $ mode    : chr "sens_spec"
##  $ dots    : list()
##  - attr(*, "class")= chr "confusionMatrix"
# Sensitivity of CM_Test2, looked up by name instead of by position.
CM_Test2$byClass["Sensitivity"]
## Sensitivity 
##   0.7045455
# Specificity of CM_Test2.
CM_Test2$byClass["Specificity"]
## Specificity 
##   0.7714286
# Youden's J = sensitivity + specificity - 1, kept as a bare number and printed.
J_CM_Test2 <- as.numeric(
  CM_Test2$byClass["Sensitivity"] + CM_Test2$byClass["Specificity"] - 1
)
J_CM_Test2
## [1] 0.475974
calculate_mcc1(CM_Test2)
## [1] 0.4336823
calculate_F2(CM_Test2)
## [1] 0.7345972
# Manually chosen probability cutoff.
# NOTE(review): how 0.65 was selected is not shown in this part of the file —
# confirm and record the rationale.
new_threshold <- 0.65


# Label each probability in predictions_3 against the manual cutoff: "X0" when
# below it, "X1" otherwise (both columns are compared; column 2 is used later).
model_pred_class_33New <- ifelse(predictions_3 < new_threshold, "X0", "X1")
head(sapply(model_pred_class_33New, class))
##          X0          X0          X0          X0          X0          X0 
## "character" "character" "character" "character" "character" "character"
# Factor of the X1-column labels with both classes declared up front, so the
# later positional relabel `levels(Test_33New) <- c(0,1)` maps X0 -> 0 and
# X1 -> 1 even if the cutoff puts every observation in one class.
Test_33New <- factor(model_pred_class_33New[, 2], levels = c("X0", "X1"))
head(sapply(Test_33New, class))
##      883     1171      307     1743     1820      176 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_33New) <- c(0,1)
head(sapply(Test_33New, class))
##      883     1171      307     1743     1820      176 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_33New)
## [1] "0" "1"
head(Test_33New)
##  883 1171  307 1743 1820  176 
##    1    1    0    1    1    1 
## Levels: 0 1
summary(Test_33New)
##  0  1 
## 16 65
CM_predictions_33New<-confusionMatrix(Test_33New, finalTest$true_troglofauna,positive="1")
CM_predictions_33New
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 10  6
##          1 13 52
##                                           
##                Accuracy : 0.7654          
##                  95% CI : (0.6582, 0.8525)
##     No Information Rate : 0.716           
##     P-Value [Acc > NIR] : 0.1955          
##                                           
##                   Kappa : 0.3648          
##                                           
##  Mcnemar's Test P-Value : 0.1687          
##                                           
##             Sensitivity : 0.8966          
##             Specificity : 0.4348          
##          Pos Pred Value : 0.8000          
##          Neg Pred Value : 0.6250          
##              Prevalence : 0.7160          
##          Detection Rate : 0.6420          
##    Detection Prevalence : 0.8025          
##       Balanced Accuracy : 0.6657          
##                                           
##        'Positive' Class : 1               
## 
calculate_mcc1(CM_predictions_33New)
## [1] 0.375256
calculate_F2(CM_predictions_33New)
## [1] 0.8754209
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
##   threshold specificity sensitivity
## 1     0.774   0.7714286   0.7045455
rfThresh_youden[1,1]
## [1] 0.774
model_pred_class_31 <- ifelse(predictions_3 < rfThresh_youden[1,1], "X0", "X1")
head(sapply(model_pred_class_31, class))
##          X0          X0          X0          X0          X0          X0 
## "character" "character" "character" "character" "character" "character"
# Factor of the X1-column labels with both classes declared up front, so the
# later positional relabel `levels(Test_31) <- c(0,1)` maps X0 -> 0 and
# X1 -> 1 even if the cutoff puts every observation in one class.
Test_31 <- factor(model_pred_class_31[, 2], levels = c("X0", "X1"))
head(sapply(Test_31, class))
##      883     1171      307     1743     1820      176 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_31) <- c(0,1)
head(sapply(Test_31, class))
##      883     1171      307     1743     1820      176 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_31)
## [1] "0" "1"
head(Test_31)
##  883 1171  307 1743 1820  176 
##    0    1    0    1    1    1 
## Levels: 0 1
summary(Test_31)
##  0  1 
## 34 47
CM_predictions_31<-confusionMatrix(Test_31, finalTest$true_troglofauna,positive="1")
CM_predictions_31
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 17 17
##          1  6 41
##                                          
##                Accuracy : 0.716          
##                  95% CI : (0.605, 0.8107)
##     No Information Rate : 0.716          
##     P-Value [Acc > NIR] : 0.55592        
##                                          
##                   Kappa : 0.3898         
##                                          
##  Mcnemar's Test P-Value : 0.03706        
##                                          
##             Sensitivity : 0.7069         
##             Specificity : 0.7391         
##          Pos Pred Value : 0.8723         
##          Neg Pred Value : 0.5000         
##              Prevalence : 0.7160         
##          Detection Rate : 0.5062         
##    Detection Prevalence : 0.5802         
##       Balanced Accuracy : 0.7230         
##                                          
##        'Positive' Class : 1              
## 
calculate_mcc1(CM_predictions_31)
## [1] 0.4075216
calculate_F2(CM_predictions_31)
## [1] 0.734767
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
##   threshold specificity sensitivity
## 1     0.774   0.7714286   0.7045455
rfThresh_topleft[1,1]
## [1] 0.774
# Label every probability in `predictions` against the closest.topleft cutoff:
# "X0" when below the threshold, "X1" otherwise.
model_pred_class <- ifelse(predictions < rfThresh_topleft[1, 1], "X0", "X1")

# Keep the labels of the X1 column as a 0/1 factor. Declaring both levels makes
# the mapping X0 -> 0, X1 -> 1 explicit; as.factor() plus a positional
# `levels<-` would relabel a lone "X1" level as "0" if only one class occurred.
Test3 <- factor(
  model_pred_class[, 2],
  levels = c("X0", "X1"),
  labels = c("0", "1")
)
summary(Test3)
##  0  1 
## 53 70
CM_Test3<-confusionMatrix(Test3, ContraProva,positive="1")
CM_Test3
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 27 26
##          1  8 62
##                                           
##                Accuracy : 0.7236          
##                  95% CI : (0.6357, 0.8004)
##     No Information Rate : 0.7154          
##     P-Value [Acc > NIR] : 0.465909        
##                                           
##                   Kappa : 0.4121          
##                                           
##  Mcnemar's Test P-Value : 0.003551        
##                                           
##             Sensitivity : 0.7045          
##             Specificity : 0.7714          
##          Pos Pred Value : 0.8857          
##          Neg Pred Value : 0.5094          
##              Prevalence : 0.7154          
##          Detection Rate : 0.5041          
##    Detection Prevalence : 0.5691          
##       Balanced Accuracy : 0.7380          
##                                           
##        'Positive' Class : 1               
## 
str(CM_Test3)
## List of 6
##  $ positive: chr "1"
##  $ table   : 'table' int [1:2, 1:2] 27 8 26 62
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ Prediction: chr [1:2] "0" "1"
##   .. ..$ Reference : chr [1:2] "0" "1"
##  $ overall : Named num [1:7] 0.724 0.412 0.636 0.8 0.715 ...
##   ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
##  $ byClass : Named num [1:11] 0.705 0.771 0.886 0.509 0.886 ...
##   ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
##  $ mode    : chr "sens_spec"
##  $ dots    : list()
##  - attr(*, "class")= chr "confusionMatrix"
# Sensitivity of CM_Test3, looked up by name instead of by position.
CM_Test3$byClass["Sensitivity"]
## Sensitivity 
##   0.7045455
# Specificity of CM_Test3.
CM_Test3$byClass["Specificity"]
## Specificity 
##   0.7714286
# Youden's J = sensitivity + specificity - 1, kept as a bare number and printed.
J_CM_Test3 <- as.numeric(
  CM_Test3$byClass["Sensitivity"] + CM_Test3$byClass["Specificity"] - 1
)
J_CM_Test3
## [1] 0.475974
calculate_mcc1(CM_Test3)
## [1] 0.4336823
calculate_F2(CM_Test3)
## [1] 0.7345972
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
##   threshold specificity sensitivity
## 1     0.774   0.7714286   0.7045455
rfThresh_topleft[1,1]
## [1] 0.774
model_pred_class_41 <- ifelse(predictions_3 < rfThresh_topleft[1,1], "X0", "X1")
head(sapply(model_pred_class_41, class))
##          X0          X0          X0          X0          X0          X0 
## "character" "character" "character" "character" "character" "character"
# Factor of the X1-column labels with both classes declared up front, so the
# later positional relabel `levels(Test_41) <- c(0,1)` maps X0 -> 0 and
# X1 -> 1 even if the cutoff puts every observation in one class.
Test_41 <- factor(model_pred_class_41[, 2], levels = c("X0", "X1"))
head(sapply(Test_41, class))
##      883     1171      307     1743     1820      176 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_41) <- c(0,1)
head(sapply(Test_41, class))
##      883     1171      307     1743     1820      176 
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_41)
## [1] "0" "1"
head(Test_41)
##  883 1171  307 1743 1820  176 
##    0    1    0    1    1    1 
## Levels: 0 1
summary(Test_41)
##  0  1 
## 34 47
CM_predictions_41<-confusionMatrix(Test_41, finalTest$true_troglofauna,positive="1") 
CM_predictions_41
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 17 17
##          1  6 41
##                                          
##                Accuracy : 0.716          
##                  95% CI : (0.605, 0.8107)
##     No Information Rate : 0.716          
##     P-Value [Acc > NIR] : 0.55592        
##                                          
##                   Kappa : 0.3898         
##                                          
##  Mcnemar's Test P-Value : 0.03706        
##                                          
##             Sensitivity : 0.7069         
##             Specificity : 0.7391         
##          Pos Pred Value : 0.8723         
##          Neg Pred Value : 0.5000         
##              Prevalence : 0.7160         
##          Detection Rate : 0.5062         
##    Detection Prevalence : 0.5802         
##       Balanced Accuracy : 0.7230         
##                                          
##        'Positive' Class : 1              
## 
calculate_mcc1(CM_predictions_41)
## [1] 0.4075216
calculate_F2(CM_predictions_41)
## [1] 0.734767
# Collect the ROC curve coordinates in a tibble, one row per threshold.
# tibble() replaces data_frame(), which has been deprecated since tibble 1.1.0
# and raised a lifecycle warning here.
roc.data <- tibble(
  thresholds = res.roc$thresholds, # cutoffs evaluated along the ROC curve
  sensitivity = res.roc$sensitivities, # true positive rate at each cutoff
  specificity = res.roc$specificities # true negative rate at each cutoff
)
roc.data # Display the 'roc.data' dataframe in the R console
## # A tibble: 100 × 3
##    thresholds sensitivity specificity
##         <dbl>       <dbl>       <dbl>
##  1   -Inf           1          0     
##  2      0.383       1          0.0286
##  3      0.414       1          0.0571
##  4      0.425       0.989      0.0571
##  5      0.43        0.977      0.0571
##  6      0.436       0.977      0.0857
##  7      0.449       0.966      0.0857
##  8      0.464       0.955      0.0857
##  9      0.479       0.955      0.114 
## 10      0.511       0.943      0.114 
## # ℹ 90 more rows
# roc.data is already a tibble (its printout is headed "A tibble"), so no
# conversion happens here; the call prints the same 100 x 3 table again.
tibble(roc.data)# redundant wrapper, kept as in the original analysis
## # A tibble: 100 × 3
##    thresholds sensitivity specificity
##         <dbl>       <dbl>       <dbl>
##  1   -Inf           1          0     
##  2      0.383       1          0.0286
##  3      0.414       1          0.0571
##  4      0.425       0.989      0.0571
##  5      0.43        0.977      0.0571
##  6      0.436       0.977      0.0857
##  7      0.449       0.966      0.0857
##  8      0.464       0.955      0.0857
##  9      0.479       0.955      0.114 
## 10      0.511       0.943      0.114 
## # ℹ 90 more rows
# List the probability thresholds whose specificity is >= 0.6; the thresholds
# with sensitivity >= 0.6 are listed by a separate filter() call further down
# (two independent filters, not a joint condition).
roc.data %>% filter(specificity >= 0.6)# command using the %>% "forward pipe operator"
## # A tibble: 64 × 3
##    thresholds sensitivity specificity
##         <dbl>       <dbl>       <dbl>
##  1      0.727       0.784       0.6  
##  2      0.731       0.784       0.629
##  3      0.737       0.761       0.629
##  4      0.745       0.761       0.657
##  5      0.75        0.75        0.657
##  6      0.754       0.739       0.714
##  7      0.76        0.727       0.714
##  8      0.765       0.727       0.743
##  9      0.768       0.716       0.743
## 10      0.771       0.705       0.743
## # ℹ 54 more rows
roc.data %>% filter(sensitivity >= 0.6)# rows with sensitivity >= 0.6, using the %>% "forward pipe operator"
## # A tibble: 55 × 3
##    thresholds sensitivity specificity
##         <dbl>       <dbl>       <dbl>
##  1   -Inf           1          0     
##  2      0.383       1          0.0286
##  3      0.414       1          0.0571
##  4      0.425       0.989      0.0571
##  5      0.43        0.977      0.0571
##  6      0.436       0.977      0.0857
##  7      0.449       0.966      0.0857
##  8      0.464       0.955      0.0857
##  9      0.479       0.955      0.114 
## 10      0.511       0.943      0.114 
## # ℹ 45 more rows
# ROC curve of res.roc with the "best" cutoff printed on the curve and the AUC
# printed at y = 0.4.
par(pty = "s")  # square plotting region
plot.roc(
  res.roc,
  col = "blue",
  main = "ROC curve (Point: best.method = 'youden')",
  print.auc = TRUE,
  print.auc.y = 0.4,
  print.thres = "best"
)

# Dotted gray grid at the default tick positions.
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")

# Dashed red vertical segment at x = rfThresh_youden[1, 2], running from
# y = 1 - rfThresh_youden[1, 2] to y = rfThresh_youden[1, 3].
# NOTE(review): presumably columns 2 and 3 hold specificity and sensitivity,
# which would make this the distance from the diagonal to the curve -- confirm
# where rfThresh_youden is built.
segments(
  x0 = rfThresh_youden[1, 2],
  y0 = 1 - rfThresh_youden[1, 2],
  x1 = rfThresh_youden[1, 2],
  y1 = rfThresh_youden[1, 3],
  lty = 2,
  lwd = 3,
  col = "red"
)

# Explanatory note for the cutoff label printed on the curve.
text(
  x = 0.4,
  y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)

# ROC curve of res.roc with the cutoffs 0.3, 0.5, 0.7 and the first-column
# value of rfThresh_topleft printed on the curve.
par(pty = "s")  # square plotting region

plot.roc(
  res.roc,
  col = "blue",
  main = "ROC curve (Point: best.method = 'closest.topleft')",
  print.auc = TRUE,
  print.auc.y = 0.4,
  print.thres = c(0.3, 0.5, 0.7, rfThresh_topleft[1, 1])
)

# Dotted gray grid at the default tick positions.
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")

# Dashed red segment from the point (rfThresh_topleft[1, 2],
# rfThresh_topleft[1, 3]) to the corner (1, 1).
# NOTE(review): presumably columns 2 and 3 hold specificity and sensitivity --
# confirm where rfThresh_topleft is built.
segments(
  x0 = rfThresh_topleft[1, 2],
  y0 = rfThresh_topleft[1, 3],
  x1 = 1,
  y1 = 1,
  lty = 2,
  lwd = 3,
  col = "red"
)

# Explanatory note for the cutoff labels printed on the curve.
text(
  x = 0.3,
  y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)

# Same ROC curve drawn with legacy axes (legacy.axes = TRUE) and custom axis
# titles; cutoffs 0.2, 0.5, 0.7 and rfThresh_youden[1, 1] are printed on it.
par(pty = "s")  # square plotting region

plot.roc(
  res.roc,
  legacy.axes = TRUE,
  col = "blue",
  main = "ROC curve - Youden",
  xlab = "1-Specificity = False Positive Rate = FPR",
  ylab = "Sensitivity = True Positive Rate = TPR",
  print.auc = TRUE,
  print.auc.y = 0.4,
  print.thres = c(0.2, 0.5, 0.7, rfThresh_youden[1, 1])
)

# Dashed red vertical segment at x = rfThresh_youden[1, 2], running from
# y = 1 - rfThresh_youden[1, 2] to y = rfThresh_youden[1, 3].
segments(
  x0 = rfThresh_youden[1, 2],
  y0 = 1 - rfThresh_youden[1, 2],
  x1 = rfThresh_youden[1, 2],
  y1 = rfThresh_youden[1, 3],
  lty = 2,
  lwd = 3,
  col = "red"
)

# Dotted gray grid, drawn after the segment as in the original ordering.
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")

# Explanatory note for the cutoff labels printed on the curve.
text(
  x = 0.4,
  y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)

# ROC curve with legacy axes (legacy.axes = TRUE) and custom axis titles;
# cutoffs 0.3, 0.5, 0.7 and rfThresh_topleft[1, 1] are printed on the curve.
par(pty = "s")  # square plotting region

plot.roc(
  res.roc,
  legacy.axes = TRUE,
  col = "blue",
  main = "ROC curve - Closest Topleft",
  xlab = "1-Specificity = False Positive Rate = FPR",
  ylab = "Sensitivity = True Positive Rate = TPR",
  print.auc = TRUE,
  print.auc.y = 0.4,
  print.thres = c(0.3, 0.5, 0.7, rfThresh_topleft[1, 1])
)

# Dashed red segment from the point (rfThresh_topleft[1, 2],
# rfThresh_topleft[1, 3]) to the corner (1, 1).
segments(
  x0 = rfThresh_topleft[1, 2],
  y0 = rfThresh_topleft[1, 3],
  x1 = 1,
  y1 = 1,
  lty = 2,
  lwd = 3,
  col = "red"
)

# Dotted gray grid, drawn after the segment as in the original ordering.
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")

# Explanatory note for the cutoff labels printed on the curve.
text(
  x = 0.4,
  y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)

# Build the ROC object from the observed classes (ContraProva) and the
# predicted probabilities, plotting it in the same call with the "best"
# cutoff and the AUC printed. The result is kept in rocCurve.
par(pty = "s")  # square plotting region

rocCurve <- roc(
  ContraProva,
  prediction.probabilities,
  plot = TRUE,
  legacy.axes = TRUE,
  col = "blue",
  main = "ROC curve (Point: best.method = 'youden')",
  cex.main = 0.9,
  xlab = "False Positive Rate = FPR",
  ylab = "True Positive Rate = TPR",
  print.thres = "best",
  print.auc = TRUE
)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Dashed red vertical segment at x = rfThresh_youden[1, 2], running from
# y = 1 - rfThresh_youden[1, 2] to y = rfThresh_youden[1, 3].
segments(
  x0 = rfThresh_youden[1, 2],
  y0 = 1 - rfThresh_youden[1, 2],
  x1 = rfThresh_youden[1, 2],
  y1 = rfThresh_youden[1, 3],
  lty = 2,
  lwd = 3,
  col = "red"
)

# Dotted gray grid at the default tick positions.
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")

# Explanatory note for the cutoff label printed on the curve.
text(
  x = 0.4,
  y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)

# Inspect the evaluation data: finalTest has 81 rows and 28 columns, the last
# of which is the response true_troglofauna.
dim(finalTest)
## [1] 81 28
names(finalTest)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"        "true_troglofauna"
# selectcol_finalTest has the same 81 rows and the same columns except the
# response (27 columns, no true_troglofauna).
dim(selectcol_finalTest)
## [1] 81 27
names(selectcol_finalTest)
##  [1] "srain2mp"         "bio21"            "bio16"            "slope"           
##  [5] "rugg500s"         "rugg500cv"        "pil_twicv"        "pil_topos"       
##  [9] "pil_slps"         "pil_slpcv"        "nutrientsn"       "mrvbf"           
## [13] "mrrtf"            "minfertf"         "lf7rup"           "hstructn"        
## [17] "geolrngaggn"      "geolmnaggn"       "bdensity50n"      "slopern"         
## [21] "HubDist"          "MIN_AGE_MA"       "month_collection" "HubName"         
## [25] "UNITNAME"         "ROCKTYPE1"        "FORMATION"
# Per-class statistics (named numeric vector) of the confusion matrix
# CM_predictions_2; these are the values reused for the F1/F2 figures below.
CM_predictions_2$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9827586            0.1304348            0.7402597 
##       Neg Pred Value            Precision               Recall 
##            0.7500000            0.7402597            0.9827586 
##                   F1           Prevalence       Detection Rate 
##            0.8444444            0.7160494            0.7037037 
## Detection Prevalence    Balanced Accuracy 
##            0.9506173            0.5565967
# Weight of recall relative to precision in the F-beta score (beta = 2 -> F2).
beta <- 2

# Per-class statistics of CM_predictions_2 (reported as the basic model with
# threshold 0.5 in the results table).
dbF1_11 <- CM_predictions_2$byClass
dbF1_11["F1"]
##        F1 
## 0.8444444
# F-beta = (1 + beta^2) * P * R / (beta^2 * P + R), kept as an unnamed number.
dbF2_11 <- local({
  precision <- dbF1_11[["Precision"]]
  recall <- dbF1_11[["Sensitivity"]]
  ((1 + beta^2) * precision * recall) / (beta^2 * precision + recall)
})
dbF2_11
## [1] 0.9223301
dbF1_11["Precision"]; dbF1_11["Sensitivity"]
## Precision 
## 0.7402597
## Sensitivity 
##   0.9827586
# Per-class statistics of CM_predictions_31 (reported as the 'youden' cutoff
# in the results table).
dbF1_22 <- CM_predictions_31$byClass
dbF1_22["F1"]
##        F1 
## 0.7809524
# F-beta = (1 + beta^2) * P * R / (beta^2 * P + R), using the script-level
# `beta`; kept as an unnamed number.
dbF2_22 <- local({
  precision <- dbF1_22[["Precision"]]
  recall <- dbF1_22[["Sensitivity"]]
  ((1 + beta^2) * precision * recall) / (beta^2 * precision + recall)
})
dbF2_22
## [1] 0.734767
dbF1_22["Precision"]; dbF1_22["Sensitivity"]
## Precision 
## 0.8723404
## Sensitivity 
##   0.7068966
# Per-class statistics of CM_predictions_41 (reported as the
# 'closest.topleft' cutoff in the results table).
dbF1_33 <- CM_predictions_41$byClass
dbF1_33["F1"]
##        F1 
## 0.7809524
# F-beta = (1 + beta^2) * P * R / (beta^2 * P + R), using the script-level
# `beta`; kept as an unnamed number.
dbF2_33 <- local({
  precision <- dbF1_33[["Precision"]]
  recall <- dbF1_33[["Sensitivity"]]
  ((1 + beta^2) * precision * recall) / (beta^2 * precision + recall)
})
dbF2_33
## [1] 0.734767
dbF1_33["Precision"]; dbF1_33["Sensitivity"]
## Precision 
## 0.8723404
## Sensitivity 
##   0.7068966
# Matthews correlation coefficient of each set of class predictions against
# the observed response in finalTest, computed with mcc().
# The response is selected by name (true_troglofauna) instead of by position
# (last column), so adding or reordering columns in finalTest cannot silently
# change which column is used as the truth.
dim(finalTest)
## [1] 81 28
# Basic model predictions.
basmcc <- mcc(predictions_2, finalTest$true_troglofauna)
basmcc
## [1] 0.235572
# Predictions in Test_31 (the value matches the MCC reported for the
# 'youden' cutoff).
youmcc <- mcc(Test_31, finalTest$true_troglofauna)
youmcc
## [1] 0.4075216
# Predictions in Test_41 (the value matches the MCC reported for the
# 'closest.topleft' cutoff).
topmcc <- mcc(Test_41, finalTest$true_troglofauna)
topmcc
## [1] 0.4075216
## [1] 0.4075216
# Print the summary table of test-set results for the four decision rules:
# the basic model (threshold 0.5), the "33New" rule (threshold 0.65), the
# 'youden' cutoff and the 'closest.topleft' cutoff.
# Confusion-matrix statistics are looked up by element name ("Accuracy",
# "Sensitivity", "Specificity", "Balanced Accuracy") rather than by the
# positions 1, 2 and 11, so the table does not depend on the ordering of
# caret's result vectors. cat() drops the names, so the printed text is the
# same as before. The argument sequence is unchanged because cat() separates
# consecutive arguments with a single space.
cat(
  "\n\n", "TABELA 1 - RESULTADOS DA BASE DE DADOS 'finalTest' COM OS MODELOS USANDO A FUNÇÃO train() DO PACOTE caret",

  # --- 1: basic model ---
  "\n\n\n", "1 - Modelo básico",
  "\n\n", "Area Under ROC (AUC) do Modelo Básico =", auc(res.roc),
  "\n\n\n", "Accuracy =", CM_predictions_2$overall["Accuracy"],
  "\n\n", "Sensitivity =", CM_predictions_2$byClass["Sensitivity"],
  "\n\n", "Specificity =", CM_predictions_2$byClass["Specificity"],
  "\n\n", "Balanced Accuracy =", CM_predictions_2$byClass["Balanced Accuracy"],
  "\n\n", "F1 =", dbF1_11["F1"],
  "\n\n", "F2 =", dbF2_11,
  "\n\n", "matthews correlation coefficient =", calculate_mcc1(CM_predictions_2),
  "\n\n", "Threshold =", 0.5,

  # --- 33New rule (no F1/F2 reported for it) ---
  "\n\n\n", " 33New ",
  "\n\n", "Accuracy =", CM_predictions_33New$overall["Accuracy"],
  "\n\n", "Sensitivity =", CM_predictions_33New$byClass["Sensitivity"],
  "\n\n", "Specificity =", CM_predictions_33New$byClass["Specificity"],
  "\n\n", "Balanced Accuracy =", CM_predictions_33New$byClass["Balanced Accuracy"],
  "\n\n", "matthews correlation coefficient =", calculate_mcc1(CM_predictions_33New),
  "\n\n", "Threshold =", 0.65,

  # --- 2: 'youden' cutoff ---
  "\n\n\n", "2 - Aplicando o critério de 'youden' ao modelo",
  "\n\n", "Accuracy =", CM_predictions_31$overall["Accuracy"],
  "\n\n", "Sensitivity =", CM_predictions_31$byClass["Sensitivity"],
  "\n\n", "Specificity =", CM_predictions_31$byClass["Specificity"],
  "\n\n", "Balanced Accuracy =", CM_predictions_31$byClass["Balanced Accuracy"],
  "\n\n", "F1 =", dbF1_22["F1"],
  "\n\n", "F2 =", dbF2_22,
  "\n\n", "matthews correlation coefficient =", calculate_mcc1(CM_predictions_31),
  "\n\n", "Threshold =", rfThresh_youden[1, 1],

  # --- 3: 'closest.topleft' cutoff ---
  "\n\n\n", "3 - Aplicando o critério 'closest.topleft' ao modelo",
  "\n\n", "Accuracy =", CM_predictions_41$overall["Accuracy"],
  "\n\n", "Sensitivity =", CM_predictions_41$byClass["Sensitivity"],
  "\n\n", "Specificity =", CM_predictions_41$byClass["Specificity"],
  "\n\n", "Balanced Accuracy =", CM_predictions_41$byClass["Balanced Accuracy"],
  "\n\n", "F1 =", dbF1_33["F1"],
  "\n\n", "F2 =", dbF2_33,
  "\n\n", "matthews correlation coefficient =", calculate_mcc1(CM_predictions_41),
  "\n\n", "Threshold =", rfThresh_topleft[1, 1]
)
## 
## 
##  TABELA 1 - RESULTADOS DA BASE DE DADOS 'finalTest' COM OS MODELOS USANDO A FUNÇÃO train() DO PACOTE caret 
## 
## 
##  1 - Modelo básico 
## 
##  Area Under ROC (AUC) do Modelo Básico = 0.7810065 
## 
## 
##  Accuracy = 0.7407407 
## 
##  Sensitivity = 0.9827586 
## 
##  Specificity = 0.1304348 
## 
##  Balanced Accuracy = 0.5565967 
## 
##  F1 = 0.8444444 
## 
##  F2 = 0.9223301 
## 
##  matthews correlation coefficient = 0.235572 
## 
##  Threshold = 0.5 
## 
## 
##   33New  
## 
##  Accuracy = 0.7654321 
## 
##  Sensitivity = 0.8965517 
## 
##  Specificity = 0.4347826 
## 
##  Balanced Accuracy = 0.6656672 
## 
##  matthews correlation coefficient = 0.375256 
## 
##  Threshold = 0.65 
## 
## 
##  2 - Aplicando o critério de 'youden' ao modelo 
## 
##  Accuracy = 0.7160494 
## 
##  Sensitivity = 0.7068966 
## 
##  Specificity = 0.7391304 
## 
##  Balanced Accuracy = 0.7230135 
## 
##  F1 = 0.7809524 
## 
##  F2 = 0.734767 
## 
##  matthews correlation coefficient = 0.4075216 
## 
##  Threshold = 0.774 
## 
## 
##  3 - Aplicando o critério 'closest.topleft' ao modelo 
## 
##  Accuracy = 0.7160494 
## 
##  Sensitivity = 0.7068966 
## 
##  Specificity = 0.7391304 
## 
##  Balanced Accuracy = 0.7230135 
## 
##  F1 = 0.7809524 
## 
##  F2 = 0.734767 
## 
##  matthews correlation coefficient = 0.4075216 
## 
##  Threshold = 0.774
# Row indices of finalTest1 where true_troglofauna equals 1, printed for
# inspection.
gaa <- which(finalTest1$true_troglofauna == 1)
gaa
##  [1]  1  2  4  5  6  7 10 11 12 17 20 21 24 25 26 27 28 29 30 31 32 33 36 37 38
## [26] 39 40 42 43 44 45 46 47 48 49 50 51 54 55 58 60 61 62 63 65 66 67 68 70 71
## [51] 72 74 75 76 77 79 80 81
# Those rows as their own data frame, followed by a per-column summary.
geo1 <- finalTest1[gaa, ]
summary(geo1)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1362   Min.   :29.64   Min.   :202.2   Min.   : 0.08943  
##  1st Qu.:0.1443   1st Qu.:29.74   1st Qu.:221.2   1st Qu.: 1.08182  
##  Median :0.1481   Median :29.75   Median :224.2   Median : 2.01742  
##  Mean   :0.1480   Mean   :29.75   Mean   :225.8   Mean   : 2.95951  
##  3rd Qu.:0.1519   3rd Qu.:29.77   3rd Qu.:229.7   3rd Qu.: 3.73666  
##  Max.   :0.1631   Max.   :29.83   Max.   :245.4   Max.   :13.58547  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.01647   Min.   :0.01810   Min.   :0.05175   Min.   :0.0000  
##  1st Qu.:0.44700   1st Qu.:0.07039   1st Qu.:0.09118   1st Qu.:0.0000  
##  Median :0.96114   Median :0.09685   Median :0.14486   Median :0.2028  
##  Mean   :1.46073   Mean   :0.11370   Mean   :0.15158   Mean   :1.1509  
##  3rd Qu.:1.77356   3rd Qu.:0.14303   3rd Qu.:0.20167   3rd Qu.:1.7150  
##  Max.   :7.05156   Max.   :0.30012   Max.   :0.29959   Max.   :5.4794  
##                                                                        
##     pil_slps         pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.1401   Min.   :0.1951   Min.   :1.100   Min.   :0.0000  
##  1st Qu.: 0.8979   1st Qu.:0.3583   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 2.2965   Median :0.4684   Median :1.300   Median :0.0000  
##  Mean   : 3.1087   Mean   :0.4533   Mean   :1.286   Mean   :0.4320  
##  3rd Qu.: 4.6968   3rd Qu.:0.5173   3rd Qu.:1.300   3rd Qu.:0.6406  
##  Max.   :12.3162   Max.   :1.1404   Max.   :1.600   Max.   :4.5610  
##                                                                     
##      mrrtf           minfertf         lf7rup         hstructn      
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 0.5821  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.0429  
##  Mean   :0.4231   Mean   :2.052   Mean   :3.086   Mean   : 1.1633  
##  3rd Qu.:0.5784   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :3.5544   Max.   :3.000   Max.   :7.000   Max.   :10.0000  
##                                                                    
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   : 0.00132  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.01840  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.05969  
##  Mean   :362.2899   Mean   :1586.3174   Mean   :1.351   Mean   : 0.61471  
##  3rd Qu.:750.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.24807  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA    month_collection
##  Min.   : 2.124   Min.   :2453   June     : 9     
##  1st Qu.: 9.731   1st Qu.:2454   September: 9     
##  Median :18.518   Median :2506   July     : 8     
##  Mean   :29.461   Mean   :2516   August   : 6     
##  3rd Qu.:53.033   3rd Qu.:2597   April    : 5     
##  Max.   :77.437   Max.   :2597   February : 4     
##                                  (Other)  :17     
##                                      HubName  
##  anticline, exposed                      :15  
##  concealed                               : 0  
##  exposed                                 :10  
##  normal, exposed, tick on downthrown side: 4  
##  overturned syncline, exposed            : 2  
##  syncline, exposed                       :27  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :24  
##  Marra Mamba Iron Formation                  :22  
##  Mount McRae Shale and Mount Sylvia Formation: 1  
##  Weeli Wolli Formation                       : 3  
##  Wittenoom Formation                         : 8  
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 8  
##  sedimentary other chemical or biochemical:49  
##  sedimentary siliciclastic                : 1  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :24   0: 0            
##  Marra Mamba Iron Formation                  :22   1:58            
##  Mount McRae Shale and Mount Sylvia Formation: 1                   
##  Weeli Wolli Formation                       : 3                   
##  Wittenoom Formation                         : 8                   
##                                                                    
##                                                                    
##     LATITUDE        LONGITUDE    
##  Min.   :-23.26   Min.   :118.7  
##  1st Qu.:-22.98   1st Qu.:118.9  
##  Median :-22.94   Median :119.0  
##  Mean   :-22.87   Mean   :119.0  
##  3rd Qu.:-22.70   3rd Qu.:119.1  
##  Max.   :-22.53   Max.   :119.4  
## 
# Row indices of finalTest1 where true_troglofauna equals 0, printed for
# inspection.
gaaAu <- which(finalTest1$true_troglofauna == 0)
gaaAu
##  [1]  3  8  9 13 14 15 16 18 19 22 23 34 35 41 52 53 56 57 59 64 69 73 78
# Those rows as their own data frame, followed by a per-column summary.
geoAu <- finalTest1[gaaAu, ]
summary(geoAu)
##     srain2mp          bio21           bio16           slope        
##  Min.   :0.1356   Min.   :29.69   Min.   :214.2   Min.   : 0.1489  
##  1st Qu.:0.1423   1st Qu.:29.76   1st Qu.:223.8   1st Qu.: 1.8485  
##  Median :0.1463   Median :29.77   Median :224.5   Median : 3.4507  
##  Mean   :0.1465   Mean   :29.77   Mean   :225.6   Mean   : 4.2842  
##  3rd Qu.:0.1481   3rd Qu.:29.80   3rd Qu.:229.8   3rd Qu.: 5.3867  
##  Max.   :0.1633   Max.   :29.82   Max.   :238.9   Max.   :12.8002  
##                                                                    
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.09931   Min.   :0.03795   Min.   :0.04523   Min.   : 0.000  
##  1st Qu.:1.16273   1st Qu.:0.06686   1st Qu.:0.08127   1st Qu.: 0.000  
##  Median :1.80702   Median :0.09112   Median :0.11014   Median : 0.000  
##  Mean   :2.38998   Mean   :0.13402   Mean   :0.13561   Mean   : 2.361  
##  3rd Qu.:3.74337   3rd Qu.:0.20341   3rd Qu.:0.18286   3rd Qu.: 4.808  
##  Max.   :6.26119   Max.   :0.29515   Max.   :0.36527   Max.   :10.364  
##                                                                        
##     pil_slps         pil_slpcv        nutrientsn      mrvbf       
##  Min.   : 0.5099   Min.   :0.2334   Min.   :1.1   Min.   :0.0000  
##  1st Qu.: 1.1570   1st Qu.:0.3374   1st Qu.:1.3   1st Qu.:0.0000  
##  Median : 4.9075   Median :0.4747   Median :1.3   Median :0.0000  
##  Mean   : 4.8395   Mean   :0.4815   Mean   :1.3   Mean   :0.4131  
##  3rd Qu.: 5.9913   3rd Qu.:0.5641   3rd Qu.:1.3   3rd Qu.:0.0000  
##  Max.   :17.0097   Max.   :0.9767   Max.   :1.6   Max.   :4.5058  
##                                                                   
##      mrrtf           minfertf         lf7rup         hstructn     
##  Min.   :0.0000   Min.   :2.000   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:1.500   1st Qu.:1.0429  
##  Median :0.0000   Median :2.000   Median :3.000   Median :1.0429  
##  Mean   :0.1835   Mean   :2.043   Mean   :3.043   Mean   :0.8721  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:1.0429  
##  Max.   :1.9350   Max.   :3.000   Max.   :7.000   Max.   :1.0429  
##                                                                   
##   geolrngaggn         geolmnaggn         bdensity50n       slopern       
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   :0.00392  
##  1st Qu.:  2.5880   1st Qu.:   1.2940   1st Qu.:1.367   1st Qu.:0.01738  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median :0.03976  
##  Mean   :361.3930   Mean   :1319.8269   Mean   :1.354   Mean   :0.14841  
##  3rd Qu.:900.0000   3rd Qu.:2350.0000   3rd Qu.:1.367   3rd Qu.:0.14600  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :1.26735  
##                                                                          
##     HubDist         MIN_AGE_MA    month_collection
##  Min.   : 1.294   Min.   :2453   January  :7      
##  1st Qu.: 3.746   1st Qu.:2454   March    :3      
##  Median : 6.289   Median :2494   November :3      
##  Mean   :22.821   Mean   :2520   February :2      
##  3rd Qu.:54.325   3rd Qu.:2597   April    :2      
##  Max.   :70.168   Max.   :2597   September:2      
##                                  (Other)  :4      
##                                      HubName  
##  anticline, exposed                      : 1  
##  concealed                               : 0  
##  exposed                                 : 0  
##  normal, exposed, tick on downthrown side: 6  
##  overturned syncline, exposed            : 4  
##  syncline, exposed                       :12  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :10  
##  Marra Mamba Iron Formation                  :10  
##  Mount McRae Shale and Mount Sylvia Formation: 1  
##  Weeli Wolli Formation                       : 1  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 1  
##  sedimentary other chemical or biochemical:21  
##  sedimentary siliciclastic                : 1  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :10   0:23            
##  Marra Mamba Iron Formation                  :10   1: 0            
##  Mount McRae Shale and Mount Sylvia Formation: 1                   
##  Weeli Wolli Formation                       : 1                   
##  Wittenoom Formation                         : 1                   
##                                                                    
##                                                                    
##     LATITUDE        LONGITUDE    
##  Min.   :-23.07   Min.   :118.7  
##  1st Qu.:-22.94   1st Qu.:118.8  
##  Median :-22.93   Median :118.9  
##  Mean   :-22.89   Mean   :118.9  
##  3rd Qu.:-22.91   3rd Qu.:119.0  
##  Max.   :-22.55   Max.   :119.2  
## 
# Map extent: bounding box of all finalTest1 coordinates, padded by 0.05 on
# every side.
Min_Lg <- min(finalTest1$LONGITUDE) - 0.05
Max_Lg <- max(finalTest1$LONGITUDE) + 0.05
Min_Lt <- min(finalTest1$LATITUDE) - 0.05
Max_Lt <- max(finalTest1$LATITUDE) + 0.05

# Base map: the rows in geo1 as large filled blue dots. Later points() calls
# draw on top of this plot.
plot(
  x = geo1$LONGITUDE,
  y = geo1$LATITUDE,
  xlim = c(Min_Lg, Max_Lg),
  ylim = c(Min_Lt, Max_Lt),
  pch = 20,
  cex = 2,
  col = "blue",
  xlab = 'Longitude\n',
  ylab = 'Latitude',
  main = 'Modelo Básico. Cordenadas da presença de troglofauna: Observadas e Preditas ',
  sub = "\nObservação Presença  = azul, Predição Presença = vermelha, Observação Ausência = Preta"
)


# Positions at which predictions_2 equals 1, printed for inspection.
gbb <- which(predictions_2 == 1)
gbb
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 14 15 16 17 19 20 21 22 23 24 25 26 27
## [26] 28 29 30 31 32 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
## [51] 54 55 56 57 58 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
## [76] 80 81
# Rows of finalTest1 at those positions, followed by a per-column summary.
# NOTE(review): assumes predictions_2 is in the same row order as finalTest1
# -- confirm where predictions_2 is created.
geo2 <- finalTest1[gbb, ]
summary(geo2)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1356   Min.   :29.66   Min.   :202.2   Min.   : 0.08943  
##  1st Qu.:0.1431   1st Qu.:29.74   1st Qu.:221.4   1st Qu.: 1.20730  
##  Median :0.1473   Median :29.76   Median :224.2   Median : 2.30398  
##  Mean   :0.1474   Mean   :29.75   Mean   :225.8   Mean   : 3.22096  
##  3rd Qu.:0.1517   3rd Qu.:29.77   3rd Qu.:230.1   3rd Qu.: 4.06712  
##  Max.   :0.1631   Max.   :29.83   Max.   :245.4   Max.   :13.58547  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos      
##  Min.   :0.01647   Min.   :0.01810   Min.   :0.04839   Min.   : 0.0000  
##  1st Qu.:0.60450   1st Qu.:0.06913   1st Qu.:0.09014   1st Qu.: 0.0000  
##  Median :1.09004   Median :0.09504   Median :0.13883   Median : 0.1104  
##  Mean   :1.68214   Mean   :0.11970   Mean   :0.14929   Mean   : 1.5034  
##  3rd Qu.:2.42691   3rd Qu.:0.17764   3rd Qu.:0.20128   3rd Qu.: 2.4066  
##  Max.   :7.05156   Max.   :0.30012   Max.   :0.36527   Max.   :10.3639  
##                                                                         
##     pil_slps         pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.1401   Min.   :0.1951   Min.   :1.100   Min.   :0.0000  
##  1st Qu.: 1.0510   1st Qu.:0.3434   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 2.3887   Median :0.4664   Median :1.300   Median :0.0000  
##  Mean   : 3.5202   Mean   :0.4613   Mean   :1.292   Mean   :0.4488  
##  3rd Qu.: 4.9667   3rd Qu.:0.5202   3rd Qu.:1.300   3rd Qu.:0.5784  
##  Max.   :17.0097   Max.   :1.1404   Max.   :1.600   Max.   :4.5610  
##                                                                     
##      mrrtf           minfertf         lf7rup         hstructn     
##  Min.   :0.0000   Min.   :2.000   Min.   :1.000   Min.   : 0.000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 1.043  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.043  
##  Mean   :0.3538   Mean   :2.065   Mean   :3.143   Mean   : 1.091  
##  3rd Qu.:0.5385   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.043  
##  Max.   :3.5544   Max.   :3.000   Max.   :7.000   Max.   :10.000  
##                                                                   
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   : 0.00132  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.01814  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.05865  
##  Mean   :344.9267   Mean   :1508.8270   Mean   :1.352   Mean   : 0.49968  
##  3rd Qu.:300.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.20859  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA    month_collection
##  Min.   : 2.124   Min.   :2453   June     :10     
##  1st Qu.: 7.414   1st Qu.:2454   September: 9     
##  Median :17.310   Median :2506   January  : 8     
##  Mean   :27.857   Mean   :2520   July     : 8     
##  3rd Qu.:53.489   3rd Qu.:2597   March    : 7     
##  Max.   :77.437   Max.   :2597   April    : 7     
##                                  (Other)  :28     
##                                      HubName  
##  anticline, exposed                      :16  
##  concealed                               : 0  
##  exposed                                 :10  
##  normal, exposed, tick on downthrown side: 9  
##  overturned syncline, exposed            : 4  
##  syncline, exposed                       :38  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :30  
##  Marra Mamba Iron Formation                  :32  
##  Mount McRae Shale and Mount Sylvia Formation: 2  
##  Weeli Wolli Formation                       : 4  
##  Wittenoom Formation                         : 9  
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 9  
##  sedimentary other chemical or biochemical:66  
##  sedimentary siliciclastic                : 2  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :30   0:20            
##  Marra Mamba Iron Formation                  :32   1:57            
##  Mount McRae Shale and Mount Sylvia Formation: 2                   
##  Weeli Wolli Formation                       : 4                   
##  Wittenoom Formation                         : 9                   
##                                                                    
##                                                                    
##     LATITUDE        LONGITUDE    
##  Min.   :-23.26   Min.   :118.7  
##  1st Qu.:-22.98   1st Qu.:118.9  
##  Median :-22.93   Median :119.0  
##  Mean   :-22.88   Mean   :119.0  
##  3rd Qu.:-22.70   3rd Qu.:119.0  
##  Max.   :-22.53   Max.   :119.4  
## 
# Overlay the rows in geo2 as red diamonds on the current plot.
# The xlim/ylim arguments were removed from both points() calls: points() adds
# to the coordinate system of the plot that is already open, so limits passed
# here never changed the axes (and the two calls even disagreed on them).
points(geo2$LONGITUDE, geo2$LATITUDE, pch = 5, col = "red")

# Overlay the rows in geoAu as faint (15% opaque) black crosses.
points(geoAu$LONGITUDE, geoAu$LATITUDE, pch = 4, col = rgb(0, 0, 0, 0.15))

# Recompute the row indices of finalTest1 where true_troglofauna equals 1 and
# print them.
gaa <- which(finalTest1$true_troglofauna == 1)
gaa
##  [1]  1  2  4  5  6  7 10 11 12 17 20 21 24 25 26 27 28 29 30 31 32 33 36 37 38
## [26] 39 40 42 43 44 45 46 47 48 49 50 51 54 55 58 60 61 62 63 65 66 67 68 70 71
## [51] 72 74 75 76 77 79 80 81
# Those rows as their own data frame, followed by a per-column summary.
geo1 <- finalTest1[gaa, ]
summary(geo1)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1362   Min.   :29.64   Min.   :202.2   Min.   : 0.08943  
##  1st Qu.:0.1443   1st Qu.:29.74   1st Qu.:221.2   1st Qu.: 1.08182  
##  Median :0.1481   Median :29.75   Median :224.2   Median : 2.01742  
##  Mean   :0.1480   Mean   :29.75   Mean   :225.8   Mean   : 2.95951  
##  3rd Qu.:0.1519   3rd Qu.:29.77   3rd Qu.:229.7   3rd Qu.: 3.73666  
##  Max.   :0.1631   Max.   :29.83   Max.   :245.4   Max.   :13.58547  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.01647   Min.   :0.01810   Min.   :0.05175   Min.   :0.0000  
##  1st Qu.:0.44700   1st Qu.:0.07039   1st Qu.:0.09118   1st Qu.:0.0000  
##  Median :0.96114   Median :0.09685   Median :0.14486   Median :0.2028  
##  Mean   :1.46073   Mean   :0.11370   Mean   :0.15158   Mean   :1.1509  
##  3rd Qu.:1.77356   3rd Qu.:0.14303   3rd Qu.:0.20167   3rd Qu.:1.7150  
##  Max.   :7.05156   Max.   :0.30012   Max.   :0.29959   Max.   :5.4794  
##                                                                        
##     pil_slps         pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.1401   Min.   :0.1951   Min.   :1.100   Min.   :0.0000  
##  1st Qu.: 0.8979   1st Qu.:0.3583   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 2.2965   Median :0.4684   Median :1.300   Median :0.0000  
##  Mean   : 3.1087   Mean   :0.4533   Mean   :1.286   Mean   :0.4320  
##  3rd Qu.: 4.6968   3rd Qu.:0.5173   3rd Qu.:1.300   3rd Qu.:0.6406  
##  Max.   :12.3162   Max.   :1.1404   Max.   :1.600   Max.   :4.5610  
##                                                                     
##      mrrtf           minfertf         lf7rup         hstructn      
##  Min.   :0.0000   Min.   :1.000   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 0.5821  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.0429  
##  Mean   :0.4231   Mean   :2.052   Mean   :3.086   Mean   : 1.1633  
##  3rd Qu.:0.5784   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :3.5544   Max.   :3.000   Max.   :7.000   Max.   :10.0000  
##                                                                    
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   : 0.00132  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.01840  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.05969  
##  Mean   :362.2899   Mean   :1586.3174   Mean   :1.351   Mean   : 0.61471  
##  3rd Qu.:750.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.24807  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA    month_collection
##  Min.   : 2.124   Min.   :2453   June     : 9     
##  1st Qu.: 9.731   1st Qu.:2454   September: 9     
##  Median :18.518   Median :2506   July     : 8     
##  Mean   :29.461   Mean   :2516   August   : 6     
##  3rd Qu.:53.033   3rd Qu.:2597   April    : 5     
##  Max.   :77.437   Max.   :2597   February : 4     
##                                  (Other)  :17     
##                                      HubName  
##  anticline, exposed                      :15  
##  concealed                               : 0  
##  exposed                                 :10  
##  normal, exposed, tick on downthrown side: 4  
##  overturned syncline, exposed            : 2  
##  syncline, exposed                       :27  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :24  
##  Marra Mamba Iron Formation                  :22  
##  Mount McRae Shale and Mount Sylvia Formation: 1  
##  Weeli Wolli Formation                       : 3  
##  Wittenoom Formation                         : 8  
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 8  
##  sedimentary other chemical or biochemical:49  
##  sedimentary siliciclastic                : 1  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :24   0: 0            
##  Marra Mamba Iron Formation                  :22   1:58            
##  Mount McRae Shale and Mount Sylvia Formation: 1                   
##  Weeli Wolli Formation                       : 3                   
##  Wittenoom Formation                         : 8                   
##                                                                    
##                                                                    
##     LATITUDE        LONGITUDE    
##  Min.   :-23.26   Min.   :118.7  
##  1st Qu.:-22.98   1st Qu.:118.9  
##  Median :-22.94   Median :119.0  
##  Mean   :-22.87   Mean   :119.0  
##  3rd Qu.:-22.70   3rd Qu.:119.1  
##  Max.   :-22.53   Max.   :119.4  
## 
# Row positions in finalTest1 whose true_troglofauna value equals 0, echoed
# to the console. which() drops any NA comparison results.
gaaAu <- which(finalTest1[["true_troglofauna"]] == 0)
gaaAu
##  [1]  3  8  9 13 14 15 16 18 19 22 23 34 35 41 52 53 56 57 59 64 69 73 78
# Keep only the rows indexed by gaaAu (true_troglofauna == 0), with every
# column retained, then print per-column summary statistics for that subset.
geoAu <- finalTest1[gaaAu, ]
summary(geoAu)
##     srain2mp          bio21           bio16           slope        
##  Min.   :0.1356   Min.   :29.69   Min.   :214.2   Min.   : 0.1489  
##  1st Qu.:0.1423   1st Qu.:29.76   1st Qu.:223.8   1st Qu.: 1.8485  
##  Median :0.1463   Median :29.77   Median :224.5   Median : 3.4507  
##  Mean   :0.1465   Mean   :29.77   Mean   :225.6   Mean   : 4.2842  
##  3rd Qu.:0.1481   3rd Qu.:29.80   3rd Qu.:229.8   3rd Qu.: 5.3867  
##  Max.   :0.1633   Max.   :29.82   Max.   :238.9   Max.   :12.8002  
##                                                                    
##     rugg500s         rugg500cv         pil_twicv         pil_topos     
##  Min.   :0.09931   Min.   :0.03795   Min.   :0.04523   Min.   : 0.000  
##  1st Qu.:1.16273   1st Qu.:0.06686   1st Qu.:0.08127   1st Qu.: 0.000  
##  Median :1.80702   Median :0.09112   Median :0.11014   Median : 0.000  
##  Mean   :2.38998   Mean   :0.13402   Mean   :0.13561   Mean   : 2.361  
##  3rd Qu.:3.74337   3rd Qu.:0.20341   3rd Qu.:0.18286   3rd Qu.: 4.808  
##  Max.   :6.26119   Max.   :0.29515   Max.   :0.36527   Max.   :10.364  
##                                                                        
##     pil_slps         pil_slpcv        nutrientsn      mrvbf       
##  Min.   : 0.5099   Min.   :0.2334   Min.   :1.1   Min.   :0.0000  
##  1st Qu.: 1.1570   1st Qu.:0.3374   1st Qu.:1.3   1st Qu.:0.0000  
##  Median : 4.9075   Median :0.4747   Median :1.3   Median :0.0000  
##  Mean   : 4.8395   Mean   :0.4815   Mean   :1.3   Mean   :0.4131  
##  3rd Qu.: 5.9913   3rd Qu.:0.5641   3rd Qu.:1.3   3rd Qu.:0.0000  
##  Max.   :17.0097   Max.   :0.9767   Max.   :1.6   Max.   :4.5058  
##                                                                   
##      mrrtf           minfertf         lf7rup         hstructn     
##  Min.   :0.0000   Min.   :2.000   Min.   :1.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:1.500   1st Qu.:1.0429  
##  Median :0.0000   Median :2.000   Median :3.000   Median :1.0429  
##  Mean   :0.1835   Mean   :2.043   Mean   :3.043   Mean   :0.8721  
##  3rd Qu.:0.0000   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:1.0429  
##  Max.   :1.9350   Max.   :3.000   Max.   :7.000   Max.   :1.0429  
##                                                                   
##   geolrngaggn         geolmnaggn         bdensity50n       slopern       
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   :0.00392  
##  1st Qu.:  2.5880   1st Qu.:   1.2940   1st Qu.:1.367   1st Qu.:0.01738  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median :0.03976  
##  Mean   :361.3930   Mean   :1319.8269   Mean   :1.354   Mean   :0.14841  
##  3rd Qu.:900.0000   3rd Qu.:2350.0000   3rd Qu.:1.367   3rd Qu.:0.14600  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :1.26735  
##                                                                          
##     HubDist         MIN_AGE_MA    month_collection
##  Min.   : 1.294   Min.   :2453   January  :7      
##  1st Qu.: 3.746   1st Qu.:2454   March    :3      
##  Median : 6.289   Median :2494   November :3      
##  Mean   :22.821   Mean   :2520   February :2      
##  3rd Qu.:54.325   3rd Qu.:2597   April    :2      
##  Max.   :70.168   Max.   :2597   September:2      
##                                  (Other)  :4      
##                                      HubName  
##  anticline, exposed                      : 1  
##  concealed                               : 0  
##  exposed                                 : 0  
##  normal, exposed, tick on downthrown side: 6  
##  overturned syncline, exposed            : 4  
##  syncline, exposed                       :12  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :10  
##  Marra Mamba Iron Formation                  :10  
##  Mount McRae Shale and Mount Sylvia Formation: 1  
##  Weeli Wolli Formation                       : 1  
##  Wittenoom Formation                         : 1  
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 1  
##  sedimentary other chemical or biochemical:21  
##  sedimentary siliciclastic                : 1  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :10   0:23            
##  Marra Mamba Iron Formation                  :10   1: 0            
##  Mount McRae Shale and Mount Sylvia Formation: 1                   
##  Weeli Wolli Formation                       : 1                   
##  Wittenoom Formation                         : 1                   
##                                                                    
##                                                                    
##     LATITUDE        LONGITUDE    
##  Min.   :-23.07   Min.   :118.7  
##  1st Qu.:-22.94   1st Qu.:118.8  
##  Median :-22.93   Median :118.9  
##  Mean   :-22.89   Mean   :118.9  
##  3rd Qu.:-22.91   3rd Qu.:119.0  
##  Max.   :-22.55   Max.   :119.2  
## 
# Base scatter plot of the geo1 coordinates as large blue filled dots. The axis
# ranges are fixed by Min_Lg/Max_Lg and Min_Lt/Max_Lt, which are defined outside
# this chunk. Per the subtitle, blue = observed presence; the red and black
# layers named there are drawn by separate points() calls further down.
# NOTE(review): the title reads "Cordenadas"; "Coordenadas" was probably
# intended -- confirm before changing the rendered text.
plot(
  x = geo1$LONGITUDE,
  y = geo1$LATITUDE,
  xlim = c(Min_Lg, Max_Lg),
  ylim = c(Min_Lt, Max_Lt),
  pch = 20,
  cex = 2,
  col = "blue",
  xlab = 'Longitude\n',
  ylab = 'Latitude',
  main = 'Modelo de ‘Youden’. Cordenadas da presença de troglofauna: Observadas e Preditas ',
  sub = "\nObservação Presença  = azul, Predição Presença = vermelha, Observação Ausência = Preta"
)


# Positions at which Test_33New equals 1, echoed to the console.
# NOTE(review): Test_33New is presumably the vector of predicted classes for
# the test set (1 = predicted presence) -- confirm against where it is built.
gbb <- which(Test_33New == 1)
gbb
##  [1]  1  2  4  5  6  7  8  9 10 11 12 15 16 20 21 22 23 24 25 26 27 28 29 30 31
## [26] 32 34 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 52 53 54 55 57 60 61 63
## [51] 65 66 67 68 69 71 72 73 74 75 76 77 79 80 81
# Keep only the finalTest1 rows indexed by gbb (Test_33New == 1), with every
# column retained, then print per-column summary statistics for that subset.
geo2 <- finalTest1[gbb, ]
summary(geo2)
##     srain2mp          bio21           bio16           slope         
##  Min.   :0.1356   Min.   :29.66   Min.   :202.2   Min.   : 0.08943  
##  1st Qu.:0.1427   1st Qu.:29.74   1st Qu.:221.2   1st Qu.: 1.20730  
##  Median :0.1473   Median :29.76   Median :224.2   Median : 2.09344  
##  Mean   :0.1469   Mean   :29.75   Mean   :225.9   Mean   : 2.66784  
##  3rd Qu.:0.1508   3rd Qu.:29.77   3rd Qu.:230.1   3rd Qu.: 3.49038  
##  Max.   :0.1631   Max.   :29.83   Max.   :245.4   Max.   :12.80595  
##                                                                     
##     rugg500s         rugg500cv         pil_twicv         pil_topos      
##  Min.   :0.03319   Min.   :0.03287   Min.   :0.05175   Min.   : 0.0000  
##  1st Qu.:0.58854   1st Qu.:0.07169   1st Qu.:0.08987   1st Qu.: 0.0000  
##  Median :1.03983   Median :0.10369   Median :0.14482   Median : 0.1888  
##  Mean   :1.55604   Mean   :0.12589   Mean   :0.15181   Mean   : 1.5266  
##  3rd Qu.:2.03901   3rd Qu.:0.17857   3rd Qu.:0.20128   3rd Qu.: 2.4066  
##  Max.   :6.69625   Max.   :0.30012   Max.   :0.36527   Max.   :10.3639  
##                                                                         
##     pil_slps         pil_slpcv        nutrientsn        mrvbf       
##  Min.   : 0.1401   Min.   :0.1951   Min.   :1.100   Min.   :0.0000  
##  1st Qu.: 1.0099   1st Qu.:0.3656   1st Qu.:1.300   1st Qu.:0.0000  
##  Median : 2.2539   Median :0.4704   Median :1.300   Median :0.0000  
##  Mean   : 3.0265   Mean   :0.4607   Mean   :1.297   Mean   :0.4896  
##  3rd Qu.: 4.7576   3rd Qu.:0.5202   3rd Qu.:1.300   3rd Qu.:0.6614  
##  Max.   :10.1882   Max.   :1.1404   Max.   :1.600   Max.   :4.5610  
##                                                                     
##      mrrtf           minfertf         lf7rup         hstructn      
##  Min.   :0.0000   Min.   :2.000   Min.   :1.000   Min.   : 0.0000  
##  1st Qu.:0.0000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.: 0.4286  
##  Median :0.0000   Median :2.000   Median :3.000   Median : 1.0429  
##  Mean   :0.3102   Mean   :2.031   Mean   :3.308   Mean   : 1.1182  
##  3rd Qu.:0.5385   3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.: 1.0429  
##  Max.   :2.8142   Max.   :3.000   Max.   :7.000   Max.   :10.0000  
##                                                                    
##   geolrngaggn         geolmnaggn         bdensity50n       slopern        
##  Min.   :  0.0115   Min.   :   0.0058   Min.   :1.255   Min.   : 0.00626  
##  1st Qu.: 65.5000   1st Qu.:  32.7500   1st Qu.:1.341   1st Qu.: 0.02296  
##  Median :300.0000   Median :2050.0000   Median :1.367   Median : 0.06010  
##  Mean   :320.7933   Mean   :1557.3198   Mean   :1.349   Mean   : 0.54566  
##  3rd Qu.:300.0000   3rd Qu.:2650.0000   3rd Qu.:1.367   3rd Qu.: 0.20859  
##  Max.   :900.0000   Max.   :2650.0000   Max.   :1.367   Max.   :21.14644  
##                                                                           
##     HubDist         MIN_AGE_MA    month_collection
##  Min.   : 2.124   Min.   :2453   June     : 9     
##  1st Qu.: 7.975   1st Qu.:2454   September: 9     
##  Median :17.310   Median :2506   April    : 6     
##  Mean   :28.264   Mean   :2525   July     : 6     
##  3rd Qu.:51.892   3rd Qu.:2597   August   : 6     
##  Max.   :77.437   Max.   :2597   December : 6     
##                                  (Other)  :23     
##                                      HubName  
##  anticline, exposed                      :16  
##  concealed                               : 0  
##  exposed                                 :10  
##  normal, exposed, tick on downthrown side: 5  
##  overturned syncline, exposed            : 2  
##  syncline, exposed                       :32  
##                                               
##                                          UNITNAME 
##  Brockman Iron Formation                     :23  
##  Marra Mamba Iron Formation                  :29  
##  Mount McRae Shale and Mount Sylvia Formation: 1  
##  Weeli Wolli Formation                       : 4  
##  Wittenoom Formation                         : 8  
##                                                   
##                                                   
##                                      ROCKTYPE1 
##  sedimentary carbonate                    : 8  
##  sedimentary other chemical or biochemical:56  
##  sedimentary siliciclastic                : 1  
##                                                
##                                                
##                                                
##                                                
##                                         FORMATION  true_troglofauna
##  Brockman Iron Formation                     :23   0:13            
##  Marra Mamba Iron Formation                  :29   1:52            
##  Mount McRae Shale and Mount Sylvia Formation: 1                   
##  Weeli Wolli Formation                       : 4                   
##  Wittenoom Formation                         : 8                   
##                                                                    
##                                                                    
##     LATITUDE        LONGITUDE    
##  Min.   :-23.26   Min.   :118.7  
##  1st Qu.:-22.98   1st Qu.:118.9  
##  Median :-22.94   Median :119.0  
##  Mean   :-22.88   Mean   :119.0  
##  3rd Qu.:-22.68   3rd Qu.:119.0  
##  Max.   :-22.53   Max.   :119.3  
## 
# Overlay two more point layers on the scatter plot that is already open.
# points() draws in the coordinate system established by the preceding plot()
# call and cannot rescale the axes, so no xlim/ylim are passed here: they are
# not graphical parameters, and supplying them only triggers
# '"xlim" is not a graphical parameter' warnings while being ignored.

# Rows selected by gbb (Test_33New == 1): red open diamonds (pch = 5).
points(geo2$LONGITUDE, geo2$LATITUDE, pch = 5, col = "red")

# Rows selected by gaaAu (true_troglofauna == 0): crosses (pch = 4) in black
# at 15% opacity so that overlapping markers stay distinguishable.
points(geoAu$LONGITUDE, geoAu$LATITUDE, pch = 4, col = rgb(0, 0, 0, 0.15))

# Two-column coordinate table: the geo2 rows first, followed by the geoAu rows.
# Only LONGITUDE and LATITUDE are carried over; no column records which of the
# two subsets a row came from.
combined_geo <- data.frame(
  LONGITUDE = c(geo2[["LONGITUDE"]], geoAu[["LONGITUDE"]]),
  LATITUDE  = c(geo2[["LATITUDE"]], geoAu[["LATITUDE"]])
)


# Export the full geo2 subset (all columns, not just the coordinates) as CSV in
# the current working directory; row names are left out of the file.
write.csv(
  x = geo2,
  file = "Area2_predicted_presence_points_troglofauna_coordinates.csv",
  row.names = FALSE
)