library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(caret)
## Loading required package: lattice
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(corrplot)
## corrplot 0.92 loaded
library(mltools)
library(rockchalk)
##
## Attaching package: 'rockchalk'
## The following object is masked from 'package:mltools':
##
## skewness
## The following object is masked from 'package:dplyr':
##
## summarize
# Load the filtered Area 01 table (comma-separated, first row holds the column
# names) and print per-column summary statistics.
data2 <- read.csv(file = "Area_01_Filtered_.csv", header = TRUE, sep = ",")
summary(object = data2)
## trngcv bio14 slope rugg500s
## Min. : 8.713 Min. :0.01801 Min. : 0.2666 Min. : 0.02999
## 1st Qu.: 9.242 1st Qu.:0.11977 1st Qu.: 1.9476 1st Qu.: 1.41555
## Median : 9.323 Median :0.16467 Median : 3.3912 Median : 2.87604
## Mean : 9.343 Mean :0.17485 Mean : 4.8679 Mean : 3.28088
## 3rd Qu.: 9.414 3rd Qu.:0.21515 3rd Qu.: 7.5301 3rd Qu.: 4.88870
## Max. :10.125 Max. :0.42712 Max. :17.2472 Max. :10.10635
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.611 Min. :0.03184 Min. :0.00000 Min. : 0.08751
## 1st Qu.: 7.941 1st Qu.:0.12586 1st Qu.:0.00000 1st Qu.: 1.09978
## Median : 8.988 Median :0.16938 Median :0.02253 Median : 3.08139
## Mean : 8.994 Mean :0.17768 Mean :0.06768 Mean : 4.12518
## 3rd Qu.:10.064 3rd Qu.:0.22529 3rd Qu.:0.13598 3rd Qu.: 5.90168
## Max. :13.544 Max. :0.40829 Max. :0.35341 Max. :19.05097
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1719 Min. :0.01924 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3731 1st Qu.:0.14779 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4711 Median :0.22176 Median :0.0000 Median :0.00000
## Mean :0.5192 Mean :0.24437 Mean :0.3047 Mean :0.05822
## 3rd Qu.:0.6152 3rd Qu.:0.34663 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.6098 Max. :0.71240 Max. :2.8579 Max. :2.59788
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.222 Mean :3.192 Mean : 1.470 Mean :185.9895
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.:300.0000
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
## elevationm wr_unrn solpawhcn slopern
## Min. :443.9 Min. : 0.00 Min. : 82.0 Min. : 0.00162
## 1st Qu.:558.3 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.: 0.01991
## Median :575.0 Median :17.86 Median : 96.0 Median : 0.05780
## Mean :583.5 Mean :17.25 Mean :100.4 Mean : 0.18529
## 3rd Qu.:611.7 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.: 0.16345
## Max. :726.1 Max. :72.00 Max. :157.0 Max. :10.06944
## MIN_AGE_MA HubDist month_collection UNITNAME
## Min. :2454 Min. : 1.294 Length:634 Length:634
## 1st Qu.:2454 1st Qu.: 16.807 Class :character Class :character
## Median :2454 Median : 21.831 Mode :character Mode :character
## Mean :2473 Mean : 41.478
## 3rd Qu.:2494 3rd Qu.: 78.344
## Max. :2597 Max. :104.012
## ROCKTYPE1 FORMATION HubName true_troglofauna
## Length:634 Length:634 Length:634 Min. :0.0000
## Class :character Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Mode :character Median :1.0000
## Mean :0.6625
## 3rd Qu.:1.0000
## Max. :1.0000
## LATITUDE LONGITUDE
## Min. :-22.64 Min. :117.0
## 1st Qu.:-22.22 1st Qu.:117.7
## Median :-22.16 Median :117.8
## Mean :-22.20 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.04 Max. :118.0
dim(data2)
## [1] 634 30
sapply(data2,class)
## trngcv bio14 slope rugg500s
## "numeric" "numeric" "numeric" "numeric"
## pil_twim pil_twicv pil_topocv pil_slps
## "numeric" "numeric" "numeric" "numeric"
## pil_slpcv pil_elr3cv mrvbf mrrtf
## "numeric" "numeric" "numeric" "numeric"
## minfertf lf7rup hstructn geolrngaggn
## "integer" "integer" "numeric" "numeric"
## elevationm wr_unrn solpawhcn slopern
## "numeric" "numeric" "integer" "numeric"
## MIN_AGE_MA HubDist month_collection UNITNAME
## "integer" "numeric" "character" "character"
## ROCKTYPE1 FORMATION HubName true_troglofauna
## "character" "character" "character" "integer"
## LATITUDE LONGITUDE
## "numeric" "numeric"
# Convert the true_troglofauna column (stored as 0/1 integers) and the five
# character columns to factors, then print every column's class to confirm.
factor_cols <- c("true_troglofauna", "HubName", "FORMATION", "ROCKTYPE1",
                 "UNITNAME", "month_collection")
data2[factor_cols] <- lapply(data2[factor_cols], factor)
sapply(data2, class)
## trngcv bio14 slope rugg500s
## "numeric" "numeric" "numeric" "numeric"
## pil_twim pil_twicv pil_topocv pil_slps
## "numeric" "numeric" "numeric" "numeric"
## pil_slpcv pil_elr3cv mrvbf mrrtf
## "numeric" "numeric" "numeric" "numeric"
## minfertf lf7rup hstructn geolrngaggn
## "integer" "integer" "numeric" "numeric"
## elevationm wr_unrn solpawhcn slopern
## "numeric" "numeric" "integer" "numeric"
## MIN_AGE_MA HubDist month_collection UNITNAME
## "integer" "numeric" "factor" "factor"
## ROCKTYPE1 FORMATION HubName true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
summary(data2$month_collection)
## April August December February January July June March
## 56 2 45 26 92 7 27 128
## May November October September
## 122 39 13 77
# Re-level month_collection in calendar order instead of the alphabetical
# default; month.name is base R's c("January", ..., "December").
data2$month_collection <- factor(data2$month_collection, levels = month.name)
summary(data2$month_collection)
## January February March April May June July August
## 92 26 128 56 122 27 7 2
## September October November December
## 77 13 39 45
# Bar chart of the number of samples collected in each month.
plot(data2$month_collection, main = "Monthly Samples", ylim = c(0, 200),
     ylab = "Number of Samples")
# Row numbers of data2 grouped by collection month. m1..m12 hold the rows for
# January..December; each length is printed as a sanity check.
month_rows <- split(seq_len(nrow(data2)), data2$month_collection)
m1 <- month_rows[["January"]]; length(m1)
## [1] 92
m2 <- month_rows[["February"]]; length(m2)
## [1] 26
m3 <- month_rows[["March"]]; length(m3)
## [1] 128
m4 <- month_rows[["April"]]; length(m4)
## [1] 56
m5 <- month_rows[["May"]]; length(m5)
## [1] 122
m6 <- month_rows[["June"]]; length(m6)
## [1] 27
m7 <- month_rows[["July"]]; length(m7)
## [1] 7
m8 <- month_rows[["August"]]; length(m8)
## [1] 2
m9 <- month_rows[["September"]]; length(m9)
## [1] 77
m10 <- month_rows[["October"]]; length(m10)
## [1] 13
m11 <- month_rows[["November"]]; length(m11)
## [1] 39
m12 <- month_rows[["December"]]; length(m12)
## [1] 45
# All row numbers concatenated in calendar-month order; the length should
# equal the number of rows in data2.
tm <- unlist(month_rows[month.name], use.names = FALSE); length(tm)
## [1] 634
# Per-month sample counts and their median.
ltm <- unname(lengths(month_rows[month.name])); median(ltm)
## [1] 42
# Down-sample the five months that have more than 50 rows (January, March,
# April, May, September) to exactly 50 rows each, without replacement; all
# other months are kept in full. The seed fixes the draw, and the sample()
# calls must stay in this order to reproduce the same rows.
set.seed(78945)
max_per_month <- 50
sm1 <- sample(m1, size = max_per_month, replace = FALSE); length(sm1); head(sm1)
## [1] 50
## [1] 110 102 27 370 86 377
sm3 <- sample(m3, size = max_per_month, replace = FALSE); length(sm3); head(sm3)
## [1] 50
## [1] 261 40 372 503 279 578
sm4 <- sample(m4, size = max_per_month, replace = FALSE); length(sm4); head(sm4)
## [1] 50
## [1] 155 449 156 135 38 57
sm5 <- sample(m5, size = max_per_month, replace = FALSE); length(sm5); head(sm5)
## [1] 50
## [1] 438 141 127 472 126 298
sm9 <- sample(m9, size = max_per_month, replace = FALSE); length(sm9); head(sm9)
## [1] 50
## [1] 321 195 308 355 339 243
# Row numbers of the capped data set, in calendar-month order.
utm <- c(sm1, m2, sm3, sm4, sm5, m6, m7, m8, sm9, m10, m11, m12); length(utm)
## [1] 409
# Subset of data2 containing only the retained rows.
data1 <- data2[utm, ]; dim(data1)
## [1] 409 30
summary(data1)
## trngcv bio14 slope rugg500s
## Min. : 8.713 Min. :0.01801 Min. : 0.2666 Min. : 0.02999
## 1st Qu.: 9.234 1st Qu.:0.12241 1st Qu.: 1.9117 1st Qu.: 1.33417
## Median : 9.317 Median :0.17267 Median : 3.3563 Median : 2.77642
## Mean : 9.324 Mean :0.18034 Mean : 4.8591 Mean : 3.24606
## 3rd Qu.: 9.410 3rd Qu.:0.22232 3rd Qu.: 7.1721 3rd Qu.: 4.97943
## Max. :10.026 Max. :0.42288 Max. :17.2472 Max. :10.10635
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.749 Min. :0.03184 Min. :0.00000 Min. : 0.08751
## 1st Qu.: 7.965 1st Qu.:0.12336 1st Qu.:0.00000 1st Qu.: 1.07585
## Median : 9.006 Median :0.16938 Median :0.01735 Median : 2.73185
## Mean : 9.007 Mean :0.17782 Mean :0.06433 Mean : 3.96138
## 3rd Qu.:10.122 3rd Qu.:0.23025 3rd Qu.:0.13316 3rd Qu.: 5.71830
## Max. :13.544 Max. :0.40758 Max. :0.35341 Max. :15.25064
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1719 Min. :0.03157 Min. :0.000 Min. :0.00000
## 1st Qu.:0.3758 1st Qu.:0.13822 1st Qu.:0.000 1st Qu.:0.00000
## Median :0.4696 Median :0.22558 Median :0.000 Median :0.00000
## Mean :0.5165 Mean :0.24364 Mean :0.306 Mean :0.06968
## 3rd Qu.:0.6153 3rd Qu.:0.33502 3rd Qu.:0.000 3rd Qu.:0.00000
## Max. :1.6098 Max. :0.71240 Max. :2.858 Max. :2.59788
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.208 Mean :3.227 Mean : 1.647 Mean :171.7449
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.:300.0000
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :443.9 Min. : 0.00 Min. : 82.0 Min. : 0.00162
## 1st Qu.:559.7 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.: 0.02093
## Median :577.0 Median :17.86 Median : 96.0 Median : 0.06396
## Mean :585.3 Mean :16.78 Mean :101.3 Mean : 0.21262
## 3rd Qu.:614.1 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.: 0.16934
## Max. :726.1 Max. :72.00 Max. :157.0 Max. :10.06944
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 1.294 January : 50
## 1st Qu.:2454 1st Qu.: 16.613 March : 50
## Median :2454 Median : 21.793 April : 50
## Mean :2477 Mean : 39.334 May : 50
## 3rd Qu.:2494 3rd Qu.: 76.847 September: 50
## Max. :2597 Max. :104.012 December : 45
## (Other) :114
## UNITNAME
## Brockman Iron Formation :273
## Marra Mamba Iron Formation : 35
## Mount McRae Shale and Mount Sylvia Formation: 75
## Wittenoom Formation : 26
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 26
## sedimentary other chemical or biochemical:308
## sedimentary siliciclastic : 75
##
##
##
##
## FORMATION
## Brockman Iron Formation :273
## Marra Mamba Iron Formation : 35
## Mount McRae Shale and Mount Sylvia Formation: 75
## Wittenoom Formation : 26
##
##
##
## HubName
## anticline, exposed :235
## exposed : 21
## strike-slip, exposed, showing relative dextral displacement: 10
## syncline, exposed :143
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0:147 Min. :-22.64 Min. :117.0
## 1:262 1st Qu.:-22.23 1st Qu.:117.7
## Median :-22.16 Median :117.8
## Mean :-22.21 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.04 Max. :118.0
##
# From this point data2 is the month-capped subset (data1); the full table is
# overwritten. Print the per-month counts of the new data2.
data2 <- data1
summary(data2[["month_collection"]])
## January February March April May June July August
## 50 26 50 50 50 27 7 2
## September October November December
## 50 13 39 45
plot(data2$month_collection,main="Monthly Samples",ylim=c(0,200),ylab="Number of Samples")
levels(data2$true_troglofauna);levels(data2$HubName);levels(data2$FORMATION);levels(data2$ROCKTYPE1);levels(data2$UNITNAME);levels(data2$month_collection)
## [1] "0" "1"
## [1] "anticline, exposed"
## [2] "exposed"
## [3] "strike-slip, exposed, showing relative dextral displacement"
## [4] "syncline, exposed"
## [1] "Brockman Iron Formation"
## [2] "Marra Mamba Iron Formation"
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Wittenoom Formation"
## [1] "sedimentary carbonate"
## [2] "sedimentary other chemical or biochemical"
## [3] "sedimentary siliciclastic"
## [1] "Brockman Iron Formation"
## [2] "Marra Mamba Iron Formation"
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Wittenoom Formation"
## [1] "January" "February" "March" "April" "May" "June"
## [7] "July" "August" "September" "October" "November" "December"
summary(data2)
## trngcv bio14 slope rugg500s
## Min. : 8.713 Min. :0.01801 Min. : 0.2666 Min. : 0.02999
## 1st Qu.: 9.234 1st Qu.:0.12241 1st Qu.: 1.9117 1st Qu.: 1.33417
## Median : 9.317 Median :0.17267 Median : 3.3563 Median : 2.77642
## Mean : 9.324 Mean :0.18034 Mean : 4.8591 Mean : 3.24606
## 3rd Qu.: 9.410 3rd Qu.:0.22232 3rd Qu.: 7.1721 3rd Qu.: 4.97943
## Max. :10.026 Max. :0.42288 Max. :17.2472 Max. :10.10635
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.749 Min. :0.03184 Min. :0.00000 Min. : 0.08751
## 1st Qu.: 7.965 1st Qu.:0.12336 1st Qu.:0.00000 1st Qu.: 1.07585
## Median : 9.006 Median :0.16938 Median :0.01735 Median : 2.73185
## Mean : 9.007 Mean :0.17782 Mean :0.06433 Mean : 3.96138
## 3rd Qu.:10.122 3rd Qu.:0.23025 3rd Qu.:0.13316 3rd Qu.: 5.71830
## Max. :13.544 Max. :0.40758 Max. :0.35341 Max. :15.25064
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1719 Min. :0.03157 Min. :0.000 Min. :0.00000
## 1st Qu.:0.3758 1st Qu.:0.13822 1st Qu.:0.000 1st Qu.:0.00000
## Median :0.4696 Median :0.22558 Median :0.000 Median :0.00000
## Mean :0.5165 Mean :0.24364 Mean :0.306 Mean :0.06968
## 3rd Qu.:0.6153 3rd Qu.:0.33502 3rd Qu.:0.000 3rd Qu.:0.00000
## Max. :1.6098 Max. :0.71240 Max. :2.858 Max. :2.59788
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.208 Mean :3.227 Mean : 1.647 Mean :171.7449
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.:300.0000
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :443.9 Min. : 0.00 Min. : 82.0 Min. : 0.00162
## 1st Qu.:559.7 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.: 0.02093
## Median :577.0 Median :17.86 Median : 96.0 Median : 0.06396
## Mean :585.3 Mean :16.78 Mean :101.3 Mean : 0.21262
## 3rd Qu.:614.1 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.: 0.16934
## Max. :726.1 Max. :72.00 Max. :157.0 Max. :10.06944
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 1.294 January : 50
## 1st Qu.:2454 1st Qu.: 16.613 March : 50
## Median :2454 Median : 21.793 April : 50
## Mean :2477 Mean : 39.334 May : 50
## 3rd Qu.:2494 3rd Qu.: 76.847 September: 50
## Max. :2597 Max. :104.012 December : 45
## (Other) :114
## UNITNAME
## Brockman Iron Formation :273
## Marra Mamba Iron Formation : 35
## Mount McRae Shale and Mount Sylvia Formation: 75
## Wittenoom Formation : 26
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 26
## sedimentary other chemical or biochemical:308
## sedimentary siliciclastic : 75
##
##
##
##
## FORMATION
## Brockman Iron Formation :273
## Marra Mamba Iron Formation : 35
## Mount McRae Shale and Mount Sylvia Formation: 75
## Wittenoom Formation : 26
##
##
##
## HubName
## anticline, exposed :235
## exposed : 21
## strike-slip, exposed, showing relative dextral displacement: 10
## syncline, exposed :143
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0:147 Min. :-22.64 Min. :117.0
## 1:262 1st Qu.:-22.23 1st Qu.:117.7
## Median :-22.16 Median :117.8
## Mean :-22.21 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.04 Max. :118.0
##
dim(data2)
## [1] 409 30
# Names of every column except the last nine (i.e. the first 21 of the 30
# columns); these are the columns coerced to numeric below.
instanceconvert <- head(names(data2), -9L)
instanceconvert
## [1] "trngcv" "bio14" "slope" "rugg500s" "pil_twim"
## [6] "pil_twicv" "pil_topocv" "pil_slps" "pil_slpcv" "pil_elr3cv"
## [11] "mrvbf" "mrrtf" "minfertf" "lf7rup" "hstructn"
## [16] "geolrngaggn" "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA"
# Coerce the selected columns to numeric (the integer columns become double),
# then print every column's class to confirm.
data2[instanceconvert] <- lapply(data2[instanceconvert], as.numeric)
sapply(data2, class)
## trngcv bio14 slope rugg500s
## "numeric" "numeric" "numeric" "numeric"
## pil_twim pil_twicv pil_topocv pil_slps
## "numeric" "numeric" "numeric" "numeric"
## pil_slpcv pil_elr3cv mrvbf mrrtf
## "numeric" "numeric" "numeric" "numeric"
## minfertf lf7rup hstructn geolrngaggn
## "numeric" "numeric" "numeric" "numeric"
## elevationm wr_unrn solpawhcn slopern
## "numeric" "numeric" "numeric" "numeric"
## MIN_AGE_MA HubDist month_collection UNITNAME
## "numeric" "numeric" "factor" "factor"
## ROCKTYPE1 FORMATION HubName true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
names(data2)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName" "true_troglofauna"
## [29] "LATITUDE" "LONGITUDE"
sapply(data2,class)
## trngcv bio14 slope rugg500s
## "numeric" "numeric" "numeric" "numeric"
## pil_twim pil_twicv pil_topocv pil_slps
## "numeric" "numeric" "numeric" "numeric"
## pil_slpcv pil_elr3cv mrvbf mrrtf
## "numeric" "numeric" "numeric" "numeric"
## minfertf lf7rup hstructn geolrngaggn
## "numeric" "numeric" "numeric" "numeric"
## elevationm wr_unrn solpawhcn slopern
## "numeric" "numeric" "numeric" "numeric"
## MIN_AGE_MA HubDist month_collection UNITNAME
## "numeric" "numeric" "factor" "factor"
## ROCKTYPE1 FORMATION HubName true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
levels(data2$true_troglofauna);levels(data2$HubName);levels(data2$FORMATION);levels(data2$ROCKTYPE1);levels(data2$UNITNAME);levels(data2$month_collection)
## [1] "0" "1"
## [1] "anticline, exposed"
## [2] "exposed"
## [3] "strike-slip, exposed, showing relative dextral displacement"
## [4] "syncline, exposed"
## [1] "Brockman Iron Formation"
## [2] "Marra Mamba Iron Formation"
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Wittenoom Formation"
## [1] "sedimentary carbonate"
## [2] "sedimentary other chemical or biochemical"
## [3] "sedimentary siliciclastic"
## [1] "Brockman Iron Formation"
## [2] "Marra Mamba Iron Formation"
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Wittenoom Formation"
## [1] "January" "February" "March" "April" "May" "June"
## [7] "July" "August" "September" "October" "November" "December"
# Positions 1..ncol(data2), one per column.
column_indexes <- seq_len(ncol(data2))
# Same positions labelled with the column names, printed as a name -> index
# lookup.
column_indexes_named <- column_indexes
names(column_indexes_named) <- names(data2)
print(column_indexes_named)
## trngcv bio14 slope rugg500s
## 1 2 3 4
## pil_twim pil_twicv pil_topocv pil_slps
## 5 6 7 8
## pil_slpcv pil_elr3cv mrvbf mrrtf
## 9 10 11 12
## minfertf lf7rup hstructn geolrngaggn
## 13 14 15 16
## elevationm wr_unrn solpawhcn slopern
## 17 18 19 20
## MIN_AGE_MA HubDist month_collection UNITNAME
## 21 22 23 24
## ROCKTYPE1 FORMATION HubName true_troglofauna
## 25 26 27 28
## LATITUDE LONGITUDE
## 29 30
sapply(data2,class)
## trngcv bio14 slope rugg500s
## "numeric" "numeric" "numeric" "numeric"
## pil_twim pil_twicv pil_topocv pil_slps
## "numeric" "numeric" "numeric" "numeric"
## pil_slpcv pil_elr3cv mrvbf mrrtf
## "numeric" "numeric" "numeric" "numeric"
## minfertf lf7rup hstructn geolrngaggn
## "numeric" "numeric" "numeric" "numeric"
## elevationm wr_unrn solpawhcn slopern
## "numeric" "numeric" "numeric" "numeric"
## MIN_AGE_MA HubDist month_collection UNITNAME
## "numeric" "numeric" "factor" "factor"
## ROCKTYPE1 FORMATION HubName true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
# Coerce every column except the last nine to numeric and print the resulting
# classes. NOTE(review): the classes printed just above are already numeric
# for these columns, so this step leaves data2 unchanged at this point.
instanceconvert <- head(names(data2), -9L)
data2[instanceconvert] <- lapply(data2[instanceconvert], as.numeric)
sapply(data2, class)
## trngcv bio14 slope rugg500s
## "numeric" "numeric" "numeric" "numeric"
## pil_twim pil_twicv pil_topocv pil_slps
## "numeric" "numeric" "numeric" "numeric"
## pil_slpcv pil_elr3cv mrvbf mrrtf
## "numeric" "numeric" "numeric" "numeric"
## minfertf lf7rup hstructn geolrngaggn
## "numeric" "numeric" "numeric" "numeric"
## elevationm wr_unrn solpawhcn slopern
## "numeric" "numeric" "numeric" "numeric"
## MIN_AGE_MA HubDist month_collection UNITNAME
## "numeric" "numeric" "factor" "factor"
## ROCKTYPE1 FORMATION HubName true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
str(data2)
## 'data.frame': 409 obs. of 30 variables:
## $ trngcv : num 9.89 9.47 9.44 9.32 9.42 ...
## $ bio14 : num 0.254 0.134 0.125 0.139 0.12 ...
## $ slope : num 9.74 11.57 13.37 1.15 10.27 ...
## $ rugg500s : num 5.46 6.52 3.05 1.47 3.42 ...
## $ pil_twim : num 5.75 8.43 9.24 9.98 6.86 ...
## $ pil_twicv : num 0.0979 0.1694 0.3028 0.1597 0.1373 ...
## $ pil_topocv : num 0.1106 0.1269 0.1116 0 0.0426 ...
## $ pil_slps : num 10.968 3.564 4.554 0.951 14.71 ...
## $ pil_slpcv : num 0.323 0.289 0.454 0.615 0.59 ...
## $ pil_elr3cv : num 0.2294 0.2977 0.1921 0.36 0.0948 ...
## $ mrvbf : num 0 0 0 0 0 ...
## $ mrrtf : num 0 0 0 0.661 0 ...
## $ minfertf : num 2 2 2 3 2 3 2 2 2 2 ...
## $ lf7rup : num 2 2 2 6 3 4 4 1 4 3 ...
## $ hstructn : num 1.04 1.04 1.04 1.04 1.04 ...
## $ geolrngaggn : num 300 2.588 2.588 0.0115 2.588 ...
## $ elevationm : num 726 543 543 562 551 ...
## $ wr_unrn : num 17.9 17.9 17.9 17.9 17.9 ...
## $ solpawhcn : num 96 96 96 96 96 96 96 96 96 96 ...
## $ slopern : num 0.00341 0.00742 0.08237 0.19806 0.01298 ...
## $ MIN_AGE_MA : num 2454 2494 2494 2454 2454 ...
## $ HubDist : num 76.1 21.5 21.7 15.3 21.6 ...
## $ month_collection: Factor w/ 12 levels "January","February",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ UNITNAME : Factor w/ 4 levels "Brockman Iron Formation",..: 1 3 3 1 1 1 3 1 3 1 ...
## $ ROCKTYPE1 : Factor w/ 3 levels "sedimentary carbonate",..: 2 3 3 2 2 2 3 2 3 2 ...
## $ FORMATION : Factor w/ 4 levels "Brockman Iron Formation",..: 1 3 3 1 1 1 3 1 3 1 ...
## $ HubName : Factor w/ 4 levels "anticline, exposed",..: 4 1 1 1 1 1 1 1 1 1 ...
## $ true_troglofauna: Factor w/ 2 levels "0","1": 1 1 2 1 2 1 2 1 2 2 ...
## $ LATITUDE : num -22.2 -22.1 -22.1 -22.1 -22.1 ...
## $ LONGITUDE : num 118 118 118 118 118 ...
dim(data2)
## [1] 409 30
# Shuffle the row order of data2. Every row is kept: sampling nrow(data2)
# indices without replacement yields a random permutation.
# The row count is taken from the data rather than hard-coded, so a change in
# the upstream month capping can neither make sample() fail (fewer rows than
# requested) nor silently drop rows (more rows than requested).
num_rows <- nrow(data2)
# Randomly permute the row indices.
selected_rows <- sample(nrow(data2), num_rows)
# Reorder the data frame accordingly.
data2 <- data2[selected_rows, ]
dim(data2)
## [1] 409 30
# Row positions and counts for presence (true_troglofauna == 1) and absence
# (true_troglofauna == 0).
tg <- which(data2$true_troglofauna == 1)
tgg <- length(tg)
fg <- which(data2$true_troglofauna == 0)
fgg <- length(fg)
# Share of each class as a percentage of all rows counted above.
plot_num_ident <- data.frame(Absence = 100 * fgg / (tgg + fgg),
                             Presence = 100 * tgg / (tgg + fgg))
# Plain numeric vector: c(absence %, presence %).
pie_data <- as.numeric(plot_num_ident)
# Slice labels with the percentage rounded to one decimal place.
labels <- paste0(c("Absent\n", "Present\n"), round(pie_data, 1), "%")
# Draw the pie without the default outside labels.
pie(pie_data,
    labels = NA,
    main = "AREA 1",
    col = c("orange", "lightblue"))
# Cumulative midpoint of each slice, in the same units as pie_data.
pie_slices <- cumsum(pie_data) - pie_data / 2
# Convert the midpoints to angles and write each label at radius 0.5 along
# that direction.
slice_angle <- 2 * pi * pie_slices / sum(pie_data)
text(x = 0.5 * cos(slice_angle),
     y = 0.5 * sin(slice_angle),
     labels = labels, cex = 1.5)
# Keep every column except the last eight (the first 22 of the 30 columns)
# and print the dimensions of the result.
selectcol_Data2 <- data2[, head(seq_along(data2), -8L)]
dim(selectcol_Data2)
## [1] 409 22
names(selectcol_Data2)
## [1] "trngcv" "bio14" "slope" "rugg500s" "pil_twim"
## [6] "pil_twicv" "pil_topocv" "pil_slps" "pil_slpcv" "pil_elr3cv"
## [11] "mrvbf" "mrrtf" "minfertf" "lf7rup" "hstructn"
## [16] "geolrngaggn" "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist"
# Pairwise Pearson correlation matrix of the selected columns, printed in
# full.
correlations <- cor(selectcol_Data2, method = "pearson")
print(correlations)
## trngcv bio14 slope rugg500s pil_twim
## trngcv 1.00000000 -0.14438838 0.69176889 0.477926942 -0.279056003
## bio14 -0.14438838 1.00000000 0.07029883 -0.128492177 -0.226561962
## slope 0.69176889 0.07029883 1.00000000 0.517511499 -0.387364499
## rugg500s 0.47792694 -0.12849218 0.51751150 1.000000000 -0.138864300
## pil_twim -0.27905600 -0.22656196 -0.38736450 -0.138864300 1.000000000
## pil_twicv 0.01702041 -0.29376592 -0.09729433 0.005626062 0.367094043
## pil_topocv 0.05959847 -0.10025211 0.04272442 0.288421473 0.035020538
## pil_slps 0.42525435 -0.02594281 0.47393877 0.385615519 -0.612390330
## pil_slpcv -0.03527169 -0.24291583 -0.10400086 0.185707019 0.252591021
## pil_elr3cv -0.02048230 -0.23334121 -0.09470449 0.449626784 0.499634174
## mrvbf -0.26619272 -0.03411611 -0.44723384 -0.338185583 0.334840763
## mrrtf -0.21269176 0.14716955 -0.22818463 -0.150700825 -0.007752769
## minfertf -0.20113479 -0.33116358 -0.33421583 -0.267348143 0.295346069
## lf7rup -0.36740253 -0.16925245 -0.48975316 -0.492283947 0.281602595
## hstructn -0.44239447 0.32588806 -0.20271250 -0.269254064 0.053314883
## geolrngaggn 0.23224359 0.16859387 0.17634243 0.132026204 -0.431930571
## elevationm 0.32827534 0.45904523 0.28126622 0.035784792 -0.297688074
## wr_unrn -0.06234149 -0.03941867 0.05620525 0.073818209 -0.206746105
## solpawhcn -0.14671433 0.23912653 -0.17955216 -0.202149466 0.178602521
## slopern -0.18198480 -0.02970640 -0.15398297 -0.157829835 0.264328275
## MIN_AGE_MA -0.46854175 0.41961974 -0.18098149 -0.332963519 -0.133647611
## HubDist 0.25900610 -0.18161858 0.19846532 0.154051977 0.030718072
## pil_twicv pil_topocv pil_slps pil_slpcv pil_elr3cv
## trngcv 0.017020409 0.05959847 0.42525435 -0.035271690 -0.02048230
## bio14 -0.293765922 -0.10025211 -0.02594281 -0.242915826 -0.23334121
## slope -0.097294332 0.04272442 0.47393877 -0.104000860 -0.09470449
## rugg500s 0.005626062 0.28842147 0.38561552 0.185707019 0.44962678
## pil_twim 0.367094043 0.03502054 -0.61239033 0.252591021 0.49963417
## pil_twicv 1.000000000 0.13935933 -0.04876311 0.280298038 0.25864734
## pil_topocv 0.139359332 1.00000000 0.13690929 0.212719258 0.21220279
## pil_slps -0.048763108 0.13690929 1.00000000 0.316376132 -0.29073744
## pil_slpcv 0.280298038 0.21271926 0.31637613 1.000000000 0.25064028
## pil_elr3cv 0.258647345 0.21220279 -0.29073744 0.250640275 1.00000000
## mrvbf 0.143789374 -0.19705951 -0.34099450 0.029400935 0.01624646
## mrrtf -0.154008477 0.11667742 -0.10285428 -0.065781453 -0.04792797
## minfertf 0.213223063 -0.18208188 -0.25711115 0.033170964 0.12410535
## lf7rup 0.127503291 -0.19397234 -0.38134680 -0.010497415 -0.03479760
## hstructn -0.198425125 -0.14170688 -0.15945066 -0.096202710 -0.14666667
## geolrngaggn -0.185108425 0.05077707 0.36090860 -0.140684002 -0.21925198
## elevationm -0.308216732 -0.06616776 0.21818619 -0.117813496 -0.22561452
## wr_unrn -0.106794102 -0.07493941 0.11352341 0.002507419 -0.02330359
## solpawhcn -0.012078107 0.05138687 -0.22465768 -0.047500788 -0.02096902
## slopern 0.031603804 -0.13003649 -0.16139194 0.104391301 0.01645636
## MIN_AGE_MA -0.195963275 -0.12402301 -0.14535780 -0.114125936 -0.25250500
## HubDist 0.039375610 0.10601700 0.17613904 0.266842636 0.12772916
## mrvbf mrrtf minfertf lf7rup hstructn
## trngcv -0.26619272 -0.212691756 -0.20113479 -0.367402529 -0.44239447
## bio14 -0.03411611 0.147169552 -0.33116358 -0.169252446 0.32588806
## slope -0.44723384 -0.228184626 -0.33421583 -0.489753156 -0.20271250
## rugg500s -0.33818558 -0.150700825 -0.26734814 -0.492283947 -0.26925406
## pil_twim 0.33484076 -0.007752769 0.29534607 0.281602595 0.05331488
## pil_twicv 0.14378937 -0.154008477 0.21322306 0.127503291 -0.19842512
## pil_topocv -0.19705951 0.116677416 -0.18208188 -0.193972339 -0.14170688
## pil_slps -0.34099450 -0.102854276 -0.25711115 -0.381346802 -0.15945066
## pil_slpcv 0.02940093 -0.065781453 0.03317096 -0.010497415 -0.09620271
## pil_elr3cv 0.01624646 -0.047927972 0.12410535 -0.034797599 -0.14666667
## mrvbf 1.00000000 -0.065557280 0.32959668 0.527999249 0.20005240
## mrrtf -0.06555728 1.000000000 0.02801221 0.031133482 0.29308847
## minfertf 0.32959668 0.028012213 1.00000000 0.444817211 0.03598896
## lf7rup 0.52799925 0.031133482 0.44481721 1.000000000 0.07011658
## hstructn 0.20005240 0.293088472 0.03598896 0.070116581 1.00000000
## geolrngaggn -0.21208074 0.029608944 -0.23450476 -0.197103865 -0.03969216
## elevationm -0.11948245 -0.010279115 -0.24459358 -0.200140640 0.01344205
## wr_unrn -0.09664253 0.010595057 -0.10583662 -0.022543060 -0.07426712
## solpawhcn 0.13166310 0.159540420 0.13802186 0.101190355 0.14136903
## slopern 0.33641174 -0.034854407 0.17182369 0.228577910 0.39431854
## MIN_AGE_MA 0.00730014 0.323227324 -0.09194556 0.001095106 0.57904539
## HubDist -0.15219624 -0.119165302 -0.11781342 -0.263770024 -0.09757715
## geolrngaggn elevationm wr_unrn solpawhcn slopern
## trngcv 0.23224359 0.32827534 -0.062341490 -0.14671433 -0.18198480
## bio14 0.16859387 0.45904523 -0.039418673 0.23912653 -0.02970640
## slope 0.17634243 0.28126622 0.056205249 -0.17955216 -0.15398297
## rugg500s 0.13202620 0.03578479 0.073818209 -0.20214947 -0.15782983
## pil_twim -0.43193057 -0.29768807 -0.206746105 0.17860252 0.26432828
## pil_twicv -0.18510842 -0.30821673 -0.106794102 -0.01207811 0.03160380
## pil_topocv 0.05077707 -0.06616776 -0.074939411 0.05138687 -0.13003649
## pil_slps 0.36090860 0.21818619 0.113523411 -0.22465768 -0.16139194
## pil_slpcv -0.14068400 -0.11781350 0.002507419 -0.04750079 0.10439130
## pil_elr3cv -0.21925198 -0.22561452 -0.023303594 -0.02096902 0.01645636
## mrvbf -0.21208074 -0.11948245 -0.096642533 0.13166310 0.33641174
## mrrtf 0.02960894 -0.01027911 0.010595057 0.15954042 -0.03485441
## minfertf -0.23450476 -0.24459358 -0.105836620 0.13802186 0.17182369
## lf7rup -0.19710387 -0.20014064 -0.022543060 0.10119036 0.22857791
## hstructn -0.03969216 0.01344205 -0.074267122 0.14136903 0.39431854
## geolrngaggn 1.00000000 0.23599976 0.100423279 -0.10953559 -0.10709799
## elevationm 0.23599976 1.00000000 -0.054646005 0.08226795 -0.05950478
## wr_unrn 0.10042328 -0.05464600 1.000000000 -0.64879855 -0.09753958
## solpawhcn -0.10953559 0.08226795 -0.648798550 1.00000000 0.16282807
## slopern -0.10709799 -0.05950478 -0.097539581 0.16282807 1.00000000
## MIN_AGE_MA -0.07323784 -0.14465058 0.055594693 0.18305242 0.08046612
## HubDist -0.01398595 0.23769455 0.062436255 -0.16628773 -0.09694540
## MIN_AGE_MA HubDist
## trngcv -0.468541755 0.25900610
## bio14 0.419619739 -0.18161858
## slope -0.180981491 0.19846532
## rugg500s -0.332963519 0.15405198
## pil_twim -0.133647611 0.03071807
## pil_twicv -0.195963275 0.03937561
## pil_topocv -0.124023011 0.10601700
## pil_slps -0.145357804 0.17613904
## pil_slpcv -0.114125936 0.26684264
## pil_elr3cv -0.252505000 0.12772916
## mrvbf 0.007300140 -0.15219624
## mrrtf 0.323227324 -0.11916530
## minfertf -0.091945560 -0.11781342
## lf7rup 0.001095106 -0.26377002
## hstructn 0.579045392 -0.09757715
## geolrngaggn -0.073237842 -0.01398595
## elevationm -0.144650575 0.23769455
## wr_unrn 0.055594693 0.06243625
## solpawhcn 0.183052422 -0.16628773
## slopern 0.080466116 -0.09694540
## MIN_AGE_MA 1.000000000 -0.20084125
## HubDist -0.200841251 1.00000000
# Correlation plot drawn with circles: first the full matrix, then the upper
# triangle only. Both use the same enlarged text sizes.
corrplot(corr = correlations, method = "circle",
         tl.cex = 1.5, cl.cex = 1.5)
corrplot(corr = correlations, method = "circle", type = "upper",
         tl.cex = 1.5, cl.cex = 1.5)
dim(data2)
## [1] 409 30
# Draw a kernel density plot for every column of selectcol_Data2, six per
# page in a 2x3 grid, each titled with its column name.
# par() returns the previous settings, which are restored after the loop so
# the 2x3 layout does not leak into plots drawn later in the script.
old_par <- par(mfrow = c(2, 3))
for (col in seq_along(selectcol_Data2)) {
  plot(density(selectcol_Data2[[col]]), main = colnames(selectcol_Data2)[col])
}
par(old_par)
# Split data2 in half on true_troglofauna with a fixed seed: the rows picked
# by createDataPartition() go to data_train, all remaining rows to data_prov.
set.seed(78945)
Index1 <- createDataPartition(y = data2$true_troglofauna, p = 0.5, list = FALSE)
data_train <- data2[Index1, ]
data_prov <- data2[-Index1, ]
dim(data_train)
## [1] 205 30
summary(data_train)
## trngcv bio14 slope rugg500s
## Min. :8.713 Min. :0.02988 Min. : 0.3154 Min. : 0.02999
## 1st Qu.:9.222 1st Qu.:0.12881 1st Qu.: 1.7353 1st Qu.: 1.26590
## Median :9.320 Median :0.18033 Median : 3.7947 Median : 2.63082
## Mean :9.319 Mean :0.18687 Mean : 4.9174 Mean : 3.12256
## 3rd Qu.:9.411 3rd Qu.:0.23410 3rd Qu.: 7.2988 3rd Qu.: 4.78618
## Max. :9.972 Max. :0.38452 Max. :17.2472 Max. :10.10635
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.975 Min. :0.03184 Min. :0.00000 Min. : 0.08751
## 1st Qu.: 8.095 1st Qu.:0.12767 1st Qu.:0.00000 1st Qu.: 1.02469
## Median : 9.164 Median :0.16995 Median :0.01947 Median : 2.37349
## Mean : 9.078 Mean :0.17922 Mean :0.06160 Mean : 3.65220
## 3rd Qu.:10.071 3rd Qu.:0.22963 3rd Qu.:0.12689 3rd Qu.: 5.18772
## Max. :13.544 Max. :0.36039 Max. :0.33982 Max. :14.88655
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1908 Min. :0.03157 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3766 1st Qu.:0.14646 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4692 Median :0.23215 Median :0.0000 Median :0.00000
## Mean :0.4998 Mean :0.24334 Mean :0.3391 Mean :0.08205
## 3rd Qu.:0.5842 3rd Qu.:0.31863 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.2105 Max. :0.59121 Max. :2.8579 Max. :2.59788
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.00 Min. :1.00 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.00 1st Qu.:2.00 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.00 Median :3.00 Median : 1.043 Median : 2.5880
## Mean :2.18 Mean :3.21 Mean : 1.877 Mean :176.0964
## 3rd Qu.:2.00 3rd Qu.:4.00 3rd Qu.: 1.043 3rd Qu.:300.0000
## Max. :3.00 Max. :7.00 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :443.9 Min. : 0.00 Min. : 82.0 Min. : 0.00214
## 1st Qu.:558.7 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.: 0.02306
## Median :582.6 Median :17.86 Median : 96.0 Median : 0.07592
## Mean :587.3 Mean :17.20 Mean :101.6 Mean : 0.19765
## 3rd Qu.:619.4 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.: 0.17402
## Max. :723.7 Max. :72.00 Max. :157.0 Max. :10.06944
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 2.087 April :31
## 1st Qu.:2454 1st Qu.: 16.862 March :27
## Median :2454 Median : 21.842 May :24
## Mean :2479 Mean : 38.612 January :23
## 3rd Qu.:2494 3rd Qu.: 76.503 December :21
## Max. :2597 Max. :104.012 September:19
## (Other) :60
## UNITNAME
## Brockman Iron Formation :132
## Marra Mamba Iron Formation : 18
## Mount McRae Shale and Mount Sylvia Formation: 34
## Wittenoom Formation : 21
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 21
## sedimentary other chemical or biochemical:150
## sedimentary siliciclastic : 34
##
##
##
##
## FORMATION
## Brockman Iron Formation :132
## Marra Mamba Iron Formation : 18
## Mount McRae Shale and Mount Sylvia Formation: 34
## Wittenoom Formation : 21
##
##
##
## HubName
## anticline, exposed :122
## exposed : 10
## strike-slip, exposed, showing relative dextral displacement: 7
## syncline, exposed : 66
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0: 74 Min. :-22.61 Min. :117.0
## 1:131 1st Qu.:-22.25 1st Qu.:117.6
## Median :-22.16 Median :117.8
## Mean :-22.22 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.04 Max. :118.0
##
# Remove the coordinate columns from the training set.
# A logical mask is used instead of -which(...): if neither column were
# present, which() would return integer(0) and the negative index would select
# zero columns, silently emptying the data frame. drop = FALSE guarantees the
# result stays a data frame.
data_train <- data_train[, !(names(data_train) %in% c("LATITUDE", "LONGITUDE")), drop = FALSE]
dim(data_train)
## [1] 205 28
# First rows of the training set after the coordinate columns were dropped.
head(data_train)
## trngcv bio14 slope rugg500s pil_twim pil_twicv pil_topocv pil_slps
## 173 9.22221 0.11984 2.62358 3.22619 8.90416 0.15090 0.15976 0.80723
## 50 9.83993 0.22232 15.87183 10.08672 7.72970 0.14045 0.02834 7.59460
## 458 9.28573 0.18881 10.57231 5.03119 8.63300 0.08212 0.00000 1.58531
## 383 9.32038 0.13525 1.22187 2.82780 9.56502 0.15810 0.00000 0.72478
## 24 9.40258 0.11255 6.00854 3.17529 8.35731 0.35754 0.00000 4.45767
## 246 9.41757 0.16629 8.93139 5.11192 8.63891 0.23263 0.05131 10.49186
## pil_slpcv pil_elr3cv mrvbf mrrtf minfertf lf7rup hstructn geolrngaggn
## 173 0.25382 0.30016 0.00000 0 2 4 1.04286 2.5880
## 50 0.40627 0.25831 0.00000 0 2 1 1.04286 2.5880
## 458 0.33915 0.42026 0.00000 0 2 4 1.04286 2.5880
## 383 0.41914 0.56449 0.85295 0 3 2 1.04286 0.0115
## 24 0.57378 0.26367 0.00000 0 2 2 1.04286 2.5880
## 246 0.90002 0.24122 0.00000 0 2 3 1.04286 2.5880
## elevationm wr_unrn solpawhcn slopern MIN_AGE_MA HubDist month_collection
## 173 530.6564 17.85714 96 0.03171 2454 16.25063 September
## 50 603.8304 17.85714 96 0.01338 2454 23.47565 March
## 458 592.9053 17.85714 96 0.01833 2454 14.07495 April
## 383 561.8668 17.85714 96 0.12433 2454 15.77570 January
## 24 558.6679 17.85714 96 0.06396 2494 22.50256 December
## 246 601.5659 17.85714 96 0.03762 2454 86.82374 March
## UNITNAME
## 173 Brockman Iron Formation
## 50 Brockman Iron Formation
## 458 Brockman Iron Formation
## 383 Brockman Iron Formation
## 24 Mount McRae Shale and Mount Sylvia Formation
## 246 Brockman Iron Formation
## ROCKTYPE1
## 173 sedimentary other chemical or biochemical
## 50 sedimentary other chemical or biochemical
## 458 sedimentary other chemical or biochemical
## 383 sedimentary other chemical or biochemical
## 24 sedimentary siliciclastic
## 246 sedimentary other chemical or biochemical
## FORMATION HubName
## 173 Brockman Iron Formation anticline, exposed
## 50 Brockman Iron Formation anticline, exposed
## 458 Brockman Iron Formation anticline, exposed
## 383 Brockman Iron Formation anticline, exposed
## 24 Mount McRae Shale and Mount Sylvia Formation anticline, exposed
## 246 Brockman Iron Formation syncline, exposed
## true_troglofauna
## 173 0
## 50 1
## 458 0
## 383 1
## 24 0
## 246 1
# Per-column summary of the training set without the coordinate columns.
summary(data_train)
## trngcv bio14 slope rugg500s
## Min. :8.713 Min. :0.02988 Min. : 0.3154 Min. : 0.02999
## 1st Qu.:9.222 1st Qu.:0.12881 1st Qu.: 1.7353 1st Qu.: 1.26590
## Median :9.320 Median :0.18033 Median : 3.7947 Median : 2.63082
## Mean :9.319 Mean :0.18687 Mean : 4.9174 Mean : 3.12256
## 3rd Qu.:9.411 3rd Qu.:0.23410 3rd Qu.: 7.2988 3rd Qu.: 4.78618
## Max. :9.972 Max. :0.38452 Max. :17.2472 Max. :10.10635
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.975 Min. :0.03184 Min. :0.00000 Min. : 0.08751
## 1st Qu.: 8.095 1st Qu.:0.12767 1st Qu.:0.00000 1st Qu.: 1.02469
## Median : 9.164 Median :0.16995 Median :0.01947 Median : 2.37349
## Mean : 9.078 Mean :0.17922 Mean :0.06160 Mean : 3.65220
## 3rd Qu.:10.071 3rd Qu.:0.22963 3rd Qu.:0.12689 3rd Qu.: 5.18772
## Max. :13.544 Max. :0.36039 Max. :0.33982 Max. :14.88655
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1908 Min. :0.03157 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3766 1st Qu.:0.14646 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4692 Median :0.23215 Median :0.0000 Median :0.00000
## Mean :0.4998 Mean :0.24334 Mean :0.3391 Mean :0.08205
## 3rd Qu.:0.5842 3rd Qu.:0.31863 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.2105 Max. :0.59121 Max. :2.8579 Max. :2.59788
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.00 Min. :1.00 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.00 1st Qu.:2.00 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.00 Median :3.00 Median : 1.043 Median : 2.5880
## Mean :2.18 Mean :3.21 Mean : 1.877 Mean :176.0964
## 3rd Qu.:2.00 3rd Qu.:4.00 3rd Qu.: 1.043 3rd Qu.:300.0000
## Max. :3.00 Max. :7.00 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :443.9 Min. : 0.00 Min. : 82.0 Min. : 0.00214
## 1st Qu.:558.7 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.: 0.02306
## Median :582.6 Median :17.86 Median : 96.0 Median : 0.07592
## Mean :587.3 Mean :17.20 Mean :101.6 Mean : 0.19765
## 3rd Qu.:619.4 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.: 0.17402
## Max. :723.7 Max. :72.00 Max. :157.0 Max. :10.06944
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 2.087 April :31
## 1st Qu.:2454 1st Qu.: 16.862 March :27
## Median :2454 Median : 21.842 May :24
## Mean :2479 Mean : 38.612 January :23
## 3rd Qu.:2494 3rd Qu.: 76.503 December :21
## Max. :2597 Max. :104.012 September:19
## (Other) :60
## UNITNAME
## Brockman Iron Formation :132
## Marra Mamba Iron Formation : 18
## Mount McRae Shale and Mount Sylvia Formation: 34
## Wittenoom Formation : 21
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 21
## sedimentary other chemical or biochemical:150
## sedimentary siliciclastic : 34
##
##
##
##
## FORMATION
## Brockman Iron Formation :132
## Marra Mamba Iron Formation : 18
## Mount McRae Shale and Mount Sylvia Formation: 34
## Wittenoom Formation : 21
##
##
##
## HubName
## anticline, exposed :122
## exposed : 10
## strike-slip, exposed, showing relative dextral displacement: 7
## syncline, exposed : 66
##
##
##
## true_troglofauna
## 0: 74
## 1:131
##
##
##
##
##
# Re-seed so the second partition is reproducible on its own.
set.seed(78945)
# Split the held-out rows (data_prov) 60/40 on the outcome column.
# Despite its name, trainIndex1 selects the rows of data_test, not of a
# training set; the rows left over become finalTest1.
trainIndex1 <- createDataPartition(data_prov$true_troglofauna, p=0.6, list=FALSE)
data_test <- data_prov[ trainIndex1,]
finalTest1 <- data_prov[-trainIndex1,]
summary(data_test)
## trngcv bio14 slope rugg500s
## Min. : 8.779 Min. :0.02054 Min. : 0.3373 Min. : 0.1128
## 1st Qu.: 9.241 1st Qu.:0.11885 1st Qu.: 2.0234 1st Qu.: 1.4004
## Median : 9.300 Median :0.16241 Median : 3.1520 Median : 2.9601
## Mean : 9.326 Mean :0.16978 Mean : 4.6339 Mean : 3.3532
## 3rd Qu.: 9.402 3rd Qu.:0.20572 3rd Qu.: 6.2128 3rd Qu.: 5.0275
## Max. :10.026 Max. :0.33206 Max. :16.3552 Max. :10.1064
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.944 Min. :0.03919 Min. :0.00000 Min. : 0.1607
## 1st Qu.: 8.005 1st Qu.:0.12469 1st Qu.:0.00000 1st Qu.: 1.2269
## Median : 8.975 Median :0.16440 Median :0.01535 Median : 2.7404
## Mean : 9.050 Mean :0.17676 Mean :0.06513 Mean : 4.0601
## 3rd Qu.:10.169 3rd Qu.:0.21979 3rd Qu.:0.13731 3rd Qu.: 5.5650
## Max. :11.582 Max. :0.40758 Max. :0.35341 Max. :15.2506
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1719 Min. :0.05163 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3834 1st Qu.:0.12926 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4809 Median :0.22894 Median :0.0000 Median :0.00000
## Mean :0.5469 Mean :0.25332 Mean :0.3064 Mean :0.03911
## 3rd Qu.:0.6665 3rd Qu.:0.37929 3rd Qu.:0.5784 3rd Qu.:0.00000
## Max. :1.2105 Max. :0.71240 Max. :1.9928 Max. :1.56825
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.236 Mean :3.341 Mean : 1.340 Mean :168.6057
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.: 21.9000
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :518.3 Min. : 0.00 Min. : 82.0 Min. :0.00162
## 1st Qu.:560.5 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.02080
## Median :576.1 Median :17.86 Median : 96.0 Median :0.06480
## Mean :583.6 Mean :16.97 Mean :100.3 Mean :0.21548
## 3rd Qu.:607.7 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.18920
## Max. :721.9 Max. :72.00 Max. :157.0 Max. :6.36852
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 1.294 September:18
## 1st Qu.:2454 1st Qu.:15.756 November :18
## Median :2454 Median :21.772 May :17
## Mean :2471 Mean :39.947 January :16
## 3rd Qu.:2494 3rd Qu.:79.998 March :14
## Max. :2597 Max. :91.703 April :13
## (Other) :27
## UNITNAME
## Brockman Iron Formation :90
## Marra Mamba Iron Formation : 7
## Mount McRae Shale and Mount Sylvia Formation:22
## Wittenoom Formation : 4
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 4
## sedimentary other chemical or biochemical:97
## sedimentary siliciclastic :22
##
##
##
##
## FORMATION
## Brockman Iron Formation :90
## Marra Mamba Iron Formation : 7
## Mount McRae Shale and Mount Sylvia Formation:22
## Wittenoom Formation : 4
##
##
##
## HubName
## anticline, exposed :70
## exposed : 5
## strike-slip, exposed, showing relative dextral displacement: 1
## syncline, exposed :47
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0:44 Min. :-22.61 Min. :117.1
## 1:79 1st Qu.:-22.21 1st Qu.:117.7
## Median :-22.16 Median :117.8
## Mean :-22.20 Mean :117.8
## 3rd Qu.:-22.13 3rd Qu.:117.9
## Max. :-22.09 Max. :118.0
##
# Row/column count of data_test before the coordinate columns are dropped.
dim(data_test)
## [1] 123 30
# Per-column summary of the second held-out split (coordinates still included).
summary(finalTest1)
## trngcv bio14 slope rugg500s
## Min. :8.808 Min. :0.01801 Min. : 0.2666 Min. :0.0407
## 1st Qu.:9.244 1st Qu.:0.11957 1st Qu.: 2.0234 1st Qu.:1.4870
## Median :9.320 Median :0.17200 Median : 3.3626 Median :2.7764
## Mean :9.335 Mean :0.17983 Mean : 5.0533 Mean :3.3959
## 3rd Qu.:9.410 3rd Qu.:0.22522 3rd Qu.: 7.4120 3rd Qu.:5.2247
## Max. :9.889 Max. :0.42288 Max. :15.8156 Max. :9.9756
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.749 Min. :0.04324 Min. :0.00000 Min. : 0.1344
## 1st Qu.: 7.738 1st Qu.:0.11417 1st Qu.:0.00000 1st Qu.: 1.2658
## Median : 8.587 Median :0.16848 Median :0.02168 Median : 3.9729
## Mean : 8.762 Mean :0.17586 Mean :0.07001 Mean : 4.5940
## 3rd Qu.: 9.944 3rd Qu.:0.23585 3rd Qu.:0.13900 3rd Qu.: 6.9364
## Max. :11.582 Max. :0.36698 Max. :0.32611 Max. :12.7276
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1797 Min. :0.0359 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3642 1st Qu.:0.1489 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4696 Median :0.2056 Median :0.0000 Median :0.00000
## Mean :0.5129 Mean :0.2297 Mean :0.2218 Mean :0.08481
## 3rd Qu.:0.6206 3rd Qu.:0.3127 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.6098 Max. :0.5645 Max. :2.8219 Max. :1.88966
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.235 Mean :3.099 Mean : 1.535 Mean :165.4988
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.:300.0000
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :515.2 Min. : 0.00 Min. : 96.0 Min. :0.00179
## 1st Qu.:559.7 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.01995
## Median :570.5 Median :17.86 Median : 96.0 Median :0.04257
## Mean :582.6 Mean :15.42 Mean :102.1 Mean :0.24617
## 3rd Qu.:606.6 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.10689
## Max. :726.1 Max. :17.86 Max. :157.0 Max. :4.28009
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 1.931 September:13
## 1st Qu.:2454 1st Qu.:16.501 December :12
## Median :2454 Median :21.793 January :11
## Mean :2482 Mean :40.231 March : 9
## 3rd Qu.:2494 3rd Qu.:76.311 May : 9
## Max. :2597 Max. :88.221 June : 7
## (Other) :20
## UNITNAME
## Brockman Iron Formation :51
## Marra Mamba Iron Formation :10
## Mount McRae Shale and Mount Sylvia Formation:19
## Wittenoom Formation : 1
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 1
## sedimentary other chemical or biochemical:61
## sedimentary siliciclastic :19
##
##
##
##
## FORMATION
## Brockman Iron Formation :51
## Marra Mamba Iron Formation :10
## Mount McRae Shale and Mount Sylvia Formation:19
## Wittenoom Formation : 1
##
##
##
## HubName
## anticline, exposed :43
## exposed : 6
## strike-slip, exposed, showing relative dextral displacement: 2
## syncline, exposed :30
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0:29 Min. :-22.64 Min. :117.1
## 1:52 1st Qu.:-22.22 1st Qu.:117.6
## Median :-22.16 Median :117.8
## Mean :-22.21 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.09 Max. :118.0
##
# Row/column count of finalTest1 before the coordinate columns are dropped.
dim(finalTest1)
## [1] 81 30
# Remove the coordinate columns from both held-out sets.
# Each mask is built from the names of the frame being subset (not from
# data2), so the selection stays correct even if the column order ever differs.
# A logical mask also avoids the -which(...) pitfall: with no matching column,
# -integer(0) would select zero columns instead of all of them.
data_test <- data_test[, !(names(data_test) %in% c("LATITUDE", "LONGITUDE")), drop = FALSE]
finalTest <- finalTest1[, !(names(finalTest1) %in% c("LATITUDE", "LONGITUDE")), drop = FALSE]
# Show the class of every column.
# NOTE(review): the second call inspects finalTest1 (coordinates still
# present), not the cleaned finalTest -- confirm this is intended.
sapply(data_test, class)
sapply(finalTest1, class)
## trngcv bio14 slope rugg500s
## "numeric" "numeric" "numeric" "numeric"
## pil_twim pil_twicv pil_topocv pil_slps
## "numeric" "numeric" "numeric" "numeric"
## pil_slpcv pil_elr3cv mrvbf mrrtf
## "numeric" "numeric" "numeric" "numeric"
## minfertf lf7rup hstructn geolrngaggn
## "numeric" "numeric" "numeric" "numeric"
## elevationm wr_unrn solpawhcn slopern
## "numeric" "numeric" "numeric" "numeric"
## MIN_AGE_MA HubDist month_collection UNITNAME
## "numeric" "numeric" "factor" "factor"
## ROCKTYPE1 FORMATION HubName true_troglofauna
## "factor" "factor" "factor" "factor"
## trngcv bio14 slope rugg500s
## "numeric" "numeric" "numeric" "numeric"
## pil_twim pil_twicv pil_topocv pil_slps
## "numeric" "numeric" "numeric" "numeric"
## pil_slpcv pil_elr3cv mrvbf mrrtf
## "numeric" "numeric" "numeric" "numeric"
## minfertf lf7rup hstructn geolrngaggn
## "numeric" "numeric" "numeric" "numeric"
## elevationm wr_unrn solpawhcn slopern
## "numeric" "numeric" "numeric" "numeric"
## MIN_AGE_MA HubDist month_collection UNITNAME
## "numeric" "numeric" "factor" "factor"
## ROCKTYPE1 FORMATION HubName true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
# First rows of data_test after the coordinate columns were dropped.
head(data_test)
## trngcv bio14 slope rugg500s pil_twim pil_twicv pil_topocv pil_slps
## 538 8.94549 0.20508 0.33730 0.38791 10.79223 0.13333 0.00000 0.16074
## 580 9.30815 0.15320 3.35630 6.98694 8.78567 0.28006 0.00000 1.81942
## 449 9.61349 0.28381 16.35521 2.20207 5.94411 0.04918 0.00000 6.81287
## 149 9.41111 0.11746 4.08951 0.66187 7.66015 0.20674 0.00000 2.03130
## 304 9.29545 0.11761 2.02341 3.37463 11.58232 0.19793 0.05766 3.97289
## 15 8.77937 0.29834 1.52777 1.09473 7.32769 0.07640 0.00000 4.35278
## pil_slpcv pil_elr3cv mrvbf mrrtf minfertf lf7rup hstructn geolrngaggn
## 538 0.61696 0.26306 1.5954 0.00000 3 4 15.42857 0.0115
## 580 0.50594 0.53529 0.0000 0.00000 2 3 1.04286 2.5880
## 449 0.25979 0.12502 0.0000 0.00000 2 4 1.04286 900.0000
## 149 0.42033 0.09569 0.0000 0.00000 2 3 1.04286 2.5880
## 304 0.94816 0.42085 0.0000 0.00000 2 4 1.04286 2.5880
## 15 0.60188 0.08611 0.0000 0.91144 2 4 3.77143 300.0000
## elevationm wr_unrn solpawhcn slopern MIN_AGE_MA HubDist month_collection
## 538 593.5888 8.33333 131 6.36852 2506 17.36909 June
## 580 606.9091 17.85714 96 0.62784 2454 82.98079 February
## 449 721.9289 17.85714 96 0.00249 2454 88.50439 April
## 149 561.2512 17.85714 96 0.16245 2494 21.98282 April
## 304 560.4824 17.85714 96 0.27924 2454 81.98112 May
## 15 548.1772 72.00000 82 0.05721 2597 56.67423 November
## UNITNAME
## 538 Wittenoom Formation
## 580 Brockman Iron Formation
## 449 Brockman Iron Formation
## 149 Mount McRae Shale and Mount Sylvia Formation
## 304 Brockman Iron Formation
## 15 Marra Mamba Iron Formation
## ROCKTYPE1
## 538 sedimentary carbonate
## 580 sedimentary other chemical or biochemical
## 449 sedimentary other chemical or biochemical
## 149 sedimentary siliciclastic
## 304 sedimentary other chemical or biochemical
## 15 sedimentary other chemical or biochemical
## FORMATION HubName
## 538 Wittenoom Formation anticline, exposed
## 580 Brockman Iron Formation syncline, exposed
## 449 Brockman Iron Formation syncline, exposed
## 149 Mount McRae Shale and Mount Sylvia Formation anticline, exposed
## 304 Brockman Iron Formation syncline, exposed
## 15 Marra Mamba Iron Formation anticline, exposed
## true_troglofauna
## 538 0
## 580 1
## 449 1
## 149 1
## 304 1
## 15 1
# Row/column count of data_test after the coordinate columns were dropped.
dim(data_test)
## [1] 123 28
# Per-column summary of data_test without the coordinate columns.
summary(data_test)
## trngcv bio14 slope rugg500s
## Min. : 8.779 Min. :0.02054 Min. : 0.3373 Min. : 0.1128
## 1st Qu.: 9.241 1st Qu.:0.11885 1st Qu.: 2.0234 1st Qu.: 1.4004
## Median : 9.300 Median :0.16241 Median : 3.1520 Median : 2.9601
## Mean : 9.326 Mean :0.16978 Mean : 4.6339 Mean : 3.3532
## 3rd Qu.: 9.402 3rd Qu.:0.20572 3rd Qu.: 6.2128 3rd Qu.: 5.0275
## Max. :10.026 Max. :0.33206 Max. :16.3552 Max. :10.1064
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.944 Min. :0.03919 Min. :0.00000 Min. : 0.1607
## 1st Qu.: 8.005 1st Qu.:0.12469 1st Qu.:0.00000 1st Qu.: 1.2269
## Median : 8.975 Median :0.16440 Median :0.01535 Median : 2.7404
## Mean : 9.050 Mean :0.17676 Mean :0.06513 Mean : 4.0601
## 3rd Qu.:10.169 3rd Qu.:0.21979 3rd Qu.:0.13731 3rd Qu.: 5.5650
## Max. :11.582 Max. :0.40758 Max. :0.35341 Max. :15.2506
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1719 Min. :0.05163 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3834 1st Qu.:0.12926 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4809 Median :0.22894 Median :0.0000 Median :0.00000
## Mean :0.5469 Mean :0.25332 Mean :0.3064 Mean :0.03911
## 3rd Qu.:0.6665 3rd Qu.:0.37929 3rd Qu.:0.5784 3rd Qu.:0.00000
## Max. :1.2105 Max. :0.71240 Max. :1.9928 Max. :1.56825
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.236 Mean :3.341 Mean : 1.340 Mean :168.6057
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.: 21.9000
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :518.3 Min. : 0.00 Min. : 82.0 Min. :0.00162
## 1st Qu.:560.5 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.02080
## Median :576.1 Median :17.86 Median : 96.0 Median :0.06480
## Mean :583.6 Mean :16.97 Mean :100.3 Mean :0.21548
## 3rd Qu.:607.7 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.18920
## Max. :721.9 Max. :72.00 Max. :157.0 Max. :6.36852
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 1.294 September:18
## 1st Qu.:2454 1st Qu.:15.756 November :18
## Median :2454 Median :21.772 May :17
## Mean :2471 Mean :39.947 January :16
## 3rd Qu.:2494 3rd Qu.:79.998 March :14
## Max. :2597 Max. :91.703 April :13
## (Other) :27
## UNITNAME
## Brockman Iron Formation :90
## Marra Mamba Iron Formation : 7
## Mount McRae Shale and Mount Sylvia Formation:22
## Wittenoom Formation : 4
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 4
## sedimentary other chemical or biochemical:97
## sedimentary siliciclastic :22
##
##
##
##
## FORMATION
## Brockman Iron Formation :90
## Marra Mamba Iron Formation : 7
## Mount McRae Shale and Mount Sylvia Formation:22
## Wittenoom Formation : 4
##
##
##
## HubName
## anticline, exposed :70
## exposed : 5
## strike-slip, exposed, showing relative dextral displacement: 1
## syncline, exposed :47
##
##
##
## true_troglofauna
## 0:44
## 1:79
##
##
##
##
##
# Column names remaining in data_test.
names(data_test)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName" "true_troglofauna"
# First rows of finalTest (finalTest1 without the coordinate columns).
head(finalTest)
## trngcv bio14 slope rugg500s pil_twim pil_twicv pil_topocv pil_slps
## 95 9.88856 0.25342 10.56614 4.74161 8.49100 0.23228 0.00000 6.65473
## 626 8.80830 0.28342 2.23671 1.25996 7.13560 0.05745 0.00000 2.40606
## 607 9.32483 0.28510 2.18706 0.46594 9.42484 0.23025 0.17897 1.49514
## 623 8.89422 0.31750 3.19405 2.99698 7.67751 0.12228 0.00000 1.90480
## 18 9.33742 0.09946 6.24483 3.65277 10.97026 0.25139 0.00000 4.37832
## 470 9.14680 0.22522 2.94459 7.08095 7.82185 0.23485 0.19927 10.64527
## pil_slpcv pil_elr3cv mrvbf mrrtf minfertf lf7rup hstructn geolrngaggn
## 95 0.37579 0.12946 0 0.00000 2 2 1.04286 900.000
## 626 0.47131 0.28907 0 0.00000 2 4 5.14286 300.000
## 607 0.50164 0.16386 0 0.70467 2 3 0.00000 300.000
## 623 0.27703 0.07601 0 0.00000 2 4 5.14286 300.000
## 18 0.79251 0.24706 0 0.00000 2 3 1.04286 2.588
## 470 0.65664 0.24246 0 0.00000 2 4 1.04286 300.000
## elevationm wr_unrn solpawhcn slopern MIN_AGE_MA HubDist month_collection
## 95 627.5781 17.85714 96 0.01311 2454 77.101771 April
## 626 542.7686 10.00000 100 0.04463 2597 7.811622 June
## 607 570.4954 0.00000 157 0.09306 2597 16.627614 September
## 623 566.4947 10.00000 100 0.00955 2597 7.772825 November
## 18 525.2358 17.85714 96 4.24974 2494 21.317005 December
## 470 646.1981 17.85714 96 0.20195 2494 15.040323 June
## UNITNAME
## 95 Brockman Iron Formation
## 626 Marra Mamba Iron Formation
## 607 Marra Mamba Iron Formation
## 623 Marra Mamba Iron Formation
## 18 Mount McRae Shale and Mount Sylvia Formation
## 470 Mount McRae Shale and Mount Sylvia Formation
## ROCKTYPE1
## 95 sedimentary other chemical or biochemical
## 626 sedimentary other chemical or biochemical
## 607 sedimentary other chemical or biochemical
## 623 sedimentary other chemical or biochemical
## 18 sedimentary siliciclastic
## 470 sedimentary siliciclastic
## FORMATION HubName
## 95 Brockman Iron Formation syncline, exposed
## 626 Marra Mamba Iron Formation exposed
## 607 Marra Mamba Iron Formation anticline, exposed
## 623 Marra Mamba Iron Formation exposed
## 18 Mount McRae Shale and Mount Sylvia Formation anticline, exposed
## 470 Mount McRae Shale and Mount Sylvia Formation anticline, exposed
## true_troglofauna
## 95 1
## 626 1
## 607 1
## 623 1
## 18 0
## 470 1
# Row/column count of finalTest.
dim(finalTest)
## [1] 81 28
# Per-column summary of finalTest.
summary(finalTest)
## trngcv bio14 slope rugg500s
## Min. :8.808 Min. :0.01801 Min. : 0.2666 Min. :0.0407
## 1st Qu.:9.244 1st Qu.:0.11957 1st Qu.: 2.0234 1st Qu.:1.4870
## Median :9.320 Median :0.17200 Median : 3.3626 Median :2.7764
## Mean :9.335 Mean :0.17983 Mean : 5.0533 Mean :3.3959
## 3rd Qu.:9.410 3rd Qu.:0.22522 3rd Qu.: 7.4120 3rd Qu.:5.2247
## Max. :9.889 Max. :0.42288 Max. :15.8156 Max. :9.9756
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.749 Min. :0.04324 Min. :0.00000 Min. : 0.1344
## 1st Qu.: 7.738 1st Qu.:0.11417 1st Qu.:0.00000 1st Qu.: 1.2658
## Median : 8.587 Median :0.16848 Median :0.02168 Median : 3.9729
## Mean : 8.762 Mean :0.17586 Mean :0.07001 Mean : 4.5940
## 3rd Qu.: 9.944 3rd Qu.:0.23585 3rd Qu.:0.13900 3rd Qu.: 6.9364
## Max. :11.582 Max. :0.36698 Max. :0.32611 Max. :12.7276
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1797 Min. :0.0359 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3642 1st Qu.:0.1489 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4696 Median :0.2056 Median :0.0000 Median :0.00000
## Mean :0.5129 Mean :0.2297 Mean :0.2218 Mean :0.08481
## 3rd Qu.:0.6206 3rd Qu.:0.3127 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.6098 Max. :0.5645 Max. :2.8219 Max. :1.88966
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.235 Mean :3.099 Mean : 1.535 Mean :165.4988
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.:300.0000
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :515.2 Min. : 0.00 Min. : 96.0 Min. :0.00179
## 1st Qu.:559.7 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.01995
## Median :570.5 Median :17.86 Median : 96.0 Median :0.04257
## Mean :582.6 Mean :15.42 Mean :102.1 Mean :0.24617
## 3rd Qu.:606.6 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.10689
## Max. :726.1 Max. :17.86 Max. :157.0 Max. :4.28009
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 1.931 September:13
## 1st Qu.:2454 1st Qu.:16.501 December :12
## Median :2454 Median :21.793 January :11
## Mean :2482 Mean :40.231 March : 9
## 3rd Qu.:2494 3rd Qu.:76.311 May : 9
## Max. :2597 Max. :88.221 June : 7
## (Other) :20
## UNITNAME
## Brockman Iron Formation :51
## Marra Mamba Iron Formation :10
## Mount McRae Shale and Mount Sylvia Formation:19
## Wittenoom Formation : 1
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 1
## sedimentary other chemical or biochemical:61
## sedimentary siliciclastic :19
##
##
##
##
## FORMATION
## Brockman Iron Formation :51
## Marra Mamba Iron Formation :10
## Mount McRae Shale and Mount Sylvia Formation:19
## Wittenoom Formation : 1
##
##
##
## HubName
## anticline, exposed :43
## exposed : 6
## strike-slip, exposed, showing relative dextral displacement: 2
## syncline, exposed :30
##
##
##
## true_troglofauna
## 0:29
## 1:52
##
##
##
##
##
# Column names remaining in finalTest.
names(finalTest)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName" "true_troglofauna"
# Keep the observed outcome labels of data_test in their own vector and show
# the class counts.
# NOTE(review): presumably used later as the reference labels when scoring
# predictions -- confirm against the code that consumes ContraProva.
ContraProva<-data_test$true_troglofauna
summary(ContraProva)
## 0 1
## 44 79
# Pull the four cell counts out of a 2x2 confusion table.
#
# caret_confusion_matrix: an object whose `$table` element holds the 2x2 table.
# Returns a plain numeric vector ordered c(tp, fp, tn, fn).
#
# Cells are addressed by linear (column-major) position: position 4 is read as
# true positives, 2 as false positives, 1 as true negatives and 3 as false
# negatives.
# NOTE(review): these labels assume rows are predictions, columns are the
# reference, and the second level is the positive class -- confirm with callers.
get_confusion_elements <- function(caret_confusion_matrix) {
  cells <- caret_confusion_matrix$table
  as.numeric(cells[c(4, 2, 1, 3)])
}
# Matthews correlation coefficient from the four confusion-matrix counts.
#
# tp - true positives
# fp - false positives
# tn - true negatives
# fn - false negatives
#
# Returns a double. The result is NaN (0 / 0) when any marginal total
# (tp + fp, tp + fn, tn + fp or tn + fn) is zero.
calculate_mcc <- function(tp, fp, tn, fn) {
  # Counts often arrive as integers (e.g. from table()); integer products such
  # as tp * tn overflow to NA above .Machine$integer.max, so do the arithmetic
  # in double precision.
  tp <- as.numeric(tp)
  fp <- as.numeric(fp)
  tn <- as.numeric(tn)
  fn <- as.numeric(fn)

  numerator <- (tp * tn) - (fp * fn)
  denominator <- sqrt((tp + fp) * (tp + fn)) * sqrt((tn + fp) * (tn + fn))
  numerator / denominator
}
# Matthews correlation coefficient computed directly from a confusion-matrix
# object.
#
# caret_confusion_matrix: an object whose `$table` element holds the 2x2 table.
# Cells are read by linear (column-major) position: 1 = true negatives,
# 2 = false positives, 3 = false negatives, 4 = true positives.
# NOTE(review): these labels assume rows are predictions, columns are the
# reference, and the second level is the positive class -- confirm with callers.
#
# Returns a double. The result is NaN (0 / 0) when any marginal total is zero.
calculate_mcc1 <- function(caret_confusion_matrix) {
  # Convert once to double: table cells are usually integers, and products of
  # integer counts overflow to NA above .Machine$integer.max.
  cells <- as.numeric(caret_confusion_matrix$table)
  tn <- cells[1]
  fp <- cells[2]
  fn <- cells[3]
  tp <- cells[4]

  numerator <- (tp * tn) - (fp * fn)
  denominator <- sqrt((tp + fp) * (tp + fn)) * sqrt((tn + fp) * (tn + fn))
  numerator / denominator
}
# F2 score (F-beta with beta = 2) from a confusion-matrix object.
#
# CM_predictions: an object whose `$byClass` element is a named vector
# containing "Precision" and "Sensitivity".
# Returns an unnamed numeric scalar:
#   (1 + 2^2) * precision * sensitivity / (2^2 * precision + sensitivity)
calculate_F2 <- function(CM_predictions) {
  by_class <- CM_predictions$byClass
  precision <- by_class["Precision"]
  sensitivity <- by_class["Sensitivity"]
  beta_sq <- 2^2

  f2 <- ((1 + beta_sq) * precision * sensitivity) /
    (beta_sq * precision + sensitivity)
  # Strip the name inherited from the byClass vector.
  as.numeric(f2)
}
# Tune the random forest's mtry by repeated cross-validation, selecting on ROC.
set.seed(78945)
# Candidate mtry values: 2, 4, ..., 26.
grid <- expand.grid(.mtry = seq(from = 2, to = 26, by = 2))
trControl <- trainControl(
  method = "repeatedcv",             # Resampling method
  repeats = 10,                      # Number of repetitions of the cross-validation
  number = 5,                        # Number of folds in each repetition
  classProbs = TRUE,                 # Compute class probabilities (needed for ROC)
  savePredictions = "final",         # Keep hold-out predictions for the final model
  summaryFunction = twoClassSummary  # caret summary reporting ROC, Sens and Spec
)
rf_mtry <- train(
  # make.names() turns the 0/1 levels into X0/X1, valid R names as required
  # when classProbs = TRUE.
  make.names(true_troglofauna) ~ .,
  data = data_train,                     # Training data
  method = "rf",                         # Random Forest
  # strata and sampsize are forwarded to randomForest().
  strata = data_train$true_troglofauna,  # Sample within each outcome class
  # Balanced sampling: draw the same number of cases from each class, equal to
  # the size of the smaller class. (Previously the second element reduced to
  # sum(true_troglofauna == 1), so the two classes were sampled unequally.)
  sampsize = rep(
    min(sum(data_train$true_troglofauna == 0),
        sum(data_train$true_troglofauna == 1)),
    2
  ),
  metric = "ROC",                        # Select the model with the largest ROC
  tuneGrid = grid,
  trControl = trControl,                 # Resampling settings defined above
  importance = TRUE,                     # Compute variable importance
  ntree = 500                            # Number of trees in the forest
)
# sampsize = rep(k, 2) gives both classes the same per-tree sample size,
# e.g. sampsize = c(100 cases of 0, 100 cases of 1).
# NOTE(review): output recorded below this call predates the balanced
# sampsize; re-run the script to refresh it.
rf_mtry
## Random Forest
##
## 205 samples
## 27 predictor
## 2 classes: 'X0', 'X1'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 10 times)
## Summary of sample sizes: 165, 163, 164, 164, 164, 164, ...
## Resampling results across tuning parameters:
##
## mtry ROC Sens Spec
## 2 0.7042073 0.3484762 0.9099145
## 4 0.6867728 0.4027619 0.8428490
## 6 0.6809710 0.4026667 0.8321937
## 8 0.6790754 0.4027619 0.8153846
## 10 0.6790832 0.3973333 0.8092877
## 12 0.6815026 0.4149524 0.8100570
## 14 0.6822932 0.4069524 0.8031339
## 16 0.6812318 0.4124762 0.8032194
## 18 0.6809225 0.4080952 0.8054416
## 20 0.6817251 0.4125714 0.8062678
## 22 0.6827418 0.4000952 0.7947578
## 24 0.6824823 0.4162857 0.7986610
## 26 0.6835513 0.4080952 0.7940456
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
getTrainPerf(rf_mtry)
## TrainROC TrainSens TrainSpec method
## 1 0.7042073 0.3484762 0.9099145 rf
summary(rf_mtry)
## Length Class Mode
## call 8 -none- call
## type 1 -none- character
## predicted 205 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 410 matrix numeric
## oob.times 205 -none- numeric
## classes 2 -none- character
## importance 176 -none- numeric
## importanceSD 132 -none- numeric
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 205 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 44 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 4 -none- list
rf_mtry$bestTune$mtry
## [1] 2
rf_mtry$finalModel
##
## Call:
## randomForest(x = x, y = y, ntree = 500, mtry = param$mtry, strata = ..1, sampsize = ..2, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 29.27%
## Confusion matrix:
## X0 X1 class.error
## X0 25 49 0.66216216
## X1 11 120 0.08396947
rf_mtry$finalModel$confusion
## X0 X1 class.error
## X0 25 49 0.66216216
## X1 11 120 0.08396947
summary(rf_mtry$pred)
## mtry pred obs X0 X1
## Min. :2 X0: 376 X0: 740 Min. :0.0120 Min. :0.0460
## 1st Qu.:2 X1:1674 X1:1310 1st Qu.:0.2105 1st Qu.:0.5485
## Median :2 Median :0.3260 Median :0.6740
## Mean :2 Mean :0.3467 Mean :0.6533
## 3rd Qu.:2 3rd Qu.:0.4515 3rd Qu.:0.7895
## Max. :2 Max. :0.9540 Max. :0.9880
## rowIndex Resample
## Min. : 1 Length:2050
## 1st Qu.: 52 Class :character
## Median :103 Mode :character
## Mean :103
## 3rd Qu.:154
## Max. :205
summary(rf_mtry$pred$pred)
## X0 X1
## 376 1674
summary(rf_mtry$pred$obs)
## X0 X1
## 740 1310
head(rf_mtry$pred$X0,20)
## [1] 0.144 0.324 0.522 0.486 0.314 0.184 0.652 0.058 0.342 0.478 0.158 0.484
## [13] 0.378 0.118 0.824 0.332 0.282 0.018 0.360 0.362
head(rf_mtry$pred$X1,20)
## [1] 0.856 0.676 0.478 0.514 0.686 0.816 0.348 0.942 0.658 0.522 0.842 0.516
## [13] 0.622 0.882 0.176 0.668 0.718 0.982 0.640 0.638
summary(data_train$true_troglofauna)
## 0 1
## 74 131
head(rf_mtry$pred,20)
## mtry pred obs X0 X1 rowIndex Resample
## 1 2 X1 X1 0.144 0.856 165 Fold1.Rep02
## 2 2 X1 X0 0.324 0.676 203 Fold2.Rep02
## 3 2 X0 X0 0.522 0.478 174 Fold1.Rep02
## 4 2 X1 X1 0.486 0.514 133 Fold1.Rep02
## 5 2 X1 X0 0.314 0.686 169 Fold1.Rep02
## 6 2 X1 X1 0.184 0.816 147 Fold1.Rep02
## 7 2 X0 X0 0.652 0.348 163 Fold1.Rep02
## 8 2 X1 X1 0.058 0.942 97 Fold3.Rep02
## 9 2 X1 X0 0.342 0.658 175 Fold1.Rep02
## 10 2 X1 X0 0.478 0.522 106 Fold3.Rep02
## 11 2 X1 X1 0.158 0.842 25 Fold3.Rep02
## 12 2 X1 X1 0.484 0.516 4 Fold1.Rep10
## 13 2 X1 X1 0.378 0.622 54 Fold4.Rep02
## 14 2 X1 X1 0.118 0.882 200 Fold2.Rep02
## 15 2 X0 X0 0.824 0.176 84 Fold3.Rep02
## 16 2 X1 X1 0.332 0.668 87 Fold3.Rep02
## 17 2 X1 X1 0.282 0.718 123 Fold1.Rep02
## 18 2 X1 X1 0.018 0.982 7 Fold1.Rep10
## 19 2 X1 X1 0.360 0.640 26 Fold2.Rep01
## 20 2 X1 X0 0.362 0.638 154 Fold5.Rep09
sapply(rf_mtry$pred, class)
## mtry pred obs X0 X1 rowIndex
## "numeric" "factor" "factor" "numeric" "numeric" "integer"
## Resample
## "character"
rf_mtry$results$Sens
## [1] 0.3484762 0.4027619 0.4026667 0.4027619 0.3973333 0.4149524 0.4069524
## [8] 0.4124762 0.4080952 0.4125714 0.4000952 0.4162857 0.4080952
rf_mtry$results$SensSD
## [1] 0.09915507 0.11913152 0.11058203 0.12022581 0.10725093 0.11698311
## [7] 0.10857198 0.11577380 0.12211690 0.11574530 0.12665304 0.13353480
## [13] 0.12062224
rf_mtry$results$Spec
## [1] 0.9099145 0.8428490 0.8321937 0.8153846 0.8092877 0.8100570 0.8031339
## [8] 0.8032194 0.8054416 0.8062678 0.7947578 0.7986610 0.7940456
rf_mtry$results$SpecSD
## [1] 0.05863530 0.06689955 0.06557887 0.07675331 0.07793853 0.06927836
## [7] 0.08024981 0.07738255 0.06959058 0.07771896 0.07938952 0.06657823
## [13] 0.07312943
rf_mtry$results$ROC
## [1] 0.7042073 0.6867728 0.6809710 0.6790754 0.6790832 0.6815026 0.6822932
## [8] 0.6812318 0.6809225 0.6817251 0.6827418 0.6824823 0.6835513
rf_mtry$results$ROCSD
## [1] 0.08257257 0.09245001 0.09116581 0.09309791 0.09275517 0.09224488
## [7] 0.09542614 0.09565111 0.09641345 0.09529136 0.09392111 0.09614994
## [13] 0.09635085
rf_mtry$finalModel$confusion
## X0 X1 class.error
## X0 25 49 0.66216216
## X1 11 120 0.08396947
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
# One row per mtry candidate: resampled mean and SD of sensitivity,
# specificity and ROC, copied from the caret results table.
result <- with(
  rf_mtry$results,
  data.frame(
    Sens = Sens, SensSD = SensSD,
    Spec = Spec, SpecSD = SpecSD,
    ROC = ROC, ROCSD = ROCSD
  )
)
result
## Sens SensSD Spec SpecSD ROC ROCSD
## 1 0.3484762 0.09915507 0.9099145 0.05863530 0.7042073 0.08257257
## 2 0.4027619 0.11913152 0.8428490 0.06689955 0.6867728 0.09245001
## 3 0.4026667 0.11058203 0.8321937 0.06557887 0.6809710 0.09116581
## 4 0.4027619 0.12022581 0.8153846 0.07675331 0.6790754 0.09309791
## 5 0.3973333 0.10725093 0.8092877 0.07793853 0.6790832 0.09275517
## 6 0.4149524 0.11698311 0.8100570 0.06927836 0.6815026 0.09224488
## 7 0.4069524 0.10857198 0.8031339 0.08024981 0.6822932 0.09542614
## 8 0.4124762 0.11577380 0.8032194 0.07738255 0.6812318 0.09565111
## 9 0.4080952 0.12211690 0.8054416 0.06959058 0.6809225 0.09641345
## 10 0.4125714 0.11574530 0.8062678 0.07771896 0.6817251 0.09529136
## 11 0.4000952 0.12665304 0.7947578 0.07938952 0.6827418 0.09392111
## 12 0.4162857 0.13353480 0.7986610 0.06657823 0.6824823 0.09614994
## 13 0.4080952 0.12062224 0.7940456 0.07312943 0.6835513 0.09635085
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
V_Imp_rf_mtry <- varImp(rf_mtry, scale = FALSE)
V_Imp_rf_mtry
## rf variable importance
##
## only 20 most important variables shown (out of 44)
##
## Importance
## month_collectionMay 7.524
## month_collectionDecember 5.275
## month_collectionSeptember 4.272
## elevationm 4.088
## month_collectionApril 3.641
## trngcv 3.495
## HubDist 3.431
## mrrtf 3.402
## month_collectionOctober 3.353
## month_collectionFebruary 3.296
## rugg500s 3.048
## month_collectionMarch 2.851
## slope 2.848
## hstructn 2.796
## slopern 2.788
## UNITNAMEWittenoom Formation 2.653
## bio14 2.649
## pil_slps 2.638
## wr_unrn 2.562
## month_collectionJune 2.325
plot(V_Imp_rf_mtry, main="Variable Importance - Area_01")
V_Imp_rf_mtry <- varImp(rf_mtry, scale = TRUE)
V_Imp_rf_mtry
## rf variable importance
##
## only 20 most important variables shown (out of 44)
##
## Importance
## month_collectionMay 100.00
## month_collectionDecember 72.70
## month_collectionSeptember 60.53
## elevationm 58.30
## month_collectionApril 52.88
## trngcv 51.10
## HubDist 50.33
## mrrtf 49.98
## month_collectionOctober 49.38
## month_collectionFebruary 48.70
## rugg500s 45.68
## month_collectionMarch 43.29
## slope 43.26
## hstructn 42.62
## slopern 42.52
## UNITNAMEWittenoom Formation 40.89
## bio14 40.85
## pil_slps 40.70
## wr_unrn 39.78
## month_collectionJune 36.91
plot(V_Imp_rf_mtry, main="Variable Importance - Area_01")
library(pdp)
## Warning: package 'pdp' was built under R version 4.3.3
# Box labels "1".."12" for the month plot.
# NOTE(review): assumes the levels of month_collection are ordered
# January..December so that these numbers match the key drawn below -- confirm.
name2 <- as.character(1:12)
# Partial dependence of the predicted probability of class "X1" on month.
pd_month_collection <- partial(
  rf_mtry,
  pred.var = "month_collection",
  quantiles = FALSE,
  prob = TRUE,
  which.class = "X1",
  grid.resolution = 100
)
# Draw the plot with enlarged fonts and a fixed 0-1 probability axis.
plot(
  pd_month_collection,
  main = "Area 1 - Month of collection",
  ylab = "Predicted outcome (yhat)",
  ylim = c(0.0, 1.0),
  names = name2,
  cex.lab = 1.4,
  cex.axis = 1.9,
  cex.main = 1.9,
  cex = 1.6
)
# Two-column key translating the numeric labels back to month names.
month_key_x <- c(1.5, 3.5)
month_key_text <- c(
  "1 - Jan\n2 - Feb\n3 - Mar\n4 - Apr\n5 - May\n6 - Jun",
  "7 - Jul\n8 - Aug\n9 - Sept\n10 - Oct\n11 - Nov\n12 - Dec"
)
for (k in seq_along(month_key_x)) {
  text(month_key_x[k], 0.1, month_key_text[k], cex = 1.4, adj = c(0, 0))
}
# Partial dependence of the predicted probability of class "X1" on UNITNAME.
pd_unitname <- partial(rf_mtry, pred.var = "UNITNAME", quantiles = FALSE, prob = TRUE, which.class = "X1", grid.resolution = 100)
# Box labels are read from the factor levels of the PDP itself, so each label
# is guaranteed to sit under its own box. A hand-typed vector only matches if
# it is written in exactly the level order, which cannot be checked here.
unitname_labels <- levels(pd_unitname$UNITNAME)
# Create the plot
plot(pd_unitname,
  main = "Partial Dependence of UNITNAME",
  ylim = c(0.0, 1.0),
  names = unitname_labels,
  cex.lab = 1,                        # Font size for axis titles
  cex.axis = 1,                       # Font size for axis numbers
  cex.main = 1.9,                     # Font size for the title
  cex = 1.6,                          # Font size for any other text
  ylab = "Predicted Outcome (yhat)",  # Set the y-axis title
  las = 2                             # Rotate x-axis labels for better readability
)
# Add text annotations if needed (example for grouping units)
text(1.5, 0.1,
  "Brockman Iron Formation\nWittenoom Formation",
  cex = 1.4,
  adj = c(0, 0))
text(3.5, 0.1,
  "Mount McRae Shale and Mount Sylvia Formation\nMarra Mamba Iron Formation",
  cex = 1.4,
  adj = c(0, 0))
# ---- Single-predictor partial dependence plots and companion histograms ----

# Partial dependence of the predicted probability of class "X1" on one
# predictor of rf_mtry, evaluated on a 100-point grid.
pd_x1 <- function(var) {
  partial(rf_mtry, pred.var = var, type = "classification", prob = TRUE,
          which.class = "X1", grid.resolution = 100)
}

# Base-graphics PDP with the shared axis/title font sizes; only the size of
# the remaining text (`cex`) differs between plots.
show_pd <- function(pd, title, cex) {
  invisible(plot(pd, main = title, ylab = "Predicted outcome (yhat)",
                 cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = cex))
}

# Histogram of one column of the full data set (data2).
show_hist <- function(var) {
  invisible(hist(data2[[var]],
                 main = paste("Histogram of", var),
                 xlab = var,
                 border = "black",
                 cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9,
                 col = "darkgoldenrod2"))
}

pd_hstructn <- pd_x1("hstructn")
show_pd(pd_hstructn, "Area_01 - Hydrological scores for grades of pedality", cex = 1.2)

pd_wr_unrn <- pd_x1("wr_unrn")
show_pd(pd_wr_unrn, "Area_01 - Proportion of soil with unreliable water retention properties", cex = 1.2)

pd_elevationm <- pd_x1("elevationm")
show_pd(pd_elevationm, "Area_01 - Mean elevation", cex = 1.2)
show_hist("elevationm")

pd_trngcv <- pd_x1("trngcv")
show_pd(pd_trngcv, "Area_01 - C of V of monthly diurnal temperature range (index)", cex = 1.9)
show_hist("trngcv")

pd_HubDist <- pd_x1("HubDist")
show_pd(pd_HubDist, "Area_01 - Minimum distance to a linear structure in km", cex = 1.9)
# Plot with contour. To centre the title use theme()
# https://stackoverflow.com/questions/60678369/center-allign-the-title-in-autoplot
plot.pd_HubDist <- autoplot(pd_HubDist, main = "Area_01 - HubDist", contour = TRUE)
plot.pd_HubDist + theme(plot.title = element_text(hjust = 0.5))
show_hist("HubDist")

pd_mrrtf <- pd_x1("mrrtf")
show_pd(pd_mrrtf, "Area_01 - Multi-resolution ridgetop flatness index", cex = 1.9)
show_hist("mrrtf")

pd_rugg500s <- pd_x1("rugg500s")
show_pd(pd_rugg500s, "Area_01 - Standard deviation of terrain ruggedness", cex = 1.9)
show_hist("rugg500s")
# Write a 5 x 2 panel figure to vara_1.pdf: one row per predictor, with the
# partial dependence plot on the left and the histogram of the full data
# (data2) on the right. Both panels get a rug of the training values.
pdf("vara_1.pdf", width = 14, height = 20)
par(mfrow = c(5, 2),              # 5 rows and 2 columns
    mar = c(4.5, 4.5, 2, 1),      # Margins for each plot (bottom, left, top, right)
    oma = c(2, 2, 2, 2))          # Outer margins
# One entry per row of the figure; `cex` is the only plot setting that varies.
pdf_panels <- list(
  list(pd = pd_elevationm, var = "elevationm", cex = 1.2,
       title = "Area 1 - Mean elevation"),
  list(pd = pd_trngcv, var = "trngcv", cex = 1.9,
       title = "Area 1 - C of V of monthly diurnal temperature range (index)"),
  list(pd = pd_HubDist, var = "HubDist", cex = 1.9,
       title = "Area 1 - Distance to the nearest linear structure in km"),
  list(pd = pd_mrrtf, var = "mrrtf", cex = 1.9,
       title = "Area 1 - Multi-resolution ridgetop flatness index"),
  list(pd = pd_rugg500s, var = "rugg500s", cex = 1.9,
       title = "Area 1 - Standard deviation of terrain ruggedness")
)
for (pdf_panel in pdf_panels) {
  panel_var <- pdf_panel[["var"]]
  # Left: partial dependence plot.
  plot(pdf_panel[["pd"]], main = pdf_panel[["title"]], ylab = "Predicted outcome (yhat)",
       cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = pdf_panel[["cex"]])
  rug(data_train[[panel_var]])
  # Right: histogram of the same variable.
  hist(data2[[panel_var]],
       main = paste("Histogram of", panel_var), xlab = panel_var,
       border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9,
       col = "darkgoldenrod2")
  rug(data_train[[panel_var]])
}
# Close the PDF device
dev.off()
## png
## 2
dim(data_test)
## [1] 123 28
colnames(data_test)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName" "true_troglofauna"
# Predictor-only copy of the test set: drop the last column, which the
# colnames() listing above shows to be the response (true_troglofauna).
selectcol_data_test <- data_test[, -ncol(data_test)]
dim(selectcol_data_test)
## [1] 123 27
names(selectcol_data_test)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName"
set.seed(78945)
predictions <- predict(rf_mtry, newdata = selectcol_data_test,type = "prob")
head(predictions,5)
## X0 X1
## 538 0.634 0.366
## 580 0.290 0.710
## 449 0.552 0.448
## 149 0.086 0.914
## 304 0.104 0.896
dim(predictions)
## [1] 123 2
sapply(predictions,class)
## X0 X1
## "numeric" "numeric"
summary(predictions)
## X0 X1
## Min. :0.0440 Min. :0.2060
## 1st Qu.:0.1740 1st Qu.:0.5420
## Median :0.2960 Median :0.7040
## Mean :0.3257 Mean :0.6743
## 3rd Qu.:0.4580 3rd Qu.:0.8260
## Max. :0.7940 Max. :0.9560
set.seed(78945)
predictions_raw <- predict(rf_mtry, newdata = selectcol_data_test,type = "raw")#The number/class predictions ("raw").
head(predictions_raw,5)
## [1] X0 X1 X0 X1 X1
## Levels: X0 X1
length(predictions_raw)
## [1] 123
head(sapply(predictions_raw,class))
## [1] "factor" "factor" "factor" "factor" "factor" "factor"
summary(predictions_raw)
## X0 X1
## 22 101
set.seed(78945)
predictions1 <- predict(rf_mtry, newdata = selectcol_data_test)
head(predictions1,5)
## [1] X0 X1 X0 X1 X1
## Levels: X0 X1
length(predictions1)
## [1] 123
head(sapply(predictions1,class))
## [1] "factor" "factor" "factor" "factor" "factor" "factor"
summary(predictions1)
## X0 X1
## 22 101
levels(predictions1) <- c(0,1)
head(predictions1,5)
## [1] 0 1 0 1 1
## Levels: 0 1
summary(predictions1)
## 0 1
## 22 101
set.seed(78945)
CM_predictions1<-confusionMatrix(predictions1, ContraProva,positive="1")
CM_predictions1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 16 6
## 1 28 73
##
## Accuracy : 0.7236
## 95% CI : (0.6357, 0.8004)
## No Information Rate : 0.6423
## P-Value [Acc > NIR] : 0.0350973
##
## Kappa : 0.3235
##
## Mcnemar's Test P-Value : 0.0003164
##
## Sensitivity : 0.9241
## Specificity : 0.3636
## Pos Pred Value : 0.7228
## Neg Pred Value : 0.7273
## Prevalence : 0.6423
## Detection Rate : 0.5935
## Detection Prevalence : 0.8211
## Balanced Accuracy : 0.6438
##
## 'Positive' Class : 1
##
str(CM_predictions1)
## List of 6
## $ positive: chr "1"
## $ table : 'table' int [1:2, 1:2] 16 28 6 73
## ..- attr(*, "dimnames")=List of 2
## .. ..$ Prediction: chr [1:2] "0" "1"
## .. ..$ Reference : chr [1:2] "0" "1"
## $ overall : Named num [1:7] 0.724 0.324 0.636 0.8 0.642 ...
## ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
## $ byClass : Named num [1:11] 0.924 0.364 0.723 0.727 0.723 ...
## ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
## $ mode : chr "sens_spec"
## $ dots : list()
## - attr(*, "class")= chr "confusionMatrix"
CM_predictions1$byClass[1]# Sensitivity of CM_predictions1
## Sensitivity
## 0.9240506
CM_predictions1$byClass[2]# Specificity of CM_predictions1
## Specificity
## 0.3636364
CM_predictions1$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9240506 0.3636364 0.7227723
## Neg Pred Value Precision Recall
## 0.7272727 0.7227723 0.9240506
## F1 Prevalence Detection Rate
## 0.8111111 0.6422764 0.5934959
## Detection Prevalence Balanced Accuracy
## 0.8211382 0.6438435
CM_predictions1$byClass["Sensitivity"]
## Sensitivity
## 0.9240506
CM_predictions1$byClass[1]
## Sensitivity
## 0.9240506
CM_predictions1$byClass["Balanced Accuracy"]
## Balanced Accuracy
## 0.6438435
CM_predictions1$byClass[11]
## Balanced Accuracy
## 0.6438435
CM_predictions1$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7235772358 0.3235198965 0.6356994386 0.8003713114 0.6422764228
## AccuracyPValue McnemarPValue
## 0.0350972745 0.0003164226
CM_predictions1$overall["McnemarPValue"]
## McnemarPValue
## 0.0003164226
CM_predictions1$overall[7]
## McnemarPValue
## 0.0003164226
CM_predictions1$table
## Reference
## Prediction 0 1
## 0 16 6
## 1 28 73
# Unpack the 2 x 2 table by label instead of by linear position
# (first index = Prediction, second index = Reference; positive class is "1").
tn <- CM_predictions1$table["0", "0"]  # true negatives
fp <- CM_predictions1$table["1", "0"]  # false positives
fn <- CM_predictions1$table["0", "1"]  # false negatives
tp <- CM_predictions1$table["1", "1"]  # true positives
#Youden's J statistic
J_CM_predictions1<-(CM_predictions1$byClass[1] + CM_predictions1$byClass[2]) - 1
J_CM_predictions1
## Sensitivity
## 0.287687
J_CM_predictions1<-as.numeric(CM_predictions1$byClass[1] + CM_predictions1$byClass[2]) - 1
J_CM_predictions1
## [1] 0.287687
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
mcc(predictions1, ContraProva)
## [1] 0.3598223
get_confusion_elements(CM_predictions1)
## [1] 73 28 16 6
get_confusion_elements(CM_predictions1)[1]
## [1] 73
calculate_mcc(tp, fp, tn, fn)
## [1] 0.3598223
calculate_mcc1(CM_predictions1)
## [1] 0.3598223
calculate_F2(CM_predictions1)
## [1] 0.8752998
model_pred_class <- ifelse(predictions < 0.5, "X0", "X1")
head(model_pred_class,5)
## X0 X1
## 538 "X1" "X0"
## 580 "X0" "X1"
## 449 "X1" "X0"
## 149 "X0" "X1"
## 304 "X0" "X1"
dim(model_pred_class)
## [1] 123 2
length(ContraProva)
## [1] 123
summary(model_pred_class)
## X0 X1
## Length:123 Length:123
## Class :character Class :character
## Mode :character Mode :character
# Predicted labels from the thresholded X1 column. Both levels are declared
# explicitly so the later relabelling levels(Test1) <- c(0, 1) always maps
# X0 -> 0 and X1 -> 1, even if a threshold leaves only one class predicted
# (as.factor() would then create a single level and X1 could become "0").
Test1 <- factor(model_pred_class[, 2], levels = c("X0", "X1"))
head(Test1)
## 538 580 449 149 304 15
## X0 X1 X0 X1 X1 X1
## Levels: X0 X1
summary(Test1)
## X0 X1
## 22 101
levels(Test1) <- c(0,1)
summary(Test1)
## 0 1
## 22 101
head(Test1)
## 538 580 449 149 304 15
## 0 1 0 1 1 1
## Levels: 0 1
confusionMatrix(Test1, ContraProva,positive="1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 16 6
## 1 28 73
##
## Accuracy : 0.7236
## 95% CI : (0.6357, 0.8004)
## No Information Rate : 0.6423
## P-Value [Acc > NIR] : 0.0350973
##
## Kappa : 0.3235
##
## Mcnemar's Test P-Value : 0.0003164
##
## Sensitivity : 0.9241
## Specificity : 0.3636
## Pos Pred Value : 0.7228
## Neg Pred Value : 0.7273
## Prevalence : 0.6423
## Detection Rate : 0.5935
## Detection Prevalence : 0.8211
## Balanced Accuracy : 0.6438
##
## 'Positive' Class : 1
##
summary(Test1); summary(predictions1)
## 0 1
## 22 101
## 0 1
## 22 101
model_pred_class <- ifelse(predictions < 0.7, "X0", "X1")
head(model_pred_class,20)
## X0 X1
## 538 "X0" "X0"
## 580 "X0" "X1"
## 449 "X0" "X0"
## 149 "X0" "X1"
## 304 "X0" "X1"
## 15 "X0" "X1"
## 104 "X0" "X1"
## 394 "X0" "X0"
## 392 "X0" "X1"
## 503 "X0" "X1"
## 99 "X0" "X1"
## 198 "X0" "X1"
## 629 "X0" "X1"
## 505 "X0" "X1"
## 442 "X1" "X0"
## 32 "X0" "X0"
## 195 "X0" "X0"
## 30 "X0" "X1"
## 332 "X0" "X1"
## 327 "X0" "X1"
dim(model_pred_class)
## [1] 123 2
length(ContraProva)
## [1] 123
head(sapply(model_pred_class, class))
## X0 X0 X0 X0 X0 X0
## "character" "character" "character" "character" "character" "character"
# Predicted labels from the thresholded X1 column. Both levels are declared
# explicitly so the later relabelling levels(Test11) <- c(0, 1) always maps
# X0 -> 0 and X1 -> 1, even if a threshold leaves only one class predicted
# (as.factor() would then create a single level and X1 could become "0").
Test11 <- factor(model_pred_class[, 2], levels = c("X0", "X1"))
summary(Test11)
## X0 X1
## 59 64
head(Test11)
## 538 580 449 149 304 15
## X0 X1 X0 X1 X1 X1
## Levels: X0 X1
levels(Test11) <- c(0,1)
summary(Test11)
## 0 1
## 59 64
summary(predictions1)
## 0 1
## 22 101
CM_Test11<-confusionMatrix(Test11, ContraProva,positive="1")
CM_Test11
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 30 29
## 1 14 50
##
## Accuracy : 0.6504
## 95% CI : (0.5592, 0.7342)
## No Information Rate : 0.6423
## P-Value [Acc > NIR] : 0.46609
##
## Kappa : 0.2926
##
## Mcnemar's Test P-Value : 0.03276
##
## Sensitivity : 0.6329
## Specificity : 0.6818
## Pos Pred Value : 0.7812
## Neg Pred Value : 0.5085
## Prevalence : 0.6423
## Detection Rate : 0.4065
## Detection Prevalence : 0.5203
## Balanced Accuracy : 0.6574
##
## 'Positive' Class : 1
##
str(CM_Test11)
## List of 6
## $ positive: chr "1"
## $ table : 'table' int [1:2, 1:2] 30 14 29 50
## ..- attr(*, "dimnames")=List of 2
## .. ..$ Prediction: chr [1:2] "0" "1"
## .. ..$ Reference : chr [1:2] "0" "1"
## $ overall : Named num [1:7] 0.65 0.293 0.559 0.734 0.642 ...
## ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
## $ byClass : Named num [1:11] 0.633 0.682 0.781 0.508 0.781 ...
## ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
## $ mode : chr "sens_spec"
## $ dots : list()
## - attr(*, "class")= chr "confusionMatrix"
CM_Test11$byClass[1]# Sensitivity of CM_Test11
## Sensitivity
## 0.6329114
CM_Test11$byClass[2]# Specificity of CM_Test11
## Specificity
## 0.6818182
J_CM_Test11<-as.numeric(CM_Test11$byClass[1] + CM_Test11$byClass[2] - 1) ; J_CM_Test11
## [1] 0.3147296
confusionMatrix(predictions1, ContraProva,positive="1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 16 6
## 1 28 73
##
## Accuracy : 0.7236
## 95% CI : (0.6357, 0.8004)
## No Information Rate : 0.6423
## P-Value [Acc > NIR] : 0.0350973
##
## Kappa : 0.3235
##
## Mcnemar's Test P-Value : 0.0003164
##
## Sensitivity : 0.9241
## Specificity : 0.3636
## Pos Pred Value : 0.7228
## Neg Pred Value : 0.7273
## Prevalence : 0.6423
## Detection Rate : 0.5935
## Detection Prevalence : 0.8211
## Balanced Accuracy : 0.6438
##
## 'Positive' Class : 1
##
dim(finalTest)
## [1] 81 28
names(finalTest)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName" "true_troglofauna"
# Predictor-only copy of the final test set: drop the last column, which the
# names() listing above shows to be the response (true_troglofauna).
selectcol_finalTest <- finalTest[, -ncol(finalTest)]
dim(selectcol_finalTest)
## [1] 81 27
names(selectcol_finalTest)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName"
predictions_2 <- predict(rf_mtry, newdata = selectcol_finalTest,type = "raw")
summary(predictions_2)
## X0 X1
## 10 71
summary(finalTest[, ncol(finalTest)])
## 0 1
## 29 52
levels(predictions_2) <- c(0,1)
summary(predictions_2)
## 0 1
## 10 71
CM_predictions_2<-confusionMatrix(predictions_2, finalTest[, ncol(finalTest)],positive="1")
CM_predictions_2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 9 1
## 1 20 51
##
## Accuracy : 0.7407
## 95% CI : (0.6314, 0.8318)
## No Information Rate : 0.642
## P-Value [Acc > NIR] : 0.0387
##
## Kappa : 0.3404
##
## Mcnemar's Test P-Value : 8.568e-05
##
## Sensitivity : 0.9808
## Specificity : 0.3103
## Pos Pred Value : 0.7183
## Neg Pred Value : 0.9000
## Prevalence : 0.6420
## Detection Rate : 0.6296
## Detection Prevalence : 0.8765
## Balanced Accuracy : 0.6456
##
## 'Positive' Class : 1
##
mcc(predictions_2, finalTest[, ncol(finalTest)])
## [1] 0.4242625
calculate_mcc1(CM_predictions_2)
## [1] 0.4242625
calculate_F2(CM_predictions_2)
## [1] 0.9139785
predictions_3<- predict(rf_mtry, newdata = selectcol_finalTest,type = "prob")
summary(predictions_3)
## X0 X1
## Min. :0.0780 Min. :0.2020
## 1st Qu.:0.1940 1st Qu.:0.5880
## Median :0.2980 Median :0.7020
## Mean :0.3123 Mean :0.6877
## 3rd Qu.:0.4120 3rd Qu.:0.8060
## Max. :0.7980 Max. :0.9220
sapply(predictions_3, class)
## X0 X1
## "numeric" "numeric"
head(predictions_3)
## X0 X1
## 95 0.334 0.666
## 626 0.102 0.898
## 607 0.344 0.656
## 623 0.154 0.846
## 18 0.514 0.486
## 470 0.282 0.718
model_pred_class_3 <- ifelse(predictions_3 < 0.5, "X0", "X1")
head(sapply(model_pred_class_3, class))
## X0 X0 X0 X0 X1 X0
## "character" "character" "character" "character" "character" "character"
# Predicted labels from the thresholded X1 column. Both levels are declared
# explicitly so the later relabelling levels(Test_3) <- c(0, 1) always maps
# X0 -> 0 and X1 -> 1, even if a threshold leaves only one class predicted
# (as.factor() would then create a single level and X1 could become "0").
Test_3 <- factor(model_pred_class_3[, 2], levels = c("X0", "X1"))
head(sapply(Test_3, class))
## 95 626 607 623 18 470
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_3)
## [1] "X0" "X1"
head(Test_3)
## 95 626 607 623 18 470
## X1 X1 X1 X1 X0 X1
## Levels: X0 X1
levels(Test_3) <- c(0,1)
head(sapply(Test_3, class))
## 95 626 607 623 18 470
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_3)
## [1] "0" "1"
head(Test_3)
## 95 626 607 623 18 470
## 1 1 1 1 0 1
## Levels: 0 1
summary(Test_3)
## 0 1
## 10 71
CM_predictions_3<-confusionMatrix(Test_3, finalTest[, ncol(finalTest)],positive="1")
CM_predictions_3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 9 1
## 1 20 51
##
## Accuracy : 0.7407
## 95% CI : (0.6314, 0.8318)
## No Information Rate : 0.642
## P-Value [Acc > NIR] : 0.0387
##
## Kappa : 0.3404
##
## Mcnemar's Test P-Value : 8.568e-05
##
## Sensitivity : 0.9808
## Specificity : 0.3103
## Pos Pred Value : 0.7183
## Neg Pred Value : 0.9000
## Prevalence : 0.6420
## Detection Rate : 0.6296
## Detection Prevalence : 0.8765
## Balanced Accuracy : 0.6456
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_predictions_3)
## [1] 0.4242625
calculate_F2(CM_predictions_3)
## [1] 0.9139785
testrf_mtry<- predict(rf_mtry)
summary(testrf_mtry)
## X0 X1
## 60 145
levels(testrf_mtry) <- c(0,1);
summary(testrf_mtry)
## 0 1
## 60 145
CM_testrf_mtry<-confusionMatrix(testrf_mtry, data_train[, ncol(data_train)],positive="1");
CM_testrf_mtry
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 60 0
## 1 14 131
##
## Accuracy : 0.9317
## 95% CI : (0.8881, 0.9622)
## No Information Rate : 0.639
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8456
##
## Mcnemar's Test P-Value : 0.000512
##
## Sensitivity : 1.0000
## Specificity : 0.8108
## Pos Pred Value : 0.9034
## Neg Pred Value : 1.0000
## Prevalence : 0.6390
## Detection Rate : 0.6390
## Detection Prevalence : 0.7073
## Balanced Accuracy : 0.9054
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_testrf_mtry)
## [1] 0.8558771
calculate_F2(CM_testrf_mtry)
## [1] 0.9790732
testrf_mtry1<- predict(rf_mtry,type="prob")
summary(testrf_mtry1)
## X0 X1
## Min. :0.0200 Min. :0.0400
## 1st Qu.:0.1160 1st Qu.:0.4400
## Median :0.2200 Median :0.7800
## Mean :0.3312 Mean :0.6688
## 3rd Qu.:0.5600 3rd Qu.:0.8840
## Max. :0.9600 Max. :0.9800
model_pred_class_4 <- ifelse(testrf_mtry1 < 0.5, "X0", "X1")
summary(model_pred_class_4)
## X0 X1
## Length:205 Length:205
## Class :character Class :character
## Mode :character Mode :character
# Predicted labels from the thresholded X1 column. Both levels are declared
# explicitly so the later relabelling levels(Test_4) <- c(0, 1) always maps
# X0 -> 0 and X1 -> 1, even if a threshold leaves only one class predicted
# (as.factor() would then create a single level and X1 could become "0").
Test_4 <- factor(model_pred_class_4[, 2], levels = c("X0", "X1"))
head(Test_4)
## 173 50 458 383 24 246
## X0 X1 X0 X1 X1 X1
## Levels: X0 X1
levels(Test_4) <- c(0,1);
summary(Test_4)
## 0 1
## 60 145
CM_testrf_mtry1<-confusionMatrix(Test_4, data_train[, ncol(data_train)],positive="1");
CM_testrf_mtry1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 60 0
## 1 14 131
##
## Accuracy : 0.9317
## 95% CI : (0.8881, 0.9622)
## No Information Rate : 0.639
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8456
##
## Mcnemar's Test P-Value : 0.000512
##
## Sensitivity : 1.0000
## Specificity : 0.8108
## Pos Pred Value : 0.9034
## Neg Pred Value : 1.0000
## Prevalence : 0.6390
## Detection Rate : 0.6390
## Detection Prevalence : 0.7073
## Balanced Accuracy : 0.9054
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_testrf_mtry1)
## [1] 0.8558771
calculate_F2(CM_testrf_mtry1)
## [1] 0.9790732
prediction.probabilities <- predictions[,"X1"]
head(prediction.probabilities,5)
## [1] 0.366 0.710 0.448 0.914 0.896
res.roc <-roc(ContraProva,prediction.probabilities)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
res.roc
##
## Call:
## roc.default(response = ContraProva, predictor = prediction.probabilities)
##
## Data: prediction.probabilities in 44 controls (ContraProva 0) < 79 cases (ContraProva 1).
## Area under the curve: 0.7392
str(res.roc)
## List of 15
## $ percent : logi FALSE
## $ sensitivities : num [1:100] 1 1 0.987 0.987 0.987 ...
## $ specificities : num [1:100] 0 0.0227 0.0227 0.0682 0.0909 ...
## $ thresholds : num [1:100] -Inf 0.217 0.233 0.257 0.305 ...
## $ direction : chr "<"
## $ cases : num [1:79] 0.71 0.448 0.914 0.896 0.852 0.92 0.704 0.786 0.736 0.842 ...
## $ controls : num [1:44] 0.366 0.522 0.72 0.876 0.41 0.442 0.548 0.494 0.238 0.542 ...
## $ fun.sesp :function (thresholds, controls, cases, direction)
## $ auc : 'auc' num 0.739
## ..- attr(*, "partial.auc")= logi FALSE
## ..- attr(*, "percent")= logi FALSE
## ..- attr(*, "roc")=List of 15
## .. ..$ percent : logi FALSE
## .. ..$ sensitivities : num [1:100] 1 1 0.987 0.987 0.987 ...
## .. ..$ specificities : num [1:100] 0 0.0227 0.0227 0.0682 0.0909 ...
## .. ..$ thresholds : num [1:100] -Inf 0.217 0.233 0.257 0.305 ...
## .. ..$ direction : chr "<"
## .. ..$ cases : num [1:79] 0.71 0.448 0.914 0.896 0.852 0.92 0.704 0.786 0.736 0.842 ...
## .. ..$ controls : num [1:44] 0.366 0.522 0.72 0.876 0.41 0.442 0.548 0.494 0.238 0.542 ...
## .. ..$ fun.sesp :function (thresholds, controls, cases, direction)
## .. ..$ auc : 'auc' num 0.739
## .. .. ..- attr(*, "partial.auc")= logi FALSE
## .. .. ..- attr(*, "percent")= logi FALSE
## .. .. ..- attr(*, "roc")=List of 8
## .. .. .. ..$ percent : logi FALSE
## .. .. .. ..$ sensitivities: num [1:100] 1 1 0.987 0.987 0.987 ...
## .. .. .. ..$ specificities: num [1:100] 0 0.0227 0.0227 0.0682 0.0909 ...
## .. .. .. ..$ thresholds : num [1:100] -Inf 0.217 0.233 0.257 0.305 ...
## .. .. .. ..$ direction : chr "<"
## .. .. .. ..$ cases : num [1:79] 0.71 0.448 0.914 0.896 0.852 0.92 0.704 0.786 0.736 0.842 ...
## .. .. .. ..$ controls : num [1:44] 0.366 0.522 0.72 0.876 0.41 0.442 0.548 0.494 0.238 0.542 ...
## .. .. .. ..$ fun.sesp :function (thresholds, controls, cases, direction)
## .. .. .. ..- attr(*, "class")= chr "roc"
## .. ..$ call : language roc.default(response = ContraProva, predictor = prediction.probabilities)
## .. ..$ original.predictor: num [1:123] 0.366 0.71 0.448 0.914 0.896 0.852 0.92 0.522 0.704 0.72 ...
## .. ..$ original.response : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 1 2 1 ...
## .. ..$ predictor : num [1:123] 0.366 0.71 0.448 0.914 0.896 0.852 0.92 0.522 0.704 0.72 ...
## .. ..$ response : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 1 2 1 ...
## .. ..$ levels : chr [1:2] "0" "1"
## .. ..- attr(*, "class")= chr "roc"
## $ call : language roc.default(response = ContraProva, predictor = prediction.probabilities)
## $ original.predictor: num [1:123] 0.366 0.71 0.448 0.914 0.896 0.852 0.92 0.522 0.704 0.72 ...
## $ original.response : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 1 2 1 ...
## $ predictor : num [1:123] 0.366 0.71 0.448 0.914 0.896 0.852 0.92 0.522 0.704 0.72 ...
## $ response : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 1 2 1 ...
## $ levels : chr [1:2] "0" "1"
## - attr(*, "class")= chr "roc"
head(res.roc$cases,5)
## [1] 0.710 0.448 0.914 0.896 0.852
length(res.roc$cases)
## [1] 79
summary(res.roc$cases)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2280 0.6450 0.7540 0.7274 0.8360 0.9520
head(res.roc$controls,5)
## [1] 0.366 0.522 0.720 0.876 0.410
length(res.roc$controls)
## [1] 44
summary(res.roc$controls)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2060 0.4740 0.5430 0.5789 0.7200 0.9560
head(res.roc$thresholds,5)
## [1] -Inf 0.217 0.233 0.257 0.305
tail(res.roc$thresholds,5)
## [1] 0.919 0.921 0.937 0.954 Inf
length(res.roc$thresholds)
## [1] 100
summary(res.roc$thresholds)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -Inf 0.5180 0.6965 0.8230 Inf
auc(res.roc)# auc() computes the area under the ROC curve.
## Area under the curve: 0.7392
ci.auc(res.roc)# ci.auc() computes the confidence interval of the AUC; no method is given here and the printed result reports DeLong.
## 95% CI: 0.6442-0.8343 (DeLong)
# NOTE(review): no set.seed() call is visible near this bootstrap, so the interval below may differ slightly between runs - confirm.
ci.auc(res.roc,method = "bootstrap", boot.n = 10000)# Confidence interval of the AUC, here explicitly requested with method = "bootstrap" and 10000 replicates.
## 95% CI: 0.6385-0.8288 (10000 stratified bootstrap replicates)
#A value of x = "all" will return the coordinates for the curve and their associated cutoffs.
rfThresh_all <- coords(res.roc, x = "all", best.method = "youden")
head(rfThresh_all)
## threshold specificity sensitivity
## 1 -Inf 0.00000000 1.0000000
## 2 0.217 0.02272727 1.0000000
## 3 0.233 0.02272727 0.9873418
## 4 0.257 0.06818182 0.9873418
## 5 0.305 0.09090909 0.9873418
## 6 0.335 0.09090909 0.9746835
summary(rfThresh_all)
## threshold specificity sensitivity
## Min. : -Inf Min. :0.0000 Min. :0.0000
## 1st Qu.:0.5180 1st Qu.:0.4318 1st Qu.:0.3513
## Median :0.6965 Median :0.6818 Median :0.6392
## Mean : NaN Mean :0.6405 Mean :0.5903
## 3rd Qu.:0.8230 3rd Qu.:0.9091 3rd Qu.:0.8892
## Max. : Inf Max. :1.0000 Max. :1.0000
dim(rfThresh_all)
## [1] 100 3
#“local maximas”(the local maximas of the ROC curve).
rfThresh_max <- coords(res.roc, x = "local maximas", best.method = "youden")
head(rfThresh_max)
## threshold specificity sensitivity
## 1 0.217 0.02272727 1.0000000
## 2 0.305 0.09090909 0.9873418
## 3 0.422 0.15909091 0.9620253
## 4 0.447 0.20454545 0.9367089
## 5 0.498 0.36363636 0.9240506
## 6 0.503 0.38636364 0.9113924
summary(rfThresh_max)
## threshold specificity sensitivity
## Min. :0.217 Min. :0.02273 Min. :0.0000
## 1st Qu.:0.520 1st Qu.:0.44318 1st Qu.:0.5063
## Median :0.641 Median :0.63636 Median :0.7595
## Mean : Inf Mean :0.60943 Mean :0.6606
## 3rd Qu.:0.760 3rd Qu.:0.82955 3rd Qu.:0.8924
## Max. : Inf Max. :1.00000 Max. :1.0000
dim(rfThresh_max)
## [1] 27 3
#Best thresholds: if x = "best" and best.method = "youden", Youden's J statistic (Youden, 1950) is used. The optimal cut-off is the threshold that maximizes the distance to the identity (diagonal) line.
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
## threshold specificity sensitivity
## 1 0.554 0.5454545 0.8607595
rfThresh_youden[1,1]
## [1] 0.554
#Best thresholds: if x = "best" and best.method = "closest.topleft", the optimal threshold is the point closest to the top-left part of the plot with perfect sensitivity or specificity.
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
## threshold specificity sensitivity
## 1 0.641 0.6363636 0.7594937
rfThresh_topleft[1,1]
## [1] 0.641
coords(res.roc, "best", ret="all", transpose = FALSE,
best.method="youden")
## threshold specificity sensitivity accuracy tn tp fn fp npv
## threshold 0.554 0.5454545 0.8607595 0.7479675 24 68 11 20 0.6857143
## ppv fdr fpr tpr tnr fnr
## threshold 0.7727273 0.2272727 0.4545455 0.8607595 0.5454545 0.1392405
## 1-specificity 1-sensitivity 1-accuracy 1-npv 1-ppv precision
## threshold 0.4545455 0.1392405 0.2520325 0.3142857 0.2272727 0.7727273
## recall youden closest.topleft
## threshold 0.8607595 1.406214 0.2259995
coords(res.roc, "best", ret="all", transpose = FALSE,
best.method="closest.topleft")
## threshold specificity sensitivity accuracy tn tp fn fp npv
## threshold 0.641 0.6363636 0.7594937 0.7154472 28 60 19 16 0.5957447
## ppv fdr fpr tpr tnr fnr
## threshold 0.7894737 0.2105263 0.3636364 0.7594937 0.6363636 0.2405063
## 1-specificity 1-sensitivity 1-accuracy 1-npv 1-ppv precision
## threshold 0.3636364 0.2405063 0.2845528 0.4042553 0.2105263 0.7894737
## recall youden closest.topleft
## threshold 0.7594937 1.395857 0.1900747
coords(res.roc, "best", ret="threshold", transpose = FALSE,
best.method="youden") # default
## threshold
## 1 0.554
coords(res.roc, "best", ret="threshold", transpose = FALSE,
best.method="closest.topleft")
## threshold
## 1 0.641
coords(res.roc, "best", ret="tn", transpose = FALSE)
## tn
## threshold 24
coords(res.roc, "best", ret="tn", transpose = FALSE,
best.method="closest.topleft")
## tn
## threshold 28
coords(res.roc, "best", ret="tp", transpose = FALSE)
## tp
## threshold 68
coords(res.roc, "best", ret="tp", transpose = FALSE,
best.method="closest.topleft")
## tp
## threshold 60
coords(res.roc, "best", ret="youden", transpose = FALSE)
## youden
## 1 1.406214
coords(res.roc, "best", ret="closest.topleft", transpose = FALSE,best.method="closest.topleft")
## closest.topleft
## 1 0.1900747
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
## threshold specificity sensitivity
## 1 0.554 0.5454545 0.8607595
rfThresh_youden[1,1]
## [1] 0.554
model_pred_class <- ifelse(predictions < rfThresh_youden[1,1], "X0", "X1")
head(model_pred_class)
## X0 X1
## 538 "X1" "X0"
## 580 "X0" "X1"
## 449 "X0" "X0"
## 149 "X0" "X1"
## 304 "X0" "X1"
## 15 "X0" "X1"
summary(model_pred_class)
## X0 X1
## Length:123 Length:123
## Class :character Class :character
## Mode :character Mode :character
# Predicted classes at the Youden threshold, taken from the X1 column of
# model_pred_class and recoded X0 -> 0, X1 -> 1 in one step. Pinning the levels
# keeps the recoding correct even when only one class is predicted; the former
# as.factor() + positional `levels<-` would rename a lone "X1" level to "0".
Test2 <- factor(
  model_pred_class[, "X1"],
  levels = c("X0", "X1"),
  labels = c("0", "1")
)
summary(Test2)
## 0 1
## 35 88
CM_Test2<-confusionMatrix(Test2, ContraProva,positive="1")
CM_Test2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 24 11
## 1 20 68
##
## Accuracy : 0.748
## 95% CI : (0.6617, 0.8219)
## No Information Rate : 0.6423
## P-Value [Acc > NIR] : 0.008134
##
## Kappa : 0.4255
##
## Mcnemar's Test P-Value : 0.150763
##
## Sensitivity : 0.8608
## Specificity : 0.5455
## Pos Pred Value : 0.7727
## Neg Pred Value : 0.6857
## Prevalence : 0.6423
## Detection Rate : 0.5528
## Detection Prevalence : 0.7154
## Balanced Accuracy : 0.7031
##
## 'Positive' Class : 1
##
str(CM_Test2)
## List of 6
## $ positive: chr "1"
## $ table : 'table' int [1:2, 1:2] 24 20 11 68
## ..- attr(*, "dimnames")=List of 2
## .. ..$ Prediction: chr [1:2] "0" "1"
## .. ..$ Reference : chr [1:2] "0" "1"
## $ overall : Named num [1:7] 0.748 0.425 0.662 0.822 0.642 ...
## ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
## $ byClass : Named num [1:11] 0.861 0.545 0.773 0.686 0.773 ...
## ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
## $ mode : chr "sens_spec"
## $ dots : list()
## - attr(*, "class")= chr "confusionMatrix"
# Look the statistics up by name rather than by position, so the values cannot
# silently shift if the layout of byClass changes.
CM_Test2$byClass["Sensitivity"] # Sensitivity of CM_Test2
## Sensitivity
## 0.8607595
CM_Test2$byClass["Specificity"] # Specificity of CM_Test2
## Specificity
## 0.5454545
# Youden's J statistic: sensitivity + specificity - 1.
J_CM_Test2 <- as.numeric(
  CM_Test2$byClass["Sensitivity"] + CM_Test2$byClass["Specificity"] - 1
)
J_CM_Test2
## [1] 0.406214
calculate_mcc1(CM_Test2)
## [1] 0.4315384
calculate_F2(CM_Test2)
## [1] 0.8415842
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
## threshold specificity sensitivity
## 1 0.554 0.5454545 0.8607595
rfThresh_youden[1,1]
## [1] 0.554
model_pred_class_31 <- ifelse(predictions_3 < rfThresh_youden[1,1], "X0", "X1")
head(sapply(model_pred_class_31, class))
## X0 X0 X0 X0 X0 X0
## "character" "character" "character" "character" "character" "character"
# Pin the levels to c("X0", "X1") so the later relabelling to 0/1 stays correct even if the threshold puts every observation in a single class.
Test_31 <- factor(model_pred_class_31[, 2], levels = c("X0", "X1"))
head(sapply(Test_31, class))
## 95 626 607 623 18 470
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_31) <- c(0,1)
head(sapply(Test_31, class))
## 95 626 607 623 18 470
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_31)
## [1] "0" "1"
head(Test_31)
## 95 626 607 623 18 470
## 1 1 1 1 0 1
## Levels: 0 1
summary(Test_31)
## 0 1
## 15 66
CM_predictions_31<-confusionMatrix(Test_31, finalTest$true_troglofauna,positive="1")
CM_predictions_31
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 12 3
## 1 17 49
##
## Accuracy : 0.7531
## 95% CI : (0.6447, 0.8422)
## No Information Rate : 0.642
## P-Value [Acc > NIR] : 0.02218
##
## Kappa : 0.3987
##
## Mcnemar's Test P-Value : 0.00365
##
## Sensitivity : 0.9423
## Specificity : 0.4138
## Pos Pred Value : 0.7424
## Neg Pred Value : 0.8000
## Prevalence : 0.6420
## Detection Rate : 0.6049
## Detection Prevalence : 0.8148
## Balanced Accuracy : 0.6781
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_predictions_31)
## [1] 0.4394971
calculate_F2(CM_predictions_31)
## [1] 0.8941606
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
## threshold specificity sensitivity
## 1 0.641 0.6363636 0.7594937
rfThresh_topleft[1,1]
## [1] 0.641
model_pred_class <- ifelse(predictions < rfThresh_topleft[1,1], "X0", "X1")
# Predicted classes at the closest-top-left threshold, taken from the X1 column
# of model_pred_class and recoded X0 -> 0, X1 -> 1 in one step. Pinning the
# levels keeps the recoding correct even when only one class is predicted; the
# former as.factor() + positional `levels<-` would rename a lone "X1" to "0".
Test3 <- factor(
  model_pred_class[, "X1"],
  levels = c("X0", "X1"),
  labels = c("0", "1")
)
summary(Test3)
## 0 1
## 47 76
CM_Test3<-confusionMatrix(Test3, ContraProva,positive="1")
CM_Test3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 28 19
## 1 16 60
##
## Accuracy : 0.7154
## 95% CI : (0.6271, 0.7931)
## No Information Rate : 0.6423
## P-Value [Acc > NIR] : 0.05311
##
## Kappa : 0.39
##
## Mcnemar's Test P-Value : 0.73532
##
## Sensitivity : 0.7595
## Specificity : 0.6364
## Pos Pred Value : 0.7895
## Neg Pred Value : 0.5957
## Prevalence : 0.6423
## Detection Rate : 0.4878
## Detection Prevalence : 0.6179
## Balanced Accuracy : 0.6979
##
## 'Positive' Class : 1
##
str(CM_Test3)
## List of 6
## $ positive: chr "1"
## $ table : 'table' int [1:2, 1:2] 28 16 19 60
## ..- attr(*, "dimnames")=List of 2
## .. ..$ Prediction: chr [1:2] "0" "1"
## .. ..$ Reference : chr [1:2] "0" "1"
## $ overall : Named num [1:7] 0.715 0.39 0.627 0.793 0.642 ...
## ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
## $ byClass : Named num [1:11] 0.759 0.636 0.789 0.596 0.789 ...
## ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
## $ mode : chr "sens_spec"
## $ dots : list()
## - attr(*, "class")= chr "confusionMatrix"
# Look the statistics up by name rather than by position, so the values cannot
# silently shift if the layout of byClass changes.
CM_Test3$byClass["Sensitivity"] # Sensitivity of CM_Test3
## Sensitivity
## 0.7594937
CM_Test3$byClass["Specificity"] # Specificity of CM_Test3
## Specificity
## 0.6363636
# Youden's J statistic: sensitivity + specificity - 1.
J_CM_Test3 <- as.numeric(
  CM_Test3$byClass["Sensitivity"] + CM_Test3$byClass["Specificity"] - 1
)
J_CM_Test3
## [1] 0.3958573
calculate_mcc1(CM_Test3)
## [1] 0.3905016
calculate_F2(CM_Test3)
## [1] 0.7653061
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
## threshold specificity sensitivity
## 1 0.641 0.6363636 0.7594937
rfThresh_topleft[1,1]
## [1] 0.641
model_pred_class_41 <- ifelse(predictions_3 < rfThresh_topleft[1,1], "X0", "X1")
head(sapply(model_pred_class_41, class))
## X0 X0 X0 X0 X0 X0
## "character" "character" "character" "character" "character" "character"
# Pin the levels to c("X0", "X1") so the later relabelling to 0/1 stays correct even if the threshold puts every observation in a single class.
Test_41 <- factor(model_pred_class_41[, 2], levels = c("X0", "X1"))
head(sapply(Test_41, class))
## 95 626 607 623 18 470
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_41) <- c(0,1)
head(sapply(Test_41, class))
## 95 626 607 623 18 470
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_41)
## [1] "0" "1"
head(Test_41)
## 95 626 607 623 18 470
## 1 1 1 1 0 1
## Levels: 0 1
summary(Test_41)
## 0 1
## 28 53
CM_predictions_41<-confusionMatrix(Test_41, finalTest$true_troglofauna,positive="1")
CM_predictions_41
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 22 6
## 1 7 46
##
## Accuracy : 0.8395
## 95% CI : (0.7412, 0.9117)
## No Information Rate : 0.642
## P-Value [Acc > NIR] : 7.19e-05
##
## Kappa : 0.6482
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.8846
## Specificity : 0.7586
## Pos Pred Value : 0.8679
## Neg Pred Value : 0.7857
## Prevalence : 0.6420
## Detection Rate : 0.5679
## Detection Prevalence : 0.6543
## Balanced Accuracy : 0.8216
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_predictions_41)
## [1] 0.6484166
calculate_F2(CM_predictions_41)
## [1] 0.8812261
# Collect the ROC curve coordinates stored in res.roc into a single tibble.
# tibble() replaces data_frame(), which has been deprecated since tibble 1.1.0
# and emitted a lifecycle warning here.
roc.data <- tibble(
  thresholds = res.roc$thresholds, # ROC curve thresholds
  sensitivity = res.roc$sensitivities, # sensitivity (true positive rate)
  specificity = res.roc$specificities # specificity (true negative rate)
)
roc.data # Display the 'roc.data' dataframe in the R console
## # A tibble: 100 × 3
## thresholds sensitivity specificity
## <dbl> <dbl> <dbl>
## 1 -Inf 1 0
## 2 0.217 1 0.0227
## 3 0.233 0.987 0.0227
## 4 0.257 0.987 0.0682
## 5 0.305 0.987 0.0909
## 6 0.335 0.975 0.0909
## 7 0.351 0.962 0.0909
## 8 0.379 0.962 0.114
## 9 0.401 0.962 0.136
## 10 0.422 0.962 0.159
## # ℹ 90 more rows
tibble(roc.data)# Wraps roc.data in tibble() and prints it; the result is not assigned, so roc.data itself is left unchanged.
## # A tibble: 100 × 3
## thresholds sensitivity specificity
## <dbl> <dbl> <dbl>
## 1 -Inf 1 0
## 2 0.217 1 0.0227
## 3 0.233 0.987 0.0227
## 4 0.257 0.987 0.0682
## 5 0.305 0.987 0.0909
## 6 0.335 0.975 0.0909
## 7 0.351 0.962 0.0909
## 8 0.379 0.962 0.114
## 9 0.401 0.962 0.136
## 10 0.422 0.962 0.159
## # ℹ 90 more rows
# List the ROC points (probability thresholds) with specificity >= 0.6; the sensitivity >= 0.6 filter is applied separately further down.
roc.data %>% filter(specificity >= 0.6)# piped with %>%, the "forward pipe operator"
## # A tibble: 62 × 3
## thresholds sensitivity specificity
## <dbl> <dbl> <dbl>
## 1 0.639 0.772 0.614
## 2 0.641 0.759 0.636
## 3 0.645 0.747 0.636
## 4 0.654 0.734 0.636
## 5 0.661 0.722 0.659
## 6 0.665 0.709 0.659
## 7 0.669 0.696 0.659
## 8 0.675 0.696 0.682
## 9 0.682 0.684 0.682
## 10 0.685 0.671 0.682
## # ℹ 52 more rows
roc.data %>% filter(sensitivity >= 0.6)# ROC points with sensitivity >= 0.6, piped with %>% (the "forward pipe operator")
## # A tibble: 54 × 3
## thresholds sensitivity specificity
## <dbl> <dbl> <dbl>
## 1 -Inf 1 0
## 2 0.217 1 0.0227
## 3 0.233 0.987 0.0227
## 4 0.257 0.987 0.0682
## 5 0.305 0.987 0.0909
## 6 0.335 0.975 0.0909
## 7 0.351 0.962 0.0909
## 8 0.379 0.962 0.114
## 9 0.401 0.962 0.136
## 10 0.422 0.962 0.159
## # ℹ 44 more rows
# ROC curve of res.roc with the "best" threshold printed on the curve.
par(pty = "s") # square plotting region
plot.roc(
  res.roc,
  print.auc = TRUE,
  col = "blue",
  print.thres = "best",
  print.auc.y = 0.4,
  main = "ROC curve (Point: best.method = 'youden')"
)
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")
# Dashed red vertical segment at the Youden specificity, running from
# 1 - specificity up to the Youden sensitivity.
segments(
  x0 = rfThresh_youden[1, "specificity"],
  y0 = 1 - rfThresh_youden[1, "specificity"],
  x1 = rfThresh_youden[1, "specificity"],
  y1 = rfThresh_youden[1, "sensitivity"],
  lwd = 3, col = "red", lty = 2
)
text(
  x = 0.4, y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)
# ROC curve of res.roc with fixed cutoffs and the closest-top-left threshold
# printed on the curve.
par(pty = "s") # square plotting region
plot.roc(
  res.roc,
  print.auc = TRUE,
  col = "blue",
  print.thres = c(0.3, 0.5, 0.7, rfThresh_topleft[1, "threshold"]),
  print.auc.y = 0.4,
  main = "ROC curve (Point: best.method = 'closest.topleft')"
)
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")
# Dashed red segment from the closest-top-left point to the corner (1, 1).
segments(
  x0 = rfThresh_topleft[1, "specificity"],
  y0 = rfThresh_topleft[1, "sensitivity"],
  x1 = 1,
  y1 = 1,
  lwd = 3, col = "red", lty = 2
)
text(
  x = 0.3, y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)
# ROC curve of res.roc drawn with legacy.axes = TRUE, with fixed cutoffs and
# the Youden threshold printed on the curve.
par(pty = "s") # square plotting region
plot.roc(
  res.roc,
  print.auc = TRUE,
  print.auc.y = 0.4,
  legacy.axes = TRUE,
  col = "blue",
  print.thres = c(0.2, 0.5, 0.7, rfThresh_youden[1, "threshold"]),
  main = "ROC curve - Youden",
  xlab = "1-Specificity = False Positive Rate = FPR",
  ylab = "Sensitivity = True Positive Rate = TPR"
)
# Dashed red vertical segment at the Youden specificity, running from
# 1 - specificity up to the Youden sensitivity.
segments(
  x0 = rfThresh_youden[1, "specificity"],
  y0 = 1 - rfThresh_youden[1, "specificity"],
  x1 = rfThresh_youden[1, "specificity"],
  y1 = rfThresh_youden[1, "sensitivity"],
  lwd = 3, col = "red", lty = 2
)
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")
text(
  x = 0.4, y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)
# ROC curve of res.roc drawn with legacy.axes = TRUE, with fixed cutoffs and
# the closest-top-left threshold printed on the curve.
par(pty = "s") # square plotting region
plot.roc(
  res.roc,
  print.auc = TRUE,
  print.auc.y = 0.4,
  legacy.axes = TRUE,
  col = "blue",
  print.thres = c(0.3, 0.5, 0.7, rfThresh_topleft[1, "threshold"]),
  main = "ROC curve - Closest Topleft",
  xlab = "1-Specificity = False Positive Rate = FPR",
  ylab = "Sensitivity = True Positive Rate = TPR"
)
# Dashed red segment from the closest-top-left point to the corner (1, 1).
segments(
  x0 = rfThresh_topleft[1, "specificity"],
  y0 = rfThresh_topleft[1, "sensitivity"],
  x1 = 1,
  y1 = 1,
  lwd = 3, col = "red", lty = 2
)
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")
text(
  x = 0.4, y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)
# Recompute the ROC curve from ContraProva and the X1 probabilities, plotting
# it in the same call and keeping the resulting object in rocCurve.
par(pty = "s") # square plotting region
rocCurve <- roc(
  ContraProva,
  prediction.probabilities,
  plot = TRUE,
  legacy.axes = TRUE,
  col = "blue",
  main = "ROC curve (Point: best.method = 'youden')",
  xlab = "False Positive Rate = FPR",
  ylab = "True Positive Rate = TPR",
  print.thres = "best",
  print.auc = TRUE,
  cex.main = 0.9
)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Dashed red vertical segment at the Youden specificity, running from
# 1 - specificity up to the Youden sensitivity.
segments(
  x0 = rfThresh_youden[1, "specificity"],
  y0 = 1 - rfThresh_youden[1, "specificity"],
  x1 = rfThresh_youden[1, "specificity"],
  y1 = rfThresh_youden[1, "sensitivity"],
  lwd = 3, col = "red", lty = 2
)
grid(nx = NULL, ny = NULL, lty = 3, lwd = 1, col = "gray")
text(
  x = 0.4, y = 0.2,
  labels = "The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively"
)
dim(finalTest)
## [1] 81 28
names(finalTest)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName" "true_troglofauna"
dim(selectcol_finalTest)
## [1] 81 27
names(selectcol_finalTest)
## [1] "trngcv" "bio14" "slope" "rugg500s"
## [5] "pil_twim" "pil_twicv" "pil_topocv" "pil_slps"
## [9] "pil_slpcv" "pil_elr3cv" "mrvbf" "mrrtf"
## [13] "minfertf" "lf7rup" "hstructn" "geolrngaggn"
## [17] "elevationm" "wr_unrn" "solpawhcn" "slopern"
## [21] "MIN_AGE_MA" "HubDist" "month_collection" "UNITNAME"
## [25] "ROCKTYPE1" "FORMATION" "HubName"
CM_predictions_2$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9807692 0.3103448 0.7183099
## Neg Pred Value Precision Recall
## 0.9000000 0.7183099 0.9807692
## F1 Prevalence Detection Rate
## 0.8292683 0.6419753 0.6296296
## Detection Prevalence Balanced Accuracy
## 0.8765432 0.6455570
# F-beta scores with beta = 2 for three confusion matrices, using
# F_beta = (1 + beta^2) * precision * sensitivity /
#          (beta^2 * precision + sensitivity).
# `[[` extracts the bare (unnamed) value, so each F2 is a plain numeric.
beta <- 2

# CM_predictions_2
dbF1_11 <- CM_predictions_2$byClass
dbF1_11["F1"]
## F1
## 0.8292683
dbF2_11 <- (1 + beta^2) * dbF1_11[["Precision"]] * dbF1_11[["Sensitivity"]] /
  (beta^2 * dbF1_11[["Precision"]] + dbF1_11[["Sensitivity"]])
dbF2_11
## [1] 0.9139785
dbF1_11["Precision"]
dbF1_11["Sensitivity"]
## Precision
## 0.7183099
## Sensitivity
## 0.9807692

# CM_predictions_31
dbF1_22 <- CM_predictions_31$byClass
dbF1_22["F1"]
## F1
## 0.8305085
dbF2_22 <- (1 + beta^2) * dbF1_22[["Precision"]] * dbF1_22[["Sensitivity"]] /
  (beta^2 * dbF1_22[["Precision"]] + dbF1_22[["Sensitivity"]])
dbF2_22
## [1] 0.8941606
dbF1_22["Precision"]
dbF1_22["Sensitivity"]
## Precision
## 0.7424242
## Sensitivity
## 0.9423077

# CM_predictions_41
dbF1_33 <- CM_predictions_41$byClass
dbF1_33["F1"]
## F1
## 0.8761905
dbF2_33 <- (1 + beta^2) * dbF1_33[["Precision"]] * dbF1_33[["Sensitivity"]] /
  (beta^2 * dbF1_33[["Precision"]] + dbF1_33[["Sensitivity"]])
dbF2_33
## [1] 0.8812261
dbF1_33["Precision"]
dbF1_33["Sensitivity"]
## Precision
## 0.8679245
## Sensitivity
## 0.8846154
dim(finalTest)
## [1] 81 28
# Matthews correlation coefficient of each set of class predictions against
# the observed true_troglofauna column (the last column of finalTest).
basmcc <- mcc(preds = predictions_2, actuals = finalTest$true_troglofauna)
basmcc
## [1] 0.4242625
youmcc <- mcc(preds = Test_31, actuals = finalTest$true_troglofauna)
youmcc
## [1] 0.4394971
topmcc <- mcc(preds = Test_41, actuals = finalTest$true_troglofauna)
topmcc
## [1] 0.6484166
# Results: print a plain-text summary of three decision rules - the 0.5
# cutoff, the Youden threshold and the closest-top-left threshold.
# Statistics are looked up by name ("Accuracy", "Sensitivity", ...) instead of
# by position, so the table cannot silently report a different statistic if
# the layout of the confusionMatrix vectors changes.
# NOTE(review): the AUC line uses res.roc, which was built from ContraProva
# (123 observations), while the other figures come from finalTest (81 rows) -
# confirm that this is the AUC intended for this table.
cat(
  "\n\n", "TABLE 1 - RESULTS OF THE 'finalTest' DATABASE WITH THE MODELS USING THE train() FUNCTION OF THE caret PACKAGE",
  "\n\n\n", "1 - Basic model",
  "\n\n", "Area Under ROC (AUC)- Basic model =", auc(res.roc),
  "\n\n\n", "Accuracy =", CM_predictions_2$overall["Accuracy"],
  "\n\n", "Sensitivity =", CM_predictions_2$byClass["Sensitivity"],
  "\n\n", "Specificity =", CM_predictions_2$byClass["Specificity"],
  "\n\n", "Balanced Accuracy =", CM_predictions_2$byClass["Balanced Accuracy"],
  "\n\n", "F1 =", dbF1_11["F1"],
  "\n\n", "F2 =", dbF2_11,
  "\n\n", "matthews correlation coefficient =", calculate_mcc1(CM_predictions_2),
  "\n\n", "Threshold =", 0.5,
  "\n\n\n", "2 - Youden's J statistic Threshold Method",
  "\n\n", "Accuracy =", CM_predictions_31$overall["Accuracy"],
  "\n\n", "Sensitivity =", CM_predictions_31$byClass["Sensitivity"],
  "\n\n", "Specificity =", CM_predictions_31$byClass["Specificity"],
  "\n\n", "Balanced Accuracy =", CM_predictions_31$byClass["Balanced Accuracy"],
  "\n\n", "F1 =", dbF1_22["F1"],
  "\n\n", "F2 =", dbF2_22,
  "\n\n", "matthews correlation coefficient =", calculate_mcc1(CM_predictions_31),
  "\n\n", "Threshold =", rfThresh_youden[1, "threshold"],
  "\n\n\n", "3 - Closest Top-left Threshold Method",
  "\n\n", "Accuracy =", CM_predictions_41$overall["Accuracy"],
  "\n\n", "Sensitivity =", CM_predictions_41$byClass["Sensitivity"],
  "\n\n", "Specificity =", CM_predictions_41$byClass["Specificity"],
  "\n\n", "Balanced Accuracy =", CM_predictions_41$byClass["Balanced Accuracy"],
  "\n\n", "F1 =", dbF1_33["F1"],
  "\n\n", "F2 =", dbF2_33,
  "\n\n", "matthews correlation coefficient =", calculate_mcc1(CM_predictions_41),
  "\n\n", "Threshold =", rfThresh_topleft[1, "threshold"]
)
##
##
## TABLE 1 - RESULTS OF THE 'finalTest' DATABASE WITH THE MODELS USING THE train() FUNCTION OF THE caret PACKAGE
##
##
## 1 - Basic model
##
## Area Under ROC (AUC)- Basic model = 0.7392117
##
##
## Accuracy = 0.7407407
##
## Sensitivity = 0.9807692
##
## Specificity = 0.3103448
##
## Balanced Accuracy = 0.645557
##
## F1 = 0.8292683
##
## F2 = 0.9139785
##
## matthews correlation coefficient = 0.4242625
##
## Threshold = 0.5
##
##
## 2 - Youden's J statistic Threshold Method
##
## Accuracy = 0.7530864
##
## Sensitivity = 0.9423077
##
## Specificity = 0.4137931
##
## Balanced Accuracy = 0.6780504
##
## F1 = 0.8305085
##
## F2 = 0.8941606
##
## matthews correlation coefficient = 0.4394971
##
## Threshold = 0.554
##
##
## 3 - Closest Top-left Threshold Method
##
## Accuracy = 0.8395062
##
## Sensitivity = 0.8846154
##
## Specificity = 0.7586207
##
## Balanced Accuracy = 0.821618
##
## F1 = 0.8761905
##
## F2 = 0.8812261
##
## matthews correlation coefficient = 0.6484166
##
## Threshold = 0.641
# Map of observed and predicted troglofauna presence for the basic model.

# Rows of finalTest1 with observed presence (true_troglofauna == 1).
gaa <- which(finalTest1$true_troglofauna == 1)
geo1 <- finalTest1[gaa, ]

# Rows of finalTest1 with observed absence (true_troglofauna == 0).
gaaAu <- which(finalTest1$true_troglofauna == 0)
geoAu <- finalTest1[gaaAu, ]

# Axis limits: the coordinate range of finalTest1 padded by 0.05 on each side.
Min_Lg <- min(finalTest1$LONGITUDE) - 0.05
Max_Lg <- max(finalTest1$LONGITUDE) + 0.05
Min_Lt <- min(finalTest1$LATITUDE) - 0.05
Max_Lt <- max(finalTest1$LATITUDE) + 0.05

# Observed presence: filled blue points.
plot(
  x = geo1$LONGITUDE,
  y = geo1$LATITUDE,
  xlim = c(Min_Lg, Max_Lg),
  ylim = c(Min_Lt, Max_Lt),
  cex = 2,
  pch = 20,
  col = "blue",
  xlab = 'Longitude\n',
  ylab = 'Latitude',
  main = 'Basic Model. Coordinates of troglofauna presence: Observed and Predicted',
  sub = "\nObserved Presence = blue, Predicted Presence = red, Observed Absence = black"
)

# Positions where predictions_2 equals 1 (predicted presence).
# NOTE(review): assumes predictions_2 is in the same row order as finalTest1 - confirm.
gbb <- which(predictions_2 == 1)
geo2 <- finalTest1[gbb, ]

# Predicted presence: red diamonds.
points(x = geo2$LONGITUDE, y = geo2$LATITUDE, pch = 5, col = "red")

# Observed absence: semi-transparent black crosses.
points(x = geoAu$LONGITUDE, y = geoAu$LATITUDE, pch = 4, col = rgb(0, 0, 0, 0.15))
# Row positions of finalTest1 with observed presence, printed and then used to
# subset those rows.
gaa <- which(finalTest1$true_troglofauna == 1)
gaa
## [1] 1 2 3 4 6 7 9 10 11 12 16 17 18 19 21 22 23 26 27 29 32 33 34 35 36
## [26] 37 38 40 42 43 45 46 47 50 51 52 53 54 55 57 59 61 65 69 70 71 72 73 76 77
## [51] 78 80
geo1 <- finalTest1[gaa, ]
summary(geo1)
## trngcv bio14 slope rugg500s
## Min. :8.808 Min. :0.01801 Min. : 0.5627 Min. :0.2331
## 1st Qu.:9.244 1st Qu.:0.13297 1st Qu.: 2.2721 1st Qu.:1.5072
## Median :9.312 Median :0.17503 Median : 3.4064 Median :2.6873
## Mean :9.336 Mean :0.18598 Mean : 5.3447 Mean :3.5658
## 3rd Qu.:9.424 3rd Qu.:0.24458 3rd Qu.: 8.0816 3rd Qu.:5.4057
## Max. :9.889 Max. :0.34399 Max. :15.8156 Max. :9.9756
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.831 Min. :0.04324 Min. :0.00000 Min. : 0.5535
## 1st Qu.: 7.543 1st Qu.:0.11117 1st Qu.:0.00000 1st Qu.: 1.5083
## Median : 8.304 Median :0.16271 Median :0.01835 Median : 4.3828
## Mean : 8.594 Mean :0.16939 Mean :0.06513 Mean : 5.1668
## 3rd Qu.: 9.460 3rd Qu.:0.23025 3rd Qu.:0.12912 3rd Qu.: 8.0398
## Max. :11.582 Max. :0.30956 Max. :0.20161 Max. :12.7276
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.2188 Min. :0.04501 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3770 1st Qu.:0.12937 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.4892 Median :0.20425 Median :0.0000 Median :0.0000
## Mean :0.5491 Mean :0.23279 Mean :0.2087 Mean :0.0733
## 3rd Qu.:0.6604 3rd Qu.:0.31706 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.6098 Max. :0.56449 Max. :1.8239 Max. :1.8897
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.00 Min. :1.000 Min. :0.000 Min. : 0.0115
## 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:1.043 1st Qu.: 2.5880
## Median :2.00 Median :3.000 Median :1.043 Median : 2.5880
## Mean :2.25 Mean :2.808 Mean :1.515 Mean :175.7587
## 3rd Qu.:2.25 3rd Qu.:4.000 3rd Qu.:1.043 3rd Qu.:300.0000
## Max. :3.00 Max. :6.000 Max. :5.143 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :518.3 Min. : 0.00 Min. : 96.0 Min. :0.00179
## 1st Qu.:561.1 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.01874
## Median :570.5 Median :17.86 Median : 96.0 Median :0.04360
## Mean :585.6 Mean :15.43 Mean :101.2 Mean :0.09140
## 3rd Qu.:611.1 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.09728
## Max. :703.9 Max. :17.86 Max. :157.0 Max. :0.70049
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 3.218 May : 8
## 1st Qu.:2454 1st Qu.:16.263 September: 8
## Median :2454 Median :26.703 March : 7
## Mean :2487 Mean :43.441 June : 6
## 3rd Qu.:2494 3rd Qu.:76.671 November : 6
## Max. :2597 Max. :86.707 January : 5
## (Other) :12
## UNITNAME
## Brockman Iron Formation :32
## Marra Mamba Iron Formation : 9
## Mount McRae Shale and Mount Sylvia Formation:11
## Wittenoom Formation : 0
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 0
## sedimentary other chemical or biochemical:41
## sedimentary siliciclastic :11
##
##
##
##
## FORMATION
## Brockman Iron Formation :32
## Marra Mamba Iron Formation : 9
## Mount McRae Shale and Mount Sylvia Formation:11
## Wittenoom Formation : 0
##
##
##
## HubName
## anticline, exposed :23
## exposed : 6
## strike-slip, exposed, showing relative dextral displacement: 1
## syncline, exposed :22
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0: 0 Min. :-22.61 Min. :117.2
## 1:52 1st Qu.:-22.23 1st Qu.:117.5
## Median :-22.16 Median :117.8
## Mean :-22.23 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.09 Max. :118.0
##
# Row positions of finalTest1 with observed absence, printed and then used to
# subset those rows.
gaaAu <- which(finalTest1$true_troglofauna == 0)
gaaAu
## [1] 5 8 13 14 15 20 24 25 28 30 31 39 41 44 48 49 56 58 60 62 63 64 66 67 68
## [26] 74 75 79 81
geoAu <- finalTest1[gaaAu, ]
summary(geoAu)
## trngcv bio14 slope rugg500s
## Min. :8.894 Min. :0.02573 Min. : 0.2666 Min. :0.0407
## 1st Qu.:9.244 1st Qu.:0.11380 1st Qu.: 1.4436 1st Qu.:1.2771
## Median :9.337 Median :0.14993 Median : 3.0893 Median :2.7764
## Mean :9.335 Mean :0.16881 Mean : 4.5309 Mean :3.0913
## 3rd Qu.:9.394 3rd Qu.:0.20636 3rd Qu.: 6.2448 3rd Qu.:4.1029
## Max. :9.887 Max. :0.42288 Max. :14.3589 Max. :8.0468
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.749 Min. :0.04954 Min. :0.00000 Min. : 0.1344
## 1st Qu.: 7.956 1st Qu.:0.12372 1st Qu.:0.00000 1st Qu.: 0.8647
## Median : 9.267 Median :0.21281 Median :0.02834 Median : 3.2229
## Mean : 9.061 Mean :0.18747 Mean :0.07876 Mean : 3.5669
## 3rd Qu.:10.169 3rd Qu.:0.25139 3rd Qu.:0.14605 3rd Qu.: 5.3619
## Max. :11.442 Max. :0.36698 Max. :0.32611 Max. :10.9676
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1797 Min. :0.0359 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3397 1st Qu.:0.1528 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.4196 Median :0.2261 Median :0.0000 Median :0.0000
## Mean :0.4479 Mean :0.2242 Mean :0.2452 Mean :0.1054
## 3rd Qu.:0.5034 3rd Qu.:0.2841 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :0.7925 Max. :0.4060 Max. :2.8219 Max. :0.6614
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :2.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.207 Mean :3.621 Mean : 1.572 Mean :147.1018
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.: 2.5880
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :515.2 Min. : 0.00 Min. : 96.0 Min. :0.00341
## 1st Qu.:536.0 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.02670
## Median :568.4 Median :17.86 Median : 96.0 Median :0.03745
## Mean :577.2 Mean :15.41 Mean :103.7 Mean :0.52368
## 3rd Qu.:601.6 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.12267
## Max. :726.1 Max. :17.86 Max. :157.0 Max. :4.28009
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 1.931 December :8
## 1st Qu.:2454 1st Qu.:17.066 January :6
## Median :2454 Median :21.429 September:5
## Mean :2472 Mean :34.474 April :4
## 3rd Qu.:2494 3rd Qu.:54.509 March :2
## Max. :2597 Max. :88.221 February :1
## (Other) :3
## UNITNAME
## Brockman Iron Formation :19
## Marra Mamba Iron Formation : 1
## Mount McRae Shale and Mount Sylvia Formation: 8
## Wittenoom Formation : 1
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 1
## sedimentary other chemical or biochemical:20
## sedimentary siliciclastic : 8
##
##
##
##
## FORMATION
## Brockman Iron Formation :19
## Marra Mamba Iron Formation : 1
## Mount McRae Shale and Mount Sylvia Formation: 8
## Wittenoom Formation : 1
##
##
##
## HubName
## anticline, exposed :20
## exposed : 0
## strike-slip, exposed, showing relative dextral displacement: 1
## syncline, exposed : 8
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0:29 Min. :-22.64 Min. :117.1
## 1: 0 1st Qu.:-22.21 1st Qu.:117.7
## Median :-22.15 Median :117.8
## Mean :-22.19 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.11 Max. :118.0
##
# Plotting window: the full coordinate extent of finalTest1, padded by 0.05
# on every side so that points on the edge are not clipped.
Min_Lg <- min(finalTest1[["LONGITUDE"]]) - 0.05
Max_Lg <- max(finalTest1[["LONGITUDE"]]) + 0.05
Min_Lt <- min(finalTest1[["LATITUDE"]]) - 0.05
Max_Lt <- max(finalTest1[["LATITUDE"]]) + 0.05

# Base layer: the geo1 rows drawn as large filled blue dots inside the padded
# window. Later points() calls overlay further layers on this plot.
plot(
  x = geo1[["LONGITUDE"]],
  y = geo1[["LATITUDE"]],
  xlim = c(Min_Lg, Max_Lg),
  ylim = c(Min_Lt, Max_Lt),
  cex = 2,
  pch = 20,
  col = "blue",
  xlab = 'Longitude\n',
  ylab = 'Latitude',
  main = 'Basic Model. Coordinates of troglofauna presence: Observed and Predicted',
  sub = "\nObserved Presence = blue, Predicted Presence = red, Observed Absence = black"
)
# Positions at which predictions_2 equals 1 (predicted presence); the bare
# name below echoes the index vector.
gbb <- which(predictions_2 == 1)
gbb
## [1] 1 2 3 4 6 7 8 10 11 12 13 15 16 17 18 19 20 21 22 23 25 26 27 28 29
## [26] 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 49 50 51 52 53 54 55 56
## [51] 57 58 59 60 61 65 66 67 68 69 70 71 72 73 75 76 77 78 79 80 81
# Rows of finalTest1 at the predicted-presence positions, followed by a
# per-column summary of that subset.
geo2 <- finalTest1[gbb, , drop = FALSE]
summary(object = geo2)
## trngcv bio14 slope rugg500s
## Min. :8.808 Min. :0.01801 Min. : 0.2666 Min. :0.1508
## 1st Qu.:9.243 1st Qu.:0.11960 1st Qu.: 2.1535 1st Qu.:1.5066
## Median :9.325 Median :0.17200 Median : 3.3626 Median :2.7764
## Mean :9.344 Mean :0.18270 Mean : 5.1586 Mean :3.4517
## 3rd Qu.:9.417 3rd Qu.:0.23916 3rd Qu.: 7.5055 3rd Qu.:5.2003
## Max. :9.889 Max. :0.42288 Max. :15.8156 Max. :9.9756
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.749 Min. :0.04324 Min. :0.00000 Min. : 0.4874
## 1st Qu.: 7.693 1st Qu.:0.11822 1st Qu.:0.00000 1st Qu.: 1.4048
## Median : 8.453 Median :0.16354 Median :0.02168 Median : 3.9729
## Mean : 8.645 Mean :0.17513 Mean :0.06882 Mean : 4.7409
## 3rd Qu.: 9.662 3rd Qu.:0.23356 3rd Qu.:0.13549 3rd Qu.: 7.0271
## Max. :11.582 Max. :0.36698 Max. :0.32611 Max. :12.7276
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.2188 Min. :0.0359 Min. :0.000 Min. :0.00000
## 1st Qu.:0.3624 1st Qu.:0.1301 1st Qu.:0.000 1st Qu.:0.00000
## Median :0.4713 Median :0.2056 Median :0.000 Median :0.00000
## Mean :0.5201 Mean :0.2294 Mean :0.204 Mean :0.07929
## 3rd Qu.:0.6220 3rd Qu.:0.3147 3rd Qu.:0.000 3rd Qu.:0.00000
## Max. :1.6098 Max. :0.5645 Max. :2.806 Max. :1.88966
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. :0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median :1.043 Median : 2.5880
## Mean :2.239 Mean :2.986 Mean :1.431 Mean :175.8770
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:1.043 3rd Qu.:300.0000
## Max. :3.000 Max. :7.000 Max. :5.143 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :515.2 Min. : 0.00 Min. : 96.0 Min. :0.00179
## 1st Qu.:560.1 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.01985
## Median :570.5 Median :17.86 Median : 96.0 Median :0.04257
## Mean :584.8 Mean :15.71 Mean :100.7 Mean :0.15044
## 3rd Qu.:613.0 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.09939
## Max. :726.1 Max. :17.86 Max. :157.0 Max. :4.24974
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 2.183 September:10
## 1st Qu.:2454 1st Qu.:16.549 December :10
## Median :2454 Median :22.459 January : 9
## Mean :2482 Mean :42.584 March : 9
## 3rd Qu.:2494 3rd Qu.:76.502 May : 9
## Max. :2597 Max. :88.221 June : 6
## (Other) :18
## UNITNAME
## Brockman Iron Formation :45
## Marra Mamba Iron Formation : 9
## Mount McRae Shale and Mount Sylvia Formation:16
## Wittenoom Formation : 1
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 1
## sedimentary other chemical or biochemical:54
## sedimentary siliciclastic :16
##
##
##
##
## FORMATION
## Brockman Iron Formation :45
## Marra Mamba Iron Formation : 9
## Mount McRae Shale and Mount Sylvia Formation:16
## Wittenoom Formation : 1
##
##
##
## HubName
## anticline, exposed :34
## exposed : 6
## strike-slip, exposed, showing relative dextral displacement: 2
## syncline, exposed :29
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0:20 Min. :-22.64 Min. :117.1
## 1:51 1st Qu.:-22.22 1st Qu.:117.7
## Median :-22.16 Median :117.8
## Mean :-22.22 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.09 Max. :118.0
##
# Overlay layers on the plot that is already open. points() only adds symbols
# to the existing coordinate system, so it takes no xlim/ylim: those arguments
# were ignored (and can raise a "not a graphical parameter" warning), and the
# hard-coded c(115, 122) / c(-26, -21) pair contradicted the window actually
# set by the preceding plot() call. The axis limits stay those of plot().

# Predicted presence (geo2): open red diamonds.
points(geo2$LONGITUDE, geo2$LATITUDE, pch = 5, col = "red")

# Observed absence (geoAu): black crosses drawn at 15% opacity.
points(geoAu$LONGITUDE, geoAu$LATITUDE, pch = 4, col = rgb(0, 0, 0, 0.15))
# Row positions in finalTest1 whose true_troglofauna label equals 1
# (observed presence); the bare name below echoes the index vector.
gaa <- which(finalTest1[["true_troglofauna"]] == 1)
gaa
## [1] 1 2 3 4 6 7 9 10 11 12 16 17 18 19 21 22 23 26 27 29 32 33 34 35 36
## [26] 37 38 40 42 43 45 46 47 50 51 52 53 54 55 57 59 61 65 69 70 71 72 73 76 77
## [51] 78 80
# Keep only the observed-presence rows and print a per-column summary of them.
geo1 <- finalTest1[gaa, , drop = FALSE]
summary(object = geo1)
## trngcv bio14 slope rugg500s
## Min. :8.808 Min. :0.01801 Min. : 0.5627 Min. :0.2331
## 1st Qu.:9.244 1st Qu.:0.13297 1st Qu.: 2.2721 1st Qu.:1.5072
## Median :9.312 Median :0.17503 Median : 3.4064 Median :2.6873
## Mean :9.336 Mean :0.18598 Mean : 5.3447 Mean :3.5658
## 3rd Qu.:9.424 3rd Qu.:0.24458 3rd Qu.: 8.0816 3rd Qu.:5.4057
## Max. :9.889 Max. :0.34399 Max. :15.8156 Max. :9.9756
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.831 Min. :0.04324 Min. :0.00000 Min. : 0.5535
## 1st Qu.: 7.543 1st Qu.:0.11117 1st Qu.:0.00000 1st Qu.: 1.5083
## Median : 8.304 Median :0.16271 Median :0.01835 Median : 4.3828
## Mean : 8.594 Mean :0.16939 Mean :0.06513 Mean : 5.1668
## 3rd Qu.: 9.460 3rd Qu.:0.23025 3rd Qu.:0.12912 3rd Qu.: 8.0398
## Max. :11.582 Max. :0.30956 Max. :0.20161 Max. :12.7276
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.2188 Min. :0.04501 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3770 1st Qu.:0.12937 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.4892 Median :0.20425 Median :0.0000 Median :0.0000
## Mean :0.5491 Mean :0.23279 Mean :0.2087 Mean :0.0733
## 3rd Qu.:0.6604 3rd Qu.:0.31706 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.6098 Max. :0.56449 Max. :1.8239 Max. :1.8897
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.00 Min. :1.000 Min. :0.000 Min. : 0.0115
## 1st Qu.:2.00 1st Qu.:2.000 1st Qu.:1.043 1st Qu.: 2.5880
## Median :2.00 Median :3.000 Median :1.043 Median : 2.5880
## Mean :2.25 Mean :2.808 Mean :1.515 Mean :175.7587
## 3rd Qu.:2.25 3rd Qu.:4.000 3rd Qu.:1.043 3rd Qu.:300.0000
## Max. :3.00 Max. :6.000 Max. :5.143 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :518.3 Min. : 0.00 Min. : 96.0 Min. :0.00179
## 1st Qu.:561.1 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.01874
## Median :570.5 Median :17.86 Median : 96.0 Median :0.04360
## Mean :585.6 Mean :15.43 Mean :101.2 Mean :0.09140
## 3rd Qu.:611.1 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.09728
## Max. :703.9 Max. :17.86 Max. :157.0 Max. :0.70049
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 3.218 May : 8
## 1st Qu.:2454 1st Qu.:16.263 September: 8
## Median :2454 Median :26.703 March : 7
## Mean :2487 Mean :43.441 June : 6
## 3rd Qu.:2494 3rd Qu.:76.671 November : 6
## Max. :2597 Max. :86.707 January : 5
## (Other) :12
## UNITNAME
## Brockman Iron Formation :32
## Marra Mamba Iron Formation : 9
## Mount McRae Shale and Mount Sylvia Formation:11
## Wittenoom Formation : 0
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 0
## sedimentary other chemical or biochemical:41
## sedimentary siliciclastic :11
##
##
##
##
## FORMATION
## Brockman Iron Formation :32
## Marra Mamba Iron Formation : 9
## Mount McRae Shale and Mount Sylvia Formation:11
## Wittenoom Formation : 0
##
##
##
## HubName
## anticline, exposed :23
## exposed : 6
## strike-slip, exposed, showing relative dextral displacement: 1
## syncline, exposed :22
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0: 0 Min. :-22.61 Min. :117.2
## 1:52 1st Qu.:-22.23 1st Qu.:117.5
## Median :-22.16 Median :117.8
## Mean :-22.23 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.09 Max. :118.0
##
# Row positions in finalTest1 whose true_troglofauna label equals 0
# (observed absence); the bare name below echoes the index vector.
gaaAu <- which(finalTest1[["true_troglofauna"]] == 0)
gaaAu
## [1] 5 8 13 14 15 20 24 25 28 30 31 39 41 44 48 49 56 58 60 62 63 64 66 67 68
## [26] 74 75 79 81
# Keep only the observed-absence rows and print a per-column summary of them.
geoAu <- finalTest1[gaaAu, , drop = FALSE]
summary(object = geoAu)
## trngcv bio14 slope rugg500s
## Min. :8.894 Min. :0.02573 Min. : 0.2666 Min. :0.0407
## 1st Qu.:9.244 1st Qu.:0.11380 1st Qu.: 1.4436 1st Qu.:1.2771
## Median :9.337 Median :0.14993 Median : 3.0893 Median :2.7764
## Mean :9.335 Mean :0.16881 Mean : 4.5309 Mean :3.0913
## 3rd Qu.:9.394 3rd Qu.:0.20636 3rd Qu.: 6.2448 3rd Qu.:4.1029
## Max. :9.887 Max. :0.42288 Max. :14.3589 Max. :8.0468
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.749 Min. :0.04954 Min. :0.00000 Min. : 0.1344
## 1st Qu.: 7.956 1st Qu.:0.12372 1st Qu.:0.00000 1st Qu.: 0.8647
## Median : 9.267 Median :0.21281 Median :0.02834 Median : 3.2229
## Mean : 9.061 Mean :0.18747 Mean :0.07876 Mean : 3.5669
## 3rd Qu.:10.169 3rd Qu.:0.25139 3rd Qu.:0.14605 3rd Qu.: 5.3619
## Max. :11.442 Max. :0.36698 Max. :0.32611 Max. :10.9676
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.1797 Min. :0.0359 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3397 1st Qu.:0.1528 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.4196 Median :0.2261 Median :0.0000 Median :0.0000
## Mean :0.4479 Mean :0.2242 Mean :0.2452 Mean :0.1054
## 3rd Qu.:0.5034 3rd Qu.:0.2841 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :0.7925 Max. :0.4060 Max. :2.8219 Max. :0.6614
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :2.000 Min. : 0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.: 1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median : 1.043 Median : 2.5880
## Mean :2.207 Mean :3.621 Mean : 1.572 Mean :147.1018
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043 3rd Qu.: 2.5880
## Max. :3.000 Max. :7.000 Max. :15.429 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :515.2 Min. : 0.00 Min. : 96.0 Min. :0.00341
## 1st Qu.:536.0 1st Qu.:17.86 1st Qu.: 96.0 1st Qu.:0.02670
## Median :568.4 Median :17.86 Median : 96.0 Median :0.03745
## Mean :577.2 Mean :15.41 Mean :103.7 Mean :0.52368
## 3rd Qu.:601.6 3rd Qu.:17.86 3rd Qu.: 96.0 3rd Qu.:0.12267
## Max. :726.1 Max. :17.86 Max. :157.0 Max. :4.28009
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 1.931 December :8
## 1st Qu.:2454 1st Qu.:17.066 January :6
## Median :2454 Median :21.429 September:5
## Mean :2472 Mean :34.474 April :4
## 3rd Qu.:2494 3rd Qu.:54.509 March :2
## Max. :2597 Max. :88.221 February :1
## (Other) :3
## UNITNAME
## Brockman Iron Formation :19
## Marra Mamba Iron Formation : 1
## Mount McRae Shale and Mount Sylvia Formation: 8
## Wittenoom Formation : 1
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 1
## sedimentary other chemical or biochemical:20
## sedimentary siliciclastic : 8
##
##
##
##
## FORMATION
## Brockman Iron Formation :19
## Marra Mamba Iron Formation : 1
## Mount McRae Shale and Mount Sylvia Formation: 8
## Wittenoom Formation : 1
##
##
##
## HubName
## anticline, exposed :20
## exposed : 0
## strike-slip, exposed, showing relative dextral displacement: 1
## syncline, exposed : 8
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0:29 Min. :-22.64 Min. :117.1
## 1: 0 1st Qu.:-22.21 1st Qu.:117.7
## Median :-22.15 Median :117.8
## Mean :-22.19 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.11 Max. :118.0
##
# Base layer for the second map: the geo1 rows as large filled blue dots,
# drawn inside the padded Min_Lg/Max_Lg x Min_Lt/Max_Lt window.
# NOTE(review): the title still reads "Basic Model", but the predicted layer
# overlaid on this plot is built from Test_41 rather than predictions_2.
# Confirm which model label is intended here.
plot(
  x = geo1[["LONGITUDE"]],
  y = geo1[["LATITUDE"]],
  xlim = c(Min_Lg, Max_Lg),
  ylim = c(Min_Lt, Max_Lt),
  cex = 2,
  pch = 20,
  col = "blue",
  xlab = 'Longitude\n',
  ylab = 'Latitude',
  main = 'Basic Model. Coordinates of troglofauna presence: Observed and Predicted',
  sub = "\nObserved Presence = blue, Predicted Presence = red, Observed Absence = black"
)
# Positions at which Test_41 equals 1 (predicted presence); the bare name
# below echoes the index vector.
gbb <- which(Test_41 == 1)
gbb
## [1] 1 2 3 4 6 7 11 12 15 16 17 18 19 21 23 25 26 27 28 32 33 34 35 36 37
## [26] 38 40 42 43 45 47 49 50 51 52 53 54 55 57 59 61 65 66 69 70 71 72 73 75 76
## [51] 78 80 81
# Rows of finalTest1 at the predicted-presence positions, followed by a
# per-column summary of that subset.
geo2 <- finalTest1[gbb, , drop = FALSE]
summary(object = geo2)
## trngcv bio14 slope rugg500s
## Min. :8.808 Min. :0.01801 Min. : 0.6517 Min. :0.2331
## 1st Qu.:9.246 1st Qu.:0.11871 1st Qu.: 2.3146 1st Qu.:1.5054
## Median :9.325 Median :0.17200 Median : 3.4502 Median :2.7764
## Mean :9.347 Mean :0.18559 Mean : 5.1285 Mean :3.4713
## 3rd Qu.:9.422 3rd Qu.:0.25342 3rd Qu.: 7.4120 3rd Qu.:5.3713
## Max. :9.889 Max. :0.42288 Max. :15.8156 Max. :9.5490
##
## pil_twim pil_twicv pil_topocv pil_slps
## Min. : 5.749 Min. :0.04324 Min. :0.00000 Min. : 0.6456
## 1st Qu.: 7.552 1st Qu.:0.11417 1st Qu.:0.00000 1st Qu.: 1.5126
## Median : 8.184 Median :0.16848 Median :0.02834 Median : 3.7289
## Mean : 8.520 Mean :0.17198 Mean :0.06683 Mean : 5.0224
## 3rd Qu.: 9.425 3rd Qu.:0.23025 3rd Qu.:0.12639 3rd Qu.: 7.5190
## Max. :11.582 Max. :0.30956 Max. :0.20161 Max. :12.7276
##
## pil_slpcv pil_elr3cv mrvbf mrrtf
## Min. :0.2188 Min. :0.04501 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.3607 1st Qu.:0.12910 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.4713 Median :0.20074 Median :0.0000 Median :0.00000
## Mean :0.5312 Mean :0.22534 Mean :0.1595 Mean :0.07192
## 3rd Qu.:0.6235 3rd Qu.:0.29237 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.6098 Max. :0.56449 Max. :1.7200 Max. :1.88966
##
## minfertf lf7rup hstructn geolrngaggn
## Min. :2.000 Min. :1.000 Min. :0.000 Min. : 0.0115
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.043 1st Qu.: 2.5880
## Median :2.000 Median :3.000 Median :1.043 Median : 2.5880
## Mean :2.245 Mean :2.755 Mean :1.622 Mean :178.5159
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:1.043 3rd Qu.:300.0000
## Max. :3.000 Max. :6.000 Max. :5.143 Max. :900.0000
##
## elevationm wr_unrn solpawhcn slopern
## Min. :518.3 Min. : 0.00 Min. : 96.00 Min. :0.00179
## 1st Qu.:561.2 1st Qu.:17.86 1st Qu.: 96.00 1st Qu.:0.01875
## Median :568.4 Median :17.86 Median : 96.00 Median :0.03679
## Mean :586.3 Mean :16.00 Mean : 98.91 Mean :0.07684
## 3rd Qu.:610.9 3rd Qu.:17.86 3rd Qu.: 96.00 3rd Qu.:0.09517
## Max. :726.1 Max. :17.86 Max. :157.00 Max. :0.51337
##
## MIN_AGE_MA HubDist month_collection
## Min. :2454 Min. : 3.218 March : 8
## 1st Qu.:2454 1st Qu.:16.501 May : 8
## Median :2454 Median :34.110 September: 7
## Mean :2489 Mean :46.053 June : 6
## 3rd Qu.:2494 3rd Qu.:76.527 November : 6
## Max. :2597 Max. :84.554 January : 5
## (Other) :13
## UNITNAME
## Brockman Iron Formation :30
## Marra Mamba Iron Formation : 9
## Mount McRae Shale and Mount Sylvia Formation:14
## Wittenoom Formation : 0
##
##
##
## ROCKTYPE1
## sedimentary carbonate : 0
## sedimentary other chemical or biochemical:39
## sedimentary siliciclastic :14
##
##
##
##
## FORMATION
## Brockman Iron Formation :30
## Marra Mamba Iron Formation : 9
## Mount McRae Shale and Mount Sylvia Formation:14
## Wittenoom Formation : 0
##
##
##
## HubName
## anticline, exposed :21
## exposed : 6
## strike-slip, exposed, showing relative dextral displacement: 2
## syncline, exposed :24
##
##
##
## true_troglofauna LATITUDE LONGITUDE
## 0: 7 Min. :-22.64 Min. :117.1
## 1:46 1st Qu.:-22.23 1st Qu.:117.6
## Median :-22.16 Median :117.8
## Mean :-22.23 Mean :117.7
## 3rd Qu.:-22.12 3rd Qu.:117.9
## Max. :-22.09 Max. :118.0
##
# Overlay layers on the plot that is already open. points() only adds symbols
# to the existing coordinate system, so it takes no xlim/ylim: those arguments
# were ignored (and can raise a "not a graphical parameter" warning), and the
# hard-coded c(115, 122) / c(-26, -21) pair contradicted the window actually
# set by the preceding plot() call. The axis limits stay those of plot().

# Predicted presence (geo2): open red diamonds.
points(geo2$LONGITUDE, geo2$LATITUDE, pch = 5, col = "red")

# Observed absence (geoAu): black crosses drawn at 15% opacity.
points(geoAu$LONGITUDE, geoAu$LATITUDE, pch = 4, col = rgb(0, 0, 0, 0.15))
# Export the predicted-presence rows (all columns of geo2) to a CSV file in
# the working directory, without a row-name column.
write.csv(
  x = geo2,
  file = "Area1_predicted_presence_points_troglofauna_coordinates.csv",
  row.names = FALSE
)