library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(caret)
## Loading required package: lattice
library(randomForest)#
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:dplyr':
##
## combine
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(corrplot)
## corrplot 0.92 loaded
library(mltools)
library(rockchalk)
##
## Attaching package: 'rockchalk'
## The following object is masked from 'package:mltools':
##
## skewness
## The following object is masked from 'package:dplyr':
##
## summarize
data2 <- read.csv("Area_02_Filtered_.csv", sep=",", header= TRUE)
summary(data2)
## srain2mp bio21 bio16 slope
## Min. :0.1333 Min. :29.62 Min. :195.1 Min. : 0.08862
## 1st Qu.:0.1439 1st Qu.:29.74 1st Qu.:221.5 1st Qu.: 1.36495
## Median :0.1481 Median :29.76 Median :224.9 Median : 2.67317
## Mean :0.1485 Mean :29.75 Mean :225.8 Mean : 3.46866
## 3rd Qu.:0.1520 3rd Qu.:29.78 3rd Qu.:229.6 3rd Qu.: 4.63968
## Max. :0.1690 Max. :29.84 Max. :249.6 Max. :16.25081
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.00849 Min. :0.00854 Min. :0.02523 Min. : 0.00
## 1st Qu.:0.69745 1st Qu.:0.06169 1st Qu.:0.08197 1st Qu.: 0.00
## Median :1.38321 Median :0.10038 Median :0.11972 Median : 0.00
## Mean :1.96571 Mean :0.12201 Mean :0.13704 Mean : 1.58
## 3rd Qu.:2.85446 3rd Qu.:0.16462 3rd Qu.:0.18162 3rd Qu.: 3.29
## Max. :9.29160 Max. :0.59943 Max. :0.45404 Max. :15.55
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.04903 Min. :0.1652 Min. :1.000 Min. :0.0000
## 1st Qu.: 1.21168 1st Qu.:0.3648 1st Qu.:1.300 1st Qu.:0.0000
## Median : 3.20706 Median :0.4553 Median :1.300 Median :0.0000
## Mean : 4.27314 Mean :0.4725 Mean :1.302 Mean :0.3611
## 3rd Qu.: 6.36604 3rd Qu.:0.5608 3rd Qu.:1.300 3rd Qu.:0.0000
## Max. :20.80731 Max. :1.3978 Max. :1.600 Max. :4.7155
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.0429
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.0429
## Mean :0.2375 Mean :2.034 Mean :2.889 Mean : 0.9554
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :3.5547 Max. :3.000 Max. :7.000 Max. :10.0000
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.247 Min. : 0.00132
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.01630
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.04050
## Mean :429.7593 Mean :1554.2860 Mean :1.351 Mean : 0.22506
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.14081
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
## HubDist MIN_AGE_MA month_collection HubName
## Min. : 0.6774 Min. :2444 Length:1938 Length:1938
## 1st Qu.: 6.4399 1st Qu.:2454 Class :character Class :character
## Median :21.3848 Median :2454 Mode :character Mode :character
## Mean :31.3849 Mean :2508
## 3rd Qu.:58.2123 3rd Qu.:2597
## Max. :81.3633 Max. :2629
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## Length:1938 Length:1938 Length:1938 Min. :0.0000
## Class :character Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Mode :character Median :1.0000
## Mean :0.6192
## 3rd Qu.:1.0000
## Max. :1.0000
## LATITUDE LONGITUDE
## Min. :-23.34 Min. :118.6
## 1st Qu.:-22.98 1st Qu.:118.9
## Median :-22.92 Median :119.0
## Mean :-22.89 Mean :119.0
## 3rd Qu.:-22.90 3rd Qu.:119.0
## Max. :-22.51 Max. :119.5
dim(data2)
## [1] 1938 30
sapply(data2,class)
## srain2mp bio21 bio16 slope
## "numeric" "numeric" "numeric" "numeric"
## rugg500s rugg500cv pil_twicv pil_topos
## "numeric" "numeric" "numeric" "numeric"
## pil_slps pil_slpcv nutrientsn mrvbf
## "numeric" "numeric" "numeric" "numeric"
## mrrtf minfertf lf7rup hstructn
## "numeric" "integer" "integer" "numeric"
## geolrngaggn geolmnaggn bdensity50n slopern
## "numeric" "numeric" "numeric" "numeric"
## HubDist MIN_AGE_MA month_collection HubName
## "numeric" "integer" "character" "character"
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## "character" "character" "character" "integer"
## LATITUDE LONGITUDE
## "numeric" "numeric"
#Converting "true_stygofauna", "true_troglofauna" to a factor
data2$true_troglofauna <- factor(data2$true_troglofauna)
#Converting categorical variables to factor
data2$HubName <- factor(data2$HubName)
data2$FORMATION <- factor(data2$FORMATION)
data2$ROCKTYPE1 <- factor(data2$ROCKTYPE1)
data2$UNITNAME <- factor(data2$UNITNAME)
data2$month_collection <- factor(data2$month_collection)
sapply(data2,class)
## srain2mp bio21 bio16 slope
## "numeric" "numeric" "numeric" "numeric"
## rugg500s rugg500cv pil_twicv pil_topos
## "numeric" "numeric" "numeric" "numeric"
## pil_slps pil_slpcv nutrientsn mrvbf
## "numeric" "numeric" "numeric" "numeric"
## mrrtf minfertf lf7rup hstructn
## "numeric" "integer" "integer" "numeric"
## geolrngaggn geolmnaggn bdensity50n slopern
## "numeric" "numeric" "numeric" "numeric"
## HubDist MIN_AGE_MA month_collection HubName
## "numeric" "integer" "factor" "factor"
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
summary(data2$month_collection)
## April August December February January July June March
## 297 51 91 213 51 62 226 410
## May November October September
## 252 131 103 51
data2$month_collection<-factor(data2$month_collection, levels=c("January", "February", "March","April","May","June","July","August","September","October","November","December"))
summary(data2$month_collection)
## January February March April May June July August
## 51 213 410 297 252 226 62 51
## September October November December
## 51 103 131 91
plot(data2$month_collection,main="Amostras Mensais",ylim=c(0,200),ylab="Num. de Amostras")

m1<-which(data2$month_collection == "January");length(m1)
## [1] 51
m2<-which(data2$month_collection == "February");length(m2)
## [1] 213
m3<-which(data2$month_collection == "March");length(m3)
## [1] 410
m4<-which(data2$month_collection == "April");length(m4)
## [1] 297
m5<-which(data2$month_collection == "May");length(m5)
## [1] 252
m6<-which(data2$month_collection == "June");length(m6)
## [1] 226
m7<-which(data2$month_collection == "July");length(m7)
## [1] 62
m8<-which(data2$month_collection == "August");length(m8)
## [1] 51
m9<-which(data2$month_collection == "September");length(m9)
## [1] 51
m10<-which(data2$month_collection == "October");length(m10)
## [1] 103
m11<-which(data2$month_collection == "November");length(m11)
## [1] 131
m12<-which(data2$month_collection == "December");length(m12)
## [1] 91
tm<-c(m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12);length(tm)
## [1] 1938
ltm<-c(length(m1),length(m2),length(m3),length(m4),length(m5),length(m6),length(m7),length(m8),length(m9),length(m10),length(m11),length(m12));median(ltm)
## [1] 117
set.seed(78945)
sm1<-sample(m1, 50, replace = FALSE);length(sm1);head(sm1)
## [1] 50
## [1] 1901 171 75 155 1931 160
sm2<-sample(m2, 50, replace = FALSE);length(sm2);head(sm2)
## [1] 50
## [1] 1581 1065 1020 1057 1553 1155
sm3<-sample(m3, 50, replace = FALSE);length(sm3);head(sm3)
## [1] 50
## [1] 1864 454 1846 433 418 1880
sm4<-sample(m4, 50, replace = FALSE);length(sm4);head(sm4)
## [1] 50
## [1] 1304 1607 1601 1254 353 1449
sm5<-sample(m5, 50, replace = FALSE);length(sm5);head(sm5)
## [1] 50
## [1] 1512 258 1659 1627 411 846
sm6<-sample(m6, 50, replace = FALSE);length(sm6);head(sm6)
## [1] 50
## [1] 269 646 716 761 811 934
sm7<-sample(m7, 50, replace = FALSE);length(sm7);head(sm7)
## [1] 50
## [1] 756 388 814 928 372 177
sm10<-sample(m10, 50, replace = FALSE);length(sm10);head(sm10)
## [1] 50
## [1] 43 46 41 1655 1321 1725
sm11<-sample(m11, 50, replace = FALSE);length(sm11);head(sm11)
## [1] 50
## [1] 1903 1862 1420 1821 1451 1789
sm12<-sample(m12, 50, replace = FALSE);length(sm12);head(sm12)
## [1] 50
## [1] 52 67 1337 90 130 1660
utm<-c(sm1,sm2,sm3,sm4,sm5,sm6,sm7,m8,m9,sm10,sm11,sm12);length(utm)
## [1] 602
data1<-data2[utm,];dim(data1)
## [1] 602 30
summary(data1)
## srain2mp bio21 bio16 slope
## Min. :0.1338 Min. :29.63 Min. :195.6 Min. : 0.08862
## 1st Qu.:0.1427 1st Qu.:29.74 1st Qu.:222.0 1st Qu.: 1.26266
## Median :0.1471 Median :29.76 Median :224.9 Median : 2.49078
## Mean :0.1477 Mean :29.75 Mean :226.3 Mean : 3.28377
## 3rd Qu.:0.1513 3rd Qu.:29.78 3rd Qu.:230.1 3rd Qu.: 4.34199
## Max. :0.1678 Max. :29.84 Max. :245.4 Max. :16.02798
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.00849 Min. :0.00854 Min. :0.03348 Min. : 0.000
## 1st Qu.:0.58854 1st Qu.:0.06136 1st Qu.:0.08392 1st Qu.: 0.000
## Median :1.26653 Median :0.09135 Median :0.12248 Median : 0.000
## Mean :1.81579 Mean :0.11782 Mean :0.14265 Mean : 1.512
## 3rd Qu.:2.52863 3rd Qu.:0.15941 3rd Qu.:0.19717 3rd Qu.: 2.647
## Max. :9.29160 Max. :0.59943 Max. :0.45404 Max. :10.806
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.04903 Min. :0.1885 Min. :1.000 Min. :0.0000
## 1st Qu.: 1.15162 1st Qu.:0.3505 1st Qu.:1.300 1st Qu.:0.0000
## Median : 3.14211 Median :0.4586 Median :1.300 Median :0.0000
## Mean : 4.05948 Mean :0.4698 Mean :1.297 Mean :0.3827
## 3rd Qu.: 6.15380 3rd Qu.:0.5545 3rd Qu.:1.300 3rd Qu.:0.0000
## Max. :17.00972 Max. :1.3978 Max. :1.600 Max. :4.6129
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.00 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.00 1st Qu.:2.000 1st Qu.: 0.4286
## Median :0.0000 Median :2.00 Median :3.000 Median : 1.0429
## Mean :0.3018 Mean :2.04 Mean :3.027 Mean : 0.9961
## 3rd Qu.:0.0000 3rd Qu.:2.00 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :3.5547 Max. :3.00 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.247 Min. : 0.00132
## 1st Qu.: 2.5880 1st Qu.: 1.2940 1st Qu.:1.341 1st Qu.: 0.01670
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.04383
## Mean :385.4664 Mean :1411.4505 Mean :1.349 Mean : 0.31208
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.16815
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 0.8949 Min. :2453 August : 51
## 1st Qu.: 6.4556 1st Qu.:2454 September: 51
## Median :21.3475 Median :2454 January : 50
## Mean :30.9694 Mean :2508 February : 50
## 3rd Qu.:57.8053 3rd Qu.:2597 March : 50
## Max. :81.1207 Max. :2597 April : 50
## (Other) :300
## HubName
## anticline, exposed : 70
## concealed : 1
## exposed : 73
## normal, exposed, tick on downthrown side: 78
## overturned syncline, exposed : 59
## syncline, exposed :321
##
## UNITNAME
## Brockman Iron Formation :307
## Jeerinah Formation : 0
## Marra Mamba Iron Formation :205
## Mount McRae Shale and Mount Sylvia Formation: 9
## Weeli Wolli Formation : 26
## Wittenoom Formation : 55
## Woongarra Rhyolite : 0
## ROCKTYPE1
## igneous felsic volcanic : 0
## sedimentary carbonate : 55
## sedimentary other chemical or biochemical:538
## sedimentary siliciclastic : 9
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :307 0:190
## Jeerinah Formation : 0 1:412
## Marra Mamba Iron Formation :205
## Mount McRae Shale and Mount Sylvia Formation: 9
## Weeli Wolli Formation : 26
## Wittenoom Formation : 55
## Woongarra Rhyolite : 0
## LATITUDE LONGITUDE
## Min. :-23.33 Min. :118.6
## 1st Qu.:-22.96 1st Qu.:118.9
## Median :-22.92 Median :119.0
## Mean :-22.87 Mean :119.0
## 3rd Qu.:-22.76 3rd Qu.:119.0
## Max. :-22.51 Max. :119.4
##
data2<-data1
summary(data2$month_collection)
## January February March April May June July August
## 50 50 50 50 50 50 50 51
## September October November December
## 51 50 50 50
plot(data2$month_collection,main="Amostras Mensais",ylim=c(0,200),ylab="Num. de Amostras")

levels(data2$true_troglofauna);levels(data2$HubName);levels(data2$FORMATION);levels(data2$ROCKTYPE1);levels(data2$UNITNAME);levels(data2$month_collection)
## [1] "0" "1"
## [1] "anticline, exposed"
## [2] "concealed"
## [3] "exposed"
## [4] "normal, exposed, tick on downthrown side"
## [5] "overturned syncline, exposed"
## [6] "syncline, exposed"
## [1] "Brockman Iron Formation"
## [2] "Jeerinah Formation"
## [3] "Marra Mamba Iron Formation"
## [4] "Mount McRae Shale and Mount Sylvia Formation"
## [5] "Weeli Wolli Formation"
## [6] "Wittenoom Formation"
## [7] "Woongarra Rhyolite"
## [1] "igneous felsic volcanic"
## [2] "sedimentary carbonate"
## [3] "sedimentary other chemical or biochemical"
## [4] "sedimentary siliciclastic"
## [1] "Brockman Iron Formation"
## [2] "Jeerinah Formation"
## [3] "Marra Mamba Iron Formation"
## [4] "Mount McRae Shale and Mount Sylvia Formation"
## [5] "Weeli Wolli Formation"
## [6] "Wittenoom Formation"
## [7] "Woongarra Rhyolite"
## [1] "January" "February" "March" "April" "May" "June"
## [7] "July" "August" "September" "October" "November" "December"
summary(data2)
## srain2mp bio21 bio16 slope
## Min. :0.1338 Min. :29.63 Min. :195.6 Min. : 0.08862
## 1st Qu.:0.1427 1st Qu.:29.74 1st Qu.:222.0 1st Qu.: 1.26266
## Median :0.1471 Median :29.76 Median :224.9 Median : 2.49078
## Mean :0.1477 Mean :29.75 Mean :226.3 Mean : 3.28377
## 3rd Qu.:0.1513 3rd Qu.:29.78 3rd Qu.:230.1 3rd Qu.: 4.34199
## Max. :0.1678 Max. :29.84 Max. :245.4 Max. :16.02798
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.00849 Min. :0.00854 Min. :0.03348 Min. : 0.000
## 1st Qu.:0.58854 1st Qu.:0.06136 1st Qu.:0.08392 1st Qu.: 0.000
## Median :1.26653 Median :0.09135 Median :0.12248 Median : 0.000
## Mean :1.81579 Mean :0.11782 Mean :0.14265 Mean : 1.512
## 3rd Qu.:2.52863 3rd Qu.:0.15941 3rd Qu.:0.19717 3rd Qu.: 2.647
## Max. :9.29160 Max. :0.59943 Max. :0.45404 Max. :10.806
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.04903 Min. :0.1885 Min. :1.000 Min. :0.0000
## 1st Qu.: 1.15162 1st Qu.:0.3505 1st Qu.:1.300 1st Qu.:0.0000
## Median : 3.14211 Median :0.4586 Median :1.300 Median :0.0000
## Mean : 4.05948 Mean :0.4698 Mean :1.297 Mean :0.3827
## 3rd Qu.: 6.15380 3rd Qu.:0.5545 3rd Qu.:1.300 3rd Qu.:0.0000
## Max. :17.00972 Max. :1.3978 Max. :1.600 Max. :4.6129
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.00 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.00 1st Qu.:2.000 1st Qu.: 0.4286
## Median :0.0000 Median :2.00 Median :3.000 Median : 1.0429
## Mean :0.3018 Mean :2.04 Mean :3.027 Mean : 0.9961
## 3rd Qu.:0.0000 3rd Qu.:2.00 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :3.5547 Max. :3.00 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.247 Min. : 0.00132
## 1st Qu.: 2.5880 1st Qu.: 1.2940 1st Qu.:1.341 1st Qu.: 0.01670
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.04383
## Mean :385.4664 Mean :1411.4505 Mean :1.349 Mean : 0.31208
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.16815
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 0.8949 Min. :2453 August : 51
## 1st Qu.: 6.4556 1st Qu.:2454 September: 51
## Median :21.3475 Median :2454 January : 50
## Mean :30.9694 Mean :2508 February : 50
## 3rd Qu.:57.8053 3rd Qu.:2597 March : 50
## Max. :81.1207 Max. :2597 April : 50
## (Other) :300
## HubName
## anticline, exposed : 70
## concealed : 1
## exposed : 73
## normal, exposed, tick on downthrown side: 78
## overturned syncline, exposed : 59
## syncline, exposed :321
##
## UNITNAME
## Brockman Iron Formation :307
## Jeerinah Formation : 0
## Marra Mamba Iron Formation :205
## Mount McRae Shale and Mount Sylvia Formation: 9
## Weeli Wolli Formation : 26
## Wittenoom Formation : 55
## Woongarra Rhyolite : 0
## ROCKTYPE1
## igneous felsic volcanic : 0
## sedimentary carbonate : 55
## sedimentary other chemical or biochemical:538
## sedimentary siliciclastic : 9
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :307 0:190
## Jeerinah Formation : 0 1:412
## Marra Mamba Iron Formation :205
## Mount McRae Shale and Mount Sylvia Formation: 9
## Weeli Wolli Formation : 26
## Wittenoom Formation : 55
## Woongarra Rhyolite : 0
## LATITUDE LONGITUDE
## Min. :-23.33 Min. :118.6
## 1st Qu.:-22.96 1st Qu.:118.9
## Median :-22.92 Median :119.0
## Mean :-22.87 Mean :119.0
## 3rd Qu.:-22.76 3rd Qu.:119.0
## Max. :-22.51 Max. :119.4
##
dim(data2)
## [1] 602 30
instanceconvert <- colnames(data2[, -c((ncol(data2) - 8):ncol(data2))])
instanceconvert
## [1] "srain2mp" "bio21" "bio16" "slope" "rugg500s"
## [6] "rugg500cv" "pil_twicv" "pil_topos" "pil_slps" "pil_slpcv"
## [11] "nutrientsn" "mrvbf" "mrrtf" "minfertf" "lf7rup"
## [16] "hstructn" "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist"
for (i in instanceconvert)
{
data2[[i]] <- as.numeric(data2[[i]])
}
sapply(data2,class)
## srain2mp bio21 bio16 slope
## "numeric" "numeric" "numeric" "numeric"
## rugg500s rugg500cv pil_twicv pil_topos
## "numeric" "numeric" "numeric" "numeric"
## pil_slps pil_slpcv nutrientsn mrvbf
## "numeric" "numeric" "numeric" "numeric"
## mrrtf minfertf lf7rup hstructn
## "numeric" "numeric" "numeric" "numeric"
## geolrngaggn geolmnaggn bdensity50n slopern
## "numeric" "numeric" "numeric" "numeric"
## HubDist MIN_AGE_MA month_collection HubName
## "numeric" "integer" "factor" "factor"
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
names(data2)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION" "true_troglofauna"
## [29] "LATITUDE" "LONGITUDE"
data2$true_troglofauna <- factor(data2$true_troglofauna)
data2$HubName <- factor(data2$HubName)
data2$FORMATION <- factor(data2$FORMATION)
data2$ROCKTYPE1 <- factor(data2$ROCKTYPE1)
data2$UNITNAME <- factor(data2$UNITNAME)
data2$month_collection <- factor(data2$month_collection)
sapply(data2,class)
## srain2mp bio21 bio16 slope
## "numeric" "numeric" "numeric" "numeric"
## rugg500s rugg500cv pil_twicv pil_topos
## "numeric" "numeric" "numeric" "numeric"
## pil_slps pil_slpcv nutrientsn mrvbf
## "numeric" "numeric" "numeric" "numeric"
## mrrtf minfertf lf7rup hstructn
## "numeric" "numeric" "numeric" "numeric"
## geolrngaggn geolmnaggn bdensity50n slopern
## "numeric" "numeric" "numeric" "numeric"
## HubDist MIN_AGE_MA month_collection HubName
## "numeric" "integer" "factor" "factor"
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
levels(data2$true_troglofauna);levels(data2$HubName);levels(data2$FORMATION);levels(data2$ROCKTYPE1);levels(data2$UNITNAME);levels(data2$month_collection)
## [1] "0" "1"
## [1] "anticline, exposed"
## [2] "concealed"
## [3] "exposed"
## [4] "normal, exposed, tick on downthrown side"
## [5] "overturned syncline, exposed"
## [6] "syncline, exposed"
## [1] "Brockman Iron Formation"
## [2] "Marra Mamba Iron Formation"
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Weeli Wolli Formation"
## [5] "Wittenoom Formation"
## [1] "sedimentary carbonate"
## [2] "sedimentary other chemical or biochemical"
## [3] "sedimentary siliciclastic"
## [1] "Brockman Iron Formation"
## [2] "Marra Mamba Iron Formation"
## [3] "Mount McRae Shale and Mount Sylvia Formation"
## [4] "Weeli Wolli Formation"
## [5] "Wittenoom Formation"
## [1] "January" "February" "March" "April" "May" "June"
## [7] "July" "August" "September" "October" "November" "December"
# Get column names with their indexes
column_indexes <- seq_along(names(data2))
# Display the column names and their corresponding indexes
column_indexes_named <- setNames(column_indexes, names(data2))
print(column_indexes_named)
## srain2mp bio21 bio16 slope
## 1 2 3 4
## rugg500s rugg500cv pil_twicv pil_topos
## 5 6 7 8
## pil_slps pil_slpcv nutrientsn mrvbf
## 9 10 11 12
## mrrtf minfertf lf7rup hstructn
## 13 14 15 16
## geolrngaggn geolmnaggn bdensity50n slopern
## 17 18 19 20
## HubDist MIN_AGE_MA month_collection HubName
## 21 22 23 24
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## 25 26 27 28
## LATITUDE LONGITUDE
## 29 30
sapply(data2,class)
## srain2mp bio21 bio16 slope
## "numeric" "numeric" "numeric" "numeric"
## rugg500s rugg500cv pil_twicv pil_topos
## "numeric" "numeric" "numeric" "numeric"
## pil_slps pil_slpcv nutrientsn mrvbf
## "numeric" "numeric" "numeric" "numeric"
## mrrtf minfertf lf7rup hstructn
## "numeric" "numeric" "numeric" "numeric"
## geolrngaggn geolmnaggn bdensity50n slopern
## "numeric" "numeric" "numeric" "numeric"
## HubDist MIN_AGE_MA month_collection HubName
## "numeric" "integer" "factor" "factor"
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
instanceconvert <- colnames(data2[, -c((ncol(data2) - 7):ncol(data2))])
for (i in instanceconvert)
{
data2[[i]] <- as.numeric(data2[[i]])
}
sapply(data2,class)
## srain2mp bio21 bio16 slope
## "numeric" "numeric" "numeric" "numeric"
## rugg500s rugg500cv pil_twicv pil_topos
## "numeric" "numeric" "numeric" "numeric"
## pil_slps pil_slpcv nutrientsn mrvbf
## "numeric" "numeric" "numeric" "numeric"
## mrrtf minfertf lf7rup hstructn
## "numeric" "numeric" "numeric" "numeric"
## geolrngaggn geolmnaggn bdensity50n slopern
## "numeric" "numeric" "numeric" "numeric"
## HubDist MIN_AGE_MA month_collection HubName
## "numeric" "numeric" "factor" "factor"
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
str(data2)
## 'data.frame': 602 obs. of 30 variables:
## $ srain2mp : num 0.138 0.147 0.148 0.146 0.137 ...
## $ bio21 : num 29.7 29.8 29.8 29.8 29.7 ...
## $ bio16 : num 236 225 225 224 234 ...
## $ slope : num 1.51 2.26 1.78 1.39 5.37 ...
## $ rugg500s : num 2.59 1.697 1.032 1.074 0.686 ...
## $ rugg500cv : num 0.1091 0.0935 0.0423 0.0471 0.0527 ...
## $ pil_twicv : num 0.1524 0.1803 0.0521 0.2545 0.2685 ...
## $ pil_topos : num 4.94 9.34 1.55 0 0 ...
## $ pil_slps : num 12.83 6.84 9.14 6.63 4.02 ...
## $ pil_slpcv : num 0.646 0.865 0.45 0.455 0.415 ...
## $ nutrientsn : num 1.1 1.3 1.3 1.3 1.1 1.3 1.3 1.3 1.3 1.3 ...
## $ mrvbf : num 0 0 0 0.661 0 ...
## $ mrrtf : num 0.745 0 0 0 0 ...
## $ minfertf : num 2 2 2 2 2 2 2 2 2 2 ...
## $ lf7rup : num 3 3 3 7 1 3 6 3 7 7 ...
## $ hstructn : num 0.429 1.043 1.043 1.043 0.429 ...
## $ geolrngaggn : num 65.5 2.59 2.59 300 900 ...
## $ geolmnaggn : num 32.75 1.29 1.29 2650 2050 ...
## $ bdensity50n : num 1.34 1.37 1.37 1.37 1.34 ...
## $ slopern : num 0.0186 0.0898 0.028 0.0107 0.0184 ...
## $ HubDist : num 57.23 13.63 9.26 2.71 61.12 ...
## $ MIN_AGE_MA : num 2454 2597 2597 2597 2454 ...
## $ month_collection: Factor w/ 12 levels "January","February",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ HubName : Factor w/ 6 levels "anticline, exposed",..: 6 5 5 4 6 5 4 5 4 4 ...
## $ UNITNAME : Factor w/ 5 levels "Brockman Iron Formation",..: 1 2 2 2 1 2 2 2 2 2 ...
## $ ROCKTYPE1 : Factor w/ 3 levels "sedimentary carbonate",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ FORMATION : Factor w/ 5 levels "Brockman Iron Formation",..: 1 2 2 2 1 2 2 2 2 2 ...
## $ true_troglofauna: Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 1 2 2 ...
## $ LATITUDE : num -22.6 -22.9 -22.9 -22.9 -22.6 ...
## $ LONGITUDE : num 119 119 119 119 119 ...
dim(data2)
## [1] 602 30
dim(data2)
## [1] 602 30
num_rows <- 409
# Randomly select row indices
selected_rows <- sample(nrow(data2), num_rows)
# Create a new dataframe with randomly selected rows
data2 <- data2[selected_rows, ]
dim(data2)
## [1] 409 30
tg <- which(data2$true_troglofauna == 1)
tgg <- data2[tg, "true_troglofauna"]
tgg <- length(tgg)
fg <- which(data2$true_troglofauna == 0)
fgg <- data2[fg, "true_troglofauna"]
fgg <- length(fgg)
# Data for pie chart
plot_num_ident <- data.frame(Absence = 100 * fgg / (tgg + fgg),
Presence = 100 * tgg / (tgg + fgg))
# Convert data to a numeric vector
pie_data <- as.numeric(plot_num_ident)
# Labels for the pie chart (rounded to 1 decimal place)
labels <- c(paste0("Absent\n", round(pie_data[1], 1), "%"),
paste0("Present\n", round(pie_data[2], 1), "%"))
# Create pie chart
pie(pie_data,
labels = NA, # Remove default labels
main = "AREa 2Percentage of Presence and Absence of Troglobites",
col = c("orange", "lightblue"))
# Calculate the midpoints of each pie slice for label positioning
pie_slices <- cumsum(pie_data) - pie_data / 2
# Add the labels inside the pie chart
text(x = 0.5 * cos(2 * pi * pie_slices / sum(pie_data)),
y = 0.5 * sin(2 * pi * pie_slices / sum(pie_data)),
labels = labels, cex = 1.5)

selectcol_Data2 <- data2[, -c((ncol(data2) - 7):ncol(data2))]
dim(selectcol_Data2)
## [1] 409 22
names(selectcol_Data2)
## [1] "srain2mp" "bio21" "bio16" "slope" "rugg500s"
## [6] "rugg500cv" "pil_twicv" "pil_topos" "pil_slps" "pil_slpcv"
## [11] "nutrientsn" "mrvbf" "mrrtf" "minfertf" "lf7rup"
## [16] "hstructn" "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA"
correlations<-cor(selectcol_Data2)
correlations
## srain2mp bio21 bio16 slope rugg500s
## srain2mp 1.00000000 -0.036188741 -0.11293552 0.54725461 0.43096627
## bio21 -0.03618874 1.000000000 -0.09471667 0.09086516 0.14903786
## bio16 -0.11293552 -0.094716666 1.00000000 0.08512271 0.01039644
## slope 0.54725461 0.090865157 0.08512271 1.00000000 0.50274397
## rugg500s 0.43096627 0.149037862 0.01039644 0.50274397 1.00000000
## rugg500cv -0.05521694 0.142342905 -0.19481648 -0.05134236 0.53816608
## pil_twicv -0.10610188 -0.088439495 -0.13223566 -0.06336495 -0.15957223
## pil_topos -0.02813466 0.003698451 -0.14715317 -0.05420059 0.22185299
## pil_slps 0.40727177 0.080909640 0.13681881 0.59729931 0.44226310
## pil_slpcv -0.03049940 -0.049269756 0.05470746 -0.06979420 0.11825844
## nutrientsn 0.07581793 0.637431868 -0.01803084 -0.09426969 -0.05300757
## mrvbf -0.25628774 0.089807085 0.18820731 -0.39123049 -0.29679702
## mrrtf -0.21645723 -0.187961770 0.16257424 -0.38825647 -0.34618304
## minfertf -0.04919400 -0.008360016 -0.08319762 -0.05801783 -0.03667740
## lf7rup -0.36930933 -0.054452098 0.00722027 -0.47994892 -0.31732814
## hstructn 0.05863203 0.131762943 0.16061571 -0.02986026 -0.03847043
## geolrngaggn 0.56971300 0.141930684 0.05306587 0.58810194 0.43125958
## geolmnaggn 0.45787624 0.296349683 -0.28657359 0.32073719 0.22879221
## bdensity50n 0.43749936 -0.081614778 -0.30861322 0.35403693 0.29659908
## slopern -0.12242657 -0.019422607 0.16086261 -0.17243734 -0.15379213
## HubDist 0.10505023 -0.311245192 0.51459314 0.29078095 0.20611131
## MIN_AGE_MA -0.01033253 0.262242485 -0.53715483 -0.20484326 -0.13715325
## rugg500cv pil_twicv pil_topos pil_slps pil_slpcv
## srain2mp -0.055216936 -0.106101880 -0.028134656 0.40727177 -0.030499399
## bio21 0.142342905 -0.088439495 0.003698451 0.08090964 -0.049269756
## bio16 -0.194816476 -0.132235658 -0.147153167 0.13681881 0.054707458
## slope -0.051342363 -0.063364947 -0.054200587 0.59729931 -0.069794199
## rugg500s 0.538166078 -0.159572228 0.221852987 0.44226310 0.118258440
## rugg500cv 1.000000000 -0.023195908 0.209185139 -0.22647460 0.070197071
## pil_twicv -0.023195908 1.000000000 0.016965288 0.03319852 0.170672618
## pil_topos 0.209185139 0.016965288 1.000000000 0.05093714 0.308686706
## pil_slps -0.226474600 0.033198518 0.050937142 1.00000000 0.264584461
## pil_slpcv 0.070197071 0.170672618 0.308686706 0.26458446 1.000000000
## nutrientsn 0.063179128 -0.030021677 -0.011206861 -0.14076750 -0.035279010
## mrvbf -0.056713064 0.100888841 -0.029218016 -0.29035192 0.124925800
## mrrtf -0.186484301 -0.217884507 -0.133607959 -0.32691431 0.004603624
## minfertf -0.032750908 -0.020742167 0.043266417 -0.09522095 0.039238286
## lf7rup -0.002415671 0.211608640 0.033705334 -0.33387843 0.065881139
## hstructn 0.019610056 -0.044422361 -0.047133726 -0.03142007 0.050759651
## geolrngaggn -0.102544186 -0.041566029 -0.075558714 0.58248262 0.006146993
## geolmnaggn -0.066000777 0.019194504 -0.070780400 0.32312221 -0.118829493
## bdensity50n -0.039331028 0.055331652 0.107651689 0.35952526 -0.024261598
## slopern -0.026151987 -0.008662173 -0.086047821 -0.17548406 0.118882346
## HubDist -0.056583116 0.077693347 -0.135862654 0.29408277 0.057877154
## MIN_AGE_MA 0.028028857 0.101908570 0.129697849 -0.19812276 -0.145880579
## nutrientsn mrvbf mrrtf minfertf lf7rup
## srain2mp 0.07581793 -0.256287743 -0.216457234 -0.049194002 -0.369309333
## bio21 0.63743187 0.089807085 -0.187961770 -0.008360016 -0.054452098
## bio16 -0.01803084 0.188207308 0.162574238 -0.083197622 0.007220270
## slope -0.09426969 -0.391230489 -0.388256472 -0.058017828 -0.479948921
## rugg500s -0.05300757 -0.296797021 -0.346183035 -0.036677401 -0.317328137
## rugg500cv 0.06317913 -0.056713064 -0.186484301 -0.032750908 -0.002415671
## pil_twicv -0.03002168 0.100888841 -0.217884507 -0.020742167 0.211608640
## pil_topos -0.01120686 -0.029218016 -0.133607959 0.043266417 0.033705334
## pil_slps -0.14076750 -0.290351919 -0.326914312 -0.095220949 -0.333878429
## pil_slpcv -0.03527901 0.124925800 0.004603624 0.039238286 0.065881139
## nutrientsn 1.00000000 0.302761679 0.062028636 -0.039203433 0.160408509
## mrvbf 0.30276168 1.000000000 0.070381347 -0.003802492 0.538132530
## mrrtf 0.06202864 0.070381347 1.000000000 0.157183925 0.122849752
## minfertf -0.03920343 -0.003802492 0.157183925 1.000000000 -0.023185959
## lf7rup 0.16040851 0.538132530 0.122849752 -0.023185959 1.000000000
## hstructn 0.04157830 0.405557629 -0.045078431 -0.006064680 0.212538506
## geolrngaggn -0.03277473 -0.282697514 -0.319438714 -0.069717342 -0.383998647
## geolmnaggn 0.07558009 -0.315430328 -0.319160720 -0.095200819 -0.339086028
## bdensity50n -0.54424628 -0.424117111 -0.403816175 0.052577887 -0.304565962
## slopern 0.04300266 0.457099194 0.077395936 0.012417176 0.269053842
## HubDist -0.24388350 0.002259312 -0.073614373 -0.049744858 -0.039387080
## MIN_AGE_MA 0.17758264 -0.108641720 -0.084136452 -0.057131199 0.083285368
## hstructn geolrngaggn geolmnaggn bdensity50n slopern
## srain2mp 0.058632026 0.569713002 0.457876241 0.437499365 -0.122426567
## bio21 0.131762943 0.141930684 0.296349683 -0.081614778 -0.019422607
## bio16 0.160615706 0.053065872 -0.286573593 -0.308613216 0.160862614
## slope -0.029860259 0.588101944 0.320737193 0.354036931 -0.172437342
## rugg500s -0.038470425 0.431259576 0.228792212 0.296599077 -0.153792133
## rugg500cv 0.019610056 -0.102544186 -0.066000777 -0.039331028 -0.026151987
## pil_twicv -0.044422361 -0.041566029 0.019194504 0.055331652 -0.008662173
## pil_topos -0.047133726 -0.075558714 -0.070780400 0.107651689 -0.086047821
## pil_slps -0.031420066 0.582482621 0.323122208 0.359525258 -0.175484056
## pil_slpcv 0.050759651 0.006146993 -0.118829493 -0.024261598 0.118882346
## nutrientsn 0.041578302 -0.032774730 0.075580087 -0.544246284 0.043002661
## mrvbf 0.405557629 -0.282697514 -0.315430328 -0.424117111 0.457099194
## mrrtf -0.045078431 -0.319438714 -0.319160720 -0.403816175 0.077395936
## minfertf -0.006064680 -0.069717342 -0.095200819 0.052577887 0.012417176
## lf7rup 0.212538506 -0.383998647 -0.339086028 -0.304565962 0.269053842
## hstructn 1.000000000 0.015917490 0.006829071 -0.001219126 0.632584576
## geolrngaggn 0.015917490 1.000000000 0.639303189 0.389034758 -0.151960908
## geolmnaggn 0.006829071 0.639303189 1.000000000 0.434440924 -0.191605703
## bdensity50n -0.001219126 0.389034758 0.434440924 1.000000000 -0.242365677
## slopern 0.632584576 -0.151960908 -0.191605703 -0.242365677 1.000000000
## HubDist -0.025128202 0.365678291 -0.157274111 0.011530038 -0.010031043
## MIN_AGE_MA -0.023834271 -0.354316165 0.291254377 0.161235109 -0.102443205
## HubDist MIN_AGE_MA
## srain2mp 0.105050226 -0.01033253
## bio21 -0.311245192 0.26224249
## bio16 0.514593140 -0.53715483
## slope 0.290780950 -0.20484326
## rugg500s 0.206111311 -0.13715325
## rugg500cv -0.056583116 0.02802886
## pil_twicv 0.077693347 0.10190857
## pil_topos -0.135862654 0.12969785
## pil_slps 0.294082766 -0.19812276
## pil_slpcv 0.057877154 -0.14588058
## nutrientsn -0.243883502 0.17758264
## mrvbf 0.002259312 -0.10864172
## mrrtf -0.073614373 -0.08413645
## minfertf -0.049744858 -0.05713120
## lf7rup -0.039387080 0.08328537
## hstructn -0.025128202 -0.02383427
## geolrngaggn 0.365678291 -0.35431617
## geolmnaggn -0.157274111 0.29125438
## bdensity50n 0.011530038 0.16123511
## slopern -0.010031043 -0.10244320
## HubDist 1.000000000 -0.67278580
## MIN_AGE_MA -0.672785797 1.00000000
corrplot(correlations, method = "circle", tl.cex = 1.5, cl.cex = 1.5)

# Corrplot (upper triangle only) with larger font
corrplot(correlations, method = "circle", type = "upper", tl.cex = 1.5, cl.cex = 1.5)

dim(data2)
## [1] 409 30
# Create a density plot for each column
par(mfrow = c(2, 3)) # Arrange plots in a 2x3 grid
for (col in seq_along(selectcol_Data2)) {
plot(density(selectcol_Data2[, col]), main = colnames(selectcol_Data2)[col])
}




set.seed(78945)
Index1 <- createDataPartition(data2$true_troglofauna, p=0.5, list=FALSE)
data_train <- data2[ Index1,]
data_prov <- data2[-Index1,]
dim(data_train)
## [1] 205 30
summary(data_train)
## srain2mp bio21 bio16 slope
## Min. :0.1344 Min. :29.63 Min. :195.6 Min. : 0.08862
## 1st Qu.:0.1419 1st Qu.:29.73 1st Qu.:223.0 1st Qu.: 1.04867
## Median :0.1470 Median :29.76 Median :225.8 Median : 2.15997
## Mean :0.1475 Mean :29.75 Mean :227.3 Mean : 3.13080
## 3rd Qu.:0.1512 3rd Qu.:29.78 3rd Qu.:231.3 3rd Qu.: 4.45382
## Max. :0.1678 Max. :29.84 Max. :244.2 Max. :12.80595
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.00942 Min. :0.00854 Min. :0.03348 Min. : 0.000
## 1st Qu.:0.54119 1st Qu.:0.05971 1st Qu.:0.07750 1st Qu.: 0.000
## Median :1.32722 Median :0.08920 Median :0.11607 Median : 0.000
## Mean :1.73832 Mean :0.11282 Mean :0.13714 Mean : 1.366
## 3rd Qu.:2.45758 3rd Qu.:0.15902 3rd Qu.:0.18838 3rd Qu.: 2.077
## Max. :7.79036 Max. :0.59943 Max. :0.45404 Max. :10.435
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.04903 Min. :0.2086 Min. :1.000 Min. :0.0000
## 1st Qu.: 1.15748 1st Qu.:0.3447 1st Qu.:1.300 1st Qu.:0.0000
## Median : 3.28653 Median :0.4470 Median :1.300 Median :0.0000
## Mean : 4.16924 Mean :0.4728 Mean :1.282 Mean :0.4067
## 3rd Qu.: 6.29694 3rd Qu.:0.5672 3rd Qu.:1.300 3rd Qu.:0.5784
## Max. :16.74641 Max. :1.3978 Max. :1.600 Max. :4.5904
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 0.4286
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.0429
## Mean :0.3521 Mean :2.024 Mean :3.044 Mean : 0.9131
## 3rd Qu.:0.5762 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :3.5547 Max. :3.000 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.247 Min. : 0.00250
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.01579
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.04265
## Mean :404.1882 Mean :1410.5522 Mean :1.347 Mean : 0.33502
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.17062
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 1.237 Min. :2453 October :22
## 1st Qu.: 8.994 1st Qu.:2454 April :20
## Median :31.730 Median :2454 July :20
## Mean :34.802 Mean :2498 March :19
## 3rd Qu.:60.020 3rd Qu.:2597 November:18
## Max. :79.744 Max. :2597 May :16
## (Other) :90
## HubName
## anticline, exposed : 19
## concealed : 0
## exposed : 18
## normal, exposed, tick on downthrown side: 28
## overturned syncline, exposed : 21
## syncline, exposed :119
##
## UNITNAME
## Brockman Iron Formation :120
## Marra Mamba Iron Formation : 56
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 7
## Wittenoom Formation : 20
##
##
## ROCKTYPE1
## sedimentary carbonate : 20
## sedimentary other chemical or biochemical:183
## sedimentary siliciclastic : 2
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :120 0: 59
## Marra Mamba Iron Formation : 56 1:146
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 7
## Wittenoom Formation : 20
##
##
## LATITUDE LONGITUDE
## Min. :-23.33 Min. :118.6
## 1st Qu.:-22.94 1st Qu.:118.9
## Median :-22.91 Median :119.0
## Mean :-22.84 Mean :119.0
## 3rd Qu.:-22.71 3rd Qu.:119.1
## Max. :-22.51 Max. :119.4
##
data_train <- data_train[, -which(names(data_train) %in% c("LATITUDE", "LONGITUDE"))]
dim(data_train)
## [1] 205 28
head(data_train)
## srain2mp bio21 bio16 slope rugg500s rugg500cv pil_twicv pil_topos
## 1001 0.16029 29.73996 224.6990 4.82564 1.74834 0.08714 0.17703 0.00000
## 171 0.14708 29.77693 224.6783 2.26336 1.69707 0.09354 0.18033 9.34053
## 865 0.14544 29.65465 223.7451 0.92271 0.00960 0.01070 0.08952 0.00000
## 1616 0.13816 29.70033 231.0552 0.70888 0.72522 0.06054 0.09249 0.00000
## 1750 0.15230 29.72014 219.9479 4.43650 1.34984 0.06894 0.21494 5.37750
## 903 0.15055 29.78418 221.7930 4.34855 3.76135 0.20322 0.07274 2.38332
## pil_slps pil_slpcv nutrientsn mrvbf mrrtf minfertf lf7rup hstructn
## 1001 3.59536 0.36466 1.3 0 0.00000 2 3 1.04286
## 171 6.83865 0.86478 1.3 0 0.00000 2 3 1.04286
## 865 0.11971 0.64403 1.1 0 1.60444 2 3 0.42857
## 1616 3.82764 0.45171 1.1 0 1.82401 2 1 0.42857
## 1750 4.40210 0.42944 1.3 0 0.00000 2 1 1.04286
## 903 2.51954 0.57590 1.3 0 0.00000 2 2 1.04286
## geolrngaggn geolmnaggn bdensity50n slopern HubDist MIN_AGE_MA
## 1001 300.000 2650.000 1.36732 0.02515 3.954500 2597
## 171 2.588 1.294 1.36732 0.08978 13.631670 2597
## 865 2.588 1.294 1.34060 1.77740 20.744690 2454
## 1616 65.500 32.750 1.34060 0.03185 67.270712 2454
## 1750 300.000 2650.000 1.36732 0.01402 5.236846 2597
## 903 300.000 2650.000 1.36732 0.12920 3.802839 2597
## month_collection HubName
## 1001 July exposed
## 171 January overturned syncline, exposed
## 865 May normal, exposed, tick on downthrown side
## 1616 October syncline, exposed
## 1750 May exposed
## 903 July syncline, exposed
## UNITNAME ROCKTYPE1
## 1001 Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 171 Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 865 Brockman Iron Formation sedimentary other chemical or biochemical
## 1616 Brockman Iron Formation sedimentary other chemical or biochemical
## 1750 Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 903 Marra Mamba Iron Formation sedimentary other chemical or biochemical
## FORMATION true_troglofauna
## 1001 Marra Mamba Iron Formation 0
## 171 Marra Mamba Iron Formation 0
## 865 Brockman Iron Formation 0
## 1616 Brockman Iron Formation 1
## 1750 Marra Mamba Iron Formation 1
## 903 Marra Mamba Iron Formation 1
summary(data_train)
## srain2mp bio21 bio16 slope
## Min. :0.1344 Min. :29.63 Min. :195.6 Min. : 0.08862
## 1st Qu.:0.1419 1st Qu.:29.73 1st Qu.:223.0 1st Qu.: 1.04867
## Median :0.1470 Median :29.76 Median :225.8 Median : 2.15997
## Mean :0.1475 Mean :29.75 Mean :227.3 Mean : 3.13080
## 3rd Qu.:0.1512 3rd Qu.:29.78 3rd Qu.:231.3 3rd Qu.: 4.45382
## Max. :0.1678 Max. :29.84 Max. :244.2 Max. :12.80595
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.00942 Min. :0.00854 Min. :0.03348 Min. : 0.000
## 1st Qu.:0.54119 1st Qu.:0.05971 1st Qu.:0.07750 1st Qu.: 0.000
## Median :1.32722 Median :0.08920 Median :0.11607 Median : 0.000
## Mean :1.73832 Mean :0.11282 Mean :0.13714 Mean : 1.366
## 3rd Qu.:2.45758 3rd Qu.:0.15902 3rd Qu.:0.18838 3rd Qu.: 2.077
## Max. :7.79036 Max. :0.59943 Max. :0.45404 Max. :10.435
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.04903 Min. :0.2086 Min. :1.000 Min. :0.0000
## 1st Qu.: 1.15748 1st Qu.:0.3447 1st Qu.:1.300 1st Qu.:0.0000
## Median : 3.28653 Median :0.4470 Median :1.300 Median :0.0000
## Mean : 4.16924 Mean :0.4728 Mean :1.282 Mean :0.4067
## 3rd Qu.: 6.29694 3rd Qu.:0.5672 3rd Qu.:1.300 3rd Qu.:0.5784
## Max. :16.74641 Max. :1.3978 Max. :1.600 Max. :4.5904
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 0.4286
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.0429
## Mean :0.3521 Mean :2.024 Mean :3.044 Mean : 0.9131
## 3rd Qu.:0.5762 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :3.5547 Max. :3.000 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.247 Min. : 0.00250
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.01579
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.04265
## Mean :404.1882 Mean :1410.5522 Mean :1.347 Mean : 0.33502
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.17062
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 1.237 Min. :2453 October :22
## 1st Qu.: 8.994 1st Qu.:2454 April :20
## Median :31.730 Median :2454 July :20
## Mean :34.802 Mean :2498 March :19
## 3rd Qu.:60.020 3rd Qu.:2597 November:18
## Max. :79.744 Max. :2597 May :16
## (Other) :90
## HubName
## anticline, exposed : 19
## concealed : 0
## exposed : 18
## normal, exposed, tick on downthrown side: 28
## overturned syncline, exposed : 21
## syncline, exposed :119
##
## UNITNAME
## Brockman Iron Formation :120
## Marra Mamba Iron Formation : 56
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 7
## Wittenoom Formation : 20
##
##
## ROCKTYPE1
## sedimentary carbonate : 20
## sedimentary other chemical or biochemical:183
## sedimentary siliciclastic : 2
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :120 0: 59
## Marra Mamba Iron Formation : 56 1:146
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 7
## Wittenoom Formation : 20
##
##
set.seed(78945)
trainIndex1 <- createDataPartition(data_prov$true_troglofauna, p=0.6, list=FALSE)
data_test <- data_prov[ trainIndex1,]
finalTest1 <- data_prov[-trainIndex1,]
summary(data_test)
## srain2mp bio21 bio16 slope
## Min. :0.1338 Min. :29.64 Min. :196.1 Min. : 0.08862
## 1st Qu.:0.1432 1st Qu.:29.74 1st Qu.:221.5 1st Qu.: 1.40481
## Median :0.1478 Median :29.76 Median :224.6 Median : 2.53936
## Mean :0.1477 Mean :29.75 Mean :226.3 Mean : 3.22529
## 3rd Qu.:0.1510 3rd Qu.:29.78 3rd Qu.:229.3 3rd Qu.: 4.02498
## Max. :0.1614 Max. :29.84 Max. :245.2 Max. :15.00457
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.03319 Min. :0.01076 Min. :0.03348 Min. : 0.000
## 1st Qu.:0.47334 1st Qu.:0.06180 1st Qu.:0.08586 1st Qu.: 0.000
## Median :1.32736 Median :0.08941 Median :0.12202 Median : 0.000
## Mean :1.78291 Mean :0.11772 Mean :0.14844 Mean : 1.631
## 3rd Qu.:2.53294 3rd Qu.:0.15165 3rd Qu.:0.20438 3rd Qu.: 3.480
## Max. :9.29160 Max. :0.43959 Max. :0.39956 Max. :10.110
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.1107 Min. :0.1951 Min. :1.100 Min. :0.0000
## 1st Qu.: 1.0131 1st Qu.:0.3506 1st Qu.:1.300 1st Qu.:0.0000
## Median : 2.7769 Median :0.4659 Median :1.300 Median :0.0000
## Mean : 4.1962 Mean :0.4726 Mean :1.315 Mean :0.3674
## 3rd Qu.: 6.6343 3rd Qu.:0.5579 3rd Qu.:1.300 3rd Qu.:0.0000
## Max. :16.6383 Max. :0.8918 Max. :1.600 Max. :4.5741
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :2.000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 0.4286
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.0429
## Mean :0.2309 Mean :2.041 Mean :3.008 Mean : 0.9729
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :2.7292 Max. :3.000 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. : 0.00344
## 1st Qu.: 2.5880 1st Qu.: 1.2940 1st Qu.:1.341 1st Qu.: 0.01630
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.03951
## Mean :335.2414 Mean :1308.5467 Mean :1.348 Mean : 0.40178
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.16592
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 1.236 Min. :2453 August :15
## 1st Qu.: 5.492 1st Qu.:2454 March :14
## Median :21.187 Median :2454 February:13
## Mean :29.232 Mean :2511 November:13
## 3rd Qu.:55.606 3rd Qu.:2597 May :11
## Max. :81.121 Max. :2597 April :10
## (Other) :47
## HubName
## anticline, exposed :18
## concealed : 1
## exposed :22
## normal, exposed, tick on downthrown side:12
## overturned syncline, exposed : 9
## syncline, exposed :61
##
## UNITNAME
## Brockman Iron Formation :58
## Marra Mamba Iron Formation :45
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 9
## Wittenoom Formation :10
##
##
## ROCKTYPE1
## sedimentary carbonate : 10
## sedimentary other chemical or biochemical:112
## sedimentary siliciclastic : 1
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :58 0:35
## Marra Mamba Iron Formation :45 1:88
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 9
## Wittenoom Formation :10
##
##
## LATITUDE LONGITUDE
## Min. :-23.33 Min. :118.6
## 1st Qu.:-22.98 1st Qu.:118.9
## Median :-22.92 Median :119.0
## Mean :-22.88 Mean :119.0
## 3rd Qu.:-22.82 3rd Qu.:119.1
## Max. :-22.52 Max. :119.4
##
dim(data_test)
## [1] 123 30
summary(finalTest1)
## srain2mp bio21 bio16 slope
## Min. :0.1356 Min. :29.64 Min. :202.2 Min. : 0.08943
## 1st Qu.:0.1434 1st Qu.:29.74 1st Qu.:221.4 1st Qu.: 1.20730
## Median :0.1473 Median :29.76 Median :224.3 Median : 2.31213
## Mean :0.1475 Mean :29.75 Mean :225.8 Mean : 3.33565
## 3rd Qu.:0.1517 3rd Qu.:29.77 3rd Qu.:230.1 3rd Qu.: 4.51866
## Max. :0.1633 Max. :29.83 Max. :245.4 Max. :13.58547
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.01647 Min. :0.01810 Min. :0.04523 Min. : 0.0000
## 1st Qu.:0.60450 1st Qu.:0.07030 1st Qu.:0.08987 1st Qu.: 0.0000
## Median :1.14453 Median :0.09675 Median :0.13399 Median : 0.1104
## Mean :1.72459 Mean :0.11947 Mean :0.14705 Mean : 1.4945
## 3rd Qu.:2.42691 3rd Qu.:0.17444 3rd Qu.:0.20072 3rd Qu.: 2.4066
## Max. :7.05156 Max. :0.30012 Max. :0.36527 Max. :10.3639
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.1401 Min. :0.1951 Min. :1.10 Min. :0.0000
## 1st Qu.: 1.0510 1st Qu.:0.3434 1st Qu.:1.30 1st Qu.:0.0000
## Median : 2.4139 Median :0.4704 Median :1.30 Median :0.0000
## Mean : 3.6002 Mean :0.4613 Mean :1.29 Mean :0.4266
## 3rd Qu.: 5.1142 3rd Qu.:0.5202 3rd Qu.:1.30 3rd Qu.:0.0000
## Max. :17.0097 Max. :1.1404 Max. :1.60 Max. :4.5610
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.043
## Mean :0.3551 Mean :2.049 Mean :3.074 Mean : 1.081
## 3rd Qu.:0.5385 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043
## Max. :3.5544 Max. :3.000 Max. :7.000 Max. :10.000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. : 0.00132
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.01814
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.05924
## Mean :362.0352 Mean :1510.6472 Mean :1.352 Mean : 0.48230
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.20859
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 1.294 Min. :2453 September:11
## 1st Qu.: 7.096 1st Qu.:2454 June :10
## Median :17.310 Median :2506 January : 8
## Mean :27.576 Mean :2517 July : 8
## 3rd Qu.:53.489 3rd Qu.:2597 March : 7
## Max. :77.437 Max. :2597 April : 7
## (Other) :30
## HubName
## anticline, exposed :16
## concealed : 0
## exposed :10
## normal, exposed, tick on downthrown side:10
## overturned syncline, exposed : 6
## syncline, exposed :39
##
## UNITNAME
## Brockman Iron Formation :34
## Marra Mamba Iron Formation :32
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 4
## Wittenoom Formation : 9
##
##
## ROCKTYPE1
## sedimentary carbonate : 9
## sedimentary other chemical or biochemical:70
## sedimentary siliciclastic : 2
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :34 0:23
## Marra Mamba Iron Formation :32 1:58
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 4
## Wittenoom Formation : 9
##
##
## LATITUDE LONGITUDE
## Min. :-23.26 Min. :118.7
## 1st Qu.:-22.98 1st Qu.:118.9
## Median :-22.93 Median :119.0
## Mean :-22.88 Mean :119.0
## 3rd Qu.:-22.75 3rd Qu.:119.0
## Max. :-22.53 Max. :119.4
##
dim(finalTest1)
## [1] 81 30
data_test <- data_test[, -which(names(data2) %in% c("LATITUDE", "LONGITUDE"))]
finalTest <- finalTest1[, -which(names(data2) %in% c("LATITUDE", "LONGITUDE"))]
sapply(data_test,class);sapply(finalTest1,class)
## srain2mp bio21 bio16 slope
## "numeric" "numeric" "numeric" "numeric"
## rugg500s rugg500cv pil_twicv pil_topos
## "numeric" "numeric" "numeric" "numeric"
## pil_slps pil_slpcv nutrientsn mrvbf
## "numeric" "numeric" "numeric" "numeric"
## mrrtf minfertf lf7rup hstructn
## "numeric" "numeric" "numeric" "numeric"
## geolrngaggn geolmnaggn bdensity50n slopern
## "numeric" "numeric" "numeric" "numeric"
## HubDist MIN_AGE_MA month_collection HubName
## "numeric" "numeric" "factor" "factor"
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## "factor" "factor" "factor" "factor"
## srain2mp bio21 bio16 slope
## "numeric" "numeric" "numeric" "numeric"
## rugg500s rugg500cv pil_twicv pil_topos
## "numeric" "numeric" "numeric" "numeric"
## pil_slps pil_slpcv nutrientsn mrvbf
## "numeric" "numeric" "numeric" "numeric"
## mrrtf minfertf lf7rup hstructn
## "numeric" "numeric" "numeric" "numeric"
## geolrngaggn geolmnaggn bdensity50n slopern
## "numeric" "numeric" "numeric" "numeric"
## HubDist MIN_AGE_MA month_collection HubName
## "numeric" "numeric" "factor" "factor"
## UNITNAME ROCKTYPE1 FORMATION true_troglofauna
## "factor" "factor" "factor" "factor"
## LATITUDE LONGITUDE
## "numeric" "numeric"
head(data_test)
## srain2mp bio21 bio16 slope rugg500s rugg500cv pil_twicv pil_topos
## 157 0.14583 29.76095 223.8901 1.38940 1.07357 0.04715 0.25449 0.00000
## 407 0.15129 29.75593 222.2424 1.62006 1.98837 0.23432 0.08385 5.42778
## 258 0.14013 29.81410 212.6796 2.18761 2.66450 0.19765 0.12132 2.31889
## 1805 0.15167 29.72817 219.5120 5.65121 4.93231 0.24911 0.12501 6.50757
## 433 0.15929 29.70598 227.4977 9.74579 4.19078 0.07515 0.06091 0.29835
## 732 0.15565 29.76270 230.0836 7.95223 1.07037 0.08113 0.08246 0.00000
## pil_slps pil_slpcv nutrientsn mrvbf mrrtf minfertf lf7rup hstructn
## 157 6.63430 0.45474 1.3 0.66138 0 2 7 1.04286
## 407 1.17587 0.37240 1.3 0.57837 0 2 1 1.04286
## 258 1.74186 0.63626 1.3 0.00000 0 3 3 1.04286
## 1805 3.77481 0.78681 1.3 0.00000 0 2 3 1.04286
## 433 15.56535 0.65365 1.3 0.00000 0 2 1 1.04286
## 732 2.98576 0.35082 1.3 0.00000 0 3 3 1.04286
## geolrngaggn geolmnaggn bdensity50n slopern HubDist MIN_AGE_MA
## 157 300.0000 2.65e+03 1.36732 0.01070 2.927153 2597
## 407 300.0000 2.65e+03 1.36732 0.09888 8.014421 2597
## 258 0.0115 5.75e-03 1.36732 0.08187 1.564773 2506
## 1805 300.0000 2.65e+03 1.36732 0.07794 3.182699 2506
## 433 900.0000 2.05e+03 1.36732 0.06761 5.842786 2454
## 732 900.0000 2.05e+03 1.36732 0.01505 58.112543 2454
## month_collection HubName
## 157 January normal, exposed, tick on downthrown side
## 407 February exposed
## 258 May syncline, exposed
## 1805 October exposed
## 433 March exposed
## 732 April syncline, exposed
## UNITNAME ROCKTYPE1
## 157 Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 407 Marra Mamba Iron Formation sedimentary other chemical or biochemical
## 258 Wittenoom Formation sedimentary carbonate
## 1805 Wittenoom Formation sedimentary carbonate
## 433 Brockman Iron Formation sedimentary other chemical or biochemical
## 732 Brockman Iron Formation sedimentary other chemical or biochemical
## FORMATION true_troglofauna
## 157 Marra Mamba Iron Formation 1
## 407 Marra Mamba Iron Formation 1
## 258 Wittenoom Formation 1
## 1805 Wittenoom Formation 1
## 433 Brockman Iron Formation 1
## 732 Brockman Iron Formation 1
dim(data_test)
## [1] 123 28
summary(data_test)
## srain2mp bio21 bio16 slope
## Min. :0.1338 Min. :29.64 Min. :196.1 Min. : 0.08862
## 1st Qu.:0.1432 1st Qu.:29.74 1st Qu.:221.5 1st Qu.: 1.40481
## Median :0.1478 Median :29.76 Median :224.6 Median : 2.53936
## Mean :0.1477 Mean :29.75 Mean :226.3 Mean : 3.22529
## 3rd Qu.:0.1510 3rd Qu.:29.78 3rd Qu.:229.3 3rd Qu.: 4.02498
## Max. :0.1614 Max. :29.84 Max. :245.2 Max. :15.00457
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.03319 Min. :0.01076 Min. :0.03348 Min. : 0.000
## 1st Qu.:0.47334 1st Qu.:0.06180 1st Qu.:0.08586 1st Qu.: 0.000
## Median :1.32736 Median :0.08941 Median :0.12202 Median : 0.000
## Mean :1.78291 Mean :0.11772 Mean :0.14844 Mean : 1.631
## 3rd Qu.:2.53294 3rd Qu.:0.15165 3rd Qu.:0.20438 3rd Qu.: 3.480
## Max. :9.29160 Max. :0.43959 Max. :0.39956 Max. :10.110
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.1107 Min. :0.1951 Min. :1.100 Min. :0.0000
## 1st Qu.: 1.0131 1st Qu.:0.3506 1st Qu.:1.300 1st Qu.:0.0000
## Median : 2.7769 Median :0.4659 Median :1.300 Median :0.0000
## Mean : 4.1962 Mean :0.4726 Mean :1.315 Mean :0.3674
## 3rd Qu.: 6.6343 3rd Qu.:0.5579 3rd Qu.:1.300 3rd Qu.:0.0000
## Max. :16.6383 Max. :0.8918 Max. :1.600 Max. :4.5741
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :2.000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 0.4286
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.0429
## Mean :0.2309 Mean :2.041 Mean :3.008 Mean : 0.9729
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :2.7292 Max. :3.000 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. : 0.00344
## 1st Qu.: 2.5880 1st Qu.: 1.2940 1st Qu.:1.341 1st Qu.: 0.01630
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.03951
## Mean :335.2414 Mean :1308.5467 Mean :1.348 Mean : 0.40178
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.16592
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 1.236 Min. :2453 August :15
## 1st Qu.: 5.492 1st Qu.:2454 March :14
## Median :21.187 Median :2454 February:13
## Mean :29.232 Mean :2511 November:13
## 3rd Qu.:55.606 3rd Qu.:2597 May :11
## Max. :81.121 Max. :2597 April :10
## (Other) :47
## HubName
## anticline, exposed :18
## concealed : 1
## exposed :22
## normal, exposed, tick on downthrown side:12
## overturned syncline, exposed : 9
## syncline, exposed :61
##
## UNITNAME
## Brockman Iron Formation :58
## Marra Mamba Iron Formation :45
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 9
## Wittenoom Formation :10
##
##
## ROCKTYPE1
## sedimentary carbonate : 10
## sedimentary other chemical or biochemical:112
## sedimentary siliciclastic : 1
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :58 0:35
## Marra Mamba Iron Formation :45 1:88
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 9
## Wittenoom Formation :10
##
##
names(data_test)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION" "true_troglofauna"
head(finalTest)
## srain2mp bio21 bio16 slope rugg500s rugg500cv pil_twicv pil_topos
## 883 0.15706 29.67899 221.4037 0.89097 0.96237 0.10369 0.29959 5.08126
## 1171 0.14855 29.76123 226.2116 3.02034 3.73254 0.15767 0.16215 0.00000
## 307 0.15033 29.79736 216.0963 9.00398 1.80702 0.05613 0.11547 0.48605
## 1743 0.14529 29.76137 245.3545 1.21520 0.23500 0.14213 0.14027 0.00000
## 1820 0.15231 29.72789 245.1607 5.02147 6.69625 0.17764 0.23004 0.47942
## 176 0.14965 29.75139 224.3239 3.81876 0.58854 0.06064 0.15573 0.49352
## pil_slps pil_slpcv nutrientsn mrvbf mrrtf minfertf lf7rup hstructn
## 883 5.32363 1.14040 1.3 1.68056 0.00000 2 6 1.04286
## 1171 6.11757 0.38974 1.3 0.00000 0.00000 2 3 1.04286
## 307 7.15268 0.33769 1.3 0.00000 0.00000 2 2 1.04286
## 1743 0.24637 0.55030 1.6 0.00000 0.57837 2 3 0.00000
## 1820 7.74808 0.52370 1.1 0.00000 0.00000 2 2 0.42857
## 176 1.21059 0.19508 1.3 0.00000 0.00000 2 2 1.04286
## geolrngaggn geolmnaggn bdensity50n slopern HubDist MIN_AGE_MA
## 883 900.000 2050.000 1.36732 0.15237 2.123917 2454
## 1171 900.000 2050.000 1.36732 0.02540 60.107338 2454
## 307 900.000 2050.000 1.36732 0.00468 6.289232 2454
## 1743 2.588 1.294 1.25500 1.90476 36.325288 2454
## 1820 900.000 2050.000 1.34060 0.01748 51.666414 2454
## 176 300.000 2650.000 1.36732 0.01359 16.867923 2597
## month_collection HubName UNITNAME
## 883 September exposed Brockman Iron Formation
## 1171 April syncline, exposed Brockman Iron Formation
## 307 March syncline, exposed Brockman Iron Formation
## 1743 September syncline, exposed Brockman Iron Formation
## 1820 November syncline, exposed Brockman Iron Formation
## 176 May anticline, exposed Marra Mamba Iron Formation
## ROCKTYPE1 FORMATION
## 883 sedimentary other chemical or biochemical Brockman Iron Formation
## 1171 sedimentary other chemical or biochemical Brockman Iron Formation
## 307 sedimentary other chemical or biochemical Brockman Iron Formation
## 1743 sedimentary other chemical or biochemical Brockman Iron Formation
## 1820 sedimentary other chemical or biochemical Brockman Iron Formation
## 176 sedimentary other chemical or biochemical Marra Mamba Iron Formation
## true_troglofauna
## 883 1
## 1171 1
## 307 0
## 1743 1
## 1820 1
## 176 1
dim(finalTest)
## [1] 81 28
summary(finalTest)
## srain2mp bio21 bio16 slope
## Min. :0.1356 Min. :29.64 Min. :202.2 Min. : 0.08943
## 1st Qu.:0.1434 1st Qu.:29.74 1st Qu.:221.4 1st Qu.: 1.20730
## Median :0.1473 Median :29.76 Median :224.3 Median : 2.31213
## Mean :0.1475 Mean :29.75 Mean :225.8 Mean : 3.33565
## 3rd Qu.:0.1517 3rd Qu.:29.77 3rd Qu.:230.1 3rd Qu.: 4.51866
## Max. :0.1633 Max. :29.83 Max. :245.4 Max. :13.58547
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.01647 Min. :0.01810 Min. :0.04523 Min. : 0.0000
## 1st Qu.:0.60450 1st Qu.:0.07030 1st Qu.:0.08987 1st Qu.: 0.0000
## Median :1.14453 Median :0.09675 Median :0.13399 Median : 0.1104
## Mean :1.72459 Mean :0.11947 Mean :0.14705 Mean : 1.4945
## 3rd Qu.:2.42691 3rd Qu.:0.17444 3rd Qu.:0.20072 3rd Qu.: 2.4066
## Max. :7.05156 Max. :0.30012 Max. :0.36527 Max. :10.3639
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.1401 Min. :0.1951 Min. :1.10 Min. :0.0000
## 1st Qu.: 1.0510 1st Qu.:0.3434 1st Qu.:1.30 1st Qu.:0.0000
## Median : 2.4139 Median :0.4704 Median :1.30 Median :0.0000
## Mean : 3.6002 Mean :0.4613 Mean :1.29 Mean :0.4266
## 3rd Qu.: 5.1142 3rd Qu.:0.5202 3rd Qu.:1.30 3rd Qu.:0.0000
## Max. :17.0097 Max. :1.1404 Max. :1.60 Max. :4.5610
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.043
## Mean :0.3551 Mean :2.049 Mean :3.074 Mean : 1.081
## 3rd Qu.:0.5385 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043
## Max. :3.5544 Max. :3.000 Max. :7.000 Max. :10.000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. : 0.00132
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.01814
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.05924
## Mean :362.0352 Mean :1510.6472 Mean :1.352 Mean : 0.48230
## 3rd Qu.:900.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.20859
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 1.294 Min. :2453 September:11
## 1st Qu.: 7.096 1st Qu.:2454 June :10
## Median :17.310 Median :2506 January : 8
## Mean :27.576 Mean :2517 July : 8
## 3rd Qu.:53.489 3rd Qu.:2597 March : 7
## Max. :77.437 Max. :2597 April : 7
## (Other) :30
## HubName
## anticline, exposed :16
## concealed : 0
## exposed :10
## normal, exposed, tick on downthrown side:10
## overturned syncline, exposed : 6
## syncline, exposed :39
##
## UNITNAME
## Brockman Iron Formation :34
## Marra Mamba Iron Formation :32
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 4
## Wittenoom Formation : 9
##
##
## ROCKTYPE1
## sedimentary carbonate : 9
## sedimentary other chemical or biochemical:70
## sedimentary siliciclastic : 2
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :34 0:23
## Marra Mamba Iron Formation :32 1:58
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 4
## Wittenoom Formation : 9
##
##
names(finalTest)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION" "true_troglofauna"
ContraProva<-data_test$true_troglofauna
summary(ContraProva)
## 0 1
## 35 88
get_confusion_elements <- function(caret_confusion_matrix) {
tp <- as.numeric(caret_confusion_matrix$table[4]) # true positives
fn <- as.numeric(caret_confusion_matrix$table[3]) # false negatives
fp <- as.numeric(caret_confusion_matrix$table[2]) # false positives
tn <- as.numeric(caret_confusion_matrix$table[1]) # true negatives
return( c(tp, fp, tn, fn) )
}
calculate_mcc <- function(tp, fp, tn, fn) {
# calculates Matthews correlation coefficient
# tp - true positives
# fp - false positives
# tn - true negatives
# fn - false negatives
mcc <- ((tp * tn) - (fp * fn)) /
(sqrt( (tp + fp) * (tp + fn)) * sqrt((tn + fp) * (tn + fn)) )
return(mcc)
}
calculate_mcc1 <- function(caret_confusion_matrix) {
# calculates Matthews correlation coefficient
# tp - true positives
# fp - false positives
# tn - true negatives
# fn - false negatives
mcc <- ((caret_confusion_matrix$table[4] * caret_confusion_matrix$table[1]) - (caret_confusion_matrix$table[2] * caret_confusion_matrix$table[3])) /
(sqrt( (caret_confusion_matrix$table[4] + caret_confusion_matrix$table[2]) * (caret_confusion_matrix$table[4] + caret_confusion_matrix$table[3])) * sqrt((caret_confusion_matrix$table[1] + caret_confusion_matrix$table[2]) * (caret_confusion_matrix$table[1] + caret_confusion_matrix$table[3])) )
return(mcc)
}
calculate_F2 <- function(CM_predictions) {
dbF2_11<-((1+2^2)*CM_predictions$byClass["Precision"]*CM_predictions$byClass["Sensitivity"])/(2^2*CM_predictions$byClass["Precision"] + CM_predictions$byClass["Sensitivity"])
dbF2_11<-as.numeric(dbF2_11)
return(dbF2_11)
}
set.seed(78945)
grid <- expand.grid(.mtry=seq(from = 2, to = 26, by = 2))
trControl <- trainControl(
method = "repeatedcv", # Resampling method
repeats = 10, # Number of repetitions for repeated cross-validation
number = 5, # Number of folds in each iteration of cross-validation
classProbs = TRUE, # Calculate class probabilities
savePredictions = "final", # Save final predictions
summaryFunction = twoClassSummary # Function for summarizing results (assumed to be defined elsewhere)
)
rf_mtry <- train(
make.names(true_troglofauna) ~ ., # Formula for the model, predicting 'true_troglofauna' based on other columns
data = data_train, # Training data
method = "rf", # Random Forest method
strata = data_train$true_troglofauna, # Stratification based on the target variable
sampsize = c(min(sum(data_train$true_troglofauna == 0), sum(data_train$true_troglofauna == 1)),
min(sum(data_train$true_troglofauna == 1), sum(data_train$true_troglofauna == 1))),
metric = "ROC", # Evaluation metric (Receiver Operating Characteristic)
tuneGrid = grid,
trControl = trControl, # Control parameters for the training process
importance = TRUE, # Calculate variable importance
ntree = 500 # Number of trees in the Random Forest
)
#In the above code, sampsize = rep(sum(training$class == 1), 2) means both the classes will have same frequency.e.g. sampsize = c(100 cases of 0, 100 cases of 1).
rf_mtry
## Random Forest
##
## 205 samples
## 27 predictor
## 2 classes: 'X0', 'X1'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold, repeated 10 times)
## Summary of sample sizes: 164, 164, 164, 164, 164, 164, ...
## Resampling results across tuning parameters:
##
## mtry ROC Sens Spec
## 2 0.7587364 0.2459091 0.9430805
## 4 0.7535971 0.3392424 0.9088736
## 6 0.7429007 0.3409091 0.9012414
## 8 0.7405398 0.3660606 0.8916092
## 10 0.7332342 0.3628788 0.8827816
## 12 0.7281541 0.3745455 0.8806207
## 14 0.7268698 0.3730303 0.8779310
## 16 0.7284028 0.3760606 0.8751264
## 18 0.7225212 0.3725758 0.8697471
## 20 0.7202178 0.3798485 0.8663448
## 22 0.7228754 0.3828788 0.8615632
## 24 0.7192207 0.3778788 0.8608966
## 26 0.7137715 0.3863636 0.8602529
##
## ROC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
getTrainPerf(rf_mtry)
## TrainROC TrainSens TrainSpec method
## 1 0.7587364 0.2459091 0.9430805 rf
summary(rf_mtry)
## Length Class Mode
## call 8 -none- call
## type 1 -none- character
## predicted 205 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 410 matrix numeric
## oob.times 205 -none- numeric
## classes 2 -none- character
## importance 192 -none- numeric
## importanceSD 144 -none- numeric
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 205 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 48 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 4 -none- list
#str(rf_mtry)
rf_mtry$bestTune$mtry
## [1] 2
rf_mtry$finalModel
##
## Call:
## randomForest(x = x, y = y, ntree = 500, mtry = param$mtry, strata = ..1, sampsize = ..2, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 24.88%
## Confusion matrix:
## X0 X1 class.error
## X0 16 43 0.72881356
## X1 8 138 0.05479452
rf_mtry$finalModel$confusion
## X0 X1 class.error
## X0 16 43 0.72881356
## X1 8 138 0.05479452
summary(rf_mtry$pred)
## mtry pred obs X0 X1
## Min. :2 X0: 228 X0: 590 Min. :0.0100 Min. :0.1640
## 1st Qu.:2 X1:1822 X1:1460 1st Qu.:0.1140 1st Qu.:0.6640
## Median :2 Median :0.2000 Median :0.8000
## Mean :2 Mean :0.2448 Mean :0.7552
## 3rd Qu.:2 3rd Qu.:0.3360 3rd Qu.:0.8860
## Max. :2 Max. :0.8360 Max. :0.9900
## rowIndex Resample
## Min. : 1 Length:2050
## 1st Qu.: 52 Class :character
## Median :103 Mode :character
## Mean :103
## 3rd Qu.:154
## Max. :205
summary(rf_mtry$pred$pred)
## X0 X1
## 228 1822
summary(rf_mtry$pred$obs)
## X0 X1
## 590 1460
head(rf_mtry$pred$X0,20)
## [1] 0.562 0.240 0.244 0.434 0.158 0.058 0.050 0.174 0.150 0.128 0.062 0.146
## [13] 0.124 0.054 0.114 0.508 0.130 0.266 0.276 0.286
head(rf_mtry$pred$X1,20)
## [1] 0.438 0.760 0.756 0.566 0.842 0.942 0.950 0.826 0.850 0.872 0.938 0.854
## [13] 0.876 0.946 0.886 0.492 0.870 0.734 0.724 0.714
summary(data_train$true_troglofauna)
## 0 1
## 59 146
head(rf_mtry$pred,20)
## mtry pred obs X0 X1 rowIndex Resample
## 1 2 X0 X0 0.562 0.438 83 Fold5.Rep01
## 2 2 X1 X1 0.240 0.760 27 Fold4.Rep01
## 3 2 X1 X1 0.244 0.756 190 Fold2.Rep02
## 4 2 X1 X0 0.434 0.566 37 Fold4.Rep01
## 5 2 X1 X1 0.158 0.842 38 Fold3.Rep06
## 6 2 X1 X1 0.058 0.942 22 Fold3.Rep06
## 7 2 X1 X1 0.050 0.950 41 Fold2.Rep01
## 8 2 X1 X1 0.174 0.826 39 Fold2.Rep01
## 9 2 X1 X1 0.150 0.850 50 Fold4.Rep01
## 10 2 X1 X1 0.128 0.872 175 Fold3.Rep02
## 11 2 X1 X1 0.062 0.938 22 Fold4.Rep01
## 12 2 X1 X0 0.146 0.854 24 Fold4.Rep01
## 13 2 X1 X1 0.124 0.876 70 Fold5.Rep01
## 14 2 X1 X1 0.054 0.946 43 Fold4.Rep01
## 15 2 X1 X1 0.114 0.886 38 Fold2.Rep01
## 16 2 X0 X1 0.508 0.492 58 Fold5.Rep02
## 17 2 X1 X1 0.130 0.870 44 Fold3.Rep01
## 18 2 X1 X1 0.266 0.734 27 Fold4.Rep02
## 19 2 X1 X1 0.276 0.724 205 Fold2.Rep02
## 20 2 X1 X0 0.286 0.714 188 Fold5.Rep09
sapply(rf_mtry$pred, class)
## mtry pred obs X0 X1 rowIndex
## "numeric" "factor" "factor" "numeric" "numeric" "integer"
## Resample
## "character"
rf_mtry$results$Sens
## [1] 0.2459091 0.3392424 0.3409091 0.3660606 0.3628788 0.3745455 0.3730303
## [8] 0.3760606 0.3725758 0.3798485 0.3828788 0.3778788 0.3863636
rf_mtry$results$SensSD
## [1] 0.1182204 0.1335718 0.1154007 0.1215059 0.1143735 0.1073794 0.1090076
## [8] 0.1159349 0.1213540 0.1258394 0.1124938 0.1267171 0.1120326
rf_mtry$results$Spec
## [1] 0.9430805 0.9088736 0.9012414 0.8916092 0.8827816 0.8806207 0.8779310
## [8] 0.8751264 0.8697471 0.8663448 0.8615632 0.8608966 0.8602529
rf_mtry$results$SpecSD
## [1] 0.03973404 0.03829715 0.03741237 0.04376342 0.04602536 0.04531782
## [7] 0.04626581 0.04820083 0.04951083 0.04659625 0.04763386 0.05113169
## [13] 0.04677013
rf_mtry$results$ROC
## [1] 0.7587364 0.7535971 0.7429007 0.7405398 0.7332342 0.7281541 0.7268698
## [8] 0.7284028 0.7225212 0.7202178 0.7228754 0.7192207 0.7137715
rf_mtry$results$ROCSD
## [1] 0.06641356 0.06675007 0.05969507 0.05936158 0.05996436 0.06182858
## [7] 0.06063451 0.06119038 0.05918342 0.05979263 0.06102751 0.05808001
## [13] 0.06219119
rf_mtry$finalModel$confusion
## X0 X1 class.error
## X0 16 43 0.72881356
## X1 8 138 0.05479452
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
result<-data.frame(Sens=rf_mtry$results$Sens,SensSD= rf_mtry$results$SensSD,Spec=rf_mtry$results$Spec,SpecSD= rf_mtry$results$SpecSD, ROC=rf_mtry$results$ROC,ROCSD= rf_mtry$results$ROCSD)
result
## Sens SensSD Spec SpecSD ROC ROCSD
## 1 0.2459091 0.1182204 0.9430805 0.03973404 0.7587364 0.06641356
## 2 0.3392424 0.1335718 0.9088736 0.03829715 0.7535971 0.06675007
## 3 0.3409091 0.1154007 0.9012414 0.03741237 0.7429007 0.05969507
## 4 0.3660606 0.1215059 0.8916092 0.04376342 0.7405398 0.05936158
## 5 0.3628788 0.1143735 0.8827816 0.04602536 0.7332342 0.05996436
## 6 0.3745455 0.1073794 0.8806207 0.04531782 0.7281541 0.06182858
## 7 0.3730303 0.1090076 0.8779310 0.04626581 0.7268698 0.06063451
## 8 0.3760606 0.1159349 0.8751264 0.04820083 0.7284028 0.06119038
## 9 0.3725758 0.1213540 0.8697471 0.04951083 0.7225212 0.05918342
## 10 0.3798485 0.1258394 0.8663448 0.04659625 0.7202178 0.05979263
## 11 0.3828788 0.1124938 0.8615632 0.04763386 0.7228754 0.06102751
## 12 0.3778788 0.1267171 0.8608966 0.05113169 0.7192207 0.05808001
## 13 0.3863636 0.1120326 0.8602529 0.04677013 0.7137715 0.06219119
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
V_Imp_rf_mtry <- varImp(rf_mtry, scale = FALSE)
V_Imp_rf_mtry
## rf variable importance
##
## only 20 most important variables shown (out of 48)
##
## Importance
## HubNamenormal, exposed, tick on downthrown side 7.618
## month_collectionMarch 6.297
## bio21 6.212
## srain2mp 5.182
## bio16 4.945
## pil_slps 4.270
## nutrientsn 3.940
## rugg500s 3.677
## slope 3.410
## lf7rup 3.328
## month_collectionJune 3.291
## bdensity50n 3.133
## month_collectionSeptember 3.043
## pil_topos 2.896
## month_collectionAugust 2.597
## hstructn 2.590
## geolrngaggn 2.583
## UNITNAMEWittenoom Formation 2.363
## HubNamesyncline, exposed 2.312
## HubDist 2.266
plot(V_Imp_rf_mtry, main="Variable Importance - Area_01")

V_Imp_rf_mtry <- varImp(rf_mtry, scale = TRUE)
V_Imp_rf_mtry
## rf variable importance
##
## only 20 most important variables shown (out of 48)
##
## Importance
## HubNamenormal, exposed, tick on downthrown side 100.00
## month_collectionMarch 85.22
## bio21 84.26
## srain2mp 72.75
## bio16 70.09
## pil_slps 62.55
## nutrientsn 58.85
## rugg500s 55.91
## slope 52.92
## lf7rup 52.00
## month_collectionJune 51.59
## bdensity50n 49.82
## month_collectionSeptember 48.82
## pil_topos 47.17
## month_collectionAugust 43.83
## hstructn 43.75
## geolrngaggn 43.67
## UNITNAMEWittenoom Formation 41.21
## HubNamesyncline, exposed 40.64
## HubDist 40.12
plot(V_Imp_rf_mtry, main="Variable Importance - Area_01")

library(pdp)
## Warning: package 'pdp' was built under R version 4.3.3
name2<-c("1","2","3","4","5","6","7","8","9","10","11","12")
pd_month_collection <- partial(rf_mtry, pred.var = "month_collection",quantiles=F,prob=TRUE,which.class="X1",grid.resolution=100)
# Set file path for saving the plot
file_path <- "C:/Users/23276776/OneDrive - UWA/DARE/9 - Paper 1/1_Figures/Arrumando_Figuras_LaTExxxX/Area2_PDP_month_collection.pdf"
# Open a PDF device
pdf(file = file_path, width = 9, height = 6) # Adjust width and height as needed
# Create the plot
plot(pd_month_collection,
main = "Area 2 - Month of collection",
ylim = c(0.0, 1.0),
names = name2,
cex.lab = 1.4, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.6, # Font size for any other text
ylab = "Predicted outcome (yhat)" # Set the y-axis title
) # Adjust axis number font size
# Add a red dashed line at y = 0.8
#abline(h = 0.8, col = "red", lty = 2, lwd = 2) # `lty = 2` for dashed line, `lwd` for line width
text(1.5, 0.1,
"1 - Jan\n2 - Feb\n3 - Mar\n4 - Apr\n5 - May\n6 - Jun",
cex = 1.4,
adj = c(0, 0))
text(3.5, 0.1,
"7 - Jul\n8 - Aug\n9 - Sept\n10 - Oct\n11 - Nov\n12 - Dec",
cex = 1.4,
adj = c(0, 0))
# Close the PDF device
dev.off()
## png
## 2
# Define names for the categories in UNITNAME
unitname_labels <- c("Brockman Iron Formation",
"Wittenoom Formation",
"Weeli Wolli Formation",
"Mount McRae Shale and Mount Sylvia Formation",
"Marra Mamba Iron Formation")
# Compute partial dependence for UNITNAME
pd_unitname <- partial(rf_mtry, pred.var = "UNITNAME", quantiles = FALSE, prob = TRUE, which.class = "X1", grid.resolution = 100)
# Create the plot
plot(pd_unitname,
main = "Partial Dependence of UNITNAME",
ylim = c(0.0, 1.0),
names = unitname_labels,
cex.lab = 1, # Font size for axis titles
cex.axis = 1, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.6, # Font size for any other text
ylab = "Predicted Outcome (yhat)", # Set the y-axis title
las = 2 # Rotate x-axis labels for better readability
)
# Add text annotations if needed (example for grouping units)
text(1.5, 0.1,
"Brockman Iron Formation\nWittenoom Formation",
cex = 1.4,
adj = c(0, 0))
text(3.5, 0.1,
"Mount McRae Shale and Mount Sylvia Formation\nMarra Mamba Iron Formation",
cex = 1.4,
adj = c(0, 0))

pd_bio21 <- partial(rf_mtry, pred.var = "bio21",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_bio21, main = "Area_02 - Highest Period Radiation",
ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9 # Font size for any other text
) # Set color for histogram bars

# Histogram of bio21
hist(data2$bio21,
main = "Histogram of bio21",
xlab = "bio21",
border = "black",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9, # Font size for any other text
col = "aquamarine4") # Set color for histogram bars

pd_srain2mp <- partial(rf_mtry, pred.var = "srain2mp",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_srain2mp, main = "Area_02 - Equinox rainfall seasonality ratio", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9 # Font size for any other text
)

# Histogram of srain2mp
hist(data2$srain2mp,
main = "Histogram of srain2mp",
xlab = "srain2mp",
border = "black",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9, # Font size for any other text
col = "aquamarine4") # Set color for histogram bars

pd_bio16 <- partial(rf_mtry, pred.var = "bio16",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_bio16, main = "Area_02 - Precipitation of Wettest Quarter", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9 # Font size for any other text
) # Font size for any other text

# Histogram of bio16
hist(data2$bio16,
main = "Histogram of bio16",
xlab = "bio16",
border = "black",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9, # Font size for any other text
col = "aquamarine4") # Set color for histogram bars

pd_pil_slps <- partial(rf_mtry, pred.var = "pil_slps",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_pil_slps, main = "Area_02 - Standard deviation of percent slope", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9 # Font size for any other text
)

# Histogram of pil_slps
hist(data2$pil_slps,
main = "Histogram of pil_slps",
xlab = "pil_slps",
border = "black",
cex.lab = 1.5, # Font size for axis titles
cex.axis = 1.5, # Font size for axis numbers
cex.main = 1.5, # Font size for the title
cex = 1.5, # Font size for any other text
col = "aquamarine4") # Set color for histogram bars

pd_nutrientsn <- partial(rf_mtry, pred.var = "nutrientsn",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_nutrientsn, main = "Area_02 - Gross soil nutrient status", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9
) # Font size for any other text

# Histogram of nutrientsn
hist(data2$nutrientsn,
main = "Histogram of nutrientsn",
xlab = "nutrientsn",
border = "black",
cex.lab = 1.9, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9, # Font size for any other text
col = "aquamarine4"
) # Set color for histogram bars

pd_rugg500s <- partial(rf_mtry, pred.var = "rugg500s",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_rugg500s, main = "Area_02 - Standard deviation of terrain ruggedness", ylab = "Predicted outcome (yhat)",
cex.lab = 1.8, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9
) # Font size for any other text

pd_pil_topos <- partial(rf_mtry, pred.var = "pil_topos",type = "classification",prob=TRUE, which.class="X1",grid.resolution=100)
plot(pd_pil_topos, main = "Area_02 - Standard deviation of topographic class", ylab = "Predicted outcome (yhat)",
cex.lab = 1.8, # Font size for axis titles
cex.axis = 1.9, # Font size for axis numbers
cex.main = 1.9, # Font size for the title
cex = 1.9
) # Font size for any other text

Export
# Export to PDF
pdf("varA2.pdf", width = 14, height = 20) # Set appropriate dimensions
# Set up a 5x2 layout
par(mfrow = c(5, 2), # 5 rows and 2 columns
mar = c(4.5, 4.5, 2, 1), # Margins for each plot (bottom, left, top, right)
oma = c(2, 2, 2, 2)) # Outer margins
# PDP and histogram pairs
# 5. bio21
plot(pd_bio21, main = "Area 2 - Highest Period Radiation", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$bio21)
hist(data2$bio21,
main = "Histogram of bio21", xlab = "bio21",
border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "aquamarine4")
rug(data_train$bio21)
# 4. srain2mp
plot(pd_srain2mp, main = "Area 2 - Equinox rainfall seasonality ratio", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$srain2mp)
hist(data2$srain2mp,
main = "Histogram of srain2mp", xlab = "srain2mp",
border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "aquamarine4")
rug(data_train$srain2mp)
# 3. bio16
plot(pd_bio16, main = "Area 2 - Precipitation of wettest quarter", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$bio16)
hist(data2$bio16,
main = "Histogram of bio16", xlab = "bio16",
border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "aquamarine4")
rug(data_train$bio16)
# 2. pil_slps
plot(pd_pil_slps, main = "Area 2 - Standard deviation of percent slope", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$pil_slps)
hist(data2$pil_slps,
main = "Histogram of pil_slps", xlab = "pil_slps",
border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "aquamarine4")
rug(data_train$pil_slps)
# 1. nutrientsn
plot(pd_nutrientsn, main = "Area 2 - Gross soil nutrient status", ylab = "Predicted outcome (yhat)",
cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9)
rug(data_train$nutrientsn)
hist(data2$nutrientsn,
main = "Histogram of nutrientsn", xlab = "nutrientsn",
border = "black", cex.lab = 1.9, cex.axis = 1.9, cex.main = 1.9, cex = 1.9, col = "aquamarine4")
rug(data_train$nutrientsn)
# Close the PDF device
dev.off()
## png
## 2
dim(data_test)
## [1] 123 28
colnames(data_test)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION" "true_troglofauna"
selectcol_data_test <- data_test[, -c((ncol(data_test) - 0):ncol(data_test))]
dim(selectcol_data_test)
## [1] 123 27
names(selectcol_data_test)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION"
set.seed(78945)
predictions <- predict(rf_mtry, newdata = selectcol_data_test,type = "prob")#
head(predictions,5)
## X0 X1
## 157 0.442 0.558
## 407 0.420 0.580
## 258 0.220 0.780
## 1805 0.248 0.752
## 433 0.442 0.558
dim(predictions)
## [1] 123 2
sapply(predictions,class)
## X0 X1
## "numeric" "numeric"
summary(predictions)
## X0 X1
## Min. :0.0100 Min. :0.3620
## 1st Qu.:0.1070 1st Qu.:0.6840
## Median :0.2020 Median :0.7980
## Mean :0.2308 Mean :0.7692
## 3rd Qu.:0.3160 3rd Qu.:0.8930
## Max. :0.6380 Max. :0.9900
set.seed(78945)
predictions_raw <- predict(rf_mtry, newdata = selectcol_data_test,type = "raw")#
head(predictions_raw,5)
## [1] X1 X1 X1 X1 X1
## Levels: X0 X1
length(predictions_raw)
## [1] 123
head(sapply(predictions_raw,class))
## [1] "factor" "factor" "factor" "factor" "factor" "factor"
summary(predictions_raw)
## X0 X1
## 9 114
set.seed(78945)
predictions1 <- predict(rf_mtry, newdata = selectcol_data_test)
head(predictions1,5)
## [1] X1 X1 X1 X1 X1
## Levels: X0 X1
length(predictions1)
## [1] 123
head(sapply(predictions1,class))
## [1] "factor" "factor" "factor" "factor" "factor" "factor"
summary(predictions1)
## X0 X1
## 9 114
levels(predictions1) <- c(0,1)
head(predictions1,5)
## [1] 1 1 1 1 1
## Levels: 0 1
summary(predictions1)
## 0 1
## 9 114
set.seed(78945)
CM_predictions1<-confusionMatrix(predictions1, ContraProva,positive="1")
CM_predictions1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 4 5
## 1 31 83
##
## Accuracy : 0.7073
## 95% CI : (0.6185, 0.7859)
## No Information Rate : 0.7154
## P-Value [Acc > NIR] : 0.6227
##
## Kappa : 0.074
##
## Mcnemar's Test P-Value : 3.091e-05
##
## Sensitivity : 0.9432
## Specificity : 0.1143
## Pos Pred Value : 0.7281
## Neg Pred Value : 0.4444
## Prevalence : 0.7154
## Detection Rate : 0.6748
## Detection Prevalence : 0.9268
## Balanced Accuracy : 0.5287
##
## 'Positive' Class : 1
##
str(CM_predictions1)
## List of 6
## $ positive: chr "1"
## $ table : 'table' int [1:2, 1:2] 4 31 5 83
## ..- attr(*, "dimnames")=List of 2
## .. ..$ Prediction: chr [1:2] "0" "1"
## .. ..$ Reference : chr [1:2] "0" "1"
## $ overall : Named num [1:7] 0.707 0.074 0.619 0.786 0.715 ...
## ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
## $ byClass : Named num [1:11] 0.943 0.114 0.728 0.444 0.728 ...
## ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
## $ mode : chr "sens_spec"
## $ dots : list()
## - attr(*, "class")= chr "confusionMatrix"
CM_predictions1$byClass[1]#Sensitivity de CM_predictions1
## Sensitivity
## 0.9431818
CM_predictions1$byClass[2]#Specificity de CM_predictions1
## Specificity
## 0.1142857
CM_predictions1$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9431818 0.1142857 0.7280702
## Neg Pred Value Precision Recall
## 0.4444444 0.7280702 0.9431818
## F1 Prevalence Detection Rate
## 0.8217822 0.7154472 0.6747967
## Detection Prevalence Balanced Accuracy
## 0.9268293 0.5287338
CM_predictions1$byClass["Sensitivity"]
## Sensitivity
## 0.9431818
CM_predictions1$byClass[1]
## Sensitivity
## 0.9431818
CM_predictions1$byClass["Balanced Accuracy"]
## Balanced Accuracy
## 0.5287338
CM_predictions1$byClass[11]
## Balanced Accuracy
## 0.5287338
CM_predictions1$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.073171e-01 7.402760e-02 6.185043e-01 7.858625e-01 7.154472e-01
## AccuracyPValue McnemarPValue
## 6.227260e-01 3.090859e-05
CM_predictions1$overall["McnemarPValue"]
## McnemarPValue
## 3.090859e-05
CM_predictions1$overall[7]
## McnemarPValue
## 3.090859e-05
CM_predictions1$table
## Reference
## Prediction 0 1
## 0 4 5
## 1 31 83
tn<-CM_predictions1$table[1]#TN
fp<-CM_predictions1$table[2]#FP
fn<-CM_predictions1$table[3]#FN
tp<-CM_predictions1$table[4]#TP
#Youden's J statistic
J_CM_predictions1<-(CM_predictions1$byClass[1] + CM_predictions1$byClass[2]) - 1
J_CM_predictions1
## Sensitivity
## 0.05746753
J_CM_predictions1<-as.numeric(CM_predictions1$byClass[1] + CM_predictions1$byClass[2]) - 1
J_CM_predictions1
## [1] 0.05746753
rf_mtry$finalModel$forest$cutoff
## [1] 0.5 0.5
mcc(predictions1, ContraProva)
## [1] 0.09956902
get_confusion_elements(CM_predictions1)
## [1] 83 31 4 5
get_confusion_elements(CM_predictions1)[1]
## [1] 83
calculate_mcc(tp, fp, tn, fn)
## [1] 0.09956902
calculate_mcc1(CM_predictions1)
## [1] 0.09956902
calculate_F2(CM_predictions1)
## [1] 0.8905579
model_pred_class <- ifelse(predictions < 0.5, "X0", "X1")
head(model_pred_class,5)
## X0 X1
## 157 "X0" "X1"
## 407 "X0" "X1"
## 258 "X0" "X1"
## 1805 "X0" "X1"
## 433 "X0" "X1"
dim(model_pred_class)
## [1] 123 2
length(ContraProva)
## [1] 123
summary(model_pred_class)
## X0 X1
## Length:123 Length:123
## Class :character Class :character
## Mode :character Mode :character
Test1<-as.factor(model_pred_class[,2])
head(Test1)
## 157 407 258 1805 433 732
## X1 X1 X1 X1 X1 X1
## Levels: X0 X1
summary(Test1)
## X0 X1
## 9 114
levels(Test1) <- c(0,1)
summary(Test1)
## 0 1
## 9 114
head(Test1)
## 157 407 258 1805 433 732
## 1 1 1 1 1 1
## Levels: 0 1
confusionMatrix(Test1, ContraProva,positive="1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 4 5
## 1 31 83
##
## Accuracy : 0.7073
## 95% CI : (0.6185, 0.7859)
## No Information Rate : 0.7154
## P-Value [Acc > NIR] : 0.6227
##
## Kappa : 0.074
##
## Mcnemar's Test P-Value : 3.091e-05
##
## Sensitivity : 0.9432
## Specificity : 0.1143
## Pos Pred Value : 0.7281
## Neg Pred Value : 0.4444
## Prevalence : 0.7154
## Detection Rate : 0.6748
## Detection Prevalence : 0.9268
## Balanced Accuracy : 0.5287
##
## 'Positive' Class : 1
##
summary(Test1); summary(predictions1)
## 0 1
## 9 114
## 0 1
## 9 114
model_pred_class <- ifelse(predictions < 0.7, "X0", "X1")
head(model_pred_class,20)
## X0 X1
## 157 "X0" "X0"
## 407 "X0" "X0"
## 258 "X0" "X1"
## 1805 "X0" "X1"
## 433 "X0" "X0"
## 732 "X0" "X1"
## 1475 "X0" "X1"
## 957 "X0" "X1"
## 1704 "X0" "X1"
## 108 "X0" "X0"
## 961 "X0" "X1"
## 180 "X0" "X1"
## 665 "X0" "X1"
## 1119 "X0" "X1"
## 1369 "X0" "X0"
## 943 "X0" "X1"
## 283 "X0" "X1"
## 788 "X0" "X1"
## 605 "X0" "X1"
## 1601 "X0" "X1"
dim(model_pred_class)
## [1] 123 2
length(ContraProva)
## [1] 123
head(sapply(model_pred_class, class))
## X0 X0 X0 X0 X0 X0
## "character" "character" "character" "character" "character" "character"
Test11<-as.factor(model_pred_class[,2])
summary(Test11)
## X0 X1
## 34 89
head(Test11)
## 157 407 258 1805 433 732
## X0 X0 X1 X1 X0 X1
## Levels: X0 X1
levels(Test11) <- c(0,1)
summary(Test11)
## 0 1
## 34 89
summary(predictions1)
## 0 1
## 9 114
CM_Test11<-confusionMatrix(Test11, ContraProva,positive="1")
CM_Test11
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 19 15
## 1 16 73
##
## Accuracy : 0.748
## 95% CI : (0.6617, 0.8219)
## No Information Rate : 0.7154
## P-Value [Acc > NIR] : 0.2445
##
## Kappa : 0.3756
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.8295
## Specificity : 0.5429
## Pos Pred Value : 0.8202
## Neg Pred Value : 0.5588
## Prevalence : 0.7154
## Detection Rate : 0.5935
## Detection Prevalence : 0.7236
## Balanced Accuracy : 0.6862
##
## 'Positive' Class : 1
##
str(CM_Test11)
## List of 6
## $ positive: chr "1"
## $ table : 'table' int [1:2, 1:2] 19 16 15 73
## ..- attr(*, "dimnames")=List of 2
## .. ..$ Prediction: chr [1:2] "0" "1"
## .. ..$ Reference : chr [1:2] "0" "1"
## $ overall : Named num [1:7] 0.748 0.376 0.662 0.822 0.715 ...
## ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
## $ byClass : Named num [1:11] 0.83 0.543 0.82 0.559 0.82 ...
## ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
## $ mode : chr "sens_spec"
## $ dots : list()
## - attr(*, "class")= chr "confusionMatrix"
CM_Test11$byClass[1]#Sensitivity de CM_predictions1
## Sensitivity
## 0.8295455
CM_Test11$byClass[2]#Specificity de CM_predictions1
## Specificity
## 0.5428571
J_CM_Test11<-as.numeric(CM_Test11$byClass[1] + CM_Test11$byClass[2] - 1) ; J_CM_Test11
## [1] 0.3724026
confusionMatrix(predictions1, ContraProva,positive="1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 4 5
## 1 31 83
##
## Accuracy : 0.7073
## 95% CI : (0.6185, 0.7859)
## No Information Rate : 0.7154
## P-Value [Acc > NIR] : 0.6227
##
## Kappa : 0.074
##
## Mcnemar's Test P-Value : 3.091e-05
##
## Sensitivity : 0.9432
## Specificity : 0.1143
## Pos Pred Value : 0.7281
## Neg Pred Value : 0.4444
## Prevalence : 0.7154
## Detection Rate : 0.6748
## Detection Prevalence : 0.9268
## Balanced Accuracy : 0.5287
##
## 'Positive' Class : 1
##
dim(finalTest)
## [1] 81 28
names(finalTest)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION" "true_troglofauna"
selectcol_finalTest <- finalTest[, -c((ncol(finalTest) - 0):ncol(finalTest))]
dim(selectcol_finalTest)
## [1] 81 27
names(selectcol_finalTest)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION"
predictions_2 <- predict(rf_mtry, newdata = selectcol_finalTest,type = "raw")
summary(predictions_2)
## X0 X1
## 4 77
summary(finalTest[, ncol(finalTest)])
## 0 1
## 23 58
levels(predictions_2) <- c(0,1)
summary(predictions_2)
## 0 1
## 4 77
CM_predictions_2<-confusionMatrix(predictions_2, finalTest[, ncol(finalTest)],positive="1")
CM_predictions_2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 3 1
## 1 20 57
##
## Accuracy : 0.7407
## 95% CI : (0.6314, 0.8318)
## No Information Rate : 0.716
## P-Value [Acc > NIR] : 0.3618
##
## Kappa : 0.1508
##
## Mcnemar's Test P-Value : 8.568e-05
##
## Sensitivity : 0.9828
## Specificity : 0.1304
## Pos Pred Value : 0.7403
## Neg Pred Value : 0.7500
## Prevalence : 0.7160
## Detection Rate : 0.7037
## Detection Prevalence : 0.9506
## Balanced Accuracy : 0.5566
##
## 'Positive' Class : 1
##
mcc(predictions_2, finalTest[, ncol(finalTest)])
## [1] 0.235572
calculate_mcc1(CM_predictions_2)
## [1] 0.235572
calculate_F2(CM_predictions_2)
## [1] 0.9223301
predictions_3<- predict(rf_mtry, newdata = selectcol_finalTest,type = "prob")
summary(predictions_3)
## X0 X1
## Min. :0.0260 Min. :0.3520
## 1st Qu.:0.1080 1st Qu.:0.6740
## Median :0.1960 Median :0.8040
## Mean :0.2254 Mean :0.7746
## 3rd Qu.:0.3260 3rd Qu.:0.8920
## Max. :0.6480 Max. :0.9740
sapply(predictions_3, class)
## X0 X1
## "numeric" "numeric"
head(predictions_3)
## X0 X1
## 883 0.330 0.670
## 1171 0.158 0.842
## 307 0.470 0.530
## 1743 0.118 0.882
## 1820 0.220 0.780
## 176 0.076 0.924
model_pred_class_3 <- ifelse(predictions_3 < 0.5, "X0", "X1")
head(sapply(model_pred_class_3, class))
## X0 X0 X0 X0 X0 X0
## "character" "character" "character" "character" "character" "character"
Test_3<-as.factor(model_pred_class_3[,2])
head(sapply(Test_3, class))
## 883 1171 307 1743 1820 176
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_3)
## [1] "X0" "X1"
head(Test_3)
## 883 1171 307 1743 1820 176
## X1 X1 X1 X1 X1 X1
## Levels: X0 X1
levels(Test_3) <- c(0,1)
head(sapply(Test_3, class))
## 883 1171 307 1743 1820 176
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_3)
## [1] "0" "1"
head(Test_3)
## 883 1171 307 1743 1820 176
## 1 1 1 1 1 1
## Levels: 0 1
summary(Test_3)
## 0 1
## 4 77
CM_predictions_3<-confusionMatrix(Test_3, finalTest[, ncol(finalTest)],positive="1")
CM_predictions_3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 3 1
## 1 20 57
##
## Accuracy : 0.7407
## 95% CI : (0.6314, 0.8318)
## No Information Rate : 0.716
## P-Value [Acc > NIR] : 0.3618
##
## Kappa : 0.1508
##
## Mcnemar's Test P-Value : 8.568e-05
##
## Sensitivity : 0.9828
## Specificity : 0.1304
## Pos Pred Value : 0.7403
## Neg Pred Value : 0.7500
## Prevalence : 0.7160
## Detection Rate : 0.7037
## Detection Prevalence : 0.9506
## Balanced Accuracy : 0.5566
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_predictions_3)
## [1] 0.235572
calculate_F2(CM_predictions_3)
## [1] 0.9223301
testrf_mtry<- predict(rf_mtry)
summary(testrf_mtry)
## X0 X1
## 42 163
levels(testrf_mtry) <- c(0,1);
summary(testrf_mtry)
## 0 1
## 42 163
CM_testrf_mtry<-confusionMatrix(testrf_mtry, data_train[, ncol(data_train)],positive="1");
CM_testrf_mtry
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 41 1
## 1 18 145
##
## Accuracy : 0.9073
## 95% CI : (0.859, 0.9433)
## No Information Rate : 0.7122
## P-Value [Acc > NIR] : 7.833e-12
##
## Kappa : 0.7527
##
## Mcnemar's Test P-Value : 0.0002419
##
## Sensitivity : 0.9932
## Specificity : 0.6949
## Pos Pred Value : 0.8896
## Neg Pred Value : 0.9762
## Prevalence : 0.7122
## Detection Rate : 0.7073
## Detection Prevalence : 0.7951
## Balanced Accuracy : 0.8440
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_testrf_mtry)
## [1] 0.7718165
calculate_F2(CM_testrf_mtry)
## [1] 0.9705489
testrf_mtry1<- predict(rf_mtry,type="prob")
summary(testrf_mtry1)
## X0 X1
## Min. :0.0020 Min. :0.1540
## 1st Qu.:0.0600 1st Qu.:0.5780
## Median :0.1180 Median :0.8820
## Mean :0.2429 Mean :0.7571
## 3rd Qu.:0.4220 3rd Qu.:0.9400
## Max. :0.8460 Max. :0.9980
model_pred_class_4 <- ifelse(testrf_mtry1 < 0.5, "X0", "X1")
summary(model_pred_class_4)
## X0 X1
## Length:205 Length:205
## Class :character Class :character
## Mode :character Mode :character
Test_4<-as.factor(model_pred_class_4[,2])
head(Test_4)
## 1001 171 865 1616 1750 903
## X1 X0 X0 X1 X1 X1
## Levels: X0 X1
levels(Test_4) <- c(0,1);
summary(Test_4)
## 0 1
## 42 163
CM_testrf_mtry1<-confusionMatrix(Test_4, data_train[, ncol(data_train)],positive="1");
CM_testrf_mtry1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 41 1
## 1 18 145
##
## Accuracy : 0.9073
## 95% CI : (0.859, 0.9433)
## No Information Rate : 0.7122
## P-Value [Acc > NIR] : 7.833e-12
##
## Kappa : 0.7527
##
## Mcnemar's Test P-Value : 0.0002419
##
## Sensitivity : 0.9932
## Specificity : 0.6949
## Pos Pred Value : 0.8896
## Neg Pred Value : 0.9762
## Prevalence : 0.7122
## Detection Rate : 0.7073
## Detection Prevalence : 0.7951
## Balanced Accuracy : 0.8440
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_testrf_mtry1)
## [1] 0.7718165
calculate_F2(CM_testrf_mtry1)
## [1] 0.9705489
prediction.probabilities <- predictions[,"X1"]
head(prediction.probabilities,5)
## [1] 0.558 0.580 0.780 0.752 0.558
res.roc <-roc(ContraProva,prediction.probabilities)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
res.roc
##
## Call:
## roc.default(response = ContraProva, predictor = prediction.probabilities)
##
## Data: prediction.probabilities in 35 controls (ContraProva 0) < 88 cases (ContraProva 1).
## Area under the curve: 0.781
str(res.roc)
## List of 15
## $ percent : logi FALSE
## $ sensitivities : num [1:100] 1 1 1 0.989 0.977 ...
## $ specificities : num [1:100] 0 0.0286 0.0571 0.0571 0.0571 ...
## $ thresholds : num [1:100] -Inf 0.383 0.414 0.425 0.43 ...
## $ direction : chr "<"
## $ cases : num [1:88] 0.558 0.58 0.78 0.752 0.558 0.748 0.92 0.9 0.78 0.766 ...
## $ controls : num [1:35] 0.752 0.556 0.896 0.578 0.538 0.656 0.708 0.648 0.818 0.772 ...
## $ fun.sesp :function (thresholds, controls, cases, direction)
## $ auc : 'auc' num 0.781
## ..- attr(*, "partial.auc")= logi FALSE
## ..- attr(*, "percent")= logi FALSE
## ..- attr(*, "roc")=List of 15
## .. ..$ percent : logi FALSE
## .. ..$ sensitivities : num [1:100] 1 1 1 0.989 0.977 ...
## .. ..$ specificities : num [1:100] 0 0.0286 0.0571 0.0571 0.0571 ...
## .. ..$ thresholds : num [1:100] -Inf 0.383 0.414 0.425 0.43 ...
## .. ..$ direction : chr "<"
## .. ..$ cases : num [1:88] 0.558 0.58 0.78 0.752 0.558 0.748 0.92 0.9 0.78 0.766 ...
## .. ..$ controls : num [1:35] 0.752 0.556 0.896 0.578 0.538 0.656 0.708 0.648 0.818 0.772 ...
## .. ..$ fun.sesp :function (thresholds, controls, cases, direction)
## .. ..$ auc : 'auc' num 0.781
## .. .. ..- attr(*, "partial.auc")= logi FALSE
## .. .. ..- attr(*, "percent")= logi FALSE
## .. .. ..- attr(*, "roc")=List of 8
## .. .. .. ..$ percent : logi FALSE
## .. .. .. ..$ sensitivities: num [1:100] 1 1 1 0.989 0.977 ...
## .. .. .. ..$ specificities: num [1:100] 0 0.0286 0.0571 0.0571 0.0571 ...
## .. .. .. ..$ thresholds : num [1:100] -Inf 0.383 0.414 0.425 0.43 ...
## .. .. .. ..$ direction : chr "<"
## .. .. .. ..$ cases : num [1:88] 0.558 0.58 0.78 0.752 0.558 0.748 0.92 0.9 0.78 0.766 ...
## .. .. .. ..$ controls : num [1:35] 0.752 0.556 0.896 0.578 0.538 0.656 0.708 0.648 0.818 0.772 ...
## .. .. .. ..$ fun.sesp :function (thresholds, controls, cases, direction)
## .. .. .. ..- attr(*, "class")= chr "roc"
## .. ..$ call : language roc.default(response = ContraProva, predictor = prediction.probabilities)
## .. ..$ original.predictor: num [1:123] 0.558 0.58 0.78 0.752 0.558 0.748 0.752 0.92 0.9 0.556 ...
## .. ..$ original.response : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
## .. ..$ predictor : num [1:123] 0.558 0.58 0.78 0.752 0.558 0.748 0.752 0.92 0.9 0.556 ...
## .. ..$ response : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
## .. ..$ levels : chr [1:2] "0" "1"
## .. ..- attr(*, "class")= chr "roc"
## $ call : language roc.default(response = ContraProva, predictor = prediction.probabilities)
## $ original.predictor: num [1:123] 0.558 0.58 0.78 0.752 0.558 0.748 0.752 0.92 0.9 0.556 ...
## $ original.response : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
## $ predictor : num [1:123] 0.558 0.58 0.78 0.752 0.558 0.748 0.752 0.92 0.9 0.556 ...
## $ response : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
## $ levels : chr [1:2] "0" "1"
## - attr(*, "class")= chr "roc"
head(res.roc$cases,5)
## [1] 0.558 0.580 0.780 0.752 0.558
length(res.roc$cases)
## [1] 88
summary(res.roc$cases)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4240 0.7510 0.8420 0.8072 0.9140 0.9900
head(res.roc$controls,5)
## [1] 0.752 0.556 0.896 0.578 0.538
length(res.roc$controls)
## [1] 35
summary(res.roc$controls)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3620 0.5860 0.6920 0.6736 0.7680 0.8960
head(res.roc$thresholds,5)
## [1] -Inf 0.383 0.414 0.425 0.430
tail(res.roc$thresholds,5)
## [1] 0.947 0.962 0.975 0.984 Inf
length(res.roc$thresholds)
## [1] 100
summary(res.roc$thresholds)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -Inf 0.6735 0.7885 0.8820 Inf
auc(res.roc)
## Area under the curve: 0.781
ci.auc(res.roc)
## 95% CI: 0.699-0.863 (DeLong)
ci.auc(res.roc,method = "bootstrap", boot.n = 10000)
## 95% CI: 0.6966-0.8588 (10000 stratified bootstrap replicates)
rfThresh_all <- coords(res.roc, x = "all", best.method = "youden")
head(rfThresh_all)
## threshold specificity sensitivity
## 1 -Inf 0.00000000 1.0000000
## 2 0.383 0.02857143 1.0000000
## 3 0.414 0.05714286 1.0000000
## 4 0.425 0.05714286 0.9886364
## 5 0.430 0.05714286 0.9772727
## 6 0.436 0.08571429 0.9772727
summary(rfThresh_all)
## threshold specificity sensitivity
## Min. : -Inf Min. :0.0000 Min. :0.0000
## 1st Qu.:0.6735 1st Qu.:0.4000 1st Qu.:0.3949
## Median :0.7885 Median :0.7714 Median :0.6420
## Mean : NaN Mean :0.6723 Mean :0.6038
## 3rd Qu.:0.8820 3rd Qu.:0.9714 3rd Qu.:0.8438
## Max. : Inf Max. :1.0000 Max. :1.0000
dim(rfThresh_all)
## [1] 100 3
rfThresh_max <- coords(res.roc, x = "local maximas", best.method = "youden")
head(rfThresh_max)
## threshold specificity sensitivity
## 1 0.414 0.05714286 1.0000000
## 2 0.436 0.08571429 0.9772727
## 3 0.479 0.11428571 0.9545455
## 4 0.557 0.22857143 0.9431818
## 5 0.579 0.25714286 0.9090909
## 6 0.603 0.34285714 0.8977273
summary(rfThresh_max)
## threshold specificity sensitivity
## Min. :0.4140 Min. :0.05714 Min. :0.3182
## 1st Qu.:0.6315 1st Qu.:0.37143 1st Qu.:0.5739
## Median :0.7450 Median :0.65714 Median :0.7614
## Mean :0.7086 Mean :0.59876 Mean :0.7302
## 3rd Qu.:0.8175 3rd Qu.:0.84286 3rd Qu.:0.8807
## Max. :0.8980 Max. :1.00000 Max. :1.0000
dim(rfThresh_max)
## [1] 23 3
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
## threshold specificity sensitivity
## 1 0.774 0.7714286 0.7045455
rfThresh_youden[1,1]
## [1] 0.774
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
## threshold specificity sensitivity
## 1 0.774 0.7714286 0.7045455
rfThresh_topleft[1,1]
## [1] 0.774
coords(res.roc, "best", ret="all", transpose = FALSE,
best.method="youden")
## threshold specificity sensitivity accuracy tn tp fn fp npv
## threshold 0.774 0.7714286 0.7045455 0.7235772 27 62 26 8 0.509434
## ppv fdr fpr tpr tnr fnr
## threshold 0.8857143 0.1142857 0.2285714 0.7045455 0.7714286 0.2954545
## 1-specificity 1-sensitivity 1-accuracy 1-npv 1-ppv precision
## threshold 0.2285714 0.2954545 0.2764228 0.490566 0.1142857 0.8857143
## recall youden closest.topleft
## threshold 0.7045455 1.475974 0.1395383
coords(res.roc, "best", ret="all", transpose = FALSE,
best.method="closest.topleft")
## threshold specificity sensitivity accuracy tn tp fn fp npv
## threshold 0.774 0.7714286 0.7045455 0.7235772 27 62 26 8 0.509434
## ppv fdr fpr tpr tnr fnr
## threshold 0.8857143 0.1142857 0.2285714 0.7045455 0.7714286 0.2954545
## 1-specificity 1-sensitivity 1-accuracy 1-npv 1-ppv precision
## threshold 0.2285714 0.2954545 0.2764228 0.490566 0.1142857 0.8857143
## recall youden closest.topleft
## threshold 0.7045455 1.475974 0.1395383
# "threshold", usando "ret", youden method
coords(res.roc, "best", ret="threshold", transpose = FALSE,
best.method="youden") # este é o default
## threshold
## 1 0.774
# "threshold" usando "ret", closest.topleft method
coords(res.roc, "best", ret="threshold", transpose = FALSE,
best.method="closest.topleft")
## threshold
## 1 0.774
#tn = True negative count usando "ret", youden method
coords(res.roc, "best", ret="tn", transpose = FALSE)
## tn
## threshold 27
#tn = True negative count usando "ret", closest.topleft method
coords(res.roc, "best", ret="tn", transpose = FALSE,
best.method="closest.topleft")
## tn
## threshold 27
#tp = True positive count usando "ret", youden method
coords(res.roc, "best", ret="tp", transpose = FALSE)
## tp
## threshold 62
#tp = True positive count usando "ret", closest.topleft method
coords(res.roc, "best", ret="tp", transpose = FALSE,
best.method="closest.topleft")
## tp
## threshold 62
#Obtendo Youden Index
coords(res.roc, "best", ret="youden", transpose = FALSE)
## youden
## 1 1.475974
#obtendo "Distance to the top left corner of the ROC space"
coords(res.roc, "best", ret="closest.topleft", transpose = FALSE,best.method="closest.topleft")
## closest.topleft
## 1 0.1395383
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
## threshold specificity sensitivity
## 1 0.774 0.7714286 0.7045455
rfThresh_youden[1,1]
## [1] 0.774
model_pred_class <- ifelse(predictions < rfThresh_youden[1,1], "X0", "X1")
head(model_pred_class)
## X0 X1
## 157 "X0" "X0"
## 407 "X0" "X0"
## 258 "X0" "X1"
## 1805 "X0" "X0"
## 433 "X0" "X0"
## 732 "X0" "X0"
summary(model_pred_class)
## X0 X1
## Length:123 Length:123
## Class :character Class :character
## Mode :character Mode :character
Test2<-as.factor(model_pred_class[,2])
levels(Test2) <- c(0,1)
summary(Test2)
## 0 1
## 53 70
CM_Test2<-confusionMatrix(Test2, ContraProva,positive="1")
CM_Test2
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 27 26
## 1 8 62
##
## Accuracy : 0.7236
## 95% CI : (0.6357, 0.8004)
## No Information Rate : 0.7154
## P-Value [Acc > NIR] : 0.465909
##
## Kappa : 0.4121
##
## Mcnemar's Test P-Value : 0.003551
##
## Sensitivity : 0.7045
## Specificity : 0.7714
## Pos Pred Value : 0.8857
## Neg Pred Value : 0.5094
## Prevalence : 0.7154
## Detection Rate : 0.5041
## Detection Prevalence : 0.5691
## Balanced Accuracy : 0.7380
##
## 'Positive' Class : 1
##
str(CM_Test2)
## List of 6
## $ positive: chr "1"
## $ table : 'table' int [1:2, 1:2] 27 8 26 62
## ..- attr(*, "dimnames")=List of 2
## .. ..$ Prediction: chr [1:2] "0" "1"
## .. ..$ Reference : chr [1:2] "0" "1"
## $ overall : Named num [1:7] 0.724 0.412 0.636 0.8 0.715 ...
## ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
## $ byClass : Named num [1:11] 0.705 0.771 0.886 0.509 0.886 ...
## ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
## $ mode : chr "sens_spec"
## $ dots : list()
## - attr(*, "class")= chr "confusionMatrix"
CM_Test2$byClass[1]#Sensitivity de CM_predictions1
## Sensitivity
## 0.7045455
CM_Test2$byClass[2]#Specificity de CM_predictions1
## Specificity
## 0.7714286
J_CM_Test2<-as.numeric(CM_Test2$byClass[1] + CM_Test2$byClass[2] - 1) ; J_CM_Test2
## [1] 0.475974
calculate_mcc1(CM_Test2)
## [1] 0.4336823
calculate_F2(CM_Test2)
## [1] 0.7345972
new_threshold <- 0.65
model_pred_class_33New <- ifelse(predictions_3 < new_threshold, "X0", "X1")
head(sapply(model_pred_class_33New, class))
## X0 X0 X0 X0 X0 X0
## "character" "character" "character" "character" "character" "character"
Test_33New<-as.factor(model_pred_class_33New[,2])#
head(sapply(Test_33New, class))
## 883 1171 307 1743 1820 176
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_33New) <- c(0,1)
head(sapply(Test_33New, class))
## 883 1171 307 1743 1820 176
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_33New)
## [1] "0" "1"
head(Test_33New)
## 883 1171 307 1743 1820 176
## 1 1 0 1 1 1
## Levels: 0 1
summary(Test_33New)
## 0 1
## 16 65
CM_predictions_33New<-confusionMatrix(Test_33New, finalTest$true_troglofauna,positive="1")
CM_predictions_33New
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 10 6
## 1 13 52
##
## Accuracy : 0.7654
## 95% CI : (0.6582, 0.8525)
## No Information Rate : 0.716
## P-Value [Acc > NIR] : 0.1955
##
## Kappa : 0.3648
##
## Mcnemar's Test P-Value : 0.1687
##
## Sensitivity : 0.8966
## Specificity : 0.4348
## Pos Pred Value : 0.8000
## Neg Pred Value : 0.6250
## Prevalence : 0.7160
## Detection Rate : 0.6420
## Detection Prevalence : 0.8025
## Balanced Accuracy : 0.6657
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_predictions_33New)
## [1] 0.375256
calculate_F2(CM_predictions_33New)
## [1] 0.8754209
rfThresh_youden <- coords(res.roc, x = "best", best.method = "youden")
rfThresh_youden
## threshold specificity sensitivity
## 1 0.774 0.7714286 0.7045455
rfThresh_youden[1,1]
## [1] 0.774
model_pred_class_31 <- ifelse(predictions_3 < rfThresh_youden[1,1], "X0", "X1")
head(sapply(model_pred_class_31, class))
## X0 X0 X0 X0 X0 X0
## "character" "character" "character" "character" "character" "character"
Test_31<-as.factor(model_pred_class_31[,2])
head(sapply(Test_31, class))
## 883 1171 307 1743 1820 176
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_31) <- c(0,1)
head(sapply(Test_31, class))
## 883 1171 307 1743 1820 176
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_31)
## [1] "0" "1"
head(Test_31)
## 883 1171 307 1743 1820 176
## 0 1 0 1 1 1
## Levels: 0 1
summary(Test_31)
## 0 1
## 34 47
CM_predictions_31<-confusionMatrix(Test_31, finalTest$true_troglofauna,positive="1")
CM_predictions_31
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 17 17
## 1 6 41
##
## Accuracy : 0.716
## 95% CI : (0.605, 0.8107)
## No Information Rate : 0.716
## P-Value [Acc > NIR] : 0.55592
##
## Kappa : 0.3898
##
## Mcnemar's Test P-Value : 0.03706
##
## Sensitivity : 0.7069
## Specificity : 0.7391
## Pos Pred Value : 0.8723
## Neg Pred Value : 0.5000
## Prevalence : 0.7160
## Detection Rate : 0.5062
## Detection Prevalence : 0.5802
## Balanced Accuracy : 0.7230
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_predictions_31)
## [1] 0.4075216
calculate_F2(CM_predictions_31)
## [1] 0.734767
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
## threshold specificity sensitivity
## 1 0.774 0.7714286 0.7045455
rfThresh_topleft[1,1]
## [1] 0.774
model_pred_class <- ifelse(predictions < rfThresh_topleft[1,1], "X0", "X1")
Test3<-as.factor(model_pred_class[,2])
levels(Test3) <- c(0,1)
summary(Test3)
## 0 1
## 53 70
CM_Test3<-confusionMatrix(Test3, ContraProva,positive="1")
CM_Test3
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 27 26
## 1 8 62
##
## Accuracy : 0.7236
## 95% CI : (0.6357, 0.8004)
## No Information Rate : 0.7154
## P-Value [Acc > NIR] : 0.465909
##
## Kappa : 0.4121
##
## Mcnemar's Test P-Value : 0.003551
##
## Sensitivity : 0.7045
## Specificity : 0.7714
## Pos Pred Value : 0.8857
## Neg Pred Value : 0.5094
## Prevalence : 0.7154
## Detection Rate : 0.5041
## Detection Prevalence : 0.5691
## Balanced Accuracy : 0.7380
##
## 'Positive' Class : 1
##
str(CM_Test3)
## List of 6
## $ positive: chr "1"
## $ table : 'table' int [1:2, 1:2] 27 8 26 62
## ..- attr(*, "dimnames")=List of 2
## .. ..$ Prediction: chr [1:2] "0" "1"
## .. ..$ Reference : chr [1:2] "0" "1"
## $ overall : Named num [1:7] 0.724 0.412 0.636 0.8 0.715 ...
## ..- attr(*, "names")= chr [1:7] "Accuracy" "Kappa" "AccuracyLower" "AccuracyUpper" ...
## $ byClass : Named num [1:11] 0.705 0.771 0.886 0.509 0.886 ...
## ..- attr(*, "names")= chr [1:11] "Sensitivity" "Specificity" "Pos Pred Value" "Neg Pred Value" ...
## $ mode : chr "sens_spec"
## $ dots : list()
## - attr(*, "class")= chr "confusionMatrix"
CM_Test3$byClass[1]#Sensitivity de CM_predictions1
## Sensitivity
## 0.7045455
CM_Test3$byClass[2]#Specificity de CM_predictions1
## Specificity
## 0.7714286
J_CM_Test3<-as.numeric(CM_Test3$byClass[1] + CM_Test3$byClass[2] - 1) ; J_CM_Test3
## [1] 0.475974
calculate_mcc1(CM_Test3)
## [1] 0.4336823
calculate_F2(CM_Test3)
## [1] 0.7345972
rfThresh_topleft <- coords(res.roc, x = "best", best.method = "closest.topleft")
rfThresh_topleft
## threshold specificity sensitivity
## 1 0.774 0.7714286 0.7045455
rfThresh_topleft[1,1]
## [1] 0.774
model_pred_class_41 <- ifelse(predictions_3 < rfThresh_topleft[1,1], "X0", "X1")
head(sapply(model_pred_class_41, class))
## X0 X0 X0 X0 X0 X0
## "character" "character" "character" "character" "character" "character"
Test_41<-as.factor(model_pred_class_41[,2])
head(sapply(Test_41, class))
## 883 1171 307 1743 1820 176
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_41) <- c(0,1)
head(sapply(Test_41, class))
## 883 1171 307 1743 1820 176
## "factor" "factor" "factor" "factor" "factor" "factor"
levels(Test_41)
## [1] "0" "1"
head(Test_41)
## 883 1171 307 1743 1820 176
## 0 1 0 1 1 1
## Levels: 0 1
summary(Test_41)
## 0 1
## 34 47
CM_predictions_41<-confusionMatrix(Test_41, finalTest$true_troglofauna,positive="1")
CM_predictions_41
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 17 17
## 1 6 41
##
## Accuracy : 0.716
## 95% CI : (0.605, 0.8107)
## No Information Rate : 0.716
## P-Value [Acc > NIR] : 0.55592
##
## Kappa : 0.3898
##
## Mcnemar's Test P-Value : 0.03706
##
## Sensitivity : 0.7069
## Specificity : 0.7391
## Pos Pred Value : 0.8723
## Neg Pred Value : 0.5000
## Prevalence : 0.7160
## Detection Rate : 0.5062
## Detection Prevalence : 0.5802
## Balanced Accuracy : 0.7230
##
## 'Positive' Class : 1
##
calculate_mcc1(CM_predictions_41)
## [1] 0.4075216
calculate_F2(CM_predictions_41)
## [1] 0.734767
roc.data <- data_frame( # Create a dataframe 'roc.data' for ROC analysis results
thresholds = res.roc$thresholds, # Column for ROC curve thresholds
sensitivity = res.roc$sensitivities, # Column for sensitivity (true positive rate)
specificity = res.roc$specificities # Column for specificity (true negative rate)
)
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
roc.data # Display the 'roc.data' dataframe in the R console
## # A tibble: 100 × 3
## thresholds sensitivity specificity
## <dbl> <dbl> <dbl>
## 1 -Inf 1 0
## 2 0.383 1 0.0286
## 3 0.414 1 0.0571
## 4 0.425 0.989 0.0571
## 5 0.43 0.977 0.0571
## 6 0.436 0.977 0.0857
## 7 0.449 0.966 0.0857
## 8 0.464 0.955 0.0857
## 9 0.479 0.955 0.114
## 10 0.511 0.943 0.114
## # ℹ 90 more rows
tibble(roc.data)# Convert the 'roc.data' dataframe into a tibble for further analysis (if needed)
## # A tibble: 100 × 3
## thresholds sensitivity specificity
## <dbl> <dbl> <dbl>
## 1 -Inf 1 0
## 2 0.383 1 0.0286
## 3 0.414 1 0.0571
## 4 0.425 0.989 0.0571
## 5 0.43 0.977 0.0571
## 6 0.436 0.977 0.0857
## 7 0.449 0.966 0.0857
## 8 0.464 0.955 0.0857
## 9 0.479 0.955 0.114
## 10 0.511 0.943 0.114
## # ℹ 90 more rows
#Get the probality threshold for specificity >= 0.6 e sensitivity >= 0.6
roc.data %>% filter(specificity >= 0.6)#comando com %>% "forward pipe operator"
## # A tibble: 64 × 3
## thresholds sensitivity specificity
## <dbl> <dbl> <dbl>
## 1 0.727 0.784 0.6
## 2 0.731 0.784 0.629
## 3 0.737 0.761 0.629
## 4 0.745 0.761 0.657
## 5 0.75 0.75 0.657
## 6 0.754 0.739 0.714
## 7 0.76 0.727 0.714
## 8 0.765 0.727 0.743
## 9 0.768 0.716 0.743
## 10 0.771 0.705 0.743
## # ℹ 54 more rows
roc.data %>% filter(sensitivity >= 0.6)#comando com %>% "forward pipe operator"
## # A tibble: 55 × 3
## thresholds sensitivity specificity
## <dbl> <dbl> <dbl>
## 1 -Inf 1 0
## 2 0.383 1 0.0286
## 3 0.414 1 0.0571
## 4 0.425 0.989 0.0571
## 5 0.43 0.977 0.0571
## 6 0.436 0.977 0.0857
## 7 0.449 0.966 0.0857
## 8 0.464 0.955 0.0857
## 9 0.479 0.955 0.114
## 10 0.511 0.943 0.114
## # ℹ 45 more rows
par(pty="s")
plot.roc(res.roc, print.auc =TRUE,col="blue", print.thres =
"best",print.auc.y=0.4,main="ROC curve (Point: best.method = 'youden')")
grid(nx = NULL, ny = NULL,
lty = 3,
lwd = 1,
col = "gray")
segments(rfThresh_youden[1,2],1-rfThresh_youden[1,2],rfThresh_youden[1,2], rfThresh_youden[1,3],lwd = 3, col = "red",lty=2)
text(0.4,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

par(pty="s")
plot.roc(res.roc, print.auc =TRUE,col="blue", print.thres =
c(0.3, 0.5, 0.7,rfThresh_topleft[1,1]),print.auc.y=0.4,main="ROC curve (Point: best.method = 'closest.topleft')")
grid(nx = NULL, ny = NULL,
lty = 3,
lwd = 1,
col = "gray")
segments(rfThresh_topleft[1,2],rfThresh_topleft[1,3],1, 1,lwd = 3, col = "red",lty=2)
text(0.3,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

par(pty="s")
plot.roc(res.roc, print.auc =TRUE,print.auc.y=0.4,legacy.axes = TRUE, col="blue", print.thres = c(0.2, 0.5, 0.7,rfThresh_youden[1,1]),main="ROC curve - Youden",xlab = "1-Specificity = False Positive Rate = FPR",ylab = "Sensitivity = True Positive Rate = TPR")
segments(rfThresh_youden[1,2], 1-rfThresh_youden[1,2],rfThresh_youden[1,2],rfThresh_youden[1,3],lwd = 3, col = "red",lty=2)
grid(nx = NULL, ny = NULL,
lty = 3,
lwd = 1,
col = "gray")
text(0.4,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

par(pty="s")
plot.roc(res.roc, print.auc =TRUE,print.auc.y=0.4,legacy.axes = TRUE, col="blue", print.thres = c(0.3, 0.5, 0.7,rfThresh_topleft[1,1]),main="ROC curve - Closest Topleft",xlab = "1-Specificity = False Positive Rate = FPR",ylab = "Sensitivity = True Positive Rate = TPR")
segments(rfThresh_topleft[1,2],rfThresh_topleft[1,3],1, 1,lwd = 3, col = "red",lty=2)
grid(nx = NULL, ny = NULL,
lty = 3,
lwd = 1,
col = "gray")
text(0.4,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

par(pty="s")
rocCurve <- roc(ContraProva,prediction.probabilities, plot=TRUE,legacy.axes = TRUE, col="blue",main="ROC curve (Point: best.method = 'youden')",xlab="False Positive Rate = FPR", ylab="True Positive Rate = TPR",print.thres =
"best",print.auc =TRUE,cex.main=0.9)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
segments(rfThresh_youden[1,2], 1-rfThresh_youden[1,2],rfThresh_youden[1,2],rfThresh_youden[1,3],lwd = 3, col = "red",lty=2)
grid(nx = NULL, ny = NULL,
lty = 3,
lwd = 1,
col = "gray")
text(0.4,0.2,"The number on the left represents the probability cutoff,\nand the numbers in the parentheses are\nthe specificity and sensitivity, respectively")

dim(finalTest)
## [1] 81 28
names(finalTest)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION" "true_troglofauna"
dim(selectcol_finalTest)
## [1] 81 27
names(selectcol_finalTest)
## [1] "srain2mp" "bio21" "bio16" "slope"
## [5] "rugg500s" "rugg500cv" "pil_twicv" "pil_topos"
## [9] "pil_slps" "pil_slpcv" "nutrientsn" "mrvbf"
## [13] "mrrtf" "minfertf" "lf7rup" "hstructn"
## [17] "geolrngaggn" "geolmnaggn" "bdensity50n" "slopern"
## [21] "HubDist" "MIN_AGE_MA" "month_collection" "HubName"
## [25] "UNITNAME" "ROCKTYPE1" "FORMATION"
CM_predictions_2$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9827586 0.1304348 0.7402597
## Neg Pred Value Precision Recall
## 0.7500000 0.7402597 0.9827586
## F1 Prevalence Detection Rate
## 0.8444444 0.7160494 0.7037037
## Detection Prevalence Balanced Accuracy
## 0.9506173 0.5565967
beta<-2
dbF1_11<-CM_predictions_2$byClass
dbF1_11["F1"]
## F1
## 0.8444444
dbF2_11<-((1+beta^2)*dbF1_11["Precision"]*dbF1_11["Sensitivity"])/(beta^2*dbF1_11["Precision"] + dbF1_11["Sensitivity"])
dbF2_11<-as.numeric(dbF2_11)
dbF2_11
## [1] 0.9223301
dbF1_11["Precision"];dbF1_11["Sensitivity"]
## Precision
## 0.7402597
## Sensitivity
## 0.9827586
dbF1_22<-CM_predictions_31$byClass
dbF1_22["F1"]
## F1
## 0.7809524
dbF2_22<-((1+beta^2)*dbF1_22["Precision"]*dbF1_22["Sensitivity"])/(beta^2*dbF1_22["Precision"] + dbF1_22["Sensitivity"])
dbF2_22<-as.numeric(dbF2_22)
dbF2_22
## [1] 0.734767
dbF1_22["Precision"];dbF1_22["Sensitivity"]
## Precision
## 0.8723404
## Sensitivity
## 0.7068966
dbF1_33<-CM_predictions_41$byClass
dbF1_33["F1"]
## F1
## 0.7809524
dbF2_33<-((1+beta^2)*dbF1_33["Precision"]*dbF1_33["Sensitivity"])/(beta^2*dbF1_33["Precision"] + dbF1_33["Sensitivity"])
dbF2_33<-as.numeric(dbF2_33)
dbF2_33
## [1] 0.734767
dbF1_33["Precision"];dbF1_33["Sensitivity"]
## Precision
## 0.8723404
## Sensitivity
## 0.7068966
dim(finalTest)
## [1] 81 28
basmcc<-mcc(predictions_2, finalTest[, ncol(finalTest)])
basmcc
## [1] 0.235572
youmcc<-mcc(Test_31, finalTest[, ncol(finalTest)])
youmcc
## [1] 0.4075216
topmcc<-mcc(Test_41, finalTest[, ncol(finalTest)])
topmcc
## [1] 0.4075216
cat("\n\n","TABELA 1 - RESULTADOS DA BASE DE DADOS 'finalTest' COM OS MODELOS USANDO A FUNÇÃO train() DO PACOTE caret"
,"\n\n\n", "1 - Modelo básico"
,"\n\n","Area Under ROC (AUC) do Modelo Básico =", auc(res.roc)
,"\n\n\n","Accuracy =", CM_predictions_2$overall[1]
,"\n\n","Sensitivity =", CM_predictions_2$byClass[1]
,"\n\n","Specificity =", CM_predictions_2$byClass[2]
,"\n\n","Balanced Accuracy =", CM_predictions_2$byClass[11]
,"\n\n","F1 =", dbF1_11["F1"]
,"\n\n","F2 =", dbF2_11
,"\n\n","matthews correlation coefficient =", calculate_mcc1(CM_predictions_2)
,"\n\n","Threshold =", 0.5
,"\n\n\n", " 33New "
,"\n\n","Accuracy =", CM_predictions_33New$overall[1]
,"\n\n","Sensitivity =", CM_predictions_33New$byClass[1]
,"\n\n","Specificity =", CM_predictions_33New$byClass[2]
,"\n\n","Balanced Accuracy =", CM_predictions_33New$byClass[11]
,"\n\n","matthews correlation coefficient =", calculate_mcc1(CM_predictions_33New)
,"\n\n","Threshold =", 0.65
,"\n\n\n", "2 - Aplicando o critério de 'youden' ao modelo"
,"\n\n","Accuracy =", CM_predictions_31$overall[1]
,"\n\n","Sensitivity =", CM_predictions_31$byClass[1]
,"\n\n","Specificity =", CM_predictions_31$byClass[2]
,"\n\n","Balanced Accuracy =", CM_predictions_31$byClass[11]
,"\n\n","F1 =", dbF1_22["F1"]
,"\n\n","F2 =", dbF2_22
,"\n\n","matthews correlation coefficient =", calculate_mcc1(CM_predictions_31)
,"\n\n","Threshold =", rfThresh_youden[1,1]
,"\n\n\n", "3 - Aplicando o critério 'closest.topleft' ao modelo"
,"\n\n","Accuracy =", CM_predictions_41$overall[1]
,"\n\n","Sensitivity =", CM_predictions_41$byClass[1]
,"\n\n","Specificity =", CM_predictions_41$byClass[2]
,"\n\n","Balanced Accuracy =", CM_predictions_41$byClass[11]
,"\n\n","F1 =", dbF1_33["F1"]
,"\n\n","F2 =", dbF2_33
,"\n\n","matthews correlation coefficient =", calculate_mcc1(CM_predictions_41)
,"\n\n","Threshold =", rfThresh_topleft[1,1]
)
##
##
## TABELA 1 - RESULTADOS DA BASE DE DADOS 'finalTest' COM OS MODELOS USANDO A FUNÇÃO train() DO PACOTE caret
##
##
## 1 - Modelo básico
##
## Area Under ROC (AUC) do Modelo Básico = 0.7810065
##
##
## Accuracy = 0.7407407
##
## Sensitivity = 0.9827586
##
## Specificity = 0.1304348
##
## Balanced Accuracy = 0.5565967
##
## F1 = 0.8444444
##
## F2 = 0.9223301
##
## matthews correlation coefficient = 0.235572
##
## Threshold = 0.5
##
##
## 33New
##
## Accuracy = 0.7654321
##
## Sensitivity = 0.8965517
##
## Specificity = 0.4347826
##
## Balanced Accuracy = 0.6656672
##
## matthews correlation coefficient = 0.375256
##
## Threshold = 0.65
##
##
## 2 - Aplicando o critério de 'youden' ao modelo
##
## Accuracy = 0.7160494
##
## Sensitivity = 0.7068966
##
## Specificity = 0.7391304
##
## Balanced Accuracy = 0.7230135
##
## F1 = 0.7809524
##
## F2 = 0.734767
##
## matthews correlation coefficient = 0.4075216
##
## Threshold = 0.774
##
##
## 3 - Aplicando o critério 'closest.topleft' ao modelo
##
## Accuracy = 0.7160494
##
## Sensitivity = 0.7068966
##
## Specificity = 0.7391304
##
## Balanced Accuracy = 0.7230135
##
## F1 = 0.7809524
##
## F2 = 0.734767
##
## matthews correlation coefficient = 0.4075216
##
## Threshold = 0.774
gaa<-which(finalTest1$true_troglofauna==1);gaa
## [1] 1 2 4 5 6 7 10 11 12 17 20 21 24 25 26 27 28 29 30 31 32 33 36 37 38
## [26] 39 40 42 43 44 45 46 47 48 49 50 51 54 55 58 60 61 62 63 65 66 67 68 70 71
## [51] 72 74 75 76 77 79 80 81
geo1<-finalTest1[gaa,]
summary(geo1)
## srain2mp bio21 bio16 slope
## Min. :0.1362 Min. :29.64 Min. :202.2 Min. : 0.08943
## 1st Qu.:0.1443 1st Qu.:29.74 1st Qu.:221.2 1st Qu.: 1.08182
## Median :0.1481 Median :29.75 Median :224.2 Median : 2.01742
## Mean :0.1480 Mean :29.75 Mean :225.8 Mean : 2.95951
## 3rd Qu.:0.1519 3rd Qu.:29.77 3rd Qu.:229.7 3rd Qu.: 3.73666
## Max. :0.1631 Max. :29.83 Max. :245.4 Max. :13.58547
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.01647 Min. :0.01810 Min. :0.05175 Min. :0.0000
## 1st Qu.:0.44700 1st Qu.:0.07039 1st Qu.:0.09118 1st Qu.:0.0000
## Median :0.96114 Median :0.09685 Median :0.14486 Median :0.2028
## Mean :1.46073 Mean :0.11370 Mean :0.15158 Mean :1.1509
## 3rd Qu.:1.77356 3rd Qu.:0.14303 3rd Qu.:0.20167 3rd Qu.:1.7150
## Max. :7.05156 Max. :0.30012 Max. :0.29959 Max. :5.4794
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.1401 Min. :0.1951 Min. :1.100 Min. :0.0000
## 1st Qu.: 0.8979 1st Qu.:0.3583 1st Qu.:1.300 1st Qu.:0.0000
## Median : 2.2965 Median :0.4684 Median :1.300 Median :0.0000
## Mean : 3.1087 Mean :0.4533 Mean :1.286 Mean :0.4320
## 3rd Qu.: 4.6968 3rd Qu.:0.5173 3rd Qu.:1.300 3rd Qu.:0.6406
## Max. :12.3162 Max. :1.1404 Max. :1.600 Max. :4.5610
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 0.5821
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.0429
## Mean :0.4231 Mean :2.052 Mean :3.086 Mean : 1.1633
## 3rd Qu.:0.5784 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :3.5544 Max. :3.000 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. : 0.00132
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.01840
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.05969
## Mean :362.2899 Mean :1586.3174 Mean :1.351 Mean : 0.61471
## 3rd Qu.:750.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.24807
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 2.124 Min. :2453 June : 9
## 1st Qu.: 9.731 1st Qu.:2454 September: 9
## Median :18.518 Median :2506 July : 8
## Mean :29.461 Mean :2516 August : 6
## 3rd Qu.:53.033 3rd Qu.:2597 April : 5
## Max. :77.437 Max. :2597 February : 4
## (Other) :17
## HubName
## anticline, exposed :15
## concealed : 0
## exposed :10
## normal, exposed, tick on downthrown side: 4
## overturned syncline, exposed : 2
## syncline, exposed :27
##
## UNITNAME
## Brockman Iron Formation :24
## Marra Mamba Iron Formation :22
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 3
## Wittenoom Formation : 8
##
##
## ROCKTYPE1
## sedimentary carbonate : 8
## sedimentary other chemical or biochemical:49
## sedimentary siliciclastic : 1
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :24 0: 0
## Marra Mamba Iron Formation :22 1:58
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 3
## Wittenoom Formation : 8
##
##
## LATITUDE LONGITUDE
## Min. :-23.26 Min. :118.7
## 1st Qu.:-22.98 1st Qu.:118.9
## Median :-22.94 Median :119.0
## Mean :-22.87 Mean :119.0
## 3rd Qu.:-22.70 3rd Qu.:119.1
## Max. :-22.53 Max. :119.4
##
gaaAu<-which(finalTest1$true_troglofauna==0);gaaAu
## [1] 3 8 9 13 14 15 16 18 19 22 23 34 35 41 52 53 56 57 59 64 69 73 78
geoAu<-finalTest1[gaaAu,]
summary(geoAu)
## srain2mp bio21 bio16 slope
## Min. :0.1356 Min. :29.69 Min. :214.2 Min. : 0.1489
## 1st Qu.:0.1423 1st Qu.:29.76 1st Qu.:223.8 1st Qu.: 1.8485
## Median :0.1463 Median :29.77 Median :224.5 Median : 3.4507
## Mean :0.1465 Mean :29.77 Mean :225.6 Mean : 4.2842
## 3rd Qu.:0.1481 3rd Qu.:29.80 3rd Qu.:229.8 3rd Qu.: 5.3867
## Max. :0.1633 Max. :29.82 Max. :238.9 Max. :12.8002
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.09931 Min. :0.03795 Min. :0.04523 Min. : 0.000
## 1st Qu.:1.16273 1st Qu.:0.06686 1st Qu.:0.08127 1st Qu.: 0.000
## Median :1.80702 Median :0.09112 Median :0.11014 Median : 0.000
## Mean :2.38998 Mean :0.13402 Mean :0.13561 Mean : 2.361
## 3rd Qu.:3.74337 3rd Qu.:0.20341 3rd Qu.:0.18286 3rd Qu.: 4.808
## Max. :6.26119 Max. :0.29515 Max. :0.36527 Max. :10.364
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.5099 Min. :0.2334 Min. :1.1 Min. :0.0000
## 1st Qu.: 1.1570 1st Qu.:0.3374 1st Qu.:1.3 1st Qu.:0.0000
## Median : 4.9075 Median :0.4747 Median :1.3 Median :0.0000
## Mean : 4.8395 Mean :0.4815 Mean :1.3 Mean :0.4131
## 3rd Qu.: 5.9913 3rd Qu.:0.5641 3rd Qu.:1.3 3rd Qu.:0.0000
## Max. :17.0097 Max. :0.9767 Max. :1.6 Max. :4.5058
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :2.000 Min. :1.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:1.500 1st Qu.:1.0429
## Median :0.0000 Median :2.000 Median :3.000 Median :1.0429
## Mean :0.1835 Mean :2.043 Mean :3.043 Mean :0.8721
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:1.0429
## Max. :1.9350 Max. :3.000 Max. :7.000 Max. :1.0429
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. :0.00392
## 1st Qu.: 2.5880 1st Qu.: 1.2940 1st Qu.:1.367 1st Qu.:0.01738
## Median :300.0000 Median :2050.0000 Median :1.367 Median :0.03976
## Mean :361.3930 Mean :1319.8269 Mean :1.354 Mean :0.14841
## 3rd Qu.:900.0000 3rd Qu.:2350.0000 3rd Qu.:1.367 3rd Qu.:0.14600
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :1.26735
##
## HubDist MIN_AGE_MA month_collection
## Min. : 1.294 Min. :2453 January :7
## 1st Qu.: 3.746 1st Qu.:2454 March :3
## Median : 6.289 Median :2494 November :3
## Mean :22.821 Mean :2520 February :2
## 3rd Qu.:54.325 3rd Qu.:2597 April :2
## Max. :70.168 Max. :2597 September:2
## (Other) :4
## HubName
## anticline, exposed : 1
## concealed : 0
## exposed : 0
## normal, exposed, tick on downthrown side: 6
## overturned syncline, exposed : 4
## syncline, exposed :12
##
## UNITNAME
## Brockman Iron Formation :10
## Marra Mamba Iron Formation :10
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 1
## Wittenoom Formation : 1
##
##
## ROCKTYPE1
## sedimentary carbonate : 1
## sedimentary other chemical or biochemical:21
## sedimentary siliciclastic : 1
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :10 0:23
## Marra Mamba Iron Formation :10 1: 0
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 1
## Wittenoom Formation : 1
##
##
## LATITUDE LONGITUDE
## Min. :-23.07 Min. :118.7
## 1st Qu.:-22.94 1st Qu.:118.8
## Median :-22.93 Median :118.9
## Mean :-22.89 Mean :118.9
## 3rd Qu.:-22.91 3rd Qu.:119.0
## Max. :-22.55 Max. :119.2
##
Min_Lg<-min(finalTest1$LONGITUDE)-0.05
Max_Lg<-max(finalTest1$LONGITUDE)+0.05
Min_Lt<-min(finalTest1$LATITUDE)-0.05
Max_Lt<-max(finalTest1$LATITUDE)+0.05
plot(geo1$LONGITUDE,geo1$LATITUDE,xlim = c(Min_Lg,Max_Lg),ylim = c(Min_Lt,Max_Lt), cex=2, pch=20, col="blue", xlab='Longitude\n', ylab='Latitude', main='Modelo Básico. Cordenadas da presença de troglofauna: Observadas e Preditas ',sub="\nObservação Presença = azul, Predição Presença = vermelha, Observação Ausência = Preta")
gbb<-which(predictions_2==1);gbb
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 14 15 16 17 19 20 21 22 23 24 25 26 27
## [26] 28 29 30 31 32 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
## [51] 54 55 56 57 58 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
## [76] 80 81
geo2<-finalTest1[gbb,]
summary(geo2)
## srain2mp bio21 bio16 slope
## Min. :0.1356 Min. :29.66 Min. :202.2 Min. : 0.08943
## 1st Qu.:0.1431 1st Qu.:29.74 1st Qu.:221.4 1st Qu.: 1.20730
## Median :0.1473 Median :29.76 Median :224.2 Median : 2.30398
## Mean :0.1474 Mean :29.75 Mean :225.8 Mean : 3.22096
## 3rd Qu.:0.1517 3rd Qu.:29.77 3rd Qu.:230.1 3rd Qu.: 4.06712
## Max. :0.1631 Max. :29.83 Max. :245.4 Max. :13.58547
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.01647 Min. :0.01810 Min. :0.04839 Min. : 0.0000
## 1st Qu.:0.60450 1st Qu.:0.06913 1st Qu.:0.09014 1st Qu.: 0.0000
## Median :1.09004 Median :0.09504 Median :0.13883 Median : 0.1104
## Mean :1.68214 Mean :0.11970 Mean :0.14929 Mean : 1.5034
## 3rd Qu.:2.42691 3rd Qu.:0.17764 3rd Qu.:0.20128 3rd Qu.: 2.4066
## Max. :7.05156 Max. :0.30012 Max. :0.36527 Max. :10.3639
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.1401 Min. :0.1951 Min. :1.100 Min. :0.0000
## 1st Qu.: 1.0510 1st Qu.:0.3434 1st Qu.:1.300 1st Qu.:0.0000
## Median : 2.3887 Median :0.4664 Median :1.300 Median :0.0000
## Mean : 3.5202 Mean :0.4613 Mean :1.292 Mean :0.4488
## 3rd Qu.: 4.9667 3rd Qu.:0.5202 3rd Qu.:1.300 3rd Qu.:0.5784
## Max. :17.0097 Max. :1.1404 Max. :1.600 Max. :4.5610
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :2.000 Min. :1.000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 1.043
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.043
## Mean :0.3538 Mean :2.065 Mean :3.143 Mean : 1.091
## 3rd Qu.:0.5385 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.043
## Max. :3.5544 Max. :3.000 Max. :7.000 Max. :10.000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. : 0.00132
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.01814
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.05865
## Mean :344.9267 Mean :1508.8270 Mean :1.352 Mean : 0.49968
## 3rd Qu.:300.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.20859
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 2.124 Min. :2453 June :10
## 1st Qu.: 7.414 1st Qu.:2454 September: 9
## Median :17.310 Median :2506 January : 8
## Mean :27.857 Mean :2520 July : 8
## 3rd Qu.:53.489 3rd Qu.:2597 March : 7
## Max. :77.437 Max. :2597 April : 7
## (Other) :28
## HubName
## anticline, exposed :16
## concealed : 0
## exposed :10
## normal, exposed, tick on downthrown side: 9
## overturned syncline, exposed : 4
## syncline, exposed :38
##
## UNITNAME
## Brockman Iron Formation :30
## Marra Mamba Iron Formation :32
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 4
## Wittenoom Formation : 9
##
##
## ROCKTYPE1
## sedimentary carbonate : 9
## sedimentary other chemical or biochemical:66
## sedimentary siliciclastic : 2
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :30 0:20
## Marra Mamba Iron Formation :32 1:57
## Mount McRae Shale and Mount Sylvia Formation: 2
## Weeli Wolli Formation : 4
## Wittenoom Formation : 9
##
##
## LATITUDE LONGITUDE
## Min. :-23.26 Min. :118.7
## 1st Qu.:-22.98 1st Qu.:118.9
## Median :-22.93 Median :119.0
## Mean :-22.88 Mean :119.0
## 3rd Qu.:-22.70 3rd Qu.:119.0
## Max. :-22.53 Max. :119.4
##
points(geo2$LONGITUDE,geo2$LATITUDE,xlim = c(Min_Lg,Max_Lg),ylim = c(Min_Lt,Max_Lt),pch=5, col="red")
points(geoAu$LONGITUDE,geoAu$LATITUDE,xlim = c(115,122),ylim = c(-26,-21),pch = 4, col = rgb(0, 0, 0, 0.15))

gaa<-which(finalTest1$true_troglofauna==1);gaa
## [1] 1 2 4 5 6 7 10 11 12 17 20 21 24 25 26 27 28 29 30 31 32 33 36 37 38
## [26] 39 40 42 43 44 45 46 47 48 49 50 51 54 55 58 60 61 62 63 65 66 67 68 70 71
## [51] 72 74 75 76 77 79 80 81
geo1<-finalTest1[gaa,]
summary(geo1)
## srain2mp bio21 bio16 slope
## Min. :0.1362 Min. :29.64 Min. :202.2 Min. : 0.08943
## 1st Qu.:0.1443 1st Qu.:29.74 1st Qu.:221.2 1st Qu.: 1.08182
## Median :0.1481 Median :29.75 Median :224.2 Median : 2.01742
## Mean :0.1480 Mean :29.75 Mean :225.8 Mean : 2.95951
## 3rd Qu.:0.1519 3rd Qu.:29.77 3rd Qu.:229.7 3rd Qu.: 3.73666
## Max. :0.1631 Max. :29.83 Max. :245.4 Max. :13.58547
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.01647 Min. :0.01810 Min. :0.05175 Min. :0.0000
## 1st Qu.:0.44700 1st Qu.:0.07039 1st Qu.:0.09118 1st Qu.:0.0000
## Median :0.96114 Median :0.09685 Median :0.14486 Median :0.2028
## Mean :1.46073 Mean :0.11370 Mean :0.15158 Mean :1.1509
## 3rd Qu.:1.77356 3rd Qu.:0.14303 3rd Qu.:0.20167 3rd Qu.:1.7150
## Max. :7.05156 Max. :0.30012 Max. :0.29959 Max. :5.4794
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.1401 Min. :0.1951 Min. :1.100 Min. :0.0000
## 1st Qu.: 0.8979 1st Qu.:0.3583 1st Qu.:1.300 1st Qu.:0.0000
## Median : 2.2965 Median :0.4684 Median :1.300 Median :0.0000
## Mean : 3.1087 Mean :0.4533 Mean :1.286 Mean :0.4320
## 3rd Qu.: 4.6968 3rd Qu.:0.5173 3rd Qu.:1.300 3rd Qu.:0.6406
## Max. :12.3162 Max. :1.1404 Max. :1.600 Max. :4.5610
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :1.000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 0.5821
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.0429
## Mean :0.4231 Mean :2.052 Mean :3.086 Mean : 1.1633
## 3rd Qu.:0.5784 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :3.5544 Max. :3.000 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. : 0.00132
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.01840
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.05969
## Mean :362.2899 Mean :1586.3174 Mean :1.351 Mean : 0.61471
## 3rd Qu.:750.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.24807
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 2.124 Min. :2453 June : 9
## 1st Qu.: 9.731 1st Qu.:2454 September: 9
## Median :18.518 Median :2506 July : 8
## Mean :29.461 Mean :2516 August : 6
## 3rd Qu.:53.033 3rd Qu.:2597 April : 5
## Max. :77.437 Max. :2597 February : 4
## (Other) :17
## HubName
## anticline, exposed :15
## concealed : 0
## exposed :10
## normal, exposed, tick on downthrown side: 4
## overturned syncline, exposed : 2
## syncline, exposed :27
##
## UNITNAME
## Brockman Iron Formation :24
## Marra Mamba Iron Formation :22
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 3
## Wittenoom Formation : 8
##
##
## ROCKTYPE1
## sedimentary carbonate : 8
## sedimentary other chemical or biochemical:49
## sedimentary siliciclastic : 1
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :24 0: 0
## Marra Mamba Iron Formation :22 1:58
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 3
## Wittenoom Formation : 8
##
##
## LATITUDE LONGITUDE
## Min. :-23.26 Min. :118.7
## 1st Qu.:-22.98 1st Qu.:118.9
## Median :-22.94 Median :119.0
## Mean :-22.87 Mean :119.0
## 3rd Qu.:-22.70 3rd Qu.:119.1
## Max. :-22.53 Max. :119.4
##
gaaAu<-which(finalTest1$true_troglofauna==0);gaaAu
## [1] 3 8 9 13 14 15 16 18 19 22 23 34 35 41 52 53 56 57 59 64 69 73 78
geoAu<-finalTest1[gaaAu,]
summary(geoAu)
## srain2mp bio21 bio16 slope
## Min. :0.1356 Min. :29.69 Min. :214.2 Min. : 0.1489
## 1st Qu.:0.1423 1st Qu.:29.76 1st Qu.:223.8 1st Qu.: 1.8485
## Median :0.1463 Median :29.77 Median :224.5 Median : 3.4507
## Mean :0.1465 Mean :29.77 Mean :225.6 Mean : 4.2842
## 3rd Qu.:0.1481 3rd Qu.:29.80 3rd Qu.:229.8 3rd Qu.: 5.3867
## Max. :0.1633 Max. :29.82 Max. :238.9 Max. :12.8002
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.09931 Min. :0.03795 Min. :0.04523 Min. : 0.000
## 1st Qu.:1.16273 1st Qu.:0.06686 1st Qu.:0.08127 1st Qu.: 0.000
## Median :1.80702 Median :0.09112 Median :0.11014 Median : 0.000
## Mean :2.38998 Mean :0.13402 Mean :0.13561 Mean : 2.361
## 3rd Qu.:3.74337 3rd Qu.:0.20341 3rd Qu.:0.18286 3rd Qu.: 4.808
## Max. :6.26119 Max. :0.29515 Max. :0.36527 Max. :10.364
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.5099 Min. :0.2334 Min. :1.1 Min. :0.0000
## 1st Qu.: 1.1570 1st Qu.:0.3374 1st Qu.:1.3 1st Qu.:0.0000
## Median : 4.9075 Median :0.4747 Median :1.3 Median :0.0000
## Mean : 4.8395 Mean :0.4815 Mean :1.3 Mean :0.4131
## 3rd Qu.: 5.9913 3rd Qu.:0.5641 3rd Qu.:1.3 3rd Qu.:0.0000
## Max. :17.0097 Max. :0.9767 Max. :1.6 Max. :4.5058
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :2.000 Min. :1.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:1.500 1st Qu.:1.0429
## Median :0.0000 Median :2.000 Median :3.000 Median :1.0429
## Mean :0.1835 Mean :2.043 Mean :3.043 Mean :0.8721
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:1.0429
## Max. :1.9350 Max. :3.000 Max. :7.000 Max. :1.0429
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. :0.00392
## 1st Qu.: 2.5880 1st Qu.: 1.2940 1st Qu.:1.367 1st Qu.:0.01738
## Median :300.0000 Median :2050.0000 Median :1.367 Median :0.03976
## Mean :361.3930 Mean :1319.8269 Mean :1.354 Mean :0.14841
## 3rd Qu.:900.0000 3rd Qu.:2350.0000 3rd Qu.:1.367 3rd Qu.:0.14600
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :1.26735
##
## HubDist MIN_AGE_MA month_collection
## Min. : 1.294 Min. :2453 January :7
## 1st Qu.: 3.746 1st Qu.:2454 March :3
## Median : 6.289 Median :2494 November :3
## Mean :22.821 Mean :2520 February :2
## 3rd Qu.:54.325 3rd Qu.:2597 April :2
## Max. :70.168 Max. :2597 September:2
## (Other) :4
## HubName
## anticline, exposed : 1
## concealed : 0
## exposed : 0
## normal, exposed, tick on downthrown side: 6
## overturned syncline, exposed : 4
## syncline, exposed :12
##
## UNITNAME
## Brockman Iron Formation :10
## Marra Mamba Iron Formation :10
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 1
## Wittenoom Formation : 1
##
##
## ROCKTYPE1
## sedimentary carbonate : 1
## sedimentary other chemical or biochemical:21
## sedimentary siliciclastic : 1
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :10 0:23
## Marra Mamba Iron Formation :10 1: 0
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 1
## Wittenoom Formation : 1
##
##
## LATITUDE LONGITUDE
## Min. :-23.07 Min. :118.7
## 1st Qu.:-22.94 1st Qu.:118.8
## Median :-22.93 Median :118.9
## Mean :-22.89 Mean :118.9
## 3rd Qu.:-22.91 3rd Qu.:119.0
## Max. :-22.55 Max. :119.2
##
plot(geo1$LONGITUDE,geo1$LATITUDE,xlim = c(Min_Lg,Max_Lg),ylim = c(Min_Lt,Max_Lt), cex=2, pch=20, col="blue", xlab='Longitude\n', ylab='Latitude', main='Modelo de ‘Youden’. Cordenadas da presença de troglofauna: Observadas e Preditas ',sub="\nObservação Presença = azul, Predição Presença = vermelha, Observação Ausência = Preta")
gbb<-which(Test_33New==1);gbb
## [1] 1 2 4 5 6 7 8 9 10 11 12 15 16 20 21 22 23 24 25 26 27 28 29 30 31
## [26] 32 34 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 52 53 54 55 57 60 61 63
## [51] 65 66 67 68 69 71 72 73 74 75 76 77 79 80 81
geo2<-finalTest1[gbb,]
summary(geo2)
## srain2mp bio21 bio16 slope
## Min. :0.1356 Min. :29.66 Min. :202.2 Min. : 0.08943
## 1st Qu.:0.1427 1st Qu.:29.74 1st Qu.:221.2 1st Qu.: 1.20730
## Median :0.1473 Median :29.76 Median :224.2 Median : 2.09344
## Mean :0.1469 Mean :29.75 Mean :225.9 Mean : 2.66784
## 3rd Qu.:0.1508 3rd Qu.:29.77 3rd Qu.:230.1 3rd Qu.: 3.49038
## Max. :0.1631 Max. :29.83 Max. :245.4 Max. :12.80595
##
## rugg500s rugg500cv pil_twicv pil_topos
## Min. :0.03319 Min. :0.03287 Min. :0.05175 Min. : 0.0000
## 1st Qu.:0.58854 1st Qu.:0.07169 1st Qu.:0.08987 1st Qu.: 0.0000
## Median :1.03983 Median :0.10369 Median :0.14482 Median : 0.1888
## Mean :1.55604 Mean :0.12589 Mean :0.15181 Mean : 1.5266
## 3rd Qu.:2.03901 3rd Qu.:0.17857 3rd Qu.:0.20128 3rd Qu.: 2.4066
## Max. :6.69625 Max. :0.30012 Max. :0.36527 Max. :10.3639
##
## pil_slps pil_slpcv nutrientsn mrvbf
## Min. : 0.1401 Min. :0.1951 Min. :1.100 Min. :0.0000
## 1st Qu.: 1.0099 1st Qu.:0.3656 1st Qu.:1.300 1st Qu.:0.0000
## Median : 2.2539 Median :0.4704 Median :1.300 Median :0.0000
## Mean : 3.0265 Mean :0.4607 Mean :1.297 Mean :0.4896
## 3rd Qu.: 4.7576 3rd Qu.:0.5202 3rd Qu.:1.300 3rd Qu.:0.6614
## Max. :10.1882 Max. :1.1404 Max. :1.600 Max. :4.5610
##
## mrrtf minfertf lf7rup hstructn
## Min. :0.0000 Min. :2.000 Min. :1.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.: 0.4286
## Median :0.0000 Median :2.000 Median :3.000 Median : 1.0429
## Mean :0.3102 Mean :2.031 Mean :3.308 Mean : 1.1182
## 3rd Qu.:0.5385 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.: 1.0429
## Max. :2.8142 Max. :3.000 Max. :7.000 Max. :10.0000
##
## geolrngaggn geolmnaggn bdensity50n slopern
## Min. : 0.0115 Min. : 0.0058 Min. :1.255 Min. : 0.00626
## 1st Qu.: 65.5000 1st Qu.: 32.7500 1st Qu.:1.341 1st Qu.: 0.02296
## Median :300.0000 Median :2050.0000 Median :1.367 Median : 0.06010
## Mean :320.7933 Mean :1557.3198 Mean :1.349 Mean : 0.54566
## 3rd Qu.:300.0000 3rd Qu.:2650.0000 3rd Qu.:1.367 3rd Qu.: 0.20859
## Max. :900.0000 Max. :2650.0000 Max. :1.367 Max. :21.14644
##
## HubDist MIN_AGE_MA month_collection
## Min. : 2.124 Min. :2453 June : 9
## 1st Qu.: 7.975 1st Qu.:2454 September: 9
## Median :17.310 Median :2506 April : 6
## Mean :28.264 Mean :2525 July : 6
## 3rd Qu.:51.892 3rd Qu.:2597 August : 6
## Max. :77.437 Max. :2597 December : 6
## (Other) :23
## HubName
## anticline, exposed :16
## concealed : 0
## exposed :10
## normal, exposed, tick on downthrown side: 5
## overturned syncline, exposed : 2
## syncline, exposed :32
##
## UNITNAME
## Brockman Iron Formation :23
## Marra Mamba Iron Formation :29
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 4
## Wittenoom Formation : 8
##
##
## ROCKTYPE1
## sedimentary carbonate : 8
## sedimentary other chemical or biochemical:56
## sedimentary siliciclastic : 1
##
##
##
##
## FORMATION true_troglofauna
## Brockman Iron Formation :23 0:13
## Marra Mamba Iron Formation :29 1:52
## Mount McRae Shale and Mount Sylvia Formation: 1
## Weeli Wolli Formation : 4
## Wittenoom Formation : 8
##
##
## LATITUDE LONGITUDE
## Min. :-23.26 Min. :118.7
## 1st Qu.:-22.98 1st Qu.:118.9
## Median :-22.94 Median :119.0
## Mean :-22.88 Mean :119.0
## 3rd Qu.:-22.68 3rd Qu.:119.0
## Max. :-22.53 Max. :119.3
##
points(geo2$LONGITUDE,geo2$LATITUDE,xlim = c(Min_Lg,Max_Lg),ylim = c(Min_Lt,Max_Lt),pch=5, col="red")
points(geoAu$LONGITUDE,geoAu$LATITUDE,xlim = c(115,122),ylim = c(-26,-21),pch = 4, col = rgb(0, 0, 0, 0.15))

combined_geo <- data.frame(
LONGITUDE = c(geo2$LONGITUDE, geoAu$LONGITUDE),
LATITUDE = c(geo2$LATITUDE, geoAu$LATITUDE)
)
write.csv(geo2, file = "Area2_predicted_presence_points_troglofauna_coordinates.csv", row.names = FALSE)