Show code
::opts_chunk$set(echo = TRUE, cache = FALSE, message = FALSE, warning = FALSE, error = TRUE, comment = "")
knitroptions(tidyverse.quiet = TRUE)
options(dplyr.summarise.inform = FALSE)
options(warn=-1)
Annotated source code needed to reproduce the analysis and figures from Freer and Tarling (2023) Front. Mar. Sci. doi: 10.3389/fmars.2023.908112
::opts_chunk$set(echo = TRUE, cache = FALSE, message = FALSE, warning = FALSE, error = TRUE, comment = "")
knitroptions(tidyverse.quiet = TRUE)
options(dplyr.summarise.inform = FALSE)
options(warn=-1)
Load packages to be used throughout
library(here)
library(plyr)
library(dplyr)
library(stringr)
library(tidyr)
library(tidyverse)
library(ggplot2)
library(ggspatial)
library(egg)
library(sf)
library(tidync)
library(forcats)
library(patchwork)
library(data.table)
library(rnaturalearthhires)
library(rnaturalearth)
library(raster)
library(sp)
library(viridis)
library(spatialEco)
library(car)
Load any custom functions that will be used
<- function(x, y, sp = TRUE, duplicate = TRUE, ...) {
point.in.poly if(!any(class(x)[1] == c("SpatialPoints", "SpatialPointsDataFrame", "sf"))) {
stop("x is not a suitable point feature object class") }
if(!any(class(y)[1] == c("SpatialPolygons", "SpatialPolygonsDataFrame", "sf"))) {
stop("y is not a suitable polygon feature object class") }
if(any(class(x) == "sfc")) { x <- sf::st_sf(x) }
if(duplicate == FALSE) {
if(!any(class(x)[1] == c("SpatialPoints", "SpatialPointsDataFrame"))) {
<- methods::as(x, "Spatial")
x if(dim(x@data)[2] == 0) stop("There are no attributes associated with points")
} if(!any(class(y)[1] == c("SpatialPolygons", "SpatialPolygonsDataFrame"))) {
<- methods::as(y, "Spatial")
y if(dim(y@data)[2] == 0) stop("There are no attributes associated with polygons")
}<- sp::over(x, y, returnList = TRUE)
o <- max(unlist(lapply(o, nrow)))
m <- row.names(y)
ids <- data.frame(t(sapply(1:length(o),
xy function(i) c(ids[i], c(o[[i]][,1], rep(NA, m))[1:m])
)))colnames(xy) <- c("p",paste0("pid", 1:m))
@data <- data.frame(x@data, xy)
xif( sp == FALSE ) sf::st_as_sf(x)
return( x )
else {
} if(any( class(x) == c("SpatialPoints", "SpatialPointsDataFrame"))) {
<- sf::st_as_sf(x)
x
}if(any(class(y) == "sfc")) { x <- sf::st_sf(y) }
if(any( class(y) == c("SpatialPolygons", "SpatialPolygonsDataFrame"))) {
<- sf::st_as_sf(y)
y
} if(dim(x)[2] == 1) x$pt.ids <- 1:nrow(x)
if(dim(y)[2] == 1) y$poly.ids <- 1:nrow(y)
<- sf::st_join(x, y, ...)
o if( sp ) o <- methods::as(o, "Spatial")
# if( sp ) o <- sf::as_Spatial(o)
return( o )
} }
The folders containing the raw occurrence data from OBIS were downloaded (using links below) and stored locally within the folder “input_data”.
The occurrence files for the three species are merged into one dataframe and various cleaning and quality control steps are applied.
::i_am("code_frontiers_2023.qmd")
here
# read in raw obis files
<-read.csv("input_data/e114c56d-7073-4957-9af3-1b92f2e3a85b/Occurrence.csv", sep = ',', header = T)
cfin<-read.csv("input_data/ebdfc962-d62d-48eb-9095-bd49cc177b6a/Occurrence.csv", sep = ',', header = T)
chyp<-read.csv("input_data/e55e5e2d-3817-4429-a54f-1d3356c1c396/Occurrence.csv", sep = ',', header = T)
cgla
#merge files of each species
<-rbind.fill(cfin,cgla,chyp)
dat_merge
#remove large redundant files
rm(cfin,cgla,chyp)
#new lat and lon cols with 2 dp
$decimallongitude2<-round(dat_merge$decimallongitude, digits = 2)
dat_merge$decimallatitude2<-round(dat_merge$decimallatitude, digits = 2)
dat_merge
#remove records without species name
<- dat_merge[-which(dat_merge$scientificname == ""), ]
dat_merge
#format date cells, return as numeric
$eventdate<-as.Date(dat_merge$eventdate)
dat_merge$month<- strftime(dat_merge$eventdate, "%m")
dat_merge$day<- strftime(dat_merge$eventdate, "%d")
dat_merge$year<- strftime(dat_merge$eventdate, "%Y")
dat_merge
$month<- as.numeric(dat_merge$month)
dat_merge$day<- as.numeric(dat_merge$day)
dat_merge$year<- as.numeric(dat_merge$year)
dat_merge
#remove records in southern hemisphere
<-filter(dat_merge, decimallatitude2 >0)
dat_merge
#read in shapefiles for land and pacific sector
<- ne_countries(scale = "large", returnclass = "sf") # country polygons
world <-read_sf(dsn = here::here("input_data"), layer = "PACPOLY1") #pacific polygon 1
pacpoly1<-read_sf(dsn = here::here("input_data"), layer = "PACPOLY2") #pacific polygon 2
pacpoly2
#make obis dataset an sf object
<-dat_merge %>%
dat_mergest_as_sf(coords = c("decimallongitude", "decimallatitude"), remove=F) %>%
st_cast("MULTIPOINT") %>%
st_set_crs(4326)
#remove records outside each shapefile
<- sapply(st_intersects(dat_merge, world),function(x){length(x)==0})
outside <- dat_merge[outside,]
dat_merge rm(world,outside)
<- sapply(st_intersects(dat_merge, pacpoly1),function(x){length(x)==0})
outsidepacpoly1 <- dat_merge[outsidepacpoly1,]
dat_merge rm(pacpoly1,outsidepacpoly1)
<- sapply(st_intersects(dat_merge, pacpoly2),function(x){length(x)==0})
outsidepacpoly2 <- dat_merge[outsidepacpoly2,]
dat_merge rm(pacpoly2,outsidepacpoly2)
Metadata fields of interest are cleaned and formatted to condense inputs into meaningful and usable categories
#list all levels within lifestage column
$lifestage<-as.character(dat_merge$lifestage)
dat_mergelevels(as.factor(dat_merge$lifestage))
#re-name lifestage levels
#if data are from CPR, specify CV-CVI lifestage
$lifestage[dat_merge$collectioncode == "CPR"] <- "CV-CVI"
dat_merge
# use information from "sex" column to specify missing lifestage information
levels(as.factor(dat_merge$sex))
$lifestage[dat_merge$sex == "F"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "Female"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "female"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "female;;"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "females"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "females only"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "females present"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "M"] <- "CVI_M"
dat_merge$lifestage[dat_merge$sex == "male"] <- "CVI_M"
dat_merge$lifestage[dat_merge$sex == "Male"] <- "CVI_M"
dat_merge$lifestage[dat_merge$sex == "MALE"] <- "CVI_M"
dat_merge$lifestage[dat_merge$sex == "male;;"] <- "CVI_M"
dat_merge$lifestage[dat_merge$sex == "female; male"] <- "CVI"
dat_merge$lifestage[dat_merge$sex == "male; female"] <- "CVI"
dat_merge$lifestage[dat_merge$sex == "males and females"] <- "CVI"
dat_merge$lifestage[dat_merge$sex == "M,F"] <- "CVI"
dat_merge$lifestage[dat_merge$sex == "U"] <- "CVI"
dat_merge
# group all entries that are unknown
$lifestage[dat_merge$lifestage == "Not specified"] <- "Unknown"
dat_merge$lifestage[dat_merge$lifestage == "unassigned"] <- "Unknown"
dat_merge$lifestage[dat_merge$lifestage == "Unstaged"] <- "Unknown"
dat_merge$lifestage[dat_merge$lifestage == ""] <- "Unknown"
dat_merge$lifestage[dat_merge$lifestage == "^$"] <- "Unknown"
dat_merge$lifestage <- dat_merge$lifestage %>% replace_na('Unknown')
dat_merge
$lifestage <- as.factor(dat_merge$lifestage)
dat_merge#group all entries in to a condensed list of options
levels(dat_merge$lifestage) <- list(CI = c("copepoditeI","copepodite stage I","Copepodite I","copepodite I","C1","C1: COPEPODITE I","CI","C1 (Copepodite I)"),
CII = c("copepoditeII","copepodite stage II","Copepodite II", "copepodite II","C2","C2: COPEPODITE II","CII","C2 (Copepodite II)"),
CIII = c("copepoditeIII", "copepodite stage III","Copepodite III","copepodite III", "C3","C3: COPEPODITE III","CIII","C3 (Copepodite III)"),
CIV = c("copepoditeIV","copepodite stage IV","Copepodite IV","C4","C4: COPEPODITE IV","CIV","C4 (Copepodite IV)"),
CV = c("CV","copepoditeV ","copepodite V ","Copepodite V","copepodite V","C5","C5: COPEPODITE V","C5 (Copepodite V)"),
CVI = c("CVI","copepoditeVI","copepodite VI SPENT","copepodite VI GRAVID","copepodite VI DEVELOPING","copepodite VI", "adult","Adult", "ADULT","adult - no sex (copepodite VI)"),
CVI_F = c("F","CVI female","ADULT F BEARING SPERMATOPHORES","adult female (copepodite VI)","AF","adult female"),
CVI_M = c("CVI male","adult male (copepodite VI)","AM","adult male"),
CV_CVI = c("CV +","copepodite V-VI","C5-6: COPEPODITE 5 - 6 stages (5-6) were counted in one group", "CV-CVI"),
CI_CV = c("Copepodite IV-V","copepodite IV-V","copepodite I-V","C3-4: COPEPODITE 3 - 4 stages (3-4) were counted in one group","copepodite I-IV","Copepodite I-III", "CIV +", "C1-C3 (Copepodite I - Copepodite III)","C4-C5 (Copepodite IV - Copepodite V)"),
C_unass. = c("COPEPODITE(S) sum of various (unspecified) copepodite stages","copepodite 400-800 um - cyclopoids and harpacticoi","copepodite - no stage assigned","copepodite"),
NI_NVI = c("naupliusVI","naupliusV","naupliusIV","naupliusIII","naupliusII","naupliusI","NAUPLIUS/NAUPLII","Nauplius","nauplius","nauplii V","nauplii 5","nauplii","N6: NAUPLIUS VI could be metanauplius", "N5: NAUPLIUS V","N5-6: NAUPLII 5 - 6","N4: NAUPLIUS IV","N4-5: NAUPLII 4 - 5","N4","N3: NAUPLIUS III","N3-4: NAUPLII 3 - 4","N3","N2: NAUPLIUS II","N2-3: NAUPLII 2 - 3","N2","N1: NAUPLIUS I","N1","NP (Nauplius)"),
Other = c("POSTLARVAE/SUB-ADULT (Codes 7+8)","stage IV","VI","Vi","V,VI","V","stage IV","MOLT STAGE 6 (decapods)","MOLT STAGE 5 (decapods)", "MOLT STAGE 3 (decapods)" ,"MOLT STAGE 2 (decapods)","MOLT STAGE 1 (decapods)","macroplankton","leptocephalus","Larvae","larvae","JUVENILE","juvenile","; juvenile","juv","IV","III","II", "I-IV","exuviae","exoskeletons","EPHYRA life stage genus Aurelia", "1","2","3","4","5","6","7"),
Ova = c("ova,","egg? 0.29-0.31mm","EGG/OVA"),
Unknown = "Unknown")
$lifestage <- dat_merge$lifestage %>% replace_na('Unknown')
dat_merge
#check new categories
levels(as.factor(dat_merge$lifestage))
summary(dat_merge$lifestage)
#repeat for sampling protocol levels
$samplingprotocol<- as.character(dat_merge$samplingprotocol)
dat_mergelevels(as.factor(dat_merge$samplingprotocol))
$samplingprotocol[dat_merge$collectioncode == "CPR"] <- "CPR"
dat_merge$samplingprotocol[dat_merge$collectioncode == "cpr_"] <- "CPR"
dat_merge$samplingprotocol[dat_merge$collectioncode == "MOC10GB"] <- "MOCNESS"
dat_merge
$samplingprotocol[dat_merge$samplingprotocol == "Horizontal tows with the No 5 net"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "Horizontal tow made with a No 5 net"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "T-160 cm"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "T-80 cm"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "No 5 net"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "Plankton vertical tows using No.0 mesh net with mouth diameter of one metre and a length of seventeen feet."] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "No 0 metre net"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "GearType=24-mesh to the inch net"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "JNCCMNCR10000129_METH_001"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCCW30000000037_METH_002"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCED00500000002_METH_001"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCED00500000005_METH_001"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCED00500000005_METH_004"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCED00500000005_METH_005"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "Mesh net cast; Mitchell et al. 2002"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "JNCCMNCR10000129_METH_001"] <- "nonspecific"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "https://www.bodc.ac.uk/resources/inventories/cruise_inventory/report/11432/"] <- "nonspecific"
dat_merge$samplingprotocol <- dat_merge$samplingprotocol %>% replace_na('Unknown')
dat_merge$samplingprotocol[dat_merge$samplingprotocol == ""] <- "Unknown"
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "^$"] <- "Unknown"
dat_merge
$samplingprotocol[grepl("Nansen", dat_merge$occurrenceremarks, ignore.case = T)] <- "Nansen"
dat_merge$samplingprotocol[grepl("Multi", dat_merge$occurrenceremarks, ignore.case = T)] <- "Multinet"
dat_merge$samplingprotocol[grepl("Pump", dat_merge$occurrenceremarks, ignore.case = T)] <- "Water pump"
dat_merge$samplingprotocol[grepl("Juday", dat_merge$occurrenceremarks, ignore.case = T)] <- "Juday"
dat_merge$samplingprotocol[grepl("WP", dat_merge$occurrenceremarks, ignore.case = T)] <- "WPII"
dat_merge$samplingprotocol[grepl("egg", dat_merge$occurrenceremarks, ignore.case = T)] <- "Egg net"
dat_merge$samplingprotocol[grepl("N70V", dat_merge$occurrenceremarks, ignore.case = T)] <- "VCN"
dat_merge$samplingprotocol[grepl("BONGO", dat_merge$catalognumber, ignore.case = T)] <- "BONGO"
dat_merge
$samplingprotocol <- as.factor(dat_merge$samplingprotocol)
dat_merge
levels(dat_merge$samplingprotocol) <- list(BIONESS = c("BIONESS 1.0m","BIONESS 0.5m"),
Bongo = c("Bongo net hauled vertically","BONGO_505UM_MICROSCOPY", "BONGO"),
CPR = "CPR",
Juday = c("Juday-36 cm" ,"Juday-80 cm","Russeh?v Juday-h?v 37", "Juday"),
MOCNESS = "MOCNESS",
Ring = c("Ring net hauled vertically","Ring net 0.75m xbow","Ring net 0.75m","Ring net 0.75 o/c","Ring net 0.5m", "TWINRING_150UM_MICROSCOPY"),
WPII = c("WPII","WP II-56 cm","SamplerTypeCode=WP2; MethodReference=HC-C-C7; MeshSize(um)=90","SamplerTypeCode=WP-2; MethodReference=HC-C-C7; MeshSize(um)=90","SamplerTypeCode=WP-2; MeshSize(um)=90"),
Grab = c("Smith-McIntyre grab sampler (surface area sampled=0.1 m2; mesh=1.0mm2)","Sampling instrument: TOOL0653: Van Veen Grab; Seive mesh size: 1.000mm","Sampling instrument: 50: Sediment Grab; Seive mesh size: 1.000mm"),
Sledge = "RP-sledge,Subsample method: Decanted - Mesh size (mm): 0.5",
OPC = c("OPC side nets","OPC experimental"),
Campelen = "Campelen",
Water_pump = "Water pump",
Egg_net = "Egg net",
Nansen = "Nansen",
VCN = "VCN",
Multinet= "Multinet",
Nonspec. = "nonspecific",
Unknown = "Unknown")
$samplingprotocol <- dat_merge$samplingprotocol %>% replace_na('Unknown')
dat_merge
#check new categories
levels(as.factor(dat_merge$samplingprotocol))
summary(dat_merge$samplingprotocol)
Duplicates are removed, the cleaned dataset is trimmed to columns of interest and then saved
#remove duplicates based on certain fields
<-distinct(dat_merge, scientificname, year, month, day, lifestage, decimallongitude2, decimallatitude2,minimumdepthinmeters,maximumdepthinmeters,.keep_all=T)
dat_nodups
#get n of duplicates
nrow(dat_merge) - nrow(dat_nodups)
#remove unnecessary columns
<-dat_nodups[c(1,2,3,4,9,11,12,80,81,82,89,91,95,96,97,99,100,120,121,122,123,124,128,129,130,226,227)]
dat_nodups
#save data to subfolder "output_data"
fwrite(dat_nodups,here::here("output_data", "calanus_clean_nodups_obis.csv"))
rm(dat_merge)
gc()
We can now plot histogram and barplots of the most interesting metadata fields - year of collection, sampling protocol, and life-stage
#shorten name for ease
<-dat_nodups
dat
#plot of year metadata
<-ggplot(dat, aes(x=year,fill=scientificname)) +
yearplotgeom_histogram(color="grey60", binwidth = 5)+
geom_vline(aes(xintercept=1930),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=1955),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=2005),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=1995),color="grey", linetype="dashed", size=1)+
geom_text(aes(x=1930,y=7000, label="\nCPR began 1931"), colour="black", angle=90) +
geom_text(aes(x=1955,y=20000, label="\n IPY 1957"), colour="black", angle=90) +
geom_text(aes(x=2005,y=26000, label="\n IPY 2007"), colour="black", angle=90) +
geom_text(aes(x=1997,y=28000, label="\n GLOBEC 1996"), colour="black", angle=90) +
labs(y = "Number of records", x= "Year") +
scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position="right")
#plot of sampling protocol metadata
<-ggplot(data=dat %>% group_by(samplingprotocol) %>% filter(n() >50), aes(x=forcats::fct_infreq(samplingprotocol),fill=scientificname)) +
netplotgeom_bar(color="grey60", stat="count")+
labs(y = "Number of records", x= "Sampling protocol") +
#scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position="right")
#plot of lifestage metadata
<-ggplot(data=dat %>% group_by(lifestage) %>% filter(n() >1000), aes(x=forcats::fct_infreq(lifestage),fill=scientificname)) +
stageplotgeom_bar(color="grey60", stat="count")+
labs(y = "Number of records", x= "Lifestage") +
#scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position=c(.85,.85))
#combine plots to multiple panels
<- yearplot + netplot / stageplot +
plot_combineplot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a') &
theme(plot.tag = element_text(face = 'bold'))
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "figs1.png"), plot_combine, scale=1.5, device= "png", dpi = 500, width=12, height=5)
dev.set(dev.next())
rm(plot_combine,netplot,yearplot,stageplot,dat_nodups,dat)
gc()
::include_graphics(here::here("output_figs", "figs1.png")) knitr
The folder containing the raw occurrence data from GBIF were downloaded (using link below) and stored locally in the folder “input_data”
#read in gbif file from download
<-read.delim("input_data/0246981-220831081235567/occurrence.txt", header=T)
gbif
$decimallongitude2<-round(gbif$decimalLongitude, digits = 2)
gbif$decimallatitude2<-round(gbif$decimalLatitude, digits = 2)
gbif$eventDate<-as.Date(gbif$eventDate)
gbif$month<- strftime(gbif$eventDate, "%m")
gbif$day<- strftime(gbif$eventDate, "%d")
gbif$year<- strftime(gbif$eventDate, "%Y")
gbif
$month<- as.numeric(gbif$month)
gbif$day<- as.numeric(gbif$day)
gbif$year<- as.numeric(gbif$year)
gbif
#remove those in southern hemisphere
<-filter(gbif, decimallatitude2 >0)
dat_merge
#read in shapefiles for land and pacific sector
<- ne_countries(scale = "large", returnclass = "sf") # country polygons
world <-read_sf(dsn = here::here("input_data"), layer = "PACPOLY1") #pacific polygon 1
pacpoly1<-read_sf(dsn = here::here("input_data"), layer = "PACPOLY2") #pacific polygon 2
pacpoly2
#make gbif dataset an sf object
<-dat_merge %>%
dat_mergest_as_sf(coords = c("decimalLongitude", "decimalLatitude"), remove=F) %>%
st_cast("MULTIPOINT") %>%
st_set_crs(4326)
#remove records outside each shapefile
<- sapply(st_intersects(dat_merge, world),function(x){length(x)==0})
outside <- dat_merge[outside,]
dat_merge rm(world,outside)
<- sapply(st_intersects(dat_merge, pacpoly1),function(x){length(x)==0})
outsidepacpoly1 <- dat_merge[outsidepacpoly1,]
dat_merge rm(pacpoly1,outsidepacpoly1)
<- sapply(st_intersects(dat_merge, pacpoly2),function(x){length(x)==0})
outsidepacpoly2 <- dat_merge[outsidepacpoly2,]
dat_merge rm(pacpoly2,outsidepacpoly2)
Metadata fields of interest are cleaned and formatted to condense inputs into meaningful and usable categories
#re-name lifestage levels
$lifeStage <- as.character(dat_merge$lifeStage)
dat_mergelevels(as.factor(dat_merge$lifeStage))
$lifeStage[dat_merge$collectionCode == "CPR"] <- "CV-CVI"
dat_merge$lifeStage[dat_merge$datasetName == "CPR"] <- "CV-CVI"
dat_merge
levels(as.factor(dat_merge$sex))
$lifeStage[dat_merge$sex == "FEMALE"] <- "CVI_F"
dat_merge$lifeStage[dat_merge$sex == "MALE"] <- "CVI_M"
dat_merge
$lifeStage[grepl("lifehistory=CI", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CI"
dat_merge$lifeStage[grepl("lifehistory=CII", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CII"
dat_merge$lifeStage[grepl("lifehistory=CIII", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CIII"
dat_merge$lifeStage[grepl("lifehistory=CIV", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CIV"
dat_merge$lifeStage[grepl("lifehistory=CV", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CV"
dat_merge$lifeStage[grepl("lifehistory=CVI", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI"
dat_merge$lifeStage[grepl("lifehistory=M", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_M"
dat_merge$lifeStage[grepl("lifehistory=AM", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_M"
dat_merge$lifeStage[grepl("lifehistory=male", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_M"
dat_merge$lifeStage[grepl("lifehistory=F", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_F"
dat_merge$lifeStage[grepl("lifehistory=AF", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_F"
dat_merge$lifeStage[grepl("lifehistory=female", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_F"
dat_merge
$lifeStage[dat_merge$lifeStage == "^$"] <- "Unknown"
dat_merge$lifeStage[dat_merge$lifeStage==""] <- "Unknown"
dat_merge$lifeStage <- dat_merge$lifeStage %>% replace_na('Unknown')
dat_merge
$lifeStage <- as.factor(dat_merge$lifeStage)
dat_merge
levels(dat_merge$lifeStage) <- list(CI = "CI",
CII = "CII",
CIII = "CIII",
CIV = "CIV",
CV = "CV",
CVI = c("CVI","Adult"),
CVI_F ="CVI_F",
CVI_M = "CVI_M",
CV_CVI = "CV-CVI",
NI_NVI = c("Metanaupilus","Nauplius"),
Other = c("Juvenile","Larva"),
Unknown = c("Unknown"))
$lifeStage <- dat_merge$lifeStage %>% replace_na('Unknown')
dat_merge
levels(as.factor(dat_merge$lifeStage))
summary(dat_merge$lifeStage)
#re-name protocol levels
$samplingProtocol<- as.factor(dat_merge$samplingProtocol)
dat_mergelevels(dat_merge$samplingProtocol)
$samplingProtocol<- as.character(dat_merge$samplingProtocol)
dat_merge
$samplingProtocol[dat_merge$collectionCode == "CPR"] <- "CPR"
dat_merge$samplingProtocol[dat_merge$datasetName == "CPR"] <- "CPR"
dat_merge
$samplingProtocol[dat_merge$samplingProtocol == "Plankton tow net"] <- "nonspecific"
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "GearType=24-mesh to the inch net"] <- "nonspecific"
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "MRPMN00100000033_METH_008"] <- "nonspecific"
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "Mesh net cast; Mitchell et al. 2002"] <- "nonspecific"
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "https://www.bodc.ac.uk/resources/inventories/cruise_inventory/report/11432/"] <- "nonspecific"
dat_merge$samplingProtocol <- dat_merge$samplingProtocol %>% replace_na('Unknown')
dat_merge$samplingProtocol[dat_merge$samplingProtocol == ""] <- "Unknown"
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "^$"] <- "Unknown"
dat_merge
$samplingProtocol[grepl("Nansen", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Nansen"
dat_merge$samplingProtocol[grepl("Multi", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Multinet"
dat_merge$samplingProtocol[grepl("Pump", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Water pump"
dat_merge$samplingProtocol[grepl("Juday", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Juday"
dat_merge$samplingProtocol[grepl("WP", dat_merge$occurrenceRemarks, ignore.case = T)] <- "WPII"
dat_merge$samplingProtocol[grepl("egg", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Egg net"
dat_merge$samplingProtocol[grepl("N70V", dat_merge$occurrenceRemarks, ignore.case = T)] <- "VCN"
dat_merge
$samplingProtocol <- as.factor(dat_merge$samplingProtocol)
dat_merge
levels(dat_merge$samplingProtocol) <- list(Bongo = c("BONGO_505UM_MICROSCOPY", "BONGO"),
CPR = "CPR",
Juday = "Juday",
MOCNESS = "MOCNESS",
Ring = c("Ring net 0.75m","Ring net 0.5m", "TWINRING_150UM_MICROSCOPY"),
WPII = c("WPII","WP II-56 cm","WP-2 64-µm mesh net"),
Grab = c("Grab", "modified Smith-McIntyre-grab, 3-5 samples, sampling area 0.1 m"),
Sledge = c("RP-sledge,Subsample method: Decanted - Mesh size (mm): 0.5","RP-sledge"),
Campelen = "Campelen",
Dredge = "3nglr dredge",
Sediment_trap = "500ml sediment trap bottle",
Water_pump = "Water pump",
ROV = "ROV",
Egg_net = "Egg net",
Nansen = "Nansen",
VCN = c("VCN","Vertical net"),
Multinet= c("Multinet","Multinet 64-µm mesh net"),
Nonspec. = "nonspecific",
Unknown = "Unknown")
$samplingProtocol <- dat_merge$samplingProtocol %>% replace_na('Unknown')
dat_merge
levels(as.factor(dat_merge$samplingProtocol))
summary(dat_merge$samplingProtocol)
Duplicates are removed, the cleaned dataset is trimmed to columns of interest and then saved
#remove duplicates based on certain fields
<-distinct(dat_merge, species, year, month, day, lifeStage, decimallongitude2, decimallatitude2,depth,.keep_all=T)
dat_nodups
#get n of duplicates
nrow(dat_merge) - nrow(dat_nodups)
#subset to columns of interest
<-dat_nodups[c(1,60,61,62,68,73,74,75,77,78,92,103,107,108,109,112,113,114,115,138,139,217,222,223,241,260,261)]
dat_nodups
#save data to a subfolder "output_data"
fwrite(dat_nodups, here::here("output_data","calanus_clean_nodups_gbif.csv"))
rm(dat_merge,gbif)
gc()
We can now plot histogram and barplots of the most interesting metadata fields - year of collection, sampling protocol, and life-stage
#shorten name for ease
<-dat_nodups
dat
#plot of year metadata column
<-ggplot(dat, aes(x=year,fill=species)) +
yearplotgeom_histogram(color="grey60", binwidth = 5)+
geom_vline(aes(xintercept=1930),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=1955),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=2005),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=1995),color="grey", linetype="dashed", size=1)+
geom_text(aes(x=1930,y=3000, label="\nCPR began 1931"), colour="black", angle=90) +
geom_text(aes(x=1955,y=10000, label="\n IPY 1957"), colour="black", angle=90) +
geom_text(aes(x=2005,y=14000, label="\n IPY 2007"), colour="black", angle=90) +
geom_text(aes(x=1996,y=12000, label="\n GLOBEC 1996"), colour="black", angle=90) +
labs(y = "Number of records", x= "Year") +
scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position="right")
#plot of sampling protocol metadata column
<-ggplot(data=dat %>% group_by(samplingProtocol) %>% filter(n() >200), aes(x=forcats::fct_infreq(samplingProtocol),fill=species)) +
netplotgeom_bar(color="grey60", stat="count")+
labs(y = "Number of records", x= "Sampling protocol") +
#scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position="right")
#plot of lifestage metadata column
<-ggplot(data=dat %>% group_by(lifeStage) %>% filter(n() >200), aes(x=forcats::fct_infreq(lifeStage),fill=species)) +
stageplotgeom_bar(color="grey60", stat="count")+
labs(y = "Number of records", x= "Lifestage") +
#scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position=c(.85,.85))
<- yearplot + netplot / stageplot +
plot_combineplot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a') &
theme(plot.tag = element_text(face = 'bold'))
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "figs2.png"), plot_combine, scale=1.5, device= "png", dpi = 500, width=12, height=5)
dev.set(dev.next())
rm(plot_combine,netplot,yearplot,stageplot,dat_nodups,dat)
gc()
::include_graphics(here::here("output_figs", "figs2.png")) knitr
Here we format GBIF and OBIS datasets to have columns that can be merged and then duplicates across the two databases are removed.
#read in saved, cleaned datasets
<-read.csv(here::here("output_data", "calanus_clean_nodups_obis.csv"))
obis_nodups<-read.csv(here::here("output_data", "calanus_clean_nodups_gbif.csv"))
gbif_nodups
#rename relevant cols in gbif to match obis fields
colnames(gbif_nodups)[1] <- "id"
colnames(gbif_nodups)[2] <- "institutioncode"
colnames(gbif_nodups)[3] <- "collectioncode"
colnames(gbif_nodups)[4] <- "datasetname"
colnames(gbif_nodups)[5] <- "occurrenceid"
colnames(gbif_nodups)[9] <- "lifestage"
colnames(gbif_nodups)[11] <- "occurrenceremarks"
colnames(gbif_nodups)[12] <- "eventdate"
colnames(gbif_nodups)[16] <- "samplingprotocol"
colnames(gbif_nodups)[20] <- "decimallatitude"
colnames(gbif_nodups)[21] <- "decimallongitude"
colnames(gbif_nodups)[22] <- "dataset_id"
colnames(gbif_nodups)[23] <- "av_depth"
#subset gbif to the columns using
<-gbif_nodups[c(1,2,3,4,5,9,11,12,13,14,15,16,20,21,22,23,25,26,27)]
gbif_ref$database <-"gbif"
gbif_ref$eventdate <- as.Date(gbif_ref$eventdate)
gbif_ref
#rename relevant cols in gbif to match obis fields
colnames(obis_nodups)[5] <- "species"
#subset obis to the columns using
<-obis_nodups[c(1,2,3,4,5,6,7,8,9,10,11,12,16,18,22,23,24,25,26,27)]
obis_ref$database <-"obis"
obis_ref$eventdate<-as.Date(obis_ref$eventdate)
obis_ref
#make sure all min and max depths in obis are positive (ie without - signs infront of depth val)
$mindepth <- with(obis_ref, ifelse(minimumdepthinmeters < 0, -minimumdepthinmeters, minimumdepthinmeters))
obis_ref$maxdepth <- with(obis_ref, ifelse(maximumdepthinmeters < 0, -maximumdepthinmeters, maximumdepthinmeters))
obis_ref$maxdepth <- with(obis_ref, ifelse(is.na(maxdepth), mindepth, maxdepth))
obis_ref<-obis_ref[-c(6,7)]
obis_ref
#get av depth column to match gbif depth information
$av_depth <- rowMeans(obis_ref[20:21], na.rm=TRUE)
obis_ref$av_depth[obis_ref$av_depth == "NaN"] <- NA
obis_ref
#merge obis and gbif
<-rbind.fill(obis_ref,gbif_ref)
merge_dat
#remove duplicates based on certain fields
<-distinct(merge_dat, species, year, month, day, lifestage, decimallongitude2, decimallatitude2, av_depth, .keep_all=T) merge_dat_nodups
Bathymetry data were downloaded from GEBCO, available here and stored locally in the folder “input_data”.
#add associate bathymetry information for each point
coordinates(merge_dat_nodups)<- ~ decimallongitude + decimallatitude
<-raster("input_data/bathy.asc")
bathy<-extract(bathy,merge_dat_nodups)
f<-as.data.frame(cbind(merge_dat_nodups,f))
merge_dat_nodups_bathycolnames(merge_dat_nodups_bathy)[21]<-"bathy"
#flag records with depth = NA and depth > bathy
#first negate av_depth column to match bathymetry depth signs
$av_depth<- merge_dat_nodups_bathy$av_depth*-1
merge_dat_nodups_bathy
#second create new column to flag records where av depth is greater than bathy
$depth_flag[merge_dat_nodups_bathy$av_depth < merge_dat_nodups_bathy$bathy] = 1
merge_dat_nodups_bathy$depth_flag[merge_dat_nodups_bathy$av_depth >= merge_dat_nodups_bathy$bathy] = 0
merge_dat_nodups_bathy$depth_flag[is.na(merge_dat_nodups_bathy$bathy)] = 0
merge_dat_nodups_bathy
#save merged dataset to a subfolder "output_data"
fwrite(merge_dat_nodups_bathy,here::here("output_data", "calanus_clean_nodups_gbifobis.csv"))
gc()
rm(gbif_nodups, obis_nodups, obis_ref, gbif_ref, f, merge_dat, merge_dat_nodups)
We can now plot histogram and barplots of the most interesting metadata fields - year of collection, sampling protocol, and life-stage
#shorten name for ease
<-merge_dat_nodups_bathy
dat
#plot of year metadata column
<-ggplot(dat, aes(x=year,fill=species)) +
yearplotgeom_histogram(color="grey60", binwidth = 5)+
geom_vline(aes(xintercept=1930),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=1955),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=2005),color="grey", linetype="dashed", size=1)+
geom_vline(aes(xintercept=1995),color="grey", linetype="dashed", size=1)+
geom_text(aes(x=1930,y=7000, label="\nCPR began 1931"), colour="black", angle=90) +
geom_text(aes(x=1955,y=20000, label="\n IPY 1957"), colour="black", angle=90) +
geom_text(aes(x=2005,y=28000, label="\n IPY 2007"), colour="black", angle=90) +
geom_text(aes(x=1997,y=31000, label="\n GLOBEC 1996"), colour="black", angle=90) +
labs(y = "Number of records", x= "Year") +
scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position="right")
#plot of sampling protocol metadata column
<-ggplot(data=dat %>% group_by(samplingprotocol) %>% filter(n() >200), aes(x=forcats::fct_infreq(samplingprotocol),fill=species)) +
netplotgeom_bar(color="grey60", stat="count")+
labs(y = "Number of records", x= "Sampling protocol") +
#scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position="right")
#plot of lifestage metadata column
<-ggplot(data=dat %>% group_by(lifestage) %>% filter(n() >4000), aes(x=forcats::fct_infreq(lifestage),fill=species)) +
stageplotgeom_bar(color="grey60", stat="count")+
labs(y = "Number of records", x= "Lifestage") +
#scale_x_continuous(breaks=seq(1880, 2020, 20))+
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
plot.title = element_text(size = 14, face = "bold"),
legend.position=c(.85,.85))
<- yearplot + netplot / stageplot +
plot_combineplot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a') &
theme(plot.tag = element_text(face = 'bold'))
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "fig1.png"), plot_combine, scale=1.5, device= "png", dpi = 500, width=12, height=5)
dev.set(dev.next())
rm(plot_combine,netplot,yearplot,stageplot,dat)
gc()
::include_graphics(here::here("output_figs", "fig1.png")) knitr
Code for panel A of this figure is adapted from Webb et al. (2010)
#subset data to only those with no depth innacuracies
<-subset(merge_dat_nodups_bathy, merge_dat_nodups_bathy$depth_flag == 0)
dat_bathy_filt
#for plot
$count<-1
dat_bathy_filt
#define latitude categories
$lat.cat <- cut(dat_bathy_filt$decimallatitude, breaks = seq(30,90, by = 1), include.lowest = T)
dat_bathy_filt#define depth categories
$sd.cat <- cut(dat_bathy_filt$av_depth, breaks = c(
dat_bathy_filtseq(-4000,0, by = 50)),include.lowest = T)
#tablulate summed record counts by sample depth category (rows) and latitude category (columns)
<- tapply(dat_bathy_filt$count, list(dat_bathy_filt$sd.cat, dat_bathy_filt$lat.cat), sum)
recs.sum #For the figure to work properly, need to flip the matrices vertically (which has the effect of making depth 'negative', so that greater depths appear lower down on the figure)
<- recs.sum[nrow(recs.sum):1,]
recs.sum
#create a dataframe to be used within ggplot
<- data.frame(recs.sum)
df colnames(df)<-seq(30,89)
<-gather(df,key,value)
df<-seq(50,4000,50)
n<-rep(n, times=60)
n$z<-n*-1
dfcolnames(df)<-c("lat","count","z")
#repeat previous steps with bathymetry categories and latitude categories, rather than sample depth
<- rasterToPoints(bathy)
batt<-as.data.frame(batt)
batt$lat.cat <- cut(batt$y, breaks = seq(30,90, by = 1), include.lowest = T)
batt$sd.cat <- cut(batt$bathy, breaks = seq(-4000,0, by = 50), include.lowest = T)
batt$minbathy[batt$bathy == ave(batt$bathy, batt$lat.cat, FUN=min)]<-1
batt$minbathy[!batt$bathy == ave(batt$bathy, batt$lat.cat, FUN=min)]<-0
batt<- tapply(batt$minbathy, list(batt$sd.cat, batt$lat.cat), max)
bat <- bat[nrow(bat):1,]
bat
#loop through each cell to find locations where bathymetric depth is exceeded. If found, make these cells and all those below it =1. Will be used to black out cells in panel a.
for (i in 1:nrow(bat)-1){
for (j in 1:ncol(bat)-1){
if(isTRUE(bat[i, j] == 1)==TRUE){
+1,j] = 1
bat[i
}
}
}
#create a dataframe to be used within ggplot
<-data.frame(bat)
batdfcolnames(batdf)<-seq(30,89)
<-gather(batdf,key,value)
batdf<-seq(50,4000,50)
n<-rep(n, times=60)
n$z<-n*-1
batdfcolnames(batdf)<-c("lat","bathymin","z")
#combine dataframes
<-cbind(df,batdf)
df2<-df2[,c(-4,-6)]
df2
#ggplot panel a
<- "grey40"
textcol <- ggplot(df2,aes(x=lat,y=z))+
p geom_tile(data=df2,aes(fill=log(count)),colour="white",size=0.2) +
geom_tile(data=subset(df2,df2$bathymin==1),fill="grey20",colour= NA,size=0.2) +
guides(fill=guide_legend(title="Log no. of records"))+
labs(x="Latitude (degrees)",y="Depth (metres)")+
scale_y_continuous(expand=c(0,0),breaks=c(-50,-500,-1000,-1500,-2000,-2500,-3000,-3500,-4000))+
scale_x_discrete(position="top",expand=c(0,0),breaks=c(30,40,50,60,70,80))+
scale_fill_viridis(limits = c(0, 10),aesthetics = "fill", na.value="light grey")+
#coord_fixed()+
theme_grey(base_size=10)+
theme(legend.position="right",legend.direction="vertical",
legend.title=element_text(size=12),
legend.margin=margin(grid::unit(0,"cm")),
legend.text=element_text(size=10),
legend.key.height=grid::unit(0.8,"cm"),
legend.key.width=grid::unit(0.2,"cm"),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.background=element_blank(),
panel.border=element_blank())
#get arctic region shapefile
::sf_use_s2(FALSE)
sf<- rnaturalearth::ne_countries(
Arctic_sf scale = 50,
continent = c("Europe", "North America"),
returnclass = "sf")
<-sf::st_crop(Arctic_sf, c(xmin=-180, xmax=180, ymin=40, ymax=90)) %>% sf::st_transform(3995)
Arctic_sf
#ggplot panel b
#make sf object in arctic stereographic projection
<- merge_dat_nodups_bathy %>%
dat_bathy_obis filter(database == "obis") %>%
filter(!decimallatitude <30) %>%
::st_as_sf(coords = c("decimallongitude","decimallatitude")) %>%
sf::st_set_crs(4326) %>%
sf::st_transform(3995)
sf
<-
p_obis ggplot() +
layer_spatial(dat_bathy_obis, mapping = aes(colour = as.factor(depth_flag), alpha = 0.2), size = 1, shape=20) +
layer_spatial(Arctic_sf, fill = "light grey", colour = "grey40",lwd=0.5) +
scale_colour_manual(breaks = c("1","0",NA),labels=c('>bathymetry', '<bathymetry', NA),values = c("#222255","#bbccee","black"),na.value="#ee7733") +
guides(colour = guide_legend(title="Average collection depth",override.aes = list(size=4))) +
scale_alpha(guide = 'none') +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face= "bold"),
plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
legend.position="right",legend.direction="vertical",
legend.title=element_text(size=12),
legend.margin=margin(grid::unit(0,"cm")),
legend.text=element_text(size=10))
#ggplot panel c
#make sf object in arctic stereographic projection
<- merge_dat_nodups_bathy %>%
dat_bathy_gbif filter(database == "gbif") %>%
filter(!decimallatitude <30) %>%
::st_as_sf(coords = c("decimallongitude","decimallatitude")) %>%
sf::st_set_crs(4326) %>%
sf::st_transform(3995)
sf
<-
p_gbif ggplot() +
layer_spatial(dat_bathy_gbif, mapping = aes(colour = as.factor(depth_flag), alpha = 0.2), size = 1, shape=20) +
layer_spatial(Arctic_sf, fill = "light grey", colour = "grey40", lwd=0.5) +
scale_colour_manual(breaks = c("1","0",NA),labels=c('>bathymetry', '<bathymetry', NA),values = c("#222255","#bbccee","black"),na.value="#ee7733") +
guides(colour = guide_legend(title="Average collection depth",override.aes = list(size=4))) +
scale_alpha(guide = 'none') +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face= "bold"),
plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
legend.position="right",legend.direction="vertical",
legend.title=element_text(size=12),
legend.margin=margin(grid::unit(0,"cm")),
legend.text=element_text(size=10))
<-p / (p_obis | p_gbif) +
p_combineplot_layout(guides = 'collect') +
plot_annotation(tag_levels = 'a') &
theme(plot.tag = element_text(face = 'bold'))
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "fig2.png"), p_combine,scale=2, device= "png", dpi = 500, width=5, height=5)
dev.set(dev.next())
rm(list=setdiff(ls(), "point.in.poly"))
gc()
::include_graphics(here::here("output_figs", "fig2.png")) knitr
Data from the OBIS database contained information on maximum and minimum collections depths, allowing for the separation of these data in to discrete shallow and deep subsets. We reload in the cleaned OBIS dataset, and add several environmental parameters to each point, in preparation to explore how shallow and deep records of each species associate to these variables.
Bathymetry data were downloaded from GEBCO, available here and stored locally in the folder “input_data”.
# reload cleaned obis data
<-read.csv(here::here("output_data", "calanus_clean_nodups_obis.csv"))
obis_nodups
#subset to cols we are using
$eventdate<-as.Date(obis_nodups$eventdate)
obis_nodups
#make sure all min and max depths are positive (ie without - signs infront of depth val)
$mindepth <- with(obis_nodups, ifelse(minimumdepthinmeters < 0, -minimumdepthinmeters, minimumdepthinmeters))
obis_nodups$maxdepth <- with(obis_nodups, ifelse(maximumdepthinmeters < 0, -maximumdepthinmeters, maximumdepthinmeters))
obis_nodups$maxdepth <- with(obis_nodups, ifelse(is.na(maxdepth), mindepth, maxdepth))
obis_nodups<-obis_nodups[-c(6,7)]
obis_nodups
#get av depth column
$av_depth <- rowMeans(obis_nodups[27:28], na.rm=TRUE)
obis_nodups$av_depth[obis_nodups$av_depth == "NaN"] <- NA
obis_nodups
#get geometry
coordinates(obis_nodups)<- ~ decimallongitude + decimallatitude
#load bathymetry data and extract to each point
<-raster("input_data/bathy.asc")
bathy<-extract(bathy,obis_nodups)
f<-as.data.frame(cbind(obis_nodups,f))
obis_nodups_bathycolnames(obis_nodups_bathy)[28]<-"bathy"
rm(obis_nodups,bathy,f)
#filter records with depth = NA and depth>bathy
#first negate av_depth column to match bathymetry depth signs
$av_depth<- obis_nodups_bathy$av_depth*-1
obis_nodups_bathy
#create new column with 1 or 0 depending on depth and bathymetry values
$depth_flag = NA
obis_nodups_bathy$depth_flag[obis_nodups_bathy$av_depth < obis_nodups_bathy$bathy] = 1
obis_nodups_bathy$depth_flag[obis_nodups_bathy$av_depth >= obis_nodups_bathy$bathy] = 0
obis_nodups_bathy$depth_flag[is.na(obis_nodups_bathy$bathy)] = 0
obis_nodups_bathy
#count how many records are flagged
summary(obis_nodups_bathy$depth_flag == 1)
rm(list=setdiff(ls(), c("point.in.poly","obis_nodups_bathy")))
#make season column based on month of collection
$season[obis_nodups_bathy$month >= 1 & obis_nodups_bathy$month <= 3] = 1
obis_nodups_bathy$season[obis_nodups_bathy$month >= 4 & obis_nodups_bathy$month <= 6] = 2
obis_nodups_bathy$season[obis_nodups_bathy$month >= 7 & obis_nodups_bathy$month <= 9] = 3
obis_nodups_bathy$season[obis_nodups_bathy$month >= 10 & obis_nodups_bathy$month <= 12] = 4 obis_nodups_bathy
Temperature data are from the World Ocean Atlas, available here
Rasters of temperature data (0.25 degree grid size) were downloaded for each available season (jfm,amj,jas,ond) and decade (1955-2017). Decadal rasters were combined into two time periods (1955-1984 = timeperiod “55” and 1985-2017 = timeperiod “85”).
Separate rasters were saved for nine depth layers (0, 50, 100, 200, 400, 600, 800, 1000 and 1500m) for each of these seasons and timeperiods. Thus, 72 rasters of temperature data are stored locally in “input_data/tempdat”. Subfolders within “tempdat” have the naming format: timeperiod_depth_season e.g. subfolder 55_x0_amj would contain temperature raster from timeperiod 1955-1984, 0m depth, and season april-may-june.
#make depth category column based on average depth (will be used to guide depths of relevant temperature data)
$depth_cat[obis_nodups_bathy$av_depth <= 0 & obis_nodups_bathy$av_depth > -25] = 0
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -25 & obis_nodups_bathy$av_depth > -75] = 50
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -75 & obis_nodups_bathy$av_depth > -150] = 100
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -150 & obis_nodups_bathy$av_depth > -300] = 200
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -300 & obis_nodups_bathy$av_depth > -500] = 400
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -500 & obis_nodups_bathy$av_depth > -700] = 600
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -700 & obis_nodups_bathy$av_depth > -900] = 800
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -900 & obis_nodups_bathy$av_depth > -1250] = 1000
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -1250] = 1500
obis_nodups_bathy
#all temperature data are stored within folder "tempdat". This code lists the name of each subfolder: subfolder names are in the format: timeperiod_depth_season e.g. 55_x0_amj would contain temperature raster from timeperiod 1955-1984, 0m depth, and season april-may-june.
<-list.dirs(path = here::here("input_data", "tempdat"), full.names = TRUE, recursive = TRUE)
folds<- folds %>% str_replace(".*?/tempdat/", "")
folds<-folds[2:73]
folds
#input each temperature raster into list and change filenames to match folder names
<- list.files(file.path(here::here("input_data"), paste0("tempdat/")), pattern = "temp", full.names = T, recursive =T)
files_temp <- files_temp %>% str_replace(".*?/temp.asc", paste0(folds,"_temp"))
filenames_temp
#stack rasters, replace names, check and plot
<- stack(files_temp)
files_tempcrs(files_temp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
names(files_temp) <- filenames_temp
#subset occurrences to only match temperature to those from 1955 or after, and from >30 latitude or greater
<-subset(obis_nodups_bathy, !decimallatitude < 30)
dat<-subset(dat, !year<=1954)
dat
#give occurrences category to match to 1st or 2nd time period of temperature data
$year_cat<-ifelse(dat$year >= 1955 & dat$year <= 1984, 1,2)
dat#make spatial object
coordinates(dat) <- ~ decimallongitude + decimallatitude
#extract temperature data (from all 72 temperature rasters) for each occurrence point - then use leftjoin function to save only the temperature data from the relevant season, time period and depth of each point.
<-raster::extract(files_temp,dat)
e1<-as_tibble(e1)
e1<-as.data.frame(dat)
dat$decimallongitude<-dat$decimallongitude
e1$decimallatitude<-dat$decimallatitude
e1$id<-dat$id
e1<-pivot_longer(e1, cols=1:72,values_to = "temp", names_to = "depth_cat")
e1
$season<-ifelse(grepl("jfm", e1$depth_cat), 1,
e1ifelse(grepl("amj", e1$depth_cat), 2,
ifelse(grepl("jas", e1$depth_cat), 3,4)))
$year_cat<-ifelse(grepl("X55", e1$depth_cat), 1,2)
e1<-mutate(e1, depth_cat=str_replace(depth_cat,".*?x",""))
e1$depth_cat<-gsub("[^0-9.-]", "", e1$depth_cat)
e1$depth_cat<-as.numeric(e1$depth_cat)
e1
<-left_join(dat,e1,by=c("decimallongitude", "decimallatitude", "id", "season", "year_cat","depth_cat"))
dat_withtemp
rm(e1,dat,filenames_temp,files_temp)
gc()
Gridded Sea ice concentration data are from the NSIDC, available here
Rasters of sea ice con data (0.25 degree grid size) were downloaded for each available month and year. Months were combined in to seasonal averages (jfm,amj,jas,ond) for the two time periods (1955-1984 = timeperiod “55” and 1985-2017 = timeperiod “85”).
Thus, 8 rasters of seaicecon data are stored locally in “input_data/tempdat”. Subfolders within “tempdat” have the naming format: timeperiod_depth_season e.g. 55_x0_amj would be timeperiod 1955-1984, 0m depth, and season april-may-june.
#repeat above steps for sea ice concentration data which are within the folder "tempdat" but only for surface depths. i.e. only interested in the first 8 folders (timeperiods x2 and seasons x4)
#remove all but the relevant 8 folders
<-folds[c(1:4,37:40)]
folds
#get list of sea ice concentration rasters and rename to same as folder names
<- list.files(file.path(here::here("input_data"), paste0("tempdat/")), pattern = "seaicecon", full.names = T, recursive =T)
files_ice <- files_ice %>% str_replace(".*?/seaicecon.asc", paste0(folds,"_ice"))
filenames_ice
<- stack(files_ice)
files_icecrs(files_ice) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
names(files_ice) <- filenames_ice
#subset occurrences to only match temperature to those from 1955 or after, and from >30 latitude or greater
<-subset(obis_nodups_bathy, !decimallatitude < 30)
dat<-subset(dat, !year<=1954)
dat
#give occurrences category to match to 1st or 2nd time period
$year_cat<-ifelse(dat$year >= 1955 & dat$year <= 1984, 1,2)
dat#make spatial object
coordinates(dat) <- ~ decimallongitude + decimallatitude
#extract all sea ice concentration data (from all 8 seaicecon files) for each point - then use leftjoin function to save only the sea ice concentration data from the relevant season and time period that matches each point.
<-raster::extract(files_ice,dat)
e1<-as_tibble(e1)
e1<-as.data.frame(dat)
dat$decimallongitude<-dat$decimallongitude
e1$decimallatitude<-dat$decimallatitude
e1$id<-dat$id
e1<-pivot_longer(e1, cols=1:8,values_to = "seaicecon", names_to = "depth_cat")
e1$season<-ifelse(grepl("jfm", e1$depth_cat), 1,
e1ifelse(grepl("amj", e1$depth_cat), 2,
ifelse(grepl("jas", e1$depth_cat), 3,4)))
$year_cat<-ifelse(grepl("X55", e1$depth_cat), 1,2)
e1<-mutate(e1, depth_cat=str_replace(depth_cat,".*?x",""))
e1$depth_cat<-gsub("[^0-9.-]", "", e1$depth_cat)
e1$depth_cat<-as.numeric(e1$depth_cat)
e1
<-left_join(dat_withtemp,e1,by=c("decimallongitude", "decimallatitude", "id", "season", "year_cat"))
dat_withice<-dat_withice[-36] # remove extra depthcat col.
dat_withice
#save new file with all environmental data added within the subfolder "output_data"
fwrite(dat_withice,here::here("output_data", "calanus_clean_nodups_obis_envdat.csv"))
rm(e1,dat_withtemp,dat,folds, files_ice,filenames_ice)
gc()
These subsets will be used when creating plots 3-7.
#save shallow and deep datasets for all OBIS records
<-obis_nodups_bathy %>%
dat_nodup_shallow subset(., maxdepth <= 200)
<-obis_nodups_bathy %>%
dat_nodup_deep subset(., mindepth >= 400)
#save shallow and deep datasets for OBIS records with temp and ice data (ie. subset by year 1955-2017, no lat<30)
<-dat_withice %>%
dat_nodup_shallow_sub subset(., maxdepth <= 200)
<-dat_withice %>%
dat_nodup_deep_sub subset(., mindepth >= 400)
rm(obis_nodups_bathy)
gc()
The original shapefile required to define the ocean and seas in plots 3 and 4 is available here
This shapefile was modified in ESRI ArcMap v10.6 and saved locally as “regions.shp” within “input_data/IHO_seas_modified”. Only Arctic and North Atlantic regions were retained, with smaller areas being combined for ease of visualizing the number of records collected in a given area. 11 regions in total are used.
#get arctic region shapefile
::sf_use_s2(FALSE)
sf<- rnaturalearth::ne_countries(
Arctic_sf scale = 50,
continent = c("Europe", "North America"),
returnclass = "sf")
<-sf::st_crop(Arctic_sf, c(xmin=-180, xmax=180, ymin=40, ymax=90)) %>% sf::st_transform(3995)
Arctic_sf
#load in polygon of Arctic regions
<- sf::st_read("input_data/IHO_seas_modified/regions.shp")
region_poly <- st_as_sf(region_poly) # convert to sf object
region_poly st_crs(region_poly) <- 4326
#make points sf objects
coordinates(dat_nodup_shallow)<- ~ decimallongitude + decimallatitude
proj4string(dat_nodup_shallow) <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
<- st_as_sf(dat_nodup_shallow)
dat_nodup_shallow
#merge poly and point info
<- dat_nodup_shallow %>% point.in.poly(region_poly) %>% st_as_sf()
int_shallow
#summarize by region, species and season
<- int_shallow %>%
int_shallow_result count(NAME,scientificname,season) %>%
st_drop_geometry()
#remove large redundant objects
rm(int_shallow)
#crop to plot area
<- sf::st_crop(region_poly, c(xmin=-180, xmax=180, ymin=40, ymax=90)) %>% sf::st_transform(3995)
region_poly
#merge count data into polygon object for plotting
<- merge(region_poly, int_shallow_result, by = "NAME", all.X = TRUE, all.y = TRUE)
int_shallow
#plots
<-
cfin_s_jfmggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus finmarchicus", season == 1), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,42000), breaks = c(0,10000,20000,30000,40000))+
ggtitle("C. finmarchicus")+
labs(y= "Jan-Feb-Mar\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face= "bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cfin_s_amjggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus finmarchicus", season == 2), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,42000), breaks = c(0,10000,20000,30000,40000))+
labs(y= "Apr-May-Jun\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cfin_s_jasggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus finmarchicus", season == 3), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,42000), breaks = c(0,10000,20000,30000,40000))+
labs(y= "Jul-Aug-Sep\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cfin_s_ondggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus finmarchicus", season == 4), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
labs(fill='occurrence record count')+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,42000), breaks = c(0,10000,20000,30000,40000),
guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
labs(y= "Oct-Nov-Dec\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
#legend.title = element_blank(),
legend.position="bottom",
legend.key.width = unit(1.3, "cm"))
<-
cgla_s_jfmggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus glacialis", season == 1), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
ggtitle("C. glacialis")+
#labs(y= "Jan-Feb-Mar\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face= "bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cgla_s_amjggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus glacialis", season == 2), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
#labs(y= "Apr-May-Jun\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cgla_s_jasggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus glacialis", season == 3), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
#labs(y= "Jul-Aug-Sep\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cgla_s_ondggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus glacialis", season == 4), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
labs(fill='occurrence record count')+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000),
guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
#labs(y= "Oct-Nov-Dec\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
#legend.title = element_blank(),
legend.position="bottom",
legend.key.width = unit(1.3, "cm"))
<-
chyp_s_jfmggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus hyperboreus", season == 1), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
ggtitle("C. hyperboreus")+
#labs(y= "Jan-Feb-Mar\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face= "bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
chyp_s_amjggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus hyperboreus", season == 2), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
#labs(y= "Apr-May-Jun\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
chyp_s_jasggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus hyperboreus", season == 3), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
#labs(y= "Jul-Aug-Sep\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
chyp_s_ondggplot() +
geom_sf(data = int_shallow %>% filter(scientificname == "Calanus hyperboreus", season == 4), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
labs(fill='occurrence record count')+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000),
guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
#labs(y= "Oct-Nov-Dec\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
#legend.title = element_blank(),
legend.position="bottom",
legend.key.width = unit(1.3, "cm"))
<- (cfin_s_jfm | cgla_s_jfm | chyp_s_jfm) / (cfin_s_amj | cgla_s_amj | chyp_s_amj) / (cfin_s_jas | cgla_s_jas | chyp_s_jas)/ (cfin_s_ond | cgla_s_ond | chyp_s_ond)
g
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "fig3.png"), g, device= "png", dpi = 500, width=12, height=15)
dev.set(dev.next())
rm(g,cfin_s_jfm,cgla_s_jfm,chyp_s_jfm,cfin_s_amj,cgla_s_amj,chyp_s_amj,cfin_s_jas,cgla_s_jas,chyp_s_jas,cfin_s_ond,cgla_s_ond,chyp_s_ond)
rm(int_shallow,int_shallow_result)
gc()
::include_graphics(here::here("output_figs", "fig3.png")) knitr
repeat previous section with records collected at depths greater than or equal to 400m.
<- sf::st_read("input_data/IHO_seas_modified/regions.shp")
region_poly <- st_as_sf(region_poly) # convert to sf object
region_poly st_crs(region_poly) <- 4326
#make points sf objects
coordinates(dat_nodup_deep)= ~ decimallongitude + decimallatitude
proj4string(dat_nodup_deep) <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
<- st_as_sf(dat_nodup_deep)
dat_nodup_deep
#merge poly and point info
<- dat_nodup_deep %>% point.in.poly(region_poly) %>% st_as_sf()
int_deep
#summarize by region, species and season
<- int_deep %>%
int_deep_result count(NAME,scientificname,season) %>%
st_drop_geometry()
#remove large redundant objects
rm(int_deep)
gc()
<- sf::st_crop(region_poly, c(xmin=-180, xmax=180, ymin=40, ymax=90)) %>% sf::st_transform(3995)
region_poly
<- merge(region_poly, int_deep_result, by = "NAME", all.X = TRUE, all.y = TRUE)
int_deep
#plots
<-
cfin_d_jfmggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus finmarchicus", season == 1), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,800), breaks = c(0,200,400,600,800))+
ggtitle("C. finmarchicus")+
labs(y= "Jan-Feb-Mar\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face= "bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cfin_d_amjggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus finmarchicus", season == 2), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,800), breaks = c(0,200,400,600,800))+
labs(y= "Apr-May-Jun\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cfin_d_jasggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus finmarchicus", season == 3), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,800), breaks = c(0,200,400,600,800))+
labs(y= "Jul-Aug-Sep\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cfin_d_ondggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus finmarchicus", season == 4), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
labs(fill='occurrence record count')+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,800), breaks = c(0,200,400,600,800),
guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
labs(y= "Oct-Nov-Dec\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
#legend.title = element_blank(),
legend.position="bottom",
legend.key.width = unit(1.3, "cm"))
<-
cgla_d_jfmggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus glacialis", season == 1), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200))+
ggtitle("C. glacialis")+
#labs(y= "Jan-Feb-Mar\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face= "bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cgla_d_amjggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus glacialis", season == 2), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200))+
#labs(y= "Apr-May-Jun\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cgla_d_jasggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus glacialis", season == 3), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits =c(0,200), breaks = c(0,50,100,150,200))+
#labs(y= "Jul-Aug-Sep\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
cgla_d_ondggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus glacialis", season == 4), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
labs(fill='occurrence record count')+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200),
guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
#labs(y= "Oct-Nov-Dec\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
#legend.title = element_blank(),
legend.position="bottom",
legend.key.width = unit(1.3, "cm"))
<-
chyp_d_jfmggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus hyperboreus", season == 1), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits =c(0,200), breaks = c(0,50,100,150,200))+
ggtitle("C. hyperboreus")+
#labs(y= "Jan-Feb-Mar\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face= "bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
chyp_d_amjggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus hyperboreus", season == 2), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200))+
#labs(y= "Apr-May-Jun\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
chyp_d_jasggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus hyperboreus", season == 3), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200))+
#labs(y= "Jul-Aug-Sep\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
legend.title = element_blank(),
plot.title = element_text(size = 14, face = "bold"),
legend.position="none",
legend.key.width = unit(1.3, "cm"))
<-
chyp_d_ondggplot() +
geom_sf(data = int_deep %>% filter(scientificname == "Calanus hyperboreus", season == 4), aes(fill = n))+
geom_sf(data = Arctic_sf, fill = "light grey")+
labs(fill='occurrence record count')+
scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200),
guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
#labs(y= "Oct-Nov-Dec\n") +
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
panel.grid = element_line(colour = "grey"),
axis.line = element_line(colour = "grey"),
axis.title.y = element_text(size = 14, face="bold"),
#legend.title = element_blank(),
legend.position="bottom",
legend.key.width = unit(1.3, "cm"))
<- (cfin_d_jfm | cgla_d_jfm | chyp_d_jfm) / (cfin_d_amj | cgla_d_amj | chyp_d_amj) / (cfin_d_jas | cgla_d_jas | chyp_d_jas)/ (cfin_d_ond | cgla_d_ond | chyp_d_ond)
g
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs","fig4.png"), g, device= "png", dpi = 500, width=12, height=15)
dev.set(dev.next())
rm(g,cfin_d_jfm,cgla_d_jfm,chyp_d_jfm,cfin_d_amj,cgla_d_amj,chyp_d_amj,cfin_d_jas,cgla_d_jas,chyp_d_jas,cfin_d_ond,cgla_d_ond,chyp_d_ond)
rm(int_deep,int_deep_result, Arctic_sf,region_poly)
gc()
::include_graphics(here::here("output_figs", "fig4.png")) knitr
#remove any points with no month of collection information
<- dat_nodup_shallow[-which(is.na(dat_nodup_shallow$month)), ]
dat_nodup_shallow
#plots
<-
month_gggplot(dat_nodup_shallow, aes(x=as.factor(month), fill=scientificname)) +
geom_bar(colour="grey60") +
labs(y = "Number of records", x= "") +
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
scale_x_discrete(breaks = c(1,2,3,4,5,6,7,8,9,10,11,12),
labels = c("Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
legend.position="none")
<-
month_dggplot(dat_nodup_deep, aes(x=as.factor(month), fill=scientificname)) +
geom_bar(colour="grey60") +
labs(y = "Number of records", x= "Month Collected") +
scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
scale_x_discrete(breaks = c(1,2,3,4,5,6,7,8,9,10,11,12),
labels = c("Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
legend.position="bottom")
#combine plots in panels
<-month_g/month_d +
g2plot_annotation(tag_levels = 'a') &
theme(plot.tag = element_text(face = 'bold'))
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "fig5.png"), g2, scale=2, device= "png", dpi = 500, width=5, height=6)
dev.set(dev.next())
rm(g2, month_d, month_g)
::include_graphics(here::here("output_figs", "fig5.png")) knitr
#create shallow summer only dataset, removing any records with depth innaccuracy
<- dat_nodup_shallow_sub %>% filter(depth_flag == 0)
dat_nodup_shallow_sub_summer <- dat_nodup_shallow_sub_summer %>% filter(month >= 4 & month <=7)
dat_nodup_shallow_sub_summer
#plot summer shallow data
<-
temp_gggplot(dat_nodup_shallow_sub_summer, aes(x=temp, col=scientificname, fill = scientificname)) +
geom_density(lwd = 1.2, alpha=0.3, bw= 1.3)+
labs(x = "", y= "Density") +
xlim(-5,28)+
scale_fill_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
scale_color_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
legend.position="none") #bottom
#create deep winter only dataset
<- dat_nodup_deep_sub %>% filter(depth_flag == 0)
dat_nodup_deep_sub_winter <- dat_nodup_deep_sub_winter %>% filter(month <= 3 | month >=8)
dat_nodup_deep_sub_winter
#plot deep winter data
<-
temp_dggplot(dat_nodup_deep_sub_winter, aes(x=temp, col=scientificname, fill = scientificname)) +
geom_density(lwd = 1.2, alpha=0.3, bw= 0.5)+
labs(x = "Average seasonal\n temperature (°C)", y= "Density") +
xlim(-5,28)+
scale_fill_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
scale_color_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
legend.position=c(.8,.80))
#create north of 66oN dataset
<- filter(dat_withice, dat_withice$decimallatitude >66)
dat_withice_north #tapply(datwithice$seaicecon, datwithice$scientificname, summary)
#summary(datwithice$seaicecon)
#plot north data
<-ggplot(dat_withice_north, aes(x=seaicecon, col=scientificname, fill = scientificname)) +
ice_allnorthgeom_histogram(colour = "grey60", binwidth = 10, position = "stack")+
geom_vline(aes(xintercept=6.6),color="#03051a", linetype="dashed", size=1)+ ##03051a
geom_vline(aes(xintercept=22),color="#cb1b4f", linetype="dashed", size=1)+ ##cb1b4f
geom_vline(aes(xintercept=25),color="#e8d9d0", linetype="dashed", size=1)+ ##e1d4c7
labs(x = "Average seasonal\n sea ice concentration (%)", y = "Number of records (latitude >66°N)") +
scale_colour_manual(limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"),values = c("#03051a","#cb1b4f","#faebdd"))+
scale_fill_manual(limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"),values = c("#03051a","#cb1b4f","#faebdd"))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"))
#ggplot_build(ice_allnorth)$data
#combine in mutltiple panels
<-(temp_g/temp_d | ice_allnorth) +
p2plot_annotation(tag_levels = 'a') &
theme(plot.tag = element_text(face = 'bold'))
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs","fig6.png"), p2, scale=2, device= "png", dpi = 500, width=7, height=3.5)
dev.off()
rm(dat_nodup_shallow_sub_summer,ice_allnorth, temp_d,temp_g,p2,dat_withice_north)
gc()
::include_graphics(here::here("output_figs", "fig6.png")) knitr
<-
bathy_dggplot(dat_nodup_deep_sub_winter, aes(y=bathy, x=as.factor(scientificname), fill=scientificname)) +
geom_boxplot() +
labs(y = "Bathymetry depth (m)",x="") +
scale_fill_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
scale_x_discrete(labels=c("Calanus finmarchicus" = "C.\n finmarchicus","Calanus glacialis" = "C.\n glacialis", "Calanus hyperboreus" = "C.\n hyperboreus"))+
scale_y_continuous(limits= c(-4500,0))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12, face = "italic"),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
legend.position= "right")
#ggplot_build(bathy_d)$data
<-
avdepth_dggplot(dat_nodup_deep_sub_winter, aes(y=av_depth, x=as.factor(scientificname), fill=scientificname)) +
geom_boxplot() +
labs(y = "Average collection depth (m)", x="") +
scale_fill_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
scale_x_discrete(labels=c("Calanus finmarchicus" = "C.\n finmarchicus","Calanus glacialis" = "C.\n glacialis", "Calanus hyperboreus" = "C.\n hyperboreus"))+
scale_y_continuous(limits= c(-4500,0))+
theme(
panel.background = element_blank(),
panel.border = element_rect(colour = "grey", fill = NA),
axis.line = element_line(colour = "grey"),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
axis.text.x = element_text(size = 12, face = "italic"),
axis.text.y = element_text(size = 12),
legend.title = element_blank(),
legend.text = element_text(size = 12, face = "italic"),
legend.position= "right")
#ggplot_build(avdepth_d)$data
<-(bathy_d | avdepth_d) +
p2plot_layout(guides = 'collect') +
plot_annotation(tag_levels = 'a') &
theme(plot.tag = element_text(face = 'bold'))
#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs","fig7.png"), p2, scale=2, device= "png", dpi = 500, width=6, height=3)
dev.off()
rm(p2, bathy_d, avdepth_d)
::include_graphics(here::here("output_figs", "fig7.png")) knitr
First we look at the summary statistics for the bathymetric depth of sampling during deep winter for all three species
#get summary stats
tapply(dat_nodup_deep_sub_winter$bathy, dat_nodup_deep_sub_winter$scientificname, summary)
$`Calanus finmarchicus`
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4266.6 -2730.4 -2557.2 -2350.9 -2019.8 -637.7
$`Calanus glacialis`
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4266.6 -3475.2 -2666.6 -2575.2 -1740.4 -921.3
$`Calanus hyperboreus`
Min. 1st Qu. Median Mean 3rd Qu. Max.
-4266.6 -2789.0 -2557.2 -2595.7 -2237.9 -921.3
We can use a Levene’s test for homogeneity of variance and a Shapiro-Wilk test of normality to check what statistical test would be appropriate
#tests
leveneTest(bathy ~ scientificname, data = dat_nodup_deep_sub_winter)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 2 15.689 2.086e-07 ***
782
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(dat_nodup_deep_sub_winter$bathy)
Shapiro-Wilk normality test
data: dat_nodup_deep_sub_winter$bathy
W = 0.97288, p-value = 6.773e-11
Significant results indicates that a non-parametric test should used to test for differences between species and bathymetric depth.
#non-parametric test
kruskal.test(bathy ~ scientificname, data = dat_nodup_deep_sub_winter)
Kruskal-Wallis rank sum test
data: bathy by scientificname
Kruskal-Wallis chi-squared = 7.0532, df = 2, p-value = 0.0294
A post-hoc test of pairwise comparisons is also applied
#post-hoc test of pairwise comparisons
pairwise.wilcox.test(dat_nodup_deep_sub_winter$bathy, as.factor(dat_nodup_deep_sub_winter$scientificname))
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: dat_nodup_deep_sub_winter$bathy and as.factor(dat_nodup_deep_sub_winter$scientificname)
Calanus finmarchicus Calanus glacialis
Calanus glacialis 0.28 -
Calanus hyperboreus 0.05 0.84
P value adjustment method: holm
Second, we look at the summary statistics for the collection depth of sampling during deep winter for all three species
#get summary stats
tapply(dat_nodup_deep_sub_winter$av_depth,dat_nodup_deep_sub_winter$scientificname, summary)
$`Calanus finmarchicus`
Min. 1st Qu. Median Mean 3rd Qu. Max.
-3950.0 -750.0 -481.5 -720.1 -442.8 -405.0
$`Calanus glacialis`
Min. 1st Qu. Median Mean 3rd Qu. Max.
-2500 -1500 -755 -1282 -750 -700
$`Calanus hyperboreus`
Min. 1st Qu. Median Mean 3rd Qu. Max.
-3950.0 -1744.4 -1407.2 -1400.4 -945.5 -480.0
We can use a Levene’s test of homogeneity of variance and a Shapiro-Wilk test of normality to check what statistical test would be appropriate
#tests
leveneTest(av_depth ~ scientificname, data = dat_nodup_deep_sub_winter)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 2 14.734 5.234e-07 ***
782
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(dat_nodup_deep_sub_winter$av_depth)
Shapiro-Wilk normality test
data: dat_nodup_deep_sub_winter$av_depth
W = 0.80528, p-value < 2.2e-16
Significant results indicates that a non-parametric test should used to test for differences between species and bathymetric depth.
#non-parametric test
kruskal.test(av_depth ~ scientificname, data = dat_nodup_deep_sub_winter)
Kruskal-Wallis rank sum test
data: av_depth by scientificname
Kruskal-Wallis chi-squared = 254.94, df = 2, p-value < 2.2e-16
A post-hoc test of pairwise comparisons is also applied
#post-hoc test of pairwise comparisons
pairwise.wilcox.test(dat_nodup_deep_sub_winter$av_depth, as.factor(dat_nodup_deep_sub_winter$scientificname))
Pairwise comparisons using Wilcoxon rank sum test with continuity correction
data: dat_nodup_deep_sub_winter$av_depth and as.factor(dat_nodup_deep_sub_winter$scientificname)
Calanus finmarchicus Calanus glacialis
Calanus glacialis 2.1e-13 -
Calanus hyperboreus < 2e-16 0.071
P value adjustment method: holm
sessionInfo()
R version 4.2.1 (2022-06-23 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)
Matrix products: default
locale:
[1] LC_COLLATE=English_United Kingdom.utf8
[2] LC_CTYPE=English_United Kingdom.utf8
[3] LC_MONETARY=English_United Kingdom.utf8
[4] LC_NUMERIC=C
[5] LC_TIME=English_United Kingdom.utf8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] car_3.1-1 carData_3.0-5 spatialEco_2.0-0
[4] viridis_0.6.2 viridisLite_0.4.0 raster_3.5-29
[7] sp_1.5-0 rnaturalearth_0.1.0 rnaturalearthhires_0.2.0
[10] data.table_1.14.2 patchwork_1.1.2 tidync_0.2.4
[13] sf_1.0-8 egg_0.4.5 gridExtra_2.3
[16] ggspatial_1.1.6 forcats_0.5.2 purrr_1.0.1
[19] readr_2.1.2 tibble_3.1.8 ggplot2_3.4.0
[22] tidyverse_1.3.2 tidyr_1.3.0 stringr_1.5.0
[25] dplyr_1.1.0 plyr_1.8.7 here_1.0.1
loaded via a namespace (and not attached):
[1] fs_1.5.2 lubridate_1.8.0 httr_1.4.4
[4] rprojroot_2.0.3 tools_4.2.1 backports_1.4.1
[7] rgdal_1.5-32 utf8_1.2.2 R6_2.5.1
[10] KernSmooth_2.23-20 DBI_1.1.3 colorspace_2.0-3
[13] withr_2.5.0 rnaturalearthdata_0.1.0 tidyselect_1.2.0
[16] compiler_4.2.1 textshaping_0.3.6 cli_3.6.0
[19] rvest_1.0.3 RNetCDF_2.6-1 xml2_1.3.3
[22] labeling_0.4.2 scales_1.2.1 classInt_0.4-7
[25] proxy_0.4-27 systemfonts_1.0.4 digest_0.6.29
[28] rmarkdown_2.15 pkgconfig_2.0.3 htmltools_0.5.3
[31] dbplyr_2.3.0 fastmap_1.1.0 htmlwidgets_1.5.4
[34] rlang_1.0.6 readxl_1.4.1 rstudioapi_0.13
[37] farver_2.1.1 generics_0.1.3 jsonlite_1.8.0
[40] googlesheets4_1.0.1 magrittr_2.0.3 ncmeta_0.3.0
[43] s2_1.1.0 Rcpp_1.0.9 munsell_0.5.0
[46] fansi_1.0.3 abind_1.4-5 lifecycle_1.0.3
[49] terra_1.6-7 stringi_1.7.8 yaml_2.3.5
[52] grid_4.2.1 crayon_1.5.1 lattice_0.20-45
[55] haven_2.5.0 hms_1.1.2 knitr_1.39
[58] pillar_1.8.1 codetools_0.2-18 wk_0.6.0
[61] reprex_2.0.2 glue_1.6.2 evaluate_0.16
[64] modelr_0.1.9 png_0.1-7 vctrs_0.5.2
[67] tzdb_0.3.0 cellranger_1.1.0 gtable_0.3.0
[70] assertthat_0.2.1 xfun_0.32 broom_1.0.0
[73] e1071_1.7-11 ragg_1.2.4 class_7.3-20
[76] ncdf4_1.19 googledrive_2.0.0 gargle_1.2.0
[79] units_0.8-0 ellipsis_0.3.2
rm(list = ls())