Analysis of Calanus occurrence data from GBIF and OBIS

Annotated source code needed to reproduce the analysis and figures from Freer and Tarling (2023) Front. Mar. Sci. doi: 10.3389/fmars.2023.908112

Author

JJ Freer

Show code
knitr::opts_chunk$set(echo = TRUE, cache = FALSE, message = FALSE, warning = FALSE, error = TRUE, comment = "")
options(tidyverse.quiet = TRUE)
options(dplyr.summarise.inform = FALSE)
options(warn=-1)

1. Load packages

Load packages to be used throughout

Show code
library(here)
library(plyr)
library(dplyr)
library(stringr)
library(tidyr)
library(tidyverse)
library(ggplot2)
library(ggspatial)
library(egg)
library(sf)
library(tidync)
library(forcats)
library(patchwork)
library(data.table)
library(rnaturalearthhires)
library(rnaturalearth)
library(raster)
library(sp)
library(viridis)
library(spatialEco)
library(car)

2. Load functions

Load any custom functions that will be used

Show code
point.in.poly <- function(x, y, sp = TRUE, duplicate = TRUE, ...) {
  if(!any(class(x)[1] == c("SpatialPoints", "SpatialPointsDataFrame", "sf"))) {
    stop("x is not a suitable point feature object class") }
  if(!any(class(y)[1] == c("SpatialPolygons", "SpatialPolygonsDataFrame", "sf"))) {
    stop("y is not a suitable polygon feature object class") }
  if(any(class(x) == "sfc")) { x <- sf::st_sf(x) }  
  if(duplicate == FALSE) {
    if(!any(class(x)[1] == c("SpatialPoints", "SpatialPointsDataFrame"))) {
      x <- methods::as(x, "Spatial")
      if(dim(x@data)[2] == 0) stop("There are no attributes associated with points")
    }   
    if(!any(class(y)[1] == c("SpatialPolygons", "SpatialPolygonsDataFrame"))) {
      y <- methods::as(y, "Spatial")
      if(dim(y@data)[2] == 0) stop("There are no attributes associated with polygons")
    }
    o <- sp::over(x, y, returnList = TRUE)
    m <- max(unlist(lapply(o, nrow)))
    ids <- row.names(y)
    xy <- data.frame(t(sapply(1:length(o), 
                              function(i) c(ids[i], c(o[[i]][,1], rep(NA, m))[1:m])
    )))
    colnames(xy) <- c("p",paste0("pid", 1:m))
    x@data <- data.frame(x@data, xy)
    if( sp == FALSE ) sf::st_as_sf(x) 
    return( x )
  } else {
    if(any( class(x) == c("SpatialPoints", "SpatialPointsDataFrame"))) {
      x <- sf::st_as_sf(x)
    }
    if(any(class(y) == "sfc")) { x <- sf::st_sf(y) }
    if(any( class(y) == c("SpatialPolygons", "SpatialPolygonsDataFrame"))) {
      y <- sf::st_as_sf(y)  
    } 
    if(dim(x)[2] == 1) x$pt.ids <- 1:nrow(x)    
    if(dim(y)[2] == 1) y$poly.ids <- 1:nrow(y)    
    o <- sf::st_join(x, y, ...)
    if( sp ) o <- methods::as(o, "Spatial")
    # if( sp ) o <- sf::as_Spatial(o)
    return( o ) 
  } 
}

3. Load and clean data from OBIS

3.1. Merge, clean and filter records

The folders containing the raw occurrence data from OBIS were downloaded (using links below) and stored locally within the folder “input_data”.

C. glacialis OBIS download

C hyperboreus OBIS download

C. finmarchicus OBIS download

The occurrence files for the three species are merged into one dataframe and various cleaning and quality control steps are applied.

Show code
here::i_am("code_frontiers_2023.qmd")

# read in raw obis files 
cfin<-read.csv("input_data/e114c56d-7073-4957-9af3-1b92f2e3a85b/Occurrence.csv", sep = ',', header = T)
chyp<-read.csv("input_data/ebdfc962-d62d-48eb-9095-bd49cc177b6a/Occurrence.csv", sep = ',', header = T)
cgla<-read.csv("input_data/e55e5e2d-3817-4429-a54f-1d3356c1c396/Occurrence.csv", sep = ',', header = T)

#merge files of each species
dat_merge<-rbind.fill(cfin,cgla,chyp)

#remove large redundant files
rm(cfin,cgla,chyp)

#new lat and lon cols with 2 dp
dat_merge$decimallongitude2<-round(dat_merge$decimallongitude, digits = 2)
dat_merge$decimallatitude2<-round(dat_merge$decimallatitude, digits = 2)

#remove records without species name
dat_merge <- dat_merge[-which(dat_merge$scientificname == ""), ]  

#format date cells, return as numeric
dat_merge$eventdate<-as.Date(dat_merge$eventdate)
dat_merge$month<- strftime(dat_merge$eventdate, "%m")
dat_merge$day<- strftime(dat_merge$eventdate, "%d")
dat_merge$year<- strftime(dat_merge$eventdate, "%Y")

dat_merge$month<- as.numeric(dat_merge$month)
dat_merge$day<- as.numeric(dat_merge$day)
dat_merge$year<- as.numeric(dat_merge$year)

#remove records in southern hemisphere
dat_merge<-filter(dat_merge, decimallatitude2 >0)

#read in shapefiles for land and pacific sector
world <- ne_countries(scale = "large", returnclass = "sf") # country polygons
pacpoly1<-read_sf(dsn = here::here("input_data"), layer = "PACPOLY1") #pacific polygon 1
pacpoly2<-read_sf(dsn = here::here("input_data"), layer = "PACPOLY2") #pacific polygon 2

#make obis dataset an sf object
dat_merge<-dat_merge %>% 
  st_as_sf(coords = c("decimallongitude", "decimallatitude"), remove=F) %>%
  st_cast("MULTIPOINT") %>%
  st_set_crs(4326)

#remove records outside each shapefile
outside <- sapply(st_intersects(dat_merge, world),function(x){length(x)==0})
dat_merge <- dat_merge[outside,]
rm(world,outside)

outsidepacpoly1 <- sapply(st_intersects(dat_merge, pacpoly1),function(x){length(x)==0})
dat_merge <- dat_merge[outsidepacpoly1,]
rm(pacpoly1,outsidepacpoly1)

outsidepacpoly2 <- sapply(st_intersects(dat_merge, pacpoly2),function(x){length(x)==0})
dat_merge <- dat_merge[outsidepacpoly2,]
rm(pacpoly2,outsidepacpoly2)

3.2. Format metadata columns

Metadata fields of interest are cleaned and formatted to condense inputs into meaningful and usable categories

Show code
#list all levels within lifestage column
dat_merge$lifestage<-as.character(dat_merge$lifestage)
levels(as.factor(dat_merge$lifestage))

#re-name lifestage levels

#if data are from CPR, specify CV-CVI lifestage
dat_merge$lifestage[dat_merge$collectioncode == "CPR"] <- "CV-CVI" 

# use information from "sex" column to specify missing lifestage information
levels(as.factor(dat_merge$sex)) 
dat_merge$lifestage[dat_merge$sex == "F"] <- "CVI_F"  
dat_merge$lifestage[dat_merge$sex == "Female"] <- "CVI_F" 
dat_merge$lifestage[dat_merge$sex == "female"] <- "CVI_F"  
dat_merge$lifestage[dat_merge$sex == "female;;"] <- "CVI_F"  
dat_merge$lifestage[dat_merge$sex == "females"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "females only"] <- "CVI_F"
dat_merge$lifestage[dat_merge$sex == "females present"] <- "CVI_F"  
dat_merge$lifestage[dat_merge$sex == "M"] <- "CVI_M"  
dat_merge$lifestage[dat_merge$sex == "male"] <- "CVI_M" 
dat_merge$lifestage[dat_merge$sex == "Male"] <- "CVI_M"  
dat_merge$lifestage[dat_merge$sex == "MALE"] <- "CVI_M"
dat_merge$lifestage[dat_merge$sex == "male;;"] <- "CVI_M"  
dat_merge$lifestage[dat_merge$sex == "female; male"] <- "CVI"  
dat_merge$lifestage[dat_merge$sex == "male; female"] <- "CVI"  
dat_merge$lifestage[dat_merge$sex == "males and females"] <- "CVI" 
dat_merge$lifestage[dat_merge$sex == "M,F"] <- "CVI" 
dat_merge$lifestage[dat_merge$sex == "U"] <- "CVI" 

# group all entries that are unknown
dat_merge$lifestage[dat_merge$lifestage == "Not specified"] <- "Unknown"  
dat_merge$lifestage[dat_merge$lifestage == "unassigned"] <- "Unknown"  
dat_merge$lifestage[dat_merge$lifestage == "Unstaged"] <- "Unknown" 
dat_merge$lifestage[dat_merge$lifestage == ""] <- "Unknown" 
dat_merge$lifestage[dat_merge$lifestage == "^$"] <- "Unknown" 
dat_merge$lifestage <- dat_merge$lifestage %>% replace_na('Unknown')

dat_merge$lifestage <- as.factor(dat_merge$lifestage)
#group all entries in to a condensed list of options
levels(dat_merge$lifestage) <- list(CI = c("copepoditeI","copepodite stage I","Copepodite I","copepodite I","C1","C1: COPEPODITE I","CI","C1 (Copepodite I)"), 
                                            CII = c("copepoditeII","copepodite stage II","Copepodite II", "copepodite II","C2","C2: COPEPODITE II","CII","C2 (Copepodite II)"),
                                            CIII = c("copepoditeIII", "copepodite stage III","Copepodite III","copepodite III", "C3","C3: COPEPODITE III","CIII","C3 (Copepodite III)"), 
                                            CIV = c("copepoditeIV","copepodite stage IV","Copepodite IV","C4","C4: COPEPODITE IV","CIV","C4 (Copepodite IV)"),
                                            CV = c("CV","copepoditeV ","copepodite V ","Copepodite V","copepodite V","C5","C5: COPEPODITE V","C5 (Copepodite V)"), 
                                            CVI = c("CVI","copepoditeVI","copepodite VI SPENT","copepodite VI GRAVID","copepodite VI DEVELOPING","copepodite VI", "adult","Adult", "ADULT","adult - no sex (copepodite VI)"),  
                                            CVI_F = c("F","CVI female","ADULT F BEARING SPERMATOPHORES","adult female (copepodite VI)","AF","adult female"),
                                            CVI_M = c("CVI male","adult male (copepodite VI)","AM","adult male"),
                                            CV_CVI = c("CV +","copepodite V-VI","C5-6: COPEPODITE 5 - 6  stages (5-6) were counted in one group", "CV-CVI"),
                                            CI_CV = c("Copepodite IV-V","copepodite IV-V","copepodite I-V","C3-4: COPEPODITE 3 - 4  stages (3-4) were counted in one group","copepodite I-IV","Copepodite I-III", "CIV +", "C1-C3 (Copepodite I - Copepodite III)","C4-C5 (Copepodite IV - Copepodite V)"),
                                            C_unass. = c("COPEPODITE(S) sum of various (unspecified) copepodite stages","copepodite 400-800 um - cyclopoids and harpacticoi","copepodite - no stage assigned","copepodite"),
                                            NI_NVI = c("naupliusVI","naupliusV","naupliusIV","naupliusIII","naupliusII","naupliusI","NAUPLIUS/NAUPLII","Nauplius","nauplius","nauplii V","nauplii 5","nauplii","N6: NAUPLIUS VI         could be metanauplius", "N5: NAUPLIUS V","N5-6: NAUPLII 5 - 6","N4: NAUPLIUS IV","N4-5: NAUPLII 4 - 5","N4","N3: NAUPLIUS III","N3-4: NAUPLII 3 - 4","N3","N2: NAUPLIUS II","N2-3: NAUPLII 2 - 3","N2","N1: NAUPLIUS I","N1","NP (Nauplius)"),
                                            Other = c("POSTLARVAE/SUB-ADULT    (Codes 7+8)","stage IV","VI","Vi","V,VI","V","stage IV","MOLT STAGE 6  (decapods)","MOLT STAGE 5  (decapods)", "MOLT STAGE 3  (decapods)" ,"MOLT STAGE 2  (decapods)","MOLT STAGE 1  (decapods)","macroplankton","leptocephalus","Larvae","larvae","JUVENILE","juvenile","; juvenile","juv","IV","III","II", "I-IV","exuviae","exoskeletons","EPHYRA life stage genus Aurelia", "1","2","3","4","5","6","7"),
                                            Ova = c("ova,","egg? 0.29-0.31mm","EGG/OVA"),
                                            Unknown = "Unknown")

dat_merge$lifestage <- dat_merge$lifestage %>% replace_na('Unknown')

#check new categories
levels(as.factor(dat_merge$lifestage))
summary(dat_merge$lifestage)

#repeat for sampling protocol levels

dat_merge$samplingprotocol<- as.character(dat_merge$samplingprotocol)
levels(as.factor(dat_merge$samplingprotocol))

dat_merge$samplingprotocol[dat_merge$collectioncode == "CPR"] <- "CPR" 
dat_merge$samplingprotocol[dat_merge$collectioncode == "cpr_"] <- "CPR" 
dat_merge$samplingprotocol[dat_merge$collectioncode == "MOC10GB"] <- "MOCNESS"

dat_merge$samplingprotocol[dat_merge$samplingprotocol == "Horizontal tows with the No 5 net"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "Horizontal tow made with a No 5 net"] <- "nonspecific"  
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "T-160 cm"] <- "nonspecific"  
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "T-80 cm"] <- "nonspecific"  
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "No 5 net"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "Plankton vertical tows using No.0 mesh net with mouth diameter of one metre and a length of seventeen feet."] <- "nonspecific"  
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "No 0 metre net"] <- "nonspecific"  
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "GearType=24-mesh to the inch net"] <- "nonspecific"  
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "JNCCMNCR10000129_METH_001"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCCW30000000037_METH_002"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCED00500000002_METH_001"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCED00500000005_METH_001"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCED00500000005_METH_004"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "MRCED00500000005_METH_005"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "Mesh net cast; Mitchell et al. 2002"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "JNCCMNCR10000129_METH_001"] <- "nonspecific" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "https://www.bodc.ac.uk/resources/inventories/cruise_inventory/report/11432/"] <- "nonspecific" 
dat_merge$samplingprotocol <- dat_merge$samplingprotocol %>% replace_na('Unknown')
dat_merge$samplingprotocol[dat_merge$samplingprotocol == ""] <- "Unknown" 
dat_merge$samplingprotocol[dat_merge$samplingprotocol == "^$"] <- "Unknown" 

dat_merge$samplingprotocol[grepl("Nansen", dat_merge$occurrenceremarks, ignore.case = T)] <- "Nansen"
dat_merge$samplingprotocol[grepl("Multi", dat_merge$occurrenceremarks, ignore.case = T)] <- "Multinet"
dat_merge$samplingprotocol[grepl("Pump", dat_merge$occurrenceremarks, ignore.case = T)] <- "Water pump"
dat_merge$samplingprotocol[grepl("Juday", dat_merge$occurrenceremarks, ignore.case = T)] <- "Juday"
dat_merge$samplingprotocol[grepl("WP", dat_merge$occurrenceremarks, ignore.case = T)] <- "WPII"
dat_merge$samplingprotocol[grepl("egg", dat_merge$occurrenceremarks, ignore.case = T)] <- "Egg net"
dat_merge$samplingprotocol[grepl("N70V", dat_merge$occurrenceremarks, ignore.case = T)] <- "VCN"
dat_merge$samplingprotocol[grepl("BONGO", dat_merge$catalognumber, ignore.case = T)] <- "BONGO"


dat_merge$samplingprotocol <- as.factor(dat_merge$samplingprotocol)

levels(dat_merge$samplingprotocol) <- list(BIONESS = c("BIONESS 1.0m","BIONESS 0.5m"), 
                                          Bongo = c("Bongo net hauled vertically","BONGO_505UM_MICROSCOPY", "BONGO"),
                                          CPR = "CPR", 
                                          Juday = c("Juday-36  cm" ,"Juday-80  cm","Russeh?v Juday-h?v 37", "Juday"),
                                          MOCNESS = "MOCNESS", 
                                          Ring = c("Ring net hauled vertically","Ring net 0.75m xbow","Ring net 0.75m","Ring net 0.75 o/c","Ring net 0.5m", "TWINRING_150UM_MICROSCOPY"),  
                                          WPII = c("WPII","WP II-56 cm","SamplerTypeCode=WP2; MethodReference=HC-C-C7; MeshSize(um)=90","SamplerTypeCode=WP-2; MethodReference=HC-C-C7; MeshSize(um)=90","SamplerTypeCode=WP-2; MeshSize(um)=90"),
                                          Grab = c("Smith-McIntyre grab sampler (surface area sampled=0.1 m2; mesh=1.0mm2)","Sampling instrument: TOOL0653: Van Veen Grab; Seive mesh size: 1.000mm","Sampling instrument: 50: Sediment Grab; Seive mesh size: 1.000mm"), 
                                          Sledge = "RP-sledge,Subsample method: Decanted - Mesh size (mm): 0.5",
                                          OPC = c("OPC side nets","OPC experimental"), 
                                          Campelen = "Campelen",
                                          Water_pump = "Water pump",
                                          Egg_net = "Egg net",
                                          Nansen = "Nansen",
                                          VCN = "VCN",
                                          Multinet= "Multinet",
                                          Nonspec. = "nonspecific",
                                          Unknown = "Unknown")
                                          
dat_merge$samplingprotocol <- dat_merge$samplingprotocol %>% replace_na('Unknown')

#check new categories
levels(as.factor(dat_merge$samplingprotocol))
summary(dat_merge$samplingprotocol)

3.3. Remove duplicates and save final dataset

Duplicates are removed, the cleaned dataset is trimmed to columns of interest and then saved

Show code
#remove duplicates based on certain fields
dat_nodups<-distinct(dat_merge, scientificname, year, month, day, lifestage, decimallongitude2, decimallatitude2,minimumdepthinmeters,maximumdepthinmeters,.keep_all=T)

#get n of duplicates
nrow(dat_merge) - nrow(dat_nodups)

#remove unnecessary columns
dat_nodups<-dat_nodups[c(1,2,3,4,9,11,12,80,81,82,89,91,95,96,97,99,100,120,121,122,123,124,128,129,130,226,227)]

#save data to subfolder "output_data"
fwrite(dat_nodups,here::here("output_data", "calanus_clean_nodups_obis.csv"))

rm(dat_merge)
gc()

3.4. Plot metadata - Figure S1

We can now plot histogram and barplots of the most interesting metadata fields - year of collection, sampling protocol, and life-stage

Show code
#shorten name for ease
dat<-dat_nodups

#plot of year metadata
yearplot<-ggplot(dat, aes(x=year,fill=scientificname)) + 
  geom_histogram(color="grey60", binwidth = 5)+
  geom_vline(aes(xintercept=1930),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=1955),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=2005),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=1995),color="grey", linetype="dashed", size=1)+
  geom_text(aes(x=1930,y=7000, label="\nCPR began 1931"), colour="black", angle=90) +
  geom_text(aes(x=1955,y=20000, label="\n IPY 1957"), colour="black", angle=90) +
  geom_text(aes(x=2005,y=26000, label="\n IPY 2007"), colour="black", angle=90) +
  geom_text(aes(x=1997,y=28000, label="\n GLOBEC 1996"), colour="black", angle=90) +
  labs(y = "Number of records", x= "Year") +
  scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="right") 

#plot of sampling protocol metadata
netplot<-ggplot(data=dat %>% group_by(samplingprotocol) %>% filter(n() >50), aes(x=forcats::fct_infreq(samplingprotocol),fill=scientificname)) + 
  geom_bar(color="grey60", stat="count")+
  labs(y = "Number of records", x= "Sampling protocol") +
  #scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="right") 

#plot of lifestage metadata
stageplot<-ggplot(data=dat %>% group_by(lifestage) %>% filter(n() >1000), aes(x=forcats::fct_infreq(lifestage),fill=scientificname)) + 
  geom_bar(color="grey60", stat="count")+
  labs(y = "Number of records", x= "Lifestage") +
  #scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position=c(.85,.85)) 

#combine plots to multiple panels
plot_combine<- yearplot + netplot / stageplot  +
  plot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a') &
  theme(plot.tag = element_text(face = 'bold'))

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "figs1.png"), plot_combine, scale=1.5, device= "png", dpi = 500, width=12, height=5) 
dev.set(dev.next())
rm(plot_combine,netplot,yearplot,stageplot,dat_nodups,dat)
gc()
Show code
knitr::include_graphics(here::here("output_figs", "figs1.png"))

Figure S1. Histograms of cleaned and quality controlled OBIS data, indicating the frequency of species records available via per (a) calendar year (b) sampling method (c) and life-stage. Dashed vertical lines on panel (a) represent years of significant data collection efforts; CPR = Continuous Plankton Recorder, IPY = International Polar Year, GLOBEC = Global Ocean Ecosystems Dynamics. Due to a high number of categories, only those with more than 200 occurrences were included in plots b-c.

4. Load and clean data from GBIF

4.1. Merge, clean and filter records

The folder containing the raw occurrence data from GBIF were downloaded (using link below) and stored locally in the folder “input_data”

Calanus GBIF download

Show code
#read in gbif file from download

gbif<-read.delim("input_data/0246981-220831081235567/occurrence.txt", header=T)

gbif$decimallongitude2<-round(gbif$decimalLongitude, digits = 2)
gbif$decimallatitude2<-round(gbif$decimalLatitude, digits = 2)
gbif$eventDate<-as.Date(gbif$eventDate)
gbif$month<- strftime(gbif$eventDate, "%m")
gbif$day<- strftime(gbif$eventDate, "%d")
gbif$year<- strftime(gbif$eventDate, "%Y")

gbif$month<- as.numeric(gbif$month)
gbif$day<- as.numeric(gbif$day)
gbif$year<- as.numeric(gbif$year)

#remove those in southern hemisphere
dat_merge<-filter(gbif, decimallatitude2 >0)

#read in shapefiles for land and pacific sector
world <- ne_countries(scale = "large", returnclass = "sf") # country polygons
pacpoly1<-read_sf(dsn = here::here("input_data"), layer = "PACPOLY1") #pacific polygon 1
pacpoly2<-read_sf(dsn = here::here("input_data"), layer = "PACPOLY2") #pacific polygon 2

#make gbif dataset an sf object
dat_merge<-dat_merge %>% 
  st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), remove=F) %>%
  st_cast("MULTIPOINT") %>%
  st_set_crs(4326)

#remove records outside each shapefile
outside <- sapply(st_intersects(dat_merge, world),function(x){length(x)==0})
dat_merge <- dat_merge[outside,]
rm(world,outside)

outsidepacpoly1 <- sapply(st_intersects(dat_merge, pacpoly1),function(x){length(x)==0})
dat_merge <- dat_merge[outsidepacpoly1,]
rm(pacpoly1,outsidepacpoly1)

outsidepacpoly2 <- sapply(st_intersects(dat_merge, pacpoly2),function(x){length(x)==0})
dat_merge <- dat_merge[outsidepacpoly2,]
rm(pacpoly2,outsidepacpoly2)

4.2. Format metadata columns

Metadata fields of interest are cleaned and formatted to condense inputs into meaningful and usable categories

Show code
#re-name lifestage levels
dat_merge$lifeStage <- as.character(dat_merge$lifeStage)
levels(as.factor(dat_merge$lifeStage))

dat_merge$lifeStage[dat_merge$collectionCode == "CPR"] <- "CV-CVI" 
dat_merge$lifeStage[dat_merge$datasetName == "CPR"] <- "CV-CVI" 

levels(as.factor(dat_merge$sex))

dat_merge$lifeStage[dat_merge$sex == "FEMALE"] <- "CVI_F"  
dat_merge$lifeStage[dat_merge$sex == "MALE"] <- "CVI_M"

dat_merge$lifeStage[grepl("lifehistory=CI", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CI"
dat_merge$lifeStage[grepl("lifehistory=CII", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CII"
dat_merge$lifeStage[grepl("lifehistory=CIII", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CIII"
dat_merge$lifeStage[grepl("lifehistory=CIV", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CIV"
dat_merge$lifeStage[grepl("lifehistory=CV", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CV"
dat_merge$lifeStage[grepl("lifehistory=CVI", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI"
dat_merge$lifeStage[grepl("lifehistory=M", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_M"
dat_merge$lifeStage[grepl("lifehistory=AM", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_M"
dat_merge$lifeStage[grepl("lifehistory=male", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_M"
dat_merge$lifeStage[grepl("lifehistory=F", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_F"
dat_merge$lifeStage[grepl("lifehistory=AF", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_F"
dat_merge$lifeStage[grepl("lifehistory=female", dat_merge$occurrenceRemarks, ignore.case = T)] <- "CVI_F"

dat_merge$lifeStage[dat_merge$lifeStage == "^$"] <- "Unknown"
dat_merge$lifeStage[dat_merge$lifeStage==""] <- "Unknown"
dat_merge$lifeStage <- dat_merge$lifeStage %>% replace_na('Unknown')

dat_merge$lifeStage <- as.factor(dat_merge$lifeStage)

levels(dat_merge$lifeStage) <- list(CI = "CI",
                                    CII = "CII",
                                    CIII = "CIII",
                                    CIV = "CIV",
                                    CV = "CV",
                                    CVI = c("CVI","Adult"),
                                    CVI_F ="CVI_F",
                                    CVI_M = "CVI_M",
                                    CV_CVI = "CV-CVI",
                                    NI_NVI = c("Metanaupilus","Nauplius"),
                                    Other = c("Juvenile","Larva"),
                                    Unknown = c("Unknown"))

dat_merge$lifeStage <- dat_merge$lifeStage %>% replace_na('Unknown')

levels(as.factor(dat_merge$lifeStage))
summary(dat_merge$lifeStage)

#re-name protocol levels

dat_merge$samplingProtocol<- as.factor(dat_merge$samplingProtocol)
levels(dat_merge$samplingProtocol)

dat_merge$samplingProtocol<- as.character(dat_merge$samplingProtocol)

dat_merge$samplingProtocol[dat_merge$collectionCode == "CPR"] <- "CPR" 
dat_merge$samplingProtocol[dat_merge$datasetName == "CPR"] <- "CPR" 

dat_merge$samplingProtocol[dat_merge$samplingProtocol == "Plankton tow net"] <- "nonspecific"  
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "GearType=24-mesh to the inch net"] <- "nonspecific"  
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "MRPMN00100000033_METH_008"] <- "nonspecific" 
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "Mesh net cast; Mitchell et al. 2002"] <- "nonspecific" 
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "https://www.bodc.ac.uk/resources/inventories/cruise_inventory/report/11432/"] <- "nonspecific" 
dat_merge$samplingProtocol <- dat_merge$samplingProtocol %>% replace_na('Unknown')
dat_merge$samplingProtocol[dat_merge$samplingProtocol == ""] <- "Unknown" 
dat_merge$samplingProtocol[dat_merge$samplingProtocol == "^$"] <- "Unknown" 

dat_merge$samplingProtocol[grepl("Nansen", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Nansen"
dat_merge$samplingProtocol[grepl("Multi", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Multinet"
dat_merge$samplingProtocol[grepl("Pump", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Water pump"
dat_merge$samplingProtocol[grepl("Juday", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Juday"
dat_merge$samplingProtocol[grepl("WP", dat_merge$occurrenceRemarks, ignore.case = T)] <- "WPII"
dat_merge$samplingProtocol[grepl("egg", dat_merge$occurrenceRemarks, ignore.case = T)] <- "Egg net"
dat_merge$samplingProtocol[grepl("N70V", dat_merge$occurrenceRemarks, ignore.case = T)] <- "VCN"

dat_merge$samplingProtocol <- as.factor(dat_merge$samplingProtocol)

levels(dat_merge$samplingProtocol) <- list(Bongo = c("BONGO_505UM_MICROSCOPY", "BONGO"),
                                           CPR = "CPR", 
                                           Juday = "Juday",
                                           MOCNESS = "MOCNESS", 
                                           Ring = c("Ring net 0.75m","Ring net 0.5m", "TWINRING_150UM_MICROSCOPY"),  
                                           WPII = c("WPII","WP II-56 cm","WP-2 64-µm mesh net"),
                                           Grab = c("Grab", "modified Smith-McIntyre-grab, 3-5 samples, sampling area 0.1 m"), 
                                           Sledge = c("RP-sledge,Subsample method: Decanted - Mesh size (mm): 0.5","RP-sledge"),
                                           Campelen = "Campelen",
                                           Dredge = "3nglr dredge",
                                           Sediment_trap = "500ml sediment trap bottle",
                                           Water_pump = "Water pump",
                                           ROV = "ROV",
                                           Egg_net = "Egg net",
                                           Nansen = "Nansen",
                                           VCN = c("VCN","Vertical net"),
                                           Multinet= c("Multinet","Multinet 64-µm mesh net"),
                                           Nonspec. = "nonspecific",
                                           Unknown = "Unknown")

dat_merge$samplingProtocol <- dat_merge$samplingProtocol %>% replace_na('Unknown')

levels(as.factor(dat_merge$samplingProtocol))
summary(dat_merge$samplingProtocol)

4.3. Remove duplicates and save final dataset

Duplicates are removed, the cleaned dataset is trimmed to columns of interest and then saved

Show code
#remove duplicates based on certain fields
dat_nodups<-distinct(dat_merge, species, year, month, day, lifeStage, decimallongitude2, decimallatitude2,depth,.keep_all=T)

#get n of duplicates
nrow(dat_merge) - nrow(dat_nodups)

#subset to columns of interest
dat_nodups<-dat_nodups[c(1,60,61,62,68,73,74,75,77,78,92,103,107,108,109,112,113,114,115,138,139,217,222,223,241,260,261)]

#save data to a subfolder "output_data"
fwrite(dat_nodups, here::here("output_data","calanus_clean_nodups_gbif.csv"))

rm(dat_merge,gbif)
gc()

4.4. Plot metadata - Figure S2

We can now plot histogram and barplots of the most interesting metadata fields - year of collection, sampling protocol, and life-stage

Show code
#shorten name for ease
dat<-dat_nodups

#plot of year metadata column
yearplot<-ggplot(dat, aes(x=year,fill=species)) + 
  geom_histogram(color="grey60", binwidth = 5)+
  geom_vline(aes(xintercept=1930),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=1955),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=2005),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=1995),color="grey", linetype="dashed", size=1)+
  geom_text(aes(x=1930,y=3000, label="\nCPR began 1931"), colour="black", angle=90) +
  geom_text(aes(x=1955,y=10000, label="\n IPY 1957"), colour="black", angle=90) +
  geom_text(aes(x=2005,y=14000, label="\n IPY 2007"), colour="black", angle=90) +
  geom_text(aes(x=1996,y=12000, label="\n GLOBEC 1996"), colour="black", angle=90) +
  labs(y = "Number of records", x= "Year") +
  scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="right") 

#plot of sampling protocol metadata column
netplot<-ggplot(data=dat %>% group_by(samplingProtocol) %>% filter(n() >200), aes(x=forcats::fct_infreq(samplingProtocol),fill=species)) + 
  geom_bar(color="grey60", stat="count")+
  labs(y = "Number of records", x= "Sampling protocol") +
  #scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="right") 

#plot of lifestage metadata column
stageplot<-ggplot(data=dat %>% group_by(lifeStage) %>% filter(n() >200), aes(x=forcats::fct_infreq(lifeStage),fill=species)) + 
  geom_bar(color="grey60", stat="count")+
  labs(y = "Number of records", x= "Lifestage") +
  #scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position=c(.85,.85)) 

plot_combine<- yearplot + netplot / stageplot  +
  plot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a') &
  theme(plot.tag = element_text(face = 'bold'))

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "figs2.png"), plot_combine, scale=1.5, device= "png", dpi = 500, width=12, height=5) 

dev.set(dev.next())
rm(plot_combine,netplot,yearplot,stageplot,dat_nodups,dat)
gc()
Show code
knitr::include_graphics(here::here("output_figs", "figs2.png"))

Figure S2. Histograms of cleaned and quality controlled GBIF data, indicating the frequency of species records available per (a) calendar year (b) sampling method (c) and life-stage. Dashed vertical lines on panel (a) represent years of significant data collection efforts; CPR = Continuous Plankton Recorder, IPY = International Polar Year, GLOBEC = Global Ocean Ecosystems Dynamics. Due to a high number of categories, only those with more than 50 (b) and 1000 (c) occurrences were included.

5. Combine OBIS and GBIF datasets

5.1. Merge and remove duplicates

Here we format GBIF and OBIS datasets to have columns that can be merged and then duplicates across the two databases are removed.

Show code
#read in saved, cleaned datasets
obis_nodups<-read.csv(here::here("output_data", "calanus_clean_nodups_obis.csv"))
gbif_nodups<-read.csv(here::here("output_data", "calanus_clean_nodups_gbif.csv"))

#rename relevant cols in gbif to match obis fields
colnames(gbif_nodups)[1]  <- "id" 
colnames(gbif_nodups)[2]  <- "institutioncode" 
colnames(gbif_nodups)[3]  <- "collectioncode" 
colnames(gbif_nodups)[4]  <- "datasetname"
colnames(gbif_nodups)[5]  <- "occurrenceid"
colnames(gbif_nodups)[9]  <- "lifestage"
colnames(gbif_nodups)[11]  <- "occurrenceremarks" 
colnames(gbif_nodups)[12]  <- "eventdate"
colnames(gbif_nodups)[16]  <- "samplingprotocol" 
colnames(gbif_nodups)[20]  <- "decimallatitude"
colnames(gbif_nodups)[21]  <- "decimallongitude" 
colnames(gbif_nodups)[22]  <- "dataset_id" 
colnames(gbif_nodups)[23]  <- "av_depth"

#subset gbif to the columns using
gbif_ref<-gbif_nodups[c(1,2,3,4,5,9,11,12,13,14,15,16,20,21,22,23,25,26,27)]
gbif_ref$database <-"gbif"
gbif_ref$eventdate <- as.Date(gbif_ref$eventdate)

#rename relevant cols in gbif to match obis fields
colnames(obis_nodups)[5]  <- "species"

#subset obis to the columns using
obis_ref<-obis_nodups[c(1,2,3,4,5,6,7,8,9,10,11,12,16,18,22,23,24,25,26,27)]
obis_ref$database <-"obis"
obis_ref$eventdate<-as.Date(obis_ref$eventdate)

#make sure all min and max depths in obis are positive (ie without - signs infront of depth val)
obis_ref$mindepth <- with(obis_ref, ifelse(minimumdepthinmeters < 0, -minimumdepthinmeters, minimumdepthinmeters))
obis_ref$maxdepth <- with(obis_ref, ifelse(maximumdepthinmeters < 0, -maximumdepthinmeters, maximumdepthinmeters))
obis_ref$maxdepth <- with(obis_ref, ifelse(is.na(maxdepth), mindepth, maxdepth))
obis_ref<-obis_ref[-c(6,7)]

#get av depth column to match gbif depth information
obis_ref$av_depth <- rowMeans(obis_ref[20:21], na.rm=TRUE)
obis_ref$av_depth[obis_ref$av_depth == "NaN"] <- NA 

#merge obis and gbif
merge_dat<-rbind.fill(obis_ref,gbif_ref)

#remove duplicates based on certain fields
merge_dat_nodups<-distinct(merge_dat, species, year, month, day, lifestage, decimallongitude2, decimallatitude2, av_depth, .keep_all=T)

5.2. Add bathymetry, flag inaccuracies and save merged dataset

Bathymetry data were downloaded from GEBCO, available here and stored locally in the folder “input_data”.

Show code
#add associate bathymetry information for each point
coordinates(merge_dat_nodups)<- ~ decimallongitude + decimallatitude

bathy<-raster("input_data/bathy.asc")
f<-extract(bathy,merge_dat_nodups)
merge_dat_nodups_bathy<-as.data.frame(cbind(merge_dat_nodups,f))
colnames(merge_dat_nodups_bathy)[21]<-"bathy"

#flag records with depth = NA and depth > bathy
#first negate av_depth column to match bathymetry depth signs
merge_dat_nodups_bathy$av_depth<- merge_dat_nodups_bathy$av_depth*-1

#second create new column to flag records where av depth is greater than bathy 
merge_dat_nodups_bathy$depth_flag[merge_dat_nodups_bathy$av_depth < merge_dat_nodups_bathy$bathy] = 1
merge_dat_nodups_bathy$depth_flag[merge_dat_nodups_bathy$av_depth >= merge_dat_nodups_bathy$bathy] = 0
merge_dat_nodups_bathy$depth_flag[is.na(merge_dat_nodups_bathy$bathy)] = 0

#save merged dataset to a subfolder "output_data"
fwrite(merge_dat_nodups_bathy,here::here("output_data", "calanus_clean_nodups_gbifobis.csv"))
gc()
rm(gbif_nodups, obis_nodups, obis_ref, gbif_ref, f, merge_dat, merge_dat_nodups)

5.3. Plot metadata - Figure 1

We can now plot histogram and barplots of the most interesting metadata fields - year of collection, sampling protocol, and life-stage

Show code
#shorten name for ease
dat<-merge_dat_nodups_bathy

#plot of year metadata column
yearplot<-ggplot(dat, aes(x=year,fill=species)) + 
  geom_histogram(color="grey60", binwidth = 5)+
  geom_vline(aes(xintercept=1930),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=1955),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=2005),color="grey", linetype="dashed", size=1)+
  geom_vline(aes(xintercept=1995),color="grey", linetype="dashed", size=1)+
  geom_text(aes(x=1930,y=7000, label="\nCPR began 1931"), colour="black", angle=90) +
  geom_text(aes(x=1955,y=20000, label="\n IPY 1957"), colour="black", angle=90) +
  geom_text(aes(x=2005,y=28000, label="\n IPY 2007"), colour="black", angle=90) +
  geom_text(aes(x=1997,y=31000, label="\n GLOBEC 1996"), colour="black", angle=90) +
  labs(y = "Number of records", x= "Year") +
  scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="right") 

#plot of sampling protocol metadata column
netplot<-ggplot(data=dat %>% group_by(samplingprotocol) %>% filter(n() >200), aes(x=forcats::fct_infreq(samplingprotocol),fill=species)) + 
  geom_bar(color="grey60", stat="count")+
  labs(y = "Number of records", x= "Sampling protocol") +
  #scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="right") 

#plot of lifestage metadata column
stageplot<-ggplot(data=dat %>% group_by(lifestage) %>% filter(n() >4000), aes(x=forcats::fct_infreq(lifestage),fill=species)) + 
  geom_bar(color="grey60", stat="count")+
  labs(y = "Number of records", x= "Lifestage") +
  #scale_x_continuous(breaks=seq(1880, 2020, 20))+
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C. hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position=c(.85,.85)) 

plot_combine<- yearplot + netplot / stageplot  +
  plot_layout(guides = 'collect') + plot_annotation(tag_levels = 'a') &
  theme(plot.tag = element_text(face = 'bold'))

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "fig1.png"), plot_combine, scale=1.5, device= "png", dpi = 500, width=12, height=5) 
dev.set(dev.next())

rm(plot_combine,netplot,yearplot,stageplot,dat)
gc()
Show code
knitr::include_graphics(here::here("output_figs", "fig1.png"))

Figure 1. Histograms of cleaned and quality controlled OBIS and GBIF databases combined, indicating the frequency of species records available per (A) calendar year (B) sampling protocol (C) and life-stage. Dashed vertical lines on panel (A) represent years of significant data collection efforts; CPR = Continuous Plankton Recorder, IPY = International Polar Year, GLOBEC = Global Ocean Ecosystems Dynamics. Due to a high number of categories, only categories with more than 200 (B) and 4000 (C) occurrences were included.

6. Figure 2.

Code for panel A of this figure is adapted from Webb et al. (2010)

Show code
#subset data to only those with no depth innacuracies
dat_bathy_filt<-subset(merge_dat_nodups_bathy, merge_dat_nodups_bathy$depth_flag == 0)

#for plot

dat_bathy_filt$count<-1

#define latitude categories
dat_bathy_filt$lat.cat <- cut(dat_bathy_filt$decimallatitude, breaks = seq(30,90, by = 1), include.lowest = T)
#define depth categories
dat_bathy_filt$sd.cat <- cut(dat_bathy_filt$av_depth, breaks = c(
  seq(-4000,0, by = 50)),include.lowest = T)

#tablulate summed record counts by sample depth category (rows) and latitude category (columns)
recs.sum <- tapply(dat_bathy_filt$count, list(dat_bathy_filt$sd.cat, dat_bathy_filt$lat.cat), sum)
#For the figure to work properly, need to flip the matrices vertically (which has the effect of making depth 'negative', so that greater depths appear lower down on the figure)
recs.sum <- recs.sum[nrow(recs.sum):1,]

#create a dataframe to be used within ggplot
df <- data.frame(recs.sum)
colnames(df)<-seq(30,89)
df<-gather(df,key,value)
n<-seq(50,4000,50)
n<-rep(n, times=60)
df$z<-n*-1
colnames(df)<-c("lat","count","z")

#repeat previous steps with bathymetry categories and latitude categories, rather than sample depth
batt<- rasterToPoints(bathy)
batt<-as.data.frame(batt)
batt$lat.cat <- cut(batt$y, breaks = seq(30,90, by = 1), include.lowest = T)
batt$sd.cat <- cut(batt$bathy, breaks = seq(-4000,0, by = 50), include.lowest = T)
batt$minbathy[batt$bathy == ave(batt$bathy, batt$lat.cat, FUN=min)]<-1
batt$minbathy[!batt$bathy == ave(batt$bathy, batt$lat.cat, FUN=min)]<-0
bat <- tapply(batt$minbathy, list(batt$sd.cat, batt$lat.cat), max)
bat <- bat[nrow(bat):1,]

#loop through each cell to find locations where bathymetric depth is exceeded. If found, make these cells and all those below it =1. Will be used to black out cells in panel a.

for (i in 1:nrow(bat)-1){
  for (j in 1:ncol(bat)-1){
    
    if(isTRUE(bat[i, j] == 1)==TRUE){
      
      bat[i+1,j] = 1
    } 

  }
}

#create a dataframe to be used within ggplot
batdf<-data.frame(bat)
colnames(batdf)<-seq(30,89)
batdf<-gather(batdf,key,value)
n<-seq(50,4000,50)
n<-rep(n, times=60)
batdf$z<-n*-1
colnames(batdf)<-c("lat","bathymin","z")

#combine dataframes
df2<-cbind(df,batdf)
df2<-df2[,c(-4,-6)]

#ggplot panel a
textcol <- "grey40"
p <- ggplot(df2,aes(x=lat,y=z))+
  geom_tile(data=df2,aes(fill=log(count)),colour="white",size=0.2) +
  geom_tile(data=subset(df2,df2$bathymin==1),fill="grey20",colour= NA,size=0.2) +
  guides(fill=guide_legend(title="Log no. of records"))+
  labs(x="Latitude (degrees)",y="Depth (metres)")+
  scale_y_continuous(expand=c(0,0),breaks=c(-50,-500,-1000,-1500,-2000,-2500,-3000,-3500,-4000))+
  scale_x_discrete(position="top",expand=c(0,0),breaks=c(30,40,50,60,70,80))+
  scale_fill_viridis(limits = c(0, 10),aesthetics = "fill", na.value="light grey")+
  #coord_fixed()+
  theme_grey(base_size=10)+
  theme(legend.position="right",legend.direction="vertical",
        legend.title=element_text(size=12),
        legend.margin=margin(grid::unit(0,"cm")),
        legend.text=element_text(size=10),
        legend.key.height=grid::unit(0.8,"cm"),
        legend.key.width=grid::unit(0.2,"cm"),
        axis.line = element_line(colour = "grey"),
        axis.title.x = element_text(size = 14),
        axis.title.y = element_text(size = 14),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        plot.background=element_blank(),
        panel.border=element_blank())

#get arctic region shapefile
sf::sf_use_s2(FALSE)
Arctic_sf <- rnaturalearth::ne_countries(
  scale = 50,
  continent = c("Europe", "North America"),
  returnclass = "sf") 
Arctic_sf <-sf::st_crop(Arctic_sf, c(xmin=-180, xmax=180, ymin=40, ymax=90)) %>% sf::st_transform(3995)

#ggplot panel b
#make sf object in arctic stereographic projection
dat_bathy_obis <- merge_dat_nodups_bathy %>% 
  filter(database == "obis") %>%
  filter(!decimallatitude <30) %>% 
  sf::st_as_sf(coords = c("decimallongitude","decimallatitude")) %>% 
  sf::st_set_crs(4326) %>% 
  sf::st_transform(3995)

p_obis <- 
  ggplot() +
  layer_spatial(dat_bathy_obis, mapping = aes(colour = as.factor(depth_flag), alpha = 0.2), size = 1, shape=20) +
  layer_spatial(Arctic_sf, fill = "light grey", colour = "grey40",lwd=0.5) +
  scale_colour_manual(breaks = c("1","0",NA),labels=c('>bathymetry', '<bathymetry', NA),values = c("#222255","#bbccee","black"),na.value="#ee7733") +
  guides(colour = guide_legend(title="Average collection depth",override.aes = list(size=4))) +
  scale_alpha(guide = 'none') +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face= "bold"),
    plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
    legend.position="right",legend.direction="vertical",
    legend.title=element_text(size=12),
    legend.margin=margin(grid::unit(0,"cm")),
    legend.text=element_text(size=10))

#ggplot panel c
#make sf object in arctic stereographic projection
dat_bathy_gbif <- merge_dat_nodups_bathy %>% 
  filter(database == "gbif") %>%
  filter(!decimallatitude <30) %>% 
  sf::st_as_sf(coords = c("decimallongitude","decimallatitude")) %>% 
  sf::st_set_crs(4326) %>% 
  sf::st_transform(3995)

p_gbif <- 
  ggplot() +
  layer_spatial(dat_bathy_gbif, mapping = aes(colour = as.factor(depth_flag), alpha = 0.2), size = 1, shape=20) +
  layer_spatial(Arctic_sf, fill = "light grey", colour = "grey40", lwd=0.5) +
  scale_colour_manual(breaks = c("1","0",NA),labels=c('>bathymetry', '<bathymetry', NA),values = c("#222255","#bbccee","black"),na.value="#ee7733") +
  guides(colour = guide_legend(title="Average collection depth",override.aes = list(size=4))) +
  scale_alpha(guide = 'none') +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face= "bold"),
    plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
    legend.position="right",legend.direction="vertical",
    legend.title=element_text(size=12),
    legend.margin=margin(grid::unit(0,"cm")),
    legend.text=element_text(size=10))

p_combine<-p / (p_obis | p_gbif) +
  plot_layout(guides = 'collect') +
  plot_annotation(tag_levels = 'a') &
  theme(plot.tag = element_text(face = 'bold'))

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "fig2.png"), p_combine,scale=2, device= "png", dpi = 500, width=5, height=5)
dev.set(dev.next())

rm(list=setdiff(ls(), "point.in.poly"))
gc()
Show code
knitr::include_graphics(here::here("output_figs", "fig2.png"))

Figure 2. (A) The number of Calanus occurrence records within each unique combination of sample depth (50m) and latitude (1 degree) available via OBIS and GBIF combined. Records for all three species are combined, records for which average collection depth was missing or greater than bathymetric depth were removed, cells containing one record are shown as log=0, dark grey cells indicate maximum bathymetric depth. (B, C) maps showing locations of unique records present within OBIS and GBIF databases respectively. Records for which average collection depth was missing or deeper than bathymetric depth are highlighted.

7. Add environmental parameters to OBIS data

Data from the OBIS database contained information on maximum and minimum collections depths, allowing for the separation of these data in to discrete shallow and deep subsets. We reload in the cleaned OBIS dataset, and add several environmental parameters to each point, in preparation to explore how shallow and deep records of each species associate to these variables.

7.1. Bathymetry

Bathymetry data were downloaded from GEBCO, available here and stored locally in the folder “input_data”.

Show code
# reload cleaned obis data
obis_nodups<-read.csv(here::here("output_data", "calanus_clean_nodups_obis.csv"))

#subset to cols we are using

obis_nodups$eventdate<-as.Date(obis_nodups$eventdate)

#make sure all min and max depths are positive (ie without - signs infront of depth val)
obis_nodups$mindepth <- with(obis_nodups, ifelse(minimumdepthinmeters < 0, -minimumdepthinmeters, minimumdepthinmeters))
obis_nodups$maxdepth <- with(obis_nodups, ifelse(maximumdepthinmeters < 0, -maximumdepthinmeters, maximumdepthinmeters))
obis_nodups$maxdepth <- with(obis_nodups, ifelse(is.na(maxdepth), mindepth, maxdepth))
obis_nodups<-obis_nodups[-c(6,7)]

#get av depth column
obis_nodups$av_depth <- rowMeans(obis_nodups[27:28], na.rm=TRUE)
obis_nodups$av_depth[obis_nodups$av_depth == "NaN"] <- NA 

#get geometry
coordinates(obis_nodups)<- ~ decimallongitude + decimallatitude

#load bathymetry data and extract to each point
bathy<-raster("input_data/bathy.asc")
f<-extract(bathy,obis_nodups)
obis_nodups_bathy<-as.data.frame(cbind(obis_nodups,f))
colnames(obis_nodups_bathy)[28]<-"bathy"

rm(obis_nodups,bathy,f)

#filter records with depth = NA and depth>bathy
#first negate av_depth column to match bathymetry depth signs
obis_nodups_bathy$av_depth<- obis_nodups_bathy$av_depth*-1

#create new column with 1 or 0 depending on depth and bathymetry values
obis_nodups_bathy$depth_flag = NA
obis_nodups_bathy$depth_flag[obis_nodups_bathy$av_depth < obis_nodups_bathy$bathy] = 1
obis_nodups_bathy$depth_flag[obis_nodups_bathy$av_depth >= obis_nodups_bathy$bathy] = 0
obis_nodups_bathy$depth_flag[is.na(obis_nodups_bathy$bathy)] = 0

#count how many records are flagged 
summary(obis_nodups_bathy$depth_flag == 1)

rm(list=setdiff(ls(), c("point.in.poly","obis_nodups_bathy")))

7.2. Season

Show code
#make season column based on month of collection
obis_nodups_bathy$season[obis_nodups_bathy$month >= 1 & obis_nodups_bathy$month <= 3] = 1
obis_nodups_bathy$season[obis_nodups_bathy$month >= 4 & obis_nodups_bathy$month <= 6] = 2
obis_nodups_bathy$season[obis_nodups_bathy$month >= 7 & obis_nodups_bathy$month <= 9] = 3
obis_nodups_bathy$season[obis_nodups_bathy$month >= 10 & obis_nodups_bathy$month <= 12] = 4

7.3. Temperature

Temperature data are from the World Ocean Atlas, available here

Rasters of temperature data (0.25 degree grid size) were downloaded for each available season (jfm,amj,jas,ond) and decade (1955-2017). Decadal rasters were combined into two time periods (1955-1984 = timeperiod “55” and 1985-2017 = timeperiod “85”).

Separate rasters were saved for nine depth layers (0, 50, 100, 200, 400, 600, 800, 1000 and 1500m) for each of these seasons and timeperiods. Thus, 72 rasters of temperature data are stored locally in “input_data/tempdat”. Subfolders within “tempdat” have the naming format: timeperiod_depth_season e.g. subfolder 55_x0_amj would contain temperature raster from timeperiod 1955-1984, 0m depth, and season april-may-june.

Show code
#make depth category column based on average depth (will be used to guide depths of relevant temperature data)
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= 0 & obis_nodups_bathy$av_depth > -25] = 0
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -25 & obis_nodups_bathy$av_depth > -75] = 50
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -75 & obis_nodups_bathy$av_depth > -150] = 100
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -150 & obis_nodups_bathy$av_depth > -300] = 200
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -300 & obis_nodups_bathy$av_depth > -500] = 400
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -500 & obis_nodups_bathy$av_depth > -700] = 600
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -700 & obis_nodups_bathy$av_depth > -900] = 800
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -900 & obis_nodups_bathy$av_depth > -1250] = 1000
obis_nodups_bathy$depth_cat[obis_nodups_bathy$av_depth <= -1250] = 1500

#all temperature data are stored within folder "tempdat". This code lists the name of each subfolder: subfolder names are in the format: timeperiod_depth_season e.g. 55_x0_amj would contain temperature raster from timeperiod 1955-1984, 0m depth, and season april-may-june.

folds<-list.dirs(path = here::here("input_data", "tempdat"), full.names = TRUE, recursive = TRUE)
folds<- folds %>% str_replace(".*?/tempdat/", "") 
folds<-folds[2:73]

#input each temperature raster into list and change filenames to match folder names
files_temp <- list.files(file.path(here::here("input_data"), paste0("tempdat/")), pattern = "temp", full.names = T, recursive =T)
filenames_temp<- files_temp %>% str_replace(".*?/temp.asc", paste0(folds,"_temp"))

#stack rasters, replace names, check and plot
files_temp<- stack(files_temp)
crs(files_temp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" 
names(files_temp) <- filenames_temp

#subset occurrences to only match temperature to those from 1955 or after, and from >30 latitude or greater
dat<-subset(obis_nodups_bathy, !decimallatitude < 30)
dat<-subset(dat, !year<=1954)

#give occurrences category to match to 1st or 2nd time period of temperature data
dat$year_cat<-ifelse(dat$year >= 1955 & dat$year <= 1984, 1,2)
#make spatial object
coordinates(dat) <- ~ decimallongitude + decimallatitude

#extract temperature data (from all 72 temperature rasters) for each occurrence point - then use leftjoin function to save only the temperature data from the relevant season, time period and depth of each point.

e1<-raster::extract(files_temp,dat)
e1<-as_tibble(e1) 
dat<-as.data.frame(dat)
e1$decimallongitude<-dat$decimallongitude
e1$decimallatitude<-dat$decimallatitude
e1$id<-dat$id
e1<-pivot_longer(e1, cols=1:72,values_to = "temp", names_to = "depth_cat")

e1$season<-ifelse(grepl("jfm", e1$depth_cat), 1,
                  ifelse(grepl("amj", e1$depth_cat), 2,
                         ifelse(grepl("jas", e1$depth_cat), 3,4)))

e1$year_cat<-ifelse(grepl("X55", e1$depth_cat), 1,2)
e1<-mutate(e1, depth_cat=str_replace(depth_cat,".*?x",""))
e1$depth_cat<-gsub("[^0-9.-]", "", e1$depth_cat)
e1$depth_cat<-as.numeric(e1$depth_cat)

dat_withtemp<-left_join(dat,e1,by=c("decimallongitude", "decimallatitude", "id", "season", "year_cat","depth_cat"))

rm(e1,dat,filenames_temp,files_temp)
gc()

7.4. Sea ice concentration

Gridded Sea ice concentration data are from the NSIDC, available here

Rasters of sea ice con data (0.25 degree grid size) were downloaded for each available month and year. Months were combined in to seasonal averages (jfm,amj,jas,ond) for the two time periods (1955-1984 = timeperiod “55” and 1985-2017 = timeperiod “85”).

Thus, 8 rasters of seaicecon data are stored locally in “input_data/tempdat”. Subfolders within “tempdat” have the naming format: timeperiod_depth_season e.g. 55_x0_amj would be timeperiod 1955-1984, 0m depth, and season april-may-june.

Show code
#repeat above steps for sea ice concentration data which are within the folder "tempdat" but only for surface depths. i.e. only interested in the first 8 folders (timeperiods x2 and seasons x4)

#remove all but the relevant 8 folders
folds<-folds[c(1:4,37:40)]

#get list of sea ice concentration rasters and rename to same as folder names
files_ice <- list.files(file.path(here::here("input_data"), paste0("tempdat/")), pattern = "seaicecon", full.names = T, recursive =T)
filenames_ice<- files_ice %>% str_replace(".*?/seaicecon.asc", paste0(folds,"_ice"))

files_ice<- stack(files_ice)
crs(files_ice) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0" 
names(files_ice) <- filenames_ice

#subset occurrences to only match temperature to those from 1955 or after, and from >30 latitude or greater
dat<-subset(obis_nodups_bathy, !decimallatitude < 30)
dat<-subset(dat, !year<=1954)

#give occurrences category to match to 1st or 2nd time period
dat$year_cat<-ifelse(dat$year >= 1955 & dat$year <= 1984, 1,2)
#make spatial object
coordinates(dat) <- ~ decimallongitude + decimallatitude

#extract all sea ice concentration data (from all 8 seaicecon files) for each point - then use leftjoin function to save only the sea ice concentration data from the relevant season and time period that matches each point.
e1<-raster::extract(files_ice,dat)
e1<-as_tibble(e1) 
dat<-as.data.frame(dat)
e1$decimallongitude<-dat$decimallongitude
e1$decimallatitude<-dat$decimallatitude
e1$id<-dat$id
e1<-pivot_longer(e1, cols=1:8,values_to = "seaicecon", names_to = "depth_cat")
e1$season<-ifelse(grepl("jfm", e1$depth_cat), 1,
                  ifelse(grepl("amj", e1$depth_cat), 2,
                         ifelse(grepl("jas", e1$depth_cat), 3,4)))

e1$year_cat<-ifelse(grepl("X55", e1$depth_cat), 1,2)
e1<-mutate(e1, depth_cat=str_replace(depth_cat,".*?x",""))
e1$depth_cat<-gsub("[^0-9.-]", "", e1$depth_cat)
e1$depth_cat<-as.numeric(e1$depth_cat)

dat_withice<-left_join(dat_withtemp,e1,by=c("decimallongitude", "decimallatitude", "id", "season", "year_cat"))
dat_withice<-dat_withice[-36] # remove extra depthcat col.

#save new file with all environmental data added within the subfolder "output_data"
fwrite(dat_withice,here::here("output_data", "calanus_clean_nodups_obis_envdat.csv"))
rm(e1,dat_withtemp,dat,folds, files_ice,filenames_ice)
gc()

7.5. Shallow and deep subsets

These subsets will be used when creating plots 3-7.

Show code
#save shallow and deep datasets for all OBIS records
dat_nodup_shallow <-obis_nodups_bathy %>%
  subset(., maxdepth <= 200)

dat_nodup_deep <-obis_nodups_bathy %>%
  subset(., mindepth >= 400)


#save shallow and deep datasets for OBIS records with temp and ice data (ie. subset by year 1955-2017, no lat<30)
dat_nodup_shallow_sub <-dat_withice %>%
  subset(., maxdepth <= 200)

dat_nodup_deep_sub <-dat_withice %>%
  subset(., mindepth >= 400)

rm(obis_nodups_bathy)
gc()

8. Figure 3.

The original shapefile required to define the ocean and seas in plots 3 and 4 is available here

This shapefile was modified in ESRI ArcMap v10.6 and saved locally as “regions.shp” within “input_data/IHO_seas_modified”. Only Arctic and North Atlantic regions were retained, with smaller areas being combined for ease of visualizing the number of records collected in a given area. 11 regions in total are used.

Show code
#get arctic region shapefile
sf::sf_use_s2(FALSE)
Arctic_sf <- rnaturalearth::ne_countries(
  scale = 50,
  continent = c("Europe", "North America"),
  returnclass = "sf") 
Arctic_sf <-sf::st_crop(Arctic_sf, c(xmin=-180, xmax=180, ymin=40, ymax=90)) %>% sf::st_transform(3995)

#load in polygon of Arctic regions
region_poly <- sf::st_read("input_data/IHO_seas_modified/regions.shp")
region_poly <- st_as_sf(region_poly) # convert to sf object
st_crs(region_poly) <- 4326

#make points sf objects
coordinates(dat_nodup_shallow)<- ~ decimallongitude + decimallatitude
proj4string(dat_nodup_shallow) <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
dat_nodup_shallow <- st_as_sf(dat_nodup_shallow)

#merge poly and point info
int_shallow <- dat_nodup_shallow %>% point.in.poly(region_poly) %>% st_as_sf()

#summarize by region, species and season
int_shallow_result <- int_shallow %>% 
  count(NAME,scientificname,season) %>%
  st_drop_geometry()

#remove large redundant objects
rm(int_shallow)

#crop to plot area
region_poly <- sf::st_crop(region_poly, c(xmin=-180, xmax=180, ymin=40, ymax=90)) %>% sf::st_transform(3995)

#merge count data into polygon object for plotting
int_shallow <- merge(region_poly, int_shallow_result, by = "NAME", all.X = TRUE, all.y = TRUE)

#plots
cfin_s_jfm<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus finmarchicus", season == 1), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,42000), breaks = c(0,10000,20000,30000,40000))+
  ggtitle("C. finmarchicus")+
  labs(y= "Jan-Feb-Mar\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face= "bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cfin_s_amj<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus finmarchicus", season == 2), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,42000), breaks = c(0,10000,20000,30000,40000))+
  labs(y= "Apr-May-Jun\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cfin_s_jas<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus finmarchicus", season == 3), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,42000), breaks = c(0,10000,20000,30000,40000))+
  labs(y= "Jul-Aug-Sep\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cfin_s_ond<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus finmarchicus", season == 4), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  labs(fill='occurrence record count')+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,42000), breaks = c(0,10000,20000,30000,40000),
                     guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
  labs(y= "Oct-Nov-Dec\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    #legend.title = element_blank(),
    legend.position="bottom",
    legend.key.width = unit(1.3, "cm"))

cgla_s_jfm<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus glacialis", season == 1), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
  ggtitle("C. glacialis")+
  #labs(y= "Jan-Feb-Mar\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face= "bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cgla_s_amj<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus glacialis", season == 2), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
  #labs(y= "Apr-May-Jun\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cgla_s_jas<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus glacialis", season == 3), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
  #labs(y= "Jul-Aug-Sep\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cgla_s_ond<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus glacialis", season == 4), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  labs(fill='occurrence record count')+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000),
                     guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
  #labs(y= "Oct-Nov-Dec\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    #legend.title = element_blank(),
    legend.position="bottom",
    legend.key.width = unit(1.3, "cm"))

chyp_s_jfm<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus hyperboreus", season == 1), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
  ggtitle("C. hyperboreus")+
  #labs(y= "Jan-Feb-Mar\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face= "bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

chyp_s_amj<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus hyperboreus", season == 2), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
  #labs(y= "Apr-May-Jun\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

chyp_s_jas<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus hyperboreus", season == 3), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000))+
  #labs(y= "Jul-Aug-Sep\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

chyp_s_ond<-
  ggplot() +
  geom_sf(data = int_shallow %>% filter(scientificname == "Calanus hyperboreus", season == 4), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  labs(fill='occurrence record count')+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,4000), breaks = c(0,1000,2000,3000,4000),
                     guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
  #labs(y= "Oct-Nov-Dec\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    #legend.title = element_blank(),
    legend.position="bottom",
    legend.key.width = unit(1.3, "cm"))

g <- (cfin_s_jfm | cgla_s_jfm | chyp_s_jfm) / (cfin_s_amj | cgla_s_amj | chyp_s_amj) / (cfin_s_jas | cgla_s_jas | chyp_s_jas)/ (cfin_s_ond | cgla_s_ond | chyp_s_ond)

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "fig3.png"), g, device= "png", dpi = 500, width=12, height=15)
dev.set(dev.next())

rm(g,cfin_s_jfm,cgla_s_jfm,chyp_s_jfm,cfin_s_amj,cgla_s_amj,chyp_s_amj,cfin_s_jas,cgla_s_jas,chyp_s_jas,cfin_s_ond,cgla_s_ond,chyp_s_ond)
rm(int_shallow,int_shallow_result)
gc()
Show code
knitr::include_graphics(here::here("output_figs", "fig3.png"))

Figure 3. Heatmaps showing counts of occurrence records collected between 0-200m in different seasons within 11 Arctic and Subarctic regions for (left to right) Calanus finmarchicus, C. glacialis and C. hyperboreus. Note difference in scales between species. Records are from cleaned OBIS database only.

9. Figure 4.

repeat previous section with records collected at depths greater than or equal to 400m.

Show code
region_poly <- sf::st_read("input_data/IHO_seas_modified/regions.shp")
region_poly <- st_as_sf(region_poly) # convert to sf object
st_crs(region_poly) <- 4326

#make points sf objects
coordinates(dat_nodup_deep)= ~ decimallongitude + decimallatitude
proj4string(dat_nodup_deep) <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
dat_nodup_deep <- st_as_sf(dat_nodup_deep)

#merge poly and point info
int_deep <- dat_nodup_deep %>% point.in.poly(region_poly) %>% st_as_sf()

#summarize by region, species and season
int_deep_result <- int_deep %>% 
  count(NAME,scientificname,season) %>% 
  st_drop_geometry()

#remove large redundant objects
rm(int_deep)
gc()

region_poly <- sf::st_crop(region_poly, c(xmin=-180, xmax=180, ymin=40, ymax=90)) %>% sf::st_transform(3995)

int_deep <- merge(region_poly, int_deep_result, by = "NAME", all.X = TRUE, all.y = TRUE)

#plots
cfin_d_jfm<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus finmarchicus", season == 1), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,800), breaks = c(0,200,400,600,800))+
  ggtitle("C. finmarchicus")+
  labs(y= "Jan-Feb-Mar\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face= "bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cfin_d_amj<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus finmarchicus", season == 2), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,800), breaks = c(0,200,400,600,800))+
  labs(y= "Apr-May-Jun\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cfin_d_jas<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus finmarchicus", season == 3), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,800), breaks = c(0,200,400,600,800))+
  labs(y= "Jul-Aug-Sep\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cfin_d_ond<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus finmarchicus", season == 4), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  labs(fill='occurrence record count')+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,800), breaks = c(0,200,400,600,800),
                     guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
  labs(y= "Oct-Nov-Dec\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    #legend.title = element_blank(),
    legend.position="bottom",
    legend.key.width = unit(1.3, "cm"))

cgla_d_jfm<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus glacialis", season == 1), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200))+
  ggtitle("C. glacialis")+
  #labs(y= "Jan-Feb-Mar\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face= "bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cgla_d_amj<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus glacialis", season == 2), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200))+
  #labs(y= "Apr-May-Jun\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cgla_d_jas<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus glacialis", season == 3), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits =c(0,200), breaks = c(0,50,100,150,200))+
  #labs(y= "Jul-Aug-Sep\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

cgla_d_ond<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus glacialis", season == 4), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  labs(fill='occurrence record count')+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200),
                     guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
  #labs(y= "Oct-Nov-Dec\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    #legend.title = element_blank(),
    legend.position="bottom",
    legend.key.width = unit(1.3, "cm"))

chyp_d_jfm<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus hyperboreus", season == 1), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits =c(0,200), breaks = c(0,50,100,150,200))+
  ggtitle("C. hyperboreus")+
  #labs(y= "Jan-Feb-Mar\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face= "bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold.italic",hjust = 0.5),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

chyp_d_amj<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus hyperboreus", season == 2), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200))+
  #labs(y= "Apr-May-Jun\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

chyp_d_jas<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus hyperboreus", season == 3), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200))+
  #labs(y= "Jul-Aug-Sep\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    legend.title = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    legend.position="none",
    legend.key.width = unit(1.3, "cm"))

chyp_d_ond<-
  ggplot() +
  geom_sf(data = int_deep %>% filter(scientificname == "Calanus hyperboreus", season == 4), aes(fill = n))+
  geom_sf(data = Arctic_sf, fill = "light grey")+
  labs(fill='occurrence record count')+
  scale_fill_viridis(option = "turbo", direction = 1,limits = c(0,200), breaks = c(0,50,100,150,200),
                     guide = guide_colorbar (title.position = "top",title.hjust = 0.5))+
  #labs(y= "Oct-Nov-Dec\n") +
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    panel.grid = element_line(colour = "grey"),
    axis.line = element_line(colour = "grey"),
    axis.title.y = element_text(size = 14, face="bold"),
    #legend.title = element_blank(),
    legend.position="bottom",
    legend.key.width = unit(1.3, "cm"))

g <- (cfin_d_jfm | cgla_d_jfm | chyp_d_jfm) / (cfin_d_amj | cgla_d_amj | chyp_d_amj) / (cfin_d_jas | cgla_d_jas | chyp_d_jas)/ (cfin_d_ond | cgla_d_ond | chyp_d_ond)

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs","fig4.png"), g, device= "png", dpi = 500, width=12, height=15)
dev.set(dev.next())

rm(g,cfin_d_jfm,cgla_d_jfm,chyp_d_jfm,cfin_d_amj,cgla_d_amj,chyp_d_amj,cfin_d_jas,cgla_d_jas,chyp_d_jas,cfin_d_ond,cgla_d_ond,chyp_d_ond)
rm(int_deep,int_deep_result, Arctic_sf,region_poly)
gc()
Show code
knitr::include_graphics(here::here("output_figs", "fig4.png"))

Figure 4. Heatmaps showing counts of occurrence records collected at depths equal to or deeper than 400m in different seasons within 11 Arctic and Subarctic regions for (left to right) Calanus finmarchicus, C. glacialis and C. hyperboreus. Note difference in scales between species. Records are from cleaned OBIS database only.

10. Figure 5.

Show code
#remove any points with no month of collection information
dat_nodup_shallow <- dat_nodup_shallow[-which(is.na(dat_nodup_shallow$month)), ]  

#plots
month_g<-
  ggplot(dat_nodup_shallow, aes(x=as.factor(month), fill=scientificname)) +
  geom_bar(colour="grey60") +
  labs(y = "Number of records", x= "") +
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
  scale_x_discrete(breaks = c(1,2,3,4,5,6,7,8,9,10,11,12),
                   labels = c("Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    legend.position="none") 

month_d<-
  ggplot(dat_nodup_deep, aes(x=as.factor(month), fill=scientificname)) +
  geom_bar(colour="grey60") +
  labs(y = "Number of records", x= "Month Collected") +
  scale_fill_viridis(discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus","C. glacialis","C.hyperboreus"))+
  scale_x_discrete(breaks = c(1,2,3,4,5,6,7,8,9,10,11,12),
                   labels = c("Jan", "Feb", "Mar","Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    legend.position="bottom") 

#combine plots in panels
g2<-month_g/month_d +
  plot_annotation(tag_levels = 'a') &
  theme(plot.tag = element_text(face = 'bold'))

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs", "fig5.png"), g2, scale=2, device= "png", dpi = 500, width=5, height=6)
dev.set(dev.next())

rm(g2, month_d, month_g)
Show code
knitr::include_graphics(here::here("output_figs", "fig5.png"))

Figure 5. Monthly counts of occurrence records for each Calanus species collected (A) between 0-200m, and (B) equal to or deeper than 400m. Note difference in scales between panels (A, B). Records are from cleaned OBIS database only.

11. Figure 6.

Show code
#create shallow summer only dataset, removing any records with depth innaccuracy
dat_nodup_shallow_sub_summer <- dat_nodup_shallow_sub %>% filter(depth_flag == 0)
dat_nodup_shallow_sub_summer <- dat_nodup_shallow_sub_summer %>% filter(month >= 4 & month <=7)

#plot summer shallow data
temp_g<-
  ggplot(dat_nodup_shallow_sub_summer, aes(x=temp, col=scientificname, fill = scientificname)) +
  geom_density(lwd = 1.2, alpha=0.3, bw= 1.3)+
  labs(x = "", y= "Density") +
  xlim(-5,28)+
  scale_fill_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
  scale_color_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    legend.position="none") #bottom

#create deep winter only dataset
dat_nodup_deep_sub_winter <- dat_nodup_deep_sub %>% filter(depth_flag == 0)
dat_nodup_deep_sub_winter <- dat_nodup_deep_sub_winter %>% filter(month <= 3 | month >=8)

#plot deep winter data
temp_d<-
  ggplot(dat_nodup_deep_sub_winter, aes(x=temp, col=scientificname, fill = scientificname)) +
  geom_density(lwd = 1.2, alpha=0.3, bw= 0.5)+
  labs(x = "Average seasonal\n temperature (°C)", y= "Density") +
  xlim(-5,28)+
  scale_fill_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
  scale_color_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    legend.position=c(.8,.80)) 

#create north of 66oN dataset
dat_withice_north <- filter(dat_withice, dat_withice$decimallatitude >66)
#tapply(datwithice$seaicecon, datwithice$scientificname, summary)
#summary(datwithice$seaicecon)

#plot north data
ice_allnorth<-ggplot(dat_withice_north, aes(x=seaicecon, col=scientificname, fill = scientificname)) +
  geom_histogram(colour = "grey60", binwidth = 10, position = "stack")+
  geom_vline(aes(xintercept=6.6),color="#03051a", linetype="dashed", size=1)+ ##03051a
  geom_vline(aes(xintercept=22),color="#cb1b4f", linetype="dashed", size=1)+ ##cb1b4f
  geom_vline(aes(xintercept=25),color="#e8d9d0", linetype="dashed", size=1)+ ##e1d4c7
  labs(x = "Average seasonal\n sea ice concentration (%)", y = "Number of records (latitude >66°N)") +
  scale_colour_manual(limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"),values = c("#03051a","#cb1b4f","#faebdd"))+
  scale_fill_manual(limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"),values = c("#03051a","#cb1b4f","#faebdd"))+
   theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"))
#ggplot_build(ice_allnorth)$data

#combine in mutltiple panels
p2<-(temp_g/temp_d | ice_allnorth) +
  plot_annotation(tag_levels = 'a') &
  theme(plot.tag = element_text(face = 'bold'))

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs","fig6.png"), p2, scale=2, device= "png", dpi = 500, width=7, height=3.5) 
dev.off()

rm(dat_nodup_shallow_sub_summer,ice_allnorth, temp_d,temp_g,p2,dat_withice_north)
gc()
Show code
knitr::include_graphics(here::here("output_figs", "fig6.png"))

Figure 6. The density of records associated to average seasonal temperatures for records collected (A) during the “shallow summer” period, defined by records collected between 0-200m in April-July only and (B) the “deep winter” period, defined by records collected at depths greater than or equal to 400m in August-March only. (C) The number of records (all three Calanus species combined) collected above 66° north associated with different average seasonal sea ice concentrations. Dashed vertical lines represent the mean sea ice concentration association for each species. Records are from cleaned OBIS database and between years 1955-2017 only. Records for which average collection depth was missing or greater than bathymetric depth were removed.

12. Figure 7.

Show code
bathy_d<-
  ggplot(dat_nodup_deep_sub_winter, aes(y=bathy, x=as.factor(scientificname), fill=scientificname)) +
  geom_boxplot() +
  labs(y = "Bathymetry depth (m)",x="") +
  scale_fill_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
  scale_x_discrete(labels=c("Calanus finmarchicus" = "C.\n finmarchicus","Calanus glacialis" = "C.\n glacialis", "Calanus hyperboreus" = "C.\n hyperboreus"))+
  scale_y_continuous(limits= c(-4500,0))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12, face = "italic"),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    legend.position= "right") 

#ggplot_build(bathy_d)$data

avdepth_d<-
  ggplot(dat_nodup_deep_sub_winter, aes(y=av_depth, x=as.factor(scientificname), fill=scientificname)) +
  geom_boxplot() +
  labs(y = "Average collection depth (m)", x="") +
  scale_fill_viridis(begin = 0, discrete = T, option = "rocket", direction = 1,limits = c("Calanus finmarchicus", "Calanus glacialis", "Calanus hyperboreus"), labels=c("C. finmarchicus", "C. glacialis", "C. hyperboreus"))+
  scale_x_discrete(labels=c("Calanus finmarchicus" = "C.\n finmarchicus","Calanus glacialis" = "C.\n glacialis", "Calanus hyperboreus" = "C.\n hyperboreus"))+
  scale_y_continuous(limits= c(-4500,0))+
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(colour = "grey", fill = NA),
    axis.line = element_line(colour = "grey"),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12, face = "italic"),
    axis.text.y = element_text(size = 12),
    legend.title = element_blank(),
    legend.text = element_text(size = 12, face = "italic"),
    legend.position= "right") 

#ggplot_build(avdepth_d)$data

p2<-(bathy_d | avdepth_d) +
  plot_layout(guides = 'collect') +
  plot_annotation(tag_levels = 'a') &
  theme(plot.tag = element_text(face = 'bold'))

#save figure to a subfolder "output_figs"
ggsave(file = here::here("output_figs","fig7.png"), p2, scale=2, device= "png", dpi = 500, width=6, height=3)
dev.off()

rm(p2, bathy_d, avdepth_d)
Show code
knitr::include_graphics(here::here("output_figs", "fig7.png"))

Figure 7. Boxplots of (A) bottom depth and (B) average collection depth associated with Calanus records (all three species combined) during the “Deep overwinter” period, defined as records collected at depths greater than or equal to 400m in August-March only. Boxplots show minimum, first quartile, median, third quartile, and maximum; outliers as dots. Records are from the cleaned OBIS database only and records for which average collection depth was missing or greater than bathymetric depth were removed.

13. Statistical analyses

First we look at the summary statistics for the bathymetric depth of sampling during deep winter for all three species

Show code
#get summary stats
tapply(dat_nodup_deep_sub_winter$bathy, dat_nodup_deep_sub_winter$scientificname, summary)
$`Calanus finmarchicus`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4266.6 -2730.4 -2557.2 -2350.9 -2019.8  -637.7 

$`Calanus glacialis`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4266.6 -3475.2 -2666.6 -2575.2 -1740.4  -921.3 

$`Calanus hyperboreus`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-4266.6 -2789.0 -2557.2 -2595.7 -2237.9  -921.3 

We can use a Levene’s test for homogeneity of variance and a Shapiro-Wilk test of normality to check what statistical test would be appropriate

Show code
#tests
leveneTest(bathy ~ scientificname, data = dat_nodup_deep_sub_winter)
Levene's Test for Homogeneity of Variance (center = median)
       Df F value    Pr(>F)    
group   2  15.689 2.086e-07 ***
      782                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Show code
shapiro.test(dat_nodup_deep_sub_winter$bathy)

    Shapiro-Wilk normality test

data:  dat_nodup_deep_sub_winter$bathy
W = 0.97288, p-value = 6.773e-11

Significant results indicates that a non-parametric test should used to test for differences between species and bathymetric depth.

Show code
#non-parametric test
kruskal.test(bathy ~ scientificname, data = dat_nodup_deep_sub_winter)

    Kruskal-Wallis rank sum test

data:  bathy by scientificname
Kruskal-Wallis chi-squared = 7.0532, df = 2, p-value = 0.0294

A post-hoc test of pairwise comparisons is also applied

Show code
#post-hoc test of pairwise comparisons
pairwise.wilcox.test(dat_nodup_deep_sub_winter$bathy, as.factor(dat_nodup_deep_sub_winter$scientificname))

    Pairwise comparisons using Wilcoxon rank sum test with continuity correction 

data:  dat_nodup_deep_sub_winter$bathy and as.factor(dat_nodup_deep_sub_winter$scientificname) 

                    Calanus finmarchicus Calanus glacialis
Calanus glacialis   0.28                 -                
Calanus hyperboreus 0.05                 0.84             

P value adjustment method: holm 

Second, we look at the summary statistics for the collection depth of sampling during deep winter for all three species

Show code
#get summary stats
tapply(dat_nodup_deep_sub_winter$av_depth,dat_nodup_deep_sub_winter$scientificname, summary)
$`Calanus finmarchicus`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-3950.0  -750.0  -481.5  -720.1  -442.8  -405.0 

$`Calanus glacialis`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -2500   -1500    -755   -1282    -750    -700 

$`Calanus hyperboreus`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
-3950.0 -1744.4 -1407.2 -1400.4  -945.5  -480.0 

We can use a Levene’s test of homogeneity of variance and a Shapiro-Wilk test of normality to check what statistical test would be appropriate

Show code
#tests
leveneTest(av_depth ~ scientificname, data = dat_nodup_deep_sub_winter)
Levene's Test for Homogeneity of Variance (center = median)
       Df F value    Pr(>F)    
group   2  14.734 5.234e-07 ***
      782                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Show code
shapiro.test(dat_nodup_deep_sub_winter$av_depth)

    Shapiro-Wilk normality test

data:  dat_nodup_deep_sub_winter$av_depth
W = 0.80528, p-value < 2.2e-16

Significant results indicates that a non-parametric test should used to test for differences between species and bathymetric depth.

Show code
#non-parametric test
kruskal.test(av_depth ~ scientificname, data = dat_nodup_deep_sub_winter)

    Kruskal-Wallis rank sum test

data:  av_depth by scientificname
Kruskal-Wallis chi-squared = 254.94, df = 2, p-value < 2.2e-16

A post-hoc test of pairwise comparisons is also applied

Show code
#post-hoc test of pairwise comparisons
pairwise.wilcox.test(dat_nodup_deep_sub_winter$av_depth, as.factor(dat_nodup_deep_sub_winter$scientificname))

    Pairwise comparisons using Wilcoxon rank sum test with continuity correction 

data:  dat_nodup_deep_sub_winter$av_depth and as.factor(dat_nodup_deep_sub_winter$scientificname) 

                    Calanus finmarchicus Calanus glacialis
Calanus glacialis   2.1e-13              -                
Calanus hyperboreus < 2e-16              0.071            

P value adjustment method: holm 

14. Session info

Show code
sessionInfo()
R version 4.2.1 (2022-06-23 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)

Matrix products: default

locale:
[1] LC_COLLATE=English_United Kingdom.utf8 
[2] LC_CTYPE=English_United Kingdom.utf8   
[3] LC_MONETARY=English_United Kingdom.utf8
[4] LC_NUMERIC=C                           
[5] LC_TIME=English_United Kingdom.utf8    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] car_3.1-1                carData_3.0-5            spatialEco_2.0-0        
 [4] viridis_0.6.2            viridisLite_0.4.0        raster_3.5-29           
 [7] sp_1.5-0                 rnaturalearth_0.1.0      rnaturalearthhires_0.2.0
[10] data.table_1.14.2        patchwork_1.1.2          tidync_0.2.4            
[13] sf_1.0-8                 egg_0.4.5                gridExtra_2.3           
[16] ggspatial_1.1.6          forcats_0.5.2            purrr_1.0.1             
[19] readr_2.1.2              tibble_3.1.8             ggplot2_3.4.0           
[22] tidyverse_1.3.2          tidyr_1.3.0              stringr_1.5.0           
[25] dplyr_1.1.0              plyr_1.8.7               here_1.0.1              

loaded via a namespace (and not attached):
 [1] fs_1.5.2                lubridate_1.8.0         httr_1.4.4             
 [4] rprojroot_2.0.3         tools_4.2.1             backports_1.4.1        
 [7] rgdal_1.5-32            utf8_1.2.2              R6_2.5.1               
[10] KernSmooth_2.23-20      DBI_1.1.3               colorspace_2.0-3       
[13] withr_2.5.0             rnaturalearthdata_0.1.0 tidyselect_1.2.0       
[16] compiler_4.2.1          textshaping_0.3.6       cli_3.6.0              
[19] rvest_1.0.3             RNetCDF_2.6-1           xml2_1.3.3             
[22] labeling_0.4.2          scales_1.2.1            classInt_0.4-7         
[25] proxy_0.4-27            systemfonts_1.0.4       digest_0.6.29          
[28] rmarkdown_2.15          pkgconfig_2.0.3         htmltools_0.5.3        
[31] dbplyr_2.3.0            fastmap_1.1.0           htmlwidgets_1.5.4      
[34] rlang_1.0.6             readxl_1.4.1            rstudioapi_0.13        
[37] farver_2.1.1            generics_0.1.3          jsonlite_1.8.0         
[40] googlesheets4_1.0.1     magrittr_2.0.3          ncmeta_0.3.0           
[43] s2_1.1.0                Rcpp_1.0.9              munsell_0.5.0          
[46] fansi_1.0.3             abind_1.4-5             lifecycle_1.0.3        
[49] terra_1.6-7             stringi_1.7.8           yaml_2.3.5             
[52] grid_4.2.1              crayon_1.5.1            lattice_0.20-45        
[55] haven_2.5.0             hms_1.1.2               knitr_1.39             
[58] pillar_1.8.1            codetools_0.2-18        wk_0.6.0               
[61] reprex_2.0.2            glue_1.6.2              evaluate_0.16          
[64] modelr_0.1.9            png_0.1-7               vctrs_0.5.2            
[67] tzdb_0.3.0              cellranger_1.1.0        gtable_0.3.0           
[70] assertthat_0.2.1        xfun_0.32               broom_1.0.0            
[73] e1071_1.7-11            ragg_1.2.4              class_7.3-20           
[76] ncdf4_1.19              googledrive_2.0.0       gargle_1.2.0           
[79] units_0.8-0             ellipsis_0.3.2         
Show code
rm(list = ls())