--- title: "R script for **Lenoir et al.** published in a special issue on historical ecology in *Journal of Vegetation Science*" author: "Jonathan Lenoir" date: "22 octobre 2019" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE) ``` ## Supporting information This is the R Markdown document to run the analyses of the paper **Lenoir, J.** et al. Historical continuity and spatial connectivity ensure hedgerows are effective corridors for forest plants: evidence from the species-time-area relationship. *Journal of Vegetation Science*. To access the paper, go to the [journal website](https://onlinelibrary.wiley.com/journal/16541103). ## List of packages used ```{r, message=FALSE, warning=FALSE} # library(knitr) library(kableExtra) library(sf) library(leaflet) library(mapview) library(viridis) library(Hmisc) library(corrplot) library(MuMIn) library(jtools) library(lme4) library(gridExtra) library(tidyverse) # ``` ## A template for figures Here is the code for the figure template used in the paper. ```{r, message=FALSE, warning=FALSE} # fig <- theme_classic() + theme(legend.position=c(1, 1), legend.direction="vertical", legend.title=element_text(face="bold"), legend.justification=c(1, 1), legend.title.align=0) + theme(axis.title.x=element_text(face="bold"), axis.title.y=element_text(face="bold"), plot.tag=element_text(face="bold"), plot.title=element_text(face="bold")) # ``` ## Import hedgerows' data The first file to import is entitled "data_carac_haie.txt". It contains information on each of the 99 sampled hedgerows, such as hedgerow age, length, width, etc. ```{r, message=FALSE, warning=FALSE} # carac <- read.table("data_carac_haie.txt", header=TRUE, sep="\t") carac <- as_tibble(carac) # ``` ```{r, results='asis', echo=FALSE, message=FALSE, warning=FALSE} # carac %>% kable(caption="**Table 1:** Characteristics of the 99 sampled hedgerows") %>% kable_styling(bootstrap_options=c("striped", "hover", "condensed", "responsive"), position="left", fixed_thead=TRUE) %>% scroll_box(width="80%", height="200px") # ``` ## Import the floristic data The second file to import is entitled "data_flo_haie.txt". It contains the floristic composition of each hedgerow and is displayed as a species-by-hedgerow matrix. Note that some hedgerows are divided into several and consecutive 5-m segments providing the species composition of each 5-m segment. ```{r, message=FALSE, warning=FALSE} # flo <- read.table("data_flo_haie_pa.txt", header=TRUE, sep="\t") flo <- as_tibble(flo) # ``` ```{r, results='asis', echo=FALSE, message=FALSE, warning=FALSE} # flo %>% kable(caption="**Table 2:** Species composition matrix of the 99 sampled hedgerows") %>% kable_styling(bootstrap_options=c("striped", "hover", "condensed", "responsive"), position="left", fixed_thead=TRUE) %>% scroll_box(width="80%", height="200px") # ``` ## Species list Using the floristic composition of each hedgerow, we compiled a list of species occurring across the 99 sampled hedgerows. For each species, we also computed its frequency across the 99 sampled hedgerows in both the herbaceous ($\le 1$-m height) and tree ($> 1$-m height) vegetation layers. ```{r, message=FALSE, warning=FALSE} # flo %>% select(-c(multi_bande, xl93_bande, yl93_bande, dist_bande, num_bande, largeur_bande, rich_bande)) %>% gather(key="id_taxon", value="pa", -c(id_bande, id_haie)) %>% filter(pa==1) %>% select(-c(id_bande)) %>% distinct() %>% group_by(id_taxon) %>% summarize(n_tot=n_distinct(id_haie)) %>% separate(id_taxon, into=c("veg_layer", "sp_name"), sep=2) %>% spread(key="veg_layer", value="n_tot") %>% rename(occ_tree=A_, occ_herb=H_) %>% replace_na(list(occ_tree=0, occ_herb=0)) %>% arrange(desc(occ_herb, occ_tree)) %>% mutate_if(is.character, str_replace_all, pattern="_\\.", replacement=" ") %>% mutate_if(is.character, str_remove_all, pattern=" .+") %>% mutate_if(is.character, str_replace_all, pattern="([a-z])\\.([a-z])", replacement="\\1-\\2") %>% mutate_if(is.character, str_replace_all, pattern="_", replacement=" ") -> species_list # ``` ```{r, results='asis', echo=FALSE, message=FALSE, warning=FALSE} # species_list %>% kable(caption="**Table 3:** List of forest plant species occurring in the 99 sampled hedgerows") %>% kable_styling(bootstrap_options=c("striped", "hover", "condensed", "responsive"), full_width=FALSE, position="left", fixed_thead=TRUE) %>% scroll_box(width="80%", height="200px") # ``` ## Where are the hedgerows? We built an interactive leaflet map to display the geographic location of each of the 99 hedgerows that we sampled across the Picardy region in northern France (Hauts-de-France). ```{r, message=FALSE, warning=FALSE} # crs_data <- rgdal::make_EPSG() # tibble(id=carac$id_haie, lon=carac$l93_x, lat=carac$l93_y, yr=carac$annee_med) %>% st_as_sf(coords=c("lon", "lat")) %>% st_set_crs(2154) -> hedgerow_coord_l93 # hedgerow_coord_l93 %>% st_transform(4326) -> hedgerow_coord_lonlat # ``` ```{r, message=FALSE, warning=FALSE} map_hedgerow <- leaflet() %>% addProviderTiles("Esri.WorldImagery") %>% addProviderTiles(providers$Stamen.TonerLines, options=providerTileOptions(opacity=0.35)) %>% addProviderTiles(providers$Stamen.TonerLabels) %>% addMarkers(data=hedgerow_coord_lonlat, clusterOptions=markerClusterOptions(), popup=~as.character(yr), label=~as.character(yr)) # map_hedgerow ``` **Figure 1:** *Distribution of the 99 hedgerows sampled across the study area in Picardy (France)* ## How many forest plants do we find in hedgerows? Using the species-by-hedgerow matrix, we computed species richness (cf. total number of forest plant species) for each hedgerow, distinguishing between the herbaceous ($\le 1$-m height) and tree ($> 1$-m height) vegetation layers. ```{r, message=FALSE, warning=FALSE} # flo %>% select(-c(multi_bande, xl93_bande, yl93_bande, dist_bande, num_bande, largeur_bande, rich_bande)) %>% gather(key="id_taxon", value="pa", -c(id_bande, id_haie)) %>% filter(pa==1) %>% separate(id_taxon, into=c("layer", "name"), sep=1) %>% group_by(id_haie) %>% summarize(n_tot=n_distinct(name)) -> rich_all # flo %>% select(-c(multi_bande, num_bande, largeur_bande, rich_bande)) %>% gather(key="id_taxon", value="pa", -c(id_bande, id_haie)) %>% filter(pa==1) %>% separate(id_taxon, into=c("layer", "name"), sep=1) %>% group_by(id_haie, layer) %>% summarize(rich=n_distinct(name)) %>% spread(key=layer, value=rich) %>% rename(n_tree=A, n_herb=H) -> rich_layer # carac %>% left_join(rich_all, by="id_haie") %>% left_join(rich_layer, by="id_haie") -> carac # carac %>% select(c(id_haie, n_tree, n_herb)) %>% rename(T=n_tree, H=n_herb) %>% gather(key=layer, value=n, -id_haie) %>% mutate(layer=factor(layer)) -> richness # ``` ```{r, message=FALSE, warning=FALSE} plot_r <- ggplot(data=richness, aes(x=n, fill=layer)) + geom_histogram(breaks=seq(2, 50, 2), alpha=0.5, col="white", position="identity") + scale_y_continuous(limits=c(0, 20), breaks=seq(0, 20, 5), labels=c("0.0", "5.0", "10.0", "15.0", "20.0")) + scale_fill_manual(values=c("green4", "purple4"), name="Vegetation layer", labels=c("Below 1-m height", "Above 1-m height")) + labs(x="Species richness", y="Number of hedgerows") + fig # plot_r ``` **Figure 2:** *Histogram of the total number of forest plant species occurring in both the herbaceous ($\le 1$-m height) and tree ($> 1$-m height) vegetation layers of a given hedgerow (n = 99)* ## What are the main determinants of the number of herbaceous forest plant species we can find in hedgerows? First of all, we computed the pairwise correlation matrix among all quantitative variables to detect variables that are too much correlated among each others and to assess which variables are positively and negatively correlated with species richness in the herbecous layer: our main response variable. ```{r, message=FALSE, warning=FALSE} # carac %>% select(c(id_haie, annee_med, longueur, largeur, h_moy, h_amp, dist_for, surf_for_500, ipa_adj, connection, n_tree, n_herb)) %>% rename(birth_yr=annee_med, tot_length=longueur, max_width=largeur, mean_height=h_moy, range_height=h_amp, for_dist=dist_for, for_cov=surf_for_500, adj_lui=ipa_adj, for_con=connection) %>% mutate(for_con=factor(for_con)) -> richness # richness %>% select(-c(id_haie, for_con)) %>% as.matrix() %>% rcorr(type="spearman") -> tab_cor # rval_cor <- as.data.frame(tab_cor$r) pval_cor <- as.data.frame(tab_cor$P) # ``` ```{r, message=FALSE, warning=FALSE} # corrplot(tab_cor$r, method="pie", type="upper", order="hclust", p.mat=tab_cor$P, sig.level=0.001/45, insig="label_sig") ``` **Figure 3:** *Correlation matrix of the variables used to analyse the main determinants of forest plant species richness in the herbaceous layer of hedgerows (n_herb). Potential predictor variables are: maximum width of the hedgerow (max_width); average height of the hedgerow (mean_height); height range of the focal hedgerow (range_height); cumulated amount of forest cover within a buffer area of 500 m around the hedgerow (for_cov); total number of tree and shrub species occurring in the hedgerow overstorey (n_tree); total length of the hedgerow (tot_length); distance to the closest woodland (for_dist); year of first appearance of a hedgerow in the historical archives as a proxy for hedgerow age (birth_yr); and adjacentland-use intensity (adj_lui). Asterisks shows significant (P $<$ 0.001) correlation values after accounting for multiple pairwise comparisons (10 × 9 / 2 = 45) using a Bonferroni correction (P / 45)* ### List of candidate models Here is a list of candidate models we tested to explain species richness in the herbaceous layer of hedgerows. We used generalised linear models (GLMs) with a Poisson family as the response variable is a count data (i.e. species richness in the herbaceous layer of hedgerows). ```{r, message=FALSE, warning=FALSE} # M_rich_1 <- glm(n_herb ~ for_con + birth_yr + tot_length + max_width + mean_height + for_cov + adj_lui + n_tree + birth_yr:tot_length, family=poisson, data=richness) # M_rich_2 <- glm(n_herb ~ for_con + birth_yr + tot_length + max_width + mean_height + for_cov + adj_lui + n_tree + birth_yr:max_width, family=poisson, data=richness) # M_rich_3 <- glm(n_herb ~ for_con + birth_yr + tot_length + max_width + mean_height + for_cov + adj_lui + n_tree + birth_yr:mean_height, family=poisson, data=richness) # M_rich_4 <- glm(n_herb ~ for_con + birth_yr + tot_length + max_width + mean_height + for_cov + adj_lui + n_tree + birth_yr:for_cov, family=poisson, data=richness) # M_rich_5 <- glm(n_herb ~ for_con + birth_yr + tot_length + max_width + mean_height + for_cov + adj_lui + n_tree + birth_yr:for_con, family=poisson, data=richness) # ``` ### Best model We computed the Aikaike information criteria (AIC) to select the best model with the lowest AIC value. ```{r, message=FALSE, warning=FALSE} # AICs <- c(AIC(M_rich_1), AIC(M_rich_2), AIC(M_rich_3), AIC(M_rich_4), AIC(M_rich_5)) # AICs <- as.data.frame(AICs) # names(AICs) <- c("aic_val") # AICs$mod_id <- c("M_rich_1", "M_rich_2", "M_rich_3", "M_rich_4", "M_rich_5") # AICs$delta_aic <- AICs$aic_val-min(AICs$aic_val) # ``` ```{r, results='asis', echo=FALSE, message=FALSE, warning=FALSE} # AICs %>% kable(caption="**Table 4:** List of candidate models and their Aikaike information criteria (AIC) values") %>% kable_styling(bootstrap_options=c("striped", "hover", "condensed", "responsive"), full_width=FALSE, position="left", fixed_thead=TRUE) %>% scroll_box(width="80%", height="200px") # ``` ### Outputs from the best model Once the best model was selected, we reran it with rescaled (using the gscale function from the jtools package) predictor variables to be able to compare coefficient estimates as effect sizes. ```{r, message=FALSE, warning=FALSE} # best_M_rich_scaled <- glm(n_herb ~ for_con + gscale(birth_yr) + gscale(tot_length) + gscale(max_width) + gscale(mean_height) + gscale(for_cov) + gscale(adj_lui) + gscale(n_tree) + gscale(birth_yr):gscale(tot_length), family=poisson, data=richness) # table_rich <- round(summary(best_M_rich_scaled)$coeff, digits=5) table_rich <- as.data.frame(table_rich) names(table_rich) <- c("Coefficient", "Standard error", "Z-value", "P-value") row.names(table_rich)[1] <- "Intercept" row.names(table_rich)[2] <- "Attached" row.names(table_rich)[3] <- "Year" row.names(table_rich)[4] <- "Length" row.names(table_rich)[5] <- "Width" row.names(table_rich)[6] <- "Height" row.names(table_rich)[7] <- "Forest cover" row.names(table_rich)[8] <- "Land-use" row.names(table_rich)[9] <- "N tree" row.names(table_rich)[10] <- "Year:Length" # ``` ```{r, results='asis', echo=FALSE, message=FALSE, warning=FALSE} # table_rich %>% kable(caption="**Table 5:** Best candidate model of the main determinants of the number of herbaceous forest species found in hedgerows. All continuous variables were z-transformed using the gscale function from the jtools package in R so that coefficient estimates can be interpreted as effect sizes. We used a generalized linear model with a logit link (family = Poisson). The intercept gives the effect size with all other continuous variables set to zero and the connectivity factor variable set to the non-connected level") %>% kable_styling(bootstrap_options=c("striped", "hover", "condensed", "responsive"), full_width=FALSE, position="left", fixed_thead=TRUE) %>% scroll_box(width="80%", height="200px") # ``` ### Species-time-area relationship (STAR) ```{r, message=FALSE, warning=FALSE} # best_M_rich <- M_rich_1 # ilink <- family(best_M_rich)$linkinv # pred <- as_tibble(expand.grid(birth_yr=seq(1720, 2020, 10), tot_length=seq(10, 500, 10), for_con=c(0, 1))) # pred %>% mutate(for_con=factor(for_con), adj_lui=rep(median(richness$adj_lui)), max_width=rep(median(richness$max_width)), mean_height=rep(median(richness$mean_height)), n_tree=rep(median(richness$n_tree)), for_cov=rep(median(richness$for_cov))) -> pred # pred %>% mutate(fit=predict(best_M_rich, pred, type="link", se.fit=TRUE)[[1]], se_fit=predict(best_M_rich, pred, type="link", se.fit=TRUE)[[2]], n_herb_fit=ilink(fit), ci_upper=ilink(fit+(2*se_fit)), ci_lower=ilink(fit-(2*se_fit))) -> pred # pred %>% filter(for_con==0) -> con_no # pred %>% filter(for_con==1) -> con_yes # plot_inte_a <- ggplot(con_no, aes(x=birth_yr, y=n_herb_fit)) + geom_line(aes(colour=tot_length, group=tot_length)) + scale_color_viridis(begin=1, end=0, name="Length\n(m)", breaks=seq(100, 500, 100), labels=c(" 100", " 200", " 300", " 400", " 500")) + scale_y_continuous(limits=c(0, 100), breaks=seq(0, 100, 20)) + labs(x="Year of hedgerow\nestablishment", y="Species richness", tag="(a)") + ggtitle("Not attached") + fig # plot_inte_b <- ggplot(con_yes, aes(x=birth_yr, y=n_herb_fit)) + geom_line(aes(colour=tot_length, group=tot_length)) + scale_color_viridis(begin=1, end=0, name="Length\n(m)") + scale_y_continuous(limits=c(0, 100), breaks=seq(0, 100, 20)) + labs(x="Year of hedgerow\nestablishment", y="Species richness", tag="(b)") + ggtitle("Attached to a woodland") + fig # plot_inte_c <- ggplot(con_no, aes(x=tot_length, y=n_herb_fit)) + geom_line(aes(colour=birth_yr, group=birth_yr)) + scale_color_viridis(option="magma", name="Year") + scale_y_continuous(limits=c(0, 100), breaks=seq(0, 100, 20)) + labs(x="Total length of\nhedgerow (m)", y="Species richness", tag="(c)") + ggtitle("Not attached") + fig # plot_inte_d <- ggplot(con_yes, aes(x=tot_length, y=n_herb_fit)) + geom_line(aes(colour=birth_yr, group=birth_yr)) + scale_color_viridis(option="magma", name="Year") + scale_y_continuous(limits=c(0, 100), breaks=seq(0, 100, 20)) + labs(x="Total length of\nhedgerow (m)", y="Species richness", tag="(d)") + ggtitle("Attached to a woodland") + fig # get_legend<-function(myggplot){ tmp <- ggplot_gtable(ggplot_build(myggplot)) leg <- which(sapply(tmp$grobs, function(x) x$name)=="guide-box") legend <- tmp$grobs[[leg]] return(legend) } # legend_ab <- get_legend(plot_inte_a) # legend_cd <- get_legend(plot_inte_c) # plot_inte_a <- plot_inte_a + theme(legend.position="none") # plot_inte_b <- plot_inte_b + theme(legend.position="none") # plot_inte_c <- plot_inte_c + theme(legend.position="none") # plot_inte_d <- plot_inte_d + theme(legend.position="none") # grid.arrange(plot_inte_a, plot_inte_b, legend_ab, plot_inte_c, plot_inte_d, legend_cd, ncol=3, nrow=2, widths=c(8, 8, 2), heights=c(8, 8)) ``` **Figure 4:** *Space-time interaction between the year of first appearance of a hedgerow in the historical archives (**a**, **b**), as a proxy for hedgerow age, and the total length of the hedgerow (**c**, **d**) on the total number of forest plant species occurring in the understorey of hedgerows not attached (**a**, **c**) or attached (**b**, **d**) to a woodland. Predictions are based on the best candidate model (see Table 5 for standardized coefficients). All other covariates not displayed on a given plot are set to their median values* ## How are specialist and generalist species distributed along hedgerows and away from woodlands? From the initial dataset, we selected only the 29 hedgerows with information on species presence-absence within each of the consecutive 5-m segments along the hedgerow. Similarly, we extracted only the species composition matrix of these 29 hedgerows from the initial species-by-hedgerow matrix. ```{r, message=FALSE, warning=FALSE} # carac %>% filter(multi_bande==1) %>% select(c(id_haie, annee_med, connection)) %>% rename(birth_yr=annee_med, for_con=connection) %>% mutate(for_con=factor(for_con)) -> carac_hedgerows_with_bands # flo %>% filter(multi_bande==1) %>% left_join(carac_hedgerows_with_bands, by="id_haie") %>% select(c(id_haie, id_bande, num_bande, dist_bande, birth_yr, for_con, starts_with("H_"))) %>% rename(dist_start=num_bande, dist_for=dist_bande) %>% mutate(dist_start=dist_start*5-2.5) %>% gather(starts_with("H_"), key="species_id", value="p_a") %>% mutate_if(is.character, str_replace_all, pattern="H_", replacement="") %>% mutate_if(is.character, str_replace_all, pattern="_\\.", replacement=" ") %>% mutate_if(is.character, str_remove_all, pattern=" .+") %>% mutate_if(is.character, str_replace_all, pattern="([a-z])\\.([a-z])", replacement="\\1-\\2") %>% mutate_if(is.character, str_replace_all, pattern="_", replacement=" ") %>% arrange(id_haie, id_bande, dist_for, species_id) -> flo_by_band # ``` For model calibration, we focused on the list of species (specialists and generalists) that are occurring more than 25 times across all 5-m segements that we surveyed. Here is the code to select those species. ```{r, message=FALSE, warning=FALSE} flo_by_band %>% filter(p_a==1) %>% group_by(species_id) %>% summarise(tot_occ=n()) %>% arrange(desc(tot_occ)) -> occ_by_sp # flo_by_band %>% left_join(occ_by_sp, by="species_id") -> flo_by_band # for_stat <- read.table("species_list.txt", header=TRUE, sep="\t") for_stat <- as_tibble(for_stat) # for_stat %>% select(-c(life_form, below_1m, above_1m)) %>% rename(species_id=sp_name) %>% mutate(species_id=as.character(species_id)) -> for_stat # flo_by_band %>% left_join(for_stat, by="species_id") -> flo_by_band # flo_by_band %>% filter(for_stat %in% c("specialist", "generalist")) %>% filter(tot_occ>25) -> flo_by_band # ``` ### Modelling species distribution Here is the model we ran to test whether species response curves along hedgerows and away from woodlands are differing between forest specialists and generalists. We used generalised linear mixed-effets models (GLMMs) with a Binomial family as the response variable is binary (i.e. species presence-absence). The list of fixed-effect variables is provided below. We used species name and the hedgerow id as two random intercept terms in our GLMM. ```{r, message=FALSE, warning=FALSE} # best_M_distrib <- glmer(p_a ~ dist_for + birth_yr + for_stat + for_con + birth_yr:for_stat + dist_for:for_stat + dist_for:for_con + (1|id_haie) + (1|species_id), family=binomial, data=flo_by_band) # best_M_distrib_scaled <- glmer(p_a ~ gscale(dist_for) + gscale(birth_yr) + for_stat + for_con + gscale(birth_yr):for_stat + gscale(dist_for):for_stat + gscale(dist_for):for_con + (1|id_haie) + (1|species_id), family=binomial, data=flo_by_band) # table_distrib <- round(summary(best_M_distrib_scaled)$coeff, digits=5) table_distrib <- as.data.frame(table_distrib) names(table_distrib) <- c("Coefficient", "Standard error", "Z-value", "P-value") row.names(table_distrib)[1] <- "Intercept" row.names(table_distrib)[2] <- "Distance" row.names(table_distrib)[3] <- "Year" row.names(table_distrib)[4] <- "Specialists" row.names(table_distrib)[5] <- "Attached" row.names(table_distrib)[6] <- "Year:Specialists" row.names(table_distrib)[7] <- "Distance:Specialists" row.names(table_distrib)[8] <- "Distance:Attached" # ``` ```{r, results='asis', echo=FALSE, message=FALSE, warning=FALSE} # table_distrib %>% kable(caption="**Table 6:** Best candidate model of the main determinants of the distribution of herbaceous forest species along hedgerows. All continuous variables were z-transformed using the gscale function from the jtools package in R so that coefficient estimates can be interpreted as effect sizes. We used a generalized linear mixed-effects model with a logit link (family = Binomial). The intercept gives the effect size with all other continuous variables set to zero and the forest status factor variable set to the generalists level and the connectivity factor variable set to the non-connected level") %>% kable_styling(bootstrap_options=c("striped", "hover", "condensed", "responsive"), full_width=FALSE, position="left", fixed_thead=TRUE) %>% scroll_box(width="80%", height="200px") # ``` ### Distribution profile of forest specialists and generalists along hedgerows Here is the code to predict the effect of distance to the closest woodland on the probability of occurrence of both forest specialists and generalists along a given hedgerow, either attached or not (isolated) to a woodland. ```{r message=FALSE, warning=FALSE} # ilink <- family(best_M_distrib)$linkinv # pred <- as_tibble(expand.grid(dist_for=seq(0, 1800, 1), birth_yr=seq(1500, 2020, 10), for_stat=c("generalist", "specialist"), for_con=c(0, 1))) # pred %>% mutate(for_stat=factor(for_stat)) %>% mutate(for_con=factor(for_con)) -> pred # pred %>% mutate(fit=predict(best_M_distrib, pred, re.form=NA, type="link", se.fit=TRUE)[[1]], se_fit=predict(best_M_distrib, pred, re.form=NA, type="link", se.fit=TRUE)[[2]], resp_fit=ilink(fit), ci_upper=ilink(fit+(2*se_fit)), ci_lower=ilink(fit-(2*se_fit))) -> pred # pred %>% filter(birth_yr==1700, for_stat=="specialist", for_con==1, dist_for<=170) -> pred_old_specialist_con # pred %>% filter(birth_yr==2000, for_stat=="specialist", for_con==1, dist_for<=170) -> pred_new_specialist_con # pred %>% filter(birth_yr==1700, for_stat=="generalist", for_con==1, dist_for<=170) -> pred_old_generalist_con # pred %>% filter(birth_yr==2000, for_stat=="generalist", for_con==1, dist_for<=170) -> pred_new_generalist_con # pred %>% filter(birth_yr==1700, for_stat=="specialist", for_con==0, dist_for<=1800) -> pred_old_specialist_discon # pred %>% filter(birth_yr==2000, for_stat=="specialist", for_con==0, dist_for<=1800) -> pred_new_specialist_discon # pred %>% filter(birth_yr==1700, for_stat=="generalist", for_con==0, dist_for<=1800) -> pred_old_generalist_discon # pred %>% filter(birth_yr==2000, for_stat=="generalist", for_con==0, dist_for<=1800) -> pred_new_generalist_discon # flo_by_band %>% filter(for_con==1, birth_yr<=median(.$birth_yr)) -> flo_by_band_old_con # flo_by_band %>% filter(for_con==1, birth_yr>median(.$birth_yr)) -> flo_by_band_new_con # flo_by_band %>% filter(for_con==0, birth_yr<=median(.$birth_yr)) -> flo_by_band_old_discon # flo_by_band %>% filter(for_con==0, birth_yr>median(.$birth_yr)) -> flo_by_band_new_discon # plot_dist_a <- ggplot(flo_by_band_old_con, aes(x=dist_for, y=p_a, colour=for_stat)) + geom_point(size=3, alpha=0.005) + geom_line(data=pred_old_specialist_con, aes(x=dist_for, y=resp_fit), colour="green4", linetype="solid", size=1) + geom_ribbon(data=pred_old_specialist_con, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="green4", colour=NA, alpha=0.25) + geom_line(data=pred_old_generalist_con, aes(x=dist_for, y=resp_fit), linetype="solid", colour="purple4", size=1) + geom_ribbon(data=pred_old_generalist_con, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="purple4", colour=NA, alpha=0.25) + scale_colour_manual(values=c("purple4", "green4"), name="Species\ntype", labels=c("Generalists", "Specialists")) + scale_fill_manual(values=c("purple4", "green4"), guide=FALSE) + guides(color=guide_legend(override.aes=list(size=8, shape=19, alpha=0.25))) + labs(x="Distance to woodland (m)", y="Probability of occurrence", tag="(a)") + ggtitle("Attached and ancient") + fig # plot_dist_b <- ggplot(flo_by_band_new_con, aes(x=dist_for, y=p_a, colour=for_stat)) + geom_point(size=3, alpha=0.005) + geom_line(data=pred_new_specialist_con, aes(x=dist_for, y=resp_fit), colour="green4", linetype="solid", size=1) + geom_ribbon(data=pred_new_specialist_con, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="green4", colour=NA, alpha=0.25) + geom_line(data=pred_new_generalist_con, aes(x=dist_for, y=resp_fit), linetype="solid", colour="purple4", size=1) + geom_ribbon(data=pred_new_generalist_con, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="purple4", colour=NA, alpha=0.25) + scale_colour_manual(values=c("purple4", "green4"), name="Forest herbs", labels=c("Generalists", "Specialists")) + scale_fill_manual(values=c("purple4", "green4"), guide=FALSE) + guides(color=guide_legend(override.aes=list(size=3, shape=19, alpha=0.25))) + labs(x="Distance to woodland (m)", y="Probability of occurrence", tag="(b)") + ggtitle("Attached and recent") + fig # plot_dist_c <- ggplot(flo_by_band_old_discon, aes(x=dist_for, y=p_a, colour=for_stat)) + geom_point(size=3, alpha=0.005) + geom_line(data=pred_old_generalist_discon, aes(x=dist_for, y=resp_fit), colour="purple4", linetype="solid", size=1) + geom_ribbon(data=pred_old_generalist_discon, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="purple4", colour=NA, alpha=0.25) + geom_line(data=pred_old_specialist_discon, aes(x=dist_for, y=resp_fit), linetype="solid", colour="green4", size=1) + geom_ribbon(data=pred_old_specialist_discon, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="green4", colour=NA, alpha=0.25) + scale_colour_manual(values=c("purple4", "green4"), name="Species affinity\nto closed forest", labels=c("Generalists", "Specialists")) + scale_fill_manual(values=c("purple4", "green4"), guide=FALSE) + guides(color=guide_legend(override.aes=list(size=8, shape=19, alpha=0.25))) + labs(x="Distance to woodland (m)", y="Probability of occurrence", tag="(c)") + ggtitle("Isolated and ancient") + fig # plot_dist_d <- ggplot(flo_by_band_new_discon, aes(x=dist_for, y=p_a, colour=for_stat)) + geom_point(size=3, alpha=0.005) + geom_line(data=pred_new_generalist_discon, aes(x=dist_for, y=resp_fit), colour="purple4", linetype="solid", size=1) + geom_ribbon(data=pred_new_generalist_discon, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="purple4", colour=NA, alpha=0.25) + geom_line(data=pred_new_specialist_discon, aes(x=dist_for, y=resp_fit), linetype="solid", colour="green4", size=1) + geom_ribbon(data=pred_new_specialist_discon, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="green4", colour=NA, alpha=0.25) + scale_colour_manual(values=c("purple4", "green4"), name="Species affinity\nto closed forest", labels=c("Generalists", "Specialists")) + scale_fill_manual(values=c("purple4", "green4"), guide=FALSE) + guides(color=guide_legend(override.aes=list(size=8, shape=19, alpha=0.25))) + labs(x="Distance to woodland (m)", y="Probability of occurrence", tag="(d)") + ggtitle("Isolated and recent") + fig # plot_dist_a <- plot_dist_a + theme(legend.position="none") # plot_dist_b <- plot_dist_b + theme(legend.position=c(1, 0.9)) # plot_dist_c <- plot_dist_c + theme(legend.position="none") # plot_dist_d <- plot_dist_d + theme(legend.position="none") # grid.arrange(plot_dist_a, plot_dist_b, plot_dist_c, plot_dist_d, nrow=2, ncol=2, widths=c(8, 8), heights=c(8, 8)) ``` **Figure 5:** *Effect of distance to woodland (m) on the distribution of both forest specialists and generalists along hedgerows that are either attached (**a**, **b**) or not (isolated) (**c**, **d**) to a woodland. For illustrative purposes, we set the year of first appearance of the hedgerow in the historical archives to 1700 (**a**, **c**) and 2000 (**b**, **d**) for ancient and recent hedgerows, respectively.* ### Distribution profile of two forest specialist species Here is the code to run species-specific GLMMs. ```{r message=FALSE, warning=FALSE} # flo_by_band %>% group_by(species_id) %>% nest() -> flo_by_band_nested # model_sp <- function(df) { glmer(p_a ~ dist_for + birth_yr + for_con + dist_for:for_con + (1|id_haie), family=binomial, data=df) } # ``` #### Predictions for *Stellaria holostea* ```{r message=FALSE, warning=FALSE} # sp_i <- flo_by_band_nested$data[[25]] # ilink <- family(model_sp(sp_i))$linkinv # pred <- as_tibble(expand.grid(dist_for=seq(0, 1800, 1), birth_yr=seq(1700, 2020, 10), for_con=c(0, 1))) # pred %>% mutate(for_con=factor(for_con)) -> pred # pred %>% mutate(fit=predict(model_sp(sp_i), pred, re.form=NA, type="link", se.fit=TRUE)[[1]], se_fit=predict(model_sp(sp_i), pred, re.form=NA, type="link", se.fit=TRUE)[[2]], resp_fit=ilink(fit), ci_upper=ilink(fit+(2*se_fit)), ci_lower=ilink(fit-(2*se_fit))) -> pred # pred %>% filter(birth_yr==1700, for_con==1, dist_for<=170) %>% mutate(hedge_age=ifelse(birth_yr<=median(sp_i$birth_yr), "Ancient", "Recent")) %>% mutate(hedge_age=factor(hedge_age)) -> old_con # pred %>% filter(birth_yr==2000, for_con==1, dist_for<=170) %>% mutate(hedge_age=ifelse(birth_yr<=median(sp_i$birth_yr), "Ancient", "Recent")) %>% mutate(hedge_age=factor(hedge_age))-> new_con # sp_i %>% filter(for_con==1) %>% mutate(hedge_age=ifelse(birth_yr<=median(.$birth_yr), "Ancient", "Recent")) %>% mutate(hedge_age=factor(hedge_age)) -> sp_i # plot_ste_hol <- ggplot(sp_i, aes(x=dist_for, y=p_a, colour=hedge_age, group=hedge_age)) + geom_point(size=3, alpha=0.25) + geom_line(data=old_con, aes(x=dist_for, y=resp_fit), colour="green4", size=0.5) + geom_ribbon(data=old_con, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="green4", colour=NA, alpha=0.25) + geom_line(data=new_con, aes(x=dist_for, y=resp_fit), colour="purple4", size=0.5) + geom_ribbon(data=new_con, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="purple4", colour=NA, alpha=0.25) + scale_colour_manual(values=c("purple4", "green4"), name="Hedgerow\nage", labels=c("Recent", "Ancient")) + scale_fill_manual(values=c("purple4", "green4"), guide=FALSE) + labs(x="Distance to the \nattached woodland (m)", y="Stellaria holostea", tag="(b)") + fig # ``` #### Predictions for *Viola reichenbachiana* ```{r message=FALSE, warning=FALSE} # sp_i <- flo_by_band_nested$data[[30]] # ilink <- family(model_sp(sp_i))$linkinv # pred <- as_tibble(expand.grid(dist_for=seq(0, 1800, 1), birth_yr=seq(1700, 2020, 10), for_con=c(0, 1))) # pred %>% mutate(for_con=factor(for_con)) -> pred # pred %>% mutate(fit=predict(model_sp(sp_i), pred, re.form=NA, type="link", se.fit=TRUE)[[1]], se_fit=predict(model_sp(sp_i), pred, re.form=NA, type="link", se.fit=TRUE)[[2]], resp_fit=ilink(fit), ci_upper=ilink(fit+(2*se_fit)), ci_lower=ilink(fit-(2*se_fit))) -> pred # pred %>% filter(birth_yr==1700, for_con==1, dist_for<=170) %>% mutate(hedge_age=ifelse(birth_yr<=median(sp_i$birth_yr), "Ancient", "Recent")) %>% mutate(hedge_age=factor(hedge_age))-> old_con # pred %>% filter(birth_yr==2000, for_con==1, dist_for<=170) %>% mutate(hedge_age=ifelse(birth_yr<=median(sp_i$birth_yr), "Ancient", "Recent")) %>% mutate(hedge_age=factor(hedge_age))-> new_con # sp_i %>% filter(for_con==1) %>% mutate(hedge_age=ifelse(birth_yr<=median(.$birth_yr), "Ancient", "Recent")) %>% mutate(hedge_age=factor(hedge_age)) -> sp_i # plot_vio_rei <- ggplot(sp_i, aes(x=dist_for, y=p_a, colour=hedge_age, group=hedge_age)) + geom_point(size=3, alpha=0.25) + geom_line(data=old_con, aes(x=dist_for, y=resp_fit), colour="green4", size=0.5) + geom_ribbon(data=old_con, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="green4", colour=NA, alpha=0.25) + geom_line(data=new_con, aes(x=dist_for, y=resp_fit), colour="purple4", size=0.5) + geom_ribbon(data=new_con, aes(x=dist_for, y=resp_fit, ymin=ci_lower, ymax=ci_upper), fill="purple4", colour=NA, alpha=0.25) + scale_colour_manual(values=c("purple4", "green4"), name="Hedgerow\nage", labels=c("Recent", "Ancient")) + scale_fill_manual(values=c("purple4", "green4"), guide=FALSE) + labs(x="Distance to the \nattached woodland (m)", y="Viola reichenbachiana", tag="(a)") + fig # ``` #### Plotting both species on the same figure ```{r message=FALSE, warning=FALSE} # plot_ste_hol <- plot_ste_hol + theme(legend.position="none", axis.title.y=element_text(face="bold.italic")) # plot_vio_rei <- plot_vio_rei + theme(axis.title.y=element_text(face="bold.italic")) # grid.arrange(plot_vio_rei, plot_ste_hol, nrow=1, ncol=2, widths=c(8, 8), heights=c(8)) ``` **Figure 6:** *Effect of distance to woodland (m) on the probability of occurrence of (**a**) Viola reichenbachiana and (**b**) Stellaria holostea along both recent (year of first appearance of the hedgerow in the historical archives set to 2000) and ancient (year of first appearance of the hedgerow in the historical archives set to 1700) hedgerows that are attached to a woodland*