This script is used to create the figures for the paper “Cultural evolution beyond adoption dynamics: the case of (dis) continuation of modern contraception” by Alexandra Alvergne and Rose Stevens (2020).
It aims to explore the ecological correlations between total fertility rate (TFR) at the national level for several LMICs for which data is available and different measures of contraceptive dynamics relevant to this paper:
% ever used modern methods (as a proxy for those that have ever adopted contraception)
12 month discontinuation rates due to all reasons for all modern methods (as a proxy for discontinuation levels in the case of both unmet need and no unmet need (e.g. desire to have another child)
12 month discontinuation rates due to side-effects for all modern methods (as a proxy for discontinuation levels in the case of unmet need driven in part by either experience or fear of side-effects
For each correlation, r values are given along with asterisks showing p value levels. r values indicate the results of Pearson’s product-moment correlation tests and p values are represented with *** = p<0.001, ** p<0.01, * p<0.05.
It also will create a figure with multiple boxplots showing the variation in different reasons for 12 month discontinuation rates across countries.
These figures will use data from all countries for which DHS data is available for all of the above variables for all women. Data only from the most recent survey will be used to avoid non-independence of data points (N = 31). All data used in this analysis are aggregate results from DHS surveys, representative at the country level, and can be accessed through https://www.statcompiler.com/en/. Data were downloaded on 3rd May 2020.
Caution should be used when interpreting these results, as they are exploratory, susceptible to ecological fallacy, and only suggest a potential further relationship to be investigated.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.1 v purrr 0.3.4
## v tidyr 1.0.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidyr)
library(ggpubr)
library(ggrepel)
library(ggthemes)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
dhs_long<-read.csv(file.choose(), header=T)
#Removing any white space around the data
dhs<-dhs_long[1:31,1:18]
Plot the correlation between TFR and ever use of modern methods among all women.
#Correlation test - to determine r value and p value of correlation
cor.test(dhs$ever_use, dhs$tfr)
##
## Pearson's product-moment correlation
##
## data: dhs$ever_use and dhs$tfr
## t = -3.0809, df = 29, p-value = 0.004489
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7236014 -0.1726209
## sample estimates:
## cor
## -0.4965865
#Create plot
plot1<-ggplot(dhs, aes(x=ever_use, y=tfr)) +
labs(x ="Ever use of modern methods (%)", y = "Total fertility rate")+
geom_point(size=3,color="black") + geom_smooth(method = "lm", se = TRUE, color="black") +
geom_text(x = 25, y = 3,label = "r = -0.497**", size=5) +
annotate(geom = 'text', label = '(a)', x = -Inf, y = Inf, hjust = -0.2, vjust = 1, size=5)+theme_classic()
plot1+ theme(axis.title.x = element_text(family = "sans",size=15)) + theme(axis.title.y = element_text(family = "sans",size=15))
## `geom_smooth()` using formula 'y ~ x'
#If you want to add the country labels to the plot add below
# + geom_label_repel(size=3, aes(label=dhs$country),
# box.padding = 0.35,
# point.padding = 0.5,
# segment.color = 'grey50')
Violin plots for 12 month discontinuation rates for modern methods for all reasons, showing the median and interquartile rate. Whiskers show full range.
# dataset for violin plot
values<-c(dhs$discont12_all, dhs$discont12_desire_preg, dhs$discont12_other_fert, dhs$discont12_failure, dhs$discont12_se, dhs$discont12_more_effective, dhs$discont12_other_meth, dhs$discont12_other)
reasons<-c(rep("All reasons", length(dhs$discont12_all)), rep("Desire to become pregnant", length(dhs$discont12_all)),rep("Other fertility-related reasons", length(dhs$discont12_all)),rep("Failure", length(dhs$discont12_all)),rep("Side-effects", length(dhs$discont12_all)),rep("Wants more effective method", length(dhs$discont12_all)),rep("Other method-related reasons", length(dhs$discont12_all)),rep("Other reasons", length(dhs$discont12_all)))
df<-as.data.frame(cbind(values,reasons))
df$values<-as.numeric(df$values)
# ggplot
plot2 <- ggplot(df, aes(x = reasons, y = values))
plot2 + geom_violin(aes(fill = reasons), show.legend = TRUE, trim = FALSE) +
# geom_boxplot(width = 0.2)+
# scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07","#00AFBB", "#E7B800", "#FC4E07", "#E7B800", "#FC4E07"))+
labs(x ="Reason for discontinuation", y = "12 month discontinuation rate (%)")+
annotate(geom = 'text', label = '(b)', x = -Inf, y = Inf, hjust = -0.2, vjust = 1, size=5)+
theme(legend.position = "none")+
theme_classic() + theme(axis.title.x = element_text(family = "sans",size=15)) +
# theme(axis.text.x=element_text(angle=35, hjust=1, size=11,family="sans"))+
theme(axis.title.y = element_text(family = "sans",size=15), axis.text.x=element_blank(),
axis.ticks.x=element_blank())
Plot the correlation between TFR and 12 month discontinuation rate of modern methods due to all reasons
#Correlation test - to determine r value and p value of correlation
cor.test(dhs$discont12_all, dhs$tfr)
##
## Pearson's product-moment correlation
##
## data: dhs$discont12_all and dhs$tfr
## t = 1.5006, df = 29, p-value = 0.1443
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.09493609 0.56868280
## sample estimates:
## cor
## 0.2684341
#Create plot
plot3<-ggplot(dhs, aes(x=discont12_all, y=tfr)) +
labs(x ="12 month discontinuation rate: all reasons (%)", y = "Total fertility rate")+
geom_point(size=3,color="red") + geom_smooth(method = "lm", se = TRUE, color="black") +
geom_text(x = 25, y = 5, label = "r = 0.268", size=5)+
annotate(geom = 'text', label = '(c)', x = -Inf, y = Inf, hjust = -0.2, vjust = 1, size=5)+theme_classic()
plot3+ theme(axis.title.x = element_text(family = "sans",size=15)) + theme(axis.title.y = element_text(family = "sans",size=15))
## `geom_smooth()` using formula 'y ~ x'
#If you want to add the country labels to the plot add below
#+ geom_label_repel(size=3, aes(label=dhs$country),
# box.padding = 0.35,
# point.padding = 0.5,
# segment.color = 'grey50')
Plot the correlation between TFR and 12 month discontinuation rate of modern methods due to side-effects
#Correlation test - to determine r value and p value of correlation
cor.test(dhs$discont12_se, dhs$tfr)
##
## Pearson's product-moment correlation
##
## data: dhs$discont12_se and dhs$tfr
## t = 3.8055, df = 29, p-value = 0.0006767
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2800270 0.7733093
## sample estimates:
## cor
## 0.5771041
#Create plot
plot4<-ggplot(dhs, aes(x=discont12_se, y=tfr)) +
labs(x ="12 month discontinuation rate: side-effects (%)", y = "Total fertility rate")+
geom_point(size=3,color="purple") + geom_smooth(method = "lm", se = TRUE, color="black") +
geom_text(x = 5, y = 5.5,label = "r = 0.577***", size=5)+
annotate(geom = 'text', label = '(d)', x = -Inf, y = Inf, hjust = -0.2, vjust = 1, size=5)+theme_classic()
plot4+ theme(axis.title.x = element_text(family = "sans",size=15)) + theme(axis.title.y = element_text(family = "sans",size=15))
## `geom_smooth()` using formula 'y ~ x'
#If you want to add the country labels to the plot add below
# + geom_label_repel(size=3, aes(label=dhs$country),
# box.padding = 0.35,
# point.padding = 0.5,
# segment.color = 'grey50') +
# theme_classic()