OSD Chlorophyta - Sci Rep 2018
OSD Chlorophyta - Sci Rep 2018
1 Aim
- Analysis of OSD Chlorophyta data for Sci Report paper 2018
1.1 Notes
2 Initialize.
This file defines all the necessary libraries and variables
source('OSD Chlorophyta_init.R', echo=FALSE)
3 BLAST of Chlorophyta OTUs
3.1 Run BLAST on server
Using a qsub run the following code
# submitted with
# qsub -q short.q qsub_blast_osd.sh
DIR_PROJECT="/projet/umr7144/dipo/vaulot/metabarcodes/OSD/chlorophyta_margot_2018/"
cd $DIR_PROJECT
FILE=OSD_LGC_Chlorophyta
FASTA=$DIR_PROJECT$FILE".fasta"
BLAST_XLM=$DIR_PROJECT$FILE".xml"
BLAST_TSV=$DIR_PROJECT$FILE".tsv"
OUT_FMT="6 qseqid sseqid sacc stitle sscinames staxids sskingdoms sblastnames pident slen length mismatch gapopen qstart qend sstart send evalue bitscore"
blastn -max_target_seqs 100 -evalue 1.00e-10 -query $FASTA -out $BLAST_TSV -db /db/blast/all/nt -outfmt "$OUT_FMT"
3.2 Process BLAST output
We call the blast_18S function to create two files
- a modified BLAST output file which includes additional coolumns
- kingdom -> species : PR2 taxonomy for those accession numbers that are present in PR2
- hit_rank : the rank of the hit based on decreasing % identity and decreasing bit scores
- uncultured : TRUE if the hit corresponds to an uncultured item
- hit_lineage : GenBank taxonomy of the hit
- a summary file with one line per query that contains three sets of columns
- The top hit (column with prefix hit_top_)
- The top hit for which a PR2 sequence is available (columns starting with hit_pr2_).
- The top hit corresponding to a culture or an isolate (columns starting with hit_cult_).
blast_18S_reformat("../blast/OSD_LGC_Chlorophyta.tsv")
4 Read data
4.1 Read excel file
otu <- read_excel(path = "../supplementary data/Supplementary_dataS2 version 5.0.xlsx",
sheet = "otus")
samples <- read_excel(path = "../supplementary data/Supplementary_dataS2 version 5.0.xlsx",
sheet = "samples")
metadata <- read_excel(path = "../supplementary data/Supplementary_dataS2 version 5.0.xlsx",
sheet = "metadata")
4.2 Merge otu table with BLAST file
blast_summary_file <- "../blast/OSD_LGC_Chlorophyta.blast.summary.tsv"
blast_summary <- read_tsv(blast_summary_file, guess_max = 10000) %>% mutate(OTUNumber = str_sub(query_id,
start = 1, end = 8))
otu <- left_join(otu, blast_summary)
5 Summarize at class level
5.1 Compute different dataframes
samples_surface <- samples %>% filter(Surface_sample == 1)
sample_number <- nrow(samples_surface)
glue::glue("Number of surface samples: {sample_number}")
Number of surface samples: 141
otu_long <- otu %>% select(OTUNumber, Class:Species, contains("OSD")) %>% gather(station,
n_seq, contains("OSD")) %>% filter(station %in% samples_surface$Sample)
otu_seq <- otu_long %>% group_by(OTUNumber) %>% summarise(n_seq_otu = sum(n_seq)) %>%
filter(n_seq_otu > 0)
glue::glue("Number of OTUs for surface samples: {nrow(otu_seq)}")
Number of OTUs for surface samples: 749
glue::glue("Number of reads for surface samples: {sum(otu_seq$n_seq_otu)}")
Number of reads for surface samples: 313240
class_seq <- otu_long %>% group_by(Class) %>% summarise(n_seq_class = sum(n_seq))
# write_tsv(class_seq, 'OSD Chlorophyta sequences.tsv')
station_seq <- otu_long %>% group_by(station) %>% summarise(n_seq_station = sum(n_seq))
sample_number_above100 <- nrow(filter(station_seq, n_seq_station >= 100))
glue::glue("Number of surface samples with more than 100 Chlorophyta reads: {sample_number_above100}")
Number of surface samples with more than 100 Chlorophyta reads: 122
otu_long <- otu_long %>% left_join(otu_seq) %>% left_join(class_seq) %>% left_join(station_seq)
class_stations <- otu_long %>% group_by(Class, station, n_seq_station) %>% summarise(n_seq = sum(n_seq)) %>%
mutate(pct_seq = n_seq/n_seq_station * 100)
class_stations_metadata <- class_stations %>% mutate(OSD_station = as.numeric(str_extract(station,
"[0-9]{1,3}"))) %>% left_join(metadata) %>% mutate(Latitude = as.numeric(Latitude),
Longitude = as.numeric(Longitude))
# Summaries at class level
class_pct <- class_stations %>% group_by(Class) %>% summarise(mean_pct_seq_class = mean(pct_seq))
class_presence <- class_stations %>% filter(n_seq_station >= 100) %>% filter(n_seq >
0) %>% group_by(Class) %>% summarise(n_stations_present = n(), pct_stations_present = 100 *
n()/sample_number_above100)
class_more_1pct <- class_stations %>% filter(n_seq_station >= 100) %>% filter(pct_seq >
1) %>% group_by(Class) %>% summarise(n_stations_more_1pct = n(), pct_stations_more_1pct = 100 *
n()/sample_number_above100)
class_summary <- class_seq %>% left_join(class_pct) %>% left_join(class_presence) %>%
left_join(class_more_1pct)
5.2 Graphics at class levels
Histograms and treemaps
theme_chlorophyta_map <- theme(legend.position = c(0.1, 0.2), legend.background = element_rect(fill = "transparent"),
legend.text = element_text(size = 7), legend.title = element_text(size = 9),
plot.tag.position = "topright", plot.tag = element_text(size = 24, face = "bold"),
plot.title = element_text(size = 14))
# Number of sequences per class
ggplot(class_summary, aes(x = reorder(Class, n_seq_class), y = n_seq_class)) +
geom_col() + coord_flip() + xlab("Class")
treemap_dv(class_summary, "Class", "n_seq_class", "OSD - number of sequences")
# Mean pct of sequences per class
treemap_dv(class_summary, "Class", "mean_pct_seq_class", "OSD - mean of contribution")
# Fig 5
graph_stations_present <- ggplot(class_summary, aes(x = reorder(Class, pct_stations_present),
y = pct_stations_present)) + geom_col() + geom_text(aes(label = n_stations_present),
hjust = -0.25) + coord_flip() + xlab("Class") + ylab("% of stations where class detected") +
ylim(0, 100) + theme_dviz_grid() + theme_chlorophyta_map + labs(title = "",
tag = "A")
graph_stations_present
graph_stations_more_1pct <- ggplot(class_summary, aes(x = reorder(Class, pct_stations_present),
y = pct_stations_more_1pct)) + geom_col() + geom_text(aes(label = n_stations_more_1pct),
hjust = -0.25) + coord_flip() + xlab("Class") + ylab("% of stations where class represesents more than 1% of Chlorophyta") +
ylim(0, 100) + theme_dviz_grid() + theme_chlorophyta_map + labs(title = "",
tag = "B")
graph_stations_more_1pct
grid_graphs <- gridExtra::grid.arrange(grobs = list(graph_stations_present,
graph_stations_more_1pct), ncol = 1, nrow = 2, clip = FALSE, padding = unit(0,
"line"))
ggsave(plot = grid_graphs, filename = "pdf/Fig 5 new Class and stations.pdf",
width = 15, height = 20, scale = 1.8, units = "cm", useDingbats = FALSE)
# Relation between stations present and stations more than 1 pct
ggplot(class_summary, aes(x = pct_stations_more_1pct, y = pct_stations_present,
label = Class)) + geom_point() + geom_text(size = 2, check_overlap = TRUE,
vjust = 1) + xlim(0, 100)
# Relation between sequence per class and stations present
ggplot(class_summary, aes(x = mean_pct_seq_class, y = pct_stations_present,
label = Class)) + geom_point() + geom_text(size = 2, check_overlap = TRUE,
vjust = 1)
6 Metadata
6.1 Statistics
metadata_summarized <- metadata %>% transmute(Temperature = as.numeric(Temperature),
Salinity = as.numeric(Salinity), Nitrates = as.numeric(Nitrates), Phosphates = as.numeric(Phosphates),
Chlorophylle_a = as.numeric(Chlorophylle_a)) %>% summarise_all(funs(min,
max, mean, median, sd), na.rm = TRUE)
knitr::kable(metadata_summarized)
Temperature_min | Salinity_min | Nitrates_min | Phosphates_min | Chlorophylle_a_min | Temperature_max | Salinity_max | Nitrates_max | Phosphates_max | Chlorophylle_a_max | Temperature_mean | Salinity_mean | Nitrates_mean | Phosphates_mean | Chlorophylle_a_mean | Temperature_median | Salinity_median | Nitrates_median | Phosphates_median | Chlorophylle_a_median | Temperature_sd | Salinity_sd | Nitrates_sd | Phosphates_sd | Chlorophylle_a_sd |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
-1.6 | 0.14 | 0 | 0 | 0.07 | 31 | 100 | 12 | 1.6 | 75 | 20 | 32 | 2.3 | 0.23 | 7.4 | 20 | 34 | 0.77 | 0.16 | 2.6 | 6.3 | 10 | 3.3 | 0.23 | 13 |
6.2 Graph
# metadata <- metadata %>% filter(Chlorophyta_reads > 100)
# Histogram of Chlorophyta %
ggplot(metadata, aes(x = Chlorophyta_pct_reads)) + geom_histogram(binwidth = 5,
center = 2.5) + ggtitle("Chllorophyta %")
# Chlorophyta % vs Chla
ggplot(metadata, aes(x = Chlorophylle_a, y = Chlorophyta_pct_reads)) + geom_point() +
scale_x_log10()
# Chlorophyta % vs Chlorophyta OTUs
ggplot(metadata, aes(x = Chlorophyta_OTUs, y = Chlorophyta_pct_reads)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ x)
summary(lm(metadata$Chlorophyta_OTUs ~ metadata$Chlorophyta_pct_reads))
Call:
lm(formula = metadata$Chlorophyta_OTUs ~ metadata$Chlorophyta_pct_reads)
Residuals:
Min 1Q Median 3Q Max
-42.18 -12.70 -2.48 11.11 52.54
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 29.4453 2.4930 11.81 < 2e-16 ***
metadata$Chlorophyta_pct_reads 0.2955 0.0653 4.53 1.3e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 19 on 139 degrees of freedom
Multiple R-squared: 0.128, Adjusted R-squared: 0.122
F-statistic: 20.5 on 1 and 139 DF, p-value: 1.28e-05
# Chlorophyta % vs Latitude
ggplot(metadata, aes(x = as.numeric(Latitude), y = Chlorophyta_pct_reads)) +
geom_point() + geom_smooth(method = lm, formula = y ~ splines::ns(x, df = 4),
se = FALSE) + xlim(-90, 90)
# Chlorophyta vs range of latitudes
ggplot(metadata, aes(x = as.numeric(Latitude), y = Chlorophyta_pct_reads)) +
geom_boxplot(aes(group = cut_width(as.numeric(Latitude), 10)))
# Chlorophyta % vs Temp
ggplot(metadata, aes(x = as.numeric(Temperature), y = Chlorophyta_pct_reads)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x)
# Chlorophyta % vs Nitrates
ggplot(metadata, aes(x = as.numeric(Nitrates), y = Chlorophyta_pct_reads)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x)
# Chlorophyta % vs Phosphates
ggplot(metadata, aes(x = as.numeric(Phosphates), y = Chlorophyta_pct_reads)) +
geom_point() + geom_smooth(method = "lm", formula = y ~ x)
7 Similarity of OTUs to culture sequences
7.1 Plot histograms of % identity
ggplot(otu, aes(x = hit_cult_pct_identity)) + geom_histogram(binwidth = 0.5,
boundary = 0) + xlim(100, 85) + ggtitle("Top hit culture")
ggplot(otu, aes(x = hit_top_pct_identity)) + geom_histogram(binwidth = 0.5,
boundary = 0) + xlim(100, 85) + ggtitle("Top hit GenBank")
7.2 Filters
Filter for top hit culture < 0.98 and OTUs > 10 reads compute number of OTUs uncultured Also compute mean % to cultured organism (if no hit to cultured, assume 80% similarity)
otu_10 <- otu %>% filter(Size > 10) %>% select(OTUNumber:Size, hit_cult_pct_identity,
hit_cult_hit_title)
otu_10$hit_cult_pct_identity <- otu_10$hit_cult_pct_identity %>% replace_na(80)
class_otu_10 <- otu_10 %>% group_by(Class) %>% summarize(n_otu_10 = n(), mean_hit_cult_pct_identity = mean(hit_cult_pct_identity,
na.rm = TRUE))
class_summary <- class_summary %>% left_join(class_otu_10)
otu_uncult <- otu_10 %>% filter(hit_cult_pct_identity < 98)
class_otu_uncult <- otu_uncult %>% group_by(Class) %>% summarize(n_otu_uncult = n())
class_summary <- class_summary %>% left_join(class_otu_uncult)
class_summary <- class_summary %>% mutate(pct_otu_uncult = 100 * n_otu_uncult/n_otu_10)
class_summary$pct_otu_uncult <- class_summary$pct_otu_uncult %>% replace_na(0)
7.3 Plot bargraphs
ggplot(class_summary, aes(x = reorder(Class, pct_otu_uncult), y = pct_otu_uncult)) +
geom_col() + geom_text(aes(label = n_otu_uncult), hjust = -0.7) + coord_flip() +
xlab("Class") + ylab("% of uncultured OTUs") + theme_dviz_grid()
ggsave("pdf/Fig S2 uncultured scale1.pdf", scale = 1, width = 22, height = 13,
units = "cm")
ggplot(class_summary, aes(x = reorder(Class, n_otu_uncult), y = n_otu_uncult)) +
geom_col() + coord_flip() + xlab("Class") + ylab("Number of uncultured OTUs") +
theme_dviz_grid()
ggplot(class_summary, aes(x = reorder(Class, pct_otu_uncult), y = mean_hit_cult_pct_identity)) +
geom_point() + coord_flip() + xlab("Class") + ylab("Mean percent identity to culture (more than 10 seq)") +
ylim(100, 80) + cowplot::theme_cowplot()
8 World Maps for each class
# Define constants for the map
size_factor = 1
size_points <- 2.5
size_cross <- 1
color_ice <- "lightslateblue"
color_water <- "red"
color_cultures <- "green"
color_not_present <- "blue"
color_morpho <- "brown"
classes_main_fig <- c("Mamiellophyceae", "Chlorodendrophyceae", "Chlorophyceae",
"Pyramimonadales", "Trebouxiophyceae", "Ulvophyceae")
classes_abundant <- c(classes_main_fig, "Chloropicophyceae", "Palmophyllophyceae",
"Prasinophytes clade IX")
map_array_main_fig <- list() # to store the maps to do tiled graph
map_array_supp_fig <- list()
# Map zoomed for EU
map_array_main_fig_eu <- list() # to store the maps to do tiled graph
map_array_supp_fig_eu <- list()
for (one_class in class_summary$Class) {
if (one_class %in% classes_abundant) {
class_limits = c(0, 100)
class_breaks = c(10, 25, 50, 75, 100)
} else {
class_limits = c(0, 20)
class_breaks = c(1, 2.5, 5, 10, 20)
}
class.absent <- class_stations_metadata %>% filter(n_seq_station >= 100 &
pct_seq < 1 & Class == one_class)
class.present <- class_stations_metadata %>% filter(n_seq_station >= 100 &
pct_seq >= 1 & Class == one_class)
one_class_map <- map_world() + geom_point(data = class.absent, aes(x = Longitude,
y = Latitude), color = color_not_present, size = size_cross, shape = 3) +
geom_point(data = class.present, aes(x = Longitude, y = Latitude, size = pct_seq,
color = pct_seq, alpha = pct_seq)) + theme(legend.position = c(0.1,
0.2), legend.background = element_rect(fill = "transparent"), legend.text = element_text(size = 7),
legend.title = element_text(size = 9)) + scale_size(name = "% of Chlorophyta",
range = c(0, 5), limits = class_limits, breaks = class_breaks) + scale_alpha_continuous(name = "% of Chlorophyta",
range = c(0.5, 0.9), limits = class_limits, breaks = class_breaks) +
viridis::scale_color_viridis(option = "magma", name = "% of Chlorophyta",
limits = class_limits, breaks = class_breaks) + guides(colour = guide_legend()) +
theme(plot.title = element_text(margin = margin(t = 10, b = 5))) + ggtitle(one_class)
# range gives maximum and minimum size of symbols, limits the extent of the
# scale replace guide = 'legend' by guide=FALSE to remove the legend....
# NOT USED : guides(color = guide_legend(override.aes = list(size=5))) +
one_class_map_eu <- one_class_map + coord_fixed(1.3, xlim = c(-40, 40),
ylim = c(30, 70)) + scale_x_continuous(breaks = (-4:4) * 10) + scale_y_continuous(breaks = (3:7) *
10)
print(one_class_map)
print(one_class_map_eu)
if (one_class %in% classes_main_fig) {
map_array_main_fig[[one_class]] <- one_class_map
} else {
map_array_supp_fig[[one_class]] <- one_class_map
}
}
grid_map_main_fig <- gridExtra::grid.arrange(grobs = map_array_main_fig, ncol = 2,
nrow = 3, clip = FALSE, padding = unit(0, "line"))
grid_map_supp_fig <- gridExtra::grid.arrange(grobs = map_array_supp_fig, ncol = 2,
nrow = 4, clip = FALSE, padding = unit(0, "line"))
# Save pdf
ggsave(plot = grid_map_main_fig, filename = "pdf/Fig 4 Map Classes main.pdf",
width = 20, height = 20, scale = 1.8, units = "cm", useDingbats = FALSE)
ggsave(plot = grid_map_supp_fig, filename = "pdf/Fig S3 Map Classes supp.pdf",
width = 20, height = 26, scale = 1.8, units = "cm", useDingbats = FALSE)
9 Maps for % of Chlorophyta and OTU
9.1 Prepare the data
Chlorophyta_surface <- metadata %>% select(OSD_station, Chlorophyta_reads, Chlorophyta_pct_reads,
Chlorophyta_OTUs, Latitude, Longitude) %>% mutate(Latitude = as.numeric(Latitude),
Longitude = as.numeric(Longitude))
9.2 Statistics
Chlorophyta_surface_summary <- Chlorophyta_surface %>% select(Chlorophyta_reads,
Chlorophyta_pct_reads, Chlorophyta_OTUs) %>% summarise_all(funs(min, max,
mean, median, sd), na.rm = TRUE)
knitr::kable(Chlorophyta_surface_summary)
Chlorophyta_reads_min | Chlorophyta_pct_reads_min | Chlorophyta_OTUs_min | Chlorophyta_reads_max | Chlorophyta_pct_reads_max | Chlorophyta_OTUs_max | Chlorophyta_reads_mean | Chlorophyta_pct_reads_mean | Chlorophyta_OTUs_mean | Chlorophyta_reads_median | Chlorophyta_pct_reads_median | Chlorophyta_OTUs_median | Chlorophyta_reads_sd | Chlorophyta_pct_reads_sd | Chlorophyta_OTUs_sd |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
9 | 0.21 | 4 | 18569 | 94 | 98 | 2217 | 29 | 38 | 937 | 19 | 37 | 3172 | 25 | 20 |
9.3 Map for Chlorophyta %
pct_limits = c(0, 100)
pct_breaks = c(10, 25, 50, 75, 100)
theme_chlorophyta_map <- theme(legend.position = c(0.1, 0.2), legend.background = element_rect(fill = "transparent"),
legend.text = element_text(size = 7), legend.title = element_text(size = 9),
plot.tag.position = "topright", plot.tag = element_text(size = 18, face = "bold"),
plot.title = element_text(size = 14))
Chlorophyta_pct_map <- map_world() + # coord_map(xlim = c(-180, 180),ylim = c(-90, 90)) +
# scale_x_continuous(breaks = (-4:4) * 45) + scale_y_continuous(breaks =
# (-2:2) * 30) +
geom_point(data = filter(Chlorophyta_surface, Chlorophyta_pct_reads < 1), aes(x = Longitude,
y = Latitude), color = color_not_present, size = size_cross, shape = 3) +
geom_point(data = filter(Chlorophyta_surface, Chlorophyta_pct_reads >= 1),
aes(x = Longitude, y = Latitude, size = Chlorophyta_pct_reads, color = Chlorophyta_pct_reads,
alpha = Chlorophyta_pct_reads)) + scale_size(name = "%", range = c(0,
5), limits = pct_limits, breaks = pct_breaks) + scale_alpha_continuous(name = "%",
range = c(0.5, 0.9), limits = pct_limits, breaks = pct_breaks) + viridis::scale_color_viridis(option = "magma",
name = "%", limits = pct_limits, breaks = pct_breaks) + guides(colour = guide_legend()) +
labs(title = "Chlorophyta - % of Photosynthetic reads", tag = "A") + theme_chlorophyta_map
print(Chlorophyta_pct_map)
# Zoom on Europe
Chlorophyta_pct_europe <- Chlorophyta_pct_map + # coord_map(xlim = c(-30, 40),ylim = c(30, 70)) +
coord_fixed(1.3, xlim = c(-40, 40), ylim = c(30, 70)) + scale_x_continuous(breaks = (-4:4) *
10) + scale_y_continuous(breaks = (3:7) * 10) + labs(title = "", tag = "B")
print(Chlorophyta_pct_europe)
9.4 Map for OTUs
otu_limits = c(0, 100)
otu_breaks = c(10, 25, 50, 75, 100)
Chlorophyta_otu_map <- map_world() + # coord_map(xlim = c(-180, 180),ylim = c(-90, 90)) +
# scale_x_continuous(breaks = (-4:4) * 45) + scale_y_continuous(breaks =
# (-2:2) * 30) +
geom_point(data = Chlorophyta_surface, aes(x = Longitude, y = Latitude, size = Chlorophyta_OTUs,
color = Chlorophyta_OTUs, alpha = Chlorophyta_OTUs)) + scale_size(name = "# OTUs",
range = c(0, 5), limits = otu_limits, breaks = otu_breaks) + scale_alpha_continuous(name = "# OTUs",
range = c(0.5, 0.9), limits = otu_limits, breaks = otu_breaks) + viridis::scale_color_viridis(option = "magma",
name = "# OTUs", limits = otu_limits, breaks = otu_breaks) + guides(colour = guide_legend()) +
# theme(plot.title = element_text(margin = margin(t = 10, b = 5))) +
labs(title = "Chlorophyta - Number of OTUs", tag = "C") + theme_chlorophyta_map
print(Chlorophyta_otu_map)
# Zoom on Europe
Chlorophyta_otu_europe <- Chlorophyta_otu_map + # coord_map(xlim = c(-30, 40),ylim = c(30, 70)) +
coord_fixed(1.3, xlim = c(-40, 40), ylim = c(30, 70)) + scale_x_continuous(breaks = (-4:4) *
10) + scale_y_continuous(breaks = (3:7) * 10) + labs(title = "", tag = "D")
print(Chlorophyta_otu_europe)
9.5 Arrange the files into the final figures with 4 panels
map_array_chloro <- list(Chlorophyta_pct_map, Chlorophyta_pct_europe, Chlorophyta_otu_map,
Chlorophyta_otu_europe)
grid_map_chloro <- gridExtra::grid.arrange(grobs = map_array_chloro, ncol = 2,
nrow = 2, widths = c(1, 0.95), clip = FALSE, padding = unit(0, "line"))
ggsave(plot = grid_map_chloro, filename = "pdf/Fig 2 Map Chlorophyta.pdf", width = 20,
height = 15, scale = 1.8, units = "cm", useDingbats = FALSE)
10 Heatmaps
10.1 Filter the data to remove the class that have less than 1% contribution
class_summary_abundant <- class_summary %>% filter(mean_pct_seq_class >= 1)
class_abundant <- class_summary_abundant$Class
class_heatmap_data <- class_stations_metadata %>% filter((Class %in% class_abundant) &
(Chlorophyta_reads >= 100)) %>% ungroup() %>% mutate(OSD_station_label = sprintf("%s %03d",
Ocean_code, OSD_station)) %>% select(OSD_station_label, Class, pct_seq) %>%
spread(Class, pct_seq) %>% tibble::column_to_rownames(var = "OSD_station_label") %>%
t()
class_heatmap_data_vertical <- class_stations_metadata %>% filter((Class %in%
class_abundant) & (Chlorophyta_reads >= 100)) %>% ungroup() %>% mutate(OSD_station_label = sprintf("%03d %s",
OSD_station, Ocean_code)) %>% select(OSD_station_label, Class, pct_seq) %>%
spread(Class, pct_seq) %>% tibble::column_to_rownames(var = "OSD_station_label")
10.2 Use ComplexHeatmap
See Web site
library(ComplexHeatmap)
# Palette de couleurs
reds = colorRampPalette(c("grey95", "orange", "red3"))
couleurs = reds(10)
pdf(file = "pdf/Fig 6 heatmap horizontal.pdf", width = 12, height = 6)
Heatmap(as.matrix(class_heatmap_data), col = couleurs, clustering_distance_columns = function(m) vegan::vegdist(m),
cluster_rows = TRUE, cluster_columns = TRUE, show_column_dend = TRUE, show_row_dend = FALSE,
column_dend_height = unit(30, "mm"), column_title = "Station", row_title = "",
column_title_side = "bottom", row_title_side = "left", row_title_gp = gpar(fontsize = 12,
fontface = "plain"), column_title_gp = gpar(fontsize = 12, fontface = "plain"),
column_names_gp = gpar(fontsize = 6, fontface = "plain"), name = "%", heatmap_legend_param = list(title_gp = gpar(fontface = "plain")))
dev.off
function (which = dev.cur())
{
if (which == 1)
stop("cannot shut down device 1 (the null device)")
.External(C_devoff, as.integer(which))
dev.cur()
}
<bytecode: 0x0000000008fc3c80>
<environment: namespace:grDevices>
pdf(file = "pdf/Fig 6 heatmap vertical.pdf", width = 6, height = 12)
Heatmap(as.matrix(class_heatmap_data_vertical), col = couleurs, split = 8, combined_name_fun = NULL,
km_title = "", clustering_distance_rows = function(m) vegan::vegdist(m),
clustering_distance_columns = function(m) vegan::vegdist(m), cluster_rows = TRUE,
cluster_columns = TRUE, show_column_dend = FALSE, show_row_dend = FALSE,
row_dend_width = unit(30, "mm"), column_title = "", row_title = "Station",
column_title_side = "bottom", row_title_side = "right", row_title_gp = gpar(fontsize = 0,
fontface = "plain"), column_title_gp = gpar(fontsize = 15, fontface = "plain"),
row_names_gp = gpar(fontsize = 6, fontface = "plain"), name = "%", heatmap_legend_param = list(title_gp = gpar(fontface = "plain")))
dev.off
function (which = dev.cur())
{
if (which == 1)
stop("cannot shut down device 1 (the null device)")
.External(C_devoff, as.integer(which))
dev.cur()
}
<bytecode: 0x0000000008fc3c80>
<environment: namespace:grDevices>
10.3 Use heatmap.2 (Not used)
# pdf(file='Fig heatmap.pdf', width =15, height = 12)
gplots::heatmap.2(as.matrix(class_heatmap_data), col = couleurs, breaks = c(1,
5, 1:9 * 10), colsep = T, rowsep = T, trace = "none", margins = c(10, 10),
keysize = 0.8, key.title = "% of Chlorophyta", key.xlab = "", density.info = "none",
cexCol = 0.8, cexRow = 1.3, dendrogram = "column", Rowv = TRUE)
# dev.off
gplots::heatmap.2(as.matrix(class_heatmap_data), col = rev(terrain.colors(10)),
breaks = 11, colsep = T, rowsep = T, trace = "none", margins = c(5, 10),
keysize = 1, cexCol = 0.4, cexRow = 0.8, density.info = "none", dendrogram = "column",
Rowv = TRUE)
# colsep, rowsep, sepcolor (optional) vector of integers indicating which
# columns or rows should be separated from the preceding columns or rows by
# a narrow space of color sepcolor
# trace character string indicating whether a solid 'trace' line should be
# drawn across 'row's or down 'column's, 'both' or 'none'. The distance of
# the line from the center of each color-cell is proportional to the size of
# the measurement. Defaults to 'column'.