Data file contains average trait value for each of the 34 isolates used in this work. Columns are described below.
variable | description |
---|---|
num | the isolate ID |
group | the group |
culture ID | the culture the isolate has been sampled from |
day | the day the isolate has been sampled |
mutation_position | the position of the lrp mutation (if any) |
mutation_type | the type of mutation (if any) |
lag | the lag in culture, the number of hours before culture reaches a threshold GFP intensity |
CFU | log number of CFU per mL LB at 91 hours |
FSC | the log FSC, which we used as a proxy for cell size |
freqRed | the frequency of red colonies when plated on NBTA |
motility | the average motility halo size (in mm) |
antibiose | the average antiobiosis halo size in (mm) |
hemolysis | hemolytic activity (yes/no) |
lipolysis | lipolytic activity (yes/no) |
invivoCFU | average CFU per insect |
beta | probability of successful infestation |
f | average number of nematode IJs produced per successful infestation |
nu | IJs mortality rate |
bact_per_IJ | average number of bacteria per IJ |
R0 | transmission |
# setwd("/home/mar/Documents/These/Papier_variants_JBF/complete_R_workflow/")
rm(list=ls())
library(mapplots)
d <- read.table("data_xenorhabdus_F1D3_isolates.csv",header=T,sep=";",dec=".")
str(d)
## 'data.frame': 34 obs. of 20 variables:
## $ num : int 12 20 21 23 26 30 35 38 42 43 ...
## $ group : int 1 1 1 1 1 1 1 1 1 1 ...
## $ culture.ID : int 9 7 7 9 11 12 1 2 3 3 ...
## $ day : int 3 7 7 7 7 7 13 13 13 13 ...
## $ mutation_position: int NA NA NA NA NA NA NA NA NA NA ...
## $ mutation_type : Factor w/ 6 levels "","dupl.","FS",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ lag : num 11.5 11.3 12.3 14.4 11.7 ...
## $ CFU : num 8.5 8.25 8.31 8.06 8.24 ...
## $ FSC : num 4.92 4.91 4.94 4.97 4.92 ...
## $ freqRed : num 0 0 0 0 0 ...
## $ motility : num 25.6 20.6 27.8 29 27.4 25.8 25.4 25 25.4 18.2 ...
## $ antibiosis : num 14.7 15 22.3 14.2 19.2 ...
## $ hemolysis : int 1 1 1 1 1 1 1 1 1 1 ...
## $ lipolysis : int 1 1 1 1 1 1 1 1 1 0 ...
## $ invivoCFU : num 1.56 1.3 3.05 2.97 2.96 ...
## $ beta : num 0.889 0.889 1 0.778 1 ...
## $ f : num 284889 203778 599333 290000 352667 ...
## $ nu : num 0.156 0.131 0.165 0.188 0.144 ...
## $ bact_per_IJ : num 53.5 30.3 57.8 54.9 42.2 ...
## $ R0 : num 1619256 1371677 3642647 1202589 2455701 ...
Log CFU per mL (91 hours culture in LB broth) and cell size clearly define two groups of isolates. The proportion of red colonies is another discriminant phenotype.
plot(motility~FSC,data=d,type="n",ylab="motility (halo diameter in mm)",xlab="cell size (log FSC)",las=1)
d$z <- 0.2+as.numeric(cut(d$antibiosis,10))/5
with(d,{
for(i in order(group,1-freqRed)) {
try(add.pie(c(0.001+freqRed[i],1.001-freqRed[i])*100,
x = FSC[i],
y = motility[i],
edges=200,
radius=0.50*z[i],labels=NA,col=c("indianred1","steelblue1")))
}
}
)
A clustering analyses based on the same three traits confirms the existence of three phenotypic groups: 9, 22, 27 and 48 form a distinctive cluster within the group 1 variants. Variant 47 falls within group 1 cluster but forms red colonies.
m.dist <- dist(scale(d[,c("motility","FSC","freqRed")]))
dendr <- hclust(m.dist,members = d$num)
dendr$labels <- d$num
plot(as.dendrogram(dendr),axes=F,xlab="",ylab="")
Average trait values per group
v <- c("motility","FSC","CFU","antibiosis","hemolysis","lipolysis")
## average values
t1 <- t(apply(d[,v],2,function(x) tapply(x,d$group,mean,na.rm=T)))
## standard errors
t2 <- t(apply(d[,v],2,function(x) tapply(x,d$group,function(z) sd(z,na.rm=T)/sqrt(length(z)))))
m <- matrix(paste(as.character(round(t1,digits=2)),as.character(round(t2,digits=2)),sep="+-"),ncol=3)
rownames(m) <- v
colnames(m) <- paste("group",1:3)
m
## group 1 group 2 group 3
## motility "24.72+-0.71" "8.73+-0.84" "26.72+-1.14"
## FSC "4.94+-0.02" "4.24+-0.05" "4.93+-0.04"
## CFU "8.24+-0.04" "8.98+-0.04" "8.53+-0.04"
## antibiosis "17.89+-1.38" "11.35+-0.62" "7.95+-3.13"
## hemolysis "1+-0" "0+-0" "0.4+-0.24"
## lipolysis "0.93+-0.07" "0.07+-0.07" "1+-0"
Non parametric comparisons among group of variants
with(d,pairwise.wilcox.test(antibiosis,group))
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot
## compute exact p-value with ties
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: antibiosis and group
##
## 1 2
## 2 0.0027 -
## 3 0.0292 0.3306
##
## P value adjustment method: holm
with(d,pairwise.prop.test(x=tapply(hemolysis,group,sum),n=tapply(hemolysis,group,length)))
## Warning in prop.test(x[c(i, j)], n[c(i, j)], ...): Chi-squared
## approximation may be incorrect
## Warning in prop.test(x[c(i, j)], n[c(i, j)], ...): Chi-squared
## approximation may be incorrect
##
## Pairwise comparisons using Pairwise comparison of proportions
##
## data: tapply(hemolysis, group, sum) out of tapply(hemolysis, group, length)
##
## 1 2
## 2 1.6e-06 -
## 3 0.023 0.098
##
## P value adjustment method: holm
with(d,pairwise.prop.test(x=tapply(lipolysis,group,sum),n=tapply(lipolysis,group,length)))
## Warning in prop.test(x[c(i, j)], n[c(i, j)], ...): Chi-squared
## approximation may be incorrect
## Warning in prop.test(x[c(i, j)], n[c(i, j)], ...): Chi-squared
## approximation may be incorrect
##
## Pairwise comparisons using Pairwise comparison of proportions
##
## data: tapply(lipolysis, group, sum) out of tapply(lipolysis, group, length)
##
## 1 2
## 2 5.9e-05 -
## 3 1.0000 0.0021
##
## P value adjustment method: holm
Log CFU per mL (91 hours culture in LB broth) and lag (figure presented in supp. mat. 2).
plot(CFU~lag,data=d,type="n",ylab="log CFU per mL",xlab="lag in hours",ylim=c(8,9.4),xlim=c(10,35),las=1)
d$z <- 0.2+as.numeric(cut(d$FSC,10))/5
with(d,{
for(i in order(group,1-freqRed)) {
try(add.pie(c(0.001+freqRed[i],1.001-freqRed[i])*100,
x = lag[i],
y = CFU[i],
edges=200,
radius=0.0250*z[i],labels=NA,col=c("indianred1","steelblue1")))
}
}
)
Data analyzed here come from an experiment where 24 independent cultures of G1#23 have been sampled at days 3,5, 7 and 8 and where isolates have been phenotyped and sequenced.
d <- read.table("data_gasp_emergence.csv",header=T)
rownames(d) <- paste("TR2_",d$ID,sep="")
str(d)
## 'data.frame': 1230 obs. of 22 variables:
## $ ID : Factor w/ 1230 levels "A_J0_1","A_J1_1",..: 1 61 100 135 170 205 239 274 342 374 ...
## $ culture : Factor w/ 24 levels "A","B","C","D",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ day : int 0 0 0 0 0 0 0 0 0 0 ...
## $ colony : int 1 1 1 1 1 1 1 1 1 1 ...
## $ glycerol : int 1 1 1 1 1 1 1 1 1 1 ...
## $ extraction : int 1 1 1 1 1 1 1 1 1 1 ...
## $ PCR : int NA NA NA NA NA NA NA NA NA NA ...
## $ sequence : int 0 0 0 0 0 0 0 0 0 0 ...
## $ color_1 : Factor w/ 17 levels "B","BR","G","GB",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ color_2 : Factor w/ 5 levels "","B","BR","M",..: NA NA NA NA NA NA NA NA NA NA ...
## $ hemolysis_1 : Factor w/ 8 levels ""," ","N","NP",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ hemolysis_2 : Factor w/ 8 levels ""," ","N","NP",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ antibiosis : num NA NA NA NA NA NA NA NA NA NA ...
## $ length : num NA NA NA NA NA NA NA NA NA NA ...
## $ size : num 1.86 1.79 1.8 1.85 1.83 ...
## $ rel.size : num -7.40e-17 2.15e-17 -6.94e-17 1.07e-16 6.53e-17 ...
## $ cv.size : num 0.0774 0.1068 0.0878 0.1005 0.0729 ...
## $ motility : num 1.53 2.37 1.91 1.82 2.17 ...
## $ rel.motility : num -4.16e-01 4.16e-01 1.20e-01 -1.20e-01 -3.90e-17 ...
## $ cv.motility : num 0.924 0.722 0.786 0.874 0.9 ...
## $ first_red : int 8 8 8 NA NA 8 6 5 NA 8 ...
## $ first_non_blue: int 4 6 3 3 4 4 4 5 7 5 ...
Color as been measured twice, with second measurement performed the same day motility, hemolysis, antibiosis and size measurements have been performed. Unless not possible, this second measurement is therefore prefered over the first one. Phenotypic groups are defined as to gathering colonies with somewhat similar colors
d$color <- with(d,ifelse(!is.na(color_2),as.character(color_2),as.character(color_1)))
d$phenotypic_group <- factor(c("B"="B","BG"="B","BR"="m","BRG"="m","BRT"="m","G"="A","R"="R","RB"="m","T"="A","TR"="A")[as.character(d$color)])
for(i in c(1,5,7)) {
plot(motility~length,
ylim=c(0,40),xlim=c(0.75,7.5),
las=1,
cex=ifelse(phenotypic_group=="m",2,3.5)/2,
pch=ifelse(phenotypic_group=="m",-as.hexmode("25D1"),
ifelse(phenotypic_group=="R",-as.hexmode("25CF"),-as.hexmode("25CF"))),
col=ifelse(phenotypic_group=="m","indianred1",
ifelse(phenotypic_group=="R","indianred1","steelblue1")),
main=paste("Day",i),
xlab="cell length (micrometer)",
ylab="mobility (halo size in mm)",
data=subset(d,day==i))
legend("topleft",
pt.cex=c(3.5,3.5,2)/2,
pch=c(-as.hexmode("25CF"),-as.hexmode("25CF"),-as.hexmode("25D1")),
col=c("steelblue1","indianred1","indianred1"),legend=c("blue","red","mixture"),ncol=1)
}
Measurements are first averaged by culture, so that data points are really independent. Red isolates have lower motility and antibiotic activity than blue isolates at day 5, but blue and red isolates have similar sizes.
x <- with(subset(d,phenotypic_group=="R" & day==5),tapply(antibiosis,culture,mean,na.rm=T))
y <- with(subset(d,phenotypic_group=="B" & day==5),tapply(antibiosis,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 0, p-value = 1.083e-05
## alternative hypothesis: true location shift is not equal to 0
x <- with(subset(d,phenotypic_group=="R" & day==5),tapply(motility,culture,mean,na.rm=T))
y <- with(subset(d,phenotypic_group=="B" & day==5),tapply(motility,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 26, p-value = 0.0001417
## alternative hypothesis: true location shift is not equal to 0
x <- with(subset(d,phenotypic_group=="R" & day==5),tapply(length,culture,mean,na.rm=T))
y <- with(subset(d,phenotypic_group=="B" & day==5),tapply(length,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 161, p-value = 0.1273
## alternative hypothesis: true location shift is not equal to 0
At day 7 both motility and size are lower for red isolates than for blue ones.
## antibiosis measurements failed for isolates sampled at day 7 !
x <- with(subset(d,phenotypic_group=="R" & day==7),tapply(motility,culture,mean,na.rm=T))
y <- with(subset(d,phenotypic_group=="B" & day==7),tapply(motility,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 0, p-value = 5.655e-12
## alternative hypothesis: true location shift is not equal to 0
x <- with(subset(d,phenotypic_group=="R" & day==7),tapply(length,culture,mean,na.rm=T))
y <- with(subset(d,phenotypic_group=="B" & day==7),tapply(length,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 0, p-value = 5.655e-12
## alternative hypothesis: true location shift is not equal to 0
Loading sequencing data, and crossing them with phenotypic measurements.
mutations <- read.table("data_mutations_gasp_emergence.csv",header=T,sep=",")
d$ID.mutation <- NA
listMutations <- strsplit(as.character(mutations$Collection),";")
for(i in 1:length(listMutations)) {
pos <- match(listMutations[[i]],rownames(d))
d$ID.mutation[pos] <- i
}
d$mutation <- !is.na(d$ID.mutation)
d$pos.mutation <- with(d,mutations[ID.mutation,"Start"])
d$type.mutation <- factor(with(d,ifelse(is.na(ID.mutation),ifelse(sequence==1,"No mutation",NA),gsub("Mutation/","",mutations[ID.mutation,"Group"]))))
d$effect.mutation <- factor(with(d,ifelse(is.na(ID.mutation),ifelse(sequence==1,"No mutation",NA),as.character(mutations[ID.mutation,"Name"]))))
d$type.mutation <- relevel(d$type.mutation,"No mutation")
d$effect.mutation <- relevel(d$effect.mutation,"No mutation")
with(d,table(sequence,type.mutation))
## type.mutation
## sequence No mutation ? Duplication InDel Substitution Transposon
## 0 0 0 0 0 0 0
## 1 47 2 2 5 15 6
with(subset(d,sequence==1),table(phenotypic_group,type.mutation))
## type.mutation
## phenotypic_group No mutation ? Duplication InDel Substitution Transposon
## A 0 0 0 0 0 0
## B 33 0 0 1 1 0
## m 7 1 0 2 1 0
## R 5 1 2 2 13 6
plot(motility~size,
cex=ifelse(phenotypic_group=="m",2,3.5)/2,
pch=ifelse(phenotypic_group=="m",-as.hexmode("25D1"),
ifelse(phenotypic_group=="R",-as.hexmode("25CF"),-as.hexmode("25CF"))),
col=ifelse(phenotypic_group=="m","indianred1",
ifelse(phenotypic_group=="R","indianred1","steelblue1")),
main=paste("Day 8"),
xlab="cell size",
ylab="mobility (halo size in mm)",
data=subset(d,day==5 & sequence==1))
points(motility~size,pch="x",
data=subset(d,day==5 & sequence==1 & mutation))
plot(motility~size,
cex=ifelse(phenotypic_group=="m",2,3.5)/2,
pch=ifelse(phenotypic_group=="m",-as.hexmode("25D1"),
ifelse(phenotypic_group=="R",-as.hexmode("25CF"),-as.hexmode("25CF"))),
col=ifelse(phenotypic_group=="m","indianred1",
ifelse(phenotypic_group=="R","indianred1","steelblue1")),
main=paste("Day 8"),
xlab="cell size",
ylab="mobility (halo size in mm)",
data=subset(d,day==8 & sequence==1))
points(motility~size,pch="x",
data=subset(d,day==8 & sequence==1 & mutation))
Testing the difference between the red isolates that carry a lrp mutation and those that do not have any mutation. First comparison, at day 5.
rm(x,y)
x <- with(subset(d,day==5 & sequence==1 & phenotypic_group %in% c("R","m") & mutation),
tapply(antibiosis,culture,mean,na.rm=T))
y <- with(subset(d,day==5 & sequence==1 & phenotypic_group %in% c("R","m") & !mutation),
tapply(antibiosis,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 6, p-value = 0.2571
## alternative hypothesis: true location shift is not equal to 0
rm(x,y)
x <- with(subset(d,day==5 & sequence==1 & phenotypic_group %in% c("R","m") & mutation),
tapply(size,culture,mean,na.rm=T))
y <- with(subset(d,day==5 & sequence==1 & phenotypic_group %in% c("R","m") & !mutation),
tapply(size,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 11, p-value = 0.1079
## alternative hypothesis: true location shift is not equal to 0
rm(x,y)
x <- with(subset(d,day==5 & sequence==1 & phenotypic_group %in% c("R","m") & mutation),
tapply(motility,culture,mean,na.rm=T))
y <- with(subset(d,day==5 & sequence==1 & phenotypic_group %in% c("R","m") & !mutation),
tapply(motility,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 2, p-value = 0.002664
## alternative hypothesis: true location shift is not equal to 0
Second comparison, at day 8.
rm(x,y)
x <- with(subset(d,day==8 & sequence==1 & phenotypic_group %in% c("R","m") & mutation),
tapply(size,culture,mean,na.rm=T))
y <- with(subset(d,day==8 & sequence==1 & phenotypic_group %in% c("R","m") & !mutation),
tapply(size,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 2, p-value = 0.001651
## alternative hypothesis: true location shift is not equal to 0
rm(x,y)
x <- with(subset(d,day==8 & sequence==1 & phenotypic_group %in% c("R","m") & mutation),
tapply(motility,culture,mean,na.rm=T))
y <- with(subset(d,day==8 & sequence==1 & phenotypic_group %in% c("R","m") & !mutation),
tapply(motility,culture,mean,na.rm=T))
wilcox.test(x,y)
##
## Wilcoxon rank sum test
##
## data: x and y
## W = 2, p-value = 0.001651
## alternative hypothesis: true location shift is not equal to 0
rm(list=ls())
d <- read.table("data_gasp_reversion.csv",header=T)
str(d)
## 'data.frame': 384 obs. of 17 variables:
## $ ID : Factor w/ 384 levels "J5_A2_1","J5_A2_2",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ culture : Factor w/ 96 levels "25_cult1","25_cult10",..: 89 89 89 89 93 93 93 93 93 93 ...
## $ day : int 5 5 5 5 5 5 5 5 5 5 ...
## $ colony : int 1 2 3 4 1 2 3 4 5 6 ...
## $ Clone : Factor w/ 8 levels "25","A_J8_1",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ well : Factor w/ 96 levels "A1","A10","A11",..: 5 5 5 5 9 9 9 9 9 9 ...
## $ row : Factor w/ 8 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ dil_sampling: int 5 5 5 5 5 5 5 5 5 5 ...
## $ dish : int NA NA NA NA NA NA NA NA NA NA ...
## $ color_1 : Factor w/ 5 levels "B","M","R","RV",..: 3 3 3 3 1 1 1 1 3 3 ...
## $ hemolysis_1 : Factor w/ 6 levels "N","P","P?","PF",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ hemolysis_2 : Factor w/ 5 levels "N","P","P?","PF",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ hemolysis_3 : Factor w/ 5 levels "N","P","P?","PF",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ size : num 1.78 1.8 2.02 2.06 1.85 ...
## $ sdSize : num 0.73 0.513 0.473 0.593 0.746 ...
## $ motility : num 18.65 18.39 11.26 9.63 24.36 ...
## $ antibiosis : num 32 26.9 27.2 36.6 38.3 ...
plot(motility~size,data=d,type="n",xlab="cell size",ylab="motility",xlim=c(1,7),las=1,cex.axis=0.75)
points(motility~size,data=subset(d,color_1=="R"),pch=21,col="indianred1",bg="indianred1")
points(motility~size,data=subset(d,color_1=="B"),pch=21,col="lightskyblue",bg="lightskyblue")
points(motility~size,data=subset(d,!color_1 %in% c("B","R")),pch="+",col="gray")
legend("bottomright",legend=c("blue","red","other"),pch=c(21,21,3),col=c("lightskyblue","indianred1","gray"),pt.bg=c("lightskyblue","indianred1","gray"),cex=0.75)
mutation <- matrix(c("X_J5_3","HTH","InDel","FS",
"B_J8_1","HTH","SNP","S",
"A_J8_1","HTH","SNP","*",
"S_J5_1","AsnC","Dupl","FS",
"25","AsnC","SNP","S",
"G_J8_4","AsnC","SNP","*",
"T_J8_1","IS","Tn1","Tn1",
"Q_J8_1","IS","Tn2","Tn2"),ncol=4,byrow=T)
y <- with(subset(d,!is.na(hemolysis_1) & !is.na(color_1)),tapply(color_1=="B",culture,any))
x <- gsub("^(.*)_.*","\\1",names(y))
x <- paste(mutation[match(x,mutation[,1]),2],mutation[match(x,mutation[,1]),4],sep=" ")
z <- tapply(y,x,sum,na.rm=T)
gr <- barplot(as.numeric(z),names.arg=names(z),horiz=T,axes=F,density=10,las=1)
axis(1,0:5)
rm(list=ls())
d <- read.table("data_cell_survival_stationary.csv",header=T)
str(d)
## 'data.frame': 108 obs. of 11 variables:
## $ LOAD : num 4.6 5.35 4.8 5.1 5.08 ...
## $ stderr_LOAD: num 0.1475 0.0432 0.1067 0.0782 0.0453 ...
## $ time : int 18 18 18 18 18 18 18 18 18 18 ...
## $ isolate : int 23 23 23 23 23 23 23 23 23 23 ...
## $ culture : int 1 10 11 12 2 3 4 5 6 7 ...
## $ thoma : int 13 24 12 25 35 34 27 16 21 14 ...
## $ dil_thoma : int 100 100 100 100 100 100 100 100 100 100 ...
## $ fluo : int 15240 18967 15612 20258 20046 18844 18119 16972 17519 16571 ...
## $ DO : num 0.496 0.702 0.498 0.727 0.727 0.655 0.623 0.579 0.624 0.565 ...
## $ LOAD_thoma : num 5.51 5.78 5.48 5.8 5.94 ...
## $ pc_alive : num 0.123 0.375 0.209 0.201 0.136 ...
Transform raw data so that differential in survival is computed between exponential phase and early or late stationary phase. Computation are done for each replicate culture.
ll <- split(d,with(d,paste(isolate,culture)))
res <- t(sapply(ll,function(dtmp) {
pos <- order(dtmp$time)
v <- dtmp[pos,"LOAD"] - dtmp[pos,"LOAD_thoma"]
v <- c(1,v[2]-v[1])
return(v)
}))
res <- as.data.frame(res)
res <- rbind(res,
t(sapply(ll,function(dtmp) {
pos <- order(dtmp$time)
v <- dtmp[pos,"LOAD"] - dtmp[pos,"LOAD_thoma"]
v <- c(2,v[3]-v[1])
return(v)
}))
)
colnames(res) <- c("t","diff")
res$isolate <- sapply(strsplit(rownames(res)," "),function(x) x[1])
res$t_isolate <-with(res,paste(t,isolate))
res$isolate <- factor(res$isolate)
res$t_isolate <- factor(res$t_isolate)
str(res)
## 'data.frame': 72 obs. of 4 variables:
## $ t : num 1 1 1 1 1 1 1 1 1 1 ...
## $ diff : num 0.0137 -0.2264 -0.1503 -0.044 0.0263 ...
## $ isolate : Factor w/ 3 levels "23","25","9": 1 1 1 1 1 1 1 1 1 1 ...
## $ t_isolate: Factor w/ 6 levels "1 23","1 25",..: 1 1 1 1 1 1 1 1 1 1 ...
library(beanplot)
beanplot(diff~t_isolate,
las=1,
names=NA,
what=c(F,T,F,F),
col=list("steelblue1","indianred1","chartreuse1"),
main="",xlab="",ylab="log survical differential",
data=res)
## new function
axis(2,pretty(res$diff),las=1)
abline(h=0,lty=2)
abline(v=3.5)
points(diff~jitter(as.numeric(t_isolate),0.5),
pch=21,
cex=0.75,
bg="gray",
data=res)
box()
mtext(side=3,at=4,line=-2,"***",cex=2)
mtext(side=3,at=c(2,5),paste(c("early","late"),"stationary"),line=0.5,cex=1.5)
mtext(side=1,at=1:6,line=1,c("G1#23","G2#25","G3#9"),las=3)
Comparison of survival differential to zero: the only significant deviation from zero occurs for isolate G1#23, which belongs to group 1.
with(res,tapply(diff,t_isolate,wilcox.test))
## $`1 23`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 28, p-value = 0.4238
## alternative hypothesis: true location is not equal to 0
##
##
## $`1 25`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 62, p-value = 0.07715
## alternative hypothesis: true location is not equal to 0
##
##
## $`1 9`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 39, p-value = 1
## alternative hypothesis: true location is not equal to 0
##
##
## $`2 23`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 0, p-value = 0.0004883
## alternative hypothesis: true location is not equal to 0
##
##
## $`2 25`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 61, p-value = 0.09229
## alternative hypothesis: true location is not equal to 0
##
##
## $`2 9`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 50, p-value = 0.4238
## alternative hypothesis: true location is not equal to 0
A more refined analysis, using mixed models, yields the exact same results! Here we analyze raw cell Thoma chamber counts using density estimates obtained from plating as an offset. A positive deviation from this offset will therefore corresponds to cell mortality, as it will mean that more cells have been counted than expected from the number of CFU. The model will predict this deviation for the three isolates we have studied and for the three time periods of the experiment.
library(spaMM)
## spaMM (version 2.6.1) is loaded.
## Type 'help(spaMM)' for a short introduction,
## and news(package='spaMM') for news.
d$isolate <- factor(d$isolate)
d$time <- factor(d$time)
d$ID_culture <- with(d,factor(paste(isolate,culture,sep="_")))
m.full <- HLfit(thoma~isolate*time+(1|ID_culture)
+offset(log(10^LOAD)-log(dil_thoma*250)),HLmethod="ML",
family=poisson,
rand.family=Gamma(log),
data=subset(d,pc_alive<5))
m1 <- update(m.full,.~.-isolate:time)
summary(m.full)
## formula: thoma ~ isolate * time + (1 | ID_culture) + offset(log(10^LOAD) -
## log(dil_thoma * 250))
## Estimation of lambda by Laplace ML approximation (p_v).
## Estimation of fixed effects by Laplace ML approximation (p_v).
## Family: poisson ( link = log )
## ------------ Fixed effects (beta) ------------
## Estimate Cond. SE t-value
## (Intercept) 0.30663 0.11709 2.6189
## isolate23 1.44015 0.15570 9.2497
## isolate25 -0.15414 0.16561 -0.9307
## time36 0.37260 0.08816 4.2266
## time64 0.06081 0.09061 0.6711
## isolate23:time36 -0.27484 0.11057 -2.4857
## isolate25:time36 -0.66683 0.12545 -5.3153
## isolate23:time64 2.32352 0.11570 20.0815
## isolate25:time64 -0.56732 0.12738 -4.4538
## --------------- Random effects ---------------
## Family: Gamma ( link = log )
## --- Variance parameters ('lambda'):
## lambda = var(u) for u ~ Gamma(sh=1/lambda, sc=1/lambda);
## ID_culture : 0.08223
## --- Coefficients for log(lambda):
## Group Term Estimate Cond.SE
## ID_culture (Intercept) -2.498 0.2423
## # of obs: 107; # of groups: ID_culture, 36
## ------------- Likelihood values -------------
## logLik
## p_v(h) (marginal L): -554.8729
anova(m.full,m1)
## chi2_LR df p_value
## p_v 1634.337 4 0
The interaction between isolate and time is therefore highly significant, with isolate G1#23 having the highest mortality rate after 64 hours.
rm(list=ls())
d.ci <- read.table("data_CI.csv",header=TRUE)
d.ci$F1 <-as.factor(d.ci$F1)
d.ci$F1D3 <-factor(d.ci$F1D3,levels = c("23","25","9"))
d.ci$F1_F1D3 <-with(d.ci,factor(paste(F1,F1D3)))
d.ci$F1D3_F1 <-with(d.ci,factor(paste(F1D3,F1)))
# load of GFP / non-GFP bacteria
d.ci$J0_GFP <- with(d.ci,log10(10^J0_XGFPROUGE+10^J0_XGFPBLEU))
d.ci$J0_NONGFP <- with(d.ci,log10(10^J0_XROUGE+10^J0_XBLEU))
d.ci$J3_GFP <- with(d.ci,log10(10^J3_XGFPROUGE+10^J3_XGFPBLEU))
d.ci$J3_NONGFP <- with(d.ci,log10(10^J3_XROUGE+10^J3_XBLEU))
d.ci$J5_GFP <- with(d.ci,log10(10^J5_XGFPROUGE+10^J5_XGFPBLEU))
d.ci$J5_NONGFP <- with(d.ci,log10(10^J5_XROUGE+10^J5_XBLEU))
# CI estimation
d.ci$r5_GFP <-with(d.ci,log10(10^J5_GFP/10^J0_GFP)) #growth rate for GFP
d.ci$r5_NONGFP <-with(d.ci,log10(10^J5_NONGFP/10^J0_NONGFP)) #growth rate for non-GFP
d.ci$CI5 <-with(d.ci,log10(10^r5_GFP/10^r5_NONGFP)) #CI
d.ci$se_J5 <- with(d.ci,ifelse(!is.na(J5_XBLEU_se),J5_XBLEU_se^2,0))
d.ci$se_J5 <- d.ci$se_J5 + with(d.ci,ifelse(!is.na(J5_XROUGE_se),J5_XROUGE_se^2,0))
d.ci$se_J5 <- d.ci$se_J5 + with(d.ci,ifelse(!is.na(J5_XGFPBLEU_se),J5_XGFPBLEU_se^2,0))
d.ci$se_J5 <- d.ci$se_J5 + with(d.ci,ifelse(!is.na(J5_XGFPROUGE_se),J5_XGFPROUGE_se^2,0))
d.ci$se_J5 <- sqrt(d.ci$se_J5) # CI standard error
Graphics and non-parametric comparison:
library(beanplot)
beanplot(CI5~F1D3,
what=c(F,T,F,F),
col=list("23"="steelblue1","25"="indianred1","9"="chartreuse1"),
names=paste("G",1:3,"#",levels(d.ci$F1D3),sep=""),
ylab="log CI",
data=d.ci)
## new function
abline(h=0,lty=2)
points(CI5~jitter(as.numeric(F1D3),0.5),
cex=0.75,pch=21,bg="gray",
data=d.ci)
Compare log CI to zero for each isolate: log CI is (weakly) significantly negative for G1#23, and strongly significantly positive for G2#25 and G3#9. G2#25 and G3#9 can therefore increase in frequency when inoculated in a V1 culture.
with(d.ci,tapply(CI5,F1D3,wilcox.test))
## $`23`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 1, p-value = 0.01563
## alternative hypothesis: true location is not equal to 0
##
##
## $`25`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 78, p-value = 0.0004883
## alternative hypothesis: true location is not equal to 0
##
##
## $`9`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 78, p-value = 0.0004883
## alternative hypothesis: true location is not equal to 0
Compare log CI among isolates : G2#25 has a significantly higher log CI than the two other isolates.
with(d.ci,pairwise.wilcox.test(CI5,F1D3))
##
## Pairwise comparisons using Wilcoxon rank sum test
##
## data: CI5 and F1D3
##
## 23 25
## 25 4.8e-05 -
## 9 4.8e-05 1e-04
##
## P value adjustment method: holm
rm(list=ls())
d <- read.table(file = "data_emergence_insect.csv",header=TRUE)
beanplot(I(CFU_ROUGE/CFU)~factor(day),
las=1,
what=c(F,T,F,F),
col="steelblue1",
log="",
data=subset(d,day>=1 & day<=10),
xlab="days post injection",ylab="proportion of red colonies")
## new function
points(I(CFU_ROUGE/CFU)~jitter(as.numeric(factor(day))),
pch=21,bg="indianred1",cex=0.5,
data=subset(d,day>=1 & day<=10))
A glm is ajusted on colony count data. For each sample, the most informative dilution (i.e. the dilution for which the highest number of red colonies has been counted) is analyzed. A quasibinomial model is used here to correct for sur-dispersion. The prediction of the model are then represented on a graph together which raw data.
d$best_dilution <- 3+unlist(apply(d[,paste("NB_ROUGE",4:6,sep="_")],1,function(x) if(!any(!is.na(x))) return(NA) else return(which.max(x))))
d$NB_RED <- apply(d[,c("best_dilution","NB_ROUGE_4","NB_ROUGE_5","NB_ROUGE_6")],1,function(x) ifelse(is.na(x[1]),0,x[x[1]-2]))
d$NB_TOTAL <- apply(d[,c("best_dilution","NB_TOTAL_4","NB_TOTAL_5","NB_TOTAL_6")],1,function(x) ifelse(is.na(x[1]),x[2],x[x[1]-2]))
d$DIL <- ifelse(is.na(d$best_dilution),4,d$best_dilution)
m <- glm(cbind(NB_RED,NB_TOTAL-NB_RED)~day,data=subset(d,day>=1 & day<=10),family="quasibinomial")
plot(I(CFU_ROUGE/CFU)~jitter(day),
pch=21,bg="indianred1",cex=0.5,
xlab="day post injection",
ylab="proportion of red CFU",
data=subset(d,day>=1 & day<=10))
t <- seq(1,10,length=100)
pr <- predict(m,type = "response",newdata = data.frame(day=t))
lines(t,pr,col="red")
anova(m,test="F")
## Analysis of Deviance Table
##
## Model: quasibinomial, link: logit
##
## Response: cbind(NB_RED, NB_TOTAL - NB_RED)
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 98 5070.7
## day 1 3315.1 97 1755.6 149.19 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rm(list=ls())
d <- read.table(file = "data_emergence_insect_motility.csv",header=TRUE)
Compute difference in median motility between red and blue clones.
ld <- split(d,with(d,paste(insect,well))) ## split data by insect and well
d2 <- sapply(ld,function(dtmp) {
pos <- -(1:nrow(dtmp))
## motility by color
z1 <- with(subset(dtmp[-pos,],color_NBTA=="R"), motility)
z2 <- with(subset(dtmp[-pos,],color_NBTA=="B"), motility)
## difference in median motility
m <- median(z1,na.rm=T)-median(z2,na.rm=T) ## difference between red and blue
res <- c(unique(dtmp$day),length(z1),m)
return(res)
})
d2 <- as.data.frame(t(d2))
colnames(d2) <- c("day","n","median")
rm(ld)
library(beanplot)
beanplot(median~factor(day),
las=1,
what=c(F,T,T,F),
col="steelblue1",
log="",
beanlines="median",
data=subset(d2,day>0 & day<=10),xlab="days post injection",ylab="difference in motility")
## new function
abline(h=0,lty=2)
points(median~jitter(as.numeric(factor(day))),
pch=21,
bg="indianred1",
cex=0.5,
data=subset(d2,day>0 & day<=10),xlab="",ylab="")
Comparison of delta in motility to zero : red clones become less motile than blue clones at day 3.
with(subset(d2,day>0),tapply(median,day,wilcox.test))
## $`1`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 14, p-value = 0.5625
## alternative hypothesis: true location is not equal to 0
##
##
## $`2`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 13, p-value = 0.5469
## alternative hypothesis: true location is not equal to 0
##
##
## $`3`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 7, p-value = 0.009277
## alternative hypothesis: true location is not equal to 0
##
##
## $`6`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 29, p-value = 0.003153
## alternative hypothesis: true location is not equal to 0
##
##
## $`10`
##
## Wilcoxon signed rank test
##
## data: X[[i]]
## V = 35, p-value = 0.791
## alternative hypothesis: true location is not equal to 0
In vivo and in vitro growth are positively correlated (figure 4C).
rm(list=ls())
d <- read.table("data_xenorhabdus_F1D3_isolates.csv", h=T, sep=";")
plot(invivoCFU~CFU,
data=d,
pch=21,cex=0.5+as.numeric(cut(FSC,4))/2,
bg=c("steelblue1","indianred1","chartreuse1")[group],
ylab="log CFU per insect",xlab="log CFU per mL")
with(d,cor.test(invivoCFU,CFU,method="kendall"))
##
## Kendall's rank correlation tau
##
## data: invivoCFU and CFU
## T = 393, p-value = 0.0006766
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.4010695
rm(list = ls())
d <- read.table("data_virulence.csv", h=T)
library(survival)
library(coxme)
## Loading required package: bdsmatrix
##
## Attaching package: 'bdsmatrix'
## The following object is masked from 'package:base':
##
## backsolve
plot(LT~log_CFU_inoculum,
pch=21,
las=1,
xlab="log number of injected cells",
ylab="hours to death",
data=subset(d, group!="LB"),
col=c("steelblue1","indianred1","chartreuse1")[group],
bg=c("steelblue1","indianred1","chartreuse1")[group])
legend("topright", c("group 1", "group 2", "group 3"),
col=c("steelblue1","indianred1","chartreuse1"),
pt.bg = c("steelblue1","indianred1","chartreuse1"),
pch = c(rep(21,2)),
cex=1.2)
nd <- data.frame(log_CFU_inoculum=rep(seq(3,8,length.out = 60),3), group=c(rep(1,60), rep(2,60), rep(3,60)))
p1 <- predict(survreg(Surv(LT,dead)~factor(group)*log_CFU_inoculum,
data=subset(d, group!="LB"),
dist="lognormal",
control=list(maxiter = 100)), newdata=nd, se.fit=T)
lines(p1$fit[1:60]~seq(3,8,length.out = 60),col ="blue4", lwd=2)
lines(p1$fit[61:120]~seq(3,8,length.out = 60),col ="firebrick", lwd=2)
lines(p1$fit[121:180]~seq(3,8,length.out = 60),col ="olivedrab3", lwd=2)
m <- coxme(Surv(LT,dead)~factor(group)*log_CFU_inoculum+(1|isolate)+(1|experiment),
data=subset(d, isolate!= "LB"))
m1 <- coxme(Surv(LT,dead)~factor(group)+log_CFU_inoculum+(1|isolate)+(1|experiment),
data=subset(d, isolate!= "LB"))
m2 <- coxme(Surv(LT,dead)~factor(group)+(1|isolate)+(1|experiment),
data=subset(d, isolate!= "LB"))
m3 <- coxme(Surv(LT,dead)~log_CFU_inoculum+(1|isolate)+(1|experiment),
data=subset(d, isolate!= "LB"))
ma <- coxme(Surv(LT,dead)~factor(group)*log_CFU_inoculum+(1|isolate),
data=subset(d, isolate!= "LB"))
mb <- coxme(Surv(LT,dead)~factor(group)*log_CFU_inoculum+(1|experiment),
data=subset(d, isolate!= "LB"))
anova(m,m1) # test of the interaction effect
## Analysis of Deviance Table
## Cox model: response is Surv(LT, dead)
## Model 1: ~factor(group) * log_CFU_inoculum + (1 | isolate) + (1 | experiment)
## Model 2: ~factor(group) + log_CFU_inoculum + (1 | isolate) + (1 | experiment)
## loglik Chisq Df P(>|Chi|)
## 1 -689.19
## 2 -691.27 4.1485 2 0.1256
anova(m1,m2) # test of the log_CFU_inoculum effect
## Analysis of Deviance Table
## Cox model: response is Surv(LT, dead)
## Model 1: ~factor(group) + log_CFU_inoculum + (1 | isolate) + (1 | experiment)
## Model 2: ~factor(group) + (1 | isolate) + (1 | experiment)
## loglik Chisq Df P(>|Chi|)
## 1 -691.27
## 2 -738.73 94.923 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m1,m3) # test of the group effect
## Analysis of Deviance Table
## Cox model: response is Surv(LT, dead)
## Model 1: ~factor(group) + log_CFU_inoculum + (1 | isolate) + (1 | experiment)
## Model 2: ~log_CFU_inoculum + (1 | isolate) + (1 | experiment)
## loglik Chisq Df P(>|Chi|)
## 1 -691.27
## 2 -700.23 17.926 2 0.000128 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m, ma) # test of the random effect experiment
## Analysis of Deviance Table
## Cox model: response is Surv(LT, dead)
## Model 1: ~factor(group) * log_CFU_inoculum + (1 | isolate) + (1 | experiment)
## Model 2: ~factor(group) * log_CFU_inoculum + (1 | isolate)
## loglik Chisq Df P(>|Chi|)
## 1 -689.19
## 2 -692.28 6.1802 1 0.01292 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m, mb) # test of the random effect isolate
## Analysis of Deviance Table
## Cox model: response is Surv(LT, dead)
## Model 1: ~factor(group) * log_CFU_inoculum + (1 | isolate) + (1 | experiment)
## Model 2: ~factor(group) * log_CFU_inoculum + (1 | experiment)
## loglik Chisq Df P(>|Chi|)
## 1 -689.19
## 2 -689.31 0.2482 1 0.6183
# Verif hypothèses test de cox :
cox.zph(coxph(Surv(LT,dead)~factor(group)*log_CFU_inoculum,
data=subset(d, isolate!= "LB")))
## rho chisq p
## factor(group)2 0.0848 1.243 0.265
## factor(group)3 -0.0795 1.116 0.291
## log_CFU_inoculum -0.0432 0.325 0.569
## factor(group)2:log_CFU_inoculum -0.0562 0.556 0.456
## factor(group)3:log_CFU_inoculum 0.0712 0.894 0.344
## GLOBAL NA 8.244 0.143
# Ho : les coef sont stables au cours du temps (hypothèse des risques proportionnels)
# Ici OK pour tous les coef
# Mais utilisé sur un coxph, dans quelle mesure est-ce bon pour coxme ?
rm(list=ls())
d <- read.table("data_xenorhabdus_F1D3_isolates.csv", h=T, sep=";")
plot(log(1+R0)~invivoCFU,
data=d,
pch=21,cex=0.5+as.numeric(cut(CFU,4))/2,
bg=c("steelblue1","indianred1","chartreuse1")[group],
col=c("steelblue1","indianred1","chartreuse1")[group],
xlab="log CFU per insect",ylab="log(1+R0)")
with(d,cor.test(invivoCFU,R0,method="kendall"))
##
## Kendall's rank correlation tau
##
## data: invivoCFU and R0
## T = 158, p-value = 0.0001944
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## -0.4367201
Number of bacteria carried per IJ marginaly, and positively, correlates with CFU (figure 5C)
plot(bact_per_IJ~invivoCFU,
data=d,
pch=21,cex=0.5+as.numeric(cut(CFU,4))/2,
bg=c("steelblue1","indianred1","chartreuse1")[group],
col=c("steelblue1","indianred1","chartreuse1")[group],
xlab="log CFU per insect",ylab="bacteria per IJ")
with(d,cor.test(invivoCFU,bact_per_IJ,method="kendall"))
##
## Kendall's rank correlation tau
##
## data: invivoCFU and bact_per_IJ
## T = 353, p-value = 0.03203
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.258467
NB: for a detailed analysis of variant transmission, see supplemental file S5