library(ggplot2)
library(plyr)
# Raw benchmark results: one header-less, semicolon-separated file per site.
grenoble <- read.csv("./grenoble.csv", sep = ";", header = FALSE,
                     stringsAsFactors = FALSE)
luxembourg <- read.csv("./luxembourg.csv", sep = ";", header = FALSE,
                       stringsAsFactors = FALSE)
nancy <- read.csv("./nancy.csv", sep = ";", header = FALSE,
                  stringsAsFactors = FALSE)

# Stack the three sites into one table (grenoble, then nancy, then luxembourg).
# Note: the variable `all` shadows the name of base::all().
all <- rbind(rbind(grenoble, nancy), luxembourg)
# Chargement des données de l’infrastructure Grid5000
# Infrastructure description: header-less, semicolon-separated, three columns.
infra <- read.csv("infra.csv", sep = ";", header = FALSE,
                  stringsAsFactors = FALSE)
colnames(infra) <- c("Hostname", "Model", "Size")
# Fonction pour ajouter les noms de colonnes, enlever les erreurs, merger avec les données “infra”, formater les unités et ne conserver que les écritures directes sur disque (direct_io=1)
# Name the raw benchmark columns, drop failed / non-direct-IO runs, join the
# infrastructure table and add derived columns.
#
# Args:
#   df:    raw results with 18 unnamed columns, in the order given by the
#          names assigned below.
#   infra: data frame with a `Hostname` column to join on; the callers in this
#          file pass columns Hostname, Model and Size.
#
# Returns:
#   The rows of `df` with Error == "0" and DirectIO == "1" that have a matching
#   Hostname in `infra`, with these columns added or rewritten:
#     Hostname  - text before the first "." of the original host name
#     HostModel - "<Hostname> - <Model>"
#     Duration  - Runtime / 1000 (seconds)
#     Size      - FileSize / 1024 / 1024; this replaces any `Size` column that
#                 came from `infra` (presumably bytes -> MiB; confirm units)
#     Bwi       - Duration / Size
clean_up <- function(df, infra) {
  names(df) <- c("Hostname", "Date", "DirectIO", "IOengine", "IOscheduler",
                 "Error", "Operation", "Jobs", "BufferSize", "FileSize",
                 "Runtime", "Bandwidth", "BandwidthMin", "BandwidthMax",
                 "Latency", "LatencyMin", "LatencyMax", "IOPS")
  df <- subset(df, Error == "0")
  df <- subset(df, DirectIO == "1")
  df <- merge(df, infra, by = "Hostname")
  # vapply() always yields a character vector; sapply() would return an empty
  # list here when no row survives the filters above.
  df$Hostname <- vapply(strsplit(df$Hostname, "[.]"), "[", character(1), 1)
  df$HostModel <- paste(df$Hostname, df$Model, sep = " - ")
  df$Duration <- df$Runtime / 1000 # fio outputs runtime in msec, we want to display seconds
  df$Size <- df$FileSize / 1024 / 1024
  df$Bwi <- df$Duration / df$Size
  df
}
# Clean the combined results once; every subset below is taken from it.
all <- clean_up(all, infra)

# Single-job runs, one table per disk model.
sata <- subset(all, Jobs == "1" & Model == "SATA")
sata2 <- subset(all, Jobs == "1" & Model == "SATA II")
sas <- subset(all, Jobs == "1" & Model == "SAS")

# One cluster per disk model, selected by host-name prefix.
griffon <- subset(sata2, grepl(pattern = "^griffon", x = Hostname))
edel <- subset(sata, grepl(pattern = "^edel", x = Hostname))
granduc <- subset(sas, grepl(pattern = "^granduc", x = Hostname))
# Scatter plot of Duration against Size, coloured by Operation, with one
# facet per IOscheduler.
#
# Args:
#   df:      data frame with columns Size, Duration, Operation, IOscheduler.
#   cluster: label appended to the plot title.
#
# Returns: the ggplot object.
plot_duration <- function(df, cluster) {
  heading <- paste("Runtime vs File size for ", cluster)
  chart <- ggplot(df, aes(x = Size, y = Duration, color = Operation))
  chart <- chart + theme_bw() + geom_point(alpha = 1/10)
  chart <- chart + facet_wrap(~ IOscheduler)
  chart <- chart + ylab("Runtime (sec.)") + xlab("FileSize (Mo)")
  chart + ggtitle(heading)
}
# Runtime-vs-file-size plot for each of the three cluster subsets.
# The calls are left at top level so that the returned plot object is
# displayed by R's top-level auto-printing.
plot_duration(griffon,"GRIFFON (SATA II)")
plot_duration(edel,"EDEL (SATA)")
plot_duration(granduc,"GRANDUC (SAS)")
# Scatter plot of Latency against Size, coloured by Operation, with one
# facet per IOscheduler.
#
# Args:
#   df:      data frame with columns Size, Latency, Operation, IOscheduler.
#   cluster: label appended to the plot title.
#
# Returns: the ggplot object.
plot_latency <- function(df, cluster) {
  heading <- paste("Latency vs File size for ", cluster)
  mapping <- aes(x = Size, y = Latency, color = Operation)
  ggplot(df, mapping) +
    theme_bw() +
    geom_point(alpha = 1/10) +
    facet_wrap(~ IOscheduler) +
    ylab("Latency (µsec.)") +
    xlab("File size (Mo)") +
    ggtitle(heading)
}
# Latency plot for each cluster subset; only the griffon plot has its y axis
# restricted to 0-1000.
plot_latency(griffon,"GRIFFON (SATA II)")+ ylim(0,1000)
# The "## Warning" lines are console output captured from an earlier run
# (presumably points falling outside the y limits set above).
## Warning: Removed 250 rows containing missing values (geom_point).
## Warning: Removed 218 rows containing missing values (geom_point).
plot_latency(edel,"EDEL (SATA)")
plot_latency(granduc,"GRANDUC (SAS)")
# Scatter plot of Bwi against Size with a linear fit per Operation, one facet
# per IOscheduler, and the y axis limited to [0, 0.1].
#
# Args:
#   df:      data frame with columns Size, Bwi, Operation, IOscheduler.
#   cluster: label appended to the plot title.
#
# Returns: the ggplot object.
plot_bandwidth <- function(df, cluster) {
  heading <- paste("Bandwidth (Duration/Size) for ", cluster)
  chart <- ggplot(data = df, aes(x = Size, y = Bwi, color = Operation))
  chart <- chart + geom_point(alpha = .5) + facet_wrap(~ IOscheduler)
  chart <- chart + theme_bw() + geom_smooth(method = "lm")
  chart + ggtitle(heading) + ylim(0, 0.1)
}
# Bwi-vs-size plot (with linear fit) for each cluster subset.
# The "## Warning" lines are console output captured from an earlier run
# (presumably points outside the y limits applied inside plot_bandwidth).
plot_bandwidth(griffon,"GRIFFON (SATA II)")
## Warning: Removed 4 rows containing missing values (stat_smooth).
## Warning: Removed 2 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
plot_bandwidth(edel,"EDEL (SATA)")
plot_bandwidth(granduc,"GRANDUC (SAS)")
## Warning: Removed 879 rows containing missing values (stat_smooth).
## Warning: Removed 873 rows containing missing values (stat_smooth).
## Warning: Removed 879 rows containing missing values (geom_point).
## Warning: Removed 873 rows containing missing values (geom_point).
# Scatter plot of Bwi against Size with a linear fit (no confidence band,
# extended over the full x range) per Operation, one facet per IOscheduler.
#
# Args:
#   df:      data frame with columns Size, Bwi, Operation, IOscheduler.
#   cluster: label appended to the plot title.
#
# Returns: the ggplot object (callers may add e.g. ylim() to it).
plot_bandwidth_linearity <- function(df, cluster) {
  ggplot(df, aes(x = Size, y = Bwi, color = Operation)) +
    theme_bw() +
    geom_point(alpha = .2) +
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    geom_smooth(method = "lm", se = FALSE, fullrange = TRUE) +
    facet_wrap(~ IOscheduler) +
    ylab("Bandwidth") +
    xlab("File Size (MiB)") +
    ggtitle(paste("Checking the linearity of bandwidth for ", cluster))
}
# Linearity check of Bwi for each cluster subset, each with its own y range.
# The "## Warning" lines are console output captured from an earlier run
# (presumably points outside the y limits added to each plot).
plot_bandwidth_linearity(griffon,"GRIFFON (SATA II)")+ ylim(0,0.03)
## Warning: Removed 324 rows containing missing values (stat_smooth).
## Warning: Removed 2 rows containing missing values (stat_smooth).
## Warning: Removed 310 rows containing missing values (stat_smooth).
## Warning: Removed 324 rows containing missing values (geom_point).
## Warning: Removed 312 rows containing missing values (geom_point).
plot_bandwidth_linearity(edel,"EDEL (SATA)")+ ylim(0,0.01)
## Warning: Removed 243 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 250 rows containing missing values (stat_smooth).
## Warning: Removed 243 rows containing missing values (geom_point).
## Warning: Removed 251 rows containing missing values (geom_point).
plot_bandwidth_linearity(granduc,"GRANDUC (SAS)") + ylim(0,0.7)
# Scatter plot of IOPS against Size with a linear fit (no confidence band,
# extended over the full x range) per Operation, one facet per IOscheduler.
#
# Args:
#   df:      data frame with columns Size, IOPS, Operation, IOscheduler.
#   cluster: label appended to the plot title.
#
# Returns: the ggplot object.
plot_linearity_iops <- function(df, cluster) {
  ggplot(df, aes(x = Size, y = IOPS, colour = Operation)) +
    theme_bw() +
    geom_point(alpha = .2) +
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    geom_smooth(method = "lm", se = FALSE, fullrange = TRUE) +
    facet_wrap(~ IOscheduler) +
    ylab("IOPS") +
    xlab("File Size (in MiB)") +
    ggtitle(paste("Checking the linearity of IOPS for ", cluster))
}
# IOPS linearity plot for each of the three cluster subsets.
plot_linearity_iops(griffon,"GRIFFON (SATA II)")
plot_linearity_iops(edel,"EDEL (SATA)")
plot_linearity_iops(granduc,"GRANDUC (SAS)")
# Draw, for read and then write operations, a density histogram of Bwi with a
# normal curve (same mean and standard deviation) overlaid.
#
# Args:
#   df:      data frame with columns Operation and Bwi.
#   cluster: label appended to each histogram title.
#
# Returns:
#   Invisibly, the value of the last curve() call that was drawn (a list with
#   x and y), or NULL when no curve could be drawn.
#
# Non-finite Bwi values (NA, NaN, Inf) are ignored. An operation with no
# usable value is skipped with a message instead of failing in hist(), and
# the curve is omitted when the standard deviation is undefined or zero.
plot_variability <- function(df, cluster) {
  last_curve <- NULL
  for (op in c("read", "write")) {
    # %in% never yields NA, so rows with a missing Operation are dropped.
    bwi <- df$Bwi[df$Operation %in% op]
    bwi <- bwi[is.finite(bwi)]
    if (length(bwi) == 0L) {
      message("No finite Bwi values for ", op, " operations on ", cluster,
              "; skipping.")
      next
    }
    hist(bwi, freq = FALSE, xlab = "Bandwidth",
         breaks = 20,
         main = paste("Variability of Bwi for", op, "operations for", cluster),
         col = "lightgreen")
    centre <- mean(bwi)
    spread <- sd(bwi)
    if (is.finite(spread) && spread > 0) {
      last_curve <- curve(dnorm(x, mean = centre, sd = spread),
                          add = TRUE, col = "darkblue", lwd = 2)
    }
  }
  invisible(last_curve)
}
# Bwi variability histograms (read and write) for each cluster subset.
plot_variability(griffon,"GRIFFON (SATA II)")
plot_variability(edel,"EDEL (SATA)")
plot_variability(granduc,"GRANDUC (SAS)")
# Runs with more than one job, one table per disk model.
sata_c <- subset(all, Jobs > 1 & Model == "SATA")
sata2_c <- subset(all, Jobs > 1 & Model == "SATA II")
sas_c <- subset(all, Jobs > 1 & Model == "SAS")

# Same clusters as for the single-job runs, selected by host-name prefix.
griffon_c <- subset(sata2_c, grepl(pattern = "^griffon", x = Hostname))
edel_c <- subset(sata_c, grepl(pattern = "^edel", x = Hostname))
granduc_c <- subset(sas_c, grepl(pattern = "^granduc", x = Hostname))
# Bwi by number of jobs (as a factor), coloured by Operation: points, lines
# and box plots, faceted by Size and IOscheduler.
#
# Args:
#   df:    data frame with columns Jobs, Bwi, Operation, Size, IOscheduler.
#   title: plot title.
#
# Returns: the ggplot object.
plot_crw <- function(df, title) {
  mapping <- aes(x = factor(Jobs), y = Bwi, color = Operation)
  chart <- ggplot(df, mapping) + theme_bw()
  chart <- chart + geom_point(alpha = 1/10) + geom_line()
  chart <- chart + facet_wrap(Size ~ IOscheduler)
  chart <- chart + ylab("Bandwidth") + xlab("Jobs") + ggtitle(title)
  chart + geom_boxplot()
}
# Concurrent-operation plots, one per disk model (all clusters of that model).
plot_crw(sata_c,"SATA CONCURRENT OPERATIONS")
plot_crw(sata2_c,"SATA II CONCURRENT OPERATIONS")
plot_crw(sas_c,"SAS CONCURRENT OPERATIONS")
# Aggregate bandwidth against the number of concurrent jobs, coloured by
# Operation, with a linear fit and one facet per IOscheduler.
#
# Args:
#   df:      data frame with columns FileSize, Jobs, Runtime, Operation and
#            IOscheduler.
#   cluster: label appended to the plot title.
#
# Returns: the ggplot object.
#
# BW is expressed in the unit shown on the y axis (MiB/s). It uses the same
# conversions as clean_up() in this file: FileSize / 1024 / 1024 for the size
# and Runtime / 1000 for seconds. The previous FileSize * Jobs / Runtime was
# in raw size units per millisecond and did not match the axis label.
# NOTE(review): assumes FileSize is in bytes and Runtime in msec - confirm.
plot_aggregate <- function(df, cluster) {
  df$TotalSize <- df$FileSize * df$Jobs
  df$BW <- (df$TotalSize / 1024 / 1024) / (df$Runtime / 1000)
  ggplot(data = df, aes(x = Jobs, y = BW, color = Operation)) +
    theme_bw() +
    geom_point(alpha = .5) +
    facet_wrap(~ IOscheduler) +
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    geom_smooth(method = "lm", se = FALSE, fullrange = TRUE) +
    xlab("Number of concurrent operations") +
    ylab("Aggregated Bandwidth (MiB/s)") +
    ggtitle(paste("Influence of the number of concurrent operations on aggregate bandwidth \n on ", cluster))
}
# Aggregate-bandwidth plot for the multi-job runs of each cluster.
plot_aggregate(griffon_c,"GRIFFON (SATA II)")
plot_aggregate(edel_c,"EDEL (SATA)")
plot_aggregate(granduc_c,"GRANDUC (SAS)")