GSVA (V1.25.0) software package is an open source package available from R/Bioconductor and is used as a non-parametric, unsupervised method for estimating the variation of pre-defined gene sets in patient and control samples of microarray expression data sets 
www.bioconductor.org/packages/release/bioc/html/GSVA.html

Hänzelmann, S. et al. GSVA: gene set variation analysis for microarray and RNA-seq data. BMC Bioinformatics. 14:7(2013).

Example of Gene Set Variation Analysis Used by AMPLE BioSolutions

########GSVA script######
#######Geneset Variation analysis (GSVA) script######
#######Load the eset generated using the brain array CDF########
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("GSVA", version = "3.8")
library(GSVA)


load("~/Catalina-et-al_Nat-Comm_Suppl-Scripts/Expression_set_EXAMPLE/GSE88884_Batch-1_RMA-Normalized_ESET.Rdata")
ls()
eset_ba <- eset
pDATA <- pData(eset_ba)
table(eset_ba$Cohort) #10 CTL, 813 SLE
table(eset_ba$Race)

#####Removing the control probe####
index <- which( substr(rownames(eset_ba),1,4)=="AFFX" ) # 0 probes have been removed
eset_ba <- eset_ba[ -index, ]

fData(eset_ba)[,"SYMBOL"] <- as.character(fData(eset_ba)[,"geneSymbol"])

cohort = as.character(eset_ba$Cohort)
eset_ba$friendlyName <- paste(cohort, substr(eset_ba$Subject_ID,6,9),eset_ba$SLEDAI, sep="_")
sampleNames(eset_ba) <- eset_ba$friendlyName

FD_ba <- fData(eset_ba)
#####Exactly has to match with filtered eset (above one)######
IQR_BA <- data.frame()
for(i in 1:nrow(exprs(eset_ba))) {
  row <- as.vector(exprs(eset_ba)[i,])
  IQR_BA[i,1] <- IQR(row)
}
rownames(IQR_BA) <- rownames(exprs(eset_ba))
IQR_BA$probe <- rownames(IQR_BA)
colnames(IQR_BA)[1] <- "IQR"

index <- which(IQR_BA$IQR>0.05) #32,500
IQR_ba_filtered <- IQR_BA[index,]

IQR_hist <- hist(IQR_ba_filtered$IQR, breaks=1000, xlab="probe IQRs",
                 main="probe Density at IQR Intervals",
                 sub=paste(nrow(IQR_ba_filtered), "total probes"), xlim=c(0,1), col="yellow")
IQR_max <- which(IQR_hist$counts==max(IQR_hist$counts))
iqr <- as.vector(IQR_hist$mids)[IQR_max]
abline(v=iqr, lty=2, lwd=2, col="red")

IQR_ba_filtered <- IQR_ba_filtered[which(IQR_ba_filtered$IQR>iqr),,FALSE] #21,945

index <- which(rownames(eset_ba)%in%IQR_ba_filtered$probe)#21945
eset.iqr.filtered <- eset_ba[index,]
FD_ba_filtered <- fData(eset.iqr.filtered)
eset_ba_iqr_filtered <- exprs(eset.iqr.filtered)
identical(rownames(FD_ba_filtered),rownames(eset_ba_iqr_filtered))
identical(rownames(FD_ba_filtered),IQR_ba_filtered$probe)
eset_ba_iqr_filtered <- cbind(eset_ba_iqr_filtered,IQR_ba_filtered,FD_ba_filtered[,c(2,4)])
eset_ba_iqr_filtered[,828] = "BA_CDF"
colnames(eset_ba_iqr_filtered)[828] = "CDF"

#index <- which(is.na(eset_ba_iqr_filtered)[,"geneEntrezID"]) # 52 probes removed without gene mapping
index <- which(is.na(eset_ba_iqr_filtered)[,"geneSymbol"]) #866
eset_ba_iqr_filtered_NA <- eset_ba_iqr_filtered[-index,] #22,106
eset_ba_iqr_filtered_NA = eset_ba_iqr_filtered_NA[ order(eset_ba_iqr_filtered_NA$IQR), ] #21079
setwd("~/Catalina-et-al_Nat-Comm_Suppl-Scripts/GSVA_EXAMPLE")
save(eset_ba_iqr_filtered_NA, file="eset_ba_probes_filtered_IQR.Rdata")

#######Now trying to remove duplicate gene symbols based on the IQR.####
eset_ba_iqr_filtered_NA = eset_ba_iqr_filtered_NA[ order(-eset_ba_iqr_filtered_NA$IQR), ]
eset_ba_filtered_iqr_genes <- eset_ba_iqr_filtered_NA[ !duplicated(eset_ba_iqr_filtered_NA$geneSymbol), ]#21,078
eset_ba_filtered_iqr_entrezid <- eset_ba_iqr_filtered_NA[ !duplicated(eset_ba_iqr_filtered_NA$geneEntrezID), ]#21,079

setwd("~/Catalina-et-al_Nat-Comm_Suppl-Scripts/GSVA_EXAMPLE")
save(eset_ba_iqr_filtered_NA,file="ba_iqr_filtered.Rdata")
save(eset_ba_filtered_iqr_genes,eset_ba_filtered_iqr_entrezid,file="eset_ba_iqr_filtered_sig.Rdata")

####Removing duplicate gene symbols based on the highest IQR vales. Retaining the probes(genesymbol) with high IQR values####
eset_genes <- eset_ba_filtered_iqr_genes
colnames(eset_genes)
rownames(eset_genes) <- eset_genes$geneSymbol
eset_genes <- eset_genes[,c(1:823)]
#colnames(eset_genes) <- sampleNames(eset_ba)
eset_genes <- as.matrix(eset_genes)
####GSVA script Load the genesets######

IFN_GSVA <- read.delim("~/Catalina-et-al_Nat-Comm_Suppl-Scripts/GSVA_EXAMPLE/IFN_Signatues.txt", sep="\t", dec=",")
IFN_GSVA[,] <- lapply(IFN_GSVA[,],as.character)
IFN_GSVA_stacked <- stack(IFN_GSVA)
IFN_GSVA_stacked <- IFN_GSVA_stacked[-which(IFN_GSVA_stacked$values == ""),]
colnames(IFN_GSVA_stacked)[1] <- "geneSymbol"
colnames(IFN_GSVA_stacked)[2] <- "Category"

testList <- tapply(IFN_GSVA_stacked$geneSymbol,IFN_GSVA_stacked$Category,c)

n <- names(testList)
uniqueList <- lapply(testList, unique)

makeSet <- function(geneIds, n) {
  GeneSet(geneIds, geneIdType=SymbolIdentifier(), setName=n)
}

library(dplyr)
library(GSEABase)
library(GSVA)
gsList <- gsc <- mapply(makeSet, uniqueList[], n)
newlist = gsList %>% lapply(function(x) trimws(x@geneIds))
#gsc <- GeneSetCollection(gsc)

#### AFTER CREATING THE GENESET COLLECTION OBJECT NOW WE CAN PERFORM GSVA FUNCTION AND 
#### GET EXPRESSION SET WHICH IS TRANSFORMED FROM GENE BY SAMPLE TO GENESET BY SAMPLE

eset <- eset_genes

###computing GSVA enrichment scores#####
gse88884_illum1_SLE_CTL_gsva <- gsva(eset, newlist, method = c("gsva"), rnaseq=FALSE,
                                     min.sz=1, max.sz=Inf, verbose=TRUE)$es.obs


overlap_genes <- merge(eset_ba_filtered_iqr_genes,IFN_GSVA_stacked,by.x="geneSymbol",by.y="geneSymbol")

#####Save the enrichment scores object#######
setwd("~/Catalina-et-al_Nat-Comm_Suppl-Scripts/GSVA_EXAMPLE")
write.csv(gse88884_illum1_SLE_CTL_gsva,file="GSE88884_Lilly_Illuminate-1_Female_MixedRace_SLEDAI6-27_813SLE_10CTL_BA_Probes_IFN-Paper-IQR_GSVA_Enrichment_scores.csv")
write.csv(overlap_genes,file="GSE88884_Lilly_Illuminate-1_Female_MixedRace_SLEDAI6-27_813SLE_10CTL_BA_Probes_IFN-Paper-IQR_GSVA_Overlapping_genes.csv")


########Script to generate heat map using gsva enrichment scores#################
exprs_hm <- gse88884_illum1_SLE_CTL_gsva

######Generating heatmap with log2 expression values for each genes that is overlapping with each category####
# We must remove rows where all columns are NA
index <- which(is.na(rowSums(exprs_hm)))
if (length(index) > 0) { exprs_hm <- exprs_hm[-index,] }

# Here we'll use Euclidean distances to figure cluster colors
distance <- dist(t(exprs_hm),method="euclidean")
hc <- hclust(distance)

hc.cut = cutree(hc,k=3) # k = number of clusters
table(hc.cut) # show cluster cut results
hc.cut.levels = levels(as.factor(hc.cut)) # factor cluster level numbers for coloring
hc.cut.levels # review cluster level factors

colors = rainbow(length(hc.cut.levels));colors
cluster.patient.colors = c()
for (i in 1:length(hc.cut.levels)){
  cluster.patient.colors[names(which(hc.cut==hc.cut.levels[i]))] = colors[i] }
cluster.patient.colors

# Euclidean distances for both probes and arrays
dist.euclidean = dist.euclidean <- function(x, ...)
  dist(x,method="euclidean")

label.size = 0.8
samples = colnames(exprs_hm)
hmcol <- colorRampPalette(c("blue","grey","red"))(256)


## MAKE SURE TO MAKE THE R STUDIO PLOTS FRAME AS LARGE AS POSSIBLE BEFORE RUNNING (SHRINK THE OTHER FRAMES)
dev.off()
library(gplots)
nrow(exprs_hm)
genes.heat = heatmap.2(data.matrix(exprs_hm), col=hmcol, Rowv = FALSE, scale="none", distfun = dist.euclidean,key=TRUE,
                       symkey=FALSE, density.info="none", trace="none",
                       ColSideColors=cluster.patient.colors[samples],cexCol=label.size,
                       cexRow = label.size,margins=c(9,9))