###########################################################################
#
# Mark Cembrowski, Janelia Research Campus, Nov 27 2017
#
# Classify cells with random forest classifier.
#
###########################################################################

# Benchmark random-forest cell classification against training-set size.
#
# For each candidate training-set size, repeatedly (its times) draws a
# random training subset, classifies the held-out cells with Seurat's
# ClassifyCells(), and records the percent of cells whose predicted class
# matches their assigned identity. Plots mean +/- 1 sd of accuracy versus
# training-set size and invisibly returns the summary data.frame.
#
# Args:
#   seuratObj: A Seurat object with identities assigned (uses @cell.names,
#     @ident, @data — Seurat v2 API).
#   its: Number of random train/test splits per training-set size.
#   nTrains: Integer vector of training-set sizes to evaluate. Defaults to
#     the original hard-coded schedule.
#
# Returns (invisibly): data.frame with columns mu, sd, lo, hi, nTrain.
# Side effects: prints a ggplot and emits a status message per iteration.
rfClassify <- function(seuratObj, its = 10,
                       nTrains = c(25, 50, 100, 200, 400, 800)) {
  perf <- matrix(nrow = length(nTrains), ncol = its)
  nCells <- length(seuratObj@cell.names)

  for (jj in seq_along(nTrains)) {
    nTrain <- nTrains[jj]
    for (ii in seq_len(its)) {
      # Unique, reproducible seed per (training size, iteration) pair.
      set.seed((jj - 1) * its + ii)
      indTrain <- sample(seq_len(nCells), nTrain)
      indTest <- setdiff(seq_len(nCells), indTrain)

      train <- SubsetData(seuratObj,
                          cells.use = seuratObj@cell.names[indTrain])
      test <- SubsetData(seuratObj,
                         cells.use = seuratObj@cell.names[indTest])

      classesPred <- ClassifyCells(
        object = train,
        training.classes = train@ident,
        new.data = test@data
      )

      # Percent of held-out cells whose predicted class matches their
      # assigned identity. (Original counted matches via the convoluted
      # sum(abs(diff) < 0.1) on a logical vector; this is equivalent.)
      isCorrect <- as.character(classesPred) == as.character(test@ident)
      successRat <- round(mean(isCorrect) * 100)
      perf[jj, ii] <- successRat
      message("Correct class predicted with following percent: ", successRat)
    }
  }
  rownames(perf) <- nTrains

  # Do stats: mean +/- sd of accuracy per training-set size.
  mu <- apply(perf, 1, mean)
  sigma <- apply(perf, 1, sd)
  df <- data.frame(
    mu = mu,
    sd = sigma,
    lo = mu - sigma,
    hi = mu + sigma,
    nTrain = nTrains
  )

  # Plot accuracy vs training-set size with a +/- 1 sd ribbon.
  gg <- ggplot(df, aes(x = nTrain, y = mu)) +
    geom_ribbon(aes(ymin = lo, ymax = hi), colour = 'grey', fill = 'grey') +
    geom_line() +
    geom_point() +
    theme_bw() +
    xlab('Number in training dataset') +
    ylab('Percent success') +
    coord_cartesian(xlim = c(0, nCells), ylim = c(0, 100))
  print(gg)

  invisible(df)
}