# Load conda environment with python instalation. This depends on each user's.reticulate::use_condaenv(condaenv ="rstudio_server")# Import scrublet.scrublet<-reticulate::import("scrublet")# Compute doublets per patient.list.output<-list()for(sampinunique(sample$orig.ident)){rlang::inform(paste0(add_info(), crayon_body("Analysing sample: "), crayon_key(samp)))sample.use<-sample[, sample$orig.ident==samp]counts_transposed<-Matrix::t(sample.use@assays$RNA@counts)# Run scrublet.# Code adapted from: https://github.com/swolock/scrublet/blob/master/examples/scrublet_basics.ipynbscrub=scrublet$Scrublet(counts_transposed, expected_doublet_rate =0.06)# Compute the doublets.return_list=scrub$scrub_doublets()# List with the output from scrublet.scrublet_score<-return_list[[1]]# Scrublet scores per cell.scrublet_binary<-return_list[[2]]# Scrublet assignment for each cell.# Add cell names to the output, so it can be integrated in the Seurat object.row.names(scrublet_score)<-colnames(sample.use)list.output[[samp]]<-scrublet_score}# Get the doublet scores.list.matrices<-list()for(sampinnames(list.output)){data<-list.output[[samp]]data.use<-data.frame("Cell"=names(data),"Score"=unname(data),"orig.ident"=rep(samp, length(data)))%>%tibble::as_tibble()%>%dplyr::left_join(y =sample@meta.data%>%tibble::rownames_to_column(var ="Cell")%>%dplyr::select(dplyr::all_of(c("Cell", "nCount_RNA", "nFeature_RNA"))), by ="Cell")list.matrices[[samp]]<-data.use}data<-do.call(rbind, list.matrices)# Add them as metadata.sample@meta.data<-sample@meta.data%>%tibble::rownames_to_column(var ="Cell")%>%dplyr::left_join(y =data%>%dplyr::select(dplyr::all_of(c("Cell", "Score"))), by ="Cell")%>%tibble::column_to_rownames(var ="Cell")%>%dplyr::rename("Scrublet Score"="Score")# Inspect the distribution of scores per patient and manually select a doublet score cutoff for each of them.cutoffs<-list("H049-ATRT-0014"=0.175,"H049-ATRT-0005"=0.225,"H049-JVCT"=0.175,"H049-ATRT-0001"=0.175,"H049-ATRT-0013"=0.175,"H049-ATRT-0003"=0.175,"H049-ATRT-0010"=0.175,"H049-ATRT-0006"=0.2,"H049-ATRT-0022-P"=0.175,"H049-UV6K"=0.2,"H049-ATRT-0007"=0.2,"H049-ATRT-0009"=0.175,"ATRT21"=0.175,"ATRT24"=0.175,"ATRT04"=0.175,"ATRT05.2"=0.175,"ATRT15_RV3"=0.2,"JD113T"=0.175,"ATRT14"=0.175,"CG_SB_NB8358"=0.175)# Retrieve cells to exclude.cells.exclude<-NULLfor(patientinunique(sample$orig.ident)){cutoff<-cutoffs[[patient]]sample.test<-sample[, sample$orig.ident==patient]cells.exclude<-append(cells.exclude, names(sample.test$`Scrublet Score`[sample.test$`Scrublet Score`>=cutoff]))}# Exclude them.sample<-sample[, !(colnames(sample)%in%cells.exclude)]