Figure S4

path.to.sample <- "path_to_snRNAseq_sample"
sample <- readRDS(path.to.sample)

subtype.colors <- c("ATRT-TYR"    = "#87191c",
                    "ATRT-SHH"    = "#243a76",
                    "ATRT-MYC"    = "#096837")

colors.use.reduced <- c("Tumor"              = "#C0C0C0",
                        "TME"                = "#70798c",
                        "IPC-like"                 = "#be920e",
                        "CP-like"                  = "#be660e",
                        "Cilia-like"               = "#be0e0e",
                        "Mesenchymal-like"         = "#0ebe66",
                        "RG-like"                  = "#0497c8",
                        "NPC-like"                 = "#0466c8",
                        "OPC-like"                 = "#0435c8",
                        "Hypoxic"                  = "#92be0e",
                        "Immune-like"              = "#920ebe")

markers.path <- "path_to_supervised_annotation_set"
markers <- readRDS(markers.path)

markers.stem <- "path_to_stemness_markers"
stem.markers <- readRDS(markers.stem)

markers$PSC <- stem.markers$PSC
markers <- markers[c("Neuronal_IPC", "Cycle", "PSC")]
names(markers) <- c("Neuronal.IPC", "Cycle", "PSC")

out <- SCpubr::do_EnrichmentHeatmap(sample = sample, input_gene_list = markers, flavor = "UCell", return_object = TRUE)
sample <- out$Object
sample <- Seurat::ScaleData(sample, features = rownames(sample))

# Figure S4A ---------
p <- SCpubr::do_DimPlot(sample = sample,
                         group.by = "Annotation",
                         idents.keep = "IPC-like",
                         font.size = 16,
                         raster = TRUE,
                         raster.dpi = 2048,
                         pt.size = 8,
                         colors.use = colors.use.reduced,
                         legend.icon.size = 8,
                         legend.ncol = 1,
                         legend.position = "bottom")


# Figure S4B ---------
sample <- Seurat::SetIdent(sample, value = "Full_Annotation")
 
markers <- COSG::cosg(sample,
                      expressed_pct = 0.2,
                      n_genes_user = 250)
 
markers <- markers$names
# Retrieve the df containing the genes and transform it in a list of genes.
markers <- markers %>% as.list()
 
# Filter out not interesting genes and keep top 100.
## - Mitochondrial genes: ^MT-
## - Ribosomal genes: ^RP
## - Unannotated genes: ^AP0, ^AC0, 
## - Long, non coding: ^LINC.*
## - Alternative splice variants: *-AS
regex <- "[[:alnum:]]+\\.[[:xdigit:]]+|^MT-.*|^RP|.*-AS[[:xdigit:]]$|^LINC.*"
 
# Perform the filtering.
markers <- lapply(markers, function(x){x[grep(regex, x, invert = TRUE)][1:100]})["IPC-like"][[1]]
 
 
#GO ontology
ensembl = biomaRt::useEnsembl(biomart = "ensembl", 
                              dataset = "hsapiens_gene_ensembl")
 
# Change from SYMBOL to ENTREZID.
ans <- unique(biomaRt::getBM(attributes = c("hgnc_symbol", "entrezgene_id"),   
                             filters = "hgnc_symbol",
                             values = markers,
                             mart = ensembl))
 
er  <- clusterProfiler::enrichGO(gene = ans$entrezgene_id,
                                  OrgDb = org.Hs.eg.db,
                                  ont = 'BP')

p <- SCpubr::do_TermEnrichmentPlot(mat = er,
                                   n.terms = 10,
                                   n.chars = 35,
                                   legend.length = 10,
                                   font.size = 16)

# Figure S4C ---------
p <- SCpubr::do_FeaturePlot(sample = sample,
                            slot = "scale.data",
                            enforce_symmetry = TRUE,
                            max.cutoff = 3,
                         features = "Neuronal.IPC",
                         raster = TRUE,
                         raster.dpi = 2048,
                         pt.size = 8,
                         legend.position = "bottom",
                         legend.length = 15)

# Figure S4D ---------
p <- SCpubr::do_FeaturePlot(sample = sample,
                            slot = "scale.data",
                            enforce_symmetry = TRUE,
                            max.cutoff = 3,
                         features = "Cycle",
                         raster = TRUE,
                         raster.dpi = 2048,
                         pt.size = 8,
                         legend.position = "bottom",
                         legend.length = 15)


# Figure S4E ---------
Seurat::DefaultAssay(sample) <- "RNA"
sample@assays$Enrichment <- NULL

markers.path <- "path_to_top100_markers"
markers <- readRDS(markers.path)

order.use <- c("IPC-like", "CP-like", "Cilia-like", "RG-like", "NPC-like", "OPC-like", "Mesenchymal-like")
markers.short <- markers[order.use]
sample$Groups <- NULL
out <- SCpubr::do_EnrichmentHeatmap(sample = sample, input_gene_list = markers.short, flavor = "UCell", return_object = TRUE)
sample <- out$Object
sample <- Seurat::ScaleData(sample, features = rownames(sample))




# Reclustering.
Seurat::DefaultAssay(sample) <- "RNA"
sample <- sample[, sample$Annotation == "IPC-like"]



regress_out_vars <- c("nCount_RNA", "nFeature_RNA", "percent.mt")
normalization_batch <- "orig.ident"
integration_batch <- "technology"

sample <- Seurat::FindVariableFeatures(sample)
sample <- Seurat::ScaleData(sample, assay = "RNA", vars.to.regress = regress_out_vars)

sample <- Seurat::RunPCA(sample)

sample <- harmony::RunHarmony(sample,
                              assay = "RNA",
                              group.by.vars = c(normalization_batch, integration_batch),
                              theta = c(1, 2))

sample <- Seurat::FindNeighbors(sample, reduction = "harmony", dims = 1:30)
sample <- Seurat::FindClusters(sample, cluster.name = "harmony_clusters")
sample <- Seurat::RunUMAP(sample, reduction = "harmony", dims = 1:30, reduction.name = "umap.harmony", reduction.key = "UMAPHARMONY_")

Seurat::DefaultAssay(sample) <- "Enrichment"


p <- SCpubr::do_DimPlot(sample = sample,
                        group.by = "subtype",
                        font.size = 16,
                        raster = TRUE,
                        raster.dpi = 2048,
                        pt.size = 8,
                        colors.use = subtype.colors,
                        legend.icon.size = 8,
                        legend.ncol = 3,
                        legend.position = "bottom")



# Figure S4F ---------
path.to.top100.markers <- "path_to_top100_markers_snRNAseq"
markers <- readRDS(path.to.top100.markers)

order.use <- c("CP-like", "Cilia-like", "RG-like", "NPC-like", "OPC-like", "Mesenchymal-like")
markers.short <- markers[order.use]
markers.short <- lapply(markers.short, function(x){x[1:10]})

Seurat::DefaultAssay(sample) <- "RNA"

p <- SCpubr::do_DotPlot(sample, 
                        features = markers.short, 
                        slot = "data", 
                        group.by = "subtype", 
                        zscore.data =  TRUE, 
                        dot.scale = 8) + 
     ggplot2::scale_size_continuous(range = c(3, 8))
# Code commented due to compilation problems with Quarto.

# import scanpy as sc
# import scvelo as scv
# import pandas as pd
# import matplotlib.pyplot as plt
# 
# 
# # Set beautiful plotting parameters.
# scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
# scv.settings.presenter_view = True  # set max width size for presenter view
# scv.set_figure_params('scvelo')  # for beautified visualization
# 
# adata = sc.read(path.to.adata, cache = True)
# 
# custom_colors = {
#     "IPC-like": "#be920e",  
#     "CP-like": "#be660e",  
#     "Cilia-like": "#be0e0e", 
#     "NPC-like": "#0466c8",
#     "Other": "#C0C0C0",
# }
# 
# custom_colors = {
#     "IPC-like": "#be920e",  
#     "CP-like": "#be660e",  
#     "Cilia-like": "#be0e0e", 
#     "Neurons": "#0466c8",
#     "Other": "#C0C0C0",
# }
# 
# # This is Figure S4 H
# fig, ax = plt.subplots(figsize=(7,7))
# scv.pl.velocity_embedding_stream(adata, basis='umap', color = "annotation", palette = custom_colors, legend_loc="right", add_outline = True, ax = ax, frameon = False, figsize = (7, 7), size = 50, alpha = 1, arrow_size = 2, linewidth=2)
# 
# # This is Figure S4 G
# fig, ax = plt.subplots(figsize=(7,7))
# sc.pl.umap(adata, color = "annotation", legend_loc="best", add_outline=True, size = 50,  palette = custom_colors, ax=ax, frameon = False)