diff --git a/.Rhistory b/.Rhistory index 07a9d36..9059bf2 100644 --- a/.Rhistory +++ b/.Rhistory @@ -67,8 +67,8 @@ complexity.limits = c(NA,NA), mad.complexity.limits = c(5,NA), topNgenes.limits = c(NA,NA), mad.topNgenes.limits = c(5,5), -n.topgnes=20, -do.doublets.fitler=T +n.topgenes=20, +do.doublets.filter=T ) ggsave(SO_filtered$plots$PostFilterCombined, filename = "./images/QC1.png", width = 10, height = 10) ggsave(SO_filtered$plots$ViolinPlotCombine, filename = "./images/QC2.png", width = 10, height = 10) @@ -315,12 +315,12 @@ dev.off() modScore MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ) -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ) +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), use_assay = "SCT", general.class=c("Macrophages"), lvl.vec = c('Macrophages-M1','Macrophages-M2'), @@ -333,8 +333,8 @@ step.size = 0.1 modScore MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), use_assay = "SCT", general.class=c("Macrophages"), multi.lvl = FALSE, @@ -347,8 +347,8 @@ step.size = 0.1 modScore MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), general.class=c("Macrophages"), multi.lvl = FALSE, reduction = "umap", @@ -359,8 +359,8 @@ step.size = 0.1 ) MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), multi.lvl = FALSE, reduction = "umap", nbins = 10, @@ -370,8 +370,8 @@ step.size = 0.1 ) MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","M1","M2" ), -ms_threshold=c("Macrophages .40","M1 .25","M2 .14"), +use.columns = c("Macrophages","M1","M2" ), +ms.threshold=c("Macrophages .40","M1 .25","M2 .14"), general.class=c("Macrophages","M1","M2"), multi.lvl = FALSE, reduction = "umap", @@ -383,8 +383,8 @@ step.size = 0.1 Marker_Table MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","Monocytes","CD8_T" ), -ms_threshold=c("Macrophages .40","Monocytes .25","CD8_T .14"), +use.columns = c("Macrophages","Monocytes","CD8_T" ), +ms.threshold=c("Macrophages .40","Monocytes .25","CD8_T .14"), general.class=c("Macrophages","Monocytes","CD8_T"), multi.lvl = FALSE, reduction = "umap", @@ -395,8 +395,8 @@ step.size = 0.1 ) MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Macrophages","Neutrophils","CD8_T" ), -ms_threshold=c("Macrophages .40","Neutrophils .25","CD8_T .14"), +use.columns = c("Macrophages","Neutrophils","CD8_T" ), +ms.threshold=c("Macrophages .40","Neutrophils .25","CD8_T .14"), general.class=c("Macrophages","Neutrophils","CD8_T"), multi.lvl = FALSE, reduction = "umap", @@ -407,8 +407,8 @@ step.size = 0.1 ) MS_object=modScore(object=Anno_SO$object, marker.table=Marker_Table, -use_columns = c("Neutrophils","Macrophages","CD8_T" ), -ms_threshold=c("Neutrophils .25","Macrophages .40","CD8_T .14"), +use.columns = c("Neutrophils","Macrophages","CD8_T" ), +ms.threshold=c("Neutrophils .25","Macrophages .40","CD8_T .14"), general.class=c("Neutrophils","Macrophages","CD8_T"), multi.lvl = FALSE, reduction = "umap", diff --git a/.gitignore b/.gitignore index fdd4445..d878017 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,7 @@ inst/doc *.rds /doc/ /Meta/ + +.github +.github.zip +decision_log.md \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index e74cdc5..38364a5 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -92,4 +92,5 @@ Imports: htmlwidgets, scDblFinder , BiocParallel + , patchwork Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 6e6b355..49bc794 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,11 +14,11 @@ export(dotPlotMet) export(dualLabeling) export(filterQC) export(filterSeuratObjectByMetadata) +export(harmonyBatchCorrect) export(heatmapSC) export(launch_module_score_app) export(modScore) export(nameClusters) -export(object) export(palantir_api_call) export(plotMetadata) export(processRawData) @@ -46,12 +46,16 @@ import(httr) import(jsonlite) import(magrittr) import(parallel) +importFrom(tidyr, pivot_longer) import(plotly) import(quantmod) import(reshape2) import(rlang) import(scales) +importFrom(stringr, str_split) +import(stringr) import(tibble) +import(tidyr) import(tidyverse) import(tools) import(utils) @@ -135,6 +139,7 @@ importFrom(gridExtra,arrangeGrob) importFrom(gridExtra,tableGrob) importFrom(htmlwidgets,saveWidget) importFrom(magrittr,"%>%") +importFrom(patchwork,plot_layout) importFrom(plotly,as_widget) importFrom(plotly,ggplotly) importFrom(plotly,plot_ly) diff --git a/R/3D_tSNE.R b/R/3D_tSNE.R index fb8ab05..9896cc5 100644 --- a/R/3D_tSNE.R +++ b/R/3D_tSNE.R @@ -19,6 +19,20 @@ #' @importFrom htmlwidgets saveWidget #' #' @export +#' +#' @return A list with a plotly 3D TSNE plot (`figure`) and TSNE coordinates +#' (`tsne.df`). +#' +#' @examples +#' \dontrun{ +#' out <- tSNE3D( +#' object = seurat_obj, +#' color.variable = "cell_type", +#' label.variable = "orig.ident", +#' npcs = 15, +#' save.plot = FALSE +#' ) +#' } tSNE3D <- function(object, color.variable, diff --git a/R/AggregateCounts.R b/R/AggregateCounts.R index 7fc9f0b..699dab1 100644 --- a/R/AggregateCounts.R +++ b/R/AggregateCounts.R @@ -1,33 +1,44 @@ -##' @title Aggregate Counts (Pseudobulk) -##' @description Compute pseudobulk expression by averaging expression across groups -##' defined by one or more metadata columns, and return a tidy table. -##' @details Uses Seurat's `AverageExpression()` on the `SCT` assay to compute -##' group-wise average expression for each feature. Also produces a -##' bar plot (via `ggplot2`/`plotly`) showing the number of cells per -##' pseudobulk group and warns if any group contains only one cell. -##' -##' @param object Seurat-class object. -##' @param var.group Character vector of metadata column names used to define -##' pseudobulk groups. When multiple columns are supplied, an -##' interaction of these columns defines the groups. -##' @param slot Character name of the assay data layer passed to -##' `AverageExpression()` (e.g., "data", "counts", or "scale.data"). -##' -##' @return A data.frame of pseudobulk expression with columns `Gene` followed by -##' one column per pseudobulk group. Column names are sanitized to -##' contain only alphanumeric/underscore characters. -##' -##' @import Seurat -##' @import tidyverse -##' @import ggplot2 -##' @import plotly -##' @importFrom dplyr select -##' -##' @export +#' @title Aggregate Counts (Pseudobulk) +#' @description Compute pseudobulk expression by averaging expression across groups +#' defined by one or more metadata columns, and return a tidy table. +#' @details Uses Seurat's `AverageExpression()` on the `SCT` assay to compute +#' group-wise average expression for each feature. Also produces a +#' bar plot (via `ggplot2`/`plotly`) showing the number of cells per +#' pseudobulk group and warns if any group contains only one cell. +#' +#' @param object Seurat-class object. +#' @param var.group Character vector of metadata column names used to define +#' pseudobulk groups. When multiple columns are supplied, an +#' interaction of these columns defines the groups. +#' @param slot Character name of the assay data layer passed to +#' `AverageExpression()` (e.g., "data", "counts", or "scale.data"). +#' @param interactive If TRUE, draw plotly plot (default is FALSE) +#' +#' @import Seurat +#' @import tidyverse +#' @import ggplot2 +#' @import plotly +#' @importFrom dplyr select +#' +#' @export +#' +#' @return A data.frame of pseudobulk expression with columns `Gene` followed by +#' one column per pseudobulk group. Column names are sanitized to +#' contain only alphanumeric/underscore characters. +#' +#' @examples +#' \dontrun{ +#' out <- aggregateCounts( +#' object = seurat_obj, +#' var.group = c("orig.ident", "condition"), +#' slot = "data" +#' ) +#' } aggregateCounts <- function(object, var.group, - slot){ + slot="data", + interactive=FALSE){ ## --------------- ## @@ -73,16 +84,19 @@ aggregateCounts <- function(object, )) } - p <- ggplotly(ggplot(df, aes(x = pseudobulk_group, y = Freq)) + + p <- ggplot(df, aes(x = pseudobulk_group, y = Freq)) + geom_bar(stat = "identity", position = "stack") + labs(y = "Counts", x = "Pseudobulk Groups", title = "Number of Cells in each Pseudobulk Group") + - theme(axis.text.x = element_text(angle = 90, hjust = 1))) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) - print(p) + if(interactive==T){ + p <- ggplotly(p) + } } else { stop("All columns in var.group must be factors or characters") } - return(pseudobulk) + return(list(data=pseudobulk, + plots=p)) } \ No newline at end of file diff --git a/R/Annotate_Cell_Types.R b/R/Annotate_Cell_Types.R index 8a8cf0a..bbdc84b 100755 --- a/R/Annotate_Cell_Types.R +++ b/R/Annotate_Cell_Types.R @@ -1,8 +1,7 @@ -#' @title Annotating cell types using SingleR module -#' @description SingleR is an automatic annotation method for single-cell -#' RNA sequencing (scRNAseq) data (Aran et al. 2019). Given a reference dataset -#' of samples (single-cell or bulk) with known labels, it labels new cells -#' from a test dataset based on similarity to the reference. +#' @title Cell Type Annotation with SingleR [CCBR] [scRNA-seq] +#' @description Annotate the cell types of your cells using SingleR (Aran et al., 2019). This +#' function takes a combined Seurat object after PC reduction and assigns cells +#' to a category (for example, stem cells or T cells) based on genomic profile. #' @details This function is Step 5 of the basic Single-Cell RNA-seq workflow. #' It is the starting point for downstream visualization, subsetting, and #' analysis. It takes a combined seurat object as input, such as the one created @@ -22,8 +21,7 @@ #' Default is NULL #' @param use.clusters Provide cluster identities for each cell. #' Default is NULL - - +#' #' #' @import Seurat #' @import cowplot @@ -36,9 +34,17 @@ #' @export #' #' @return a Seurat object with additional metadata - - - +#' +#' @examples +#' \dontrun{ +#' out <- annotateCellTypes( +#' object = combined_so, +#' species = "Human", +#' reduction.type = "umap" +#' ) +#' } +#' +#' annotateCellTypes <- function(object, species = "Mouse", reduction.type = "umap", diff --git a/R/Color_by_Gene.R b/R/Color_by_Gene.R index 3ddaab9..82e96dd 100644 --- a/R/Color_by_Gene.R +++ b/R/Color_by_Gene.R @@ -35,8 +35,17 @@ #' @export #' #' @return a Seurat object with additional metadata or gene table and plot - - +#' +#' @examples +#' \dontrun{ +#' out <- colorByGene( +#' object = anno_so, +#' samples.to.include = c("sample1", "sample2"), +#' gene = c("CD3D", "MS4A1"), +#' reduction.type = "umap" +#' ) +#' } +#' colorByGene <- function(object, samples.to.include, @@ -65,16 +74,14 @@ colorByGene <- function(object, ## --------------- ## - print(object) # checking for samples - if(any(grepl('c\\(|\\[\\]',samples))) { - samples = eval(parse(text = gsub('\\[\\]', 'c()', samples))) - }else{ - samples=samples + if (is.character(samples.to.include) && length(samples.to.include) == 1 && + grepl('c\\(|\\[\\]', samples.to.include)) { + samples.to.include <- eval(parse(text = gsub('\\[\\]', 'c()', samples.to.include))) } # if none specified, using ALL - if (length(samples) == 0) { - samples = unique(object@meta.data$orig.ident) + if (length(samples.to.include) == 0) { + samples.to.include = unique(object@meta.data$orig.ident) } # Fix for underscore @@ -85,13 +92,13 @@ colorByGene <- function(object, names(sample.name) = names(object@active.ident) object@active.ident <- as.factor(vector()) object@active.ident <- sample.name - object.sub = subset(object, ident = samples) + object.sub = subset(object, ident = samples.to.include) } else { sample.name = as.factor(object@meta.data$orig.ident) names(sample.name) = names(object@active.ident) object@active.ident <- as.factor(vector()) object@active.ident <- sample.name - object.sub = subset(object, ident = samples) + object.sub = subset(object, ident = samples.to.include) } #Check input for missing genes diff --git a/R/Color_by_Genes_Automatic.R b/R/Color_by_Genes_Automatic.R index d81409b..88bff98 100644 --- a/R/Color_by_Genes_Automatic.R +++ b/R/Color_by_Genes_Automatic.R @@ -46,17 +46,21 @@ #' @return arranged grob of dimension reduction plots colored by individual #' marker expression -colorByMarkerTable <- function (object, samples.subset, samples.to.display, - manual.genes = c(), marker.table, - cells.of.interest, protein.presence = FALSE, assay = "SCT", slot = "scale.data", - reduction.type = "umap", point.transparency = 0.5, point.shape = 16, - cite.seq = FALSE){ +colorByMarkerTable <- function(object, + samples.subset, + samples.to.display, + manual.genes = c(), + marker.table, + cells.of.interest, + protein.presence = FALSE, + assay = "SCT", + slot = "scale.data", + reduction.type = "umap", + point.transparency = 0.5, + point.shape = 16, + cite.seq = FALSE + ){ - library(ggplot2) - library(Seurat) - library(stringr) - library(grid) - library(gridExtra) .plotMarkers <- function(markers) { if (is.na(markers) == TRUE) { @@ -211,10 +215,12 @@ colorByMarkerTable <- function (object, samples.subset, samples.to.display, }) } - results <- list( - overall = cons.gg.storage, - celltype = indv_arranged, - manual_entry = manual.arranged) + results <- list("plots"=list( + "overall" = cons.gg.storage, + "celltype" = indv_arranged, + "manual_entry" = manual.arranged + ) + ) return(results) } diff --git a/R/Combine_and_Normalize.R b/R/Combine_and_Normalize.R index 98e8f65..ec351e3 100755 --- a/R/Combine_and_Normalize.R +++ b/R/Combine_and_Normalize.R @@ -92,6 +92,16 @@ #' #' @return Seurat Objects and QC plots #' +#' @examples +#' \dontrun{ +#' out <- combineNormalize( +#' object = filtered_so_list, +#' npcs = 30, +#' draw.umap = TRUE, +#' draw.tsne = TRUE +#' ) +#' } +#' combineNormalize <- function(object, diff --git a/R/Compare_Cell_Populations.R b/R/Compare_Cell_Populations.R index d427394..56fe700 100644 --- a/R/Compare_Cell_Populations.R +++ b/R/Compare_Cell_Populations.R @@ -249,8 +249,8 @@ compareCellPopulations <- function( # Return results result <- list( - 'Plots' = list('Barplot' = p2, 'Boxplot' = p2_Box), - 'Table' = ColTables$OutTable + 'plots' = list('Barplot' = p2, 'Boxplot' = p2_Box), + 'data' = ColTables$OutTable ) return(result) diff --git a/R/DEG_Gene_Expression_Markers.R b/R/DEG_Gene_Expression_Markers.R index 321333c..463c507 100755 --- a/R/DEG_Gene_Expression_Markers.R +++ b/R/DEG_Gene_Expression_Markers.R @@ -1,7 +1,7 @@ -#' @title DEG (Gene Expression Markers) -#' @description This function performs a DEG (differential expression of genes) -#' analysis on a merged Seurat object to identify expression markers -#' between different groups of cells (contrasts). +#' @title DE with Find Markers [CCBR] [scRNA-seq] +#' @description This function performs DE (differential expression) analysis on +#' a merged Seurat object to identify expression markers between different +#' groups of cells (contrasts). #' @details The recommended input is a merged Seurat object #' with SingleR annotations, along with its associated sample names and metadata #' @@ -20,8 +20,8 @@ #' Default is FALSE #' @param assay.to.use The assay to use for your DEG analysis. #' Default is SCT, but can use linearly scaled data by selecting RNA instead - - +#' +#' #' @import Seurat #' @import ggplot2 #' @import RColorBrewer @@ -41,9 +41,19 @@ #' @export #' #' @return a dataframe with DEG. - - - +#' +#' @examples +#' \dontrun{ +#' deg <- degGeneExpressionMarkers( +#' object = anno_so, +#' samples = c("sample1", "sample2"), +#' contrasts = c("A-B"), +#' parameter.to.test = "cluster" +#' ) +#' } +#' +#' +#' degGeneExpressionMarkers <- function (object, samples, contrasts, parameter.to.test = "orig_ident", test.to.use = "MAST", log.fc.threshold = 0.25, use.spark = FALSE, assay.to.use = "SCT") diff --git a/R/Dotplot_by_Metadata.R b/R/Dotplot_by_Metadata.R index f5beed2..d7f87ad 100644 --- a/R/Dotplot_by_Metadata.R +++ b/R/Dotplot_by_Metadata.R @@ -19,11 +19,23 @@ #' @param cell.reverse.sort If TRUE, Reverse plot order of metadata category #' factors (default is FALSE) #' @param dot.color Dot color (default is "dark blue") +#' #' @importFrom tidyr pivot_wider #' @importFrom Seurat Idents DotPlot +#' #' @export #' #' @return Dotplot with markers and cell types. +#' +#' @examples +#' \dontrun{ +#' p <- dotPlotMet( +#' object = anno_so, +#' metadata = "celltype", +#' cells = c("T cell", "B cell"), +#' markers = c("CD3D", "MS4A1") +#' ) +#' } dotPlotMet <- function(object, metadata, diff --git a/R/Dual_Labeling.R b/R/Dual_Labeling.R index b4ef133..9e0f906 100755 --- a/R/Dual_Labeling.R +++ b/R/Dual_Labeling.R @@ -1,33 +1,30 @@ -#' @title Plot coexpression of 2 markers using transcript and/or protein -#' expression values -#' @description This method provides visualization of coexpression of 2 genes -#' (or proteins) and additional methods for filtering for cells with gene -#' expression values that are above or below thresholds set for one or both -#' markers. The method allows for filtering (optional) of the Seurat object -#' using manually set expression thresholds. +#' @title Cell Annotation with Co-Expression [CCBR] [scRNA-seq] +#' @description Display co-expression of two chosen markers in your Seurat +#' object. Creates a metadata column containing annotations for cells that +#' correspond to marker expression thresholds. #' #' @param object Seurat-class object #' @param samples Samples to be included in the analysis #' @param marker.1 First gene/marker for coexpression analysis #' @param marker.2 Second gene/marker for coexpression analysis #' @param marker.1.type Slot to use for first marker. Choices are "SCT", -#' "protein","HTO" (default is "SCT") +#' "protein","HTO", or "Spatial" (default is "SCT") #' @param marker.2.type Slot to use for second marker. Choices are "SCT", -#' "protein","HTO" (default is "SCT") +#' "protein","HTO", or "Spatial" (default is "SCT") #' @param data.reduction Dimension Reduction method to use for image. Options -#' are "umap" or "tsne" (default is "umap") +#' are "umap", "tsne", or "both" (default is "both") #' @param point.size Point size for image (default is 0.5) #' @param point.shape Point shape for image (default is 16) #' @param point.transparency Point transparency for image (default is 0.5) -#' @param add.marker.thresholds Add marker thresholds on plot (default is FALSE) +#' @param add.marker.thresholds Add marker thresholds on plot (default is TRUE) #' @param marker.1.threshold Threshold set for first marker (default is 0.5) #' @param marker.2.threshold Threshold set for second marker (default is 0.5) #' @param filter.data Add new parameter column to metadata annotating where #' marker thresholds are applied (default is TRUE) -#' @param M1.filter.direction Annotate cells that have gene expression levels +#' @param marker.1.filter.direction Annotate cells that have gene expression levels #' for marker 1 using the marker 1 threshold. Choices are "greater than" #' or "less than" (default is "greater than") -#' @param M2.filter.direction Annotate cells that have gene expression levels +#' @param marker.2.filter.direction Annotate cells that have gene expression levels #' for marker 2 using the marker 2 threshold. Choices are "greater than" #' or "less than" (default is "greater than") #' @param apply.filter.1 If TRUE, apply the first filter (default is TRUE) @@ -35,19 +32,17 @@ #' @param filter.condition If TRUE, apply both filters 1 and 2 and take #' intersection. If FALSE, apply both filters and take the union. #' @param parameter.name Name for metadata column for new marker filters -#' (Default is "Marker") +#' (default is "My_CoExp") #' @param trim.marker.1 Trim top and bottom percentile of marker 1 signal to #' pre-scale trim values (below) to remove extremely low and high values -#' (Default is TRUE) +#' (default is FALSE) #' @param trim.marker.2 Trim top and bottom percentile of marker 2 signal to #' pre-scale trim values (below) to remove extremely low and high values -#' (Default is TRUE) -#' @param pre.scale.trim Set trimming percentile values (Defalut is 0.99) -#' @param density.heatmap Creates a additional heatmap showing the density -#' distribution of cells. (Default is FALSE) +#' (default is FALSE) +#' @param pre.scale.trim Set trimming percentile value (default is 0.99) #' @param display.unscaled.values Set to TRUE if you want to view the unscaled -#' gene/protein expression values (Default is FALSE) - +#' gene/protein expression values (default is FALSE) +#' #' @import Seurat #' @importFrom scales rescale #' @importFrom gridExtra arrangeGrob tableGrob @@ -63,7 +58,18 @@ #' @return a seurat object with optional additional metadata for cells that are #' positive or negative for gene markers, a coexpression plot and contingency #' table showing sum of cells filtered. - +#' +#' @examples +#' \dontrun{ +#' out <- dualLabeling( +#' object = anno_so, +#' samples = c("sample1"), +#' marker.1 = "CD3D", +#' marker.2 = "MS4A1", +#' data.reduction = "umap" +#' ) +#' } +#' dualLabeling <- function (object, samples, marker.1, @@ -568,16 +574,26 @@ dualLabeling <- function (object, } - if (data.reduction=='tsne'|data.reduction=='umap') { + if (data.reduction=='tsne') { result.list <- list("object" = so.sub, + "data"=list("plot_table" = g), "plots"=list( - "plot" = grob, - "plot_densityHM" = grobHM, - "plot_table" = g) + 'tsne' = grob, + "densityHM" = grobHM) ) - } else if (data.reduction=='both'){ + }else if (data.reduction=='umap') { + + result.list <- list("object" = so.sub, + "data"=list("plot_table" = g), + "plots"=list( + 'umap' = grob, + "densityHM" = grobHM) + ) + + + }else if (data.reduction=='both'){ result.list <- list("object" = so.sub, "data"=list("plot_table" = g), diff --git a/R/Filter_QC.R b/R/Filter_QC.R index e9a13a7..9592e76 100755 --- a/R/Filter_QC.R +++ b/R/Filter_QC.R @@ -1,6 +1,7 @@ -#' @title Filter & QC Samples -#' @description Filters cells and Genes for each sample and generates QC Plots -#' to evaluate data before and after filtering. +#' @title Filter Low Quality Cells (CCBR scRNA-seq) +#' @description Filters cells and genes across various criteria for each sample. +#' Multiple cell and gene filters can be selected to remove poor quality data +#' and noise while generating QC plots before and after filtering. #' @details This is Step 2 in the basic Single-Cell RNA-seq workflow. Multiple #' cell and gene filters can be selected to remove poor quality data and noise. #' Workflows can use this downstream of any Seurat Object. This tool is @@ -71,18 +72,18 @@ #' Usage c(lower limit, Upper Limit). E.g. setting to c(NA,50) will not set a #' lower limit and remove cells with greater than 50% of reads in the top N #' genes. (Default: c(NA,NA)) -#' @param mad.topNgenes.limitsSet Filter limits based on how many Median +#' @param mad.topNgenes.limits Filter limits based on how many Median #' Absolute Deviations an outlier cell will have. Calculated from the Median #' percentage of counts in the top N Genes. #' Usage c(lower limit, Upper Limit). E.g. setting to c(5,5) will remove all #' cells with more than 5 absolute deviations greater than or 5 absolute #' deviations less than the median percentage. (Default: c(5,5)) -#' @param n.topgnes Select the number of top highly expressed genes used to +#' @param n.topgenes Select the number of top highly expressed genes used to #' calculate the percentage of reads found in these genes. #' E.g. a value of 20 calculates the percentage of reads found in the top 20 #' most highly expressed Genes. #' (Default: 20) -#' @param do.doublets.fitler Use scDblFinder to identify and remove doublet +#' @param do.doublets.filter Use scDblFinder to identify and remove doublet #' cells. Doublets are defined as two cells that are sequenced under the same #' cellular barcode, for example, if they were captured in the same droplet. #' (Default: TRUE) @@ -104,12 +105,22 @@ #' @importFrom stringr str_split_fixed #' @importFrom stats mad median #' @importFrom grid grobHeight textGrob grid.newpage gTree grid.draw - +#' #' #' @export #' #' @return Seurat Object and QC plots - +#' +#' @examples +#' \dontrun{ +#' out <- filterQC( +#' object = so_list, +#' min.cells = 20, +#' n.topgenes = 20, +#' do.doublets.filter = TRUE +#' ) +#' } +#' filterQC <- function(object, ## Filter Samples @@ -125,8 +136,8 @@ filterQC <- function(object, mad.complexity.limits = c(5,NA), topNgenes.limits = c(NA,NA), mad.topNgenes.limits = c(5,5), - n.topgnes=20, - do.doublets.fitler=T, + n.topgenes=20, + do.doublets.filter=TRUE, ## dim Reduction settings plot.outliers="None", #options(None,UMAP,tSNE) @@ -138,7 +149,7 @@ filterQC <- function(object, high.cut.disp = 100000, selection.method = "vst", npcs = 30, - vars_to_regress=NULL, + vars.to.regress=NULL, seed.for.PCA = 42, seed.for.TSNE = 1, seed.for.UMAP = 42 @@ -155,7 +166,7 @@ filterQC <- function(object, ### Helper Functions ##### - .topNGenes <- function(so,n.topgnes) { + .topNGenes <- function(so,n.topgenes) { ##Extract counts table counts_matrix = GetAssayData(so, slot="counts") @@ -163,7 +174,7 @@ filterQC <- function(object, tbl= apply(counts_matrix,2,function(i){ cnts=i[order(i,decreasing=T)] - t20=sum(cnts[1:n.topgnes]) + t20=sum(cnts[1:n.topgenes]) total=sum(cnts) pertop20=(t20/total)*100 @@ -214,7 +225,7 @@ filterQC <- function(object, .plotViolin2=function(count.df,value){ axis.lab = unique(count.df$filt) ylabs=gsub(" \\(", "\n\\(",value) - ylabs=gsub(paste0(" Top",n.topgnes), paste0("\nTop",n.topgnes),ylabs) + ylabs=gsub(paste0(" Top",n.topgenes), paste0("\nTop",n.topgenes),ylabs) ### Set up table fore cut off lines ## clean up cutoff values @@ -301,7 +312,7 @@ filterQC <- function(object, # count.df$filt=factor(count.df$filt,levels = c('filt','raw')) count.df$filt=factor(count.df$filt,levels = c('raw','filt')) ylabs=gsub(" \\(", "\n\\(",value) - ylabs=gsub(paste0(" Top",n.topgnes), paste0("\nTop",n.topgnes),ylabs) + ylabs=gsub(paste0(" Top",n.topgenes), paste0("\nTop",n.topgenes),ylabs) ### Set up table fore cut off lines ## clean up cutoff values @@ -469,7 +480,7 @@ filterQC <- function(object, xlab = as.character(xaxis) ylab = as.character(yaxis) ylab=gsub(" \\(", "\n\\(",ylab) - ylab=gsub(paste0(" Top",n.topgnes), paste0("\nTop",n.topgnes),ylab) + ylab=gsub(paste0(" Top",n.topgenes), paste0("\nTop",n.topgenes),ylab) name = paste(ylab,"vs.",xlab) g =ggplot(count.df, aes(x=.data[[xaxis]], y=.data[[yaxis]],color = Sample))+ @@ -510,7 +521,7 @@ filterQC <- function(object, .plotViolinPost2=function(count.df,yaxis){ axis.lab = unique(count.df$Sample) ylabs=gsub(" \\(", "\n\\(",yaxis) - ylabs=gsub(paste0(" Top",n.topgnes), paste0("\nTop",n.topgnes),ylabs) + ylabs=gsub(paste0(" Top",n.topgenes), paste0("\nTop",n.topgenes),ylabs) g=ggplot(count.df, aes(x=Sample, y=(.data[[yaxis]]))) + @@ -548,7 +559,7 @@ filterQC <- function(object, if(plot.outliers!="none"){ so.nf.qcFiltr <- SCTransform(so.nf,do.correct.umi = TRUE, - vars.to.regress=vars_to_regress, + vars.to.regress=vars.to.regress, return.only.var.genes = FALSE) so.nf.qcFiltr = FindVariableFeatures(object = so.nf.qcFiltr, nfeatures = nfeatures, @@ -625,7 +636,7 @@ filterQC <- function(object, ## Caluclate filter Metrics ## calculate Counts in Top 20 Genes - so=.topNGenes(so,n.topgnes) + so=.topNGenes(so,n.topgenes) ## Counts(umi) Filter mad.ncounts.limits=.madCalc(so,'nCount_RNA',mad.ncounts.limits) @@ -700,7 +711,7 @@ filterQC <- function(object, ) ## doublets Filter - if(do.doublets.fitler==T){ + if(do.doublets.filter==T){ doublets.fitler <- so@meta.data$Doublet%in%"singlet" }else{ doublets.fitler=rep(TRUE,nrow(so@meta.data)) @@ -749,7 +760,7 @@ filterQC <- function(object, filtSum[,"Cells before Filtering"]=nrow(filter_matrix) filtSum[,"Cells after all Filters"]=sum(filterIndex) filtSum[,"Percent Remaining"]=perc.remain - topN.filterRename=paste0('% Counts in Top',n.topgnes,' Genes filter') + topN.filterRename=paste0('% Counts in Top',n.topgenes,' Genes filter') filtTbl=colSums(filter_matrix==F)%>%t()%>%as.data.frame() filtTbl=rename(filtTbl, 'UMI Count (nCount_RNA)' = 'ncounts.filter', @@ -767,7 +778,7 @@ filterQC <- function(object, ########################################################## # ## create Filter Limits table - topN.filterRename=paste0('% Counts in Top',n.topgnes,' Genes') + topN.filterRename=paste0('% Counts in Top',n.topgenes,' Genes') cat('VDJ Genes Removed: ',length(VDJgenesOut), '\n') cat('Minimum Cells per Gene: ',min.cells,'\n') cat('UMI Count (nCount_RNA) Limits: ',ncounts.limits,'\n') @@ -780,7 +791,7 @@ filterQC <- function(object, cat('MAD Complexity (log10GenesPerUMI) Limits: ',mad.complexity.limits,'\n') cat(topN.filterRename,' Limits: ',topNgenes.limits,'\n') cat('MAD ',topN.filterRename,' Limits: ',mad.topNgenes.limits,'\n') - cat('Doublets Filter: ',do.doublets.fitler,'\n') + cat('Doublets Filter: ',do.doublets.filter,'\n') @@ -821,7 +832,7 @@ filterQC <- function(object, paste0(c("Low:","High:"),topNgenes.limits)%>%paste(collapse = "\n") FiltLmts[,paste0('MAD ',topN.filterRename,'')]= paste0(c("Low:","High:"),mad.topNgenes.limits)%>%paste(collapse = "\n") - FiltLmts[,'DoubletFinder (scDblFinder)']=do.doublets.fitler + FiltLmts[,'DoubletFinder (scDblFinder)']=do.doublets.filter rownames(FiltLmts)=i ### Apply Filters #### @@ -878,13 +889,13 @@ filterQC <- function(object, ## calculate Counts in Top 20 Genes ##calculated after min.cell filter as well - so=.topNGenes(so,n.topgnes) + so=.topNGenes(so,n.topgenes) ## Annotate Doublets: #### ## Gene filter does not effect doublet ident and so not recalculated - if( do.doublets.fitler==T){ + if( do.doublets.filter==T){ sce <- as.SingleCellExperiment(so) set.seed(123) @@ -995,7 +1006,7 @@ filterQC <- function(object, table.meta$nFeature_RNA=as.numeric(table.meta$nFeature_RNA) table.meta$filt=factor(table.meta$filt,levels = c('raw','filt')) - topN.filterRename=paste0('% Counts in Top',n.topgnes,' Genes') + topN.filterRename=paste0('% Counts in Top',n.topgenes,' Genes') table.meta=rename(table.meta, 'UMI Count (nCount_RNA)' = 'nCount_RNA', diff --git a/R/Filter_Seurat_Object_by_Metadata.R b/R/Filter_Seurat_Object_by_Metadata.R index 2d76180..7b4a7a6 100644 --- a/R/Filter_Seurat_Object_by_Metadata.R +++ b/R/Filter_Seurat_Object_by_Metadata.R @@ -1,5 +1,6 @@ -#' @title Filter Seurat Object by Metadata -#' @description Filter and subset your Seurat object based on metadata column +#' @title Subset Seurat Object [CCBR] [scRNA-seq] +#' @description This function subsets your Seurat object by selecting a +#' metadata column and values matching the cells to pass forward in analysis. #' @details This is a downstream template that should be loaded after #' Step 5 of the pipeline (SingleR Annotations on Seurat Object) #' @@ -32,7 +33,7 @@ #' which have been highlighted. Default is 0.5 #' @param use.cite.seq.data TRUE if you would like to plot Antibody clusters #' from CITEseq instead of scRNA. - +#' #' #' @import Seurat #' @import ggplot2 @@ -49,8 +50,19 @@ #' @export #' #' @return a subset Seurat object - - +#' +#' @examples +#' \dontrun{ +#' out <- filterSeuratObjectByMetadata( +#' object = anno_so, +#' samples.to.include = c("sample1", "sample2"), +#' sample.name = "orig.ident", +#' category.to.filter = "celltype", +#' values.to.filter = c("T cell", "B cell") +#' ) +#' } +#' +#' filterSeuratObjectByMetadata <- function(object, samples.to.include, sample.name, diff --git a/R/Harmony.R b/R/Harmony.R index 85401dc..619eb1f 100644 --- a/R/Harmony.R +++ b/R/Harmony.R @@ -5,13 +5,16 @@ #' (SCT scale.data) to obtain PCA embeddings. Performs harmony on #' decomposed embedding and adjusts decomposed gene expression values #' by harmonized embedding. -#' @param seurat_object Seurat-class object +#' @param object Seurat-class object containing gene expression data and +#' metadata with the batch variable. #' @param nvar Number of variable genes to subset the gene expression data by #' (Default: 2000) #' @param genes.to.add Add genes that might not be found among variably #' expressed genes #' @param group.by.var Which variable should be accounted for when running #' batch correction +#' @param return.lognorm Logical; if TRUE, retain log-normalized assay behavior +#' in downstream handling. (Default: TRUE) #' @param npc Number of principal components to use when running Harmony #' (Default: 20) @@ -20,6 +23,7 @@ #' @import gridExtra #' @import RColorBrewer #' @import ggplot2 +#' @importFrom patchwork plot_layout #' #' @export #' @examples @@ -36,20 +40,13 @@ #' @return A list: adj.object with harmony-adjusted gene expression (SCT slot) #' adj.tsne: harmonized tSNE plot -object = readRDS('tests/testthat/fixtures/BRCA/BRCA_Combine_and_Renormalize_SO_downsample.rds') - harmonyBatchCorrect <- function(object, nvar = 2000, genes.to.add = c(), group.by.var, - return_lognorm = T, + return.lognorm = T, npc = 30) { -library(patchwork) -library(harmony) -library(Seurat) -library(ggplot2) -library(RColorBrewer) # Error and Warning Messages if(is.null(genes.to.add)){ @@ -145,14 +142,10 @@ library(RColorBrewer) object@reductions$pca@stdev <- pppca$d # Store original log-normalized data and scaling parameters for back-calculation - if (return_lognorm) { + if (return.lognorm) { library(Matrix) # Get log-normalized data for the variable features lognorm_data <- object@assays$SCT@data[mvf, , drop = FALSE] - print(str(object)) - print("hello") - print(class(lognorm_data)) - print(dim(lognorm_data)) # Calculate scaling parameters from the original scaled data #scale_center <- Matrix::rowMeans(lognorm_data) @@ -216,16 +209,17 @@ library(RColorBrewer) guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) + annotate("text", x = Inf, y = -Inf, label = "Harmonized UMAP", hjust = 1.1, vjust = -1, size = 5) - print((orig.tsne + harm.tsne) + plot_layout(ncol = 2)) - print((orig.umap + harm.umap) + plot_layout(ncol = 2)) - + + tsneComb=(orig.tsne + harm.tsne) + plot_layout(ncol = 2) + umapComb=(orig.umap + harm.umap) + plot_layout(ncol = 2) + # Calculate adjusted gene expression from embeddings harm.embeds <- object@reductions$harmony@cell.embeddings harm.lvl.backcalc.scaled <- harm.embeds %*% t(ppldngs) # Store batch-corrected scaled data in Harmony assay - if (return_lognorm) { + if (return.lognorm) { # Fast conversion back to log-normalized space # Direct vectorized operations on the transposed matrix harm.lvl.backcalc.lognorm <- t(harm.lvl.backcalc.scaled) * scaling_params$scale[mvf] + scaling_params$center[mvf] @@ -237,19 +231,28 @@ library(RColorBrewer) print("Batch-corrected scaled data stored in object@assays$Harmony@scale.data") } - # Insert back-calculated data into seurat + # Insert back-calculated data into seurat + print( "Insert back-calculated data into seurat") object[["Harmony"]] <- CreateAssayObject(data = harm.lvl.backcalc.lognorm) #object[["Harmony"]] <- CreateAssayObject(data = Matrix::Matrix(t(harm.lvl.backcalc.lognorm), sparse = TRUE)) object@assays$Harmony@scale.data <- t(harm.lvl.backcalc.scaled) + print( "Scale Harmony data") object <- ScaleData(object, assay = "Harmony", verbose = FALSE) + print( "re-run PCA on harmony") # re-run PCA on harmony embeddings using top variable genes (mvf) object <- RunPCA(object, assay = "Harmony", verbose = FALSE, features = rownames(object)) object <- FindNeighbors(object, reduction = "harmony", dims = 1:10, assay = "Harmony") + return( - list("object"=object) - ) + list("object"=object, + "plots"=list( + "tsne"=tsneComb, + "umap"=umapComb + ) + ) + ) } diff --git a/R/Heatmap.R b/R/Heatmap.R index 077bea8..0c50340 100755 --- a/R/Heatmap.R +++ b/R/Heatmap.R @@ -7,6 +7,8 @@ #' @param sample.names Sample names #' @param metadata Metadata column to plot #' @param transcripts Transcripts to plot +#' @param use.assay Assay to use for transcript expression values. Choices are +#' "SCT" or "Harmony" (default is "SCT") #' @param proteins Proteins to plot (default is NULL) #' @param heatmap.color Color for heatmap. Choices are "Cyan to Mustard", #' "Blue to Red", "Red to Vanilla", "Violet to Pink", "Bu Yl Rd", @@ -27,8 +29,8 @@ #' @param trim.outliers Remove outlier data (default is TRUE) #' @param trim.outliers.percentage Set outlier percentage (default is 0.01) #' @param order.heatmap.rows Order heatmap rows (default is FALSE) -#' @param row.order Gene vector to set row order. If NULL, use cluster order -#' (default is NULL) +#' @param row.order Gene vector to set row order. If empty, use cluster order +#' (default is empty vector) #' #' @import Seurat #' @importFrom ComplexHeatmap pheatmap @@ -47,11 +49,21 @@ #' @return This function returns a heatmap plot and the data underlying the #' heatmap. #' +#' @examples +#' \dontrun{ +#' out <- heatmapSC( +#' object = anno_so, +#' sample.names = c("sample1", "sample2"), +#' metadata = "celltype", +#' transcripts = c("CD3D", "MS4A1") +#' ) +#' } +#' heatmapSC <- function(object, sample.names, metadata, transcripts, - use_assay = 'SCT', + use.assay = 'SCT', proteins = NULL, heatmap.color = "Bu Yl Rd", plot.title = "Heatmap", @@ -314,21 +326,21 @@ heatmapSC <- function(object, df.mat1 = NULL if (length(transcripts) > 0) { if (length(transcripts) == 1) { - if(use_assay == 'SCT'){ + if(use.assay == 'SCT'){ df.mat1 <- vector(mode = "numeric", length = length(object$SCT@scale.data[transcripts,])) df.mat1 <- object$SCT@scale.data[transcripts,] - } else if (use_assay == 'Harmony'){ + } else if (use.assay == 'Harmony'){ df.mat1 <- vector(mode = "numeric", length = length(object$Harmony@scale.data[transcripts,])) df.mat1 <- object$Harmony@scale.data[transcripts,] } } else { - if(use_assay == 'SCT'){ + if(use.assay == 'SCT'){ df.mat1 <- as.matrix(object$SCT@scale.data[transcripts,]) - } else if (use_assay == 'Harmony'){ + } else if (use.assay == 'Harmony'){ df.mat1 <- as.matrix(object$Harmony@scale.data[transcripts,]) } } diff --git a/R/ModuleScore.R b/R/ModuleScore.R index ed57712..4839763 100644 --- a/R/ModuleScore.R +++ b/R/ModuleScore.R @@ -11,11 +11,13 @@ #' as the column names, and marker(s) as the entries #' in each column. #' Requires SCT@data to be present within Seurat Object -#' @param use_columns Select specific columns within Marker Table to analyze. +#' @param group.var Metadata column used for grouping in diagnostic plots. +#' (Default: "orig.ident") +#' @param use.columns Select specific columns within Marker Table to analyze. #' Markers from unselected columns won't be included. -#' @param ms_threshold Allow user-specified module score thresholds. +#' @param ms.threshold Allow user-specified module score thresholds. #' Provide one threshold for each Celltype you included -#' in the "use_columns" parameter. +#' in the "use.columns" parameter. #' For each Celltype, provide the Celltype name, #' then a space, then type your threshold for that Celltype. #' This threshold must be a number between 0.0 and 1.0. @@ -68,6 +70,7 @@ #' @import grid #' @import data.table #' @import utils +#' @import stringr str_split #' @importFrom dplyr select #' #' @export @@ -76,8 +79,8 @@ #' modScore( #' object = seuratObject, #' marker.table = immuneCellMarkers, -#' use_columns = c("CD4_T", "Treg", "Monocytes"), -#' ms_threshold = c("CD4_T 0.1", "Treg 0.4", "Monocytes 0.3"), +#' use.columns = c("CD4_T", "Treg", "Monocytes"), +#' ms.threshold = c("CD4_T 0.1", "Treg 0.4", "Monocytes 0.3"), #' general.class = c("CD4_T", "Monocytes"), #' multi.lvl = FALSE #' ) @@ -85,8 +88,8 @@ #' modScore( #' object = seuratObject, #' marker.table = immuneCellMarkers, -#' use_columns = c("CD4_T", "Treg", "Monocytes"), -#' ms_threshold = c("CD4_T 0.1", "Treg 0.4", "Monocytes 0.3"), +#' use.columns = c("CD4_T", "Treg", "Monocytes"), +#' ms.threshold = c("CD4_T 0.1", "Treg 0.4", "Monocytes 0.3"), #' general.class = c("CD4_T", "Monocytes"), #' multi.lvl = TRUE, #' lvl.df = parentChildTable @@ -99,9 +102,9 @@ modScore <- function(object, marker.table, - group_var = "orig.ident", - use_columns, - ms_threshold, + group.var = "orig.ident", + use.columns, + ms.threshold, general.class, multi.lvl = FALSE, lvl.df=NULL, @@ -111,18 +114,12 @@ modScore <- function(object, violin.ft.size = 6, step.size = 0.1) { - library(Seurat) - library(gridExtra) - library(grid) - library(dplyr) - library(stringr) - library(ggplot2) # Function for separating and calling cells by bimodal thresholds - .modScoreCall <- function(ms.meta, numeric_threshold, reject) { + .modScoreCall <- function(ms.meta, numeric.threshold, reject) { thres.ls <- list() for (i in 1:ncol(ms.meta)) { - thres.ls[[i]] <- rep(numeric_threshold[i], nrow(ms.meta)) + thres.ls[[i]] <- rep(numeric.threshold[i], nrow(ms.meta)) } thres.df <- data.frame(matrix(unlist(thres.ls), nrow = nrow(ms.meta))) thres.filter <- ms.meta > thres.df @@ -138,7 +135,7 @@ modScore <- function(object, # Upstream processing # String split celltype_thresholds - numeric portion - numeric_threshold <- sapply(stringr::str_split(ms_threshold, " "), function(x) as.numeric(x[2])) + numeric.threshold <- sapply(stringr::str_split(ms.threshold, " "), function(x) as.numeric(x[2])) if (!"Barcode" %in% colnames(object@meta.data)) { object@meta.data$Barcode <- rownames(object@meta.data) @@ -147,12 +144,12 @@ modScore <- function(object, colnames(object@meta.data)) # Marker table processing - marker.table <- marker.table[,use_columns] + marker.table <- marker.table[,use.columns] marker.tab <- unlist(marker.table) - celltypes <- sapply(str_split(ms_threshold, " "), function(x) as.character(x[1])) + celltypes <- sapply(str_split(ms.threshold, " "), function(x) as.character(x[1])) - if (any(!celltypes %in% use_columns)){ - unmatched_celltypes <- celltypes[!celltypes %in% use_columns] + if (any(!celltypes %in% use.columns)){ + unmatched_celltypes <- celltypes[!celltypes %in% use.columns] celltype_mismatch_message <- paste0("Labels from thresholds does not match columns from marker table: ",paste(unmatched_celltypes, collapse = ", ")) stop(celltype_mismatch_message) } @@ -163,9 +160,9 @@ modScore <- function(object, 0) { stop("No genes from list was found in data") } - if (length(numeric_threshold) != length(celltypes)) { - if (sum(numeric_threshold) == 0) { - numeric_threshold <- rep(0, length(celltypes)) + if (length(numeric.threshold) != length(celltypes)) { + if (sum(numeric.threshold) == 0) { + numeric.threshold <- rep(0, length(celltypes)) print("Module Score threshold set to zero - outputing preliminary data") } else { stop("Threshold length does not match # celltypes to analyze") @@ -173,7 +170,7 @@ modScore <- function(object, } # For each celltype, print out present / nonpresent genes, calculate MS and generate plots - names(numeric_threshold) <- celltypes + names(numeric.threshold) <- celltypes figures <- list() exclude_cells <- c() h = 0 @@ -238,26 +235,26 @@ modScore <- function(object, umap.pos <- clusmat %>% group_by(clusid) %>% dplyr::summarise(umap1.mean = mean(umap1), umap2.mean = mean(umap2)) title = as.character(m) clusmat <- clusmat %>% dplyr::arrange(clusid) - clusid.df <- data.frame(id = object@meta.data[[group_var]], + clusid.df <- data.frame(id = object@meta.data[[group.var]], ModuleScore = object@meta.data[[m]]) g <- ggplot(clusmat, aes(x = umap1, y = umap2)) + theme_bw() + theme(legend.title = element_blank()) + geom_point(aes(colour = sample_clusid), alpha = 0.5, shape = 20, size = 1) + scale_color_gradientn(colours = c("blue4", "lightgrey", "red"), values = scales::rescale(c(0, - numeric_threshold[celltype_name]/2, numeric_threshold[celltype_name], (numeric_threshold[celltype_name] + 1)/2, + numeric.threshold[celltype_name]/2, numeric.threshold[celltype_name], (numeric.threshold[celltype_name] + 1)/2, 1), limits = c(0, 1))) + guides(colour = guide_legend(override.aes = list(size = 5, alpha = 1))) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) + xlab("tsne-1") + ylab("tsne-2") - g1 <- RidgePlot(object, features = m, group.by = group_var) + + g1 <- RidgePlot(object, features = m, group.by = group.var) + theme(legend.position = "none", title = element_blank(), axis.text.x = element_text(size = gradient.ft.size)) + - geom_vline(xintercept = numeric_threshold[celltype_name], linetype = "dashed", + geom_vline(xintercept = numeric.threshold[celltype_name], linetype = "dashed", color = "red3") + scale_x_continuous(breaks = seq(0, 1, step.size)) g2 <- ggplot(clusid.df, aes(x = id, y = ModuleScore)) + geom_violin(aes(fill = id)) + theme_classic() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.title = element_blank(), panel.background = element_blank(), axis.text.x = element_blank(), legend.text = element_text(size = rel(0.8)), legend.position = "top", axis.text.y = element_text(size = violin.ft.size)) + guides(colour = guide_legend(override.aes = list(size = 5, - alpha = 1))) + geom_hline(yintercept = numeric_threshold[celltype_name], + alpha = 1))) + geom_hline(yintercept = numeric.threshold[celltype_name], linetype = "dashed", color = "red3") + scale_y_continuous(breaks = seq(0, 1, step.size)) g3 <- ggplot(data.frame(x = d$x, y = d$y), aes(x, y)) + @@ -265,9 +262,9 @@ modScore <- function(object, geom_segment(aes(xend = d$x, yend = 0, colour = x)) + scale_y_log10() + scale_color_gradientn(colours = c("blue4", "lightgrey", "red"), values = scales::rescale(c(0, - numeric_threshold[celltype_name]/2, numeric_threshold[celltype_name], (numeric_threshold[celltype_name] + 1)/2, - 1), limits = c(0, 1))) + geom_vline(xintercept = numeric_threshold[celltype_name], - linetype = "dashed", color = "red3") + geom_vline(xintercept = numeric_threshold[celltype_name], linetype = "dashed", color = "red3") + scale_x_continuous(breaks = seq(0, 1, step.size)) + theme(legend.title = element_blank(), + numeric.threshold[celltype_name]/2, numeric.threshold[celltype_name], (numeric.threshold[celltype_name] + 1)/2, + 1), limits = c(0, 1))) + geom_vline(xintercept = numeric.threshold[celltype_name], + linetype = "dashed", color = "red3") + geom_vline(xintercept = numeric.threshold[celltype_name], linetype = "dashed", color = "red3") + scale_x_continuous(breaks = seq(0, 1, step.size)) + theme(legend.title = element_blank(), axis.text.x = element_text(size = 6)) figures[[celltype_name]] = arrangeGrob(g, g1, g2, g3, ncol = 2, top = textGrob(paste0(celltype_name," (General Class)"), gp = gpar(fontsize = 14, fontface = "bold"))) @@ -281,7 +278,7 @@ modScore <- function(object, # Heirarchical classification: general.class > subtypes general.class <- general.class[general.class %in% colnames(object@meta.data)] trunc.meta.gen <- object@meta.data[general.class] - gen.thrs.vec <- numeric_threshold[general.class] + gen.thrs.vec <- numeric.threshold[general.class] call.res <- .modScoreCall(trunc.meta.gen, gen.thrs.vec, reject = "unknown") call.res$Barcode <- rownames(call.res) @@ -313,14 +310,14 @@ modScore <- function(object, figures <- append(figures, list(NA), after = gap_ind) - figures[[gap_ind+1]] <- ggplot(trunc.meta.parent, aes_string(x = child)) + geom_density() + ggtitle(plot.title) + geom_vline(xintercept = numeric_threshold[child], linetype = "dashed", color = "red3") + theme_classic() + figures[[gap_ind+1]] <- ggplot(trunc.meta.parent, aes_string(x = child)) + geom_density() + ggtitle(plot.title) + geom_vline(xintercept = numeric.threshold[child], linetype = "dashed", color = "red3") + theme_classic() names(figures)[gap_ind+1] <- child } trunc.meta.no.parent <- call.res[!call.res$MS_Celltype == parent, ] non.parent <- rownames(trunc.meta.no.parent) - child.thres.vec <- numeric_threshold[children_class] + child.thres.vec <- numeric.threshold[children_class] sub.class.call[[match(parent, parent.class)]] <- .modScoreCall(trunc.meta.parent, child.thres.vec, reject = parent) %>% select(MS_Celltype)} @@ -338,7 +335,7 @@ modScore <- function(object, return( list("object"=object, - "figures" = figures) + "plots" = figures) ) } - \ No newline at end of file + diff --git a/R/ModuleScoreHelpers.R b/R/ModuleScoreHelpers.R index 462e582..2e55b43 100644 --- a/R/ModuleScoreHelpers.R +++ b/R/ModuleScoreHelpers.R @@ -9,13 +9,23 @@ #' @importFrom ggplot2 scale_y_continuous geom_line geom_segment scale_y_log10 scale_x_continuous #' @importFrom gridExtra arrangeGrob #' @importFrom grid textGrob gpar -NULL - +#' +#' +#' @examples +#' \dontrun{ +#' res <- compute_modscore_data( +#' object = seurat_obj, +#' marker.list = list(Tcell = c("CD3D", "TRBC1")), +#' use.columns = c("Tcell"), +#' reduction = "umap" +#' ) +#' } +#' #' @export -compute_modscore_data <- function(object, marker_list, use_columns, +compute_modscore_data <- function(object, marker.list, use.columns, reduction = c("tsne","umap","pca"), nbins = 10, - group_var = "orig.ident") { + group.var = "orig.ident") { reduction <- match.arg(reduction) if (!"Barcode" %in% colnames(object@meta.data)) { @@ -30,8 +40,8 @@ compute_modscore_data <- function(object, marker_list, use_columns, # Build per-celltype data res <- list() - for (celltype_name in use_columns) { - genes <- marker_list[[celltype_name]] + for (celltype_name in use.columns) { + genes <- marker.list[[celltype_name]] if (is.null(genes)) next object <- Seurat::AddModuleScore(object, list(genes), name = celltype_name, nbin = nbins, assay = "SCT") @@ -60,7 +70,7 @@ compute_modscore_data <- function(object, marker_list, use_columns, coords <- dplyr::mutate(coords, sample_clusid = coords$clusid) coords <- dplyr::arrange(coords, clusid) - clusid_df <- data.frame(id = object@meta.data[[group_var]], + clusid.df <- data.frame(id = object@meta.data[[group.var]], ModuleScore = object@meta.data[[m]], stringsAsFactors = FALSE) @@ -68,7 +78,7 @@ compute_modscore_data <- function(object, marker_list, use_columns, object = object, m = m, coords = coords, - clusid_df = clusid_df, + clusid.df = clusid.df, density = d ) } @@ -77,9 +87,9 @@ compute_modscore_data <- function(object, marker_list, use_columns, } #' @export -build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, - gradient_ft_size = 6, violin_ft_size = 6, step_size = 0.1, - group_var = "orig.ident", +build_modscore_plots <- function(object, m, coords, clusid.df, d, threshold, + gradient.ft.size = 6, violin.ft.size = 6, step.size = 0.1, + group.var = "orig.ident", reduction = c("tsne","umap","pca")) { reduction <- match.arg(reduction) @@ -93,10 +103,10 @@ build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, ggplot2::xlab(if (reduction == "tsne") "tsne-1" else if (reduction == "umap") "umap-1" else "pc-1") + ggplot2::ylab(if (reduction == "tsne") "tsne-2" else if (reduction == "umap") "umap-2" else "pc-2") - g1 <- Seurat::RidgePlot(object, features = m, group.by = group_var) + - ggplot2::theme(legend.position = "none", title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = gradient_ft_size)) + + g1 <- Seurat::RidgePlot(object, features = m, group.by = group.var) + + ggplot2::theme(legend.position = "none", title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = gradient.ft.size)) + ggplot2::geom_vline(xintercept = threshold, linetype = "dashed", color = "red3") + - ggplot2::scale_x_continuous(breaks = seq(0, 1, step_size)) + ggplot2::scale_x_continuous(breaks = seq(0, 1, step.size)) g3 <- ggplot2::ggplot(data.frame(x = d$x, y = d$y), ggplot2::aes(x, y)) + ggplot2::xlab("ModuleScore") + ggplot2::ylab("Density") + ggplot2::geom_line() + @@ -104,7 +114,7 @@ build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, ggplot2::scale_color_gradientn(colours = c("blue4", "lightgrey", "red"), values = scales::rescale(c(0, threshold/2, threshold, (threshold + 1)/2, 1), limits = c(0, 1))) + ggplot2::geom_vline(xintercept = threshold, linetype = "dashed", color = "red3") + - ggplot2::scale_x_continuous(breaks = seq(0, 1, step_size)) + ggplot2::theme(legend.title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = 6)) + ggplot2::scale_x_continuous(breaks = seq(0, 1, step.size)) + ggplot2::theme(legend.title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = 6)) arranged <- gridExtra::arrangeGrob(g, g1, g3, ncol = 2, top = grid::textGrob(m, gp = grid::gpar(fontsize = 14, fontface = "bold"))) list(g = g, g1 = g1, g3 = g3, arranged = arranged) diff --git a/R/ModuleScoreHelpers_011726.R b/R/ModuleScoreHelpers_011726.R index 6a76384..24a6e82 100644 --- a/R/ModuleScoreHelpers_011726.R +++ b/R/ModuleScoreHelpers_011726.R @@ -12,10 +12,10 @@ NULL #' @export -compute_modscore_data <- function(object, marker_list, use_columns, +compute_modscore_data <- function(object, marker.list, use.columns, reduction = c("tsne","umap","pca"), nbins = 10, - group_var = "orig.ident") { + group.var = "orig.ident") { reduction <- match.arg(reduction) if (!"Barcode" %in% colnames(object@meta.data)) { @@ -30,8 +30,8 @@ compute_modscore_data <- function(object, marker_list, use_columns, # Build per-celltype data res <- list() - for (celltype_name in use_columns) { - genes <- marker_list[[celltype_name]] + for (celltype_name in use.columns) { + genes <- marker.list[[celltype_name]] if (is.null(genes)) next object <- Seurat::AddModuleScore(object, list(genes), name = celltype_name, nbin = nbins, assay = "SCT") @@ -60,7 +60,7 @@ compute_modscore_data <- function(object, marker_list, use_columns, coords <- dplyr::mutate(coords, sample_clusid = coords$clusid) coords <- dplyr::arrange(coords, clusid) - clusid_df <- data.frame(id = object@meta.data[[group_var]], + clusid.df <- data.frame(id = object@meta.data[[group.var]], ModuleScore = object@meta.data[[m]], stringsAsFactors = FALSE) @@ -68,7 +68,7 @@ compute_modscore_data <- function(object, marker_list, use_columns, object = object, m = m, coords = coords, - clusid_df = clusid_df, + clusid.df = clusid.df, density = d ) } @@ -77,9 +77,9 @@ compute_modscore_data <- function(object, marker_list, use_columns, } #' @export -build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, - gradient_ft_size = 6, violin_ft_size = 6, step_size = 0.1, - group_var = "orig.ident", +build_modscore_plots <- function(object, m, coords, clusid.df, d, threshold, + gradient.ft.size = 6, violin.ft.size = 6, step.size = 0.1, + group.var = "orig.ident", reduction = c("tsne","umap","pca")) { reduction <- match.arg(reduction) @@ -93,10 +93,10 @@ build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, ggplot2::xlab(if (reduction == "tsne") "tsne-1" else if (reduction == "umap") "umap-1" else "pc-1") + ggplot2::ylab(if (reduction == "tsne") "tsne-2" else if (reduction == "umap") "umap-2" else "pc-2") - g1 <- Seurat::RidgePlot(object, features = m, group.by = group_var) + - ggplot2::theme(legend.position = "none", title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = gradient_ft_size)) + + g1 <- Seurat::RidgePlot(object, features = m, group.by = group.var) + + ggplot2::theme(legend.position = "none", title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = gradient.ft.size)) + ggplot2::geom_vline(xintercept = threshold, linetype = "dashed", color = "red3") + - ggplot2::scale_x_continuous(breaks = seq(0, 1, step_size)) + ggplot2::scale_x_continuous(breaks = seq(0, 1, step.size)) g3 <- ggplot2::ggplot(data.frame(x = d$x, y = d$y), ggplot2::aes(x, y)) + ggplot2::xlab("ModuleScore") + ggplot2::ylab("Density") + ggplot2::geom_line() + @@ -104,7 +104,7 @@ build_modscore_plots <- function(object, m, coords, clusid_df, d, threshold, ggplot2::scale_color_gradientn(colours = c("blue4", "lightgrey", "red"), values = scales::rescale(c(0, threshold/2, threshold, (threshold + 1)/2, 1), limits = c(0, 1))) + ggplot2::geom_vline(xintercept = threshold, linetype = "dashed", color = "red3") + - ggplot2::scale_x_continuous(breaks = seq(0, 1, step_size)) + ggplot2::theme(legend.title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = 6)) + ggplot2::scale_x_continuous(breaks = seq(0, 1, step.size)) + ggplot2::theme(legend.title = ggplot2::element_blank(), axis.text.x = ggplot2::element_text(size = 6)) arranged <- gridExtra::arrangeGrob(g, g1, g3, ncol = 2, top = grid::textGrob(m, gp = grid::gpar(fontsize = 14, fontface = "bold"))) list(g = g, g1 = g1, g3 = g3, arranged = arranged) diff --git a/R/Name_Clusters_by_Enriched_Cell_Type.R b/R/Name_Clusters_by_Enriched_Cell_Type.R index 52a1a08..aa7af4d 100644 --- a/R/Name_Clusters_by_Enriched_Cell_Type.R +++ b/R/Name_Clusters_by_Enriched_Cell_Type.R @@ -7,13 +7,16 @@ #' #' @param object Seurat-class object with cluster IDs column and cell type #' column present -#' @param cluster.numbers Vector containing cluster numbers that match the -#' (numeric) cluster ID's in the cluster.column in Seurat Object metadata -#' @param cluster.names Vector containing custom cluster labels #' @param cluster.column Column name containing cluster ID in the metadata slot #' in the object #' @param labels.column Column name containing labels (usually cell type) in the #' metadata slot in the object +#' @param cluster.identities.table Data frame containing cluster IDs and custom +#' cluster labels +#' @param cluster.numbers Column name in cluster.identities.table +#' containing cluster numbers that match values in cluster.column +#' @param cluster.names Column name in cluster.identities.table +#' containing custom cluster labels #' @param order.clusters.by Vector containing order of clusters in graph. Can #' contain a subset of cluster numbers to plot that match at least some of #' the values in the cluster.column. If NULL, use default order @@ -36,13 +39,29 @@ #' @export #' @return Returns Seurat-class object with updated meta.data slot containing #' custom cluster annotation and a plot +#' +#' @examples +#' \dontrun{ +#' map_tbl <- data.frame( +#' cluster_id = c("0", "1"), +#' label = c("T cell", "B cell") +#' ) +#' out <- nameClusters( +#' object = anno_so, +#' cluster.column = "seurat_clusters", +#' labels.column = "celltype", +#' cluster.identities.table = map_tbl, +#' cluster.numbers = "cluster_id", +#' cluster.names = "label" +#' ) +#' } nameClusters <- function(object, + cluster.column, + labels.column, cluster.identities.table, cluster.numbers, cluster.names, - cluster.column, - labels.column, order.clusters.by = NULL, order.celltypes.by = NULL, interactive = FALSE) diff --git a/R/Plot_Metadata.R b/R/Plot_Metadata.R index 2003c62..3f591de 100644 --- a/R/Plot_Metadata.R +++ b/R/Plot_Metadata.R @@ -41,19 +41,31 @@ #' @export #' #' @return a data.frame extracted from the Seurat object and plot +#' +#' @examples +#' \dontrun{ +#' out <- plotMetadata( +#' object = anno_so, +#' samples.to.include = c("sample1", "sample2"), +#' metadata.to.plot = c("celltype", "orig.ident"), +#' columns.to.summarize = c(), +#' reduction.type = "umap" +#' ) +#' } -plotMetadata <- function(#Basic Parameters: - object, - samples.to.include, - metadata.to.plot, - columns.to.summarize, - summarization.cut.off = 5, - reduction.type = "tsne", - use.cite.seq = FALSE, - show.labels = FALSE, - legend.text.size = 1, - legend.position = "right", - dot.size = 0.01 +plotMetadata <- function( + #Basic Parameters: + object, + samples.to.include, + metadata.to.plot, + columns.to.summarize, + summarization.cut.off = 5, + reduction.type = "tsne", + use.cite.seq = FALSE, + show.labels = FALSE, + legend.text.size = 1, + legend.position = "right", + dot.size = 0.01 ) { ################### @@ -358,15 +370,14 @@ plotMetadata <- function(#Basic Parameters: summarize.cut.off <- min(summarization.cut.off, 20) # checking for samples included: - if(any(grepl('c\\(|\\[\\]',samples))) { - samples = eval(parse(text = gsub('\\[\\]', 'c()', samples))) - }else{ - samples=samples + samples <- samples.to.include + if (is.character(samples) && any(grepl('c\\(|\\[\\]', samples))) { + samples <- eval(parse(text = gsub('\\[\\]', 'c()', samples))) } if (length(samples) == 0) { print("No samples specified. Using all samples...") - samples = unique(object@meta.data$sample_name) + samples = unique(object@meta.data$orig.ident) } ## Goal is to have column 1 of the new metadata be named "orig.ident" @@ -416,7 +427,7 @@ plotMetadata <- function(#Basic Parameters: # checking metadata for sanity - if(any(grepl('c\\(|\\[\\]',samples))) { + if (is.character(metadata.to.plot) && any(grepl('c\\(|\\[\\]', metadata.to.plot))) { m = eval(parse(text = gsub('\\[\\]', 'c()', metadata.to.plot))) }else{ m=metadata.to.plot @@ -452,7 +463,7 @@ plotMetadata <- function(#Basic Parameters: col <- meta.df[[i]] val.count <- length(unique(col)) - if ((val.count >= summarizeCutOff) & + if ((val.count >= summarize.cut.off) & (i != 'Barcode') & (!is.element(class(meta.df[[i]][1]), c("numeric", "integer")))) { freq.vals <- as.data.frame(-sort(-table(col)))$col[1:summarize.cut.off] diff --git a/R/Process_Raw_Data.R b/R/Process_Raw_Data.R index 4ba759d..43a8176 100755 --- a/R/Process_Raw_Data.R +++ b/R/Process_Raw_Data.R @@ -52,6 +52,15 @@ #' @export #' #' @return Seurat Object and QC plots +#' +#' @examples +#' \dontrun{ +#' out <- processRawData( +#' input = c("sample1_filtered_feature_bc_matrix.h5"), +#' organism = "Human", +#' do.normalize.data = TRUE +#' ) +#' } processRawData <- function(input, sample.metadata.table=NULL, diff --git a/R/Violin_Plots_by_Metadata.R b/R/Violin_Plots_by_Metadata.R index 290f78c..2b81d02 100644 --- a/R/Violin_Plots_by_Metadata.R +++ b/R/Violin_Plots_by_Metadata.R @@ -9,16 +9,17 @@ #' @param layer Slot to extract gene expression data from (Default: scale.data) #' @param genes Genes to visualize on the violin plot #' @param group Split violin plot based on metadata group -#' @param facet_by Split violin plot based on a second metadata group -#' @param filter_outliers Filter outliers from the data (TRUE/FALSE) -#' @param outlier_low Filter lower bound outliers (Default = 0.05) -#' @param outlier_high Filter upper bound outliers (Default = 0.95) -#' @param jitter_points Scatter points on the plot (TRUE/FALSE) -#' @param jitter_dot_size Set size of individual points - +#' @param facet.by Split violin plot based on a second metadata group +#' @param filter.outliers Filter outliers from the data (TRUE/FALSE) +#' @param outlier.low Filter lower bound outliers (Default = 0.05) +#' @param outlier.high Filter upper bound outliers (Default = 0.95) +#' @param jitter.points Scatter points on the plot (TRUE/FALSE) +#' @param jitter.dot.size Set size of individual points +#' #' @import Seurat #' @import reshape2 #' @import tidyverse +#' @importFrom tidyr pivot_longer #' @import cowplot #' @import rlang #' @import ggplot2 @@ -32,10 +33,10 @@ #' layer = "data", #' genes = c("Cd4", "Cd8a"), #' group = "celltype", -#' facet_by = "orig.ident", -#' filter_outliers = TRUE, -#' jitter_points = TRUE, -#' jitter_dot_size = 0.5 +#' facet.by = "orig.ident", +#' filter.outliers = TRUE, +#' jitter.points = TRUE, +#' jitter.dot.size = 0.5 #' ) #' } @@ -46,21 +47,15 @@ violinPlot <- function (object, layer, genes, group, - facet_by = "", - filter_outliers = F, - outlier_low = 0.05, - outlier_high = 0.95, - jitter_points, - jitter_dot_size) + facet.by = "", + filter.outliers = F, + outlier.low = 0.05, + outlier.high = 0.95, + jitter.points, + jitter.dot.size) { - library(Seurat) - library(ggplot2) - library(gridExtra) - library(tidyr) - library(dplyr) - library(broom) - - facet_data = facet_by != "" + + facet_data = facet.by != "" # for handling orig ident if (group == "orig.ident" | group == "orig_ident"){ @@ -96,7 +91,7 @@ violinPlot <- function (object, genes.present <- genes[genes %in% rownames(gene_mtx)] if(facet_data){ - meta_sub <- object@meta.data[,c(group,facet_by)] + meta_sub <- object@meta.data[,c(group,facet.by)] } else { meta_sub <- object@meta.data[c(group)] } @@ -111,7 +106,7 @@ violinPlot <- function (object, data_df$Gene <- factor(data_df$Gene, levels = genes.present) if(facet_data){ - unique_facets <- unique(object@meta.data[,facet_by]) + unique_facets <- unique(object@meta.data[,facet.by]) } else{ unique_facets <- NULL } @@ -126,18 +121,18 @@ violinPlot <- function (object, # Define the outlier removal function .removeOutliers <- function(x, na.rm = TRUE){ - qnt <- quantile(x, probs = c(outlier_low, outlier_high), na.rm = na.rm) + qnt <- quantile(x, probs = c(outlier.low, outlier.high), na.rm = na.rm) H <- 1.5 * IQR(x, na.rm = na.rm) x[x < (qnt[1] - H) | x > (qnt[2] + H)] <- NA x } # Apply only if filtering is enabled - if (filter_outliers) { - group_vars <- colnames(data_df)[colnames(data_df) != 'Expression'] + if (filter.outliers) { + group.vars <- colnames(data_df)[colnames(data_df) != 'Expression'] data_df <- data_df %>% - group_by(across(all_of(group_vars))) %>% + group_by(across(all_of(group.vars))) %>% mutate(Expression = .removeOutliers(Expression)) %>% ungroup() } @@ -148,7 +143,7 @@ violinPlot <- function (object, color_mapping <- setNames(rep(available_colors, length.out = length(unique_facets)), unique_facets) # Set up the common elements of the plot - g <- ggplot(data_df, aes(x = .data[[group]], y = Expression, fill = .data[[facet_by]])) + + g <- ggplot(data_df, aes(x = .data[[group]], y = Expression, fill = .data[[facet.by]])) + geom_violin(scale = "width", position = position_dodge(width = 0.9), trim = TRUE) + geom_boxplot(width = 0.2, position = position_dodge(width = 0.9), outlier.shape = NA) + scale_fill_manual(values = color_mapping) + @@ -180,9 +175,9 @@ violinPlot <- function (object, } # Add jitter points conditionally - if (jitter_points) { - g <- g + geom_jitter(size = jitter_dot_size, shape = 1, position = position_dodge(width = 0.9), alpha = 0.5) + if (jitter.points) { + g <- g + geom_jitter(size = jitter.dot.size, shape = 1, position = position_dodge(width = 0.9), alpha = 0.5) } - return(list(plots=g)) + return(list("plots"=g)) } diff --git a/docs/articles/SCWorkflow-Annotations.html b/docs/articles/SCWorkflow-Annotations.html index a545672..3feac25 100644 --- a/docs/articles/SCWorkflow-Annotations.html +++ b/docs/articles/SCWorkflow-Annotations.html @@ -101,62 +101,10 @@


This function will merge an external table of cell annotations into -an existing Seurat Object’s metadata table. The input external metadata -table must have a column named “Barcode” that contains barcodes matching -those found in the metadata already present in the input Seurat Object. -The output will be a new Seurat Object with metadata that now includes -the additional columns from the external table.
-
-
-CellType_Anno_Table=read.csv("./images/PerCell_Metadata.csv")| Barcode | -Cell.Type | -
|---|---|
| PBS_AAACCTGCAAGGTTTC-1 | -Monocytes | -
| PBS_AAACCTGGTCTAGCGC-1 | -Monocytes | -
| PBS_AAACCTGTCACTTACT-1 | -Monocytes | -
| PBS_AAACCTGTCAGCGATT-1 | -Monocytes | -
| PBS_AAACCTGTCAGCTGGC-1 | -Macrophages | -
| PBS_AAACCTGTCGGCTTGG-1 | -Macrophages | -
-
-ExtAnno_SO=ExternalAnnotation(object = Anno_SO$object,
- external_metadata = CellType_Anno_Table,
- seurat_object_filename = "seurat_object.rds",
- barcode_column = "Barcode",
- external_cols_to_add = c("Cell Type"),
- col_to_viz = "Cell Type"
- )
-grep('Cd4',rownames(Anno_SO$object@assays$RNA),ignore.case = T,value=T)
+
+grep("Cd4",rownames(Anno_SO$object@assays$RNA),ignore.case = T,value=T)
DLAnno_SO=dualLabeling(object = Anno_SO$object,
samples <- c("PBS","CD8dep","ENT","NHSIL12","Combo"),
@@ -239,9 +187,9 @@ Color by Gene Lists
+
Marker_Table <- read.csv("Marker_Table_demo.csv")
-
+
colorByMarkerTable(object=Anno_SO$object,
samples.subset=c("PBS","ENT","NHSIL12", "Combo","CD8dep" ),
@@ -300,12 +248,12 @@ Module Score Cell ClassificationOutput: An updated scRNA-seq
object with new cell type labels.
-
+
MS_object=modScore(object=Anno_SO$object,
marker.table=Marker_Table,
- use_columns = c("Neutrophils","Macrophages","CD8_T" ),
- ms_threshold=c("Neutrophils .25","Macrophages .40","CD8_T .14"),
+ use.columns = c("Neutrophils","Macrophages","CD8_T" ),
+ ms.threshold=c("Neutrophils .25","Macrophages .40","CD8_T .14"),
general.class=c("Neutrophils","Macrophages","CD8_T"),
multi.lvl = FALSE,
reduction = "umap",
@@ -353,7 +301,7 @@ Rename Clusters by Cell Type
+
clstrTable <- read.table(file = "./images/Cluster_Names.txt", sep = '\t',header = T)