Last active
March 18, 2021 17:14
-
-
Save k3yavi/cf71cbe753d57cb6acf1c1d34c013045 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| suppressPackageStartupMessages({ | |
| library(Signac) | |
| library(Seurat) | |
| library(Matrix) | |
| library(ggplot2) | |
| library(patchwork) | |
| library(dplyr) | |
| library(RcppRoll) | |
| library(fastmatch) | |
| library(EnsDb.Hsapiens.v86) | |
| library(reshape2) | |
| sourceCpp("~/MyHmm.cpp") | |
| }) | |
| # Usage | |
| # data <- LoadData() | |
| # idents <- factor(c("B", "CD4 T", "Mono")) | |
| # HorizontalCoveragePlot(data = data, | |
| # region = "PAX5", | |
| # gname = "PAX5", | |
| # extend.upstream = 5000, | |
| # extend.downstream = 5000, | |
| # state.plot = "/home/srivastavaa/v10_bingjie_cut_n_tag/chromHMM/model_12/POSTERIOR/", | |
| # idents = idents, | |
| # marks = c("asap", "k4me1", "k4me2", "k4me3", "k27me3", "k27ac", "pol2", "apol2") | |
| # ) | |
| { | |
| GetALLPosterior <- function(data, roi, num.threads=10) { | |
| qregions <- GetTileRegions(roi) | |
| print("extracting 200bp regions") | |
| data <- GetRegionMatrix(data, qregions) | |
| print("imputing") | |
| imp.matrices <- ImputeCounts(data) | |
| print("extracting tiles") | |
| qregions <- GetTileRegions(roi, combined = F) | |
| initialize <- data$hmm$init # rep(1.153589e-153, 12) | |
| emission <- as.matrix(data$hmm$emission[names(data[1:5])]) | |
| transition <- as.matrix(data$hmm$transition) | |
| print("calculating posterior") | |
| GetThreadPosterior <- function(x){ | |
| range <- length(qregions) %/% num.threads | |
| start <- x*range + 1 | |
| end <- (x+1)*range | |
| probs <- lapply(seq(start, end), function(qid){ | |
| string.query <- GRangesToString(qregions[[qid]]) | |
| matrices <- lapply(names(data[1:5]), function(mark){ | |
| as.matrix(imp.matrices[[mark]]@assays$imputed@data[string.query, Cells(data$az.obj)]) | |
| }) | |
| posterior.probs <- posterior_fast(initialize, emission, transition, matrices) | |
| posterior.probs <- lapply(posterior.probs, function(prob) { | |
| rownames(prob) <- string.query | |
| prob | |
| }) | |
| names(posterior.probs) <- Cells(data$az.obj) | |
| posterior.probs | |
| }) | |
| probs | |
| } | |
| all.post <- bplapply(X = seq(0, num.threads-1), FUN = GetThreadPosterior) | |
| all.post <- do.call(c, all.post) | |
| all.post | |
| } | |
| GetPosterior <- function(data, qregion, imp.matrices, full=F) { | |
| initialize <- data$hmm$init #rep(1.153589e-153, 12) | |
| emission <- as.matrix(data$hmm$emission[names(data[1:5])]) | |
| transition <- as.matrix(data$hmm$transition) | |
| string.query <- GRangesToString(qregion) | |
| posterior.probs <- pblapply(Cells(data$az.obj), function(qcell.id){ | |
| observations <- lapply(names(data[1:5]), function(mark){ | |
| df <- data.frame(imp.matrices[[mark]]@assays$imputed@data[string.query, qcell.id]) | |
| df[is.na(df[1])] <- 0 | |
| df | |
| }) | |
| observations <- as.matrix(do.call(cbind.data.frame, observations)) | |
| posterior.prob <- posterior(initialize, emission, transition, observations) | |
| if (full) { | |
| posterior.prob | |
| } else { | |
| unlist(apply(posterior.prob, 1, which.max)) | |
| } | |
| }) | |
| names(posterior.probs) <- Cells(data$az.obj) | |
| length(posterior.probs) | |
| length(posterior.probs[[1]]) | |
| posterior.probs | |
| } | |
| GetPosteriorFast <- function(data, qregions, imp.matrices) { | |
| initialize <- data$hmm$init #rep(1.153589e-153, 12) | |
| emission <- as.matrix(data$hmm$emission[names(data[1:5])]) | |
| transition <- as.matrix(data$hmm$transition) | |
| if (class(qregions) == "GRanges") { | |
| matrices <- lapply(names(data[1:5]), function(mark){ | |
| as.matrix(imp.matrices[[mark]][GRangesToString(qregions), Cells(data$az.obj)]) | |
| }) | |
| all.probs <- posterior_fast(initialize, emission, transition, matrices) | |
| names(all.probs) <- Cells(data$az.obj) | |
| } else { | |
| all.probs <- pblapply(qregions, function(qregion) { | |
| string.query <- GRangesToString(qregion) | |
| matrices <- lapply(names(data[1:5]), function(mark){ | |
| as.matrix(imp.matrices[[mark]][string.query, Cells(data$az.obj)]) | |
| }) | |
| posterior.probs <- posterior_fast(initialize, emission, transition, matrices) | |
| names(posterior.probs) <- Cells(data$az.obj) | |
| posterior.probs | |
| }) | |
| } | |
| all.probs | |
| } | |
| PlotStates <- function(roi.region.id, posterior.probs, az.obj, split=F, full.posterior=F) { | |
| if (full.posterior) { | |
| vals <- lapply(posterior.probs, function(probs) { | |
| if (length(probs) >= roi.region.id) { | |
| probs[roi.region.id, ] | |
| } else { | |
| rep(0.0, 12) | |
| } | |
| }) | |
| states <- do.call(rbind.data.frame, vals) | |
| rownames(states) <- names(vals) | |
| dim(states) | |
| colnames(states) <- seq(12) | |
| states$"1" <- states$"1" + states$"2" | |
| states$"3" <- states$"3" + states$"4" + states$"5" | |
| states$"9" <- states$"9" + states$"10" + states$"11" + states$"12" | |
| states$"6" <- states$"6" + states$"7" + states$"8" | |
| states$"2" <- NULL | |
| states$"4" <- NULL | |
| states$"5" <- NULL | |
| states$"10" <- NULL | |
| states$"11" <- NULL | |
| states$"12" <- NULL | |
| states$"7" <- NULL | |
| states$"8" <- NULL | |
| colnames(states) <- c("Promoter", "Enhancer", "NA", "Repression") | |
| states <- states / rowSums(states) | |
| az.obj <- data$az.obj | |
| az.obj[["states"]] <- CreateAssayObject(t(states)) | |
| FeaturePlot(az.obj, reduction = "jumap", features = c("Promoter", "Enhancer", "Repression"), ncol = 3) & | |
| scale_colour_gradientn(colours = rev(brewer.pal(n = 5, name = "RdBu"))) | |
| } else { | |
| vals <- lapply(posterior.probs, function(probs) { | |
| if (length(probs) >= roi.region.id) { | |
| probs[[roi.region.id]] | |
| } else { | |
| 0.0 | |
| } | |
| }) | |
| states <- unlist(vals) | |
| names(states) <- names(vals) | |
| print(table(states)) | |
| az.obj$states <- states | |
| az.obj$states[az.obj$states %in% c(1, 2)] <- "Promoter" | |
| az.obj$states[az.obj$states %in% c(3, 4, 5)] <- "Enhancer" | |
| az.obj$states[az.obj$states %in% c(9, 10, 11, 12)] <- "Repression" | |
| az.obj$states[az.obj$states %in% c(0,6,7,8)] <- NA | |
| az.obj$states <- factor(az.obj$states, levels = c("Promoter", "Enhancer", "Repression", "NA")) | |
| if (split) { | |
| DimPlot(az.obj, reduction = "jumap", group.by = "states", split.by = "states", ncol = 3) | |
| } else { | |
| DimPlot(az.obj, reduction = "jumap", group.by = "states", label = T, repel = T) | |
| } | |
| } | |
| } | |
| GetPlots <- function(roi.region.id, data, imp.matrices) { | |
| plots <- lapply(names(data[1:5]), function(mark) { | |
| obj <- data[[mark]] | |
| DefaultAssay(obj) <- "region" | |
| keep.cells <- obj@assays$region@data[rownames(obj@assays$region)[roi.region.id], ] | |
| keep.cells <- names(keep.cells[keep.cells > 0]) | |
| DimPlot(obj, cells.highlight = keep.cells, reduction = "ref.umap") + NoLegend() + ggtitle(mark) | |
| #FeaturePlot(obj, reduction = "ref.umap", features = rownames(obj@assays$region)[roi.region.id]) + NoLegend() + ggtitle(mark) | |
| }) | |
| i.plots <- lapply(names(data[1:5]), function(mark) { | |
| obj <- imp.matrices[[mark]] | |
| keep.cells <- obj@assays$imputed@data[roi.region.id, ] | |
| keep.cells <- names(keep.cells[keep.cells > 0]) | |
| # DimPlot(obj, cells.highlight = keep.cells, reduction = "jumap", group.by = "celltype.l2") + NoLegend() + ggtitle(mark) | |
| FeaturePlot(obj, reduction = "jumap", features = rownames(obj@assays$imputed@data)[roi.region.id]) + ggtitle(paste0("imputed_", mark)) | |
| }) | |
| wrap_plots(wrap_plots(plots, ncol = 2), wrap_plots(i.plots, ncol = 2), ncol = 2) | |
| } | |
| ImputeCounts <- function(data, region.data) { | |
| imp.mat <- lapply(names(data[1:5]), function(mark){ | |
| anchors <- Matrix(as.matrix(data$anchors[[mark]]), sparse = T) | |
| mat <- region.data[[mark]][, rownames(anchors)] | |
| imp.mat <- t(mat %*% anchors) | |
| norm.mat <- colSums((anchors > 0)*1) | |
| imp.mat <- imp.mat / norm.mat | |
| imp.mat <- t(imp.mat) | |
| missing.cells <- setdiff(Cells(data$az.obj), colnames(imp.mat)) | |
| if(length(missing.cells) > 0) { | |
| zeros <- matrix(0, nrow = length(rownames(imp.mat)), ncol = length(missing.cells)) | |
| colnames(zeros) <- missing.cells | |
| rownames(zeros) <- rownames(imp.mat) | |
| imp.mat <- cbind(imp.mat, zeros) | |
| } | |
| imp.mat[, Cells(data$az.obj)] | |
| #obj <- data$az.obj | |
| #obj[["imputed"]] <- CreateAssayObject(imp.mat) | |
| #obj | |
| }) | |
| names(imp.mat) <- names(data[1:5]) | |
| imp.mat | |
| } | |
| ImputeVals <- function(data, feats) { | |
| imp.mat <- lapply(names(data[1:5]), function(mark){ | |
| anchors <- Matrix(as.matrix(data$anchors[[mark]]), sparse = T) | |
| mat <- data[[mark]]@assays$region@data[feats, rownames(anchors)] | |
| imp.mat <- t(mat %*% anchors) | |
| norm.mat <- colSums((anchors > 0)*1) | |
| #norm.mat <- ((mat > 0)*1) %*% ((anchors > 0)*1) | |
| imp.mat <- imp.mat / norm.mat | |
| t(imp.mat) | |
| }) | |
| names(imp.mat) <- names(data[1:5]) | |
| imp.mat | |
| } | |
| GetRegionMatrix <- function(data, qregion) { | |
| region.mat <- lapply(names(data[1:5]), function(mark){ | |
| obj <- data[[mark]] | |
| cells <- rownames(Matrix(as.matrix(data$anchors[[mark]]), sparse = T)) | |
| mat <- SingleFeatureMatrix( | |
| obj@assays$tiles@fragments[[1]], | |
| qregion, | |
| cells = cells, | |
| verbose = F | |
| ) | |
| mat <- mat[, cells] | |
| mat | |
| }) | |
| names(region.mat) <- names(data[1:5]) | |
| region.mat | |
| } | |
| GetRegionMatrixOld <- function(data, qregion) { | |
| region.mat <- lapply(data[1:5], function(obj){ | |
| mat <- SingleFeatureMatrix( | |
| obj@assays$tiles@fragments[[1]], | |
| qregion, | |
| cells = Cells(obj), | |
| verbose = F | |
| ) | |
| mat | |
| }) | |
| names(region.mat) <- names(data[1:5]) | |
| for (mark in names(data[1:5])) { | |
| data[[mark]][["region"]] <- CreateAssayObject(region.mat[[mark]]) | |
| } | |
| data | |
| } | |
| GetTileRegion <- function(gname, window=200, extend.upstream = 5000, extend.downstream = 5000) { | |
| region <- Signac:::FindRegion(data[["k4me1"]], region = gname, | |
| extend.upstream=extend.upstream, | |
| extend.downstream=extend.downstream) | |
| start <- region@ranges@start | |
| end <- region@ranges@start + region@ranges@width | |
| start <- start %/% window | |
| end <- end %/% window | |
| qregion <- StringToGRanges( paste0(as.character(region@seqnames@values), "-", | |
| seq(start*window, end*window, window), "-", | |
| seq(start*window + window, end*window + window, window) | |
| ) | |
| ) | |
| qregion | |
| } | |
| GetTileRegions <- function(regions, window=200, extend.upstream = 5000, extend.downstream = 5000, combined=T) { | |
| if (combined) { | |
| ranges.string <- unlist(lapply(regions, function(region) { | |
| region <- Signac:::FindRegion(region = region, extend.upstream=extend.upstream, | |
| extend.downstream=extend.downstream) | |
| start <- region@ranges@start | |
| end <- region@ranges@start + region@ranges@width | |
| start <- start %/% window | |
| end <- end %/% window | |
| qregion <- paste0(as.character(region@seqnames@values), "-", seq(start*window, end*window, window), | |
| "-", seq(start*window + window, end*window + window, window)) | |
| qregion | |
| })) | |
| StringToGRanges(ranges.string) | |
| } else{ | |
| lapply(regions, function(region) { | |
| GetTileRegion(region) | |
| }) | |
| } | |
| } | |
| GetModelParams <- function(file.path) { | |
| init.model <- read.table(file.path, skip = 1, nrows = 12) | |
| trans.model <- read.table(file.path, skip = 13, nrows = 144) | |
| trans.model <- trans.model[, c("V2", "V3", "V4")] | |
| trans.model <- dcast(trans.model, V2 ~ V3, value.var = 'V4') | |
| trans.model$V2 <- NULL | |
| emi.model <- read.table(file.path, skip = 157) | |
| emi.model <- emi.model[emi.model$V5 == 1, c("V2", "V4", "V6")] | |
| emi.model <- dcast(emi.model, V2 ~ V4, value.var = 'V6') | |
| emi.model$V2 <- NULL | |
| list( | |
| "init" = init.model$V3, | |
| "transition" = trans.model, | |
| "emission" = emi.model | |
| ) | |
| } | |
| LoadData <- function() { | |
| k4me1.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me1.rds") | |
| k4me2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me2.rds") | |
| k4me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me3.rds") | |
| k27ac.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27ac.rds") | |
| k27me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27me3.rds") | |
| k4me1.data$disc.ptime <- round(k4me1.data$pseudo.time, 1) | |
| k4me2.data$disc.ptime <- round(k4me2.data$pseudo.time, 1) | |
| k4me3.data$disc.ptime <- round(k4me3.data$pseudo.time, 1) | |
| k27ac.data$disc.ptime <- round(k27ac.data$pseudo.time, 1) | |
| k27me3.data$disc.ptime <- round(k27me3.data$pseudo.time, 1) | |
| DefaultAssay(k4me1.data) <- "tiles" | |
| DefaultAssay(k4me2.data) <- "tiles" | |
| DefaultAssay(k4me3.data) <- "tiles" | |
| DefaultAssay(k27ac.data) <- "tiles" | |
| DefaultAssay(k27me3.data) <- "tiles" | |
| hmm <- GetModelParams("/home/srivastavaa/v10_bingjie_cut_n_tag/chromHMM/model_12/model_12.txt") | |
| hmm$init <- hmm$init + 1.153589e-153 | |
| roi <- readRDS("~/test_rds/roi.rds") | |
| roi <- GRangesToString(roi) | |
| cell.types <- names(table(k4me1.data$predicted.celltype.l2)) | |
| cell.types <- setdiff(cell.types, c("CD4 Proliferating", "Eryth", "ILC", "Doublet", "NK_CD56bright")) | |
| cell.types | |
| data <- list( | |
| k4me1 = k4me1.data, | |
| k4me2 = k4me2.data, | |
| k4me3 = k4me3.data, | |
| k27ac = k27ac.data, | |
| k27me3 = k27me3.data, | |
| hmm = hmm, | |
| roi = roi, | |
| cell.types = cell.types, | |
| az.obj = readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/anchors/small.azimuth.rds") | |
| ) | |
| marks <- c("k4me1", "k4me2", "k4me3", "k27ac", "k27me3") | |
| anchors <- lapply(marks, function(mark){ | |
| anchor <- readRDS(paste0("~/v10_bingjie_cut_n_tag/seurat_objects/fwd_anchors/anchors.", mark, ".rds")) | |
| df <- data.frame(anchor@anchors) | |
| df$cell2 <- Cells(data[[mark]])[df$cell2] | |
| df$cell1 <- Cells(data$az.obj)[df$cell1] | |
| colnames(df) <- c("azimuth", "mark", "score") | |
| #df <- df[data[[mark]]$predicted.celltype.l1[df$mark] == data$az.obj$celltype.l1[df$azimuth], ] | |
| df <- df[df$score > 0, ] | |
| score.mat <- dcast(df, mark ~ azimuth, value.var = 'score') | |
| rownames(score.mat) <- score.mat$mark | |
| score.mat$mark <- NULL | |
| score.mat[is.na(score.mat)] <- 0 | |
| score.mat | |
| }) | |
| names(anchors) <- marks | |
| data[["anchors"]] <- anchors | |
| data | |
| } | |
| LoadFullData <- function() { | |
| k4me1.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me1.rds") | |
| k4me2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me2.rds") | |
| k4me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k4me3.rds") | |
| k27ac.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27ac.rds") | |
| k27me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k27me3.rds") | |
| k9me3.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/k9me3.rds") | |
| poll2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/polII_total.rds") | |
| active.poll2.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/polII_ser2.rds") | |
| asap.data <- readRDS("/home/srivastavaa/v10_bingjie_cut_n_tag/seurat_objects/pseudotime/asap.rds") | |
| rna.data <- readRDS("/home/srivastavaa/v8_bingjie_cut_n_tag/seurat_objects/citeseq.rds") | |
| DefaultAssay(k4me1.data) <- "tiles" | |
| DefaultAssay(k4me2.data) <- "tiles" | |
| DefaultAssay(k4me3.data) <- "tiles" | |
| DefaultAssay(k27ac.data) <- "tiles" | |
| DefaultAssay(k27me3.data) <- "tiles" | |
| DefaultAssay(k9me3.data) <- "tiles" | |
| DefaultAssay(poll2.data) <- "tiles" | |
| DefaultAssay(active.poll2.data) <- "tiles" | |
| DefaultAssay(asap.data) <- "ASAP" | |
| list( | |
| k4me1 = k4me1.data, | |
| k4me2 = k4me2.data, | |
| k4me3 = k4me3.data, | |
| k27ac = k27ac.data, | |
| k27me3 = k27me3.data, | |
| k9me3 = k9me3.data, | |
| pol2 = poll2.data, | |
| apol2 = active.poll2.data, | |
| asap = asap.data, | |
| rna = rna.data | |
| ) | |
| } | |
| GetMaxIndex <- function(data, mark, cell.type, group.by, region.data) { | |
| #print(table(data[[mark]]@meta.data[group.by])) | |
| keep.cells <- rownames(data[[mark]]@meta.data[group.by][data[[mark]]@meta.data[group.by] == cell.type, , drop=F]) | |
| which.max(rowSums(region.data[[mark]][, keep.cells])) | |
| } | |
| SingleFeatureMatrix <- function( | |
| fragment, | |
| features, | |
| cells = NULL, | |
| process_n = 2000, | |
| sep = c("-", "-"), | |
| verbose = TRUE | |
| ) { | |
| fragment.path <- GetFragmentData(object = fragment, slot = "path") | |
| if (!is.null(cells)) { | |
| # only look for cells that are in the fragment file | |
| frag.cells <- GetFragmentData(object = fragment, slot = "cells") | |
| # first subset frag.cells | |
| cell.idx <- fmatch( | |
| x = names(x = frag.cells), | |
| table = cells, | |
| nomatch = 0L | |
| ) > 0 | |
| cells <- frag.cells[cell.idx] | |
| } | |
| tbx <- TabixFile(file = fragment.path) | |
| features <- keepSeqlevels( | |
| x = features, | |
| value = intersect( | |
| x = seqnames(x = features), | |
| y = seqnamesTabix(file = tbx) | |
| ), | |
| pruning.mode = "coarse" | |
| ) | |
| if (length(x = features) == 0) { | |
| stop("No matching chromosomes found in fragment file.") | |
| } | |
| feature.list <- Signac:::ChunkGRanges( | |
| granges = features, | |
| nchunk = ceiling(x = length(x = features) / process_n) | |
| ) | |
| if (verbose) { | |
| message("Extracting reads overlapping genomic regions") | |
| } | |
| #if (nbrOfWorkers() > 1) { | |
| mylapply <- bplapply | |
| #} else { | |
| # mylapply <- ifelse(test = verbose, yes = pblapply, no = lapply) | |
| #} | |
| matrix.parts <- mylapply( | |
| X = feature.list, | |
| FUN = Signac:::PartialMatrix, | |
| tabix = tbx, | |
| cells = cells, | |
| sep = sep | |
| ) | |
| # remove any that are NULL (no fragments for any cells in the region) | |
| null.parts <- sapply(X = matrix.parts, FUN = is.null) | |
| matrix.parts <- matrix.parts[!null.parts] | |
| if (is.null(x = cells)) { | |
| all.cells <- unique( | |
| x = unlist(x = lapply(X = matrix.parts, FUN = colnames)) | |
| ) | |
| matrix.parts <- lapply( | |
| X = matrix.parts, | |
| FUN = AddMissingCells, | |
| cells = all.cells | |
| ) | |
| } | |
| featmat <- Reduce(f = rbind, x = matrix.parts) | |
| if (!is.null(x = cells)) { | |
| # cells supplied, rename with cell name from object rather than file | |
| cell.convert <- names(x = cells) | |
| names(x = cell.convert) <- cells | |
| colnames(x = featmat) <- unname(obj = cell.convert[colnames(x = featmat)]) | |
| } | |
| # reorder features | |
| feat.str <- GRangesToString(grange = features) | |
| featmat <- featmat[feat.str, ] | |
| return(featmat) | |
| } | |
| PlotSCCoverage <- function(data, data_full, select.roi, gname) { | |
| qregion <- GetTileRegion(select.roi, window=200, | |
| extend.upstream = 5000, | |
| extend.downstream = 5000) | |
| region.data <- GetRegionMatrix(data, qregion) | |
| imp.matrices <- ImputeCounts(data, region.data) | |
| posterior.probs <- GetPosteriorFast(data, qregion, imp.matrices) | |
| celltypes <- c("B", "CD4 T", "CD8 T", "NK", "Mono", "DC") | |
| sc.plot <- lapply(celltypes, function(ct) { | |
| keep.cells <- names(data$az.obj$celltype.l1[data$az.obj$celltype.l1 == ct]) | |
| nmat <- Reduce("+", posterior.probs[keep.cells]) / length(keep.cells) | |
| nmat <- nmat / rowSums(nmat) | |
| colnames(nmat) <- paste0("E", seq(12)) | |
| data.frame(nmat) | |
| }) | |
| names(sc.plot) <- celltypes | |
| HorizontalCoveragePlot(data = data_full, | |
| region = select.roi, | |
| gname = gname, | |
| extend.upstream = 5000, | |
| extend.downstream = 5000, | |
| group.by = "predicted.celltype.l1", | |
| idents = c("B", "CD4 T", "CD8 T", "NK", "Mono", "DC"), | |
| state.plot = "~/v10_bingjie_cut_n_tag/chromHMM/model_12/POSTERIOR/", | |
| sc.state.plot = sc.plot, | |
| marks = c("k4me1", "k4me2", "k4me3", "k27ac", "k27me3", "apol2", "pol2"), | |
| window = 2000 | |
| ) | |
| } | |
| PlotThings <- function(data, loci, gname, | |
| group.by="predicted.celltype.l1", | |
| window=500, | |
| idents = c("B", "CD4 T", "CD8 T", "DC", "Mono", "NK")) { | |
| p6 <- CoveragePlot( | |
| object = data[["asap"]], | |
| region = loci, | |
| peaks = F, | |
| window = window, | |
| annotation = F, | |
| group.by = group.by, | |
| idents = idents, | |
| ) + ggtitle('ATAC') | |
| p7 <- CoveragePlot( | |
| object = data[["k27me3"]], | |
| region = loci, | |
| peaks = F, | |
| window = window, | |
| annotation = F, | |
| group.by = group.by, | |
| idents = idents, | |
| ) + ggtitle('H3K27me3') | |
| p1 <- CoveragePlot( | |
| object = data[["k4me1"]], | |
| region = loci, | |
| peaks = F, | |
| window = window, | |
| annotation = F, | |
| group.by = group.by, | |
| idents = idents, | |
| ) + ggtitle('H3K4me1') | |
| p2 <- CoveragePlot( | |
| object = data[["k4me2"]], | |
| region = loci, | |
| peaks = F, | |
| window = window, | |
| annotation = F, | |
| group.by = group.by, | |
| idents = idents, | |
| ) + ggtitle('H3K4me2') | |
| p3 <- CoveragePlot( | |
| object = data[["k4me3"]], | |
| region = loci, | |
| peaks = F, | |
| window = window, | |
| annotation = F, | |
| group.by = group.by, | |
| idents = idents, | |
| ) + ggtitle('H3K4me3') | |
| p4 <- CoveragePlot( | |
| object = data[["k27ac"]], | |
| region = loci, | |
| peaks = F, | |
| window = window, | |
| annotation = F, | |
| group.by = group.by, | |
| idents = idents, | |
| ) + ggtitle('H3K27ac') | |
| p5 <- AnnotationPlot( | |
| object = data[["k27ac"]], | |
| region = loci | |
| ) | |
| expression.plot <- ExpressionPlot( | |
| object = data[["rna"]], | |
| features = gname, | |
| assay = "RNA", | |
| group.by = group.by, | |
| idents = idents, | |
| ) | |
| plotlist = list(p6, p1, p2, p3, p4, p7, p5) | |
| # remove x-axis from all but last plot | |
| for (i in 1:(length(x = plotlist) - 1)) { | |
| plotlist[[i]] <- plotlist[[i]] + theme( | |
| axis.title.x = element_blank(), | |
| axis.text.x = element_blank(), | |
| axis.line.x.bottom = element_blank(), | |
| axis.ticks.x.bottom = element_blank() | |
| ) | |
| } | |
| heights = c(10, 10, 10, 10, 10, 10, 1) | |
| widths = c(10, 1) | |
| # align expression plot with the first element in plot list | |
| plotlist[[1]] <- (plotlist[[1]] + expression.plot) + | |
| plot_layout(widths = widths) | |
| plotlist[[2]] <- (plotlist[[2]] + expression.plot) + | |
| plot_layout(widths = widths) | |
| plotlist[[3]] <- (plotlist[[3]] + expression.plot) + | |
| plot_layout(widths = widths) | |
| plotlist[[4]] <- (plotlist[[4]] + expression.plot) + | |
| plot_layout(widths = widths) | |
| plotlist[[5]] <- (plotlist[[5]] + expression.plot) + | |
| plot_layout(widths = widths) | |
| plotlist[[6]] <- (plotlist[[6]] + expression.plot) + | |
| plot_layout(widths = widths) | |
| p2 <- wrap_plots(plotlist[1:5], | |
| heights = heights[1:5], | |
| byrow = T | |
| ) | |
| p <- plotlist[[6]] + plotlist[[7]] + guide_area() + plot_layout( | |
| ncol = 2, heights = c(10, 2), | |
| guides = "collect") | |
| design <- "B | |
| A" | |
| wrap_plots(B = p2, A = p, design = design, heights = c(50, 10)) | |
| } | |
| HorizontalCoverageTrack <- function( | |
| cutmat, | |
| region, | |
| group.scale.factors, | |
| scale.factor, | |
| obj.groups, | |
| ymax, | |
| downsample.rate, | |
| window = 100, | |
| max.downsample = 3000, | |
| tile = NULL, | |
| mark, | |
| color | |
| ) { | |
| window.size <- width(x = region) | |
| levels.use <- levels(x = obj.groups) | |
| coverages <- Signac:::ApplyMatrixByGroup( | |
| mat = cutmat, | |
| fun = colSums, | |
| groups = obj.groups, | |
| group.scale.factors = group.scale.factors, | |
| scale.factor = scale.factor, | |
| normalize = TRUE | |
| ) | |
| if (!is.na(x = window)) { | |
| coverages <- group_by(.data = coverages, group) | |
| coverages <- mutate(.data = coverages, coverage = roll_sum( | |
| x = norm.value, n = window, fill = NA, align = "center" | |
| )) | |
| coverages <- ungroup(x = coverages) | |
| } else { | |
| coverages$coverage <- coverages$norm.value | |
| } | |
| chromosome <- as.character(x = seqnames(x = region)) | |
| start.pos <- start(x = region) | |
| end.pos <- end(x = region) | |
| coverages <- coverages[!is.na(x = coverages$coverage), ] | |
| coverages <- group_by(.data = coverages, group) | |
| sampling <- min(max.downsample, window.size * downsample.rate) | |
| coverages <- slice_sample(.data = coverages, n = sampling) | |
| # restore factor levels | |
| if (!is.null(x = levels.use)) { | |
| colors_all <- hue_pal()(length(x = levels.use)) | |
| names(x = colors_all) <- levels.use | |
| coverages$group <- factor(x = coverages$group, levels = levels.use) | |
| } | |
| if (is.null(ymax)) { | |
| ymax <- signif(x = max(coverages$coverage, na.rm = TRUE), digits = 2) | |
| } | |
| ymin <- 0 | |
| gr <- GRanges( | |
| seqnames = chromosome, | |
| IRanges(start = start.pos, end = end.pos) | |
| ) | |
| if (!is.null(tile)) { | |
| diffs <- coverages$position - tile | |
| coverages$tile <- coverages$position[which.min(diffs)] | |
| } | |
| num.groups <- length(obj.groups) | |
| p <- ggplot( | |
| data = coverages, | |
| mapping = aes(x = position, y = coverage, fill = group) | |
| ) + geom_area(stat = "identity") + | |
| geom_hline(yintercept = 0, size = 0.1) + | |
| geom_vline(xintercept = tile, size = 0.1) + | |
| scale_fill_manual(values = rep(color, num.groups)) + | |
| facet_wrap(facets = ~group, strip.position = "top", nrow = 1) + | |
| xlab(label = paste0(chromosome, " position (bp)")) + | |
| ylab(label = paste0(mark)#, " \n(range ", | |
| #as.character(x = ymin), " - ", | |
| #as.character(x = ymax), ")") | |
| ) + | |
| ylim(c(ymin, ymax)) + | |
| theme_browser(legend = FALSE) + | |
| theme(axis.text.x = element_text(angle = 90), | |
| strip.text = element_text(size = 20), | |
| #axis.title.y = element_text(size = 16) | |
| panel.spacing = unit(1, "lines") | |
| ) | |
| if (!is.null(x = levels.use)) { | |
| p <- p + scale_fill_manual(values = colors_all) | |
| } | |
| return(p) | |
| } | |
| HorizontalExpressionPlot <- function( | |
| object, | |
| features, | |
| assay = NULL, | |
| group.by = NULL, | |
| idents = NULL, | |
| slot = "data" | |
| ) { | |
| # get data | |
| assay <- DefaultAssay(object = object) | |
| data.plot <- GetAssayData( | |
| object = object, | |
| assay = assay, | |
| slot = slot | |
| )[features, ] | |
| obj.groups <- Signac:::GetGroups( | |
| object = object, | |
| group.by = group.by, | |
| idents = NULL | |
| ) | |
| # if levels set, define colors based on all groups | |
| levels.use <- levels(x = obj.groups) | |
| if (!is.null(x = levels.use)) { | |
| colors_all <- hue_pal()(length(x = levels.use)) | |
| names(x = colors_all) <- levels.use | |
| } | |
| if (!is.null(x = idents)) { | |
| cells.keep <- names(x = obj.groups)[ | |
| fmatch(x = obj.groups, table = idents, nomatch = 0L) > 0 | |
| ] | |
| if (length(x = features) > 1) { | |
| data.plot <- data.plot[, cells.keep] | |
| } else { | |
| data.plot <- data.plot[cells.keep] | |
| } | |
| obj.groups <- obj.groups[cells.keep] | |
| } | |
| # construct data frame | |
| if (length(x = features) == 1) { | |
| df <- data.frame( | |
| gene = features, | |
| expression = data.plot, | |
| group = obj.groups | |
| ) | |
| } else { | |
| df <- data.frame() | |
| for (i in features) { | |
| df.1 <- data.frame( | |
| gene = i, | |
| expression = data.plot[i, ], | |
| group = obj.groups | |
| ) | |
| df <- rbind(df, df.1) | |
| } | |
| } | |
| p.list <- list() | |
| for (i in seq_along(along.with = features)) { | |
| df.use <- df[df$gene == features[[i]], ] | |
| p <- ggplot(data = df.use, aes(x = expression, y = gene, fill = group)) + | |
| geom_violin(size = 1/4) + | |
| facet_wrap(~group, nrow = 1, strip.position = "right") + | |
| theme_classic() + | |
| scale_y_discrete(position = "top") + | |
| scale_x_continuous(position = "bottom", limits = c(0, NA)) + | |
| theme( | |
| axis.text.y = element_blank(), | |
| axis.text.x = element_text(size = 8), | |
| axis.title.x = element_blank(), | |
| strip.background = element_blank(), | |
| strip.text.y = element_blank(), | |
| legend.position = "none", | |
| panel.spacing = unit(1, "lines") | |
| ) | |
| if (!is.null(x = levels.use)) { | |
| p <- p + scale_fill_manual(values = colors_all) | |
| } | |
| p.list[[i]] <- p | |
| } | |
| p <- wrap_plots(p.list, ncol = length(x = p.list)) | |
| return(p) | |
| } | |
| HorizontalSingleCoveragePlot <- function( | |
| object, | |
| region, | |
| mark, | |
| color, | |
| twoh.tile.region = NULL, | |
| features = NULL, | |
| assay = NULL, | |
| show.bulk = FALSE, | |
| expression.assay = NULL, | |
| expression.slot = "data", | |
| annotation = TRUE, | |
| peaks = TRUE, | |
| ranges = NULL, | |
| ranges.title = "Ranges", | |
| links = TRUE, | |
| tile = FALSE, | |
| tile.size = 100, | |
| tile.cells = 100, | |
| group.by = NULL, | |
| window = 100, | |
| extend.upstream = 0, | |
| extend.downstream = 0, | |
| ymax = NULL, | |
| scale.factor = NULL, | |
| cells = NULL, | |
| idents = NULL, | |
| sep = c("-", "-"), | |
| heights = NULL, | |
| max.downsample = 3000, | |
| downsample.rate = 0.1 | |
| ) { | |
| cells <- colnames(x = object) | |
| assay <- DefaultAssay(object = object) | |
| if (!is.null(x = group.by)) { | |
| Idents(object = object) <- group.by | |
| } | |
| if (!is.null(x = idents)) { | |
| ident.cells <- WhichCells(object = object, idents = idents) | |
| cells <- intersect(x = cells, y = ident.cells) | |
| } | |
| region <- Signac:::FindRegion( | |
| object = object, | |
| region = region, | |
| sep = sep, | |
| assay = assay, | |
| extend.upstream = extend.upstream, | |
| extend.downstream = extend.downstream | |
| ) | |
| if (!is.null(twoh.tile.region)) { | |
| twoh.tile.region <- start(twoh.tile.region) | |
| } | |
| reads.per.group <- AverageCounts( | |
| object = object, | |
| group.by = group.by, | |
| verbose = FALSE | |
| ) | |
| cells.per.group <- CellsPerGroup( | |
| object = object, | |
| group.by = group.by | |
| ) | |
| cutmat <- Signac:::CutMatrix( | |
| object = object, | |
| region = region, | |
| assay = assay, | |
| cells = cells, | |
| verbose = FALSE | |
| ) | |
| colnames(cutmat) <- start(x = region):end(x = region) | |
| group.scale.factors <- suppressWarnings(reads.per.group * cells.per.group) | |
| scale.factor <- median(x = group.scale.factors) | |
| obj.groups <- Signac:::GetGroups( | |
| object = object, | |
| group.by = group.by, | |
| idents = idents | |
| ) | |
| p <- HorizontalCoverageTrack( | |
| cutmat = cutmat, | |
| region = region, | |
| group.scale.factors = group.scale.factors, | |
| scale.factor = scale.factor, | |
| window = window, | |
| ymax = ymax, | |
| obj.groups = obj.groups, | |
| downsample.rate = downsample.rate, | |
| max.downsample = max.downsample, | |
| mark = mark, | |
| color = color, | |
| tile = twoh.tile.region | |
| ) | |
| if (!is.null(x = features)) { | |
| ex.plot <- HorizontalExpressionPlot( | |
| object = object, | |
| features = features, | |
| assay = expression.assay, | |
| idents = idents, | |
| group.by = group.by, | |
| slot = expression.slot | |
| ) | |
| ex.plot | |
| widths <- c(10, length(x = features)) | |
| } else { | |
| ex.plot <- NULL | |
| widths <- NULL | |
| } | |
| if (annotation) { | |
| gene.plot <- AnnotationPlot(object = object, region = region) | |
| } else { | |
| gene.plot <- NULL | |
| } | |
| if (links) { | |
| link.plot <- LinkPlot(object = object, region = region) | |
| } else { | |
| link.plot <- NULL | |
| } | |
| if (peaks) { | |
| peak.plot <- PeakPlot(object = object, region = region) | |
| } else { | |
| peak.plot <- NULL | |
| } | |
| if (!is.null(x = ranges)) { | |
| range.plot <- PeakPlot( | |
| object = object, | |
| region = region, | |
| peaks = ranges, | |
| color = "brown3") + | |
| ylab(ranges.title) | |
| } else { | |
| range.plot <- NULL | |
| } | |
| if (tile) { | |
| # reuse cut matrix | |
| tile.df <- ComputeTile( | |
| cutmatrix = cutmat, | |
| groups = obj.groups, | |
| window = tile.size, | |
| n = tile.cells, | |
| order = "total" | |
| ) | |
| tile.plot <- CreateTilePlot( | |
| df = tile.df, | |
| n = tile.cells | |
| ) | |
| } else { | |
| tile.plot <- NULL | |
| } | |
| if (show.bulk) { | |
| object$bulk <- "All cells" | |
| reads.per.group <- AverageCounts( | |
| object = object, | |
| group.by = "bulk", | |
| verbose = FALSE | |
| ) | |
| cells.per.group <- CellsPerGroup( | |
| object = object, | |
| group.by = "bulk" | |
| ) | |
| bulk.scale.factor <- suppressWarnings(reads.per.group * cells.per.group) | |
| bulk.groups <- rep(x = "All cells", length(x = obj.groups)) | |
| names(x = bulk.groups) <- names(x = obj.groups) | |
| bulk.plot <- HorizontalCoverageTrack( | |
| cutmat = cutmat, | |
| region = region, | |
| group.scale.factors = bulk.scale.factor, | |
| scale.factor = scale.factor, | |
| window = window, | |
| ymax = ymax, | |
| obj.groups = bulk.groups, | |
| downsample.rate = downsample.rate, | |
| max.downsample = max.downsample | |
| ) + | |
| scale_fill_manual(values = "grey") + | |
| ylab("") | |
| } else { | |
| bulk.plot <- NULL | |
| } | |
| nident <- length(x = unique(x = obj.groups)) | |
| heights <- c(10, (1 / nident) * 10, 10, 2, 1, 1, 3) | |
| p <- CombineTracks( | |
| plotlist = list(p, bulk.plot, tile.plot, gene.plot, | |
| peak.plot, range.plot, link.plot), | |
| expression.plot = ex.plot, | |
| heights = heights, | |
| widths = widths | |
| ) | |
| return(p) | |
| } | |
| gg_color_hue <- function(n) { | |
| hues = seq(15, 375, length = n + 1) | |
| hcl(h = hues, l = 65, c = 100)[1:n] | |
| } | |
| HorizontalMotifPlot <- function(object, region, motif) { | |
| label = motif | |
| motif.names <- object@assays$tiles@motifs@motif.names | |
| motif <- names(motif.names[grep(motif, motif.names)]) | |
| if (length(motif) == 0) { print("ERROR: can't find the motif name") } | |
| mpeaks <- object$tiles@motifs@data[, motif] | |
| mpeaks <- mpeaks[mpeaks > 0] | |
| mpeaks.ranges <- StringToGRanges(names(mpeaks)) | |
| if (!inherits(x = region, what = "GRanges")) { | |
| region <- StringToGRanges(regions = region) | |
| } | |
| motif.peaks <-GRangesToString(subsetByOverlaps(mpeaks.ranges, region)) | |
| all.peaks <- granges(x = object) | |
| peak.intersect <- subsetByOverlaps(x = all.peaks, ranges = region) | |
| peak.df <- as.data.frame(x = peak.intersect) | |
| peak.df$ctype <-"dimgrey" | |
| rownames(peak.df) <- paste0(peak.df$seqnames, "-", peak.df$start, "-", peak.df$end) | |
| peak.df[motif.peaks, ]$ctype <- "#F8766D" | |
| start.pos <- start(x = region) | |
| end.pos <- end(x = region) | |
| chromosome <- seqnames(x = region) | |
| if (nrow(x = peak.df) > 0) { | |
| peak.df$start[peak.df$start < start.pos] <- start.pos | |
| peak.df$end[peak.df$end > end.pos] <- end.pos | |
| peak.plot <- ggplot(data = peak.df, mapping = aes(color = ctype)) + | |
| geom_segment(aes(x = start, y = 0, xend = end, yend = 0), | |
| size = 2, | |
| data = peak.df) | |
| } else { | |
| # no peaks present in region, make empty panel | |
| peak.plot <- ggplot(data = peak.df) | |
| } | |
| peak.plot <- peak.plot + theme_classic() + | |
| ylab(label = label) + | |
| theme(axis.ticks.y = element_blank(), | |
| axis.ticks.x = element_blank(), | |
| axis.text.y = element_blank(), | |
| legend.position = "none", | |
| axis.title.x = element_blank(), | |
| axis.text.x = element_blank() | |
| ) + | |
| #xlab(label = paste0(chromosome, " position (bp)")) + | |
| xlim(c(start.pos, end.pos)) + | |
| scale_color_identity() | |
| return(peak.plot) | |
| } | |
| HorizontalTilePlot <- function(object, region, tile) { | |
| label = "tile" | |
| tile.ranges <- StringToGRanges(tile) | |
| if (!inherits(x = region, what = "GRanges")) { | |
| region <- StringToGRanges(regions = region) | |
| } | |
| #peak.df <- as.data.frame(x = region) | |
| #peak.df$ctype <- "dimgrey" | |
| peak.df <- as.data.frame(x = tile.ranges) | |
| peak.df$ctype <- "dimgrey" | |
| #peak.df <- rbind(peak.df, tile.df) | |
| #rownames(peak.df) <- paste0(peak.df$seqnames, "-", peak.df$start, "-", peak.df$end) | |
| start.pos <- start(x = region) | |
| end.pos <- end(x = region) | |
| chromosome <- seqnames(x = region) | |
| if (nrow(x = peak.df) > 0) { | |
| peak.df$start[peak.df$start < start.pos] <- start.pos | |
| peak.df$end[peak.df$end > end.pos] <- end.pos | |
| peak.plot <- ggplot(data = peak.df, mapping = aes(color = ctype)) + | |
| geom_segment(aes(x = start, y = 0, xend = end, yend = 0), | |
| size = 10, | |
| data = peak.df) | |
| } else { | |
| # no peaks present in region, make empty panel | |
| peak.plot <- ggplot(data = peak.df) | |
| } | |
| peak.plot <- peak.plot + theme_classic() + | |
| ylab(label = label) + | |
| theme(axis.ticks.y = element_blank(), | |
| axis.ticks.x = element_blank(), | |
| axis.text.y = element_blank(), | |
| legend.position = "none", | |
| axis.title.x = element_blank(), | |
| axis.text.x = element_blank() | |
| ) + | |
| #xlab(label = paste0(chromosome, " position (bp)")) + | |
| xlim(c(start.pos, end.pos)) + | |
| scale_color_identity() | |
| return(peak.plot) | |
| } | |
| HorizontalCoveragePlot <- function(data, region, | |
| gname=NULL, | |
| pname=NULL, | |
| motif = NULL, | |
| tile = NULL, | |
| state.plot = NULL, | |
| sc.state.plot = NULL, | |
| time.plot = F, | |
| group.by="predicted.celltype.l1", | |
| window=500, | |
| idents = c("B", "CD4 T", "CD8 T", "DC", "Mono", "NK"), | |
| extend.upstream = 0, | |
| extend.downstream = 0, | |
| marks = c("asap", "k4me1", "k4me2", "k4me3", "k27ac", "k27me3", "k9me3") | |
| ) { | |
| num.marks <- length(marks) | |
| colors <- gg_color_hue(num.marks) | |
| region.range <- Signac:::FindRegion( | |
| object = data[["k27me3"]], | |
| region = region, | |
| extend.upstream = extend.upstream, | |
| extend.downstream = extend.downstream | |
| ) | |
| region <- GRangesToString(region.range) | |
| marks.map <- c("ATAC", "H3K4me1", "H3K4me2", "H3K4me3", "H3K27me3", "H3K27ac", "PolII", "active_PolII", "H3K9me3") | |
| names(marks.map) <- c("asap", "k4me1", "k4me2", "k4me3", "k27me3", "k27ac", "pol2", "apol2", "k9me3") | |
| if (!is.null(tile)) { | |
| tile = StringToGRanges(tile) | |
| } | |
| plotlist <- lapply(seq(num.marks), function(mark.id) { | |
| mark <- marks[[mark.id]] | |
| HorizontalSingleCoveragePlot( object = data[[mark]], | |
| region = region, | |
| peaks = F, | |
| window = window, | |
| annotation = F, | |
| group.by = group.by, | |
| idents = idents, | |
| twoh.tile.region = tile, | |
| mark = marks.map[[mark]], | |
| color = colors[[mark.id]] | |
| ) | |
| }) | |
| for (i in 1:(length(x = plotlist))) { | |
| plotlist[[i]] <- plotlist[[i]] + theme( | |
| axis.title.x = element_blank(), | |
| axis.text.x = element_blank(), | |
| axis.line.x.bottom = element_blank(), | |
| axis.ticks.x.bottom = element_blank() | |
| ) | |
| } | |
| for (i in 2:(length(x = plotlist))) { | |
| plotlist[[i]] <- plotlist[[i]] + theme( | |
| strip.text.x = element_blank() | |
| ) | |
| } | |
| gene_plot <- AnnotationPlot( | |
| object = data[["k27me3"]], | |
| region = region | |
| ) | |
| if (!is.null(gname)) { | |
| exp.obj <- data[["rna"]] | |
| DefaultAssay(exp.obj) <- "RNA" | |
| expr_plot <- lapply(gname, function(feat) | |
| HorizontalExpressionPlot( | |
| object = exp.obj, | |
| features = feat, | |
| assay = "RNA", | |
| group.by = group.by, | |
| idents = idents | |
| ) | |
| ) | |
| expr_plot <- wrap_plots(expr_plot, nrow = length(gname)) | |
| } else if (!is.null(pname)) { | |
| expr_plot <- lapply(marks, function(feat) { | |
| exp.obj <- data[[feat]] | |
| DefaultAssay(exp.obj) <- "ADT" | |
| if (feat == "asap") { | |
| if (pname == "CD57") { | |
| pname <- "CD57-Recombinant" | |
| } | |
| ProteinPlot( | |
| object = exp.obj, | |
| name = marks.map[[feat]], | |
| features = pname, | |
| assay = "ADT", | |
| group.by = group.by, | |
| idents = idents | |
| ) | |
| } else { | |
| ProteinPlot( | |
| object = exp.obj, | |
| name = marks.map[[feat]], | |
| features = pname, | |
| assay = "ADT", | |
| group.by = group.by, | |
| idents = idents | |
| ) | |
| } | |
| }) | |
| expr_plot <- wrap_plots(expr_plot, nrow = length(pname)) | |
| } else { | |
| print("ERROR: must provide gname or pname") | |
| return (NULL) | |
| } | |
| num.idents <- length(idents) | |
| gene_plots <- lapply(seq(num.idents), function(x) gene_plot) | |
| gene_plots[[1]] <- gene_plots[[1]] + theme( | |
| axis.text.x = element_text(angle = 15), | |
| ) | |
| for (i in 2:length(gene_plots)) { | |
| gene_plots[[i]] <- gene_plots[[i]] + theme( | |
| axis.text.y=element_blank(), | |
| axis.text.x = element_text(angle = 15), | |
| axis.line.y=element_blank(), | |
| axis.title.y=element_blank(), | |
| panel.spacing = unit(2, "lines") | |
| ) | |
| } | |
| gene_plots <- wrap_plots(gene_plots, nrow = 1) | |
| if (!is.null(motif)) { | |
| motif.plot <- HorizontalMotifPlot(object = data[["k4me1"]], | |
| region = region.range, | |
| motif = motif) | |
| motif_plots <- lapply(seq(num.idents), function(x) motif.plot) | |
| for (i in 2:length(motif_plots)) { | |
| motif_plots[[i]] <- motif_plots[[i]] + theme( | |
| axis.text.y=element_blank(), | |
| axis.line.y=element_blank(), | |
| axis.title.y=element_blank() | |
| ) | |
| } | |
| gene_plots <- wrap_plots(motif_plots, nrow = 1) / gene_plots | |
| } | |
| #if (!is.null(tile)) { | |
| # tile.plot <- HorizontalTilePlot(object = data[["k4me1"]], | |
| # region = region.range, | |
| # tile = tile) | |
| # tile_plots <- lapply(seq(num.idents), function(x) tile.plot) | |
| # | |
| # for (i in 2:length(tile_plots)) { | |
| # tile_plots[[i]] <- tile_plots[[i]] + theme( | |
| # axis.text.y=element_blank(), | |
| # axis.line.y=element_blank(), | |
| # axis.title.y=element_blank() | |
| # ) | |
| # } | |
| # | |
| # gene_plots <- wrap_plots(tile_plots, nrow = 1) / gene_plots | |
| #} | |
| if (!is.null(state.plot)) { | |
| splots <- lapply(idents, function(x) StatePlot(state.plot, region.range, gsub(" ", "_", x))) | |
| for (i in 2:length(splots)) { | |
| splots[[i]] <- splots[[i]] + theme( | |
| axis.text.y=element_blank(), | |
| axis.line.y=element_blank(), | |
| axis.title.y=element_blank(), | |
| axis.ticks.y=element_blank() | |
| ) | |
| } | |
| splots <- wrap_plots(splots, nrow = 1) | |
| } | |
| if (!is.null(sc.state.plot)) { | |
| sc.splots <- lapply(idents, function(x) SCStatePlot(sc.state.plot, x)) | |
| for (i in 2:length(sc.splots)) { | |
| sc.splots[[i]] <- sc.splots[[i]] + theme( | |
| axis.text.y=element_blank(), | |
| axis.line.y=element_blank(), | |
| axis.title.y=element_blank(), | |
| axis.ticks.y=element_blank() | |
| ) | |
| } | |
| sc.splots <- wrap_plots(sc.splots, nrow = 1) | |
| } | |
| if (!time.plot) { | |
| if(is.null(state.plot)) { | |
| plotlist[[num.marks + 1]] <- gene_plots | |
| plotlist[[num.marks + 2]] <- expr_plot | |
| wrap_plots(plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10)) | |
| } else if(!is.null(sc.state.plot)) { | |
| plotlist[[num.marks + 1]] <- gene_plots | |
| plotlist[[num.marks + 2]] <- splots | |
| plotlist[[num.marks + 3]] <- sc.splots | |
| plotlist[[num.marks + 4]] <- expr_plot | |
| wrap_plots( plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10, 10, 10)) | |
| } else { | |
| plotlist[[num.marks + 1]] <- gene_plots | |
| plotlist[[num.marks + 2]] <- splots | |
| plotlist[[num.marks + 3]] <- expr_plot | |
| wrap_plots( plotlist, nrow = length(plotlist), heights = c(rep(10, num.marks), 5, 10, 10)) | |
| } | |
| } else { | |
| wrap_plots( list( | |
| expr_plot[[1]], | |
| expr_plot[[2]], | |
| expr_plot[[3]], | |
| expr_plot[[4]], | |
| expr_plot[[5]], | |
| expr_plot[[6]], | |
| expr_plot[[7]] | |
| ), | |
| nrow = 7, | |
| heights = c(rep(10, 7)) | |
| ) | |
| } | |
| } | |
| ProteinPlot <- function( | |
| object, | |
| name, | |
| features, | |
| assay = NULL, | |
| group.by = NULL, | |
| idents = NULL | |
| ) { | |
| print(name) | |
| ptime <- object@meta.data[, group.by, drop=F] | |
| ptime <- ptime[ptime[, 1] %in% idents, , drop=F] | |
| #olaps <- data.frame(findOverlaps(StringToGRanges(rownames(object[[assay]]@data)), features)) | |
| #features <- olaps$queryHits | |
| ptime$cts <- colSums(object[[assay]]@data[features, , drop=F])[rownames(ptime)] | |
| ptime$time <- object$pseudo.time[rownames(ptime)] | |
| colnames(ptime) <- c("round.ptime", "counts", "pseudo.time") | |
| #ptime <- ptime[ptime$counts > 0, ] | |
| #ptime$categ <- 0 | |
| #ptime[ptime$round.ptime == idents[2], ]$categ <- 1 | |
| #ptime[ptime$round.ptime == idents[3], ]$categ <- 2 | |
| #print(head(ptime)) | |
| fit = mgcv::gam(counts ~ s(pseudo.time, k=50), data = ptime) | |
| newx = data.frame(pseudo.time=with(ptime, seq(min(pseudo.time), max(pseudo.time), len=100))) | |
| pred = predict(fit, newdata=newx, se=T) | |
| pred = cbind(pseudo.time=newx, counts=pred$fit) | |
| pred.range <- max(pred$counts) - min(pred$counts) | |
| actual.range <- max(ptime$counts) - min(ptime$counts) | |
| norm <- actual.range / pred.range | |
| pred$counts <- pred$counts - min(pred$counts) | |
| pred$counts <- pred$counts * norm | |
| ptime$round.ptime <- factor(ptime$round.ptime, levels=idents) | |
| p <- ggplot(ptime, aes(x=pseudo.time, y=counts)) + | |
| geom_point(aes(color=round.ptime)) | |
| p_smooth <- geom_smooth(data=ptime, se=F, fullrange=TRUE, | |
| method = "gam", formula = y ~ s(x, k = 100)) | |
| #p_smooth <- geom_smooth(data=pred, stat='identity') | |
| p <- p + p_smooth + | |
| scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) + | |
| theme_bw() + | |
| theme(panel.border = element_blank(), | |
| #panel.grid.major = element_blank(), | |
| #panel.grid.minor = element_blank(), | |
| axis.text.x = element_blank(), | |
| axis.text.y = element_blank(), | |
| axis.title.x = element_blank(), | |
| axis.line.x = element_blank(), | |
| axis.line = element_line(colour = "black")) + | |
| ylab(label = name) + | |
| theme( | |
| axis.title.y = element_text(size = 16) | |
| ) + | |
| theme(legend.position = "none") | |
| return(p) | |
| } | |
| SCStatePlot <- function( | |
| matrices, | |
| celltype, | |
| show.legend = F | |
| ) { | |
| df <- matrices[[celltype]] | |
| df$E4 <- df$E4 + df$E6 + df$E7 + df$E8 + df$E9 | |
| #df$E4 <- NULL | |
| df$E6 <- NULL | |
| df$E7 <- NULL | |
| df$E8 <- NULL | |
| df$E9 <- NULL | |
| colnames(df)[[4]] <- "NA" | |
| df$E1 <- df$E1 + df$E2 | |
| df$E2 <- NULL | |
| df$E3 <- df$E3 + df$E5 | |
| df$E5 <- NULL | |
| df$E11 <- df$E11 + df$E12 | |
| df$E12 <- NULL | |
| #colnames(df) <- c("Repression", "ATAC_only", "NA", "Enhancer", "ATAC+k27ac", "Promoter") | |
| #df <- df[, c(1, 8, 10)] | |
| #df <- df / rowSums(df) | |
| df <- melt(as.matrix(df)) | |
| p <- ggplot(df, aes(fill=Var2, y=value, x=Var1)) + | |
| geom_bar(position="stack", stat="identity", width = 1) + | |
| #scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) + | |
| theme_bw() + | |
| theme(panel.border = element_blank(), | |
| panel.grid.major = element_blank(), | |
| panel.grid.minor = element_blank(), | |
| axis.text.x = element_blank(), | |
| axis.text.y = element_blank(), | |
| axis.title.x = element_blank(), | |
| axis.line.x = element_blank(), | |
| axis.ticks.x=element_blank(), | |
| axis.line = element_line(colour = "black")) + | |
| ylab(label = "State Probabilty") | |
| if (!show.legend) { | |
| p <- p + theme(legend.position = "none") | |
| } | |
| return(p) | |
| } | |
| StatePlot <- function( | |
| base.path, | |
| region, | |
| celltype, | |
| show.legend = F | |
| ) { | |
| chr.name <- region@seqinfo@seqnames | |
| #celltype <- gsub(" ", "_", celltype) | |
| file.name <- paste0(base.path, celltype, "_12_", chr.name, "_posterior.txt") | |
| print(file.exists(file.name)) | |
| df <- read.table(file.name, skip = 1, header = T) | |
| start <- region@ranges@start | |
| end <- region@ranges@start + region@ranges@width | |
| start <- start %/% 200 | |
| end <- end %/% 200 | |
| df <- df[start:end, ] | |
| #df$E1 <- df$E1 + df$E2 + df$E3 | |
| #df$E4 <- df$E4 + df$E6 + df$E7 | |
| df$E4 <- df$E4 + df$E6 + df$E7 + df$E8 + df$E9 | |
| #df$E4 <- NULL | |
| df$E6 <- NULL | |
| df$E7 <- NULL | |
| df$E8 <- NULL | |
| df$E9 <- NULL | |
| colnames(df)[[4]] <- "NA" | |
| df$E1 <- df$E1 + df$E2 | |
| df$E2 <- NULL | |
| df$E3 <- df$E3 + df$E5 | |
| df$E5 <- NULL | |
| df$E11 <- df$E11 + df$E12 | |
| df$E12 <- NULL | |
| #colnames(df) <- c("Repression", "ATAC_only", "NA", "Enhancer", "ATAC+k27ac", "Promoter") | |
| #df <- df[, c(1, 8, 10)] | |
| #df <- df / rowSums(df) | |
| df <- melt(as.matrix(df)) | |
| p <- ggplot(df, aes(fill=Var2, y=value, x=Var1)) + | |
| geom_bar(position="stack", stat="identity", width = 1) + | |
| #scale_x_continuous(limits = c(0.0, 1.0), expand = c(0, 0)) + | |
| theme_bw() + | |
| theme(panel.border = element_blank(), | |
| panel.grid.major = element_blank(), | |
| panel.grid.minor = element_blank(), | |
| axis.text.x = element_blank(), | |
| axis.text.y = element_blank(), | |
| axis.title.x = element_blank(), | |
| axis.line.x = element_blank(), | |
| axis.ticks.x=element_blank(), | |
| axis.line = element_line(colour = "black")) + | |
| ylab(label = "State Probabilty") | |
| if (!show.legend) { | |
| p <- p + theme(legend.position = "none") | |
| } | |
| return(p) | |
| } | |
| GetCoverage <- function( | |
| object, | |
| region, | |
| group.by, | |
| window = 500, | |
| extend.upstream = 0, | |
| extend.downstream = 0, | |
| idents = NULL, | |
| max.downsample = 3000, | |
| downsample.rate = 0.1 | |
| ) { | |
| cells <- colnames(x = object) | |
| assay <- DefaultAssay(object = object) | |
| if (!is.null(x = group.by)) { | |
| Idents(object = object) <- group.by | |
| } | |
| if (!is.null(x = idents)) { | |
| ident.cells <- WhichCells(object = object, idents = idents) | |
| cells <- intersect(x = cells, y = ident.cells) | |
| } | |
| region <- Signac:::FindRegion( | |
| object = object, | |
| region = region, | |
| assay = assay | |
| ) | |
| reads.per.group <- AverageCounts( | |
| object = object, | |
| group.by = group.by, | |
| verbose = FALSE | |
| ) | |
| cells.per.group <- CellsPerGroup( | |
| object = object, | |
| group.by = group.by | |
| ) | |
| cutmat <- Signac:::CutMatrix( | |
| object = object, | |
| region = region, | |
| assay = assay, | |
| cells = cells, | |
| verbose = FALSE | |
| ) | |
| colnames(cutmat) <- start(x = region):end(x = region) | |
| group.scale.factors <- suppressWarnings(reads.per.group * cells.per.group) | |
| scale.factor <- median(x = group.scale.factors) | |
| obj.groups <- Signac:::GetGroups( | |
| object = object, | |
| group.by = group.by, | |
| idents = idents | |
| ) | |
| window.size <- width(x = region) | |
| levels.use <- levels(x = obj.groups) | |
| coverages <- Signac:::ApplyMatrixByGroup( | |
| mat = cutmat, | |
| fun = colSums, | |
| groups = obj.groups, | |
| group.scale.factors = group.scale.factors, | |
| scale.factor = scale.factor, | |
| normalize = TRUE | |
| ) | |
| if (!is.na(x = window)) { | |
| coverages <- group_by(.data = coverages, group) | |
| coverages <- mutate(.data = coverages, coverage = roll_sum( | |
| x = norm.value, n = window, fill = NA, align = "center" | |
| )) | |
| coverages <- ungroup(x = coverages) | |
| } else { | |
| coverages$coverage <- coverages$norm.value | |
| } | |
| chromosome <- as.character(x = seqnames(x = region)) | |
| start.pos <- start(x = region) | |
| end.pos <- end(x = region) | |
| coverages <- coverages[!is.na(x = coverages$coverage), ] | |
| coverages <- group_by(.data = coverages, group) | |
| sampling <- min(max.downsample, window.size * downsample.rate) | |
| coverages <- slice_sample(.data = coverages, n = sampling) | |
| # restore factor levels | |
| if (!is.null(x = levels.use)) { | |
| colors_all <- hue_pal()(length(x = levels.use)) | |
| names(x = colors_all) <- levels.use | |
| coverages$group <- factor(x = coverages$group, levels = levels.use) | |
| } | |
| coverages | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment