cory-weller/example_doublet_finder.R

## example_doublet_finder.R

#!/usr/bin/env Rscript
library(Seurat)
library(DoubletFinder)

## Standard pre-processing
seurat_obj <- CreateSeuratObject(kidney.data)
seurat_obj <- NormalizeData(seurat_obj)
seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "vst", nfeatures = 2000)

# optionally with ScaleData, include vars.to.regress='BatchNo' (or whatever batch var is named)
seurat_obj <- ScaleData(seurat_obj)
seurat_obj <- RunPCA(seurat_obj)
seurat_obj <- RunUMAP(seurat_obj, dims = 1:10)


## PC neighborhood size (pK) selection
sweep.res.list <- paramSweep_v3(seurat_obj, PCs = 1:10, sct = FALSE)
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
bcmvn <- find.pK(sweep.stats)   # bimodality coefficient, mean-variance normalized

## Visualize bcmvn to find pK value that optimizes AUC
chosen_pK <- VALUEHERE
chosen_pN <- 0.25       # pN shouldn't influence results much

## Estimate doublet proportion
# 7.5% initial estimate
# can be tuned higher for more filtering, or lower for less
doublet_est <- 0.075
annotations <- seurat_obj@meta.data$ClusteringResults
homotypic.prop <- modelHomotypic(annotations)
nExp_poi <- round(doublet_est*nrow(seurat_obj@meta.data))
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))


## Initial tuning run of DoubletFinder
# Done to estimate proportion of artificial nearest neighbors (pANN)
seurat_obj <- doubletFinder_V3(
    seurat_obj,
    PCs = 1:10,
    pN = 0.25,
    pK = chosen_pK,
    nExp = nExp_poi,
    reuse.pANN = FALSE,
    sct = FALSE
)

# Should generate pANN column within seurat_obj@meta.data
# get name of this column for second round of doublet detection

pANN_colname <- VALUHERE

## Final run of Doublet Finder using
seurat_obj <- doubletFinder_V3(
    seurat_obj,
    PCs = 1:10,
    pN = 0.25,
    pK = chosen_pK,
    nExp = nExp_poi.adj,
    reuse.pANN = pANN_colname,
    sct = FALSE
)

# within @meta.data a new column with a name including something like
# 'DF.classification' will indicate samples as 'Doublet' or 'Singlet'

	#!/usr/bin/env Rscript
	library(Seurat)
	library(DoubletFinder)

	## Standard pre-processing
	seurat_obj <- CreateSeuratObject(kidney.data)
	seurat_obj <- NormalizeData(seurat_obj)
	seurat_obj <- FindVariableFeatures(seurat_obj, selection.method = "vst", nfeatures = 2000)

	# optionally with ScaleData, include vars.to.regress='BatchNo' (or whatever batch var is named)
	seurat_obj <- ScaleData(seurat_obj)
	seurat_obj <- RunPCA(seurat_obj)
	seurat_obj <- RunUMAP(seurat_obj, dims = 1:10)


	## PC neighborhood size (pK) selection
	sweep.res.list <- paramSweep_v3(seurat_obj, PCs = 1:10, sct = FALSE)
	sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
	bcmvn <- find.pK(sweep.stats) # bimodality coefficient, mean-variance normalized

	## Visualize bcmvn to find pK value that optimizes AUC
	chosen_pK <- VALUEHERE
	chosen_pN <- 0.25 # pN shouldn't influence results much

	## Estimate doublet proportion
	# 7.5% initial estimate
	# can be tuned higher for more filtering, or lower for less
	doublet_est <- 0.075
	annotations <- seurat_obj@meta.data$ClusteringResults
	homotypic.prop <- modelHomotypic(annotations)
	nExp_poi <- round(doublet_est*nrow(seurat_obj@meta.data))
	nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))


	## Initial tuning run of DoubletFinder
	# Done to estimate proportion of artificial nearest neighbors (pANN)
	seurat_obj <- doubletFinder_V3(
	seurat_obj,
	PCs = 1:10,
	pN = 0.25,
	pK = chosen_pK,
	nExp = nExp_poi,
	reuse.pANN = FALSE,
	sct = FALSE
	)

	# Should generate pANN column within seurat_obj@meta.data
	# get name of this column for second round of doublet detection

	pANN_colname <- VALUHERE

	## Final run of Doublet Finder using
	seurat_obj <- doubletFinder_V3(
	seurat_obj,
	PCs = 1:10,
	pN = 0.25,
	pK = chosen_pK,
	nExp = nExp_poi.adj,
	reuse.pANN = pANN_colname,
	sct = FALSE
	)

	# within @meta.data a new column with a name including something like
	# 'DF.classification' will indicate samples as 'Doublet' or 'Singlet'
No results found