Skip to content

Instantly share code, notes, and snippets.

View vjcitn's full-sized avatar

Vince Carey vjcitn

View GitHub Profile
@vjcitn
vjcitn / dogencode.R
Created March 12, 2026 10:31
Explore GenomicState+txdbmaker functions for working with Gencode V49
library(GenomicState)
library(RSQLite)
library(txdbmaker)
# Build a TxDb-style object for GENCODE release 49 on hg38, restricted to
# the autosomes plus chrX, chrY and chrM.
hgenc49 <- gencode_txdb(
  version = "49",
  genome = "hg38",
  chrs = paste0("chr", c(as.character(seq_len(22)), "X", "Y", "M"))
)

# Inspect the underlying connection (author's note: 'path' is empty).
hgenc49$conn

# Pull the full metadata table (author's note: it lacks a 'Resource URL'
# record).
md <- dbGetQuery(
  conn = hgenc49$conn,
  statement = "select * from metadata"
)
@vjcitn
vjcitn / condamessages.txt
Created February 21, 2026 16:40
tracking an OSTA build error
8/51 [install-miniconda]
trying URL 'https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh'
Content type 'application/octet-stream' length 105158150 bytes (100.3 MB)
==================================================
downloaded 100.3 MB
PREFIX=/root/.local/share/r-miniconda
Unpacking bootstrapper...
Unpacking payload...
Extracting _libgcc_mutex-0.1-conda_forge.tar.bz2
@vjcitn
vjcitn / gencodeError.txt
Last active January 9, 2026 13:42
traceback for using GENCODE in Bioconductor; need CENTREannotation attached to avoid this
> query(ah, c("GENCODE", "human"))
AnnotationHub with 87 records
# snapshotDate(): 2025-10-29
# $dataprovider: GENCODE, Gencode, BroadInstitute, ENCODE
# $species: Homo sapiens
# $rdataclass: GRanges, list, TxDb, SQLiteConnection
# additional mcols(): taxonomyid, genome, description,
# coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
# rdatapath, sourceurl, sourcetype
# retrieve records with, e.g., 'object[["AH49010"]]'
@vjcitn
vjcitn / bedarray.R
Last active November 13, 2025 18:33
DelayedArray back end for plink2 bed, before I learned about BEDMatrix
library(DelayedArray)
# S4 class "plBedSeed": extends "Array" and carries three slots --
#   dim          integer vector
#   bedreaderRef any object (slot is typed "ANY")
#   dimnames     list
setClass(
  Class = "plBedSeed",
  contains = "Array",
  slots = list(
    dim = "integer",
    bedreaderRef = "ANY",
    dimnames = "list"
  )
)
@vjcitn
vjcitn / make1zip.R
Last active November 10, 2025 22:09
create one zip file for a source package on Windows
make1zip = function(pname, target="zips322") {
if (!dir.exists(target)) dir.create(target)
target = file.path(getwd(), target)
ins = try(BiocManager::install(pname, ask=FALSE, update=FALSE, type="binary",
dependencies=TRUE, force=TRUE)) # acquire dependencies as needed
if (inherits(ins, "try-error")) stop(sprintf("can't install %s with BiocManager", pname))
#real_inst = try(install.packages(pname, type="source", repos=NULL)) # this uses native windows build tools from Rtools
bpath = pkgbuild::build(pname)
if (inherits(bpath, "try-error")) stop(sprintf("can't build %s from source", pname))
install.packages(bpath, type="source", repos=NULL)
@vjcitn
vjcitn / getsrc322.R
Created November 10, 2025 16:45
set up source folders for bioc 3.22
library(BiocPkgTools)
# Names of all packages listed in the Bioconductor 3.22 software repository.
all322 <- biocPkgList(version = "3.22", repo = "BioCsoft")[["Package"]]

library(BiocBuildTools) # from github vjcitn

# Describe the package collection on the RELEASE_3_22 branch.
set322 <- PackageSet(
  all322,
  biocversion = "3.22",
  branchname = "RELEASE_3_22"
)

# Create the destination folder and populate it from the package set.
dir.create("srcs_322")
populate_local_gits(set322, gitspath = "srcs_322")
@vjcitn
vjcitn / reas.py
Created November 6, 2025 10:31
a reasoning demonstration produced by perplexity.ai
from owlready2 import get_ontology, onto_path, sync_reasoner
def find_terms_by_keyword(ontology, keyword):
    """Return the classes of ``ontology`` whose display text contains ``keyword``.

    The display text of a class is its first label when it has a non-empty
    ``label`` attribute, otherwise its ``name``. Matching is a
    case-insensitive substring test.

    Args:
        ontology: Object exposing ``classes()``, an iterable of class objects.
        keyword: Substring to look for.

    Returns:
        A list of matching classes, in the order ``ontology.classes()``
        yields them.
    """

    def _display_text(cls):
        # Simple case: prefer the first label, fall back to the class name.
        if hasattr(cls, "label") and cls.label:
            return cls.label.first()
        return cls.name

    # Lazily pair each class with its display text, then filter.
    candidates = ((cls, _display_text(cls)) for cls in ontology.classes())
    return [cls for cls, text in candidates if keyword.lower() in text.lower()]
@vjcitn
vjcitn / term_specificity.py
Created November 6, 2025 10:10
for a combination of string and ontology (importable via owlready2) compute term specificity
from owlready2 import get_ontology, onto_path
import numpy as np
def term_to_class(user_term, onto):
all_terms = list(onto.classes())
excluded_terms = get_excluded_terms(onto)
# Try to find by label first, then by name
found = None
for cls in all_terms:
if cls in excluded_terms:
@vjcitn
vjcitn / ibiplot.R
Created November 1, 2025 13:09
adjustable biplot for SummarizedExperiment
library(SummarizedExperiment)
library(irlba)
ibiplot = function(se, choices=1:2, nfeat=5, ircomp=6, ...) {
dat = t(assay(se))
pcs = prcomp_irlba(dat, ircomp)
rot = pcs$rotation
ss = rowSums(abs(rot[,choices])^2)
kp = order(ss, decreasing=TRUE)[seq_len(nfeat)]
labs = rep(" ", nrow(rot))
labs[kp] = rownames(se)[kp]
@vjcitn
vjcitn / bm2SE.R
Created October 9, 2025 09:06
use GDSArray to interact with bigmelon serialization as SummarizedExperiment
library(S4Vectors)
library(SummarizedExperiment)
library(GDSArray)
library(bigmelon)
# Load the 'melon' example dataset.
data("melon")

# Write 'melon' out through es2gds() to a fresh temporary file path.
tf <- tempfile()
es2gds(melon, tf)
bm2SE = function(gdsf, elem="betas",
fdkeep=c("ProbeID_A", "ProbeID_B", "ILMNID", "NAME",