Skip to content

Instantly share code, notes, and snippets.

@karpanGit
karpanGit / PyMC compare treatment to control, individual measurements.py
Created October 31, 2025 18:33
PyMC compare treatment to control, individual measurements
# compare treatment vs control, individual animals
# Bayesian comparison of two groups of raw per-animal measurements with PyMC.
import numpy as np
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
# === REPLACE these with your 10 raw measurements per group ===
# raw individual measurements, 10 animals per group
control = np.array([120, 118, 122, 121, 119, 117, 123, 116, 120, 119])
treatment = np.array([125, 128, 130, 127, 126, 124, 129, 125, 127, 126])
# NOTE(review): this excerpt is truncated — the statements inside the
# model context (priors, likelihood, sampling) are not visible here.
with pm.Model() as model:
@karpanGit
karpanGit / understand pandas groupby apply arguments group_keys and include_groups.py
Created July 29, 2025 09:55
understand pandas groupby/apply arguments group_keys and include_groups
# Demo frame for exploring groupby/apply arguments (group_keys,
# include_groups): two categorical key columns ("A", "B") and two
# numeric columns ("C", "D") filled with standard-normal draws.
import pandas as pd
import numpy as np  # fix: np.random.randn below was used without importing numpy

df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    }
)
@karpanGit
karpanGit / Pandas assign with lambda function with additional (default) arguments.py
Created July 27, 2025 14:10
Pandas assign with lambda function with additional (default) arguments
# Demonstrate pd.assign with lambdas taking an extra (default) argument.
# The `col=col` default binds the loop variable at definition time,
# avoiding the classic late-binding-closure pitfall where every lambda
# would otherwise see the last value of `col`.
import pandas as pd  # fragment was not self-contained: pd used without import

# simple dataframe with three columns
df = pd.DataFrame({'a': [1, 2, 3, 4, 5],
                   'b': [5, 4, 3, 2, 1],
                   'c': ['A', 'B', 'C', 'D', 'E']})
# double columns 'a' and 'b'; 'c' is left untouched.
# fix: use the frame passed to the callable (df_) instead of closing over
# the outer df — the df_ parameter was previously declared but ignored.
res = df.assign(**{col: lambda df_, col=col: df_[col] * 2 for col in ['a', 'b']})
# res:
#    a   b  c
# 0  2  10  A
# 1  4   8  B
@karpanGit
karpanGit / indigo, R-group decomposition with user specified query.py
Created March 30, 2024 08:34
indigo, R-group decomposition with user specified query
# R-group decomposition against a user-specified query scaffold.
# NOTE(review): `indigo` (an Indigo toolkit instance) is created outside
# this excerpt.
# the query may be passed by the user directly
# prepare query scaffold (e.g. '(R1)C1CC(R3)CCC1(R2)')
# scaffold = indigo.loadQueryMoleculeFromFile(r"D:/tmp/query_mol.mol")
# CXSMILES: ring with three attachment points labeled _R1/_R2/_R3 via
# %9x bond closures
scaffold = indigo.loadQueryMolecule('C1%91CCC%92CC%931.[*:1]%91.[*:2]%93.[*:3]%92 |$;;;;;;_R1;_R2;_R3$|')
# init decomposition
deco = indigo.createDecomposer(scaffold)
# load molecule
# if Br was H it would not match, even with implicit hydrogen atoms
# hence need to repeat with multiple queries with R groups removed
mol = indigo.loadMolecule('NC1CC(Br)CCC1(O)') #
@karpanGit
karpanGit / indigo, substructure search with tautomers.py
Last active March 29, 2024 21:00
indigo, substructure search with tautomers
##### method 1, builtin highlighting
# experiment with substructure matching (when tautomers, show the scaffold as in the target)
# NOTE(review): Indigo / IndigoRenderer are imported outside this excerpt.
indigo = Indigo()
renderer = IndigoRenderer(indigo)
# render matches to PNG images
indigo.setOption("render-output-format", "png")
# target molecule (plain structure)
smiles1 = 'CCC(O)=CCCCC'
mol1 = indigo.loadMolecule(smiles1)
# query molecule for the substructure search
smiles2 = 'CC(=O)CC'
mol2 = indigo.loadQueryMolecule(smiles2)
# 'TAU' selects tautomer-aware matching; see alternatives listed below
flag = 'TAU' # other flags 'RES', 'TAU', 'TAU INCHI', 'TAU RSMARTS'
@karpanGit
karpanGit / embeddings through sentencepiece or with PyTorch directly.py
Created November 20, 2023 06:44
embeddings through sentencepiece or with PyTorch directly
# for more details see
# https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1
# compute embeddings with the sentence-transformers library
# (NOTE(review): the gist title says "sentencepiece", but the code below
# uses the sentence-transformers package)
from sentence_transformers import SentenceTransformer, util
# sample documents to embed
docs = ["Around 9 Million people live in London", "This is nice"]
# Load the pretrained embedding model
model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
@karpanGit
karpanGit / pyspark, apply mapping.py
Last active February 7, 2023 06:00
pyspark, apply mapping
# map values in pyspark
# NOTE(review): `spark` (a SparkSession) is created outside this excerpt;
# the application of `mapping` to the frame is also truncated here.
import pyspark.sql.functions as F
from itertools import chain
# sample data: (name, val) rows; note 'd' has no entry in the mapping below
data = [['a', 1], ['b', 2], ['a', 3], ['d', 4]]
data = spark.createDataFrame(data, schema=['name', 'val'])
data.show()
# create mapping column
mapping = {'a': 'hello a', 'b': 'hello b', 'c': 'hello c'}
@karpanGit
karpanGit / pyspark, local vs global views.py
Created May 3, 2022 19:18
pyspark, local vs global views
# Build (or fetch) the SparkSession for this example.
# NOTE(review): excerpt is truncated — the closing parenthesis of this
# assignment is not visible here. Commented-out .config(...) lines are
# tuning options kept for reference.
spark = (
SparkSession.builder
.appName('learn')
# .config('spark.sql.shuffle.partitions', 10)
# .config('spark.default.parallelism', 10)
# .config('spark.executor.memory', '1g')
# .config('spark.driver.memory', '1g')
# .config('spark.executor.instances', 1)
#.config('spark.executor.cores', 2)
.getOrCreate()
@karpanGit
karpanGit / pyspark, generate dataframe from dictionary with and without a schema.py
Created May 1, 2022 15:27
pyspark, generate dataframe from dictionary with and without a schema
# create dataframe from dictionary, without a schema
# NOTE(review): `spark` (a SparkSession) is created outside this excerpt.
# list of row dicts; the second row omits 'two', so that field is null there
df = [{'one': 1, 'two': [1,2,3]}, {'one': 101}]
df = spark.createDataFrame(df)
# inferred schema, as shown in the captured output below
df.printSchema()
# root
# |-- one: long (nullable = true)
# |-- two: array (nullable = true)
# | |-- element: long (containsNull = true)
df.show()
# |one| two|
@karpanGit
karpanGit / pyspark, create struct from columns.py
Created May 1, 2022 14:20
pyspark, create struct from columns
# simple example, create struct
# NOTE(review): `spark` (a SparkSession) is created outside this excerpt;
# the captured .show() output below is truncated.
import pyspark.sql.functions as F
df = [[1, 'mplah', 'gogo'], [2, 'mplah2', 'gogo2'], [3, 'mplah3', 'gogo3']]
df = spark.createDataFrame(df, schema=['x', 'y', 'z'])
# pack columns x and y into a single struct column '_xy'
res = df.select(F.col('x'), F.struct(F.col('x').alias('_x'), F.col('y').alias('_y')).alias('_xy'))
res.show()
# | x| _xy|
# +---+-----------+
# | 1| {1, mplah}|
# | 2|{2, mplah2}|