Goals: add links to clear, well-reasoned explanations of how things work. No hype and, where possible, no vendor content. Practical first-hand accounts of running models in production are especially welcome.
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # dependencies = [ | |
| # "pyarrow", | |
| # "fsspec", | |
| # "click", | |
| # "s3fs", | |
| # "gcsfs", | |
| # ] | |
| # /// |
| import torch | |
| import torch._inductor.config | |
| import time | |
| torch._inductor.config.triton.cudagraphs = False | |
| torch.set_float32_matmul_precision('high') | |
| def bench(f, name=None, iters=100, warmup=5, display=True, profile=False): | |
| for _ in range(warmup): | |
| f() |
| from matplotlib import pyplot | |
| import random | |
| import time | |
| pyplot.style.use("ggplot") | |
| now = time.time() | |
| def generate_user(censor=now): | |
| # Pick some point in time the user was created | |
| t_created = t = now - random.random() * 1e7 |
| library("tidyverse") | |
| library("sparklyr") | |
| library("sparklyr.nested") | |
| library("cowplot") | |
| library("ggsci") | |
| #Spark config | |
| config <- spark_config() | |
| # Allow access to GCP datasets |
If you were to give recommendations to your "little brother/sister" on things that they need to do to become a data scientist, what would those things be?
I think the "Data Science Venn Diagram" (http://drewconway.com/zia/2013/3/26/the-data-science-venn-diagram) is a great place to start. You need three things to be a good data scientist:
| import luigi | |
| import time | |
| class TimeTaskMixin(object): | |
| ''' | |
| A mixin that, when added to a Luigi task, will print out | |
| the task's execution time to standard out when the task is | |
| finished | |
| ''' | |
| @luigi.Task.event_handler(luigi.Event.PROCESSING_TIME) |
| # Save the current SSH agent socket behind a stable, per-host symlink so | |
| # that long-lived screen/tmux sessions can keep reaching a live agent | |
| # after the original forwarded socket path goes stale. | |
| # NOTE(review): presumably paired elsewhere with SSH_AUTH_SOCK pointing | |
| # at the symlink (e.g. in .screenrc/.tmux.conf) — confirm. | |
| _ssh_auth_save() { | |
| ln -sf "$SSH_AUTH_SOCK" "$HOME/.ssh/ssh-auth-sock.$HOSTNAME" | |
| } | |
| # Refresh the symlink, then (re)export HOSTNAME, then launch the | |
| # multiplexer. NOTE(review): the symlink is created BEFORE HOSTNAME is | |
| # re-exported here, so it uses the pre-existing HOSTNAME value — verify | |
| # that is the intended ordering. | |
| alias screen='_ssh_auth_save ; export HOSTNAME=$(hostname) ; screen' | |
| alias tmux='_ssh_auth_save ; export HOSTNAME=$(hostname) ; tmux' |
| # Intro | |
| extremely simple and unsophisticated cross-process data sharing | |
| supports one read-write master process and an arbitrary number of read-only processes | |
| please consider using pickle/cPickle/ctypes to store complex data | |
| # References |