Skip to content

Instantly share code, notes, and snippets.

@3outeille
3outeille / oom_frontier_models.py
Created March 4, 2026 11:19
Reproduce the ~0.5 OOM/year (and actual ~0.6-0.7 OOM/year) training compute trend using Epoch AI's public frontier models dataset.
"""
Reproduce the ~0.5 OOM/year (and actual ~0.6-0.7 OOM/year) training compute trend
using Epoch AI's public frontier models dataset.
Data source: https://epoch.ai/data/ai-models
License: Creative Commons Attribution (CC BY)
"""
import pandas as pd
import numpy as np
import torch
import triton
import triton.language as tl
def assert_is_matrix(x):
    """Validate that `x` is rank-2 (a matrix); raise ValueError otherwise."""
    if x.ndim == 2:
        return
    raise ValueError(f'Expected 2-tensor but got {x.ndim}-tensor')
def assert_is_vector(x):
if x.ndim != 1:
import os
import torch
import torch.distributed as dist
import lovely_tensors as lt; lt.monkey_patch()
def split_tensor(data: torch.Tensor, dim: int) -> torch.Tensor:
rank = dist.get_rank()
world_size = dist.get_world_size()
@3outeille
3outeille / pipeline_parallel.py
Last active November 6, 2025 15:55
Self contained example of how pipeline parallel works (AFAB and 1F1B) in 200 LOC
#VERBOSE=0 torchrun --nproc_per_node 3 self_contained_pp_LOC.py
import os, random, numpy as np, torch, torch.nn as nn, torch.distributed as dist, torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader, DistributedSampler
from datasets import load_dataset
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
# Global step counter plus per-process distributed config read from the env vars
# that torchrun sets (LOCAL_RANK, WORLD_SIZE); VERBOSE logging is on only when
# the VERBOSE env var is exactly "1" (defaults to off).
STEP, local_rank, world_size, verbose = 0, int(os.environ["LOCAL_RANK"]), int(os.environ["WORLD_SIZE"]), os.environ.get("VERBOSE", "0") == "1"
def set_all_seed(seed):
@3outeille
3outeille / pipeline-model-parallel-visualization.ipynb
Created June 14, 2024 19:58 — forked from sighingnow/pipeline-model-parallel-visualization.ipynb
Visualizing various different pipeline model parallel scheduling algorithms: GPipe, Pipedream(1F1B), Pipedream-2BW(async, no-flushes), and eager-1F1B
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@3outeille
3outeille / pipeline-model-parallel-visualization.ipynb
Created June 14, 2024 19:58 — forked from sighingnow/pipeline-model-parallel-visualization.ipynb
Visualizing various different pipeline model parallel scheduling algorithms: GPipe, Pipedream(1F1B), Pipedream-2BW(async, no-flushes), and eager-1F1B
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@3outeille
3outeille / test_hf.py
Last active December 11, 2023 11:48
RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.
import torch
from torch.nn import functional as F
from torch import distributed as dist
import os
import numpy as np
import random
def set_random_seed(seed: int):
torch.manual_seed(seed)
if torch.cuda.is_available():
from copy import deepcopy
import torch
from datasets import load_dataset
from torch.optim import SGD
from torch.utils.data import DataLoader
from transformers import AutoModelForCausalLM, AutoTokenizer
import random
import os
import numpy as np
@3outeille
3outeille / full_cpu.py
Last active May 11, 2023 09:37
rwkv perplexity measure
import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
# Model
device = "cpu"
device_map = {
@3outeille
3outeille / README.md
Last active April 14, 2023 13:55
Triton Matmul Group-ordering vs Row-major ordering