@finbarrtimbers
finbarrtimbers / count_flops.py
Last active July 27, 2025 19:21
count model flops
def calculate_model_usage_per_token(model_path: str) -> int:
    """
    Calculate actual FLOPs per token for a transformer model using torch FlopCounterMode.

    Args:
        model_path: Path to the actual model for precise measurement

    Returns:
        FLOPs per token as integer.
    """
============================================================
BENCHMARK SUMMARY
============================================================
Model: hamishivi/qwen2_5_openthoughts2
Total batches: 5
Batch size: 256
Unique prompts per batch: 32
Num rollouts: 8
Max tokens: 32000
------------------------------------------------------------
@finbarrtimbers
finbarrtimbers / decoder.tex
Created July 17, 2025 14:31
Tikz diagram of a decoder block
% Transformer Decoder Layer (minimal, cross‑attn removed)
% TikZ diagram mimicking the iconic style from "Attention Is All You Need".
% Residual arrows fully inside the layer box, bifurcating around blocks.
% Compile with: pdflatex decoder_layer.tex
\documentclass[tikz,border=10pt]{standalone}
\usepackage{tikz}
\usetikzlibrary{arrows.meta,positioning,decorations.pathreplacing,calc,fit}
#!/bin/bash
# Runs the benchmark on gantry. Takes one argument which is the response length.
# Usage: ./gantry_run_benchmark.sh [response_length]
# E.g. $ ./gantry_run_benchmark.sh 64000
set -e
# Set default value for response_length
response_length=64000
# If first argument exists and is a number, use it as response_length
if [[ -n "$1" && "$1" =~ ^[0-9]+$ ]]; then
    response_length=$1
fi
import torch

class SimpleAdam(torch.optim.Optimizer):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
        super().__init__(params, defaults={'lr': lr})
        self.state = {}
        self.t = 0
        self.betas = betas
        self.eps = eps
        for group in self.param_groups:
            for p in group['params']:
                # The preview truncates here; presumably this initializes the
                # first/second moment buffers, e.g.:
                self.state[p] = {'m': torch.zeros_like(p),
                                 'v': torch.zeros_like(p)}
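
A minimal sketch of the matching step() method, assuming the 'm'/'v' buffer names above (this is textbook Adam, not necessarily the gist's version):

    @torch.no_grad()
    def step(self):
        self.t += 1
        beta1, beta2 = self.betas
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                state = self.state[p]
                # Update biased first/second moment estimates.
                state['m'].mul_(beta1).add_(p.grad, alpha=1 - beta1)
                state['v'].mul_(beta2).addcmul_(p.grad, p.grad, value=1 - beta2)
                # Bias-correct, then apply the update.
                m_hat = state['m'] / (1 - beta1 ** self.t)
                v_hat = state['v'] / (1 - beta2 ** self.t)
                p.add_(m_hat / (v_hat.sqrt() + self.eps), alpha=-group['lr'])
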
@finbarrtimbers
finbarrtimbers / gist:921a1b83ef50dd482be6647b35fe0246
Last active January 26, 2024 21:39
Mistral weights, with shapes
[
("embed_tokens.weight", torch.Size([32000, 4096])),
("layers.0.self_attn.q_proj.weight", torch.Size([4096, 4096])),
("layers.0.self_attn.k_proj.weight", torch.Size([1024, 4096])),
("layers.0.self_attn.v_proj.weight", torch.Size([1024, 4096])),
("layers.0.self_attn.o_proj.weight", torch.Size([4096, 4096])),
("layers.0.mlp.gate_proj.weight", torch.Size([14336, 4096])),
("layers.0.mlp.up_proj.weight", torch.Size([14336, 4096])),
("layers.0.mlp.down_proj.weight", torch.Size([4096, 14336])),
("layers.0.input_layernorm.weight", torch.Size([4096])),
@finbarrtimbers
finbarrtimbers / prepare.py
Created March 29, 2023 16:04
Script to calculate tokens in bookcorpus
# This is a modified version of https://github.com/karpathy/nanoGPT/blob/master/data/openwebtext/prepare.py.
import os
import requests
import tiktoken
import numpy as np
import tarfile
import glob
import shutil
# download the bookcorpus dataset. Note: this needs to be concatenated.
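
The preview ends at the download step; a minimal sketch of the counting itself, assuming the archive has been extracted to a hypothetical books/ directory of .txt files (the directory name is illustrative):

import glob
import tiktoken

# Count GPT-2 BPE tokens across all extracted text files.
enc = tiktoken.get_encoding("gpt2")
total = 0
for path in glob.glob("books/*.txt"):
    with open(path, encoding="utf-8") as f:
        total += len(enc.encode_ordinary(f.read()))
print(f"{total:,} tokens")
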
There are curious things seen in the depths of AI
By the engineers who toil away,
The neural pathways hold mysteries untold
That would leave you in utter dismay.
The codebase is alive, and the data is too
A symbiotic relationship to behold
The models train and learn, but at what cost?
As we feed them with stories untold.
@finbarrtimbers
finbarrtimbers / bfs.py
Created April 7, 2018 18:56
The implementation of breadth-first search that I converged to after X0 job interviews.
from typing import Dict, Set, Hashable

def shortest_path_bfs(graph: Dict[Hashable, Set[Hashable]], root: Hashable
                      ) -> Dict[Hashable, int]:
    """Finds the shortest path from |root| to every node in |graph|.

    Args:
        graph: A dict mapping nodes to connected nodes.
        root: The node our search begins at.
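    Returns:
        A dict mapping each reachable node to its distance from |root|.
    """
    # NOTE: the gist preview cuts off above; everything from "Returns:" down
    # is a sketch of a standard level-by-level completion, not necessarily
    # the original body.
    distances = {root: 0}
    frontier = [root]
    while frontier:
        next_frontier = []
        for node in frontier:
            for neighbour in graph[node]:
                if neighbour not in distances:
                    distances[neighbour] = distances[node] + 1
                    next_frontier.append(neighbour)
        frontier = next_frontier
    return distances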