Created
May 18, 2025 13:49
-
-
Save DSamuelHodge/408d49935af2511a2ef929711f02bc0a to your computer and use it in GitHub Desktop.
Thermodynamics Analyzer: Analyzes language models through the lens of statistical thermodynamics. Calculates temperature (weight/gradient norm ratio), entropy (from singular values), energy (curvature), and derived metrics across layers. Identifies potential phase transitions using susceptibility (dG/dT), compressibility (dS/dF), and inter-laye…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| import numpy as np | |
| import pandas as pd | |
| from scipy.linalg import svdvals | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from transformers import logging as transformers_logging | |
| import logging | |
# ──────────────────── CONFIGURATION PARAMETERS ────────────────────────────────
# Model identifier handed to `from_pretrained`, and the single prompt used for
# the forward/backward pass.
MODEL_NAME = "Qwen/Qwen2.5-0.5B"
PROMPT = "The future of AI will"

# True  -> compare metrics against the fixed thresholds below.
# False -> compare z-scores of the metrics against the Z_* thresholds.
USE_FIXED_THRESHOLDS = False

# Output CSV name; '/' in the model id is replaced so it is a plain file name.
SAVE_PATH = MODEL_NAME.replace("/", "_") + "_transformer_thermo_analysis.csv"

# Latent-heat rule: |dT| below the first value while |dE| exceeds the second.
LATENT_DT_THRESHOLD = 1e-2
LATENT_DE_THRESHOLD = 5

# Fixed cut-offs (used when USE_FIXED_THRESHOLDS is True).
SUSCEPT_THRESHOLD = 1e-3   # susceptibility divergence
COMPRESS_THRESHOLD = 0.3   # high compressibility

# Z-score cut-offs (used when USE_FIXED_THRESHOLDS is False).
Z_SUSCEPT_THRESHOLD = 3    # 3-sigma for susceptibility
Z_COMPRESS_THRESHOLD = 2   # 2-sigma for compressibility
| # ──────────────────── HELPER FUNCTIONS ───────────────────────────────────────── | |
def spectral_entropy(matrix: torch.Tensor) -> float:
    """Return the Shannon entropy of the normalized singular-value spectrum.

    The strictly positive singular values of *matrix* are normalized to sum
    to one and treated as a probability distribution ``p``; the result is
    ``-sum(p * log(p))`` (natural log).

    Args:
        matrix: Tensor to decompose. It is detached and moved to the CPU;
            float16/bfloat16 input is upcast to float32 first.

    Returns:
        The entropy as a Python float. ``0.0`` is returned when there are no
        positive singular values or when the decomposition raises
        ``RuntimeError``.
    """
    weights = matrix.detach().cpu()
    # Half-precision input would otherwise make svdvals raise on the CPU and be
    # reported as a silent 0.0 entropy by the handler below.
    if weights.dtype in (torch.float16, torch.bfloat16):
        weights = weights.float()
    try:
        s = torch.linalg.svdvals(weights)
    except RuntimeError:
        # Best-effort metric: a failed decomposition is reported as zero entropy.
        return 0.0
    s = s[s > 0]  # log(0) is undefined; zero modes carry no probability mass
    if s.numel() == 0:
        return 0.0
    p = s / s.sum()
    return (-p * torch.log(p)).sum().item()
def curvature(matrix: torch.Tensor) -> float:
    """Proxy for 'energy' = Σσ² / Σσ over the singular values σ of *matrix*.

    Args:
        matrix: Tensor to decompose. It is detached and moved to the CPU;
            float16/bfloat16 input is upcast to float32 before the SVD.

    Returns:
        The ratio as a Python float, or ``0.0`` when all singular values are
        zero.
    """
    weights = matrix.detach().cpu()
    if weights.dtype in (torch.float16, torch.bfloat16):
        weights = weights.float()
    # Accumulate in float64 and return a built-in float so later arithmetic
    # with Python floats is not silently carried out in float32.
    s = svdvals(weights.numpy()).astype(np.float64)
    total = s.sum()
    if total == 0:
        return 0.0
    return float((s ** 2).sum() / (total + 1e-8))
def central_diff(arr: np.ndarray):
    """Central difference between neighbors of a 1-D array.

    Interior element ``i`` becomes ``(arr[i + 1] - arr[i - 1]) / 2``; the first
    and last elements have only one neighbor and are set to NaN.

    Args:
        arr: 1-D numeric array.

    Returns:
        A float array with the same length as *arr*. Inputs shorter than three
        elements have no interior points and yield all-NaN output (empty for
        empty input).
    """
    values = np.asarray(arr, dtype=float)
    out = np.full(len(values), np.nan)
    # With fewer than three elements there is no interior point; keeping the
    # output length equal to the input length lets it be assigned as a column.
    if len(values) >= 3:
        out[1:-1] = (values[2:] - values[:-2]) / 2
    return out
def z_score(x):
    """Compute z-score (standardization): ``(x - mean) / std``.

    The mean and standard deviation come from ``x.mean()`` and ``x.std()``,
    so their conventions (e.g. degrees of freedom, NaN handling) are those of
    the object passed in.

    Args:
        x: Array-like object exposing ``mean()`` and ``std()`` and supporting
            elementwise arithmetic (e.g. a pandas Series or NumPy array).

    Returns:
        The standardized values, same type/shape as ``x - x.mean()``. When the
        standard deviation is zero or not finite, every finite entry is
        reported as ``0.0`` instead of the NaN/inf a division would produce.
    """
    centered = x - x.mean()
    spread = x.std()
    if not np.isfinite(spread) or spread == 0:
        # No measurable spread: nothing deviates from the mean. Multiplying
        # (rather than building zeros) keeps missing entries as NaN.
        return centered * 0.0
    return centered / spread
# ──────────────────── 1. LOAD MODEL & TAKE ONE BACKWARD PASS ────────────────────
# Restrict transformers' own logging to errors, both through its helper and
# through the standard logging module.
transformers_logging.set_verbosity_error()
logging.getLogger("transformers").setLevel(logging.ERROR)

# Load the causal LM in float32 and switch it to evaluation mode.
load_kwargs = dict(torch_dtype=torch.float32, local_files_only=False)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_kwargs)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model.eval()

print(f"Analyzing {MODEL_NAME}, this will take a few mins.")

# Tokenize the prompt and use the same ids as labels so the model returns a
# loss; one backward pass then populates `.grad` on the parameters.
encoded = tokenizer(PROMPT, return_tensors="pt")
input_ids = encoded.input_ids
outputs = model(input_ids, labels=input_ids)
loss = outputs.loss
loss.backward()
# ──────────────────── 2. COLLECT PER-LAYER THERMODYNAMIC PRIMITIVES ─────────────
def _natural_name_key(name: str):
    """Sort key for dotted names that compares numeric components as integers."""
    # Each component becomes a uniformly shaped tuple so numeric and textual
    # components can be compared without a TypeError; numbers sort first.
    return [
        (0, int(tok), "") if tok.isdecimal() else (1, 0, tok)
        for tok in name.split(".")
    ]


records = []
for name, param in model.named_parameters():
    # Only 2-D weight matrices that received a gradient are analyzed.
    if 'weight' not in name or param.ndim != 2 or param.grad is None:
        continue
    W, G = param.detach(), param.grad.detach()
    Wn, Gn = torch.norm(W).item(), torch.norm(G).item()
    # "Temperature" = weight norm / gradient norm; a zero gradient maps to inf.
    T = Wn / (Gn + 1e-8) if Gn > 0 else float('inf')
    S = spectral_entropy(W)
    E = curvature(W)
    F = E - T * S  # Helmholtz-style free energy
    records.append(dict(
        name=name, temperature=T, entropy=S, energy=E,
        free_energy=F, W_norm=Wn, G_norm=Gn))

# Rows are ordered by name with numeric components compared as numbers. A plain
# string sort would place e.g. "layers.10" before "layers.2", so differences
# taken between neighboring rows would not be between adjacent layers.
df = pd.DataFrame(sorted(records, key=lambda rec: _natural_name_key(rec['name'])))
# ──────────────────── 3. DERIVED THERMODYNAMIC-ANALOGUE METRICS ─────────────────
def _regularized(denominator, eps=1e-6):
    """Move *denominator* away from zero by *eps* without changing its sign."""
    # Adding +eps unconditionally would push small negative values toward zero
    # (a value of -eps would become exactly 0) instead of away from it.
    return denominator + np.where(denominator < 0, -eps, eps)


# Neighbor-to-neighbor central differences of each primitive (NaN at both ends).
df['dE'] = central_diff(df['energy'].values)
df['dT'] = central_diff(df['temperature'].values)
df['dG_norm'] = central_diff(df['G_norm'].values)
df['dS'] = central_diff(df['entropy'].values)
df['dF'] = central_diff(df['free_energy'].values)

# Thermodynamic metrics: ratios of the differences above.
df['specific_heat'] = df['dE'] / _regularized(df['dT'])        # dE/dT
df['susceptibility'] = df['dG_norm'] / _regularized(df['dT'])  # dG/dT
df['compressibility'] = -df['dS'] / _regularized(df['dF'])     # -dS/dF
# ──────────────────── 4. PHENOMENA DETECTION ─────────────────────────────────
# Latent heat and negative specific heat always use these fixed rules.
small_dT = df['dT'].abs().lt(LATENT_DT_THRESHOLD)
large_dE = df['dE'].abs().gt(LATENT_DE_THRESHOLD)
df['latent_heat_jump'] = small_dT & large_dE
df['neg_specific_heat'] = df['specific_heat'].lt(0)

# Susceptibility / compressibility flags: pick the score series and cut-off
# according to the configuration, then apply one shared comparison.
if USE_FIXED_THRESHOLDS:
    suscept_score, suscept_cut = df['susceptibility'], SUSCEPT_THRESHOLD
    compress_score, compress_cut = df['compressibility'], COMPRESS_THRESHOLD
else:
    suscept_score, suscept_cut = z_score(df['susceptibility']), Z_SUSCEPT_THRESHOLD
    compress_score, compress_cut = z_score(df['compressibility']), Z_COMPRESS_THRESHOLD

df['suscept_divergent'] = suscept_score.abs() > suscept_cut
df['high_compress'] = compress_score.abs() > compress_cut
# ──────────────────── 5. SAVE & QUICK-LOOK SUMMARIES ───────────────────────────
# Write the whole table to disk, leaving out the row index.
df.to_csv(SAVE_PATH, index=False)

# Report where the file went, then print the heading for the summary.
written_msg = f"Enhanced metrics written → {SAVE_PATH}\n"
for line in (written_msg, "Thermodynamic Phenomena Summary:"):
    print(line)
| df |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment