Skip to content

Instantly share code, notes, and snippets.

@kkoomen
Created January 17, 2026 13:37
Show Gist options
  • Select an option

  • Save kkoomen/3858650cc339ae59f806ae5db9011944 to your computer and use it in GitHub Desktop.

Select an option

Save kkoomen/3858650cc339ae59f806ae5db9011944 to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
from huggingface_hub import hf_hub_download
from sae import SAE
from utils import SAEDataset
from sae_naming import compute_similarities
# Fetch the MSAE checkpoint from the Hugging Face Hub. hf_hub_download returns
# a local filesystem path to the file, which is passed on to SAE below.
MODEL_PATH = hf_hub_download(
    repo_id="WolodjaZ/MSAE",
    # Plain string literal: the name contains no placeholders, so no f-prefix.
    filename="ViT-L_14/not_centered/24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth",
)

# Instantiate the sparse autoencoder from the checkpoint path.
model = SAE(MODEL_PATH)
# Location of the vocabulary embedding file (.npy) fed to the dataset wrapper.
VOCAB_EMB_PATH = "data/laion_unigram_ViT-L~14_-1_text_37445_768.npy"

# Wrap the embeddings without mean-centering and with target_norm set to 0.0.
vocab_dataset = SAEDataset(
    VOCAB_EMB_PATH,
    mean_center=False,
    target_norm=0.0,
)

# Compare the vocabulary embeddings against the SAE.
# Per the original author's note, the result has shape
# (vocab_size, num_sae_neurons).
similarity_matrix = compute_similarities(
    model, vocab_dataset, patch_diff=True, batch_size=1024, num_workers=9
)
# Plot a 50-bin histogram of the maximum of similarity_matrix along axis 1,
# using the explicit Figure/Axes interface.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(similarity_matrix.max(axis=1), bins=50, alpha=0.7, edgecolor="white")

# Axis labels, title, and a faint grid for readability.
ax.set_xlabel("Similarity")
ax.set_ylabel("Number of concepts")
ax.set_title("Max Similarity Distribution with bias for MSAE (RW)")
ax.grid(True, alpha=0.3)

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment