Created
January 17, 2026 13:37
-
-
Save kkoomen/3858650cc339ae59f806ae5db9011944 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Plot the distribution of maximum similarities for an MSAE checkpoint.

The script downloads a sparse-autoencoder checkpoint from the Hugging Face
Hub, loads a file of vocabulary embeddings, computes a similarity matrix
between the two with ``compute_similarities``, and shows a histogram of the
row-wise maximum of that matrix.
"""
import numpy as np
import matplotlib.pyplot as plt
from huggingface_hub import hf_hub_download

from sae import SAE
from utils import SAEDataset
from sae_naming import compute_similarities

# Hugging Face Hub location of the checkpoint to analyse.
MODEL_REPO_ID = "WolodjaZ/MSAE"
MODEL_FILENAME = "ViT-L_14/not_centered/24576_768_TopKReLU_64_RW_False_False_0.0_cc3m_ViT-L~14_train_image_2905936_768.pth"

# Local .npy file holding the vocabulary embeddings.
VOCAB_EMB_PATH = "data/laion_unigram_ViT-L~14_-1_text_37445_768.npy"


def main() -> None:
    """Download the checkpoint, compute similarities, and show the histogram."""
    # hf_hub_download returns the local path of the downloaded file.
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
    )
    model = SAE(model_path)

    vocab_dataset = SAEDataset(VOCAB_EMB_PATH, mean_center=False, target_norm=0.0)

    similarity_matrix = compute_similarities(
        model,
        vocab_dataset,
        patch_diff=True,
        batch_size=1024,
        num_workers=9,
    )  # shape: (vocab_size, num_sae_neurons)

    # One value per vocabulary entry: its highest similarity to any neuron.
    max_per_concept = similarity_matrix.max(axis=1)

    plt.figure(figsize=(6, 4))
    plt.hist(max_per_concept, bins=50, alpha=0.7, edgecolor='white')
    plt.xlabel("Similarity")
    plt.ylabel("Number of concepts")
    plt.title("Max Similarity Distribution with bias for MSAE (RW)")
    plt.grid(True, alpha=0.3)
    plt.show()


# NOTE(review): num_workers=9 presumably starts worker processes inside
# compute_similarities (TODO confirm). With the "spawn" start method each
# worker re-imports this module, so the work must not run at import time.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment