Skip to content

Instantly share code, notes, and snippets.

@chaokunyang
Last active July 18, 2025 14:43
Show Gist options
  • Select an option

  • Save chaokunyang/4cf60383ffbaf102e129394c156471ca to your computer and use it in GitHub Desktop.

Select an option

Save chaokunyang/4cf60383ffbaf102e129394c156471ca to your computer and use it in GitHub Desktop.
read_tensor_from_huggingface.py
#!/usr/bin/env python3
import os
import torch
import json
import sys
def read_tensor_with_safetensors(model_dir, tensor_name):
    """Read one tensor from a safetensors checkpoint without loading the model.

    A sharded checkpoint (``model.safetensors.index.json``) is preferred; when
    no index exists, a single-file checkpoint (``model.safetensors``) in the
    same directory is used instead.

    Args:
        model_dir: Directory that holds the checkpoint files.
        tensor_name: Exact key of the tensor to read.

    Returns:
        The tensor opened with ``device="cpu"``, or ``None`` when the
        safetensors library is missing, no checkpoint is found, or the tensor
        is not present.
    """
    try:
        from safetensors import safe_open
    except ImportError:
        print("safetensors library not installed. Install with: pip install safetensors")
        return None

    index_path = os.path.join(model_dir, "model.safetensors.index.json")
    single_file = "model.safetensors"

    if os.path.exists(index_path):
        with open(index_path, 'r', encoding="utf-8") as f:
            index = json.load(f)
        # .get() so an index without "weight_map" is reported as "not found"
        # instead of raising KeyError.
        weight_map = index.get("weight_map", {})
        if tensor_name not in weight_map:
            print(f"Tensor '{tensor_name}' not found in weight_map")
            print("Available tensors (first 10):")
            for name in list(weight_map)[:10]:
                print(f" {name}")
            return None
        file_name = weight_map[tensor_name]
    elif os.path.isfile(os.path.join(model_dir, single_file)):
        # Unsharded checkpoints ship without an index file.
        file_name = single_file
    else:
        print(f"Index file not found: {index_path}")
        return None

    file_path = os.path.join(model_dir, file_name)
    if not os.path.isfile(file_path):
        # The index can reference a shard that was never downloaded.
        print(f"Checkpoint file not found: {file_path}")
        return None

    with safe_open(file_path, framework="pt", device="cpu") as f:
        available = list(f.keys())
        if tensor_name not in available:
            # Covers the single-file case and a stale index entry.
            print(f"Tensor '{tensor_name}' not found in file: {file_name}")
            print("Available tensors (first 10):")
            for name in available[:10]:
                print(f" {name}")
            return None
        print(f"Tensor '{tensor_name}' is in file: {file_name}")
        tensor = f.get_tensor(tensor_name)

    print(f"Tensor shape: {tensor.shape}")
    print(f"Tensor dtype: {tensor.dtype}")
    print(f"Tensor device: {tensor.device}")
    return tensor
def read_tensor_with_transformers(model_dir, tensor_name):
    """Load the whole model via transformers and return one named parameter.

    Args:
        model_dir: Directory passed to ``AutoModel.from_pretrained``.
        tensor_name: Name to match against ``model.named_parameters()``.

    Returns:
        The matching parameter, or ``None`` when transformers is not
        installed, loading fails, or no parameter has that name.
    """
    try:
        from transformers import AutoModel
    except ImportError:
        print("transformers library not installed. Install with: pip install transformers")
        return None

    print("Loading model with transformers...")
    try:
        # NOTE(review): the model is requested as float16, so the returned
        # values may not match the dtype stored in the checkpoint - confirm
        # this is intended when comparing against other dumps.
        model = AutoModel.from_pretrained(model_dir, torch_dtype=torch.float16, device_map="cpu")

        # First parameter whose name matches exactly, if any.
        match = next(
            (param for name, param in model.named_parameters() if name == tensor_name),
            None,
        )

        if match is None:
            print(f"Tensor '{tensor_name}' not found in model")
            print("Available tensors (first 10):")
            for idx, (name, _) in enumerate(model.named_parameters()):
                if idx >= 10:
                    break
                print(f" {name}")
            return None

        print(f"Tensor shape: {match.shape}")
        print(f"Tensor dtype: {match.dtype}")
        print(f"Tensor device: {match.device}")
        return match
    except Exception as e:
        # Best effort: any load failure is reported and treated as "not found".
        print(f"Error loading model: {e}")
        return None
def read_tensor_with_torch(model_dir, tensor_name):
    """Read a tensor from the ``.pt`` files in a directory using ``torch.load``.

    Each ``.pt`` file directly inside ``model_dir`` is loaded in sorted order;
    the first one that holds a dict with ``tensor_name`` as a key wins.

    Args:
        model_dir: Directory to scan for ``.pt`` files.
        tensor_name: Key to look up in each loaded dict.

    Returns:
        The tensor mapped to CPU, or ``None`` when ``model_dir`` is not a
        directory or no ``.pt`` file provides the tensor.
    """
    # os.listdir raises on a non-directory; treat that as "nothing to read".
    if not os.path.isdir(model_dir):
        print("No suitable .pt files found")
        return None

    # Sorted so the file that wins is deterministic across platforms.
    pt_files = sorted(f for f in os.listdir(model_dir) if f.endswith('.pt'))
    if pt_files:
        print(f"Found .pt files: {pt_files}")

    for pt_file in pt_files:
        file_path = os.path.join(model_dir, pt_file)
        try:
            # SECURITY: torch.load unpickles the file, which can execute
            # arbitrary code on torch versions where weights_only is not the
            # default. Only point this at checkpoints you trust.
            loaded = torch.load(file_path, map_location='cpu')
        except Exception as e:
            # Best effort: an unreadable file must not stop the scan.
            print(f"Error loading {pt_file}: {e}")
            continue

        if not (isinstance(loaded, dict) and tensor_name in loaded):
            continue

        tensor = loaded[tensor_name]
        if not isinstance(tensor, torch.Tensor):
            print(f"Entry '{tensor_name}' in {pt_file} is not a tensor")
            continue

        print(f"Found tensor in {pt_file}")
        print(f"Tensor shape: {tensor.shape}")
        print(f"Tensor dtype: {tensor.dtype}")
        return tensor

    print("No suitable .pt files found")
    return None
def save_tensor_for_comparison(tensor, output_path):
    """Write a detached, CPU, contiguous copy of ``tensor`` to ``output_path``.

    Args:
        tensor: Tensor to save; ``None`` means there is nothing to save.
        output_path: Destination path handed to ``torch.save``.

    Returns:
        ``output_path`` after saving, or ``None`` when ``tensor`` is ``None``.
    """
    if tensor is None:
        return None

    # Drop autograd state and move to CPU so only the tensor itself is saved.
    detached = tensor.detach()
    on_cpu = detached.cpu()
    torch.save(on_cpu.contiguous(), output_path)
    print(f"Saved tensor to: {output_path}")
    return output_path
def list_available_tensors(model_dir):
    """Print how many tensors the model has and the names of the first ten.

    Names come from ``model.safetensors.index.json`` when it exists;
    otherwise the model is loaded with transformers and its parameters are
    listed. Nothing is returned.

    Args:
        model_dir: Directory that holds the model files.
    """
    print(f"Listing available tensors in: {model_dir}")

    # Preferred source: the safetensors index, which needs no model load.
    index_path = os.path.join(model_dir, "model.safetensors.index.json")
    if os.path.exists(index_path):
        with open(index_path, 'r') as f:
            index = json.load(f)
        print(f"Found {len(index['weight_map'])} tensors in safetensors index")
        print("First 10 tensors:")
        for tensor_key in list(index["weight_map"])[:10]:
            print(f" {tensor_key}")
        return

    # Fallback: load the model and enumerate its parameters.
    try:
        from transformers import AutoModel
        model = AutoModel.from_pretrained(model_dir, torch_dtype=torch.float16, device_map="cpu")
        param_names = [name for name, _ in model.named_parameters()]
        print(f"Found {len(param_names)} tensors in model")
        print("First 10 tensors:")
        for param_name in param_names[:10]:
            print(f" {param_name}")
    except Exception as e:
        print(f"Could not load model with transformers: {e}")
if __name__ == "__main__":
    # Usage: <script> <model_directory> [tensor_name]
    # Tries each reader in turn and saves the first tensor found to /tmp.
    if len(sys.argv) < 2:
        # Report the name the script was actually invoked with.
        script = os.path.basename(sys.argv[0])
        print(f"Usage: python {script} <model_directory> [tensor_name]")
        print(f"Example: python {script} /path/to/model model.layers.5.attention.query_key_value.weight")
        sys.exit(1)

    model_dir = sys.argv[1]
    tensor_name = sys.argv[2] if len(sys.argv) > 2 else "model.layers.5.attention.query_key_value.weight"

    # isdir, not exists: a plain file would crash the readers that list the directory.
    if not os.path.isdir(model_dir):
        print(f"Model directory does not exist: {model_dir}")
        sys.exit(1)

    output_path = f"/tmp/hf_model_{tensor_name.replace('.', '_').replace('/', '_')}.pt"

    print(f"Reading tensor: {tensor_name}")
    print(f"From model directory: {model_dir}")
    print("=" * 60)

    # First, list available tensors
    list_available_tensors(model_dir)

    # Readers ordered from cheapest to most expensive.
    methods = [
        ("Method 1: Using safetensors library", read_tensor_with_safetensors),
        ("Method 2: Using transformers library", read_tensor_with_transformers),
        ("Method 3: Using torch.load", read_tensor_with_torch),
    ]

    tensor = None
    for title, reader in methods:
        print("\n" + "=" * 60)
        print(title)
        tensor = reader(model_dir, tensor_name)
        if tensor is not None:
            # Stop at the first success so a later method cannot overwrite the file.
            save_tensor_for_comparison(tensor, output_path)
            break

    if tensor is None:
        print(f"\nCould not read tensor '{tensor_name}' with any method")
        sys.exit(1)

    print("\nSaved tensor can be compared with your debug files:")
    print(f" HF model tensor: {output_path}")
    print(" Your debug files: /tmp/reader_*_*_*.pt")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment