Skip to content

Instantly share code, notes, and snippets.

@hweller1
Created August 13, 2025 17:32
Show Gist options
  • Select an option

  • Save hweller1/156455d97d68c5b8958f1758f8eecf7d to your computer and use it in GitHub Desktop.

Select an option

Save hweller1/156455d97d68c5b8958f1758f8eecf7d to your computer and use it in GitHub Desktop.
Simple script to normalize new vectors, or to update existing vectors stored as arrays of floats or BinData, in MongoDB
import math
from pymongo import MongoClient, UpdateOne
from bson.binary import Binary
def normalize(vector):
    """Return a unit-length (L2-normalized) copy of ``vector``.

    Args:
        vector: Iterable of real numbers. It is materialized once, so
            one-shot iterators such as generators are accepted.

    Returns:
        A new list holding each component divided by the Euclidean norm.
        When the norm is not positive (an empty or all-zero vector) the
        components are returned unscaled, still as a new list, because no
        direction can be derived from them.
    """
    # Materialize first: the input is traversed twice (norm, then scaling),
    # which would silently yield [] for an iterator that is already spent.
    components = list(vector)
    # math.hypot avoids the intermediate overflow that squaring each
    # component by hand runs into for very large values.
    magnitude = math.hypot(*components)
    if magnitude > 0:
        return [x / magnitude for x in components]
    return components
def _normalize_stored_vector(vector_data):
    """Normalize one stored ``vector`` value, keeping its storage form."""
    if isinstance(vector_data, Binary):
        # Imported here so the plain-array path keeps working on PyMongo
        # releases that predate BSON binary vector support.
        from bson.binary import BinaryVectorDtype

        # as_vector() returns a BinaryVector wrapper; the numbers live in
        # its .data attribute, not in the wrapper itself.
        unit = normalize(vector_data.as_vector().data)
        # Unit-length components are fractional, so they are written back as
        # float32 whatever the source dtype was (int8 could not hold them).
        return Binary.from_vector(unit, BinaryVectorDtype.FLOAT32)
    return normalize(vector_data)


def normalize_vectors(
    vectors=None,
    db_uri="mongodb://localhost:27017",
    db_name="vectors",
    collection_name="data",
    batch_size=1000,
):
    """Normalize vectors from a list or from a MongoDB collection.

    Two modes, selected by ``vectors``:

    * ``vectors`` given (any iterable, possibly empty): each vector is
      normalized and inserted as ``{"vector": v, "normalized": unit_v}``.
    * ``vectors`` is ``None``: every document that has a ``vector`` field but
      no ``normalized`` field gets a ``normalized`` field, written with
      batched bulk updates. BinData vectors are written back as BinData,
      arrays of floats as arrays.

    Args:
        vectors: Vectors to insert, or ``None`` to update the collection.
        db_uri: MongoDB connection string.
        db_name: Database name.
        collection_name: Collection name.
        batch_size: Cursor batch size and number of updates per bulk write.

    Returns:
        In insert mode, the list of inserted documents. In update mode, the
        number of documents in the collection that have a ``normalized``
        field (the collection-wide total, not only this call's updates).
    """
    # The context manager closes the client's connections on every exit
    # path, including exceptions raised mid-batch.
    with MongoClient(db_uri) as client:
        collection = client[db_name][collection_name]

        # "is not None" rather than truthiness: an explicitly empty batch
        # must not fall through and rewrite the whole collection.
        if vectors is not None:
            results = [{"vector": v, "normalized": normalize(v)} for v in vectors]
            # insert_many rejects an empty document list, so skip the call.
            if results:
                collection.insert_many(results)
            return results

        # Only documents that still need work, and only the field we read.
        pending = collection.find(
            {"normalized": {"$exists": False}, "vector": {"$exists": True}},
            projection={"vector": 1},
            batch_size=batch_size,
        )
        updates = []
        for doc in pending:
            normalized_data = _normalize_stored_vector(doc["vector"])
            updates.append(
                UpdateOne({"_id": doc["_id"]}, {"$set": {"normalized": normalized_data}})
            )
            if len(updates) >= batch_size:
                collection.bulk_write(updates)
                updates = []
        # Flush the final partial batch.
        if updates:
            collection.bulk_write(updates)
        return collection.count_documents({"normalized": {"$exists": True}})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment