Created
August 13, 2025 17:32
-
-
Save hweller1/156455d97d68c5b8958f1758f8eecf7d to your computer and use it in GitHub Desktop.
Simple script that normalizes new vectors, or updates existing vectors stored in MongoDB as arrays of floats or as binData, by adding a normalized copy.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math

from bson.binary import Binary, BinaryVectorDtype
from pymongo import MongoClient, UpdateOne
def normalize(vector):
    """Normalize a vector to unit (Euclidean) length.

    Args:
        vector: A re-iterable sequence of real numbers (it is traversed
            twice, so a one-shot generator is not suitable).

    Returns:
        A new list with every component divided by the vector's magnitude.
        If the magnitude is not greater than zero (empty or all-zero input),
        the input object itself is returned unchanged.
    """
    # math.hypot rescales internally, so very large components do not raise
    # OverflowError and very small ones do not underflow to a zero magnitude,
    # both of which happen with a naive sqrt(sum(x**2)).
    magnitude = math.hypot(*vector)
    if magnitude > 0:
        return [x / magnitude for x in vector]
    return vector
def normalize_vectors(vectors=None, db_uri="mongodb://localhost:27017", db_name="vectors", collection_name="data", batch_size=1000):
    """Normalize vectors from a list or a MongoDB collection with bulk operations.

    Two modes, selected by ``vectors``:

    * ``vectors`` is a non-empty list: each vector is normalized and one
      document ``{"vector": v, "normalized": normalize(v)}`` per vector is
      inserted into the collection.
    * ``vectors`` is falsy (``None`` or empty): every document in the
      collection that has no ``"normalized"`` field but has a ``"vector"``
      field gets a ``"normalized"`` field set, written in batches.

    Args:
        vectors: Optional list of vectors to normalize and insert.
        db_uri: MongoDB connection URI.
        db_name: Name of the database to use.
        collection_name: Name of the collection to use.
        batch_size: Cursor batch size, and the number of pending updates
            that triggers a ``bulk_write``.

    Returns:
        In insert mode, the list of inserted documents. Otherwise, the count
        of documents in the collection that have a ``"normalized"`` field.
    """
    # Use the client as a context manager so its connections are closed on
    # every exit path (early return, normal return, or exception).
    with MongoClient(db_uri) as client:
        collection = client[db_name][collection_name]

        if vectors:
            results = [{"vector": v, "normalized": normalize(v)} for v in vectors]
            collection.insert_many(results)
            return results

        cursor = collection.find({"normalized": {"$exists": False}}, batch_size=batch_size)
        updates = []
        for doc in cursor:
            # Documents without a "vector" field are left untouched.
            if "vector" not in doc:
                continue

            vector_data = doc["vector"]
            if isinstance(vector_data, Binary):
                # as_vector() yields a BinaryVector; the numeric elements are
                # in its .data attribute, not in the object itself.
                binary_vector = vector_data.as_vector()
                normalized_array = normalize(binary_vector.data)
                # Normalized components are floats, so re-encode as FLOAT32;
                # from_vector needs an explicit dtype when given a plain list.
                # NOTE(review): for PACKED_BIT sources .data holds packed
                # bytes, so normalizing them is presumably not meaningful;
                # confirm whether such documents should be skipped.
                normalized_data = Binary.from_vector(normalized_array, BinaryVectorDtype.FLOAT32)
            else:
                normalized_data = normalize(vector_data)

            updates.append(UpdateOne({"_id": doc["_id"]}, {"$set": {"normalized": normalized_data}}))

            # Flush once a full batch has accumulated to bound memory use.
            if len(updates) >= batch_size:
                collection.bulk_write(updates)
                updates = []

        # Write whatever is left over from the last partial batch.
        if updates:
            collection.bulk_write(updates)

        return collection.count_documents({"normalized": {"$exists": True}})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment