Created
July 10, 2025 19:17
-
-
Save hweller1/4d4e3e3f49bf519d88e137e092236a1d to your computer and use it in GitHub Desktop.
Filtered two stage vector search using pgvector
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| `CREATE OR REPLACE FUNCTION two_stage_vector_search_optimized_prefiltered( | |
| query_embedding vector, | |
| match_limit int, | |
| num_candidates int, | |
| filter_category text DEFAULT NULL::text, | |
| max_price double precision DEFAULT NULL::double precision | |
| ) | |
| RETURNS TABLE ( | |
| id int8, | |
| title text, | |
| average_rating float4, | |
| description text, | |
| category text, | |
| price float4, | |
| embedding vector, | |
| distance float | |
| ) | |
| LANGUAGE plpgsql | |
| AS $$ | |
| BEGIN | |
| SET LOCAL hnsw.ef_search = 1000; | |
| SET LOCAL hnsw.iterative_scan = strict_order; | |
| RETURN QUERY | |
| -- Apply filters first, then do vector search | |
| WITH filtered_products AS ( | |
| SELECT | |
| p._id, | |
| p.title, | |
| p.average_rating, | |
| p.description, | |
| p.category, | |
| p.price, | |
| p.embedding | |
| FROM | |
| public.amazon_ecommerce_jina_512 p | |
| WHERE | |
| (p.category = 'Pet Supplies') AND (p.price <= 1000) | |
| ), | |
| -- Then get candidates using binary vectors | |
| binary_candidates AS ( | |
| SELECT | |
| fp._id, | |
| fp.title, | |
| fp.average_rating, | |
| fp.description, | |
| fp.category, | |
| fp.price, | |
| fp.embedding | |
| FROM | |
| filtered_products fp | |
| ORDER BY | |
| (binary_quantize(fp.embedding))::bit(512) <~> (binary_quantize(query_embedding))::bit(512) | |
| LIMIT num_candidates | |
| ) | |
| -- Finally rerank with full fidelity vectors | |
| SELECT | |
| bc._id, | |
| bc.title, | |
| bc.average_rating, | |
| bc.description, | |
| bc.category, | |
| bc.price, | |
| bc.embedding, | |
| bc.embedding <=> query_embedding AS distance | |
| FROM | |
| binary_candidates bc | |
| ORDER BY | |
| distance | |
| LIMIT match_limit; | |
| END; | |
| $$; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment