Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save hweller1/4d4e3e3f49bf519d88e137e092236a1d to your computer and use it in GitHub Desktop.

Select an option

Save hweller1/4d4e3e3f49bf519d88e137e092236a1d to your computer and use it in GitHub Desktop.
Filtered two stage vector search using pgvector
`CREATE OR REPLACE FUNCTION two_stage_vector_search_optimized_prefiltered(
query_embedding vector,
match_limit int,
num_candidates int,
filter_category text DEFAULT NULL::text,
max_price double precision DEFAULT NULL::double precision
)
RETURNS TABLE (
id int8,
title text,
average_rating float4,
description text,
category text,
price float4,
embedding vector,
distance float
)
LANGUAGE plpgsql
AS $$
BEGIN
SET LOCAL hnsw.ef_search = 1000;
SET LOCAL hnsw.iterative_scan = strict_order;
RETURN QUERY
-- Apply filters first, then do vector search
WITH filtered_products AS (
SELECT
p._id,
p.title,
p.average_rating,
p.description,
p.category,
p.price,
p.embedding
FROM
public.amazon_ecommerce_jina_512 p
WHERE
(p.category = 'Pet Supplies') AND (p.price <= 1000)
),
-- Then get candidates using binary vectors
binary_candidates AS (
SELECT
fp._id,
fp.title,
fp.average_rating,
fp.description,
fp.category,
fp.price,
fp.embedding
FROM
filtered_products fp
ORDER BY
(binary_quantize(fp.embedding))::bit(512) <~> (binary_quantize(query_embedding))::bit(512)
LIMIT num_candidates
)
-- Finally rerank with full fidelity vectors
SELECT
bc._id,
bc.title,
bc.average_rating,
bc.description,
bc.category,
bc.price,
bc.embedding,
bc.embedding <=> query_embedding AS distance
FROM
binary_candidates bc
ORDER BY
distance
LIMIT match_limit;
END;
$$;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment