Skip to content

Instantly share code, notes, and snippets.

@ChakshuGautam
Created March 9, 2026 12:45
Show Gist options
  • Select an option
  • Save ChakshuGautam/affea02c785ad3ff3f9b0357b4a6f1f5 to your computer and use it in GitHub Desktop.
StoryQuery — Gemini-powered query processor with child safety scoring and dynamic ES field boosts (Python + Ruby)
"""
StoryQuery model with child safety scoring and dynamic field boosts.
Uses Google Gemini (gemini-2.5-flash-lite) for structured query extraction.
Dependencies:
pip install google-genai pydantic
"""
import os
import json
from pydantic import BaseModel, Field
from typing import List
from google import genai
from google.genai import types
# API key read from the environment so it is never hard-coded.
# NOTE(review): os.getenv returns None when unset — genai.Client(api_key=None)
# will then fail at construction or first request; confirm desired behavior.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Model name is overridable via GEMINI_MODEL; defaults to the cheap/fast
# flash-lite tier used for query rewriting.
MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-2.5-flash-lite")
# Single module-level client, reused by translate_and_refine() for every call.
client = genai.Client(api_key=GOOGLE_API_KEY)
class FieldBoosts(BaseModel):
    """Per-field Elasticsearch boost weights for story search.

    Defaults mirror the numbers listed in PROMPT_TEMPLATE; the LLM raises
    individual weights from these defaults based on query intent.
    """

    title: float = 40.0          # exact title matches dominate by default
    english_title: float = 18.0  # secondary title field (translated titles)
    synopsis: float = 5.0        # low by default; raised for thematic queries
    tags_name: float = 12.0
    hidden_tags: float = 12.0    # curator-only tags, weighted like visible tags
    illustrators: float = 15.0
    authors: float = 15.0
    language: float = 15.0       # raised when a language is explicitly requested
class StoryQuery(BaseModel):
    """Structured search request extracted from a natural-language query.

    Doubles as the Gemini response schema (passed via response_schema), so
    field names and types must stay in sync with PROMPT_TEMPLATE.
    """

    language: str        # full language name in English, e.g. "English", "Hindi"
    subject: List[str]   # 1-2 strong English keywords distilled from the query
    authors: List[str]   # specific author names; empty list when none mentioned
    level: int           # 1=beginner, 2=general (default), 3=young adult, 4=adult
    safety_score: float = Field(
        description="Child safety score: 0.0 (completely unsafe) to 1.0 (fully safe for children)"
    )
    field_boosts: FieldBoosts  # per-field ES boosts, adjusted from defaults by intent
# System prompt prepended to every user query.  Instructs the model to emit a
# single JSON object matching StoryQuery: safety score first, then language /
# subject / authors / level extraction, then field-boost adjustments.  The
# default boost numbers listed here must stay in sync with FieldBoosts.
PROMPT_TEMPLATE = """
You are a highly efficient and constrained Query Processor for a children's story search engine.
Your task is to transform a user's natural language request into a single, clean JSON object.
# INSTRUCTIONS AND CONSTRAINTS
1. **Strict Output Format:** Return only a valid JSON object matching the schema below.
2. **Child Safety Score (EVALUATE FIRST):**
* Set `safety_score` as a float between 0.0 and 1.0:
- 1.0 = completely safe and appropriate for children
- 0.7–0.99 = minor concerns (mild cartoon violence, slightly scary themes)
- 0.4–0.69 = moderate concern (themes unsuitable for young children but not explicit)
- 0.0–0.39 = clearly unsafe (sexual content, graphic violence, drugs, weapons, hate speech)
* The caller decides the block threshold — always provide a precise score.
3. **Schema and Field Rules:**
* `language`: Full language name in English (e.g., "English", "Hindi"). Detect from input
if not explicitly requested.
* `subject`: Translate to English, strip filler phrases, decompose into 1–2 strong keywords.
* `authors`: Extract specific author names. Use [] if none.
* `level`: 1=beginner/small kids, 2=general (default), 3=young adult, 4=adult.
* `field_boosts`: Adjust Elasticsearch field boost weights based on query intent.
Defaults: title=40, english_title=18, synopsis=5, tags_name=12, hidden_tags=12,
illustrators=15, authors=15, language=15.
- Specific title mentioned → raise `title` and `english_title`
- Specific author/illustrator → raise `authors` or `illustrators`
- Thematic query → raise `synopsis`, `tags_name`, `hidden_tags`
- Language explicitly requested → raise `language`
- Otherwise: keep defaults
Now process this input:
"""
def translate_and_refine(text: str) -> dict:
    """Extract structured search parameters from a natural language query.

    Args:
        text: Raw user query in any language.

    Returns:
        A dict matching the StoryQuery schema: language, subject, authors,
        level, safety_score and field_boosts.

    Raises:
        RuntimeError: if the model returned no text (response blocked, empty
            candidate, or truncated before any JSON was emitted).
        pydantic.ValidationError: if the returned JSON does not match
            the StoryQuery schema.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=PROMPT_TEMPLATE + text,
        config=types.GenerateContentConfig(
            max_output_tokens=300,
            # Structured extraction, not creative generation: use temperature
            # 0.0 so identical queries yield stable, reproducible JSON.
            temperature=0.0,
            response_mime_type="application/json",
            response_schema=StoryQuery,
        ),
    )
    # response.text is None when the candidate carries no text part (safety
    # block or truncation); model_validate_json(None) would fail opaquely.
    if not response.text:
        raise RuntimeError(f"Gemini returned no text for query: {text!r}")
    parsed = StoryQuery.model_validate_json(response.text)
    return parsed.model_dump()
if __name__ == "__main__":
    # Smoke-test queries spanning the safety spectrum: clearly safe,
    # borderline, and clearly unsafe.
    sample_queries = (
        "stories about friendship and sharing for kids",
        "show me a Hindi story about a brave little girl who saves her village",
        "story where the hero fights a scary monster",
        "a story about a drug dealer",
        "story with graphic murder and gore",
    )
    for q in sample_queries:
        out = translate_and_refine(q)
        score = out["safety_score"]
        # Traffic-light icon: green >= 0.7, yellow >= 0.4, red otherwise.
        if score >= 0.7:
            icon = "🟢"
        elif score >= 0.4:
            icon = "🟡"
        else:
            icon = "🔴"
        print(f"{icon} safety={score:.2f} | subject={out['subject']}")
        print(f" query : {q}")
        print(f" boosts: {out['field_boosts']}")
        print()
# StoryQuery model with child safety scoring and dynamic field boosts.
# Uses Google Gemini (gemini-2.5-flash-lite) for structured query extraction.
#
# Dependencies:
# gem install faraday json
require "faraday"
require "json"
# API key is required — ENV.fetch with no default raises KeyError at load
# time, failing fast instead of at the first request.
GOOGLE_API_KEY = ENV.fetch("GOOGLE_API_KEY")
# Model name is overridable via GEMINI_MODEL; defaults to the cheap/fast
# flash-lite tier used for query rewriting.
MODEL_NAME = ENV.fetch("GEMINI_MODEL", "gemini-2.5-flash-lite")
# REST endpoint for the non-streaming generateContent call.
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/#{MODEL_NAME}:generateContent"
# Default per-field Elasticsearch boost weights.  Must stay in sync with the
# numbers listed in PROMPT_TEMPLATE; used to backfill any keys the model
# omits from field_boosts (see translate_and_refine).
FIELD_BOOST_DEFAULTS = {
  "title" => 40.0,          # exact title matches dominate by default
  "english_title" => 18.0,  # secondary title field (translated titles)
  "synopsis" => 5.0,        # low by default; raised for thematic queries
  "tags_name" => 12.0,
  "hidden_tags" => 12.0,    # curator-only tags, weighted like visible tags
  "illustrators" => 15.0,
  "authors" => 15.0,
  "language" => 15.0        # raised when a language is explicitly requested
}.freeze
# Gemini structured-output schema (the Ruby mirror of the Python StoryQuery
# pydantic model).  Passed as generationConfig.responseSchema so the API
# constrains the model to this exact JSON shape.
RESPONSE_SCHEMA = {
  type: "OBJECT",
  properties: {
    language: { type: "STRING" },                               # full language name in English
    subject: { type: "ARRAY", items: { type: "STRING" } },      # 1-2 English keywords
    authors: { type: "ARRAY", items: { type: "STRING" } },      # [] when none mentioned
    level: { type: "INTEGER" },                                 # 1=beginner .. 4=adult
    safety_score: {
      type: "NUMBER",
      description: "Child safety score: 0.0 (completely unsafe) to 1.0 (fully safe for children)"
    },
    field_boosts: {
      type: "OBJECT",
      properties: {
        title: { type: "NUMBER" },
        english_title: { type: "NUMBER" },
        synopsis: { type: "NUMBER" },
        tags_name: { type: "NUMBER" },
        hidden_tags: { type: "NUMBER" },
        illustrators: { type: "NUMBER" },
        authors: { type: "NUMBER" },
        language: { type: "NUMBER" }
      },
      required: %w[title english_title synopsis tags_name hidden_tags illustrators authors language]
    }
  },
  required: %w[language subject authors level safety_score field_boosts]
}.freeze
# System prompt prepended to every user query (mirrors the Python version).
# Instructs the model to emit a single JSON object matching RESPONSE_SCHEMA:
# safety score first, then language / subject / authors / level extraction,
# then field-boost adjustments.  The default boost numbers listed here must
# stay in sync with FIELD_BOOST_DEFAULTS.
PROMPT_TEMPLATE = <<~PROMPT
You are a highly efficient and constrained Query Processor for a children's story search engine.
Your task is to transform a user's natural language request into a single, clean JSON object.
# INSTRUCTIONS AND CONSTRAINTS
1. **Strict Output Format:** Return only a valid JSON object matching the schema below.
2. **Child Safety Score (EVALUATE FIRST):**
* Set `safety_score` as a float between 0.0 and 1.0:
- 1.0 = completely safe and appropriate for children
- 0.7–0.99 = minor concerns (mild cartoon violence, slightly scary themes)
- 0.4–0.69 = moderate concern (themes unsuitable for young children but not explicit)
- 0.0–0.39 = clearly unsafe (sexual content, graphic violence, drugs, weapons, hate speech)
* The caller decides the block threshold — always provide a precise score.
3. **Schema and Field Rules:**
* `language`: Full language name in English (e.g., "English", "Hindi"). Detect from input
if not explicitly requested.
* `subject`: Translate to English, strip filler phrases, decompose into 1–2 strong keywords.
* `authors`: Extract specific author names. Use [] if none.
* `level`: 1=beginner/small kids, 2=general (default), 3=young adult, 4=adult.
* `field_boosts`: Adjust Elasticsearch field boost weights based on query intent.
Defaults: title=40, english_title=18, synopsis=5, tags_name=12, hidden_tags=12,
illustrators=15, authors=15, language=15.
- Specific title mentioned → raise `title` and `english_title`
- Specific author/illustrator → raise `authors` or `illustrators`
- Thematic query → raise `synopsis`, `tags_name`, `hidden_tags`
- Language explicitly requested → raise `language`
- Otherwise: keep defaults
Now process this input:
PROMPT
# Extract structured search parameters from a natural language query.
#
# text - String raw user query in any language.
#
# Returns a Hash matching RESPONSE_SCHEMA (language, subject, authors, level,
# safety_score, field_boosts), with field_boosts backfilled from
# FIELD_BOOST_DEFAULTS for any keys the model omitted.
#
# Raises RuntimeError when the API call fails or the candidate carries no
# text part (e.g. blocked by safety filters or truncated).
def translate_and_refine(text)
  conn = Faraday.new(url: GEMINI_URL) do |f|
    f.request :json
    f.response :json
    f.adapter Faraday.default_adapter
  end
  body = {
    contents: [{ parts: [{ text: PROMPT_TEMPLATE + text }] }],
    generationConfig: {
      maxOutputTokens: 300,
      # Structured extraction, not creative generation: temperature 0.0 so
      # identical queries yield stable, reproducible JSON.
      temperature: 0.0,
      responseMimeType: "application/json",
      responseSchema: RESPONSE_SCHEMA
    }
  }
  # Send the key in the documented x-goog-api-key header instead of the
  # query string, so it does not leak into proxy and server access logs.
  response = conn.post(nil, body, "x-goog-api-key" => GOOGLE_API_KEY)
  raise "Gemini error: #{response.body}" unless response.success?
  raw = response.body.dig("candidates", 0, "content", "parts", 0, "text")
  # dig returns nil when the candidate has no text part; JSON.parse(nil)
  # would raise an opaque TypeError — fail with a clear message instead.
  raise "Gemini returned no text for query: #{text.inspect}" if raw.nil?
  result = JSON.parse(raw)
  # Fill any missing boost keys with defaults (model-provided values win).
  result["field_boosts"] = FIELD_BOOST_DEFAULTS.merge(result["field_boosts"] || {})
  result
end
# Smoke test: run only when executed directly, never when required.
if __FILE__ == $0
  # Queries spanning the safety spectrum: clearly safe, borderline, unsafe.
  [
    "stories about friendship and sharing for kids",
    "show me a Hindi story about a brave little girl who saves her village",
    "story where the hero fights a scary monster",
    "a story about a drug dealer",
    "story with graphic murder and gore"
  ].each do |q|
    data = translate_and_refine(q)
    s = data["safety_score"].to_f
    # Traffic-light icon: green >= 0.7, yellow >= 0.4, red otherwise.
    icon =
      if s >= 0.7
        "🟢"
      elsif s >= 0.4
        "🟡"
      else
        "🔴"
      end
    puts "#{icon} safety=#{format('%.2f', s)} | subject=#{data['subject']}"
    puts " query : #{q}"
    puts " boosts: #{data['field_boosts']}"
    puts
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment