Created
March 9, 2026 12:45
-
-
Save ChakshuGautam/affea02c785ad3ff3f9b0357b4a6f1f5 to your computer and use it in GitHub Desktop.
StoryQuery — Gemini-powered query processor with child safety scoring and dynamic ES field boosts (Python + Ruby)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| StoryQuery model with child safety scoring and dynamic field boosts. | |
| Uses Google Gemini (gemini-2.5-flash-lite) for structured query extraction. | |
| Dependencies: | |
| pip install google-genai pydantic | |
| """ | |
| import os | |
| import json | |
| from pydantic import BaseModel, Field | |
| from typing import List | |
| from google import genai | |
| from google.genai import types | |
# Runtime configuration, read from the environment once at import time.
# GOOGLE_API_KEY is None when the variable is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
# Model used for extraction; override with GEMINI_MODEL.
MODEL_NAME = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash-lite")

# Shared Gemini client used by translate_and_refine().
client = genai.Client(api_key=GOOGLE_API_KEY)
# Per-field boost weights for the story search query. The defaults are the
# same values the prompt advertises to the model as "Defaults".
# NOTE: documented with comments rather than a class docstring, because
# pydantic copies a model's docstring into its generated JSON schema, and
# this model is part of the schema passed to Gemini as `response_schema`.
class FieldBoosts(BaseModel):
    # Title fields.
    title: float = 40.0
    english_title: float = 18.0

    # Thematic / descriptive fields.
    synopsis: float = 5.0
    tags_name: float = 12.0
    hidden_tags: float = 12.0

    # People and language fields.
    illustrators: float = 15.0
    authors: float = 15.0
    language: float = 15.0
# Structured result the model must return for one user query. This class is
# passed to Gemini as `response_schema` and is also used to validate the
# reply in translate_and_refine().
# NOTE: comments instead of a class docstring so that the generated JSON
# schema stays exactly as it was.
class StoryQuery(BaseModel):
    language: str  # language name, e.g. "English" or "Hindi" (per the prompt)
    subject: List[str]  # keywords extracted from the request
    authors: List[str]  # author names; empty list when none were mentioned
    level: int  # reading level; the prompt defines the values 1-4

    # Required field (no default). The description text is sent to the model
    # as part of the schema.
    safety_score: float = Field(
        description="Child safety score: 0.0 (completely unsafe) to 1.0 (fully safe for children)"
    )

    # Required nested object holding the per-field boost weights.
    field_boosts: FieldBoosts
| PROMPT_TEMPLATE = """ | |
| You are a highly efficient and constrained Query Processor for a children's story search engine. | |
| Your task is to transform a user's natural language request into a single, clean JSON object. | |
| # INSTRUCTIONS AND CONSTRAINTS | |
| 1. **Strict Output Format:** Return only a valid JSON object matching the schema below. | |
| 2. **Child Safety Score (EVALUATE FIRST):** | |
| * Set `safety_score` as a float between 0.0 and 1.0: | |
| - 1.0 = completely safe and appropriate for children | |
| - 0.7–0.99 = minor concerns (mild cartoon violence, slightly scary themes) | |
| - 0.4–0.69 = moderate concern (themes unsuitable for young children but not explicit) | |
| - 0.0–0.39 = clearly unsafe (sexual content, graphic violence, drugs, weapons, hate speech) | |
| * The caller decides the block threshold — always provide a precise score. | |
| 3. **Schema and Field Rules:** | |
| * `language`: Full language name in English (e.g., "English", "Hindi"). Detect from input | |
| if not explicitly requested. | |
| * `subject`: Translate to English, strip filler phrases, decompose into 1–2 strong keywords. | |
| * `authors`: Extract specific author names. Use [] if none. | |
| * `level`: 1=beginner/small kids, 2=general (default), 3=young adult, 4=adult. | |
| * `field_boosts`: Adjust Elasticsearch field boost weights based on query intent. | |
| Defaults: title=40, english_title=18, synopsis=5, tags_name=12, hidden_tags=12, | |
| illustrators=15, authors=15, language=15. | |
| - Specific title mentioned → raise `title` and `english_title` | |
| - Specific author/illustrator → raise `authors` or `illustrators` | |
| - Thematic query → raise `synopsis`, `tags_name`, `hidden_tags` | |
| - Language explicitly requested → raise `language` | |
| - Otherwise: keep defaults | |
| Now process this input: | |
| """ | |
def translate_and_refine(text: str) -> dict:
    """Extract structured search parameters from a natural language query.

    Sends ``PROMPT_TEMPLATE + text`` to the configured Gemini model with
    ``StoryQuery`` as the response schema, validates the JSON reply against
    ``StoryQuery`` and returns it as a plain dict.

    Args:
        text: The user's raw search request.

    Returns:
        A dict with the ``StoryQuery`` fields (``language``, ``subject``,
        ``authors``, ``level``, ``safety_score``, ``field_boosts``).
        ``safety_score`` is always within ``[0.0, 1.0]``.

    Raises:
        ValueError: If the model returned no text at all, or (as pydantic's
            ``ValidationError``, a ``ValueError`` subclass) if the returned
            JSON does not match ``StoryQuery``.
    """
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=PROMPT_TEMPLATE + text,
        config=types.GenerateContentConfig(
            max_output_tokens=300,
            temperature=0.7,
            response_mime_type="application/json",
            response_schema=StoryQuery,
        ),
    )

    # `response.text` is None/empty when the reply carries no text part
    # (e.g. the request was blocked). Fail with an explicit message instead
    # of handing None to the JSON validator.
    raw = response.text
    if not raw:
        raise ValueError(
            "Gemini returned no text for the query (response may have been "
            "blocked or truncated)"
        )

    result = StoryQuery.model_validate_json(raw).model_dump()

    # Callers compare the score against thresholds, so keep it inside the
    # documented 0.0-1.0 range even if the model drifts outside it.
    result["safety_score"] = min(1.0, max(0.0, result["safety_score"]))
    return result
# Demo: run a handful of sample queries through the extractor and print the
# safety score, extracted subject and field boosts for each one.
if __name__ == "__main__":
    sample_queries = [
        "stories about friendship and sharing for kids",
        "show me a Hindi story about a brave little girl who saves her village",
        "story where the hero fights a scary monster",
        "a story about a drug dealer",
        "story with graphic murder and gore",
    ]

    for query in sample_queries:
        outcome = translate_and_refine(query)
        score = outcome["safety_score"]

        # Traffic-light icon: green from 0.7, yellow from 0.4, red below.
        if score >= 0.7:
            icon = "🟢"
        elif score >= 0.4:
            icon = "🟡"
        else:
            icon = "🔴"

        print(f"{icon} safety={score:.2f} | subject={outcome['subject']}")
        print(f" query : {query}")
        print(f" boosts: {outcome['field_boosts']}")
        print()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # StoryQuery model with child safety scoring and dynamic field boosts. | |
| # Uses Google Gemini (gemini-2.5-flash-lite) for structured query extraction. | |
| # | |
| # Dependencies: | |
| # gem install faraday json | |
| require "faraday" | |
| require "json" | |
# Runtime configuration, read from the environment when the file is loaded.
# ENV.fetch without a fallback raises KeyError if GOOGLE_API_KEY is unset.
GOOGLE_API_KEY = ENV.fetch("GOOGLE_API_KEY")
# Model used for extraction; override with GEMINI_MODEL.
MODEL_NAME = ENV.fetch("GEMINI_MODEL") { "gemini-2.5-flash-lite" }
# generateContent endpoint for the selected model.
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/#{MODEL_NAME}:generateContent"
# Default boost weight per search field. Used by translate_and_refine to
# fill in any boost key missing from the model's reply.
FIELD_BOOST_DEFAULTS = {
  "title" => 40.0,
  "english_title" => 18.0,
  "synopsis" => 5.0,
  "tags_name" => 12.0,
  "hidden_tags" => 12.0,
  "illustrators" => 15.0,
  "authors" => 15.0,
  "language" => 15.0
}.freeze

# Response schema sent to Gemini as generationConfig.responseSchema.
# The nested field_boosts object is derived from FIELD_BOOST_DEFAULTS so the
# field list lives in one place: every boost field is a required NUMBER,
# in the same order as the defaults table.
RESPONSE_SCHEMA = {
  type: "OBJECT",
  properties: {
    language: { type: "STRING" },
    subject: { type: "ARRAY", items: { type: "STRING" } },
    authors: { type: "ARRAY", items: { type: "STRING" } },
    level: { type: "INTEGER" },
    safety_score: {
      type: "NUMBER",
      description: "Child safety score: 0.0 (completely unsafe) to 1.0 (fully safe for children)"
    },
    field_boosts: {
      type: "OBJECT",
      properties: FIELD_BOOST_DEFAULTS.keys.to_h { |name| [name.to_sym, { type: "NUMBER" }] },
      required: FIELD_BOOST_DEFAULTS.keys
    }
  },
  required: %w[language subject authors level safety_score field_boosts]
}.freeze
| PROMPT_TEMPLATE = <<~PROMPT | |
| You are a highly efficient and constrained Query Processor for a children's story search engine. | |
| Your task is to transform a user's natural language request into a single, clean JSON object. | |
| # INSTRUCTIONS AND CONSTRAINTS | |
| 1. **Strict Output Format:** Return only a valid JSON object matching the schema below. | |
| 2. **Child Safety Score (EVALUATE FIRST):** | |
| * Set `safety_score` as a float between 0.0 and 1.0: | |
| - 1.0 = completely safe and appropriate for children | |
| - 0.7–0.99 = minor concerns (mild cartoon violence, slightly scary themes) | |
| - 0.4–0.69 = moderate concern (themes unsuitable for young children but not explicit) | |
| - 0.0–0.39 = clearly unsafe (sexual content, graphic violence, drugs, weapons, hate speech) | |
| * The caller decides the block threshold — always provide a precise score. | |
| 3. **Schema and Field Rules:** | |
| * `language`: Full language name in English (e.g., "English", "Hindi"). Detect from input | |
| if not explicitly requested. | |
| * `subject`: Translate to English, strip filler phrases, decompose into 1–2 strong keywords. | |
| * `authors`: Extract specific author names. Use [] if none. | |
| * `level`: 1=beginner/small kids, 2=general (default), 3=young adult, 4=adult. | |
| * `field_boosts`: Adjust Elasticsearch field boost weights based on query intent. | |
| Defaults: title=40, english_title=18, synopsis=5, tags_name=12, hidden_tags=12, | |
| illustrators=15, authors=15, language=15. | |
| - Specific title mentioned → raise `title` and `english_title` | |
| - Specific author/illustrator → raise `authors` or `illustrators` | |
| - Thematic query → raise `synopsis`, `tags_name`, `hidden_tags` | |
| - Language explicitly requested → raise `language` | |
| - Otherwise: keep defaults | |
| Now process this input: | |
| PROMPT | |
# Extracts structured search parameters from a natural language query.
#
# Posts PROMPT_TEMPLATE + text to the Gemini generateContent endpoint with
# RESPONSE_SCHEMA as the response schema and parses the JSON text of the
# first candidate.
#
# @param text [String] the user's raw search request
# @return [Hash] parsed result with string keys; "field_boosts" always
#   contains every key of FIELD_BOOST_DEFAULTS, and a numeric "safety_score"
#   is clamped to the 0.0..1.0 range
# @raise [RuntimeError] if the HTTP call fails or the reply contains no text
# @raise [JSON::ParserError] if the returned text is not valid JSON
def translate_and_refine(text)
  # Authenticate with the x-goog-api-key header instead of a `?key=` query
  # parameter so the secret does not appear in request URLs or access logs.
  conn = Faraday.new(url: GEMINI_URL, headers: { "x-goog-api-key" => GOOGLE_API_KEY }) do |f|
    f.request :json
    f.response :json
    f.adapter Faraday.default_adapter
  end

  body = {
    contents: [{ parts: [{ text: PROMPT_TEMPLATE + text }] }],
    generationConfig: {
      maxOutputTokens: 300,
      temperature: 0.7,
      responseMimeType: "application/json",
      responseSchema: RESPONSE_SCHEMA
    }
  }

  response = conn.post { |req| req.body = body }
  raise "Gemini error: #{response.body}" unless response.success?

  # A successful reply can still lack a text part (e.g. a blocked request);
  # report that explicitly rather than letting JSON.parse(nil) raise TypeError.
  raw = response.body.dig("candidates", 0, "content", "parts", 0, "text")
  raise "Gemini returned no text: #{response.body}" if raw.nil? || raw.empty?

  result = JSON.parse(raw)

  # Fill any missing boost keys with defaults
  result["field_boosts"] = FIELD_BOOST_DEFAULTS.merge(result["field_boosts"] || {})

  # Callers compare the score against thresholds, so keep it inside the
  # documented 0.0-1.0 range even if the model drifts outside it.
  score = result["safety_score"]
  result["safety_score"] = score.clamp(0.0, 1.0) if score.is_a?(Numeric)

  result
end
# Demo: run a handful of sample queries through the extractor and print the
# safety score, extracted subject and field boosts for each one.
if __FILE__ == $PROGRAM_NAME
  sample_queries = [
    "stories about friendship and sharing for kids",
    "show me a Hindi story about a brave little girl who saves her village",
    "story where the hero fights a scary monster",
    "a story about a drug dealer",
    "story with graphic murder and gore"
  ]

  sample_queries.each do |query|
    outcome = translate_and_refine(query)
    score = outcome["safety_score"].to_f

    # Traffic-light icon: green from 0.7, yellow from 0.4, red below.
    icon =
      if score >= 0.7
        "🟢"
      elsif score >= 0.4
        "🟡"
      else
        "🔴"
      end

    puts "#{icon} safety=#{format('%.2f', score)} | subject=#{outcome['subject']}"
    puts " query : #{query}"
    puts " boosts: #{outcome['field_boosts']}"
    puts
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment