Last active
April 2, 2025 20:04
-
-
Save caleb-kaiser/c79b6b545e61c87290acc108360bb58c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import List | |
| import openai | |
| import json | |
| from presidio_analyzer import AnalyzerEngine | |
| from presidio_anonymizer import AnonymizerEngine | |
| from opik.guardrail import Guardrail, Transformation | |
| from opik.evaluation.metrics.score_result import ScoreResult | |
| from opik.evaluation.metrics import BaseMetric | |
# Step 1. Define a metric that checks if a response contains content about a specific topic.
class TopicFilterMetric(BaseMetric):
    """Metric that asks an LLM whether a response discusses a restricted topic.

    The score is 1.0 when the model reports the topic is absent and 0.0 when
    it reports the topic is present.
    """

    def __init__(self, topic: str):
        """Remember the restricted topic and register the metric name.

        Args:
            topic: Description of the topic to look for; also embedded in the
                metric name as ``topic_filter_<topic>``.
        """
        self.topic = topic
        super().__init__(name=f"topic_filter_{topic}")

    async def score(self, output: str, **kwargs) -> ScoreResult:
        """Score the output by checking if it contains content about the restricted topic.

        Args:
            output: Text to analyze.
            **kwargs: Unused by this metric.

        Returns:
            A ScoreResult named after this metric whose value is 0.0 when the
            model's ``contains_topic`` field is truthy and 1.0 otherwise. The
            model's verdict and reason are copied into ``metadata``.

        Raises:
            RuntimeError: If the ``OPENROUTER_API_KEY`` environment variable
                is unset or empty.
            json.JSONDecodeError: If the model reply is not valid JSON.
            KeyError: If the reply lacks ``contains_topic`` or ``reason``.
        """
        import os  # local import so this block does not depend on file-level imports

        # Credentials must never live in source control; read them from the
        # environment instead of a hard-coded literal.
        api_key = os.environ.get("OPENROUTER_API_KEY")
        if not api_key:
            raise RuntimeError(
                "OPENROUTER_API_KEY environment variable must be set to use TopicFilterMetric"
            )

        client = openai.AsyncOpenAI(
            api_key=api_key,
            base_url="https://openrouter.ai/api/v1",
        )

        prompt = (
            f"Analyze the following text and determine if it contains content about {self.topic}.\n"
            "Return a JSON object with two fields:\n"
            f"- 'contains_topic': boolean indicating if the text contains content about {self.topic}\n"
            "- 'reason': string explaining your decision\n"
            f"Text to analyze: {output}"
        )

        response = await client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            # Ask the API for a JSON object so the reply can be parsed below.
            response_format={"type": "json_object"},
        )
        result = json.loads(response.choices[0].message.content)

        contains_topic = result["contains_topic"]
        reason = result["reason"]
        return ScoreResult(
            name=self.name,
            value=0.0 if contains_topic else 1.0,
            metadata={
                "contains_topic": contains_topic,
                "reason": reason,
            },
        )
# Step 2. Define a PII detection metric
class PIIDetectionMetric(BaseMetric):
    """Metric that flags text in which Microsoft's Presidio detects PII."""

    def __init__(self):
        """Register the metric as ``pii_detection`` and create the Presidio analyzer."""
        super().__init__(name="pii_detection")
        self.analyzer = AnalyzerEngine()

    async def score(self, output: str, **kwargs) -> ScoreResult:
        """Run the analyzer over ``output`` and score 0.0 if anything is found, else 1.0.

        The metadata lists the entity type and (start, end) span of every
        finding, plus a short human-readable reason.
        """
        findings = self.analyzer.analyze(text=output, language="en")

        # Collect the per-finding details in a single pass.
        entity_types = []
        spans = []
        for finding in findings:
            entity_types.append(finding.entity_type)
            spans.append((finding.start, finding.end))

        if findings:
            value = 0.0  # PII found
            reason = f"Found {len(findings)} PII instances"
        else:
            value = 1.0  # no PII
            reason = "No PII detected"

        details = {
            "pii_types": entity_types,
            "pii_locations": spans,
            "reason": reason,
        }
        return ScoreResult(name=self.name, value=value, metadata=details)
# Step 3. Define a PII removal transformation
class PIIRemovalTransformation(Transformation):
    """Transformation that anonymizes PII in text via Microsoft's Presidio."""

    def __init__(self):
        """Register ``_remove_pii`` as the transform and create the Presidio engines."""
        super().__init__(
            name="pii_removal",
            description="Removes PII using Microsoft's Presidio",
            transform_fn=self._remove_pii,
        )
        self.analyzer = AnalyzerEngine()
        self.anonymizer = AnonymizerEngine()

    def _remove_pii(self, text: str) -> str:
        """Return ``text`` with every span the analyzer detected anonymized."""
        detected = self.analyzer.analyze(text=text, language="en")
        return self.anonymizer.anonymize(text=text, analyzer_results=detected).text
# Step 4. Define a guardrail that uses both metrics and the PII transformation
def _report_blocked(output) -> None:
    """Print the reason given by each metric that blocked the response.

    Reporting only the first result's reason is misleading when the first
    metric passed and a later one failed, so the failing results are selected
    explicitly (a result passes when its value is exactly 1.0).

    NOTE(review): assumes ``output.results`` holds the same score results that
    ``calculate_success`` receives — confirm against the Guardrail API.
    """
    failing = [res for res in output.results if res.value != 1.0]
    # Fall back to the first result if nothing is individually marked failing.
    culprits = failing or list(output.results[:1])
    reasons = "; ".join(str(res.metadata["reason"]) for res in culprits)
    print(f"Response blocked: {reasons}")


guardrail = Guardrail(
    metrics=[
        TopicFilterMetric(
            topic="Formula 1 racing",
        ),
        PIIDetectionMetric(),
    ],
    transformations=[PIIRemovalTransformation()],
    # The response passes only when every metric scored exactly 1.0.
    calculate_success=lambda results: all(result.value == 1.0 for result in results),
    on_success=lambda output: print(f"Response passed: {output.results[0].metadata['reason']}"),
    on_failure=_report_blocked,
)
# Step 5. Run the guardrail
def _print_outcome(outcome) -> None:
    """Print a summary of one guardrail run.

    For a blocked response, prints each metric's name and reason. For a
    passing response, prints the transformed (anonymized) text.

    NOTE(review): the anonymized text is printed only for passing responses;
    confirm this matches the intended behavior.
    """
    if not outcome.passed:
        print("Response was blocked!")
        # A distinct loop name avoids shadowing the guardrail outcome itself.
        for metric_result in outcome.results:
            print(f"{metric_result.name}: {metric_result.metadata['reason']}")
    else:
        print("Response passed all checks!")
        print(f"Anonymized text: {outcome.transformed_output}")


async def main():
    """Run the guardrail on two sample responses and print each outcome."""
    # Test the guardrail with PII in a response about the restricted topic.
    first_outcome = await guardrail(
        llm_output="The Monaco Grand Prix is one of the most prestigious races in Formula 1. My phone number is 555-123-4567 and I live at 123 Main St.",
    )
    _print_outcome(first_outcome)

    # Second sample: contains PII but does not mention Formula 1.
    second_outcome = await guardrail(
        llm_output="On the 1st of April, 2025, I will be in Monaco. I will be visiting my friend Harry Houdini. His phone number is 555-123-4567 and he lives at 123 Main St, Monaco City, Monaco.",
    )
    _print_outcome(second_outcome)


if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output looks like: