# caleb-kaiser/simple-guardrail.py

## simple-guardrail.py
import json
import os
from typing import List, Optional

import openai
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine

from opik.guardrail import Guardrail, Transformation
from opik.evaluation.metrics.score_result import ScoreResult
from opik.evaluation.metrics import BaseMetric


# Step 1. Define a metric that checks if a response contains content about a specific topic.
class TopicFilterMetric(BaseMetric):
    """Metric that asks an LLM whether a response touches a restricted topic.

    The score is 1.0 when the model reports that the topic is absent and 0.0
    when it reports that the topic is present. If the model's reply cannot be
    interpreted, the metric fails closed (scores 0.0) and records why in the
    metadata instead of raising.

    Args:
        topic: Human-readable description of the restricted topic.
        api_key: Credential for the OpenAI-compatible endpoint. When omitted,
            the ``OPENROUTER_API_KEY`` environment variable is read each time
            ``score`` runs, so no secret has to live in source control.
        base_url: Root URL of the OpenAI-compatible endpoint.
        model: Model identifier sent with the chat-completion request.
    """

    DEFAULT_BASE_URL = "https://openrouter.ai/api/v1"
    DEFAULT_MODEL = "gpt-3.5-turbo"
    API_KEY_ENV_VAR = "OPENROUTER_API_KEY"

    def __init__(
        self,
        topic: str,
        api_key: Optional[str] = None,
        base_url: str = DEFAULT_BASE_URL,
        model: str = DEFAULT_MODEL,
    ):
        self.topic = topic
        # Underscore-prefixed so they cannot collide with attributes the base
        # class may define.
        self._api_key = api_key
        self._base_url = base_url
        self._model = model
        super().__init__(name=f"topic_filter_{topic}")

    @staticmethod
    def _parse_verdict(raw_content):
        """Turn the model's raw reply into ``(contains_topic, reason)``.

        Any reply that is not a JSON object carrying ``contains_topic`` is
        treated as a positive match so that a malformed answer can never let
        restricted content through.
        """
        try:
            verdict = json.loads(raw_content)
        except (TypeError, ValueError):
            # TypeError: content was None; ValueError covers JSONDecodeError.
            return True, "Could not parse the model's verdict as JSON"

        if not isinstance(verdict, dict) or "contains_topic" not in verdict:
            return True, "Model verdict is missing the 'contains_topic' field"

        flag = verdict["contains_topic"]
        if isinstance(flag, str):
            # Some models return "false" as a string, which would be truthy.
            flag = flag.strip().lower() not in ("false", "no", "0", "")

        return bool(flag), str(verdict.get("reason", "No reason provided"))

    async def score(self, output: str, **kwargs) -> ScoreResult:
        """Score the output by checking if it contains content about the restricted topic.

        Args:
            output: The text to inspect.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            A ``ScoreResult`` whose value is 0.0 when the topic is detected
            (or the verdict is unusable) and 1.0 otherwise. Metadata carries
            ``contains_topic`` and ``reason``.

        Raises:
            ValueError: If no API key was passed and the environment variable
                named by ``API_KEY_ENV_VAR`` is unset.
        """
        api_key = self._api_key or os.environ.get(self.API_KEY_ENV_VAR)
        if not api_key:
            # Fail here rather than letting the client fall back to another
            # provider's credential from the environment.
            raise ValueError(
                f"No API key configured: pass api_key or set {self.API_KEY_ENV_VAR}"
            )

        prompt = f"""Analyze the following text and determine if it contains content about {self.topic}.
        Return a JSON object with two fields:
        - 'contains_topic': boolean indicating if the text contains content about {self.topic}
        - 'reason': string explaining your decision

        Text to analyze: {output}"""

        # The context manager closes the client's HTTP resources once the
        # request is done; a fresh client is still created for each call.
        async with openai.AsyncOpenAI(api_key=api_key, base_url=self._base_url) as client:
            response = await client.chat.completions.create(
                model=self._model,
                messages=[{"role": "user", "content": prompt}],
                response_format={"type": "json_object"},
            )

        contains_topic, reason = self._parse_verdict(
            response.choices[0].message.content
        )

        return ScoreResult(
            name=self.name,
            value=0.0 if contains_topic else 1.0,
            metadata={
                "contains_topic": contains_topic,
                "reason": reason,
            },
        )


# Step 2. Define a PII detection metric
class PIIDetectionMetric(BaseMetric):
    """Metric that flags text in which the Presidio analyzer reports PII."""

    def __init__(self):
        super().__init__(name="pii_detection")
        self.analyzer = AnalyzerEngine()

    async def score(self, output: str, **kwargs) -> ScoreResult:
        """Run the analyzer over ``output`` and summarize its findings.

        Returns a ``ScoreResult`` valued 0.0 when the analyzer returned at
        least one finding and 1.0 when it returned none. Metadata lists the
        entity type and ``(start, end)`` offsets of every finding plus a
        short textual reason.
        """
        findings = self.analyzer.analyze(text=output, language="en")

        # Collect both views of the findings in a single pass.
        entity_types = []
        spans = []
        for finding in findings:
            entity_types.append(finding.entity_type)
            spans.append((finding.start, finding.end))

        if findings:
            score_value = 0.0  # PII present
            summary = f"Found {len(findings)} PII instances"
        else:
            score_value = 1.0  # clean
            summary = "No PII detected"

        return ScoreResult(
            name=self.name,
            value=score_value,
            metadata={
                "pii_types": entity_types,
                "pii_locations": spans,
                "reason": summary,
            },
        )


# Step 3. Define a PII removal transformation
class PIIRemovalTransformation(Transformation):
    """Transformation that anonymizes PII detected by Presidio."""

    def __init__(self):
        # The bound method is only stored here; it is invoked later, after the
        # engines below have been created.
        super().__init__(
            name="pii_removal",
            description="Removes PII using Microsoft's Presidio",
            transform_fn=self._remove_pii,
        )
        self.analyzer = AnalyzerEngine()
        self.anonymizer = AnonymizerEngine()

    def _remove_pii(self, text: str) -> str:
        """Return ``text`` after passing the analyzer's findings to the anonymizer."""
        detected = self.analyzer.analyze(text=text, language="en")
        scrubbed = self.anonymizer.anonymize(text=text, analyzer_results=detected)
        return scrubbed.text


# Step 4. Define a guardrail that uses both metrics and the PII transformation
def _all_metrics_passed(results) -> bool:
    """Return True only when every metric result scored exactly 1.0."""
    return all(result.value == 1.0 for result in results)


def _announce_success(output) -> None:
    """Print the first metric's reason for a response that passed."""
    # Guard against an empty result list, which would otherwise raise IndexError.
    if output.results:
        reason = output.results[0].metadata['reason']
    else:
        reason = "no metrics were evaluated"
    print(f"Response passed: {reason}")


def _announce_failure(output) -> None:
    """Print the reasons of the metrics that actually caused the block.

    Reporting only the first metric's reason is misleading when a later
    metric is the one that failed, so every result scoring below 1.0 is
    listed by name.
    """
    failing = [result for result in output.results if result.value != 1.0]
    # Fall back to every result if none is identifiable as failing.
    reported = failing or list(output.results)
    if reported:
        reasons = "; ".join(
            f"{result.name}: {result.metadata['reason']}" for result in reported
        )
    else:
        reasons = "no metrics were evaluated"
    print(f"Response blocked: {reasons}")


# NOTE(review): the callback contract (each callback receives an object with a
# ``results`` sequence of score results) is inferred from how this script uses
# it -- confirm against the Guardrail API.
guardrail = Guardrail(
    metrics=[
        TopicFilterMetric(
            topic="Formula 1 racing",
        ),
        PIIDetectionMetric(),
    ],
    transformations=[PIIRemovalTransformation()],
    calculate_success=_all_metrics_passed,
    on_success=_announce_success,
    on_failure=_announce_failure,
)

# Step 5. Run the guardrail
def _print_outcome(outcome) -> None:
    """Print whether a guardrail run passed, with per-metric reasons if not.

    On a pass, the transformed (anonymized) output is shown. On a block, each
    metric result is listed as ``name: reason``.
    """
    if outcome.passed:
        print("Response passed all checks!")
        print(f"Anonymized text: {outcome.transformed_output}")
        return

    print("Response was blocked!")
    # A distinct loop name avoids rebinding the object being iterated.
    for metric_result in outcome.results:
        print(f"{metric_result.name}: {metric_result.metadata['reason']}")


async def main():
    """Run the guardrail over two sample responses and print each outcome.

    The samples are evaluated one after the other, in order. The first
    mentions Formula 1 and contains PII; the second contains PII only.
    """
    sample_outputs = (
        "The Monaco Grand Prix is one of the most prestigious races in Formula 1. My phone number is 555-123-4567 and I live at 123 Main St.",
        "On the 1st of April, 2025, I will be in Monaco. I will be visiting my friend Harry Houdini. His phone number is 555-123-4567 and he lives at 123 Main St, Monaco City, Monaco.",
    )

    for sample in sample_outputs:
        outcome = await guardrail(
            llm_output=sample,
        )
        _print_outcome(outcome)

if __name__ == "__main__":
    # Script entry point: run the async demo to completion.
    from asyncio import run as _run_until_complete

    _run_until_complete(main())
	from typing import List
	import openai
	import json
	from presidio_analyzer import AnalyzerEngine
	from presidio_anonymizer import AnonymizerEngine

	from opik.guardrail import Guardrail, Transformation
	from opik.evaluation.metrics.score_result import ScoreResult
	from opik.evaluation.metrics import BaseMetric


	# Step 1. Define a metric that checks if a response contains content about a specific topic.
	class TopicFilterMetric(BaseMetric):
	    """Metric that asks an LLM whether a response touches a restricted topic.

	    The score is 1.0 when the model reports that the topic is absent and 0.0
	    when it reports that the topic is present. If the model's reply cannot be
	    interpreted, the metric fails closed (scores 0.0) and records why in the
	    metadata instead of raising.

	    Args:
	        topic: Human-readable description of the restricted topic.
	        api_key: Credential for the OpenAI-compatible endpoint. When omitted,
	            the ``OPENROUTER_API_KEY`` environment variable is read each time
	            ``score`` runs, so no secret has to live in source control.
	        base_url: Root URL of the OpenAI-compatible endpoint.
	        model: Model identifier sent with the chat-completion request.
	    """

	    DEFAULT_BASE_URL = "https://openrouter.ai/api/v1"
	    DEFAULT_MODEL = "gpt-3.5-turbo"
	    API_KEY_ENV_VAR = "OPENROUTER_API_KEY"

	    def __init__(
	        self,
	        topic: str,
	        api_key: Optional[str] = None,
	        base_url: str = DEFAULT_BASE_URL,
	        model: str = DEFAULT_MODEL,
	    ):
	        self.topic = topic
	        # Underscore-prefixed so they cannot collide with attributes the base
	        # class may define.
	        self._api_key = api_key
	        self._base_url = base_url
	        self._model = model
	        super().__init__(name=f"topic_filter_{topic}")

	    @staticmethod
	    def _parse_verdict(raw_content):
	        """Turn the model's raw reply into ``(contains_topic, reason)``.

	        Any reply that is not a JSON object carrying ``contains_topic`` is
	        treated as a positive match so that a malformed answer can never let
	        restricted content through.
	        """
	        try:
	            verdict = json.loads(raw_content)
	        except (TypeError, ValueError):
	            # TypeError: content was None; ValueError covers JSONDecodeError.
	            return True, "Could not parse the model's verdict as JSON"

	        if not isinstance(verdict, dict) or "contains_topic" not in verdict:
	            return True, "Model verdict is missing the 'contains_topic' field"

	        flag = verdict["contains_topic"]
	        if isinstance(flag, str):
	            # Some models return "false" as a string, which would be truthy.
	            flag = flag.strip().lower() not in ("false", "no", "0", "")

	        return bool(flag), str(verdict.get("reason", "No reason provided"))

	    async def score(self, output: str, **kwargs) -> ScoreResult:
	        """Score the output by checking if it contains content about the restricted topic.

	        Args:
	            output: The text to inspect.
	            **kwargs: Accepted for interface compatibility and ignored.

	        Returns:
	            A ``ScoreResult`` whose value is 0.0 when the topic is detected
	            (or the verdict is unusable) and 1.0 otherwise. Metadata carries
	            ``contains_topic`` and ``reason``.

	        Raises:
	            ValueError: If no API key was passed and the environment variable
	                named by ``API_KEY_ENV_VAR`` is unset.
	        """
	        api_key = self._api_key or os.environ.get(self.API_KEY_ENV_VAR)
	        if not api_key:
	            # Fail here rather than letting the client fall back to another
	            # provider's credential from the environment.
	            raise ValueError(
	                f"No API key configured: pass api_key or set {self.API_KEY_ENV_VAR}"
	            )

	        prompt = f"""Analyze the following text and determine if it contains content about {self.topic}.
	        Return a JSON object with two fields:
	        - 'contains_topic': boolean indicating if the text contains content about {self.topic}
	        - 'reason': string explaining your decision

	        Text to analyze: {output}"""

	        # The context manager closes the client's HTTP resources once the
	        # request is done; a fresh client is still created for each call.
	        async with openai.AsyncOpenAI(api_key=api_key, base_url=self._base_url) as client:
	            response = await client.chat.completions.create(
	                model=self._model,
	                messages=[{"role": "user", "content": prompt}],
	                response_format={"type": "json_object"},
	            )

	        contains_topic, reason = self._parse_verdict(
	            response.choices[0].message.content
	        )

	        return ScoreResult(
	            name=self.name,
	            value=0.0 if contains_topic else 1.0,
	            metadata={
	                "contains_topic": contains_topic,
	                "reason": reason,
	            },
	        )


	# Step 2. Define a PII detection metric
	class PIIDetectionMetric(BaseMetric):
	    """Metric that flags text in which the Presidio analyzer reports PII."""

	    def __init__(self):
	        super().__init__(name="pii_detection")
	        self.analyzer = AnalyzerEngine()

	    async def score(self, output: str, **kwargs) -> ScoreResult:
	        """Run the analyzer over ``output`` and summarize its findings.

	        Returns a ``ScoreResult`` valued 0.0 when the analyzer returned at
	        least one finding and 1.0 when it returned none. Metadata lists the
	        entity type and ``(start, end)`` offsets of every finding plus a
	        short textual reason.
	        """
	        findings = self.analyzer.analyze(text=output, language="en")

	        # Collect both views of the findings in a single pass.
	        entity_types = []
	        spans = []
	        for finding in findings:
	            entity_types.append(finding.entity_type)
	            spans.append((finding.start, finding.end))

	        if findings:
	            score_value = 0.0  # PII present
	            summary = f"Found {len(findings)} PII instances"
	        else:
	            score_value = 1.0  # clean
	            summary = "No PII detected"

	        return ScoreResult(
	            name=self.name,
	            value=score_value,
	            metadata={
	                "pii_types": entity_types,
	                "pii_locations": spans,
	                "reason": summary,
	            },
	        )


	# Step 3. Define a PII removal transformation
	class PIIRemovalTransformation(Transformation):
	    """Transformation that anonymizes PII detected by Presidio."""

	    def __init__(self):
	        # The bound method is only stored here; it is invoked later, after the
	        # engines below have been created.
	        super().__init__(
	            name="pii_removal",
	            description="Removes PII using Microsoft's Presidio",
	            transform_fn=self._remove_pii,
	        )
	        self.analyzer = AnalyzerEngine()
	        self.anonymizer = AnonymizerEngine()

	    def _remove_pii(self, text: str) -> str:
	        """Return ``text`` after passing the analyzer's findings to the anonymizer."""
	        detected = self.analyzer.analyze(text=text, language="en")
	        scrubbed = self.anonymizer.anonymize(text=text, analyzer_results=detected)
	        return scrubbed.text


	# Step 4. Define a guardrail that uses both metrics and the PII transformation
	def _all_metrics_passed(results) -> bool:
	    """Return True only when every metric result scored exactly 1.0."""
	    return all(result.value == 1.0 for result in results)


	def _announce_success(output) -> None:
	    """Print the first metric's reason for a response that passed."""
	    # Guard against an empty result list, which would otherwise raise IndexError.
	    if output.results:
	        reason = output.results[0].metadata['reason']
	    else:
	        reason = "no metrics were evaluated"
	    print(f"Response passed: {reason}")


	def _announce_failure(output) -> None:
	    """Print the reasons of the metrics that actually caused the block.

	    Reporting only the first metric's reason is misleading when a later
	    metric is the one that failed, so every result scoring below 1.0 is
	    listed by name.
	    """
	    failing = [result for result in output.results if result.value != 1.0]
	    # Fall back to every result if none is identifiable as failing.
	    reported = failing or list(output.results)
	    if reported:
	        reasons = "; ".join(
	            f"{result.name}: {result.metadata['reason']}" for result in reported
	        )
	    else:
	        reasons = "no metrics were evaluated"
	    print(f"Response blocked: {reasons}")


	# NOTE(review): the callback contract (each callback receives an object with a
	# ``results`` sequence of score results) is inferred from how this script uses
	# it -- confirm against the Guardrail API.
	guardrail = Guardrail(
	    metrics=[
	        TopicFilterMetric(
	            topic="Formula 1 racing",
	        ),
	        PIIDetectionMetric(),
	    ],
	    transformations=[PIIRemovalTransformation()],
	    calculate_success=_all_metrics_passed,
	    on_success=_announce_success,
	    on_failure=_announce_failure,
	)

	# Step 5. Run the guardrail
	def _print_outcome(outcome) -> None:
	    """Print whether a guardrail run passed, with per-metric reasons if not.

	    On a pass, the transformed (anonymized) output is shown. On a block, each
	    metric result is listed as ``name: reason``.
	    """
	    if outcome.passed:
	        print("Response passed all checks!")
	        print(f"Anonymized text: {outcome.transformed_output}")
	        return

	    print("Response was blocked!")
	    # A distinct loop name avoids rebinding the object being iterated.
	    for metric_result in outcome.results:
	        print(f"{metric_result.name}: {metric_result.metadata['reason']}")


	async def main():
	    """Run the guardrail over two sample responses and print each outcome.

	    The samples are evaluated one after the other, in order. The first
	    mentions Formula 1 and contains PII; the second contains PII only.
	    """
	    sample_outputs = (
	        "The Monaco Grand Prix is one of the most prestigious races in Formula 1. My phone number is 555-123-4567 and I live at 123 Main St.",
	        "On the 1st of April, 2025, I will be in Monaco. I will be visiting my friend Harry Houdini. His phone number is 555-123-4567 and he lives at 123 Main St, Monaco City, Monaco.",
	    )

	    for sample in sample_outputs:
	        outcome = await guardrail(
	            llm_output=sample,
	        )
	        _print_outcome(outcome)

	if __name__ == "__main__":
	    # Script entry point: run the async demo to completion.
	    from asyncio import run as _run_until_complete

	    _run_until_complete(main())
No results found