Skip to content

Instantly share code, notes, and snippets.

@thiagobutignon
Created January 13, 2026 05:27
Show Gist options
  • Select an option

  • Save thiagobutignon/82610979fdc00f06b83d0114f77bcf88 to your computer and use it in GitHub Desktop.

Select an option

Save thiagobutignon/82610979fdc00f06b83d0114f77bcf88 to your computer and use it in GitHub Desktop.
privacy_budget_manager_lab.py
import opendp.prelude as dp
dp.enable_features("contrib")
import pandas as pd
import numpy as np
import statistics
# =============================================================================
# DATASET SETUP
# =============================================================================
# Create sample customer dataset
# Twenty synthetic customer rows: sequential ids plus an age and a purchase
# amount for each customer.
customers = pd.DataFrame(dict(
    customer_id=range(1, 21),
    age=[
        25, 34, 45, 29, 52, 38, 41, 33, 47, 36,
        28, 31, 42, 26, 39, 44, 30, 35, 48, 27,
    ],
    purchase_amount=[
        150, 320, 890, 200, 450, 275, 680, 180, 520, 340,
        210, 380, 560, 190, 410, 620, 240, 290, 470, 160,
    ],
))
# Lab banner, followed by the dataset size and a blank separator line.
print("=" * 60, "LAB: PRIVACY BUDGET MANAGEMENT & BUSINESS IMPACT", "=" * 60, sep="\n")
print(f"Dataset size: {len(customers.index)} records", end="\n\n")
# =============================================================================
# PART 1: PRIVACY BUDGET MANAGER CLASS
# =============================================================================
class PrivacyBudgetManager:
    """Track privacy-budget spending against a fixed total.

    The manager only does the accounting: callers ask whether a cost is
    affordable with ``check_budget`` and record a spend with
    ``consume_budget``. It does not run any query itself.
    """

    # Slack used when comparing a cost with the remaining budget. Binary
    # floating-point subtraction can leave the remainder a hair below the
    # intended value (0.3 - (0.1 + 0.1) < 0.1), which would otherwise block
    # a query that exactly exhausts the budget.
    _TOLERANCE = 1e-9

    def __init__(self, total_budget):
        """Create a manager with ``total_budget`` available and nothing spent.

        Raises:
            ValueError: if ``total_budget`` is negative or NaN.
        """
        # ``not x >= 0`` is also true for NaN, which ``x < 0`` would miss.
        if not total_budget >= 0:
            raise ValueError(f"total_budget must be non-negative, got {total_budget}")
        self.total_budget = total_budget
        self.consumed_budget = 0.0

    @property
    def remaining_budget(self):
        """Budget still available: total minus everything consumed so far."""
        return self.total_budget - self.consumed_budget

    def check_budget(self, cost):
        """Checks if there is enough budget for a query with the given cost.

        Returns False (rather than raising) for a negative or NaN cost, so
        the method stays a pure yes/no predicate.
        """
        if not cost >= 0:
            return False
        return cost <= self.remaining_budget + self._TOLERANCE

    def consume_budget(self, cost):
        """Consumes budget if available, otherwise raises an error.

        Raises:
            ValueError: if ``cost`` is negative or NaN, or if it exceeds the
                remaining budget.
        """
        # A negative cost would silently hand budget back, so reject it
        # with its own message instead of the "insufficient" one.
        if not cost >= 0:
            raise ValueError(f"Privacy cost must be non-negative, got {cost}")
        if not self.check_budget(cost):
            raise ValueError(f"Insufficient privacy budget. Required: {cost}, Remaining: {self.remaining_budget:.4f}")
        # Clamp so rounding can never push the spend past the total, which
        # would make the remaining budget a tiny negative number.
        self.consumed_budget = min(self.total_budget, self.consumed_budget + cost)
        print(f"Budget update: Consumed {cost:.2f}, Remaining {self.remaining_budget:.2f}")
# Single shared manager for the whole lab, starting with a total budget of 5.0.
initial_epsilon = 5.0
budget_manager = PrivacyBudgetManager(initial_epsilon)
print(
    f"Initialized Privacy Budget Manager with Total Budget: {budget_manager.total_budget}",
    end="\n\n",
)
# =============================================================================
# PART 2: OPENDP PRIVATE COUNT FUNCTION
# =============================================================================
def private_count(data, epsilon, sensitivity=1):
    """
    Implements differentially private count using OpenDP library.

    The true count is ``len(data)``; Laplace noise with scale
    ``sensitivity / epsilon`` is added through an OpenDP mechanism built over
    an integer atom domain with absolute distance, and the noisy value is
    clamped so the returned count is never negative.

    Args:
        data: Any sized collection; only its length is used.
        epsilon: Privacy-loss parameter; must be strictly positive.
        sensitivity: Value used as the numerator of the noise scale;
            must not be negative.

    Returns:
        The noisy count, floored at 0.

    Raises:
        ValueError: if ``epsilon`` is not strictly positive (including NaN)
            or ``sensitivity`` is negative or NaN.
    """
    # Validate before touching OpenDP: epsilon == 0 would otherwise surface
    # as a bare ZeroDivisionError and a negative value as an invalid scale.
    # The ``not x > 0`` form also rejects NaN.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon}")
    if not sensitivity >= 0:
        raise ValueError(f"sensitivity must be non-negative, got {sensitivity}")
    true_count = len(data)
    # OpenDP implementation
    space = dp.atom_domain(T=int), dp.absolute_distance(T=int)
    scale = sensitivity / epsilon
    laplace_mechanism = space >> dp.m.then_laplace(scale=scale)
    private_result = laplace_mechanism(int(true_count))
    # Noise can push the count below zero, so clamp the released value.
    return max(0, private_result)
# =============================================================================
# PART 3: BUSINESS IMPACT ASSESSMENT (Activity 3)
# =============================================================================
print("=" * 60)
print("ACTIVITY 3: BUSINESS IMPACT ASSESSMENT")
print("=" * 60)
test_epsilons = [0.1, 1.0, 5.0]
iterations = 20
true_value = len(customers)
print(f"Analyzing count query (True Value: {true_value}) across different epsilon levels...")
print("-" * 60)
print(f"{'Epsilon':<10} | {'Mean Result':<15} | {'Std Dev':<15} | {'Avg Error':<15}")
print("-" * 60)
for eps in test_epsilons:
    results = []
    # This loop is a simulation of the accuracy/privacy trade-off, so the
    # shared budget manager is deliberately NOT charged here; a real system
    # would deplete budget for every released answer.
    for _ in range(iterations):
        try:
            val = private_count(customers, eps)
            results.append(val)
        except Exception as e:
            # Best-effort sampling: report the failure and keep going.
            print(f"Error: {e}")
    if not results:
        # Every sample failed: statistics.mean() would raise on an empty
        # list, so emit a placeholder row instead of aborting the report.
        print(f"{eps:<10.1f} | {'n/a':<15} | {'n/a':<15} | {'n/a':<15}")
        continue
    mean_res = statistics.mean(results)
    # statistics.stdev() needs at least two data points.
    std_dev = statistics.stdev(results) if len(results) > 1 else None
    std_text = "n/a" if std_dev is None else f"{std_dev:.2f}"
    avg_error = statistics.mean([abs(r - true_value) for r in results])
    print(f"{eps:<10.1f} | {mean_res:<15.2f} | {std_text:<15} | {avg_error:<15.2f}")
print("-" * 60)
print("\nRecommendation:")
print("-> Low Epsilon (0.1): High privacy, but high noise. Unsuitable for precise counts.")
print("-> High Epsilon (5.0): Low privacy, very accurate. Good for internal dashboards.")
print("-> Balanced (1.0): Reasonable trade-off for external reporting.")
print()
# =============================================================================
# DEMO: USING THE MANAGER
# =============================================================================
print("=" * 60, "DEMO: BUDGET MANAGER ENFORCEMENT", "=" * 60, sep="\n")
# (label, epsilon cost) pairs run in order against the shared budget manager.
queries = [
    ("Query 1", 1.5),
    ("Query 2", 2.0),
    ("Query 3", 2.0),  # Expected to be blocked: 1.5 + 2.0 + 2.0 = 5.5 exceeds the 5.0 total
]
for q_name, q_cost in queries:
    print(f"Attempting {q_name} (Cost: {q_cost})...")
    try:
        if not budget_manager.check_budget(q_cost):
            print(f" BLOCKED: Insufficient budget for {q_name}")
        else:
            # Run the query first, then record its cost with the manager.
            res = private_count(customers, epsilon=q_cost)
            budget_manager.consume_budget(q_cost)
            print(f" Success! Result: {res}")
    except Exception as e:
        print(f" Error: {e}")
    # Blank line between query attempts.
    print()
print(f"Final Remaining Budget: {budget_manager.remaining_budget:.2f}")
print("=" * 60)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment