thiagobutignon/privacy_budget_manager_lab.py

## privacy_budget_manager_lab.py

import opendp.prelude as dp
dp.enable_features("contrib")
import pandas as pd
import numpy as np
import statistics

# =============================================================================
# DATASET SETUP
# =============================================================================

# Build the 20-row sample customer table used throughout the lab.
customers = pd.DataFrame(
    data={
        'customer_id': range(1, 21),
        'age': [
            25, 34, 45, 29, 52, 38, 41, 33, 47, 36,
            28, 31, 42, 26, 39, 44, 30, 35, 48, 27,
        ],
        'purchase_amount': [
            150, 320, 890, 200, 450, 275, 680, 180, 520, 340,
            210, 380, 560, 190, 410, 620, 240, 290, 470, 160,
        ],
    }
)

# Lab banner, then the dataset size followed by one blank separator line.
print("=" * 60, "LAB: PRIVACY BUDGET MANAGEMENT & BUSINESS IMPACT", "=" * 60, sep="\n")
print(f"Dataset size: {len(customers)} records", end="\n\n")

# =============================================================================
# PART 1: PRIVACY BUDGET MANAGER CLASS
# =============================================================================

class PrivacyBudgetManager:
    """Track how much of a fixed privacy budget (epsilon) has been spent.

    The manager only does bookkeeping: callers ask whether a query of a
    given cost is affordable, and record the cost once the query is run.
    """

    def __init__(self, total_budget: float) -> None:
        """Start with ``total_budget`` available and nothing consumed."""
        self.total_budget = total_budget
        self.consumed_budget = 0.0

    @property
    def remaining_budget(self) -> float:
        """Budget still available: total minus everything consumed so far."""
        return self.total_budget - self.consumed_budget

    def check_budget(self, cost: float) -> bool:
        """Return True when ``cost`` is non-negative and fits in the remaining budget.

        A negative cost is never affordable: accepting it would let a caller
        increase the remaining budget instead of spending it.
        """
        return 0 <= cost <= self.remaining_budget

    def consume_budget(self, cost: float) -> None:
        """Deduct ``cost`` from the budget and print the updated balance.

        Raises:
            ValueError: if ``cost`` is negative, or if it exceeds the
                remaining budget. The budget is left unchanged in both cases.
        """
        if cost < 0:
            # Guard first so the caller gets a precise message rather than
            # a misleading "insufficient budget" error.
            raise ValueError(f"Privacy cost must be non-negative, got {cost}")
        if not self.check_budget(cost):
            raise ValueError(f"Insufficient privacy budget. Required: {cost}, Remaining: {self.remaining_budget:.4f}")
        self.consumed_budget += cost
        print(f"Budget update: Consumed {cost:.2f}, Remaining {self.remaining_budget:.2f}")

# Create the single budget tracker for the lab, starting with epsilon = 5.0.
initial_epsilon = 5.0
budget_manager = PrivacyBudgetManager(initial_epsilon)
print(
    f"Initialized Privacy Budget Manager with Total Budget: {budget_manager.total_budget}",
    end="\n\n",
)

# =============================================================================
# PART 2: OPENDP PRIVATE COUNT FUNCTION
# =============================================================================

def private_count(data, epsilon, sensitivity=1):
    """Return a differentially private row count of ``data`` using OpenDP.

    The true count ``len(data)`` is passed through an OpenDP Laplace
    mechanism over the integer domain with scale ``sensitivity / epsilon``,
    and the noisy value is clamped so it is never reported below zero.

    Args:
        data: Any sized collection; only its length is used.
        epsilon: Privacy-loss parameter for this release. Must be > 0.
        sensitivity: How much one record can change the count. Must be > 0.

    Returns:
        The noisy count, at least 0.

    Raises:
        ValueError: if ``epsilon`` or ``sensitivity`` is not strictly positive
            (this also rejects NaN).
    """
    # Validate before dividing: epsilon == 0 would otherwise surface as a bare
    # ZeroDivisionError, and a negative value would yield a negative scale.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon}")
    if not sensitivity > 0:
        raise ValueError(f"sensitivity must be positive, got {sensitivity}")

    true_count = len(data)

    # Integer input domain with absolute distance as the input metric.
    input_space = dp.atom_domain(T=int), dp.absolute_distance(T=int)
    laplace_mechanism = input_space >> dp.m.then_laplace(scale=sensitivity / epsilon)

    # Clamping happens after the noise is added, on the released value only.
    return max(0, laplace_mechanism(true_count))

# =============================================================================
# PART 3: BUSINESS IMPACT ASSESSMENT (Activity 3)
# =============================================================================

print("=" * 60)
print("ACTIVITY 3: BUSINESS IMPACT ASSESSMENT")
print("=" * 60)

test_epsilons = [0.1, 1.0, 5.0]
iterations = 20
true_value = len(customers)

print(f"Analyzing count query (True Value: {true_value}) across different epsilon levels...")
print("-" * 60)
print(f"{'Epsilon':<10} | {'Mean Result':<15} | {'Std Dev':<15} | {'Avg Error':<15}")
print("-" * 60)

for eps in test_epsilons:
    results = []

    # This loop is a simulation of the accuracy/privacy trade-off: the runs
    # are deliberately NOT charged to budget_manager so that every epsilon
    # level can be assessed. In a real system each release would deplete
    # the budget.
    for _ in range(iterations):
        try:
            val = private_count(customers, eps)
            results.append(val)
        except Exception as e:
            print(f"Error: {e}")

    # statistics.stdev needs at least two samples (and statistics.mean at
    # least one); if the query failed on (almost) every iteration, report
    # the row as unavailable instead of crashing with StatisticsError.
    if len(results) < 2:
        print(f"{eps:<10.1f} | {'n/a':<15} | {'n/a':<15} | {'n/a':<15}")
        continue

    mean_res = statistics.mean(results)
    std_dev = statistics.stdev(results)
    # Mean absolute deviation of the noisy counts from the true count.
    avg_error = statistics.mean([abs(r - true_value) for r in results])

    print(f"{eps:<10.1f} | {mean_res:<15.2f} | {std_dev:<15.2f} | {avg_error:<15.2f}")

print("-" * 60)
print("\nRecommendation:")
print("-> Low Epsilon (0.1): High privacy, but high noise. Unsuitable for precise counts.")
print("-> High Epsilon (5.0): Low privacy, very accurate. Good for internal dashboards.")
print("-> Balanced (1.0): Reasonable trade-off for external reporting.")
print()

# =============================================================================
# DEMO: USING THE MANAGER
# =============================================================================

print("=" * 60, "DEMO: BUDGET MANAGER ENFORCEMENT", "=" * 60, sep="\n")

# (label, epsilon cost) pairs. Their combined cost is 1.5 + 2.0 + 2.0 = 5.5,
# which exceeds the initial budget of 5.0, so the last query should be refused.
queries = [
    ("Query 1", 1.5),
    ("Query 2", 2.0),
    ("Query 3", 2.0),
]

for q_name, q_cost in queries:
    print(f"Attempting {q_name} (Cost: {q_cost})...")
    try:
        if not budget_manager.check_budget(q_cost):
            print(f"  BLOCKED: Insufficient budget for {q_name}")
        else:
            # Run the query with epsilon equal to its cost, then record the spend.
            res = private_count(customers, epsilon=q_cost)
            budget_manager.consume_budget(q_cost)
            print(f"  Success! Result: {res}")
    except Exception as e:
        print(f"  Error: {e}")
    print()

print(f"Final Remaining Budget: {budget_manager.remaining_budget:.2f}")
print("=" * 60)

	import opendp.prelude as dp
	dp.enable_features("contrib")
	import pandas as pd
	import numpy as np
	import statistics

	# =============================================================================
	# DATASET SETUP
	# =============================================================================

	# Build the 20-row sample customer table used throughout the lab.
	customers = pd.DataFrame(
	    data={
	        'customer_id': range(1, 21),
	        'age': [
	            25, 34, 45, 29, 52, 38, 41, 33, 47, 36,
	            28, 31, 42, 26, 39, 44, 30, 35, 48, 27,
	        ],
	        'purchase_amount': [
	            150, 320, 890, 200, 450, 275, 680, 180, 520, 340,
	            210, 380, 560, 190, 410, 620, 240, 290, 470, 160,
	        ],
	    }
	)

	# Lab banner, then the dataset size followed by one blank separator line.
	print("=" * 60, "LAB: PRIVACY BUDGET MANAGEMENT & BUSINESS IMPACT", "=" * 60, sep="\n")
	print(f"Dataset size: {len(customers)} records", end="\n\n")

	# =============================================================================
	# PART 1: PRIVACY BUDGET MANAGER CLASS
	# =============================================================================

	class PrivacyBudgetManager:
	    """Track how much of a fixed privacy budget (epsilon) has been spent.

	    The manager only does bookkeeping: callers ask whether a query of a
	    given cost is affordable, and record the cost once the query is run.
	    """

	    def __init__(self, total_budget: float) -> None:
	        """Start with ``total_budget`` available and nothing consumed."""
	        self.total_budget = total_budget
	        self.consumed_budget = 0.0

	    @property
	    def remaining_budget(self) -> float:
	        """Budget still available: total minus everything consumed so far."""
	        return self.total_budget - self.consumed_budget

	    def check_budget(self, cost: float) -> bool:
	        """Return True when ``cost`` is non-negative and fits in the remaining budget.

	        A negative cost is never affordable: accepting it would let a caller
	        increase the remaining budget instead of spending it.
	        """
	        return 0 <= cost <= self.remaining_budget

	    def consume_budget(self, cost: float) -> None:
	        """Deduct ``cost`` from the budget and print the updated balance.

	        Raises:
	            ValueError: if ``cost`` is negative, or if it exceeds the
	                remaining budget. The budget is left unchanged in both cases.
	        """
	        if cost < 0:
	            # Guard first so the caller gets a precise message rather than
	            # a misleading "insufficient budget" error.
	            raise ValueError(f"Privacy cost must be non-negative, got {cost}")
	        if not self.check_budget(cost):
	            raise ValueError(f"Insufficient privacy budget. Required: {cost}, Remaining: {self.remaining_budget:.4f}")
	        self.consumed_budget += cost
	        print(f"Budget update: Consumed {cost:.2f}, Remaining {self.remaining_budget:.2f}")

	# Create the single budget tracker for the lab, starting with epsilon = 5.0.
	initial_epsilon = 5.0
	budget_manager = PrivacyBudgetManager(initial_epsilon)
	print(
	    f"Initialized Privacy Budget Manager with Total Budget: {budget_manager.total_budget}",
	    end="\n\n",
	)

	# =============================================================================
	# PART 2: OPENDP PRIVATE COUNT FUNCTION
	# =============================================================================

	def private_count(data, epsilon, sensitivity=1):
	    """Return a differentially private row count of ``data`` using OpenDP.

	    The true count ``len(data)`` is passed through an OpenDP Laplace
	    mechanism over the integer domain with scale ``sensitivity / epsilon``,
	    and the noisy value is clamped so it is never reported below zero.

	    Args:
	        data: Any sized collection; only its length is used.
	        epsilon: Privacy-loss parameter for this release. Must be > 0.
	        sensitivity: How much one record can change the count. Must be > 0.

	    Returns:
	        The noisy count, at least 0.

	    Raises:
	        ValueError: if ``epsilon`` or ``sensitivity`` is not strictly positive
	            (this also rejects NaN).
	    """
	    # Validate before dividing: epsilon == 0 would otherwise surface as a bare
	    # ZeroDivisionError, and a negative value would yield a negative scale.
	    if not epsilon > 0:
	        raise ValueError(f"epsilon must be positive, got {epsilon}")
	    if not sensitivity > 0:
	        raise ValueError(f"sensitivity must be positive, got {sensitivity}")

	    true_count = len(data)

	    # Integer input domain with absolute distance as the input metric.
	    input_space = dp.atom_domain(T=int), dp.absolute_distance(T=int)
	    laplace_mechanism = input_space >> dp.m.then_laplace(scale=sensitivity / epsilon)

	    # Clamping happens after the noise is added, on the released value only.
	    return max(0, laplace_mechanism(true_count))

	# =============================================================================
	# PART 3: BUSINESS IMPACT ASSESSMENT (Activity 3)
	# =============================================================================

	print("=" * 60)
	print("ACTIVITY 3: BUSINESS IMPACT ASSESSMENT")
	print("=" * 60)

	test_epsilons = [0.1, 1.0, 5.0]
	iterations = 20
	true_value = len(customers)

	print(f"Analyzing count query (True Value: {true_value}) across different epsilon levels...")
	print("-" * 60)
	# Plain "|" separators: the "\|" form is an invalid string escape that
	# would also print a literal backslash in every column divider.
	print(f"{'Epsilon':<10} | {'Mean Result':<15} | {'Std Dev':<15} | {'Avg Error':<15}")
	print("-" * 60)

	for eps in test_epsilons:
	    results = []

	    # This loop is a simulation of the accuracy/privacy trade-off: the runs
	    # are deliberately NOT charged to budget_manager so that every epsilon
	    # level can be assessed. In a real system each release would deplete
	    # the budget.
	    for _ in range(iterations):
	        try:
	            val = private_count(customers, eps)
	            results.append(val)
	        except Exception as e:
	            print(f"Error: {e}")

	    # statistics.stdev needs at least two samples (and statistics.mean at
	    # least one); if the query failed on (almost) every iteration, report
	    # the row as unavailable instead of crashing with StatisticsError.
	    if len(results) < 2:
	        print(f"{eps:<10.1f} | {'n/a':<15} | {'n/a':<15} | {'n/a':<15}")
	        continue

	    mean_res = statistics.mean(results)
	    std_dev = statistics.stdev(results)
	    # Mean absolute deviation of the noisy counts from the true count.
	    avg_error = statistics.mean([abs(r - true_value) for r in results])

	    print(f"{eps:<10.1f} | {mean_res:<15.2f} | {std_dev:<15.2f} | {avg_error:<15.2f}")

	print("-" * 60)
	print("\nRecommendation:")
	print("-> Low Epsilon (0.1): High privacy, but high noise. Unsuitable for precise counts.")
	print("-> High Epsilon (5.0): Low privacy, very accurate. Good for internal dashboards.")
	print("-> Balanced (1.0): Reasonable trade-off for external reporting.")
	print()

	# =============================================================================
	# DEMO: USING THE MANAGER
	# =============================================================================

	print("=" * 60)
	print("DEMO: BUDGET MANAGER ENFORCEMENT")
	print("=" * 60)

	# (label, epsilon cost) pairs. Their combined cost is 1.5 + 2.0 + 2.0 = 5.5,
	# which exceeds the initial budget of 5.0, so the last query should be refused.
	queries = [
	    ("Query 1", 1.5),
	    ("Query 2", 2.0),
	    ("Query 3", 2.0),
	]

	for q_name, q_cost in queries:
	    print(f"Attempting {q_name} (Cost: {q_cost})...")
	    try:
	        if budget_manager.check_budget(q_cost):
	            # Run the query with epsilon equal to its cost, then record the spend.
	            res = private_count(customers, epsilon=q_cost)
	            budget_manager.consume_budget(q_cost)
	            print(f" Success! Result: {res}")
	        else:
	            print(f" BLOCKED: Insufficient budget for {q_name}")
	    except Exception as e:
	        print(f" Error: {e}")
	    print()

	print(f"Final Remaining Budget: {budget_manager.remaining_budget:.2f}")
	print("=" * 60)
No results found