Skip to content

Instantly share code, notes, and snippets.

@thiagobutignon
Created January 13, 2026 00:45
Show Gist options
  • Select an option

  • Save thiagobutignon/8e30ca0e6522d874cb88ea247be06541 to your computer and use it in GitHub Desktop.

Select an option

Save thiagobutignon/8e30ca0e6522d874cb88ea247be06541 to your computer and use it in GitHub Desktop.
privacy_budget_lab.py
import opendp.prelude as dp
dp.enable_features("contrib")
import pandas as pd
import numpy as np
# =============================================================================
# DATASET SETUP
# =============================================================================
# Create sample customer dataset
# Twenty synthetic customers: a sequential id, an age in years, and a single
# purchase amount. Every query below is answered from this one table.
customers = pd.DataFrame(
    {
        'customer_id': range(1, 21),
        'age': [
            25, 34, 45, 29, 52, 38, 41, 33, 47, 36,
            28, 31, 42, 26, 39, 44, 30, 35, 48, 27,
        ],
        'purchase_amount': [
            150, 320, 890, 200, 450, 275, 680, 180, 520, 340,
            210, 380, 560, 190, 410, 620, 240, 290, 470, 160,
        ],
    }
)

# Section banner, then a preview of the first ten rows and the row total.
print("=" * 60, "CUSTOMER DATASET", "=" * 60, sep="\n")
print(customers.head(10))
print(f"\nTotal customers in dataset: {len(customers)}")
print()
# =============================================================================
# ACTIVITY 1: BASIC PRIVACY BUDGET IMPLEMENTATION
# =============================================================================
# Configure privacy parameters
print("=" * 60, "ACTIVITY 1: BASIC PRIVACY BUDGET IMPLEMENTATION", "=" * 60, sep="\n")

# Running total of ε spent by every query issued so far; nothing spent yet.
total_epsilon_consumed = 0.0

# ε charged to each standalone count query (smaller ε = stronger privacy,
# noisier answers).
epsilon = 0.5

# A count moves by at most 1 when a single customer is added or removed.
sensitivity = 1

print(f"Initial Privacy Budget (ε): {epsilon}")
print(f"Query Sensitivity: {sensitivity}")
print()
# Create differentially private count function using Laplace noise
def private_count(data, epsilon, sensitivity):
    """
    Return a differentially private count of ``data`` via the Laplace mechanism.

    The true count is ``len(data)``. An OpenDP Laplace measurement built over
    an integer domain with scale ``sensitivity / epsilon`` perturbs it, and
    the perturbed value is clamped at zero before being returned.

    Parameters:
    - data: DataFrame or array to count (anything that supports ``len``)
    - epsilon: Privacy budget parameter (smaller = more privacy); must be > 0
    - sensitivity: Query sensitivity (for count = 1); must not be negative

    Returns:
    - Noisy count (non-negative integer)

    Raises:
    - ValueError: if ``epsilon`` is not strictly positive (including NaN) or
      ``sensitivity`` is negative
    """
    # Validate before touching OpenDP so a bad budget fails with a clear
    # message instead of a ZeroDivisionError or an invalid noise scale.
    # `not epsilon > 0` (rather than `epsilon <= 0`) also rejects NaN.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be a positive number, got {epsilon!r}")
    if sensitivity < 0:
        raise ValueError(f"sensitivity must be non-negative, got {sensitivity!r}")

    # Calculate true count
    true_count = len(data)

    # Domain and metric for the count: an integer scalar compared by
    # absolute distance.
    space = dp.atom_domain(T=int), dp.absolute_distance(T=int)

    # Laplace scale b = sensitivity / epsilon
    scale = sensitivity / epsilon
    laplace_mechanism = space >> dp.m.then_laplace(scale=scale)

    # Release the noisy count
    private_result = laplace_mechanism(int(true_count))

    # Noise can push a small count below zero; a negative count is
    # meaningless, so clamp it.
    return max(0, private_result)
# Execute private count query on full dataset
# First release: a noisy count of the whole table, charged one full ε
# against the running budget.
private_customer_count = private_count(customers, epsilon, sensitivity)
true_customer_count = len(customers)
total_epsilon_consumed += epsilon

# Show the exact count next to the noisy one so the perturbation is visible.
print(f"True count (all customers): {true_customer_count}")
print(f"Private count (with noise): {private_customer_count}")
print(f"Noise added: {private_customer_count - true_customer_count}")
print(f"Privacy budget consumed: {epsilon}")
print()
# =============================================================================
# PRACTICE CHALLENGE 1: AGE RANGE QUERY (25-35 years)
# =============================================================================
print("=" * 60, "PRACTICE CHALLENGE 1: AGE RANGE QUERY (25-35)", "=" * 60, sep="\n")

# Keep only customers aged 25 through 35, both endpoints included.
age_filtered = customers[customers['age'].between(25, 35)]
true_age_count = len(age_filtered)

# Laplace scale b = Δf/ε, computed here purely for the report below.
noise_scale = sensitivity / epsilon

# Second release: same ε as the full-table query, charged separately.
private_age_count = private_count(age_filtered, epsilon, sensitivity)
total_epsilon_consumed += epsilon

print(f"Age range filter: 25-35 years")
print(f"True count (ages 25-35): {true_age_count}")
print(f"Private count (with noise): {private_age_count}")
print(f"Noise added: {private_age_count - true_age_count}")
print(f"Noise scale (b = Δf/ε): {noise_scale:.2f}")
print(f"Privacy budget consumed for this query: {epsilon}")
print()
# =============================================================================
# ADDITIONAL ANALYSIS: MULTIPLE AGE SEGMENTS
# =============================================================================
print("=" * 60, "BONUS: MULTIPLE AGE SEGMENT ANALYSIS", "=" * 60, sep="\n")

# Marketing segments as (label, youngest age, oldest age); both bounds are
# inclusive.
age_segments = [
    ("Young Adults (25-30)", 25, 30),
    ("Mid-Career (31-40)", 31, 40),
    ("Mature (41-50)", 41, 50),
]

# A smaller ε per release than the standalone queries, because several
# releases are made in a row.
epsilon_per_segment = 0.3
print(f"Privacy budget per segment: {epsilon_per_segment}")
print()

# One noisy count per segment; each release is charged to the running total.
# NOTE(review): the segments look disjoint, so parallel composition might
# justify charging less than the sum - confirm before tightening the accounting.
for segment_name, min_age, max_age in age_segments:
    segment_data = customers[customers['age'].between(min_age, max_age)]
    true_segment_count = len(segment_data)
    private_segment_count = private_count(segment_data, epsilon_per_segment, sensitivity)

    print(f"{segment_name}:")
    print(f" True count: {true_segment_count}")
    print(f" Private count: {private_segment_count}")
    print(f" Budget consumed: {epsilon_per_segment}")
    print()

    total_epsilon_consumed += epsilon_per_segment
# =============================================================================
# PRIVACY BUDGET SUMMARY
# =============================================================================
print("=" * 60)
print("PRIVACY BUDGET SUMMARY")
print("=" * 60)

# Derive the query count instead of hard-coding it, so the summary stays
# correct if age segments are added or removed: two standalone releases
# (full-dataset count, ages 25-35 count) plus one release per age segment.
queries_executed = 2 + len(age_segments)

print(f"Total privacy budget consumed (ε): {total_epsilon_consumed:.2f}")
print(f"Number of queries executed: {queries_executed}")
print(f"Average ε per query: {total_epsilon_consumed / queries_executed:.2f}")
print()
print("Key Insights:")
print("- Lower ε values provide stronger privacy but more noise")
print("- Privacy budget is additive across queries")
print("- Must carefully allocate budget for multiple analyses")
print("- Trade-off between privacy protection and analytical utility")
print("=" * 60)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment