# zzzeek/benchmark_anonymous_ids.py

## benchmark_anonymous_ids.py
"""Benchmark performance of different anonymous identifier generation methods.

This script compares the performance of:
- id() - Python object id (fastest but not globally unique)
- uuid.uuid4() - UUID version 4 (cryptographically random but slower)
- secrets.token_hex() - Cryptographically secure random hex (preferred)

These are relevant for the _anonymous_label.safe_construct() feature where
we need to generate unique identifiers that won't collide.
"""
import timeit
import uuid
import secrets


def test_id():
    """Return the ``id()`` of a freshly created, throwaway ``object``.

    The result is an ``int``.  ``id()`` values are only guaranteed unique
    among objects that are alive at the same time, so separate calls are not
    guaranteed to produce distinct numbers.
    """
    throwaway = object()
    return id(throwaway)


def test_uuid4():
    """Return a newly generated version-4 UUID in its string form."""
    fresh = uuid.uuid4()
    return str(fresh)


def test_token_hex_8():
    """Return 8 random bytes from ``secrets.token_hex`` as 16 hex digits."""
    nbytes = 8
    return secrets.token_hex(nbytes)


def test_token_hex_4():
    """Return 4 random bytes from ``secrets.token_hex`` as 8 hex digits."""
    nbytes = 4
    return secrets.token_hex(nbytes)


def test_token_hex_16():
    """Return 16 random bytes from ``secrets.token_hex`` as 32 hex digits."""
    nbytes = 16
    return secrets.token_hex(nbytes)


def _relative_speed(elapsed, reference):
    """Describe how *elapsed* compares with *reference*.

    Returns ``"N.NNx slower"`` when *elapsed* is at least *reference* and
    ``"N.NNx faster"`` otherwise, so the factor is always >= 1 and the word
    always matches the measured direction.  Returns ``"n/a"`` when either
    timing is not positive, because no meaningful ratio can be formed.
    """
    if elapsed <= 0 or reference <= 0:
        return "n/a"
    if elapsed >= reference:
        return f"{elapsed / reference:.2f}x slower"
    return f"{reference / elapsed:.2f}x faster"


def _print_section(label, elapsed, iterations, comparisons):
    """Print one result section: total time, per-call time, comparisons."""
    # Every label is left-justified to 25 columns so the values line up.
    print(f"{label:<25}{elapsed:.4f} seconds")
    print(f"{'  Per call:':<25}{elapsed / iterations * 1e6:.2f} µs")
    for caption, text in comparisons:
        print(f"{caption:<25}{text}")
    print()


def run_benchmark(iterations=100000):
    """Time every identifier generator and print a comparison report.

    Each generator is timed once with ``timeit.timeit`` and reported as total
    seconds, microseconds per call, and speed relative to ``id()`` (and to
    ``uuid.uuid4()`` for the ``secrets.token_hex`` variants).  The relative
    figures and the speed statements in the closing summary are derived from
    the measured timings rather than assumed.

    Args:
        iterations: Number of calls timed per generator; must be at least 1.

    Raises:
        ValueError: If *iterations* is less than 1.
    """
    if iterations < 1:
        raise ValueError("iterations must be a positive integer")

    print(f"Benchmarking {iterations:,} iterations of each method\n")
    print("=" * 70)

    # id() is the baseline every other generator is compared against.
    time_id = timeit.timeit(test_id, number=iterations)
    _print_section(
        "id():", time_id, iterations,
        [("  Relative speed:", "1.00x (baseline)")],
    )

    time_uuid4 = timeit.timeit(test_uuid4, number=iterations)
    _print_section(
        "uuid.uuid4():", time_uuid4, iterations,
        [("  Relative to id():", _relative_speed(time_uuid4, time_id))],
    )

    # Timings of the token_hex variants, keyed by byte count, kept for the
    # summary below.
    token_times = {}
    token_variants = (
        (4, test_token_hex_4),
        (8, test_token_hex_8),
        (16, test_token_hex_16),
    )
    for nbytes, generator in token_variants:
        elapsed = timeit.timeit(generator, number=iterations)
        token_times[nbytes] = elapsed
        _print_section(
            f"secrets.token_hex({nbytes}):", elapsed, iterations,
            [
                ("  Relative to id():", _relative_speed(elapsed, time_id)),
                (
                    "  Relative to uuid4():",
                    _relative_speed(elapsed, time_uuid4),
                ),
            ],
        )

    token8_vs_uuid4 = _relative_speed(token_times[8], time_uuid4)
    per_call = ", ".join(
        f"{nbytes} bytes {elapsed / iterations * 1e6:.2f} µs"
        for nbytes, elapsed in token_times.items()
    )

    print("=" * 70)
    print("\nConclusions:")
    print("-" * 70)
    print("1. id() is the baseline; not suitable for cross-process uniqueness")
    print(f"2. secrets.token_hex(8) vs uuid.uuid4(): {token8_vs_uuid4}")
    print("3. secrets.token_hex(8) provides good collision resistance")
    print("   (2^64 possibilities)")
    print(f"4. token_hex() per call: {per_call}")
    print()
    print("Recommendation: Use secrets.token_hex(8) for anonymous labels")
    print(f"  - Measured speed vs uuid4(): {token8_vs_uuid4}")
    print("  - Cryptographically secure")
    print("  - 16-char hex string (2^64 unique values)")
    print("  - Negligible collision probability for typical use cases")


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_benchmark()
	"""Benchmark performance of different anonymous identifier generation methods.

	This script compares the performance of:
	- id() - Python object id (fastest but not globally unique)
	- uuid.uuid4() - UUID version 4 (cryptographically random but slower)
	- secrets.token_hex() - Cryptographically secure random hex (preferred)

	These are relevant for the _anonymous_label.safe_construct() feature where
	we need to generate unique identifiers that won't collide.
	"""
	import timeit
	import uuid
	import secrets


	def test_id():
	    """Return the ``id()`` of a new, immediately discarded ``object``."""
	    return id(object())


	def test_uuid4():
	    """Return a newly generated version-4 UUID in its string form."""
	    return str(uuid.uuid4())


	def test_token_hex_8():
	    """Return ``secrets.token_hex(8)``: a 16-character hex string."""
	    return secrets.token_hex(8)


	def test_token_hex_4():
	    """Return ``secrets.token_hex(4)``: an 8-character hex string."""
	    return secrets.token_hex(4)


	def test_token_hex_16():
	    """Return ``secrets.token_hex(16)``: a 32-character hex string."""
	    return secrets.token_hex(16)


	def _relative_speed(elapsed, reference):
	    """Describe how *elapsed* compares with *reference*.

	    Returns ``"N.NNx slower"`` when *elapsed* is at least *reference* and
	    ``"N.NNx faster"`` otherwise, so the word always matches the measured
	    direction.  Returns ``"n/a"`` when either timing is not positive.
	    """
	    if elapsed <= 0 or reference <= 0:
	        return "n/a"
	    if elapsed >= reference:
	        return f"{elapsed / reference:.2f}x slower"
	    return f"{reference / elapsed:.2f}x faster"


	def run_benchmark(iterations=100000):
	    """Time every identifier generator and print a comparison report.

	    Each generator is timed once with ``timeit.timeit``.  Relative figures
	    and the speed statements in the summary come from the measured timings.

	    Args:
	        iterations: Number of calls timed per generator; must be >= 1.

	    Raises:
	        ValueError: If *iterations* is less than 1.
	    """
	    if iterations < 1:
	        raise ValueError("iterations must be a positive integer")

	    print(f"Benchmarking {iterations:,} iterations of each method\n")
	    print("=" * 70)

	    # id() is the baseline every other generator is compared against.
	    time_id = timeit.timeit(test_id, number=iterations)
	    print(f"id(): {time_id:.4f} seconds")
	    print(f" Per call: {time_id / iterations * 1e6:.2f} µs")
	    print(" Relative speed: 1.00x (baseline)")
	    print()

	    time_uuid4 = timeit.timeit(test_uuid4, number=iterations)
	    print(f"uuid.uuid4(): {time_uuid4:.4f} seconds")
	    print(f" Per call: {time_uuid4 / iterations * 1e6:.2f} µs")
	    print(f" Relative to id(): {_relative_speed(time_uuid4, time_id)}")
	    print()

	    # Timings of the token_hex variants, keyed by byte count, kept for the
	    # summary below.
	    token_times = {}
	    token_variants = (
	        (4, test_token_hex_4),
	        (8, test_token_hex_8),
	        (16, test_token_hex_16),
	    )
	    for nbytes, generator in token_variants:
	        elapsed = timeit.timeit(generator, number=iterations)
	        token_times[nbytes] = elapsed
	        print(f"secrets.token_hex({nbytes}): {elapsed:.4f} seconds")
	        print(f" Per call: {elapsed / iterations * 1e6:.2f} µs")
	        print(f" Relative to id(): {_relative_speed(elapsed, time_id)}")
	        print(f" Relative to uuid4(): {_relative_speed(elapsed, time_uuid4)}")
	        print()

	    token8_vs_uuid4 = _relative_speed(token_times[8], time_uuid4)
	    per_call = ", ".join(
	        f"{nbytes} bytes {elapsed / iterations * 1e6:.2f} µs"
	        for nbytes, elapsed in token_times.items()
	    )

	    print("=" * 70)
	    print("\nConclusions:")
	    print("-" * 70)
	    print("1. id() is the baseline; not suitable for cross-process uniqueness")
	    print(f"2. secrets.token_hex(8) vs uuid.uuid4(): {token8_vs_uuid4}")
	    print("3. secrets.token_hex(8) provides good collision resistance")
	    print(" (2^64 possibilities)")
	    print(f"4. token_hex() per call: {per_call}")
	    print()
	    print("Recommendation: Use secrets.token_hex(8) for anonymous labels")
	    print(f" - Measured speed vs uuid4(): {token8_vs_uuid4}")
	    print(" - Cryptographically secure")
	    print(" - 16-char hex string (2^64 unique values)")
	    print(" - Negligible collision probability for typical use cases")


	# Run the benchmark only when this file is executed as a script.
	if __name__ == "__main__":
	    run_benchmark()
# No results found