Skip to content

Instantly share code, notes, and snippets.

@m-ad
Created December 1, 2025 09:28
Show Gist options
  • Select an option

  • Save m-ad/97836d6f3508940e67a6bd4f26c8ef00 to your computer and use it in GitHub Desktop.

Select an option

Save m-ad/97836d6f3508940e67a6bd4f26c8ef00 to your computer and use it in GitHub Desktop.
Compare performance of `is_float` check with try/except and regex
# /// script
# dependencies = ["numpy"]
# ///
"""
Goal:
Compare performance of two strategies to check if a string
can be converted to a float:
1) using try/except around float()
2) using a regular expression to match float shape
How to run:
uv run float_perf.py 9000000
Sample output:
----------------------------------------
try/except: 0.304s for 900000 cases
regex: 0.254s for 900000 cases
----------------------------------------
try/except: 0.288s for 900000 cases
regex: 0.249s for 900000 cases
----------------------------------------
try/except: 0.286s for 900000 cases
regex: 0.241s for 900000 cases
----------------------------------------
try/except: 0.292s for 900000 cases
regex: 0.249s for 900000 cases
----------------------------------------
try/except: 0.286s for 900000 cases
regex: 0.244s for 900000 cases
----------------------------------------
try/except: 0.290s for 900000 cases
regex: 0.238s for 900000 cases
----------------------------------------
try/except: 0.294s for 900000 cases
regex: 0.249s for 900000 cases
----------------------------------------
try/except: 0.289s for 900000 cases
regex: 0.245s for 900000 cases
----------------------------------------
try/except: 0.286s for 900000 cases
regex: 0.243s for 900000 cases
----------------------------------------
try/except: 0.286s for 900000 cases
regex: 0.245s for 900000 cases
========================================
Total try/except time: 2.901s for 9000000 cases
Total regex time: 2.456s for 9000000 cases
ratio try/regex: 1.18
Conclusion:
The try/except method is about 18% slower than the regex method for this
particular dataset (50/50 mix of float/non-float strings).
This is likely due to the overhead of raising and catching an exception
for every non-float string. Both are fast enough for practical purposes
at roughly 0.3 µs per sample.
"""
from __future__ import annotations
import re
from argparse import ArgumentParser
from collections.abc import Callable
from time import perf_counter
import numpy as np
# Matches an optional leading minus sign, one or more digits, a dot, and one
# or more digits. Other spellings (exponents, bare integers, a leading "+",
# a missing integer or fractional part) are not matched by this pattern.
_FLOAT_PATTERN = re.compile(r"^-?\d+\.\d+$")
def parse_args(argv: list[str] | None = None) -> int:
    """Parse CLI arguments.

    Parameters
    ----------
    argv : list[str] or None, optional
        Argument strings to parse. When ``None`` (the default), argparse
        reads the process command line, so existing callers are unaffected.

    Returns
    -------
    int
        Number of test cases to evaluate.

    Raises
    ------
    ValueError
        If the requested number of cases is negative or odd.
    """
    parser = ArgumentParser(description="Compare float parsing strategies.")
    parser.add_argument(
        "num_cases",
        nargs="?",
        default=200_000,
        type=int,
        help="number of random samples to benchmark (must be even)",
    )
    args = parser.parse_args(argv)
    # A negative count would otherwise only fail later, deep inside sample
    # generation; reject it here with a clear message instead.
    if args.num_cases < 0:
        raise ValueError("num_cases must not be negative.")
    if args.num_cases % 2 != 0:
        raise ValueError("num_cases must be even to balance valid and invalid cases.")
    return args.num_cases
def build_test_cases(num_cases: int, seed: int = 42) -> list[str]:
    """Generate a reproducible, shuffled mix of float and non-float strings.

    Half of the samples are floats formatted with eight decimals; the other
    half are integers with a trailing ``x`` appended.

    Parameters
    ----------
    num_cases : int
        Total number of samples requested (must be even).
    seed : int, optional
        Seed for the random generator, so that the same seed always yields
        the same dataset.

    Returns
    -------
    list[str]
        The samples in shuffled order.
    """
    generator = np.random.default_rng(seed)
    per_class = num_cases // 2

    # Draw order matters for reproducibility: floats first, then integers.
    valid = [f"{number:.8f}" for number in generator.uniform(-1e6, 1e6, per_class)]
    invalid = [
        f"{number}x"
        for number in generator.integers(-1_000_000, 1_000_000, per_class)
    ]

    # Shuffle through an object array so the generator's own state drives
    # the permutation.
    pool = np.array([*valid, *invalid], dtype=object)
    generator.shuffle(pool)
    return pool.tolist()
def is_float_try(candidate: str) -> bool:
    """Report whether ``float()`` accepts *candidate* without a ValueError."""
    try:
        float(candidate)
    except ValueError:
        accepted = False
    else:
        accepted = True
    return accepted
def is_float_regex(candidate: str) -> bool:
    """Report whether *candidate* matches the module's float pattern."""
    found = _FLOAT_PATTERN.match(candidate)
    return found is not None
def benchmark(func: Callable[[str], bool], samples: list[str]) -> float:
    """Time one pass of *func* over every entry of *samples*.

    Parameters
    ----------
    func : Callable[[str], bool]
        Predicate to call once per sample; its result is discarded.
    samples : list[str]
        Strings passed to ``func`` in order.

    Returns
    -------
    float
        Elapsed time for the whole pass, as measured by ``perf_counter``.
    """
    began = perf_counter()
    for text in samples:
        func(text)
    finished = perf_counter()
    return finished - began
def main() -> None:
    """CLI entry point.

    Splits the requested number of cases into several rounds, each with its
    own freshly seeded dataset, times both predicates on every round, and
    prints per-round timings followed by totals and the try/regex ratio.
    """
    num_cases = parse_args()
    try_time_total, regex_time_total = 0.0, 0.0
    divisor = 10  # divide num_cases by this to intersperse timing
    cases_per_round = num_cases // divisor
    cases_total = 0
    for i in range(divisor):
        samples = build_test_cases(num_cases=cases_per_round, seed=i)
        # Report what was actually benchmarked: integer division above and
        # inside the sample builder can yield fewer samples than requested.
        round_size = len(samples)
        cases_total += round_size
        try_time = benchmark(is_float_try, samples)
        regex_time = benchmark(is_float_regex, samples)
        print("-" * 40)
        print(f"try/except: {try_time:.3f}s for {round_size} cases")
        print(f"regex: {regex_time:.3f}s for {round_size} cases")
        try_time_total += try_time
        regex_time_total += regex_time
    print("=" * 40)
    print(f"Total try/except time: {try_time_total:.3f}s for {cases_total} cases")
    print(f"Total regex time: {regex_time_total:.3f}s for {cases_total} cases")
    # Guard the ratio: with no measurable regex time the division is undefined.
    if regex_time_total > 0:
        print(f"ratio try/regex: {try_time_total / regex_time_total:.2f}")
    else:
        print("ratio try/regex: n/a")


# Run the benchmark only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment