Created
December 1, 2025 09:28
-
-
Save m-ad/97836d6f3508940e67a6bd4f26c8ef00 to your computer and use it in GitHub Desktop.
Compare performance of `is_float` check with try/except and regex
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # /// script | |
| # dependencies = ["numpy"] | |
| # /// | |
| """ | |
| Goal: | |
| Compare performance of two strategies to check if a string | |
| can be converted to a float: | |
| 1) using try/except around float() | |
| 2) using a regular expression to match float shape | |
| How to run: | |
| uv run float_perf.py 9000000 | |
| Sample output: | |
| ---------------------------------------- | |
| try/except: 0.304s for 900000 cases | |
| regex: 0.254s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.288s for 900000 cases | |
| regex: 0.249s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.286s for 900000 cases | |
| regex: 0.241s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.292s for 900000 cases | |
| regex: 0.249s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.286s for 900000 cases | |
| regex: 0.244s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.290s for 900000 cases | |
| regex: 0.238s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.294s for 900000 cases | |
| regex: 0.249s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.289s for 900000 cases | |
| regex: 0.245s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.286s for 900000 cases | |
| regex: 0.243s for 900000 cases | |
| ---------------------------------------- | |
| try/except: 0.286s for 900000 cases | |
| regex: 0.245s for 900000 cases | |
| ======================================== | |
| Total try/except time: 2.901s for 9000000 cases | |
| Total regex time: 2.456s for 9000000 cases | |
| ratio try/regex: 1.18 | |
| Conclusion: | |
| The try/except method is slower than the regex method for this | |
| particular dataset (50/50 mix of float/non-float strings). | |
| This is likely due to the overhead of exception handling in Python. | |
| Both are super-fast for practical purposes at roughly 0.3 µs per sample. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| from argparse import ArgumentParser | |
| from collections.abc import Callable | |
| from time import perf_counter | |
| import numpy as np | |
# Shape accepted by the regex strategy: an optional leading "-", one or more
# digits, a literal ".", then one or more digits (e.g. "-12.5"). Integers,
# exponent notation and a leading "+" do not match this pattern.
_FLOAT_PATTERN = re.compile(r"^-?\d+\.\d+$")
def parse_args(argv: list[str] | None = None) -> int:
    """Parse CLI arguments.

    Parameters
    ----------
    argv : list[str] | None, optional
        Argument strings to parse. ``None`` (the default) makes argparse
        read the process command line, which is the original behaviour.

    Returns
    -------
    int
        Number of test cases to evaluate.

    Raises
    ------
    ValueError
        If ``num_cases`` is not positive, or is odd.
    """
    parser = ArgumentParser(description="Compare float parsing strategies.")
    parser.add_argument(
        "num_cases",
        nargs="?",
        default=200_000,
        type=int,
        help="number of random samples to benchmark (must be even)",
    )
    args = parser.parse_args(argv)
    # A zero or negative count cannot produce a meaningful benchmark, so
    # reject it here instead of letting it reach the dataset builder.
    if args.num_cases <= 0:
        raise ValueError("num_cases must be a positive integer.")
    if args.num_cases % 2 != 0:
        raise ValueError("num_cases must be even to balance valid and invalid cases.")
    return args.num_cases
def build_test_cases(num_cases: int, seed: int = 42) -> list[str]:
    """Build a reproducible, shuffled mix of float-like and invalid strings.

    Parameters
    ----------
    num_cases : int
        Total number of cases to build (must be even); half of them are
        formatted floats and half are integers with a trailing ``x``.
    seed : int, optional
        Seed for the random generator, so the same seed yields the same
        dataset.

    Returns
    -------
    list[str]
        Shuffled list of string samples.
    """
    generator = np.random.default_rng(seed)
    per_kind = num_cases // 2

    # Draw order (floats first, then integers) is part of the dataset's
    # reproducibility, so keep it fixed.
    float_draws = generator.uniform(-1e6, 1e6, per_kind)
    int_draws = generator.integers(-1_000_000, 1_000_000, per_kind)

    pool = [f"{value:.8f}" for value in float_draws]
    pool.extend(f"{value}x" for value in int_draws)

    # An object array keeps the entries as plain Python strings while still
    # letting the generator shuffle them in place.
    shuffled = np.array(pool, dtype=object)
    generator.shuffle(shuffled)
    return shuffled.tolist()
def is_float_try(candidate: str) -> bool:
    """Report whether ``float()`` accepts *candidate*.

    The conversion result is discarded; only whether ``float()`` raises
    ``ValueError`` decides the answer.
    """
    try:
        float(candidate)
    except ValueError:
        convertible = False
    else:
        convertible = True
    return convertible
def is_float_regex(candidate: str) -> bool:
    """Report whether *candidate* matches the module-level float pattern.

    Returns ``True`` when ``_FLOAT_PATTERN.match`` finds a match and
    ``False`` when it returns ``None``.
    """
    matched = _FLOAT_PATTERN.match(candidate)
    return matched is not None
def benchmark(func: Callable[[str], bool], samples: list[str]) -> float:
    """Time one full pass of *func* over *samples*.

    Parameters
    ----------
    func : Callable[[str], bool]
        Predicate to call once per sample.
    samples : list[str]
        Inputs fed to ``func`` in order.

    Returns
    -------
    float
        Difference between the ``perf_counter()`` readings taken just
        before and just after the loop.
    """
    started_at = perf_counter()
    for item in samples:
        func(item)  # return value is discarded; only the call is timed
    finished_at = perf_counter()
    return finished_at - started_at
def main() -> None:
    """CLI entry point.

    Splits the requested number of cases into ``divisor`` rounds. Each round
    builds a fresh dataset (seeded with the round index), times both
    predicates on it, and prints the per-round timings; totals and the
    try/regex ratio are printed at the end.
    """
    num_cases = parse_args()
    try_time_total, regex_time_total = 0.0, 0.0
    cases_total = 0
    divisor = 10  # divide num_cases by this to intersperse timing
    chunk_size = num_cases // divisor
    for i in range(divisor):
        samples = build_test_cases(num_cases=chunk_size, seed=i)
        # Report what was actually timed: build_test_cases() rounds an odd
        # chunk size down to an even sample count, and the integer division
        # above drops any remainder of num_cases.
        num_samples = len(samples)
        try_time = benchmark(is_float_try, samples)
        regex_time = benchmark(is_float_regex, samples)
        print("-" * 40)
        print(f"try/except: {try_time:.3f}s for {num_samples} cases")
        print(f"regex: {regex_time:.3f}s for {num_samples} cases")
        try_time_total += try_time
        regex_time_total += regex_time
        cases_total += num_samples
    print("=" * 40)
    print(f"Total try/except time: {try_time_total:.3f}s for {cases_total} cases")
    print(f"Total regex time: {regex_time_total:.3f}s for {cases_total} cases")
    # Guard the ratio so a zero denominator cannot raise ZeroDivisionError.
    if regex_time_total > 0:
        print(f"ratio try/regex: {try_time_total / regex_time_total:.2f}")
    else:
        print("ratio try/regex: n/a")
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment