oliveigah/oban_py_cron_discovery_bench.py

## oban_py_cron_discovery_bench.py
import os
import tempfile
import time
from pathlib import Path
from textwrap import dedent


def make_file(i: int, *, has_cron: bool) -> str:
    """Build the source text of one synthetic module for the benchmark.

    The result is a short preamble followed by 50 filler classes, roughly
    600 lines in total, so discovery has a realistic amount of text to scan.

    Args:
        i: Index of the file; embedded in every generated class name so
            that names differ between files.
        has_cron: When true, the preamble imports ``worker`` from ``oban``
            and declares a ``Worker{i}`` class decorated with
            ``@worker(cron="*/5 * * * *")``. When false, the preamble only
            imports ``os``, ``sys`` and ``logging`` and creates a logger.

    Returns:
        The generated module source as a single string.
    """
    if not has_cron:
        preamble = dedent("""\
            import os
            import sys
            import logging

            logger = logging.getLogger(__name__)
        """)
    else:
        preamble = dedent(f"""\
            from oban import worker

            @worker(cron="*/5 * * * *")
            class Worker{i}:
                async def process(self, job):
                    pass
        """)

    # Filler classes contain no cron marker; they only add bulk to the file.
    filler = [
        dedent(f"""\
            class Model{i}_{idx}:
                name: str = 'model_{i}_{idx}'
                value: int = {idx}

                def compute(self):
                    return self.value * 2

                def validate(self):
                    if self.value < 0:
                        raise ValueError('negative')
                    return True
        """)
        for idx in range(50)
    ]

    # The preamble already ends with a newline; classes are separated by one
    # blank line.
    return preamble + "\n".join(filler)


def bench(total: int, cron_count: int) -> None:
    """Time the three cron-discovery implementations on one generated tree.

    Writes ``total`` files produced by ``make_file`` into a fresh temporary
    directory, runs the grep, "better grep" and AST variants of
    ``_import_cron_paths`` over that directory, and prints one timing line
    per variant.

    The temporary directory is deleted and the caller's working directory is
    restored before returning, including when a discovery function raises.

    Args:
        total: Number of module files to generate.
        cron_count: How many of those files (indices ``0`` to
            ``cron_count - 1``) are generated with a cron worker.

    Raises:
        AssertionError: If the three implementations do not return equal
            results once sorted.
    """
    # Imported up front so a missing implementation fails before any files
    # are written and so import cost stays outside the timed sections.
    from oban.cli import _import_cron_paths as grep_discover
    from oban.cli_ast import _import_cron_paths as ast_discover
    from oban.cli_better_grep import _import_cron_paths as better_grep_discover

    original_cwd = os.getcwd()

    with tempfile.TemporaryDirectory() as tmp_name:
        tmpdir = Path(tmp_name)

        # NOTE(review): file names repeat across bench() calls (module_0, ...).
        # If the discovery functions import by module name, entries cached in
        # sys.modules by an earlier call may affect later ones - confirm.
        for i in range(total):
            content = make_file(i, has_cron=i < cron_count)
            (tmpdir / f"module_{i}.py").write_text(content, encoding="utf-8")

        os.chdir(tmpdir)
        try:
            start = time.perf_counter()
            grep_results = grep_discover([str(tmpdir)])
            grep_ms = (time.perf_counter() - start) * 1000

            start = time.perf_counter()
            better_grep_results = better_grep_discover([str(tmpdir)])
            better_grep_ms = (time.perf_counter() - start) * 1000

            start = time.perf_counter()
            ast_results = ast_discover([str(tmpdir)])
            ast_ms = (time.perf_counter() - start) * 1000
        finally:
            # Step out of the tree before it is deleted: the process must not
            # be left in a removed directory, and some platforms refuse to
            # delete the current working directory.
            os.chdir(original_cwd)

        # Raised explicitly instead of using ``assert`` so the cross-check
        # still runs under ``python -O``.
        grep_sorted = sorted(grep_results)
        better_grep_sorted = sorted(better_grep_results)
        ast_sorted = sorted(ast_results)
        if not (better_grep_sorted == ast_sorted == grep_sorted):
            raise AssertionError(
                "cron discovery implementations disagree: "
                f"grep={len(grep_results)}, "
                f"better_grep={len(better_grep_results)}, "
                f"ast={len(ast_results)} results"
            )

    # NOTE(review): the label below says ~1000 lines, but make_file emits
    # about 600 lines per file - confirm which figure is intended.
    print(f"  {total} files, {cron_count} with cron (~1000 lines each)")
    print(f"    Grep:        {grep_ms:7.1f}ms  found {len(grep_results):>4}")
    print(
        f"    Better Grep: {better_grep_ms:7.1f}ms  found {len(better_grep_results):>4}"
    )
    print(f"    AST:         {ast_ms:7.1f}ms  found {len(ast_results):>4}")
    print()


if __name__ == "__main__":
    # One (total files, files with a cron worker) pair per benchmark run,
    # executed in this order.
    scenarios = (
        (100, 10),
        (100, 100),
        (1000, 10),
        (1000, 100),
        (1000, 1000),
    )

    print("=== grep vs AST cron discovery benchmark ===\n")

    for file_total, cron_total in scenarios:
        bench(file_total, cron_total)
	import os
	import tempfile
	import time
	from pathlib import Path
	from textwrap import dedent


	def make_file(i: int, *, has_cron: bool) -> str:
	if has_cron:
	header = dedent(f"""\
	from oban import worker

	@worker(cron="/5 * * *")
	class Worker{i}:
	async def process(self, job):
	pass
	""")
	else:
	header = dedent(f"""\
	import os
	import sys
	import logging

	logger = logging.getLogger(__name__)
	""")

	# Pad to ~1000 lines
	body = []
	for j in range(50):
	body.append(
	dedent(f"""\
	class Model{i}_{j}:
	name: str = 'model_{i}_{j}'
	value: int = {j}

	def compute(self):
	return self.value * 2

	def validate(self):
	if self.value < 0:
	raise ValueError('negative')
	return True
	""")
	)

	return header + "\n".join(body)


	def bench(total: int, cron_count: int) -> None:
	tmpdir = Path(tempfile.mkdtemp())

	for i in range(total):
	content = make_file(i, has_cron=i < cron_count)
	(tmpdir / f"module_{i}.py").write_text(content)

	os.chdir(tmpdir)

	from oban.cli import _import_cron_paths as grep_discover
	from oban.cli_ast import _import_cron_paths as ast_discover
	from oban.cli_better_grep import _import_cron_paths as better_grep_discover

	start = time.perf_counter()
	grep_results = grep_discover([str(tmpdir)])
	grep_ms = (time.perf_counter() - start) * 1000

	start = time.perf_counter()
	better_grep_results = better_grep_discover([str(tmpdir)])
	better_grep_ms = (time.perf_counter() - start) * 1000

	start = time.perf_counter()
	ast_results = ast_discover([str(tmpdir)])
	ast_ms = (time.perf_counter() - start) * 1000

	assert sorted(better_grep_results) == sorted(ast_results) == sorted(grep_results)

	print(f" {total} files, {cron_count} with cron (~1000 lines each)")
	print(f" Grep: {grep_ms:7.1f}ms found {len(grep_results):>4}")
	print(
	f" Better Grep: {better_grep_ms:7.1f}ms found {len(better_grep_results):>4}"
	)
	print(f" AST: {ast_ms:7.1f}ms found {len(ast_results):>4}")
	print()


	if __name__ == "__main__":
	print("=== grep vs AST cron discovery benchmark ===\n")

	bench(100, 10)
	bench(100, 100)
	bench(1000, 10)
	bench(1000, 100)
	bench(1000, 1000)
No results found