Last active
March 3, 2026 10:50
-
-
Save dhrp/c8087e3befebfe7e567b7274e7b47ab1 to your computer and use it in GitHub Desktop.
fsspec driver prefix compatibility tester
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| probe_driver_compat.py | |
| Probe fsspec filesystem drivers for `prefix=` kwarg compatibility on find()/_find(). | |
| Usage: | |
| python probe_driver_compat.py # probe all known protocols | |
| python probe_driver_compat.py gcs s3 abfs # probe specific protocols | |
| """ | |
| import inspect | |
| import json | |
| import re | |
| import sys | |
| from pathlib import Path | |
| from fsspec.registry import get_filesystem_class | |
# Default protocol names probed when the script is started without any
# command-line arguments. Order is preserved in the report and summary table.
EXTERNAL_PROTOCOLS = [
    "gcs", "gs",
    "s3", "s3a",
    "abfs", "adl", "az",
    "asynclocal", "box", "dropbox", "dvc", "gdrive", "hf",
    "lakefs", "oci", "ocilake", "oss", "pyscript", "root",
    "tos", "tosfs", "wandb", "webdav",
]
| # Matches the s3fs-style guard: if (withdirs or maxdepth) and prefix: raise ValueError(...) | |
| _WITHDIRS_PREFIX_GUARD_RE = re.compile( | |
| r"if\s+.*?(?:withdirs|maxdepth).*?prefix.*?raise\s+ValueError" | |
| r"|if\s+.*?prefix.*?(?:withdirs|maxdepth).*?raise\s+ValueError", | |
| re.DOTALL, | |
| ) | |
| def _has_withdirs_prefix_conflict(method) -> bool: | |
| """Return True if the method raises ValueError when prefix + withdirs/maxdepth | |
| are combined (detected via source inspection).""" | |
| if method is None: | |
| return False | |
| try: | |
| src = inspect.getsource(method) | |
| # Fast pre-check: all three tokens must appear in the source | |
| if "prefix" not in src or "withdirs" not in src and "maxdepth" not in src: | |
| return False | |
| if "ValueError" not in src: | |
| return False | |
| return bool(_WITHDIRS_PREFIX_GUARD_RE.search(src)) | |
| except (OSError, TypeError): | |
| return False | |
def signature_info(method):
    """Describe how *method*'s signature treats a ``prefix`` keyword.

    Args:
        method: a callable to introspect, or None.

    Returns:
        None when *method* is None; otherwise a dict with the stringified
        signature, whether a parameter named ``prefix`` is declared, whether
        a ``**kwargs``-style parameter exists, and the resulting
        ``prefix_handling`` label:

        * ``"explicit"``   - a ``prefix`` parameter is declared
        * ``"via_kwargs"`` - only ``**kwargs`` would absorb it (it may be
          silently ignored)
        * ``"no"``         - the keyword is not accepted at all
    """
    if method is None:
        return None

    signature = inspect.signature(method)
    declares_prefix = "prefix" in signature.parameters

    takes_var_keywords = False
    for parameter in signature.parameters.values():
        if parameter.kind == inspect.Parameter.VAR_KEYWORD:
            takes_var_keywords = True
            break

    # An explicit declaration wins over a **kwargs catch-all.
    if declares_prefix:
        handling = "explicit"
    else:
        handling = "via_kwargs" if takes_var_keywords else "no"

    return {
        "signature": str(signature),
        "has_prefix": declares_prefix,
        "has_varkw": takes_var_keywords,
        "prefix_handling": handling,
    }
def probe_protocol(protocol):
    """Inspect one filesystem protocol's ``find``/``_find`` for ``prefix`` support.

    Args:
        protocol: protocol name passed to ``get_filesystem_class``.

    Returns:
        A dict that always carries ``protocol`` and ``status``. With status
        ``"ok"`` it also holds the class path, the per-method signature info,
        the best ``prefix_handling`` across both methods, the derived
        ``accepts_prefix_kwarg``/``reads_prefix`` flags and ``breaks`` (the
        withdirs+prefix conflict flag). With status ``"error"`` it holds an
        ``error`` message instead. Any exception raised while probing is
        turned into such an error entry rather than propagated.
    """
    try:
        fs_class = get_filesystem_class(protocol)
        sync_info = signature_info(getattr(fs_class, "find", None))
        async_info = signature_info(getattr(fs_class, "_find", None))
        class_path = f"{fs_class.__module__}.{fs_class.__name__}"

        if sync_info is None and async_info is None:
            return {
                "protocol": protocol,
                "status": "error",
                "class": class_path,
                "error": "Neither find nor _find exists",
            }

        # Keep the strongest handling seen on either method; the tuple is
        # ordered weakest to strongest: no < via_kwargs < explicit.
        strength = ("no", "via_kwargs", "explicit")
        best = "no"
        for info in (sync_info, async_info):
            if not info:
                continue
            candidate = info["prefix_handling"]
            if strength.index(candidate) > strength.index(best):
                best = candidate

        # _find is checked first, then find; either one carrying the guard
        # marks the protocol as breaking.
        conflict = any(
            _has_withdirs_prefix_conflict(getattr(fs_class, attr, None))
            for attr in ("_find", "find")
        )

        return {
            "protocol": protocol,
            "status": "ok",
            "class": class_path,
            "find": sync_info,
            "_find": async_info,
            "prefix_handling": best,  # explicit | via_kwargs | no
            "accepts_prefix_kwarg": best in ("explicit", "via_kwargs"),
            "reads_prefix": best == "explicit",
            "breaks": conflict,
        }
    except Exception as exc:
        return {
            "protocol": protocol,
            "status": "error",
            "error": f"{type(exc).__name__}: {exc}",
        }
def main(protocols):
    """Probe every protocol, write a JSON report and print a summary table.

    Args:
        protocols: iterable of protocol names; each is passed to
            ``probe_protocol``. An empty iterable is allowed and produces an
            empty report and a header-only table.

    Side effects:
        Writes ``driver_compat_report.json`` (relative to the current working
        directory) and prints the table, a summary line and the report path
        to stdout.
    """
    results = [probe_protocol(p) for p in protocols]

    def count(predicate):
        return sum(1 for r in results if predicate(r))

    summary = {
        # len(results) equals the number of protocols and also works when
        # *protocols* is an iterator without __len__.
        "protocols_total": len(results),
        "ok_count": count(lambda r: r["status"] == "ok"),
        "error_count": count(lambda r: r["status"] == "error"),
        "reads_prefix_count": count(lambda r: r.get("reads_prefix")),
        "via_kwargs_only_count": count(
            lambda r: r.get("prefix_handling") == "via_kwargs"
        ),
        "accepts_prefix_count": count(
            lambda r: r.get("status") == "ok" and r.get("accepts_prefix_kwarg")
        ),
        "breaks_count": count(lambda r: r.get("breaks")),
    }

    out = {"summary": summary, "results": results}
    output_path = Path("driver_compat_report.json")
    output_path.write_text(json.dumps(out, indent=2), encoding="utf-8")

    # Print a quick summary table to stdout
    rows = []
    for r in results:
        status = r["status"]
        handling = r.get("prefix_handling", "n/a") if status == "ok" else "n/a"
        conflict = "YES" if r.get("breaks") else ("-" if status == "error" else "no")
        # Error entries may lack "class"; show the error text in that column.
        cls = r.get("class", r.get("error", ""))
        rows.append((r["protocol"], status, handling, conflict, cls))

    titles = ("protocol", "status", "prefix_read", "breaks")
    # Build each width from a list that always contains the title length, so
    # max() never sees an empty sequence when there are no rows.
    widths = [
        max([len(title), *(len(row[col]) for row in rows)])
        for col, title in enumerate(titles)
    ]

    gap = " "
    header = gap.join(
        [f"{title:<{width}}" for title, width in zip(titles, widths)] + ["class"]
    )
    print(f"\n{header}")
    print("-" * (sum(widths) + len(gap) * 4 + 40))
    for *cells, cls in rows:
        padded = [f"{cell:<{width}}" for cell, width in zip(cells, widths)]
        print(gap.join(padded + [cls]))

    print(f"\nSummary: {summary['ok_count']}/{summary['protocols_total']} ok | "
          f"prefix: {summary['reads_prefix_count']} explicit, "
          f"{summary['via_kwargs_only_count']} via_kwargs only | "
          f"{summary['breaks_count']} break with withdirs+prefix")
    print(f"Report written to {output_path.resolve()}")
if __name__ == "__main__":
    # Protocols named on the command line take precedence; with no
    # arguments the slice is empty and the default list is probed instead.
    main(sys.argv[1:] or EXTERNAL_PROTOCOLS)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| gcsfs | |
| s3fs | |
| adlfs | |
| fsspec | |
| google-cloud-storage | |
| pytest | |
| pytest-asyncio | |
| # External filesystem drivers audited for prefix/find compatibility | |
| morefs[asynclocalfs] | |
| boxfs | |
| dropboxdrivefs | |
| dropbox | |
| dvc | |
| gdrive-fsspec | |
| gdrivefs | |
| huggingface_hub | |
| lakefs-spec | |
| ocifs | |
| ossfs | |
| pyscript-fsspec-client | |
| fsspec-xrootd | |
| # xrootd requires cmake | |
| xrootd | |
| tosfs | |
| wandbfs | |
| webdav4 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment