Skip to content

Instantly share code, notes, and snippets.

@dhrp
Last active March 3, 2026 10:50
Show Gist options
  • Select an option

  • Save dhrp/c8087e3befebfe7e567b7274e7b47ab1 to your computer and use it in GitHub Desktop.

Select an option

Save dhrp/c8087e3befebfe7e567b7274e7b47ab1 to your computer and use it in GitHub Desktop.
fsspec driver prefix compatibility tester
"""
probe_driver_compat.py
Probe fsspec filesystem drivers for `prefix=` kwarg compatibility on find()/_find().
Usage:
python probe_driver_compat.py # probe all known protocols
python probe_driver_compat.py gcs s3 abfs # probe specific protocols
"""
import ast
import inspect
import json
import re
import sys
import textwrap
from pathlib import Path

from fsspec.registry import get_filesystem_class
# Protocol names probed by default, i.e. when none are named on the command line.
EXTERNAL_PROTOCOLS = [
    # Cloud object stores (presumably served by gcsfs / s3fs / adlfs -- verify
    # against the fsspec registry).
    "gcs", "gs", "s3", "s3a", "abfs", "adl", "az",
    # Remaining third-party drivers, kept in alphabetical order.
    "asynclocal", "box", "dropbox", "dvc", "gdrive", "hf", "lakefs",
    "oci", "ocilake", "oss", "pyscript", "root", "tos", "tosfs",
    "wandb", "webdav",
]
# Heuristic for the s3fs-style guard:
#     if (withdirs or maxdepth) and prefix: raise ValueError(...)
# The two alternatives cover both token orders: withdirs/maxdepth before
# prefix, and prefix before withdirs/maxdepth.
# NOTE(review): with re.DOTALL every lazy `.*?` gap may span line breaks, so
# the `if`, the tokens and the `raise ValueError` are not required to belong
# to the same statement -- treat a match as approximate.
_WITHDIRS_PREFIX_GUARD_RE = re.compile(
    r"if\s+.*?(?:withdirs|maxdepth).*?prefix.*?raise\s+ValueError"
    r"|if\s+.*?prefix.*?(?:withdirs|maxdepth).*?raise\s+ValueError",
    re.DOTALL,
)
def _is_value_error(exc) -> bool:
    """Return True if *exc* (the operand of a ``raise``) is ``ValueError`` or a call to it."""
    target = exc.func if isinstance(exc, ast.Call) else exc
    return isinstance(target, ast.Name) and target.id == "ValueError"


def _mentions_prefix_conflict(conditions) -> bool:
    """Return True if the dumped ``if`` tests mention prefix and withdirs/maxdepth."""
    text = " ".join(conditions)
    return "prefix" in text and ("withdirs" in text or "maxdepth" in text)


def _raises_in_conflict_guard(node, conditions=()) -> bool:
    """Return True if *node* contains a ``raise ValueError`` guarded by a conflict test.

    *conditions* holds the ``ast.dump`` text of every ``if`` test enclosing
    *node*, so nested guards (``if prefix:`` / ``if withdirs:``) and ``elif``
    chains are recognised as one combined condition.
    """
    if isinstance(node, ast.Raise):
        return (
            node.exc is not None
            and _is_value_error(node.exc)
            and _mentions_prefix_conflict(conditions)
        )
    if isinstance(node, ast.If):
        # Both branches depend on this test, so record it for each of them.
        conditions = conditions + (ast.dump(node.test),)
        children = node.body + node.orelse
    else:
        children = ast.iter_child_nodes(node)
    return any(_raises_in_conflict_guard(child, conditions) for child in children)


def _has_withdirs_prefix_conflict(method) -> bool:
    """Return True if *method* raises ValueError when prefix + withdirs/maxdepth
    are combined (detected via source inspection).

    The source is parsed with ``ast`` and a conflict is reported only when a
    ``raise ValueError`` is nested under ``if`` tests that mention ``prefix``
    together with ``withdirs`` or ``maxdepth``.  Unrelated statements that
    merely happen to contain those words no longer count as a conflict.

    Args:
        method: Callable to inspect, or ``None`` when the attribute is absent.

    Returns:
        ``False`` when *method* is ``None``, when its source is unavailable,
        or when no such guard is found; ``True`` otherwise.
    """
    if method is None:
        return False
    try:
        src = inspect.getsource(method)
    except (OSError, TypeError):
        # No retrievable source (builtins, C extensions, dynamically built code).
        return False
    # Fast pre-check: a guard needs "prefix", "ValueError" and at least one of
    # "withdirs" / "maxdepth" somewhere in the source.
    if "prefix" not in src or "ValueError" not in src:
        return False
    if "withdirs" not in src and "maxdepth" not in src:
        return False
    try:
        tree = ast.parse(textwrap.dedent(src))
    except (SyntaxError, ValueError):
        # The snippet cannot be parsed on its own (e.g. dedenting failed);
        # fall back to the looser regex heuristic rather than give up.
        return bool(_WITHDIRS_PREFIX_GUARD_RE.search(src))
    return _raises_in_conflict_guard(tree)
def signature_info(method):
    """Summarise how a ``find``-style callable can receive a ``prefix=`` keyword.

    Args:
        method: Callable to inspect, or ``None`` when the attribute is absent.

    Returns:
        ``None`` when *method* is ``None``; otherwise a dict with the keys
        ``signature`` (stringified signature), ``has_prefix`` (a parameter
        named ``prefix`` exists and can be passed by keyword), ``has_varkw``
        (a ``**kwargs``-style parameter exists) and ``prefix_handling``
        (``"explicit"``, ``"via_kwargs"`` or ``"no"``).

    Raises:
        TypeError, ValueError: Propagated from ``inspect.signature`` when the
            callable cannot be introspected.
    """
    if method is None:
        return None
    sig = inspect.signature(method)
    params = sig.parameters

    # Only these kinds can be bound with ``prefix=...``.  A positional-only
    # ``prefix``, ``*prefix`` or ``**prefix`` does not accept the keyword itself.
    keyword_kinds = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    prefix_param = params.get("prefix")
    has_prefix = prefix_param is not None and prefix_param.kind in keyword_kinds
    has_varkw = any(
        p.kind == inspect.Parameter.VAR_KEYWORD for p in params.values()
    )

    if has_prefix:
        prefix_handling = "explicit"  # declared by name in the signature
    elif has_varkw:
        prefix_handling = "via_kwargs"  # swallowed by **kwargs; may be silently ignored
    else:
        prefix_handling = "no"  # not accepted at all

    return {
        "signature": str(sig),
        "has_prefix": has_prefix,
        "has_varkw": has_varkw,
        "prefix_handling": prefix_handling,
    }
def probe_protocol(protocol):
    """Inspect the filesystem class registered for *protocol*.

    The class is resolved with ``get_filesystem_class`` and its ``find`` and
    ``_find`` attributes are summarised with ``signature_info``.

    Returns:
        A dict that always carries ``protocol`` and ``status``.  With
        ``status == "ok"`` it also holds the class path, both signature
        summaries, the strongest ``prefix_handling`` of the two methods
        (explicit > via_kwargs > no), the derived ``accepts_prefix_kwarg`` /
        ``reads_prefix`` flags and ``breaks`` (a withdirs+prefix guard was
        detected in either method).  Any exception raised along the way is
        reported as a ``status == "error"`` record instead of propagating.
    """
    handling_rank = {"explicit": 2, "via_kwargs": 1, "no": 0}
    try:
        fs_class = get_filesystem_class(protocol)
        sync_find = getattr(fs_class, "find", None)
        find_info = signature_info(sync_find)
        async_find = getattr(fs_class, "_find", None)
        async_find_info = signature_info(async_find)
        class_path = f"{fs_class.__module__}.{fs_class.__name__}"

        if find_info is None and async_find_info is None:
            return {
                "protocol": protocol,
                "status": "error",
                "class": class_path,
                "error": "Neither find nor _find exists",
            }

        # Keep the strongest handling offered by either method.
        handlings = [
            info["prefix_handling"]
            for info in (find_info, async_find_info)
            if info
        ]
        best = max(handlings, key=handling_rank.__getitem__, default="no")

        # _find is inspected first, then find; either one carrying the guard
        # is enough to flag the driver.
        withdirs_conflict = any(
            _has_withdirs_prefix_conflict(candidate)
            for candidate in (async_find, sync_find)
        )

        return {
            "protocol": protocol,
            "status": "ok",
            "class": class_path,
            "find": find_info,
            "_find": async_find_info,
            "prefix_handling": best,  # explicit | via_kwargs | no
            "accepts_prefix_kwarg": handling_rank[best] > 0,
            "reads_prefix": best == "explicit",
            "breaks": withdirs_conflict,
        }
    except Exception as exc:
        # Best-effort probe: one failing driver must not abort the whole run.
        return {
            "protocol": protocol,
            "status": "error",
            "error": f"{type(exc).__name__}: {exc}",
        }
def main(protocols):
    """Probe every protocol, write a JSON report and print a summary table.

    Args:
        protocols: Sized iterable of fsspec protocol names.  May be empty, in
            which case an empty report and a header-only table are produced.

    Side effects:
        Writes ``driver_compat_report.json`` to the current working directory
        and prints the table plus a one-line summary to stdout.
    """
    results = [probe_protocol(p) for p in protocols]
    summary = {
        "protocols_total": len(protocols),
        "ok_count": sum(1 for r in results if r["status"] == "ok"),
        "error_count": sum(1 for r in results if r["status"] == "error"),
        "reads_prefix_count": sum(1 for r in results if r.get("reads_prefix")),
        "via_kwargs_only_count": sum(
            1 for r in results if r.get("prefix_handling") == "via_kwargs"
        ),
        "accepts_prefix_count": sum(
            1
            for r in results
            if r.get("status") == "ok" and r.get("accepts_prefix_kwarg")
        ),
        "breaks_count": sum(1 for r in results if r.get("breaks")),
    }
    out = {"summary": summary, "results": results}
    output_path = Path("driver_compat_report.json")
    output_path.write_text(json.dumps(out, indent=2), encoding="utf-8")

    # Print a quick summary table to stdout
    rows = []
    for r in results:
        status = r["status"]
        handling = r.get("prefix_handling", "n/a") if status == "ok" else "n/a"
        conflict = "YES" if r.get("breaks") else ("-" if status == "error" else "no")
        # Error records may lack "class"; show the error text in that column.
        cls = r.get("class", r.get("error", ""))
        rows.append((r["protocol"], status, handling, conflict, cls))

    def column_width(label, index):
        # default=0 keeps an empty result set from crashing max().
        widest_cell = max((len(row[index]) for row in rows), default=0)
        return max(len(label), widest_cell)

    c0 = column_width("protocol", 0)
    c1 = column_width("status", 1)
    c2 = column_width("prefix_read", 2)
    c3 = column_width("breaks", 3)

    header = (f"{'protocol':<{c0}} {'status':<{c1}} "
              f"{'prefix_read':<{c2}} {'breaks':<{c3}} class")
    print(f"\n{header}")
    print("-" * (c0 + c1 + c2 + c3 + len(" ") * 4 + 40))
    for protocol, status, handling, conflict, cls in rows:
        print(f"{protocol:<{c0}} {status:<{c1}} {handling:<{c2}} {conflict:<{c3}} {cls}")
    print(f"\nSummary: {summary['ok_count']}/{summary['protocols_total']} ok | "
          f"prefix: {summary['reads_prefix_count']} explicit, "
          f"{summary['via_kwargs_only_count']} via_kwargs only | "
          f"{summary['breaks_count']} break with withdirs+prefix")
    print(f"Report written to {output_path.resolve()}")
if __name__ == "__main__":
    # Protocols named on the command line take precedence; with no arguments
    # the default EXTERNAL_PROTOCOLS list is probed.
    main(sys.argv[1:] or EXTERNAL_PROTOCOLS)
gcsfs
s3fs
adlfs
fsspec
google-cloud-storage
pytest
pytest-asyncio
# External filesystem drivers audited for prefix/find compatibility
morefs[asynclocalfs]
boxfs
dropboxdrivefs
dropbox
dvc
gdrive-fsspec
gdrivefs
huggingface_hub
lakefs-spec
ocifs
ossfs
pyscript-fsspec-client
fsspec-xrootd
# xrootd requires cmake
xrootd
tosfs
wandbfs
webdav4
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment