Skip to content

Instantly share code, notes, and snippets.

@aaronwolen
Last active November 7, 2025 18:50
Show Gist options
  • Select an option

  • Save aaronwolen/eb6278b2481a4088c161f2e23f5a8199 to your computer and use it in GitHub Desktop.

Select an option

Save aaronwolen/eb6278b2481a4088c161f2e23f5a8199 to your computer and use it in GitHub Desktop.
Download 10X Visium output files for squidpy example datasets.
#!/usr/bin/env python3
"""
Download 10X Visium datasets using squidpy.
This tool wraps squidpy's internal visium() function to provide an easy way
to download and extract 10X Genomics Visium spatial transcriptomics datasets.
"""
from __future__ import annotations
import argparse
import os
import sys
import typing
import warnings
from pathlib import Path
# Suppress warnings from squidpy imports.
# NOTE: this filter is process-wide and stays active after the import, so
# warnings raised later by this tool's dependencies are hidden as well.
warnings.filterwarnings("ignore")

# Import squidpy's dataset-name type. squidpy is a hard requirement: without
# it there is nothing this tool can do, so a failed import ends the program.
try:
    from squidpy.datasets._10x_datasets import VisiumDatasets  # type: ignore
except ImportError:
    # Report on stderr, like every other error path in this tool, so the
    # message is not mixed into regular stdout output.
    print("Error: squidpy must be installed to use this tool.", file=sys.stderr)
    sys.exit(1)

# Extract valid dataset choices from the arguments of the VisiumDatasets type.
VALID_DATASETS = list(typing.get_args(VisiumDatasets))
def download_visium_dataset(
    sample_id: VisiumDatasets,
    *,
    include_hires_tiff: bool = False,
    base_dir: os.PathLike[str],
) -> None:
    """
    Download Visium `datasets <https://support.10xgenomics.com/spatial-gene-expression/datasets>`_ from *10x Genomics*.

    Modified version of squidpy's visium() function that only downloads the 10X
    data without reading it into an AnnData object.

    All files are written to ``base_dir / sample_id``, which is created if
    needed. The spatial tarball is extracted there (members that already exist
    on disk are skipped) and then deleted; the count matrix is stored as
    ``filtered_feature_bc_matrix.h5`` and the optional image as ``image.tif``.

    Parameters
    ----------
    sample_id
        Name of the Visium dataset.
    include_hires_tiff
        Whether to download the high-resolution tissue image.
    base_dir
        Directory where to download the data.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the spatial tarball cannot be opened or extracted.
    """
    import tarfile

    from squidpy._constants._constants import TenxVersions  # type: ignore
    from squidpy.datasets._10x_datasets import VisiumFiles
    from squidpy.datasets._utils import check_presence_download  # type: ignore

    # The sample-name prefix decides which version segment goes into the URL.
    if sample_id.startswith("V1_"):
        spaceranger_version = TenxVersions.V1
    elif sample_id.startswith(("Targeted_", "Parent_")):
        spaceranger_version = TenxVersions.V2
    else:
        spaceranger_version = TenxVersions.V3

    base_dir = Path(base_dir)
    sample_dir = base_dir / sample_id
    sample_dir.mkdir(exist_ok=True, parents=True)

    url_prefix = f"https://cf.10xgenomics.com/samples/spatial-exp/{spaceranger_version}/{sample_id}/"
    visium_files = VisiumFiles(
        f"{sample_id}_filtered_feature_bc_matrix.h5",
        f"{sample_id}_spatial.tar.gz",
        f"{sample_id}_image.tif",
    )

    # download spatial data
    tar_pth = sample_dir / visium_files.spatial_attrs
    check_presence_download(filename=tar_pth, backup_url=url_prefix + visium_files.spatial_attrs)

    # The archive comes from the network, so extract it with the "data" filter
    # (rejects absolute paths, links escaping the destination, device files,
    # ...) whenever the running Python supports extraction filters. Older
    # interpreters without the feature keep the previous unfiltered behaviour.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
    try:
        with tarfile.open(tar_pth) as f:
            for el in f:
                # Leave anything that is already on disk untouched.
                if not (sample_dir / el.name).exists():
                    f.extract(el, sample_dir, **extract_kwargs)
    except (tarfile.TarError, OSError) as e:
        # tarfile.FilterError is a TarError subclass, so members rejected by
        # the filter are reported through the same RuntimeError.
        raise RuntimeError(f"Failed to extract spatial data tarball: {e}") from e
    else:
        tar_pth.unlink()  # remove tarball after extraction

    # download counts
    check_presence_download(
        filename=sample_dir / "filtered_feature_bc_matrix.h5",
        backup_url=url_prefix + visium_files.feature_matrix,
    )

    if include_hires_tiff:  # download image
        check_presence_download(
            filename=sample_dir / "image.tif",
            backup_url=url_prefix + visium_files.tif_image,
        )
def main() -> None:
    """Command-line entry point.

    Parses the arguments, optionally lists the known dataset IDs, downloads
    the requested dataset into the output directory and, unless ``--no-h5ad``
    is given, reads it with squidpy and writes ``<DATASET>.h5ad`` next to the
    dataset folder. Exits with status 1 when the download or the h5ad step
    fails.
    """
    # Shown verbatim below the option list (RawDescriptionHelpFormatter).
    usage_examples = """
Examples:
# Download a dataset to the current directory
%(prog)s V1_Human_Heart
# Download to a specific directory
%(prog)s V1_Human_Heart --output-dir /path/to/datasets
# Include high-resolution TIFF image
%(prog)s V1_Human_Heart --include-hires-tiff
# Also creates V1_Human_Heart.h5ad next to the folder by default
# Disable h5ad writing if you only want raw 10x outputs
%(prog)s V1_Human_Heart --no-h5ad
# List all available datasets
%(prog)s --list-datasets
"""
    cli = argparse.ArgumentParser(
        description="Download 10X Visium spatial datasets",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    # The positional is optional at the parser level so that --list-datasets
    # can be used on its own; its presence is enforced manually further down.
    cli.add_argument(
        "dataset",
        nargs="?",
        help="Name of Visium dataset to download. Use --list-datasets to see valid options.",
        choices=VALID_DATASETS,
        metavar="DATASET",
    )
    cli.add_argument(
        "-o",
        "--output-dir",
        type=Path,
        default=Path.cwd(),
        help="Directory where the dataset will be downloaded (default: current directory)",
    )
    cli.add_argument(
        "--include-hires-tiff",
        action="store_true",
        help="Download the high-resolution tissue section image (larger file size)",
    )
    cli.add_argument(
        "--no-h5ad",
        action="store_true",
        help="Do not read with squidpy nor write the consolidated {DATASET}.h5ad file (default: write)",
    )
    cli.add_argument(
        "-l",
        "--list-datasets",
        action="store_true",
        help="List all available dataset IDs and exit",
    )
    opts = cli.parse_args()

    # Listing mode: print the known IDs and stop.
    if opts.list_datasets:
        print("Available Visium datasets:\n")
        for dataset_name in VALID_DATASETS:
            print(f" {dataset_name}")
        return

    # Outside listing mode a dataset name is mandatory.
    if not opts.dataset:
        cli.error("the following arguments are required: DATASET")

    # Resolve the target directory and make sure it exists.
    target_dir = opts.output_dir.expanduser().absolute()
    target_dir.mkdir(parents=True, exist_ok=True)
    sample_root = target_dir / opts.dataset

    print(f"Downloading dataset: {opts.dataset}")
    print(f"Output directory: {target_dir}")
    if opts.include_hires_tiff:
        print("Including high-resolution TIFF image")

    try:
        download_visium_dataset(
            sample_id=opts.dataset,
            include_hires_tiff=opts.include_hires_tiff,
            base_dir=target_dir,
        )
        print("\nDownload complete!")
        print(f"Dataset location: {sample_root}")

        # Raw 10x outputs only: nothing more to do.
        if opts.no_h5ad:
            return

        # Load into AnnData via squidpy and write an .h5ad next to the folder.
        try:
            from squidpy.read._read import visium as read_visium  # type: ignore

            # Hand the hi-res image to the reader only when it was requested
            # and is actually present on disk.
            reader_kwargs = {}
            hires_image = sample_root / "image.tif"
            if opts.include_hires_tiff and hires_image.exists():
                reader_kwargs["source_image_path"] = hires_image

            print("Reading dataset into AnnData …")
            adata = read_visium(sample_root, **reader_kwargs)

            h5ad_file = target_dir / f"{opts.dataset}.h5ad"
            print(f"Writing {h5ad_file} …")
            # Use write_h5ad for clarity; overwrites if exists
            adata.write_h5ad(h5ad_file)
            print("h5ad save complete!")
        except (ImportError, FileNotFoundError, OSError, RuntimeError, ValueError) as err:
            print(
                f"\nError while creating h5ad (use --no-h5ad to skip): {err}",
                file=sys.stderr,
            )
            sys.exit(1)
    except (RuntimeError, OSError) as err:
        print(f"\nError downloading dataset: {err}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment