Last active
November 7, 2025 18:50
-
-
Save aaronwolen/eb6278b2481a4088c161f2e23f5a8199 to your computer and use it in GitHub Desktop.
Download 10X Visium output files for squidpy example datasets.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Download 10X Visium datasets using squidpy. | |
| This tool adapts squidpy's internal visium() dataset function to provide an easy way | |
| to download and extract 10X Genomics Visium spatial transcriptomics datasets and, | |
| unless --no-h5ad is given, to save each one as a consolidated .h5ad file. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import os | |
| import sys | |
| import typing | |
| import warnings | |
| from pathlib import Path | |
# Suppress warnings from squidpy imports.
# NOTE: this installs a process-wide "ignore" filter, so warnings raised later
# in the run are silenced as well.
warnings.filterwarnings("ignore")

# squidpy is a hard requirement: exit with a readable message instead of a
# traceback when it is missing. Only the import itself sits in the ``try`` so
# that unrelated failures are not mistaken for a missing dependency.
try:
    from squidpy.datasets._10x_datasets import VisiumDatasets  # type: ignore
except ImportError:
    # Diagnostics go to stderr, like every other error this tool reports.
    print("Error: squidpy must be installed to use this tool.", file=sys.stderr)
    sys.exit(1)

# Valid dataset choices, taken from the type arguments of ``VisiumDatasets``
# (presumably a ``typing.Literal`` of dataset names -- confirm against squidpy).
VALID_DATASETS = list(typing.get_args(VisiumDatasets))
def download_visium_dataset(
    sample_id: VisiumDatasets,
    *,
    include_hires_tiff: bool = False,
    base_dir: os.PathLike[str],
) -> None:
    """
    Download Visium `datasets <https://support.10xgenomics.com/spatial-gene-expression/datasets>`_ from *10x Genomics*.

    Modified version of squidpy's visium() function that only downloads the 10X
    data without reading it into an AnnData object.

    Everything is stored under ``base_dir / sample_id`` (created if missing):
    the contents of the ``*_spatial.tar.gz`` archive, the counts matrix saved as
    ``filtered_feature_bc_matrix.h5`` and, optionally, the high-resolution image
    saved as ``image.tif``. The spatial tarball is deleted once it has been
    extracted successfully.

    Parameters
    ----------
    sample_id
        Name of the Visium dataset.
    include_hires_tiff
        Whether to download the high-resolution tissue image.
    base_dir
        Directory where to download the data.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the spatial tarball cannot be opened or extracted, or if it contains
        a member whose path would land outside the sample directory.
    """
    import tarfile

    from squidpy._constants._constants import TenxVersions  # type: ignore
    from squidpy.datasets._10x_datasets import VisiumFiles
    from squidpy.datasets._utils import check_presence_download  # type: ignore

    # The version segment of the download URL is chosen from the sample-name prefix.
    if sample_id.startswith("V1_"):
        spaceranger_version = TenxVersions.V1
    elif sample_id.startswith(("Targeted_", "Parent_")):
        spaceranger_version = TenxVersions.V2
    else:
        spaceranger_version = TenxVersions.V3

    sample_dir = Path(base_dir) / sample_id
    sample_dir.mkdir(exist_ok=True, parents=True)
    # Canonical form of the target directory, used to validate archive members.
    sample_root = Path(os.path.realpath(sample_dir))

    url_prefix = f"https://cf.10xgenomics.com/samples/spatial-exp/{spaceranger_version}/{sample_id}/"
    visium_files = VisiumFiles(
        f"{sample_id}_filtered_feature_bc_matrix.h5",
        f"{sample_id}_spatial.tar.gz",
        f"{sample_id}_image.tif",
    )

    # download spatial data
    tar_pth = sample_dir / visium_files.spatial_attrs
    check_presence_download(filename=tar_pth, backup_url=url_prefix + visium_files.spatial_attrs)
    try:
        with tarfile.open(tar_pth) as f:
            for el in f:
                # The archive comes from the network: refuse any member whose
                # resolved destination escapes the sample directory ("../" or
                # absolute names, or paths routed through an extracted symlink).
                target = Path(os.path.realpath(sample_dir / el.name))
                if target != sample_root and sample_root not in target.parents:
                    raise tarfile.TarError(f"unsafe member path in archive: {el.name!r}")
                # Do not overwrite files that are already present.
                if not (sample_dir / el.name).exists():
                    f.extract(el, sample_dir)
    except (tarfile.TarError, OSError) as e:
        raise RuntimeError(f"Failed to extract spatial data tarball: {e}") from e
    else:
        tar_pth.unlink()  # remove tarball after extraction

    # download counts
    check_presence_download(
        filename=sample_dir / "filtered_feature_bc_matrix.h5",
        backup_url=url_prefix + visium_files.feature_matrix,
    )

    if include_hires_tiff:  # download image
        check_presence_download(
            filename=sample_dir / "image.tif",
            backup_url=url_prefix + visium_files.tif_image,
        )
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the command-line interface."""
    parser = argparse.ArgumentParser(
        description="Download 10X Visium spatial datasets",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Download a dataset to the current directory
%(prog)s V1_Human_Heart
# Download to a specific directory
%(prog)s V1_Human_Heart --output-dir /path/to/datasets
# Include high-resolution TIFF image
%(prog)s V1_Human_Heart --include-hires-tiff
# Also creates V1_Human_Heart.h5ad next to the folder by default
# Disable h5ad writing if you only want raw 10x outputs
%(prog)s V1_Human_Heart --no-h5ad
# List all available datasets
%(prog)s --list-datasets
""",
    )
    # The positional is optional so that ``--list-datasets`` works on its own;
    # ``main`` enforces its presence for every other invocation.
    parser.add_argument(
        "dataset",
        nargs="?",
        help="Name of Visium dataset to download. Use --list-datasets to see valid options.",
        choices=VALID_DATASETS,
        metavar="DATASET",
    )
    parser.add_argument(
        "-o",
        "--output-dir",
        type=Path,
        default=Path.cwd(),
        help="Directory where the dataset will be downloaded (default: current directory)",
    )
    parser.add_argument(
        "--include-hires-tiff",
        action="store_true",
        help="Download the high-resolution tissue section image (larger file size)",
    )
    parser.add_argument(
        "--no-h5ad",
        action="store_true",
        help="Do not read with squidpy nor write the consolidated {DATASET}.h5ad file (default: write)",
    )
    parser.add_argument(
        "-l",
        "--list-datasets",
        action="store_true",
        help="List all available dataset IDs and exit",
    )
    return parser


def _write_h5ad(sample_path: Path, h5ad_path: Path, *, use_hires_tiff: bool) -> None:
    """Read the downloaded sample with squidpy and write it to ``h5ad_path``."""
    from squidpy.read._read import visium as read_visium  # type: ignore

    read_kwargs = {}
    if use_hires_tiff:
        # Only pass the image if we actually downloaded it
        potential_img = sample_path / "image.tif"
        if potential_img.exists():
            read_kwargs["source_image_path"] = potential_img

    print("Reading dataset into AnnData …")
    adata = read_visium(sample_path, **read_kwargs)

    print(f"Writing {h5ad_path} …")
    # Use write_h5ad for clarity; overwrites if exists
    adata.write_h5ad(h5ad_path)
    print("h5ad save complete!")


def main() -> None:
    """Main entry point for the CLI.

    Parses the command line, then either lists the known dataset IDs or
    downloads the requested dataset into ``<output-dir>/<DATASET>``. Unless
    ``--no-h5ad`` is given, the dataset is also written to
    ``<output-dir>/<DATASET>.h5ad``. Exits with status 1 if the download or the
    h5ad conversion fails.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Handle --list-datasets
    if args.list_datasets:
        print("Available Visium datasets:\n")
        for ds in VALID_DATASETS:
            print(f" {ds}")
        return

    # Require dataset_id if not listing datasets
    if not args.dataset:
        parser.error("the following arguments are required: DATASET")

    # Create output directory if it doesn't exist
    output_dir = args.output_dir.expanduser().absolute()
    output_dir.mkdir(parents=True, exist_ok=True)
    sample_path = output_dir / args.dataset

    print(f"Downloading dataset: {args.dataset}")
    print(f"Output directory: {output_dir}")
    if args.include_hires_tiff:
        print("Including high-resolution TIFF image")

    # Each failure domain gets its own minimal ``try`` so the reported message
    # always names the step that actually failed.
    try:
        download_visium_dataset(
            sample_id=args.dataset,
            include_hires_tiff=args.include_hires_tiff,
            base_dir=output_dir,
        )
    except (RuntimeError, OSError) as e:
        print(f"\nError downloading dataset: {e}", file=sys.stderr)
        sys.exit(1)

    print("\nDownload complete!")
    print(f"Dataset location: {sample_path}")

    if args.no_h5ad:
        return

    # Load into AnnData via squidpy and write an .h5ad next to the folder
    try:
        _write_h5ad(
            sample_path,
            output_dir / f"{args.dataset}.h5ad",
            use_hires_tiff=args.include_hires_tiff,
        )
    except (ImportError, FileNotFoundError, OSError, RuntimeError, ValueError) as e:
        print(
            f"\nError while creating h5ad (use --no-h5ad to skip): {e}",
            file=sys.stderr,
        )
        sys.exit(1)
# Run the CLI only when this file is executed as a script; importing it as a
# module must not trigger argument parsing or any download.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment