basavyr/download_and_prepare_tiny_imagenet.md

## download_and_prepare_tiny_imagenet.md

      
    Raw
  

              download_and_prepare_tiny_imagenet.md
            
          
    Use the code below to download and prepare Tiny ImageNet, a smaller variant of the ImageNet dataset.
Credit: https://github.com/basavyr/insta_exp/blob/main/src/utils.py#L204
def download_and_prepare_tiny_imagenet():
    """Download Tiny ImageNet and merge it into one ImageFolder-style tree.

    The archive is fetched into ``DATASETS_DIR`` (the download is skipped when
    a valid zip is already present), extracted there, and every train and val
    image is copied into
    ``DATASETS_DIR/DEFAULT_TINY_IMAGENET_IMAGE_FOLDER/<class_name>/``.
    Images are copied, not moved, so the extracted tree is left intact.

    Returns:
        The path of the merged image folder.
    """
    base_name = "tiny-imagenet-200"
    url = f"http://cs231n.stanford.edu/{base_name}.zip"
    zip_path = os.path.join(DATASETS_DIR, f"{base_name}.zip")
    extract_path = os.path.join(DATASETS_DIR, base_name)

    # Train and val images end up together in this single directory,
    # one sub-directory per class.
    tiny_imagenet_dir = os.path.join(
        DATASETS_DIR, DEFAULT_TINY_IMAGENET_IMAGE_FOLDER)
    os.makedirs(tiny_imagenet_dir, exist_ok=True)

    # is_zipfile() is False for a missing or truncated file, so a broken
    # archive left by an interrupted run is downloaded again.
    if zipfile.is_zipfile(zip_path):
        print(f"Reusing existing archive at {zip_path}")
    else:
        print(f"Downloading Tiny ImageNet to {zip_path} ...")
        # Download to a temporary name and rename afterwards so that
        # zip_path only ever refers to a completely downloaded file.
        partial_path = f"{zip_path}.part"
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, zip_path)

    print("Extracting...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(DATASETS_DIR)

    # Copy train images: train/<class_name>/images/* -> <class_name>/*
    train_dir = os.path.join(extract_path, "train")
    for class_name in os.listdir(train_dir):
        class_dir = os.path.join(train_dir, class_name, "images")
        if not os.path.isdir(class_dir):
            # Stray entries (e.g. .DS_Store) are not class folders.
            continue
        target_class_dir = os.path.join(tiny_imagenet_dir, class_name)
        os.makedirs(target_class_dir, exist_ok=True)
        for img in os.listdir(class_dir):
            shutil.copy(os.path.join(class_dir, img),
                        os.path.join(target_class_dir, img))

    # Copy val images. They all sit in one flat folder; the class of each
    # image is read from the tab-separated annotations file
    # (first column: file name, second column: class name).
    val_dir = os.path.join(extract_path, "val")
    val_annotations = os.path.join(val_dir, "val_annotations.txt")
    val_img_dir = os.path.join(val_dir, "images")
    with open(val_annotations, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            img, class_name, *_ = line.split('\t')
            target_class_dir = os.path.join(tiny_imagenet_dir, class_name)
            os.makedirs(target_class_dir, exist_ok=True)
            shutil.copy(os.path.join(val_img_dir, img),
                        os.path.join(target_class_dir, img))

    print(f"Tiny ImageNet prepared at {tiny_imagenet_dir}")

    return tiny_imagenet_dir
No results found