Use the code below to download and prepare Tiny ImageNet, a smaller variant of the ImageNet dataset.
Credit: https://github.com/basavyr/insta_exp/blob/main/src/utils.py#L204
def download_and_prepare_tiny_imagenet():
    """Download Tiny ImageNet and merge train/val into one per-class folder tree.

    The archive ``tiny-imagenet-200.zip`` is stored in ``DATASETS_DIR`` and
    extracted there. Every training image and every annotated validation
    image is then copied (the extracted tree is left untouched) into
    ``DATASETS_DIR/DEFAULT_TINY_IMAGENET_IMAGE_FOLDER/<class_name>/``.

    An archive that is already present and readable as a zip file is reused
    instead of being downloaded again.

    Returns:
        The path of the merged, ImageFolder-style directory.

    Raises:
        urllib.error.URLError: if the download fails.
        OSError: if extraction or copying fails.
        ValueError: if a non-blank annotation line has fewer than two
            tab-separated fields.
    """
    base_name = "tiny-imagenet-200"
    url = f"http://cs231n.stanford.edu/{base_name}.zip"
    zip_path = os.path.join(DATASETS_DIR, f"{base_name}.zip")
    extract_path = os.path.join(DATASETS_DIR, base_name)

    # Single ImageFolder-style directory that receives both train and val
    # images. DATASETS_DIR is created explicitly so the download target
    # exists even when the image folder is not nested underneath it.
    tiny_imagenet_dir = os.path.join(
        DATASETS_DIR, DEFAULT_TINY_IMAGENET_IMAGE_FOLDER)
    os.makedirs(DATASETS_DIR, exist_ok=True)
    os.makedirs(tiny_imagenet_dir, exist_ok=True)

    # A truncated download has no zip end-of-central-directory record, so
    # is_zipfile() rejects it (and a missing file) and we fetch it again.
    if zipfile.is_zipfile(zip_path):
        print(f"Using existing archive at {zip_path}")
    else:
        print(f"Downloading Tiny ImageNet to {zip_path} ...")
        urllib.request.urlretrieve(url, zip_path)

    print("Extracting...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(DATASETS_DIR)

    # Copy train images: train/<class_name>/images/* -> <class_name>/*
    train_dir = os.path.join(extract_path, "train")
    for class_name in os.listdir(train_dir):
        class_dir = os.path.join(train_dir, class_name, "images")
        if not os.path.isdir(class_dir):
            # Stray entries (e.g. .DS_Store) are not classes; skip them
            # before creating an empty class folder in the output tree.
            continue
        target_class_dir = os.path.join(tiny_imagenet_dir, class_name)
        os.makedirs(target_class_dir, exist_ok=True)
        for img in os.listdir(class_dir):
            shutil.copy(os.path.join(class_dir, img),
                        os.path.join(target_class_dir, img))

    # Copy val images: the class of each image is given by the first two
    # tab-separated columns of val_annotations.txt (file name, class name).
    val_dir = os.path.join(extract_path, "val")
    val_annotations = os.path.join(val_dir, "val_annotations.txt")
    val_img_dir = os.path.join(val_dir, "images")
    with open(val_annotations, 'r', encoding="utf-8") as f:
        for line in f:
            record = line.strip()
            if not record:
                continue  # tolerate blank / trailing empty lines
            img, class_name, *_ = record.split('\t')
            target_class_dir = os.path.join(tiny_imagenet_dir, class_name)
            os.makedirs(target_class_dir, exist_ok=True)
            shutil.copy(os.path.join(val_img_dir, img),
                        os.path.join(target_class_dir, img))

    print(f"Tiny ImageNet prepared at {tiny_imagenet_dir}")
    return tiny_imagenet_dir