Created
October 5, 2025 16:59
-
-
Save parsaM110/8d492a177741b84f119b9e40fc32014e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [] | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "accelerator": "GPU", | |
| "gpuClass": "standard" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "id": "IaY5IKw_2veM" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "array_cpu = np.random.randint(0,255, size=(4000,4000))" | |
| ], | |
| "metadata": { | |
| "id": "JwvjHsHuFvtX" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "array_cpu.nbytes/1e6" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "fz6VkF3o3epe", | |
| "outputId": "5bc19755-72cf-4c36-ac5b-a6d173023453" | |
| }, | |
| "execution_count": 4, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "128.0" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 4 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "* **CuPy**: Drop-in NumPy replacement on GPU. Best when your workload is expressible via vectorized array ops (ufuncs, reductions, broadcasting) or uses libraries (cuBLAS/cuFFT/cuDNN). Very fast with minimal code. Also lets you write custom kernels via `ElementwiseKernel`, `ReductionKernel`, or `RawKernel` if needed.\n", | |
| "* **Numba (CUDA target)**: You write custom kernels with `@cuda.jit`. Great for **irregular** control flow, custom memory access patterns, or algorithms that don’t map cleanly to standard array ops. More control (shared memory, tiling, warps), more code. No automatic BLAS/FFT—those you’d call separately." | |
| ], | |
| "metadata": { | |
| "id": "DzoCRZ6FGETn" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import cupy as cp" | |
| ], | |
| "metadata": { | |
| "id": "hUEhBV0b3I-y" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
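| "**CuPy doesn't always mean faster**: `cp.asarray` must copy the array from host memory to the GPU, whereas `np.asarray` on an existing NumPy array just returns it without copying.\n", | |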
| "\n" | |
| ], | |
| "metadata": { | |
| "id": "vvff6z7wIlpq" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "%%timeit\n", | |
| "\n", | |
| "np.asarray(array_cpu)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "KkixcwBSZKBp", | |
| "outputId": "00eeb862-a630-4911-ad07-9bc59f9fcf0c" | |
| }, | |
| "execution_count": 9, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "76.8 ns ± 29.8 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "%%timeit\n", | |
| "\n", | |
| "cp.asarray(array_cpu)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "RsYfRDzI33bz", | |
| "outputId": "7bec0218-f19b-4ae4-9d28-8f9535d0798a" | |
| }, | |
| "execution_count": 8, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "24.2 ms ± 364 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "type(array_gpu)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "w4P3FpZU4x5C", | |
| "outputId": "ae6ccc30-800c-4847-bdba-50ba40341ca8" | |
| }, | |
| "execution_count": null, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "cupy.ndarray" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 19 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
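| "**Using CuPy's SciPy-compatible API (`cupyx.scipy`)**\n", | |
| "\n", | |
| "Note: GPU work is launched asynchronously, so `%%timeit` on a CuPy call can report only the launch overhead unless the device is synchronized before timing stops.\n", | |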
| "\n" | |
| ], | |
| "metadata": { | |
| "id": "cxiSPqH1Iz0i" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from scipy import fft" | |
| ], | |
| "metadata": { | |
| "id": "PPbiz1Y944zk" | |
| }, | |
| "execution_count": 11, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "%%timeit\n", | |
| "\n", | |
| "fft.fftn(array_cpu)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "YW_5hsqN5Fa-", | |
| "outputId": "33d98f5a-224b-45a3-c8e1-742a438c63d5" | |
| }, | |
| "execution_count": 12, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "373 ms ± 52.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from cupyx.scipy import fft as fft_gpu" | |
| ], | |
| "metadata": { | |
| "id": "3sUQe8465eyH" | |
| }, | |
| "execution_count": 13, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "%%timeit\n", | |
| "\n", | |
| "fft_gpu.fftn(array_gpu)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "XPVOi14w5vZ4", | |
| "outputId": "b7a96087-3e36-40c5-e71e-e916c1d5bece" | |
| }, | |
| "execution_count": 14, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "The slowest run took 4.16 times longer than the fastest. This could mean that an intermediate result is being cached.\n", | |
| "120 µs ± 79.4 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" | |
| ] | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
In Colab, every Jupyter command-mode shortcut is prefixed with Ctrl + M
(for example, Ctrl + M then B inserts a cell below).