Last active
November 25, 2025 01:44
-
-
Save rwcitek/ba1f0eabd82245be17f36e09d8f34397 to your computer and use it in GitHub Desktop.
imputing-missing-not-at-random.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/rwcitek/ba1f0eabd82245be17f36e09d8f34397/imputing-missing-not-at-random.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": { | |
| "id": "lIYdn1woOS1n" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "import numpy as np\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Create a small sample dataset in which Beta_Feature has missing values\n", | |
| "np.random.seed(42)\n", | |
| "df = pd.DataFrame()\n", | |
| "df['Target'] = np.random.randint(0, 2, 10)\n", | |
| "df['Feature_A'] = np.random.rand(10)\n", | |
| "df['Beta_Feature'] = np.array([0.1, 0.2, np.nan, 0.7, 0.9, np.nan, 0.3, 0.4, np.nan, 0.8])\n", | |
| "df\n" | |
| ], | |
| "metadata": { | |
| "id": "9oEwobfHlU6c", | |
| "outputId": "59725800-8dcf-4c6f-a0c5-c814f7f6812f", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 363 | |
| } | |
| }, | |
| "execution_count": 18, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " Target Feature_A Beta_Feature\n", | |
| "0 0 0.155995 0.1\n", | |
| "1 1 0.058084 0.2\n", | |
| "2 0 0.866176 NaN\n", | |
| "3 0 0.601115 0.7\n", | |
| "4 0 0.708073 0.9\n", | |
| "5 1 0.020584 NaN\n", | |
| "6 0 0.969910 0.3\n", | |
| "7 0 0.832443 0.4\n", | |
| "8 0 0.212339 NaN\n", | |
| "9 1 0.181825 0.8" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-d0529cd6-c8f7-4f26-a39c-5d50d504c738\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Target</th>\n", | |
| " <th>Feature_A</th>\n", | |
| " <th>Beta_Feature</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.155995</td>\n", | |
| " <td>0.1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.058084</td>\n", | |
| " <td>0.2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.866176</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.601115</td>\n", | |
| " <td>0.7</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.708073</td>\n", | |
| " <td>0.9</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.020584</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.969910</td>\n", | |
| " <td>0.3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.832443</td>\n", | |
| " <td>0.4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.212339</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.181825</td>\n", | |
| " <td>0.8</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d0529cd6-c8f7-4f26-a39c-5d50d504c738')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-d0529cd6-c8f7-4f26-a39c-5d50d504c738 button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-d0529cd6-c8f7-4f26-a39c-5d50d504c738');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-af3438a0-61b4-4ab2-8b1d-0d9a3e47fc15\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-af3438a0-61b4-4ab2-8b1d-0d9a3e47fc15')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-af3438a0-61b4-4ab2-8b1d-0d9a3e47fc15 button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_68a90d63-2248-4542-9c58-a31113598d92\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_68a90d63-2248-4542-9c58-a31113598d92 button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df", | |
| "summary": "{\n \"name\": \"df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Feature_A\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3698211542093973,\n \"min\": 0.020584494295802447,\n \"max\": 0.9699098521619943,\n \"num_unique_values\": 10,\n \"samples\": [\n 0.21233911067827616,\n 0.05808361216819946\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Beta_Feature\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3132015933791495,\n \"min\": 0.1,\n \"max\": 0.9,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.1,\n 0.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {}, | |
| "execution_count": 18 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Define the column to impute and the arbitrary value\n", | |
| "COLUMN_NAME = 'Beta_Feature'\n", | |
| "ARBITRARY_VALUE = -999 # Sentinel far outside the 0-1 range of the feature, so it cannot be mistaken for a real value\n" | |
| ], | |
| "metadata": { | |
| "id": "dgRuOYQblemm" | |
| }, | |
| "execution_count": 19, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Create a binary indicator column: 1 where the value is present, 0 where it is missing\n", | |
| "indicator_column_name = f'{COLUMN_NAME}_has_value'\n", | |
| "df[indicator_column_name] = df[COLUMN_NAME].notnull().astype(\"int8\")\n", | |
| "df\n" | |
| ], | |
| "metadata": { | |
| "id": "S3hvozP7lXso", | |
| "outputId": "6b7eee7d-06fd-4ec6-9fb9-2724d33dc4c9", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 363 | |
| } | |
| }, | |
| "execution_count": 20, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " Target Feature_A Beta_Feature Beta_Feature_has_value\n", | |
| "0 0 0.155995 0.1 1\n", | |
| "1 1 0.058084 0.2 1\n", | |
| "2 0 0.866176 NaN 0\n", | |
| "3 0 0.601115 0.7 1\n", | |
| "4 0 0.708073 0.9 1\n", | |
| "5 1 0.020584 NaN 0\n", | |
| "6 0 0.969910 0.3 1\n", | |
| "7 0 0.832443 0.4 1\n", | |
| "8 0 0.212339 NaN 0\n", | |
| "9 1 0.181825 0.8 1" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-2a6aa544-f486-4a5e-93dd-e7cb9ff678f1\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Target</th>\n", | |
| " <th>Feature_A</th>\n", | |
| " <th>Beta_Feature</th>\n", | |
| " <th>Beta_Feature_has_value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.155995</td>\n", | |
| " <td>0.1</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.058084</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.866176</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.601115</td>\n", | |
| " <td>0.7</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.708073</td>\n", | |
| " <td>0.9</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.020584</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.969910</td>\n", | |
| " <td>0.3</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.832443</td>\n", | |
| " <td>0.4</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.212339</td>\n", | |
| " <td>NaN</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.181825</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2a6aa544-f486-4a5e-93dd-e7cb9ff678f1')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-2a6aa544-f486-4a5e-93dd-e7cb9ff678f1 button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-2a6aa544-f486-4a5e-93dd-e7cb9ff678f1');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-d6fc00be-be78-4191-bb58-8bc4a77c21d0\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d6fc00be-be78-4191-bb58-8bc4a77c21d0')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-d6fc00be-be78-4191-bb58-8bc4a77c21d0 button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_9e21f89f-2ba4-4ac1-a3e8-79045c1b6b9e\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_9e21f89f-2ba4-4ac1-a3e8-79045c1b6b9e button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df", | |
| "summary": "{\n \"name\": \"df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Feature_A\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3698211542093973,\n \"min\": 0.020584494295802447,\n \"max\": 0.9699098521619943,\n \"num_unique_values\": 10,\n \"samples\": [\n 0.21233911067827616,\n 0.05808361216819946\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Beta_Feature\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3132015933791495,\n \"min\": 0.1,\n \"max\": 0.9,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.1,\n 0.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Beta_Feature_has_value\",\n \"properties\": {\n \"dtype\": \"int8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {}, | |
| "execution_count": 20 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Replace the missing values in COLUMN_NAME with ARBITRARY_VALUE (modifies df in place)\n", | |
| "df.fillna({COLUMN_NAME: ARBITRARY_VALUE}, inplace=True)\n", | |
| "df" | |
| ], | |
| "metadata": { | |
| "id": "85xK9wzslsDO", | |
| "outputId": "35f2fbac-7dbc-4468-d4c7-5f45a0d2fea8", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 363 | |
| } | |
| }, | |
| "execution_count": 21, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| " Target Feature_A Beta_Feature Beta_Feature_has_value\n", | |
| "0 0 0.155995 0.1 1\n", | |
| "1 1 0.058084 0.2 1\n", | |
| "2 0 0.866176 -999.0 0\n", | |
| "3 0 0.601115 0.7 1\n", | |
| "4 0 0.708073 0.9 1\n", | |
| "5 1 0.020584 -999.0 0\n", | |
| "6 0 0.969910 0.3 1\n", | |
| "7 0 0.832443 0.4 1\n", | |
| "8 0 0.212339 -999.0 0\n", | |
| "9 1 0.181825 0.8 1" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <div id=\"df-997a21f3-2b63-4f9f-bd6b-e8f84ef0b33a\" class=\"colab-df-container\">\n", | |
| " <div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>Target</th>\n", | |
| " <th>Feature_A</th>\n", | |
| " <th>Beta_Feature</th>\n", | |
| " <th>Beta_Feature_has_value</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.155995</td>\n", | |
| " <td>0.1</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.058084</td>\n", | |
| " <td>0.2</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.866176</td>\n", | |
| " <td>-999.0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.601115</td>\n", | |
| " <td>0.7</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.708073</td>\n", | |
| " <td>0.9</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.020584</td>\n", | |
| " <td>-999.0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.969910</td>\n", | |
| " <td>0.3</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.832443</td>\n", | |
| " <td>0.4</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>0</td>\n", | |
| " <td>0.212339</td>\n", | |
| " <td>-999.0</td>\n", | |
| " <td>0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>1</td>\n", | |
| " <td>0.181825</td>\n", | |
| " <td>0.8</td>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>\n", | |
| " <div class=\"colab-df-buttons\">\n", | |
| "\n", | |
| " <div class=\"colab-df-container\">\n", | |
| " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-997a21f3-2b63-4f9f-bd6b-e8f84ef0b33a')\"\n", | |
| " title=\"Convert this dataframe to an interactive table.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
| " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| "\n", | |
| " <style>\n", | |
| " .colab-df-container {\n", | |
| " display:flex;\n", | |
| " gap: 12px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-convert:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-buttons div {\n", | |
| " margin-bottom: 4px;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-convert:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| "\n", | |
| " <script>\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#df-997a21f3-2b63-4f9f-bd6b-e8f84ef0b33a button.colab-df-convert');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " async function convertToInteractive(key) {\n", | |
| " const element = document.querySelector('#df-997a21f3-2b63-4f9f-bd6b-e8f84ef0b33a');\n", | |
| " const dataTable =\n", | |
| " await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
| " [key], {});\n", | |
| " if (!dataTable) return;\n", | |
| "\n", | |
| " const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
| " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
| " + ' to learn more about interactive tables.';\n", | |
| " element.innerHTML = '';\n", | |
| " dataTable['output_type'] = 'display_data';\n", | |
| " await google.colab.output.renderOutput(dataTable, element);\n", | |
| " const docLink = document.createElement('div');\n", | |
| " docLink.innerHTML = docLinkHtml;\n", | |
| " element.appendChild(docLink);\n", | |
| " }\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| "\n", | |
| " <div id=\"df-0b30a286-d761-4ea9-b0a4-b28caba8b623\">\n", | |
| " <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-0b30a286-d761-4ea9-b0a4-b28caba8b623')\"\n", | |
| " title=\"Suggest charts\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <g>\n", | |
| " <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
| " </g>\n", | |
| "</svg>\n", | |
| " </button>\n", | |
| "\n", | |
| "<style>\n", | |
| " .colab-df-quickchart {\n", | |
| " --bg-color: #E8F0FE;\n", | |
| " --fill-color: #1967D2;\n", | |
| " --hover-bg-color: #E2EBFA;\n", | |
| " --hover-fill-color: #174EA6;\n", | |
| " --disabled-fill-color: #AAA;\n", | |
| " --disabled-bg-color: #DDD;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-quickchart {\n", | |
| " --bg-color: #3B4455;\n", | |
| " --fill-color: #D2E3FC;\n", | |
| " --hover-bg-color: #434B5C;\n", | |
| " --hover-fill-color: #FFFFFF;\n", | |
| " --disabled-bg-color: #3B4455;\n", | |
| " --disabled-fill-color: #666;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart {\n", | |
| " background-color: var(--bg-color);\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: var(--fill-color);\n", | |
| " height: 32px;\n", | |
| " padding: 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart:hover {\n", | |
| " background-color: var(--hover-bg-color);\n", | |
| " box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: var(--button-hover-fill-color);\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-quickchart-complete:disabled,\n", | |
| " .colab-df-quickchart-complete:disabled:hover {\n", | |
| " background-color: var(--disabled-bg-color);\n", | |
| " fill: var(--disabled-fill-color);\n", | |
| " box-shadow: none;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-spinner {\n", | |
| " border: 2px solid var(--fill-color);\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " animation:\n", | |
| " spin 1s steps(1) infinite;\n", | |
| " }\n", | |
| "\n", | |
| " @keyframes spin {\n", | |
| " 0% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " }\n", | |
| " 20% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 30% {\n", | |
| " border-color: transparent;\n", | |
| " border-left-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 40% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-top-color: var(--fill-color);\n", | |
| " }\n", | |
| " 60% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " }\n", | |
| " 80% {\n", | |
| " border-color: transparent;\n", | |
| " border-right-color: var(--fill-color);\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " 90% {\n", | |
| " border-color: transparent;\n", | |
| " border-bottom-color: var(--fill-color);\n", | |
| " }\n", | |
| " }\n", | |
| "</style>\n", | |
| "\n", | |
| " <script>\n", | |
| " async function quickchart(key) {\n", | |
| " const quickchartButtonEl =\n", | |
| " document.querySelector('#' + key + ' button');\n", | |
| " quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
| " quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
| " try {\n", | |
| " const charts = await google.colab.kernel.invokeFunction(\n", | |
| " 'suggestCharts', [key], {});\n", | |
| " } catch (error) {\n", | |
| " console.error('Error during call to suggestCharts:', error);\n", | |
| " }\n", | |
| " quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
| " quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
| " }\n", | |
| " (() => {\n", | |
| " let quickchartButtonEl =\n", | |
| " document.querySelector('#df-0b30a286-d761-4ea9-b0a4-b28caba8b623 button');\n", | |
| " quickchartButtonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " <div id=\"id_923dd2d0-803f-40b8-b268-34c74cc9e9df\">\n", | |
| " <style>\n", | |
| " .colab-df-generate {\n", | |
| " background-color: #E8F0FE;\n", | |
| " border: none;\n", | |
| " border-radius: 50%;\n", | |
| " cursor: pointer;\n", | |
| " display: none;\n", | |
| " fill: #1967D2;\n", | |
| " height: 32px;\n", | |
| " padding: 0 0 0 0;\n", | |
| " width: 32px;\n", | |
| " }\n", | |
| "\n", | |
| " .colab-df-generate:hover {\n", | |
| " background-color: #E2EBFA;\n", | |
| " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
| " fill: #174EA6;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate {\n", | |
| " background-color: #3B4455;\n", | |
| " fill: #D2E3FC;\n", | |
| " }\n", | |
| "\n", | |
| " [theme=dark] .colab-df-generate:hover {\n", | |
| " background-color: #434B5C;\n", | |
| " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
| " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
| " fill: #FFFFFF;\n", | |
| " }\n", | |
| " </style>\n", | |
| " <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n", | |
| " title=\"Generate code using this dataframe.\"\n", | |
| " style=\"display:none;\">\n", | |
| "\n", | |
| " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
| " width=\"24px\">\n", | |
| " <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n", | |
| " </svg>\n", | |
| " </button>\n", | |
| " <script>\n", | |
| " (() => {\n", | |
| " const buttonEl =\n", | |
| " document.querySelector('#id_923dd2d0-803f-40b8-b268-34c74cc9e9df button.colab-df-generate');\n", | |
| " buttonEl.style.display =\n", | |
| " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
| "\n", | |
| " buttonEl.onclick = () => {\n", | |
| " google.colab.notebook.generateWithVariable('df');\n", | |
| " }\n", | |
| " })();\n", | |
| " </script>\n", | |
| " </div>\n", | |
| "\n", | |
| " </div>\n", | |
| " </div>\n" | |
| ], | |
| "application/vnd.google.colaboratory.intrinsic+json": { | |
| "type": "dataframe", | |
| "variable_name": "df", | |
| "summary": "{\n \"name\": \"df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Target\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Feature_A\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3698211542093973,\n \"min\": 0.020584494295802447,\n \"max\": 0.9699098521619943,\n \"num_unique_values\": 10,\n \"samples\": [\n 0.21233911067827616,\n 0.05808361216819946\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Beta_Feature\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 482.7975356652563,\n \"min\": -999.0,\n \"max\": 0.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.2,\n 0.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Beta_Feature_has_value\",\n \"properties\": {\n \"dtype\": \"int8\",\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
| } | |
| }, | |
| "metadata": {}, | |
| "execution_count": 21 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "F2xb2pBnmg2o" | |
| }, | |
| "execution_count": 21, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "name": "python3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment