Last active
March 18, 2020 06:16
-
-
Save ashishpatel26/69cb721f0c7b1a2ec89f351f45e7d3af to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "name": "Stacking.ipynb", | |
| "provenance": [], | |
| "collapsed_sections": [], | |
| "include_colab_link": true | |
| }, | |
| "hide_input": false, | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.6" | |
| }, | |
| "latex_envs": { | |
| "LaTeX_envs_menu_present": true, | |
| "autoclose": false, | |
| "autocomplete": true, | |
| "bibliofile": "biblio.bib", | |
| "cite_by": "apalike", | |
| "current_citInitial": 1, | |
| "eqLabelWithNumbers": true, | |
| "eqNumInitial": 1, | |
| "hotkeys": { | |
| "equation": "Ctrl-E", | |
| "itemize": "Ctrl-I" | |
| }, | |
| "labels_anchors": false, | |
| "latex_user_defs": false, | |
| "report_style_numbering": false, | |
| "user_envs_cfg": false | |
| }, | |
| "nbTranslate": { | |
| "displayLangs": [ | |
| "*" | |
| ], | |
| "hotkey": "alt-t", | |
| "langInMainMenu": true, | |
| "sourceLang": "en", | |
| "targetLang": "fr", | |
| "useGoogleTranslate": true | |
| }, | |
| "toc": { | |
| "base_numbering": 1, | |
| "nav_menu": {}, | |
| "number_sections": true, | |
| "sideBar": true, | |
| "skip_h1_title": false, | |
| "title_cell": "Table of Contents", | |
| "title_sidebar": "Contents", | |
| "toc_cell": false, | |
| "toc_position": {}, | |
| "toc_section_display": true, | |
| "toc_window_display": false | |
| }, | |
| "varInspector": { | |
| "cols": { | |
| "lenName": 16, | |
| "lenType": 16, | |
| "lenVar": 40 | |
| }, | |
| "kernels_config": { | |
| "python": { | |
| "delete_cmd_postfix": "", | |
| "delete_cmd_prefix": "del ", | |
| "library": "var_list.py", | |
| "varRefreshCmd": "print(var_dic_list())" | |
| }, | |
| "r": { | |
| "delete_cmd_postfix": ") ", | |
| "delete_cmd_prefix": "rm(", | |
| "library": "var_list.r", | |
| "varRefreshCmd": "cat(var_dic_list()) " | |
| } | |
| }, | |
| "types_to_exclude": [ | |
| "module", | |
| "function", | |
| "builtin_function_or_method", | |
| "instance", | |
| "_Feature" | |
| ], | |
| "window_display": false | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/github/ashishpatel26/Ensemble-Learning-Algorithm-Medium/blob/master/Stacking.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "COhWmSXqImMq" | |
| }, | |
| "source": [ | |
| "# Import IRIS dataset from sklearn" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:00.937732Z", | |
| "start_time": "2020-03-18T06:08:00.918743Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "LmAaNZs-In8M", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "from sklearn import datasets" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "ZG42X93cIqS-" | |
| }, | |
| "source": [ | |
| "# Import the Random Forest, Logistic Regression, Naive Bayes and KNN classifier classes used to build the stacking ensemble" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:01.042666Z", | |
| "start_time": "2020-03-18T06:08:01.032672Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "gfrzPwfZIpD1", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "from sklearn.ensemble import RandomForestClassifier\n", | |
| "from sklearn.linear_model import LogisticRegression\n", | |
| "from sklearn.naive_bayes import GaussianNB\n", | |
| "from sklearn.neighbors import KNeighborsClassifier" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "9QJhDmDaJYE7" | |
| }, | |
| "source": [ | |
| "# Import numpy for array based operations" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:01.294510Z", | |
| "start_time": "2020-03-18T06:08:01.140606Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "puO_NK5LJWvT", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "import numpy as np" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "gUjuiqWTJe2L" | |
| }, | |
| "source": [ | |
| "# Load the dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:01.334485Z", | |
| "start_time": "2020-03-18T06:08:01.318495Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "J5FFTMLiJdRK", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "iris = datasets.load_iris()" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "cTcZm1U2Jmfb" | |
| }, | |
| "source": [ | |
| "# Extract the data (feature columns 1 and 2 only: sepal width and petal length) and the target from the dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:01.484896Z", | |
| "start_time": "2020-03-18T06:08:01.475902Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "RuxlCFJrJlGx", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "X, y = iris.data[:, 1:3], iris.target" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "Z5CHi94aJ8rw" | |
| }, | |
| "source": [ | |
| "# Define a function to calculate the accuracy of the predicted output against the known labels" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:01.644797Z", | |
| "start_time": "2020-03-18T06:08:01.640799Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "a30zMP9XJyiR", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "def CalculateAccuracy(y_test, pred_label):\n", | |
| " nnz = np.shape(y_test)[0] - np.count_nonzero(pred_label - y_test)\n", | |
| " acc = 100*nnz / float(np.shape(y_test)[0])\n", | |
| " return acc" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "T_ej07aBKYPo" | |
| }, | |
| "source": [ | |
| "# Create a KNN classifier with 2 nearest neighbors" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:01.813692Z", | |
| "start_time": "2020-03-18T06:08:01.805697Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "dbbkJNMRJ5Oo", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "clf1 = KNeighborsClassifier(n_neighbors=2)" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "vIRqPjz1KcFs" | |
| }, | |
| "source": [ | |
| "# Create a random forest classifier with 2 decision trees" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:02.003575Z", | |
| "start_time": "2020-03-18T06:08:01.978590Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "ib8tYBIfKZuG", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "clf2 = RandomForestClassifier(n_estimators=2, random_state=1)" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "cLXFrxIvKgMz" | |
| }, | |
| "source": [ | |
| "# Create a Naive Bayes classifier" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:02.167473Z", | |
| "start_time": "2020-03-18T06:08:02.159478Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "dnC5KuMaKd0u", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "clf3 = GaussianNB()" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "7cxS0kTwKk2f" | |
| }, | |
| "source": [ | |
| "# Finally, create a logistic regression classifier to combine the predictions from the above classifiers." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:02.346868Z", | |
| "start_time": "2020-03-18T06:08:02.338873Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "hRhzOv5lKhqW", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "lr = LogisticRegression()" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "2tedDwXAKrit" | |
| }, | |
| "source": [ | |
| "# Now train all first-level classifiers" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:02.512764Z", | |
| "start_time": "2020-03-18T06:08:02.504770Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "4s4fzZhiKmr1", | |
| "outputId": "09945d58-446b-44e1-ceed-a75ea38c9c77", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 34 | |
| } | |
| }, | |
| "source": [ | |
| "clf1.fit(X,\ty)\n", | |
| "clf2.fit(X,\ty)\n", | |
| "clf3.fit(X,\ty)" | |
| ], | |
| "execution_count": 11, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "GaussianNB(priors=None, var_smoothing=1e-09)" | |
| ] | |
| }, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "execution_count": 11 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "8EhDjAnnKx1z" | |
| }, | |
| "source": [ | |
| "# Predict the labels for the input data with each classifier, print its accuracy (measured on the training data), and store the predictions in arrays (f1, f2, f3)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:02.672666Z", | |
| "start_time": "2020-03-18T06:08:02.663672Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "aOF0lKoFKs_F", | |
| "outputId": "ca16ccf9-21f8-440d-ecbe-6f7bf51b6e72", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 34 | |
| } | |
| }, | |
| "source": [ | |
| "f1 = clf1.predict(X)\n", | |
| "acc1 = CalculateAccuracy(y,\tf1)\n", | |
| "print(\"accuracy from KNN: \"+str(acc1))" | |
| ], | |
| "execution_count": 12, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "accuracy from KNN: 96.66666666666667\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:02.782596Z", | |
| "start_time": "2020-03-18T06:08:02.773602Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "F_JMQ-Z2K8Zc", | |
| "outputId": "8746b75b-33ed-4fbe-9fe9-3b348b309c4e", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 34 | |
| } | |
| }, | |
| "source": [ | |
| "f2 = clf2.predict(X)\n", | |
| "acc2 = CalculateAccuracy(y,\tf2)\n", | |
| "print(\"accuracy from Random Forest: \"+str(acc2))" | |
| ], | |
| "execution_count": 13, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "accuracy from Random Forest: 94.66666666666667\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:02.783596Z", | |
| "start_time": "2020-03-18T06:08:02.716Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "EmBxIl3RLKfb", | |
| "outputId": "b78d2abd-bab3-457b-b8db-551952333e1d", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 34 | |
| } | |
| }, | |
| "source": [ | |
| "f3 = clf3.predict(X)\n", | |
| "acc3 = CalculateAccuracy(y,\tf3)\n", | |
| "print(\"accuracy from Naive Bayes: \"+str(acc3))" | |
| ], | |
| "execution_count": 14, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "accuracy from Naive Bayes: 92.0\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "1xHG9NGDLatn" | |
| }, | |
| "source": [ | |
| "# Combine the predictions into a single array and transpose it to match the input shape of our classifier." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:02.883535Z", | |
| "start_time": "2020-03-18T06:08:02.874541Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "Aa8W_r6ULPTk", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "f = [f1, f2, f3]\n", | |
| "f = np.transpose(f)" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "8Lf29p2-LiEO" | |
| }, | |
| "source": [ | |
| "# Now train the final (logistic regression) classifier on the stacked predictions and predict with it" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:03.041436Z", | |
| "start_time": "2020-03-18T06:08:03.032442Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "YWaO4ygrLeK6", | |
| "colab": {} | |
| }, | |
| "source": [ | |
| "lr.fit(f, y)\n", | |
| "final = lr.predict(f)" | |
| ], | |
| "execution_count": 0, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "colab_type": "text", | |
| "id": "Ep5EeixqLn1Z" | |
| }, | |
| "source": [ | |
| "# Calculate and print the accuracy of the final classifier (measured on the same data it was trained on, so it is an optimistic estimate)." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "ExecuteTime": { | |
| "end_time": "2020-03-18T06:08:03.201840Z", | |
| "start_time": "2020-03-18T06:08:03.193846Z" | |
| }, | |
| "colab_type": "code", | |
| "id": "2gbjrT4VLlCa", | |
| "outputId": "07284535-71fa-4017-b1e9-c7da8a31f0ff", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 34 | |
| } | |
| }, | |
| "source": [ | |
| "acc4 = CalculateAccuracy(y, final)\n", | |
| "print(\"accuracy from Stacking:\"+str(acc4))" | |
| ], | |
| "execution_count": 17, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "text": [ | |
| "accuracy from Stacking:97.33333333333333\n" | |
| ], | |
| "name": "stdout" | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment